def setupSelfPlayZero(self):
    # Load the self-play ("Zero") model whose filename encodes
    # (ishumanFirstPlayer, ishumanCut, iteration index).
    #r = random.randint(0, self.iterNo - 1)
    r = self.iterNo - 1
    first = 1 if self.ishumanFirstPlayer else 0
    cut = 1 if self.ishumanCut else 0
    self.model = deepq.load_act(
        "model/selfPlayZero/shannon_switching_train_{}_{}_{}.pkl".format(first, cut, r))
    print("Self play Zero set up")
Example #2
def main():
    # setup environment
    computerType = sys.argv[1]
    ishumanFirstPlayer = int(sys.argv[2])
    ishumanCut = int(sys.argv[3])
    fileName = sys.argv[4]
    env = gym.make('shannon_switching-v0')
    env.configureEnvironment(computerType=computerType,
                             ishumanFirstPlayer=ishumanFirstPlayer,
                             ishumanCut=ishumanCut,
                             iterNo=20,
                             epsilon=0.2)
    print("ishumanFirstPlayer ", ishumanFirstPlayer)
    print("ishumanCut", ishumanCut)
    model = deepq.load_act(fileName)
    totalIterations = 2000
    totalWins = 0
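    # Roll out the loaded policy for totalIterations episodes; an episode that ends with reward 1000 is counted as a win.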
    for i in range(totalIterations):
        print(i)
        state = env.reset()
        while True:
            state, reward, isOver, __ = env.step(model.step(state)[0][0])
            if isOver != 0:
                break
        if reward == 1000:
            totalWins += 1
    print("Accuracy: ", totalWins / totalIterations)
Example #3
def main():
    ##np.seterr(all='raise')
    env = gym.make("apl-v0")
    #act = deepq.learn(
        #env,
        #network='mlp',
        #lr=1e-3,
        #checkpoint_freq=None,
        #total_timesteps=int(1e5),
        #buffer_size=50000,
        #exploration_fraction=0.1,
        #exploration_final_eps=0.02,
        #print_freq=10,
        #load_path="./models/apl-v0-dqn-20181003-151750",
        #callback=callback
    #)
    #timestr = time.strftime("%Y%m%d-%H%M%S")
    #act.save("./models/apl-v0-dqn-" + timestr)
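    # Load a previously saved act function instead of retraining; the commented-out deepq.learn call above is presumably the training setup that produced such a pickle.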
    act = deepq.load_act("./models/apl-v0-dqn-20181003-152611.pickle")
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #4
def load_agent_from_file(name):
    """Loads a pickled RL agent from file"""
    from baselines.deepq import load_act

    # needed to get the unpickling to work since the pickling is done
    # from a __name__=="__main__"
    # pylint: disable=unused-import
    from q_network import EdgeQNetwork

    act = load_act(name)
    return act
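A minimal usage sketch for the helper above (the pickle name is hypothetical, and the environment is assumed to be a Gym environment compatible with the trained EdgeQNetwork; the act(obs[None])[0] call convention mirrors the other examples):

agent = load_agent_from_file("edge_q_agent.pkl")  # hypothetical pickle name
obs = env.reset()                                 # env: a compatible Gym environment (not shown here)
action = agent(obs[None])[0]                      # query the loaded act function on a batch of one observation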
Example #5
def main():
    print('-*-*-*- enjoy worker -*-*-*-')
    # tf.graph().as_default()
    # tf.reset_default_graph()
    env = gym.make("CartPole-v0")
    act = deepq.load_act("model.pkl")
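    # "model.pkl" is assumed to be an act function saved by an earlier deepq training run on CartPole-v0.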

    max_episodes = 5

    while max_episodes > 0:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
        max_episodes = max_episodes - 1
Example #6
def test(env, load_path, num_episodes=1000):
    act = deepq.load_act(load_path + ".pkl")
    # success_count=0.0
    test_render_file = open(load_path + ".txt", "w")
    best_obs = np.ones(env.n * env.m, dtype=int)
    best_episode_rew = -1 * env.n
    for i in range(num_episodes):
        obs, done = env.reset(), False
        episode_rew = 0.0
        while not done:
            render_string = env.render(mode='ansi') + "\n"
            test_render_file.write(render_string)
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        if episode_rew < best_episode_rew:
            best_episode_rew = episode_rew
            best_obs = obs
        render_string = env.render(mode='ansi') + "\n"
        test_render_file.write(render_string)
        test_render_file.write("Episode reward " + str(episode_rew) + "\n")
    test_render_file.close()
    obs_pm1 = best_obs * 2 - 1
    state_pm1 = np.reshape(obs_pm1, [env.m, env.n])
    print('State')
    print(best_obs)
    print(state_pm1)
    print()
    pairs = np.array(list(itertools.combinations(range(env.m), 2)))
    first_i = pairs[:, 0]
    second_i = pairs[:, 1]

    # Periodic (circular) autocorrelation of each +/-1 row via FFT: reversing a row,
    # multiplying spectra, inverse-transforming and reversing again places the
    # zero-lag (main) peak at index 0, so columns 1: hold the side peaks.
    auto_corr_vectors = np.flip(np.fft.ifft(
        np.fft.fft(np.flip(state_pm1, axis=1)) * np.fft.fft(state_pm1)),
                                axis=1)
    mean_sqr_side_peak_auto = np.mean(
        np.square(np.abs(auto_corr_vectors[:, 1:])))
    var_sqr_side_peak_auto = np.var(
        np.mean(np.square(np.abs(auto_corr_vectors[:, 1:])), axis=1))
    var_sqr_side_peak_auto_norm = np.var(
        np.mean(np.square(np.abs(auto_corr_vectors[:, 1:])), axis=1) /
        (env.n * env.n))
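    # Normalizing by n*n expresses the squared side peaks relative to the main-peak power:
    # the zero-lag peak of a length-n +/-1 sequence is n, so its square is n*n.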
    print('Auto')
    print(np.real(auto_corr_vectors))
    print()

    # Average balance: absolute sum of each +/-1 row, averaged over rows (0 = perfectly balanced).
    bal = np.mean(np.abs(np.sum(state_pm1, axis=1)))

    # Periodic cross-correlation for every pair of distinct rows, via the same FFT construction.
    cross_corr_vectors = np.flip(np.fft.ifft(
        np.fft.fft(np.flip(state_pm1[first_i, :], axis=1)) *
        np.fft.fft(state_pm1[second_i, :])),
                                 axis=1)
    mean_sqr_side_peak_cross = np.mean(np.square(np.abs(cross_corr_vectors)))
    var_sqr_side_peak_cross = np.var(
        np.mean(np.square(np.abs(cross_corr_vectors)), axis=1))
    var_sqr_side_peak_cross_norm = np.var(
        np.mean(np.square(np.abs(cross_corr_vectors)), axis=1) /
        (env.n * env.n))
    print('Cross')
    print(np.real(cross_corr_vectors))
    print()

    mean_sqr_side_peak = 0.5 * mean_sqr_side_peak_auto + 0.5 * mean_sqr_side_peak_cross

    print('Mean sqr (auto):', mean_sqr_side_peak_auto)
    print('Mean sqr (cross):', mean_sqr_side_peak_cross)
    print('Mean sqr:', mean_sqr_side_peak)
    print('Var sqr (auto):', var_sqr_side_peak_auto)
    print('Var sqr (cross):', var_sqr_side_peak_cross)
    print('Mean bal:', bal)

    print()
    print('----------Normalized----------')
    print('Mean sqr (auto):', mean_sqr_side_peak_auto / (env.n * env.n))
    print('Mean sqr (cross):', mean_sqr_side_peak_cross / (env.n * env.n))
    print('Mean sqr:', mean_sqr_side_peak / (env.n * env.n))
    print('Var sqr (auto):', var_sqr_side_peak_auto_norm)
    print('Var sqr (cross):', var_sqr_side_peak_cross_norm)
    print()
Example #7
def main():
    env = gym.make(ENV)
    act = deepq.load_act(MODEL)
    steps = 0
    outfile = open(FILE, 'w')
    bcfile = open(BC_FILE, 'w')
    total_reward = 0
    episodes = 0

    while steps < 50000:
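        # With probability RANDOM the loaded policy picks the action, otherwise a random
        # action is sampled; transitions are written out only when RANDOM == DEFAULT.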
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            #env.render()
            state_1 = obs

            if np.random.uniform(0, 1) <= RANDOM:
                action = act(obs[None])[0]
            else:
                action = env.action_space.sample()

            obs, rew, done, _ = env.step(action)
            state_2 = obs

            if RANDOM == DEFAULT:
                # Serialize the pre- and post-transition states as bracketed,
                # comma-separated lists.
                state_1_str = '[' + ','.join(str(w) for w in state_1) + ']'
                state_2_str = '[' + ','.join(str(w) for w in state_2) + ']'

                # write to AON file: "state_1 state_2"
                outfile.write(state_1_str + " " + state_2_str + "\n")

                # write to BC file: "state_1 [action] state_2"
                bcfile.write(state_1_str + " [" + str(action) + "] " + state_2_str + "\n")

            episode_rew += rew

            steps += 1

        print(steps)
        print("Episode reward", episode_rew)
        total_reward += episode_rew
        episodes += 1.

    print("Average reward", total_reward / episodes)
    outfile.close()
    bcfile.close()
Example #8
    def __init__(self):
        expert_path = data_root_path / "experts/mountaincar_deepq_custom.pickle"
        from baselines import deepq
        self.expert = deepq.load_act(expert_path)
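A hedged sketch of how the loaded expert might be queried later (the method name is an assumption; the act(obs[None])[0] call convention mirrors the examples above):

    def act(self, obs):
        # Query the expert policy on a batch of one observation and return the single action.
        return self.expert(obs[None])[0]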