Example #1
def get_random_state(game_name, random_steps=20):
    gym_env = gym.make(game_name)
    env = CustomGym(gym_env, game_name)

    # Reset the environment, then take random_steps random actions
    state = env.reset()
    for _ in range(random_steps):
        state, _, _, _ = env.step(random.randrange(env.action_size))

    return state
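A minimal usage sketch for the function above; it assumes gym, random and CustomGym are already imported as in the surrounding repository, and the game name is only a placeholder:
# Hypothetical call: warm up an environment with a few random actions
start_state = get_random_state('SpaceInvaders-v0', random_steps=20)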
Example #2
def main(argv):
    save_path = None
    T = None
    game_name = None
    try:
        opts, args = getopt.getopt(argv, "g:s:T:")
    except getopt.GetoptError:
        print("Usage: python run_agent.py -g <game name> -s <save path> -T <T>")
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-g':
            game_name = arg
        elif opt == '-s':
            save_path = arg
        elif opt == '-T':
            T = arg
    if game_name is None:
        print("No game name specified")
        sys.exit()
    if save_path is None:
        print("No save path specified")
        sys.exit()
    if T is None:
        print("No T specified")
        sys.exit()
    print("Reading from", save_path)
    print("Running agent")
    env = CustomGym(game_name)
    run_agent(env, save_path, T, game_name)
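For reference, a hypothetical command line that satisfies the option parsing above; the game name, save path and T value are placeholders:
# python run_agent.py -g SpaceInvaders-v0 -s checkpoints/model -T 1000000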
Example #3
def qlearn(game_name, nb_threads=8):
    processes = []
    envs = []
    for _ in range(nb_threads):
        gym_env = gym.make(game_name)
        env = CustomGym(gym_env)
        envs.append(env)

    T_queue = queue.Queue()
    T_queue.put(0)

    with tf.Session() as sess:
        agent = Agent(session=sess, action_size=envs[0].action_size,
                      h=84, w=84, channels=STATE_FRAMES,
                      optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))
        sess.run(tf.global_variables_initializer())

        summary = Summary('tensorboard', agent)

        for i in range(nb_threads):
            processes.append(threading.Thread(target=async_trainer,
                                              args=(agent, envs[i], sess, i, T_queue, summary,)))
        for p in processes:
            p.daemon = True
            p.start()

        while not training_finished:
            sleep(0.01)
        for p in processes:
            p.join()
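The async_trainer target is not included in these examples. The following is only a rough sketch of what such a worker loop might look like; it assumes numpy is imported as np, that the Agent exposes get_policy_and_value() (as used in Example #6) and some train() update method, and T_MAX and t_max are made-up constants rather than values from the source:
def async_trainer(agent, env, sess, thread_idx, T_queue, summary):
    # Rough sketch only; the real worker in the source repository may differ.
    T_MAX = 100000000   # assumed total step budget
    t_max = 5           # assumed rollout length per update

    state = env.reset()
    while True:
        # Read and advance the shared global step counter
        T = T_queue.get()
        T_queue.put(T + t_max)
        if T >= T_MAX:
            break

        states, actions, rewards = [], [], []
        for _ in range(t_max):
            policy, _value = agent.get_policy_and_value(state)
            action = np.random.choice(agent.action_size, p=policy)
            next_state, reward, terminal, _ = env.step(action)
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            state = next_state
            if terminal:
                state = env.reset()
                break

        # Hypothetical update call; the real Agent interface is not shown here.
        agent.train(states, actions, rewards)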
Example #4
def a3c(game_name, num_threads=8, restore=None, save_path='model'):
    processes = []
    envs = []
    for _ in range(num_threads+1):
        gym_env = gym.make(game_name)
        if game_name == 'CartPole-v0':
            env = CustomGymClassicControl(gym_env)
        else:
            print('Assuming ATARI game and playing with pixels')
            env = CustomGym(gym_env, game_name, nb_frames=1)
        envs.append(env)

    # Separate out the evaluation environment
    evaluation_env = envs[0]
    envs = envs[1:]

    with tf.Session() as sess:
        agent = Agent(session=sess,
                      action_size=envs[0].action_size,
                      model='mnih-lstm',
                      optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))

        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver(max_to_keep=2)

        T_queue = queue.Queue()

        # Either restore the parameters or don't.
        if restore is not None:
            saver.restore(sess, save_path + '-' + str(restore))
            last_T = restore
            print('T was:', last_T)
            T_queue.put(last_T)
        else:
            sess.run(tf.global_variables_initializer())
            T_queue.put(0)

        summary = Summary(save_path, agent)

        # Create a thread for each training worker
        for i in range(num_threads):
            processes.append(threading.Thread(target=async_trainer,
                                              args=(agent, envs[i], sess, i, T_queue,
                                                    summary, saver, save_path,)))

        # Create a thread to evaluate the agent
        processes.append(threading.Thread(target=evaluator,
                                          args=(agent, evaluation_env, sess, T_queue,
                                                summary, saver, save_path,)))

        # Start all the threads
        for p in processes:
            p.daemon = True
            p.start()

        # Until training is finished
        while not training_finished:
            sleep(0.01)

        # Join the threads, so the main thread blocks until training finishes
        for p in processes:
            p.join()
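A hypothetical way to start or resume training with a3c; the game name and checkpoint step are placeholders:
# Fresh run
a3c('SpaceInvaders-v0', num_threads=8, save_path='model')

# Resume from a checkpoint saved at global step 1000000 (placeholder value)
a3c('SpaceInvaders-v0', num_threads=8, restore=1000000, save_path='model')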
Example #5
def run(double_dqn, task_num):
    title = "Double_Deep_Q" if double_dqn else "Deep_Q"
    env = Monitor(
        CustomGym(
            agentXY,
            goalXY,
            tasks[task_num][0],
            tasks[task_num][1],
            title=f"{title}_Task_{task_num + 1}",
        ),
        filename=None,
    )
    model = DQN(MlpPolicy,
                env,
                verbose=verbose,
                gamma=gamma,
                learning_rate=learning_rate,
                double_q=bool(double_dqn))
    while len(env.get_episode_rewards()) < episodes:
        model.learn(total_timesteps=time_steps)
    env.save_csv()
    env.destroy()
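A sketch of how run might be driven over the task list; the tasks, episodes and time_steps globals are assumed to be defined elsewhere in that repository:
# Hypothetical driver: train a plain DQN and a double DQN on every task
for task_num in range(len(tasks)):
    run(double_dqn=False, task_num=task_num)
    run(double_dqn=True, task_num=task_num)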
Example #6
def play(agent,
         game_name,
         render=True,
         num_episodes=10,
         fps=5.0,
         monitor=True):
    gym_env = gym.make(game_name)
    if monitor:
        print(gym_env)
        gym_env = wrappers.Monitor(gym_env, 'videos/-v0')
    print(gym_env)
    print(game_name)
    env = CustomGym(game_name)

    desired_frame_length = 1.0 / fps

    episode_rewards = []
    episode_vals = []
    t = 0
    for ep in range(num_episodes):
        print("Starting episode", ep)
        episode_reward = 0
        state = env.reset()
        terminal = False
        current_time = time()
        while not terminal:
            policy, value = agent.get_policy_and_value(state)
            action_idx = np.random.choice(agent.action_size, p=policy)
            state, reward, terminal, _ = env.step(action_idx)
            if render:
                env.render()
            t += 1
            episode_vals.append(value)
            episode_reward += reward
            # Sleep so the frame rate is correct
            next_time = time()
            frame_length = next_time - current_time
            if frame_length < desired_frame_length:
                sleep(desired_frame_length - frame_length)
            current_time = next_time
        episode_rewards.append(episode_reward)
    if monitor:
        gym_env.close()
    return episode_rewards, episode_vals
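A hypothetical way to combine play with the Agent and checkpoints from Example #4; the action size, checkpoint name and game are placeholders, and the Agent constructor is assumed to match the earlier example:
with tf.Session() as sess:
    agent = Agent(session=sess, action_size=6, model='mnih-lstm',
                  optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))
    saver = tf.train.Saver()
    saver.restore(sess, 'model-1000000')   # placeholder checkpoint path
    rewards, values = play(agent, 'SpaceInvaders-v0', render=True, num_episodes=5)
    print('Mean episode reward:', sum(rewards) / len(rewards))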