Example #1
import time
from collections import deque

import gym
import numpy as np

# Project-local helpers assumed in scope: Agent, StateAgent,
# load_predictive_model, init_queue, preprocess_frame, preprocess_frame_dqn.
def test_against_environment(env_name, num_runs, agent_name):
    env = gym.make(env_name)
    # env.seed(0)
    try:
        predictor = load_predictive_model(env_name, env.action_space.n)
        if agent_name == 'Next_agent':
            agent = StateAgent(env.action_space.n, env_name)
            agent.set_weights()
        elif agent_name == 'DQN':
            agent = Agent(gamma=0.99,
                          epsilon=0.00,
                          alpha=0.0001,
                          input_dims=(104, 80, 4),
                          n_actions=env.action_space.n,
                          mem_size=25000,
                          eps_min=0.00,
                          batch_size=32,
                          replace=1000,
                          eps_dec=1e-5,
                          env_name=env_name)
            agent.load_models()
    except Exception:
        print(
            "Error loading model, check environment name and action space dimensions"
        )
        raise

    rewards = []

    start = time.time()

    total_steps = 0.0
    for i in range(num_runs):
        frame_queue = deque(maxlen=4)

        observation = env.reset()
        done = False

        if agent_name == 'DQN':
            init_queue(frame_queue, observation, True)
        else:
            init_queue(frame_queue, observation)

        total_reward = 0.0
        frame_count = 0
        while not done:
            observation_states = np.concatenate(frame_queue, axis=2)

            # Hard-code the first action to launch the ball in Breakout, since
            # the next-state agent otherwise just keeps moving to the left
            if agent_name == 'Next_agent':
                if env_name == 'BreakoutDeterministic-v4' and not frame_count:
                    agent_action = 1
                else:
                    next_states = predictor.generate_output_states(
                        np.expand_dims(observation_states, axis=0))
                    agent_action = agent.choose_action_from_next_states(
                        np.expand_dims(next_states, axis=0))
            elif agent_name == 'DQN':
                agent_action = agent.choose_action(observation_states)
            else:
                agent_action = env.action_space.sample()

            observation, reward, done, _ = env.step(agent_action)
            total_reward += reward
            frame_count += 1
            total_steps += 1

            # drop the oldest frame and push the newest to the front
            frame_queue.pop()
            if agent_name == 'DQN':
                frame_queue.appendleft(preprocess_frame_dqn(observation))
            else:
                frame_queue.appendleft(preprocess_frame(observation))

        print("Completed episode {} with reward {}".format(
            i + 1, total_reward))
        rewards.append(total_reward)
    end = time.time()

    time_taken = (end - start) / total_steps  # average seconds per environment step

    print("Test complete - Average score: {}    Max score: {}".format(
        np.average(rewards), np.max(rewards)))
    return (rewards, time_taken)
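
A minimal invocation sketch (the run count here is arbitrary; assumes Gym's Atari environments and the project's saved model files are available):

rewards, sec_per_step = test_against_environment('BreakoutDeterministic-v4', 10, 'DQN')
print('mean reward: {:.1f}, seconds/step: {:.4f}'.format(np.mean(rewards), sec_per_step))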
Example #2
def init_queue(queue, observation, dqn=False):
    # Seed the frame stack with four copies of the initial observation.
    for _ in range(4):
        if dqn:
            queue.append(preprocess_frame_dqn(observation))
        else:
            queue.append(preprocess_frame(observation))
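
For illustration, seeding and stacking a queue might look like this (the per-frame (104, 80, 1) shape is an assumption, inferred from the input_dims=(104, 80, 4) used by the Agent):

frame_queue = deque(maxlen=4)
observation = env.reset()
init_queue(frame_queue, observation)           # four copies of the first frame
stacked = np.concatenate(frame_queue, axis=2)  # (104, 80, 4) if each frame is (104, 80, 1)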
Example #3
def main(args):
    env_name = args.env_name
    env = gym.make(env_name)
    env.seed(0)

    num_games = 5
    agent = Agent(gamma=0.99,
                  epsilon=0.0,
                  alpha=0.0001,
                  input_dims=(104, 80, 4),
                  n_actions=env.action_space.n,
                  mem_size=25000,
                  eps_min=0.02,
                  batch_size=32,
                  replace=1000,
                  eps_dec=1e-5,
                  env_name=env_name)

    try:
        agent.load_models()
    except Exception:
        print('No DQN models found for %s in models folder' % env_name)
        raise

    scores, eps_history = [], []
    n_steps = 0

    for i in range(num_games):
        done = False
        observation = env.reset()
        frame_queue = deque(maxlen=4)

        observation = preprocess_frame_dqn(observation)
        for _ in range(4):
            frame_queue.append(observation)
        observation = np.concatenate(frame_queue, axis=2)

        score = 0
        while not done:
            action = agent.choose_action(observation)
            next_frame, reward, done, info = env.step(action)

            n_steps += 1
            score += reward

            frame_queue.pop()
            frame_queue.appendleft(preprocess_frame_dqn(next_frame))

            observation = np.concatenate(frame_queue, axis=2)

        scores.append(score)

        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.3f' % avg_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

        eps_history.append(agent.epsilon)
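
Each of these main(args) entry points only reads attributes off args, so a minimal command-line wrapper might look like this (a sketch; the default environment is just an example):

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Evaluate a trained DQN agent')
    parser.add_argument('--env_name', type=str, default='BreakoutDeterministic-v4')
    main(parser.parse_args())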
Example #4
def main(args):
    env_name = args.env_name
    new_model = args.new_model
    num_games = args.num_games

    env = gym.make(env_name)
    env.seed(0)

    # terminating_steps = 250000

    # Start below any achievable score so the first moving average always
    # triggers a save; -inf works for any game.
    best_score = -float('inf')
    agent = Agent(gamma=0.99,
                  epsilon=1.0,
                  alpha=0.0001,
                  input_dims=(104, 80, 4),
                  n_actions=env.action_space.n,
                  mem_size=25000,
                  eps_min=0.02,
                  batch_size=32,
                  replace=1000,
                  eps_dec=1e-5,
                  env_name=env_name)

    if not new_model:
        try:
            agent.load_models()
        except Exception:
            print('No DQN models found for %s in models folder' % env_name)
            raise

    scores, eps_history = [], []
    n_steps = 0

    for i in range(num_games):
        done = False
        observation = env.reset()
        frame_queue = deque(maxlen=4)

        observation = preprocess_frame_dqn(observation)
        for _ in range(4):
            frame_queue.append(observation)
        observation = np.concatenate(frame_queue, axis=2)

        score = 0

        while not done:
            action = agent.choose_action(observation)
            next_frame, reward, done, info = env.step(action)

            n_steps += 1
            score += reward

            frame_queue.pop()
            frame_queue.appendleft(preprocess_frame_dqn(next_frame))

            observation_ = np.concatenate(frame_queue, axis=2)

            agent.store_transition(observation, action, reward, observation_,
                                   int(done))
            agent.learn()

            observation = observation_

        scores.append(score)

        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.3f' % avg_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

        if avg_score > best_score:
            agent.save_models()
            print('avg score %.2f better than best score %.2f, saving model' %
                  (avg_score, best_score))
            best_score = avg_score

        eps_history.append(agent.epsilon)
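
As a sanity check on the exploration schedule: assuming the Agent applies eps_dec as a linear per-step decay (an assumption about its internals, consistent with the eps_min/eps_dec naming), epsilon reaches its floor after

(1.0 - 0.02) / 1e-5 = 98,000 environment steps,

i.e. exploration effectively ends within the first ~100k frames of training.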
Example #5
def generate_agent_episodes(args):

    full_path = os.path.join(ROLLOUT_DIR, 'rollout_' + args.env_name)

    if not os.path.exists(full_path):
        os.umask(0o000)
        os.makedirs(full_path)

    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps

    envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = gym.make(current_env_name)  # Create the environment
        env.seed(0)

        # First load the DQN agent and the predictive autoencoder with their weights
        agent = Agent(gamma=0.99,
                      epsilon=0.0,
                      alpha=0.0001,
                      input_dims=(104, 80, 4),
                      n_actions=env.action_space.n,
                      mem_size=25000,
                      eps_min=0.0,
                      batch_size=32,
                      replace=1000,
                      eps_dec=1e-5,
                      env_name=current_env_name)
        agent.load_models()

        predictor = load_predictive_model(current_env_name, env.action_space.n)

        s = 0

        while s < total_episodes:

            rollout_file = os.path.join(full_path, 'rollout-%d.npz' % s)

            observation = env.reset()
            frame_queue = deque(maxlen=4)
            dqn_queue = deque(maxlen=4)

            t = 0

            next_state_sequence = []
            correct_state_sequence = []
            total_reward = 0
            while t < time_steps:
                # preprocess frames for predictive model and dqn
                converted_obs = preprocess_frame(observation)
                converted_obs_dqn = preprocess_frame_dqn(observation)

                if t == 0:
                    for _ in range(4):
                        frame_queue.append(converted_obs)
                        dqn_queue.append(converted_obs_dqn)
                else:
                    frame_queue.pop()
                    dqn_queue.pop()
                    frame_queue.appendleft(converted_obs)
                    dqn_queue.appendleft(converted_obs_dqn)

                observation_states = np.concatenate(frame_queue, axis=2)
                dqn_states = np.concatenate(dqn_queue, axis=2)
                next_states = predictor.generate_output_states(
                    np.expand_dims(observation_states, axis=0))
                next_state_sequence.append(next_states)
                action = agent.choose_action(dqn_states)
                correct_state_sequence.append(
                    encode_action(env.action_space.n, action))

                observation, reward, done, info = env.step(
                    action)  # take the DQN agent's action, not a random one
                total_reward += reward
                t = t + 1

            print(
                "Episode {} finished after {} timesteps with reward {}".format(
                    s, t, total_reward))

            np.savez_compressed(rollout_file,
                                next=next_state_sequence,
                                correct=correct_state_sequence)

            s = s + 1

        env.close()
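
Each saved archive can be read back with np.load; the keys match the keywords passed to np.savez_compressed above:

data = np.load(os.path.join(full_path, 'rollout-0.npz'))
next_states = data['next']        # predictor outputs, one per timestep
actions_onehot = data['correct']  # one-hot encoded agent actions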
Example #6
def main(args):

    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps
    informed = args.informed
    # action_refresh_rate = args.action_refresh_rate

    if informed:
        full_path = os.path.join(ROLLOUT_DIR, 'informed_rollout_' + args.env_name)
    else:
        full_path = os.path.join(ROLLOUT_DIR, 'random_rollout_' + args.env_name)

    if not os.path.exists(full_path):
        os.umask(0o000)
        os.makedirs(full_path)

    envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        env = gym.make(current_env_name)  # Create the environment
        env.seed(0)

        s = 0

        if informed:
            agent = load_dqn(env)

        while s < total_episodes:

            rollout_file = os.path.join(full_path, 'rollout-%d.npz' % s)

            observation = env.reset()
            frame_queue = deque(maxlen=4)
            dqn_queue = deque(maxlen=4)

            t = 0

            obs_sequence = []
            action_sequence = []
            next_sequence = []

            while t < time_steps:

                # convert image to greyscale, downsize
                converted_obs = preprocess_frame(observation)

                if t == 0:
                    for _ in range(4):
                        frame_queue.append(converted_obs)
                else:
                    frame_queue.pop()
                    frame_queue.appendleft(converted_obs)

                stacked_state = np.concatenate(frame_queue, axis=2)
                obs_sequence.append(stacked_state)

                if informed:
                    dqn_obs = preprocess_frame_dqn(observation)
                    if t == 0:
                        for _ in range(4):
                            dqn_queue.append(dqn_obs)
                    else:
                        dqn_queue.pop()
                        dqn_queue.appendleft(dqn_obs)
                    stacked = np.concatenate(dqn_queue, axis=2)
                    action = agent.choose_action(stacked)
                else:
                    action = env.action_space.sample()

                action_sequence.append(
                    encode_action(env.action_space.n, action))

                # done is ignored here: rollouts run for a fixed time_steps budget.
                # The action is random unless the informed flag is set.
                observation, _, _, _ = env.step(action)
                t = t + 1

                next_sequence.append(preprocess_frame(observation))

            print("Episode {} finished after {} timesteps".format(s, t))

            np.savez_compressed(rollout_file,
                                obs=obs_sequence,
                                actions=action_sequence,
                                next_frame=next_sequence)

            s = s + 1

        env.close()
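
As before, the rollouts reload with np.load, the keys mirroring the savez_compressed call:

data = np.load(os.path.join(full_path, 'rollout-0.npz'))
obs = data['obs']                 # stacked 4-frame states
actions = data['actions']         # one-hot encoded actions
next_frames = data['next_frame']  # preprocessed single next frames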