Example #1
    # Assumes numpy is imported as np, and that `env` (a Gym environment)
    # and `agent` (exposing choose_action, remember, learn, load_models and
    # save_models) are already constructed. Uses the pre-0.26 Gym API:
    # reset() returns only the observation, step() returns a 4-tuple.
    n_games = 200
    best_score = env.reward_range[0]
    score_history = []
    load_checkpoints = False

    if load_checkpoints:
        agent.load_models()
        env.render(mode='human')

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            # if not load_checkpoints:
            agent.learn()
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        # if avg_score > best_score:
        #     best_score = avg_score
        #     # if not load_checkpoints:
        #     agent.save_models()

        print('episode ', i, ' score %.1f ' % score,
              'avg score %.1f ' % avg_score)
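
For reference, both examples assume an agent object with the interface sketched below. This stub is hypothetical (the real class is not shown on this page); only the method names and call signatures are taken from the calls in the loops above.

class Agent:
    """Skeleton of the agent interface used by the training loops above."""

    def choose_action(self, observation):
        # Sample an action from the current policy for this observation.
        raise NotImplementedError

    def remember(self, state, action, reward, new_state, done):
        # Store a single transition in the replay buffer.
        raise NotImplementedError

    def learn(self):
        # Sample a batch of transitions and update the networks.
        raise NotImplementedError

    def save_models(self):
        # Checkpoint the network weights to disk.
        raise NotImplementedError

    def load_models(self):
        # Restore the network weights from a checkpoint.
        raise NotImplementedError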
Example #2
# Assumes project-level imports: numpy as np, plus the Hyper, Constants,
# make_env, Agent and plot_learning_curve definitions from the
# surrounding project.
def main():
    Hyper.init()
    env = make_env(Constants.env_id)  # see the wrapper code in atari_image.py
    Hyper.n_actions = env.action_space.n
    shape = env.observation_space.shape
    agent = Agent(input_dims=shape, env=env, n_actions=env.action_space.n)
    filename = f"{Constants.env_id}_games{Hyper.n_games}_alpha{Hyper.alpha}.png"
    figure_file = f'plots/{filename}'

    best_ave_score = env.reward_range[0]
    best_score = 0
    score_history = []
    load_checkpoint = False
    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')
    total_steps = 0
    game_id = 0
    for i in range(Hyper.n_games):
        game_id += 1
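        # Every 20 games, scale the alpha and beta hyperparameters up by 20%.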
        if game_id % 20 == 0:
            Hyper.alpha = Hyper.alpha * 1.2
            Hyper.beta = Hyper.beta * 1.2
        observation = env.reset()
        done = False
        steps = 0
        score = 0
        while not done:
            # Sample action from the policy
            action = agent.choose_action(observation)

            # Sample transition from the environment
            new_observation, reward, done, info = env.step(action)
            steps += 1
            total_steps += 1

            # Store transition in the replay buffer
            agent.remember(observation, action, reward, new_observation, done)
            if not load_checkpoint:
                agent.learn()
            score += reward
            observation = new_observation
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if score > best_score:
            best_score = score

        if avg_score > best_ave_score:
            best_ave_score = avg_score
            if not load_checkpoint:
                agent.save_models()

        episode = i + 1
        print(
            f"episode {episode}: score {score}, best_score {best_score}, best ave score {best_ave_score}, trailing 100 games avg {avg_score}, steps {steps}, total steps {total_steps}"
        )

    print(f"total number of steps taken: {total_steps}")
    if not load_checkpoint:
        x = [i + 1 for i in range(Hyper.n_games)]
        plot_learning_curve(x, score_history, figure_file)
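
plot_learning_curve itself is not shown here. A minimal sketch consistent with the call above (x values, score history, output path) could look like the following; the trailing-100 running average matches how avg_score is computed in the loop, but the body is an assumption, not the project's actual helper.

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, figure_file):
    # Running average over the previous 100 scores, mirroring avg_score.
    running_avg = [np.mean(scores[max(0, i - 99):i + 1]) for i in range(len(scores))]
    plt.plot(x, running_avg)
    plt.title('Running average of the previous 100 scores')
    plt.xlabel('episode')
    plt.ylabel('score')
    plt.savefig(figure_file)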