Exemple #1
0
    # Training loop: runs indefinitely (no break/return is visible in this
    # excerpt). Each iteration plays exactly one game step; the per-game
    # bookkeeping happens only on the iteration where the game ends.
    # NOTE(review): `agent`, `game`, `scores`, `total_score` and
    # `maximum_score` are set up outside this excerpt.
    while True:
        # Capture the state before acting, then ask the agent for an action.
        current_state = agent.get_state()
        next_action = agent.get_action(current_state)

        # Apply the action; play_step reports (game_over, score, reward).
        game_over, score, reward = game.play_step(next_action)
        # Read the state again now that the step has been played.
        new_state = agent.get_state()
        # Feed this single transition to the agent right away.
        # NOTE(review): presumably an immediate per-step update -- confirm
        # in the agent implementation.
        agent.train_over_sample(current_state, next_action, reward, new_state,
                                game_over)

        # Hand the same transition to save_values.
        # NOTE(review): presumably stored for the later train_over_batch()
        # call -- confirm in the agent implementation.
        agent.save_values(current_state, next_action, reward, new_state,
                          game_over)

        if game_over:
            # End of a game: reset the game, count it, then run the
            # batch training step.
            game.restart()
            agent.num_of_games += 1
            agent.train_over_batch()

            # Strictly better than the best score so far: record it and
            # write a checkpoint (a tie does not trigger a save).
            if score > maximum_score:
                # save checkpoint

                maximum_score = score
                agent.train.save_checkpoint()

            print('Game', agent.num_of_games, 'Score', score, 'Record:',
                  maximum_score)

            # Per-game statistics: keep every final score and a running sum.
            scores.append(score)
            total_score += score
            # Sum of scores divided by the game counter; this is the mean
            # score per finished game provided both started at zero
            # (initial values are not visible here -- TODO confirm).
            mean_score = total_score / agent.num_of_games