# training loop
# Runs until interrupted: no exit condition is visible in this section.
while True:
    # Observe, pick an action, advance the game one step, observe again.
    current_state = agent.get_state()
    next_action = agent.get_action(current_state)
    game_over, score, reward = game.play_step(next_action)
    new_state = agent.get_state()

    # The same five-field transition is handed to both agent calls, in this
    # order: first the per-sample training call, then save_values.
    transition = (current_state, next_action, reward, new_state, game_over)
    agent.train_over_sample(*transition)
    agent.save_values(*transition)

    if game_over:
        # A game just ended: restart it, count it, then run the batch
        # training call.
        game.restart()
        agent.num_of_games += 1
        agent.train_over_batch()

        if score > maximum_score:
            # save checkpoint
            maximum_score = score
            agent.train.save_checkpoint()

        print('Game', agent.num_of_games, 'Score', score, 'Record:', maximum_score)

        # Running statistics; the divisor is the game counter that was
        # incremented a few lines above.
        scores.append(score)
        total_score += score
        mean_score = total_score / agent.num_of_games