print("Starting the training.")
# Wall-clock start of the whole run; not read again within this section.
time_start = time()

for epoch in range(epochs):
    print("\nEpoch %d\n-------" % (epoch + 1))
    train_episodes_finished = 0

    # Training phase: take a fixed number of learning steps, starting a new
    # episode whenever the current one ends, and record per-step losses and
    # per-episode total rewards.
    print("Training...")
    train_scores = []
    losses = []
    game.new_episode()
    for learning_step in trange(learning_steps_per_epoch, leave=False):
        loss = model.step(training=True)
        losses.append(loss)
        if game.is_episode_finished():
            score = game.get_total_reward()
            train_scores.append(score)
            game.new_episode()
            train_episodes_finished += 1

    train_scores = np.array(train_scores)
    losses = np.array(losses)

    print("%d training episodes played." % train_episodes_finished)
    # NOTE: sys.getsizeof is shallow -- it reports the size of the container
    # object only, not of the transitions it references.
    print("Current size of the memory buffer:", sys.getsizeof(model.memory))

    # An epoch can end before any episode finishes; min()/max() on an empty
    # array raise ValueError, so only report statistics when there are scores.
    if train_scores.size:
        print(
            "Results: mean score: %.1f +/- %.1f,"
            % (train_scores.mean(), train_scores.std()),
            "min: %.1f," % train_scores.min(),
            "max: %.1f," % train_scores.max())
    else:
        print("Results: no training episode finished during this epoch.")
actions, file_name, ddqn=use_ddqn, parameter_exploration=use_parameter_exploration, gpu=use_gpu, loading=1) print("======================================") print("Testing trained neural network.") print("Testing...") test_scores = [] for _ in range(episodes_to_watch): game.new_episode() while not game.is_episode_finished(): model.step(training=False, showing=True) # Sleep between episodes sleep(1.0) score = game.get_total_reward() test_scores.append(score) print("Total score: ", score) test_scores = np.array(test_scores) print("%d test episodes played." % episodes_to_watch) print( "Results: mean: %.1f +/- %.1f," % (test_scores.mean(), test_scores.std()), "min: %.1f," % test_scores.min(), "max: %.1f," % test_scores.max())