    s2, r, terminal = env.step(a)

    # Store experience: always keep transitions with a non-zero reward,
    # and subsample the zero-reward ones so the buffer stays balanced.
    if SAVE_EXPERIENCE:
        if np.abs(r) > 0.0:
            er.add_experience(s, a, r, terminal, s2)
        else:
            if np.random.random() < 0.0018:
                er.add_experience(s, a, r, terminal, s2)

    s = s2
    score += r

    # Draw the current score.
    font = pygame.font.SysFont(None, 18)
    text = font.render("Score: %.2f" % score, True, BLACK)
    gameDisplay.blit(text, (DISPLAY_SHAPE[0] / 2 - 30, 60))

    # Update display
    pygame.display.update()

    gameover(score)


while True:
    gameLoop()
    if SAVE_EXPERIENCE:
        er.save(EXP_REPLAY_FILE)

pygame.quit()
quit()
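The buffer object `er` used above only needs `add_experience` and `save`. For reference, a minimal sketch of such a helper is shown below; the class name, the capacity default, and the pickle-based persistence are assumptions for illustration, not the project's actual implementation.

import pickle
import random
from collections import deque

class ExperienceReplay:
    # Fixed-size store of (s, a, r, terminal, s2) transitions.
    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def add_experience(self, s, a, r, terminal, s2):
        # The deque drops the oldest transition once capacity is reached.
        self.buffer.append((s, a, r, terminal, s2))

    def sample(self, batch_size):
        # Uniform random minibatch, as used later during training.
        return random.sample(list(self.buffer), batch_size)

    def save(self, path):
        # Persist the collected transitions so training can run on them later.
        with open(path, "wb") as f:
            pickle.dump(list(self.buffer), f)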
        epsilon -= epsilon_decay

        # Periodically train the online network on a sampled minibatch.
        if config.total_step % config.args.online_update_freq == 0:
            train_batch = replay.sample(config.args.batch_size)
            loss = qnet.learn_on_minibatch(train_batch, config.args.gamma)
            sys.stdout.write(
                "\rTrain step at {}th step | loss {} | epsilon {}".format(
                    config.total_step, loss, epsilon))
            sys.stdout.flush()

        # Periodically blend the target network toward the online network.
        if config.total_step % config.args.target_update_freq == 0:
            # print("Update target net")
            qnet.update_target_model(config.args.tau)

        config.total_step += 1
        total_reward += reward
        state = newstate

        if done:
            break

    # Episode finished: merge this episode's transitions into the shared buffer.
    replay.add(replay_ep.buffer)
    print("\nDone epoch in {} steps, {} random steps, Total reward: {}".format(
        config.total_step - start_step, num_random_step, total_reward))

    # Checkpoint the model, config, and replay buffer at regular intervals.
    if (config.episode % config.args.save_model_freq == 0
            and config.total_step > config.args.num_pretrain_step):
        qnet.save(config.args.output_dir)
        config.save()
        replay.save()
        print("Save model at {}".format(config.args.output_dir))
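For context, `update_target_model(tau)` above presumably performs a soft (Polyak) update that blends the target network toward the online network. A minimal sketch, assuming Keras-style models exposing `get_weights`/`set_weights` (the function and variable names here are illustrative, not taken from the project):

def soft_update_target(online_model, target_model, tau):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    online_weights = online_model.get_weights()
    target_weights = target_model.get_weights()
    blended = [tau * w_on + (1.0 - tau) * w_tg
               for w_on, w_tg in zip(online_weights, target_weights)]
    target_model.set_weights(blended)

With a small tau the target network changes slowly, which keeps the bootstrapped Q-targets stable between online updates.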