Example #1
        s2, r, terminal = env.step(a)

        # store experience
        if SAVE_EXPERIENCE:
            if np.abs(r) > 0.0:
                # always keep transitions that carry a reward signal
                er.add_experience(s, a, r, terminal, s2)
            elif np.random.random() < 0.0018:
                # keep only a small fraction of zero-reward transitions
                # so the buffer is not swamped by uneventful frames
                er.add_experience(s, a, r, terminal, s2)

        s = s2
        score += r

        # draw the running score (the font could be created once, outside the loop)
        font = pygame.font.SysFont(None, 18)
        text = font.render("Score: %.2f" % score, True, BLACK)
        gameDisplay.blit(text, (DISPLAY_SHAPE[0] / 2 - 30, 60))

        # Update Display
        pygame.display.update()

    gameover(score)


# play episodes back to back, saving the replay buffer after each one
while True:
    gameLoop()
    if SAVE_EXPERIENCE:
        er.save(EXP_REPLAY_FILE)

# fallback shutdown (the loop above only exits if gameLoop() raises)
pygame.quit()
quit()
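The snippet above stores transitions through an `er` object exposing `add_experience(s, a, r, terminal, s2)` and `save(path)`. As a rough illustration of what such a buffer could look like, here is a minimal sketch; the class name `ExperienceReplay`, the deque capacity, and the pickle-based `save` are assumptions, not the original implementation.

# Minimal sketch of a replay buffer with the interface used above.
# Class name, capacity, and pickle persistence are illustrative assumptions.
import pickle
from collections import deque

class ExperienceReplay:
    def __init__(self, capacity=100000):
        # bounded FIFO buffer of (s, a, r, terminal, s2) transitions
        self.buffer = deque(maxlen=capacity)

    def add_experience(self, s, a, r, terminal, s2):
        self.buffer.append((s, a, r, terminal, s2))

    def save(self, path):
        # persist the buffer so collection can resume across runs
        with open(path, "wb") as f:
            pickle.dump(list(self.buffer), f)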
Example #2
                # anneal the exploration rate toward its minimum
                epsilon -= epsilon_decay

            if config.total_step % config.args.online_update_freq == 0:
                # periodically train the online network on a sampled minibatch
                train_batch = replay.sample(config.args.batch_size)
                loss = qnet.learn_on_minibatch(train_batch, config.args.gamma)
                sys.stdout.write(
                    "\rTrain step at {}th step | loss {} | epsilon {}".format(
                        config.total_step, loss, epsilon))
                sys.stdout.flush()

            if config.total_step % config.args.target_update_freq == 0:
                # soft-update the target network toward the online network
                qnet.update_target_model(config.args.tau)

        config.total_step += 1
        total_reward += reward
        state = newstate
        if done:
            break

    # merge this episode's transitions into the main replay buffer
    replay.add(replay_ep.buffer)
    print("\nDone epoch in {} steps, {} random steps, Total reward: {}".format(
        config.total_step - start_step, num_random_step, total_reward))

    # periodically checkpoint the model, config, and replay buffer
    if (config.episode % config.args.save_model_freq == 0
            and config.total_step > config.args.num_pretrain_step):
        qnet.save(config.args.output_dir)
        config.save()
        replay.save()
        print("Save model at {}".format(config.args.output_dir))