# Example #1
        score, done = 0.0, False

        # Hoist the score font out of the frame loop: pygame.font.SysFont
        # performs a (potentially slow) system font lookup, and the font
        # never changes, so creating it once per frame was wasted work.
        font = pygame.font.SysFont(None, 18)

        # Loop until terminal state
        while not done:

            clock.tick(FPS)  # cap the frame rate / generate new frame

            gameDisplay.fill(WHITE)

            env.render(gameDisplay)

            # Greedy policy: query the Q-network for action scores and pick
            # the best one. NOTE(review): s is assumed to be a pair of
            # network inputs (s[0], s[1]) -- confirm against the caller.
            a = qvalue_network.best_actions( np.expand_dims(s[0], axis=0), np.expand_dims(s[1], axis=0) ).ravel()

            action_index = np.argmax( a )

            s, reward, done = env.step(action_index)

            # Accumulate the episode score. The previous `if reward != 0.0`
            # guard was redundant: adding 0.0 is a no-op.
            score += reward

            # Draw the running score on the display.
            text = font.render("Score: %.2f" % score, True, BLACK)
            gameDisplay.blit(text, (DISPLAY_SHAPE[0]/3, 60))

            # Update Display
            pygame.display.update()

            # Drain the event queue so the window stays responsive and can
            # be closed cleanly mid-episode.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    quit()
# Example #2
        
        # One training episode: act epsilon-greedily, push transitions into
        # the experience-replay buffer, then (below) sample minibatches for
        # network updates. NOTE(review): the enclosing loop and the body of
        # the final `if` continue past this excerpt.
        ep_reward = 0
        terminal = False
        num_steps = 0
        l = 1.0  # NOTE(review): unused in the visible lines -- confirm it is needed

        while not terminal:

            # Epsilon Greedy: exploration probability decays exponentially
            # with step count, EPSILON_ALPHA * exp(-EPSILON_BETA * num_steps).
            if np.random.random() < EPSILON_ALPHA * np.exp(-EPSILON_BETA * num_steps):
                a = generate_random_action()
            else:
                # Greedy action scores from the Q-network. NOTE(review): s is
                # assumed to be a pair of network inputs -- confirm with caller.
                a = qvalue_network.best_actions( np.expand_dims(s[0],axis=0), np.expand_dims(s[1], axis=0) ).ravel()

            # Collect environment data (next state, reward, terminal flag)
            s2, r, terminal = env.step( np.argmax(a) )

            # Add data to ExperienceReplay memory. Transitions with non-zero
            # reward are always stored; zero-reward transitions are kept only
            # with small probability (0.18%), presumably to keep the buffer
            # from being dominated by uninformative samples -- TODO confirm.
            if UPDATE_REPLAY:
                if np.abs(r) > 0.0:
                    er.add_experience(s, a, r, terminal, s2)
                else:
                    if np.random.random() < 0.0018:
                        er.add_experience(s, a, r, terminal, s2)

            # Keep adding experience to the memory until
            # there are at least minibatch size samples
            if er.size() > MINIBATCH_SIZE:

                s_batch, a_batch, r_batch, t_batch, s2_batch = er.get_batch(MINIBATCH_SIZE)