# Evaluation loop: render the environment each frame and act greedily
# from the Q-value network until a terminal state is reached.
score, done = 0.0, False

# Hoisted out of the loop: the font is loop-invariant, so creating it
# once avoids a per-frame SysFont allocation.
score_font = pygame.font.SysFont(None, 18)

while not done:
    clock.tick(FPS)  # cap the frame rate

    # Generate the new frame.
    gameDisplay.fill(WHITE)
    env.render(gameDisplay)

    # Greedy action: Q-values for the current (two-part) state.
    # assumes s is a 2-element state (e.g. frame stack + auxiliary input) — TODO confirm
    a = qvalue_network.best_actions(
        np.expand_dims(s[0], axis=0),
        np.expand_dims(s[1], axis=0),
    ).ravel()
    action_index = np.argmax(a)

    s, reward, done = env.step(action_index)
    if reward != 0.0:
        score += reward

    # Draw the running score HUD.
    text = score_font.render("Score: %.2f" % score, True, BLACK)
    gameDisplay.blit(text, (DISPLAY_SHAPE[0] / 3, 60))

    # Update display.
    pygame.display.update()

    # Allow the window to be closed cleanly.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            quit()
# Training-episode loop: epsilon-greedy action selection, environment
# stepping, and experience-replay population.
# NOTE(review): the while-body is truncated at this excerpt's boundary —
# the state hand-off (s = s2), num_steps increment, ep_reward update and
# the actual network training step presumably follow; verify downstream.
ep_reward = 0
terminal = False
num_steps = 0
l = 1.0  # NOTE(review): unused in this excerpt; presumably consumed later

while not terminal:
    # Epsilon-greedy: exploration probability decays exponentially
    # with the number of steps taken so far.
    if np.random.random() < EPSILON_ALPHA * np.exp(-EPSILON_BETA * num_steps):
        a = generate_random_action()
    else:
        a = qvalue_network.best_actions(
            np.expand_dims(s[0], axis=0),
            np.expand_dims(s[1], axis=0),
        ).ravel()

    # Collect environment data.
    s2, r, terminal = env.step(np.argmax(a))

    # Add data to the ExperienceReplay memory. Zero-reward transitions
    # are heavily subsampled (p = 0.0018) to keep the buffer balanced
    # toward informative (rewarded) experiences.
    if UPDATE_REPLAY:
        if np.abs(r) > 0.0:
            er.add_experience(s, a, r, terminal, s2)
        else:
            if np.random.random() < 0.0018:
                er.add_experience(s, a, r, terminal, s2)

    # Only start sampling once the memory holds more than a minibatch.
    if er.size() > MINIBATCH_SIZE:
        s_batch, a_batch, r_batch, t_batch, s2_batch = er.get_batch(MINIBATCH_SIZE)