Example #1
# module-level imports assumed by this method:
#   from random import randint
#   import numpy as np
#   from tensorflow.keras.utils import to_categorical
def main_loop(self):
    agent = Agent()
    numGames = 0
    top = 0
    while numGames < 100:
        food = Food(self.size, self.screen)
        food.food_new()
        snake = Snake(size=self.size)
        self.over = False  # reset the game-over flag for the new game
        while not self.over:
            # decay exploration linearly as more games are played
            agent.epsilon = 100 - numGames
            oldState = agent.get_state(snake, food)
            if randint(0, 200) < agent.epsilon:
                # explore: random one-hot move
                move = to_categorical(randint(0, 2),
                                      num_classes=3,
                                      dtype='int32')
            else:
                # exploit: greedy move from the Q-network
                predict = agent.model.predict(oldState.reshape(1, 11))
                move = to_categorical(np.argmax(predict[0]),
                                      num_classes=3,
                                      dtype='int32')
            if np.array_equal(move, [1, 0, 0]):
                snake.xVel = 10
                print('condition1')
            elif np.array_equal(move, [0, 1, 0]) and snake.yVel == 0:  # right - going horizontal
                snake.yVel = 10
                print('condition2')
            elif np.array_equal(move, [0, 1, 0]) and snake.xVel == 0:  # right - going vertical
                snake.xVel = 10
                print('condition3')
            elif np.array_equal(move, [0, 0, 1]) and snake.yVel == 0:  # left - going horizontal
                snake.yVel = -10
                print('condition4')
            elif np.array_equal(move, [0, 0, 1]) and snake.xVel == 0:  # left - going vertical
                snake.xVel = -10
                print('condition5')
            snake.snake_move()
            self.check_collisions(snake, food)
            self.update_window(snake, food)
            self.clock.tick(10)
            newState = agent.get_state(snake, food)
            reward = agent.get_reward(self.foodCollide, self.over)
            # learn from this single step, then store it for batch replay
            agent.train_short(oldState, move, reward, newState, self.over)
            agent.write_memory(oldState, move, reward, newState, self.over)
        # after each game, train on the accumulated replay memory
        agent.replay(agent.mem)
        numGames += 1
        print(numGames)
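
For reference, here is a minimal, self-contained sketch of the epsilon-greedy move selection used in the loop above. The choose_move helper is hypothetical (it is not part of the project); it only shows how randint, np.argmax, and to_categorical combine to produce a one-hot move for a Keras model with 11 inputs and 3 actions.

import numpy as np
from random import randint
from tensorflow.keras.utils import to_categorical

def choose_move(model, state, epsilon):
    """Return a one-hot move vector of length 3, chosen epsilon-greedily."""
    if randint(0, 200) < epsilon:
        idx = randint(0, 2)  # explore: random action index
    else:
        q_values = model.predict(state.reshape(1, 11))
        idx = int(np.argmax(q_values[0]))  # exploit: greedy action
    return to_categorical(idx, num_classes=3, dtype='int32')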
Example #2
    # excerpt from a training script: `env`, `num_games`, `render`,
    # `load_checkpoint`, and `tqdm` are assumed to be defined/imported above
    agent = Agent(gamma=0.99,
                  epsilon=1.0,
                  alpha=0.00025,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n,
                  mem_size=200_000,
                  eps_min=0.1,
                  batch_size=32,
                  replace=10_000,
                  eps_dec=1e-5,
                  save_name='dqn_model',
                  load_name='dqn_model_5000it.h5')
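
A quick sanity check on the hyperparameters above: with a linear decay of eps_dec=1e-5 applied once per step (a common convention; the decay schedule itself is not shown in this snippet), epsilon falls from 1.0 to the eps_min=0.1 floor after (1.0 - 0.1) / 1e-5 = 90,000 steps. If replace=10_000 is the target-network update interval, as the name suggests, the target network is refreshed nine times before exploration bottoms out.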

    if load_checkpoint:
        agent.epsilon = 0.1  # resume from checkpoint with low exploration
        agent.load_models()

    n_steps = 0  # total environment steps taken
    last_ep = 0
    for episode in tqdm(range(num_games)):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)  # epsilon-greedy policy
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            if not render:
                agent.store_transition(observation, action, reward,
                                       observation_, int(done))
                agent.learn()  # assumed continuation: one learning step per
                               # stored transition (the snippet is truncated)
            observation = observation_  # advance to the next state
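
Example #2 calls agent.store_transition(...) without showing its implementation. Below is a minimal sketch of the ring-buffer replay memory such a DQN agent typically wraps; the ReplayBuffer class and its attribute names are assumptions for illustration, not the actual code behind this example.

import numpy as np

class ReplayBuffer:
    def __init__(self, mem_size, input_dims):
        self.mem_size = mem_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((mem_size, *input_dims), dtype=np.float32)
        self.new_state_memory = np.zeros((mem_size, *input_dims), dtype=np.float32)
        self.action_memory = np.zeros(mem_size, dtype=np.int64)
        self.reward_memory = np.zeros(mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(mem_size, dtype=np.uint8)

    def store_transition(self, state, action, reward, state_, done):
        # overwrite the oldest entry once the buffer is full (ring buffer)
        idx = self.mem_cntr % self.mem_size
        self.state_memory[idx] = state
        self.new_state_memory[idx] = state_
        self.action_memory[idx] = action
        self.reward_memory[idx] = reward
        self.terminal_memory[idx] = done
        self.mem_cntr += 1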