def main_loop(self):
    """Train the agent by playing 100 games of snake.

    Every game starts with a fresh ``Food`` and ``Snake``. On each step the
    agent either takes a random action or the action its model scores
    highest; the chance of a random action is governed by
    ``agent.epsilon = 100 - numGames`` compared against ``randint(0, 200)``,
    so exploration shrinks as more games are played. The chosen one-hot
    action is applied to the snake's velocity, the game advances one tick,
    and the resulting transition is used for a single-step training update
    and appended to the agent's memory. When a game ends the agent replays
    its memory and the game counter is printed.
    """
    agent = Agent()
    numGames = 0
    while numGames < 100:
        # Clear the game-over flag for the new game. Nothing else in this
        # loop resets it, so without this every game after the first would
        # skip the step loop below and no further training would happen.
        self.over = False
        food = Food(self.size, self.screen)
        food.food_new()
        snake = Snake(size=self.size)
        while not self.over:
            agent.epsilon = 100 - numGames
            oldState = agent.get_state(snake, food)

            # Epsilon-greedy choice between a random and a predicted action,
            # encoded as a one-hot vector of length 3.
            if randint(0, 200) < agent.epsilon:
                move = to_categorical(
                    randint(0, 2), num_classes=3, dtype='int32')
            else:
                predict = agent.model.predict(oldState.reshape(1, 11))
                move = to_categorical(
                    np.argmax(predict[0]), num_classes=3, dtype='int32')

            # Translate the action into a velocity change.
            # NOTE(review): each branch sets one velocity component and
            # leaves the other untouched, and [1, 0, 0] always forces
            # xVel = 10 - confirm that Snake.snake_move() expects this.
            if np.array_equal(move, [1, 0, 0]):
                snake.xVel = 10
                print('condition1')
            elif np.array_equal(move, [0, 1, 0]) and snake.yVel == 0:
                # right - going horizontal
                snake.yVel = 10
                print('condition2')
            elif np.array_equal(move, [0, 1, 0]) and snake.xVel == 0:
                # right - going vertical
                snake.xVel = 10
                print('condition3')
            elif np.array_equal(move, [0, 0, 1]) and snake.yVel == 0:
                # left - going horizontal
                snake.yVel = -10
                print('condition4')
            elif np.array_equal(move, [0, 0, 1]) and snake.xVel == 0:
                # left - going vertical
                snake.xVel = -10
                print('condition5')

            # Advance the game by one tick and redraw.
            snake.snake_move()
            self.check_collisions(snake, food)
            self.update_window(snake, food)
            self.clock.tick(10)

            # Learn from the transition just observed, then remember it.
            newState = agent.get_state(snake, food)
            reward = agent.get_reward(self.foodCollide, self.over)
            agent.train_short(oldState, move, reward, newState, self.over)
            agent.write_memory(oldState, move, reward, newState, self.over)

        # Game finished: train on the accumulated memory.
        agent.replay(agent.mem)
        numGames += 1
        print(numGames)
# Build the agent, sizing its input and action count from the environment.
agent = Agent(
    gamma=0.99,
    epsilon=1.0,
    alpha=0.00025,
    input_dims=env.observation_space.shape,
    n_actions=env.action_space.n,
    mem_size=200_000,
    eps_min=0.1,
    batch_size=32,
    replace=10_000,
    eps_dec=1e-5,
    save_name='dqn_model',
    load_name='dqn_model_5000it.h5',
)

# When resuming from a checkpoint, pin epsilon to 0.1 and load the saved
# models.
# NOTE(review): nesting reconstructed from a flattened line - confirm that
# load_models() is meant to run only when load_checkpoint is set.
if load_checkpoint:
    agent.epsilon = 0.1
    agent.load_models()

last_ep = 0

for episode in tqdm(range(num_games)):
    done = False
    observation = env.reset()
    score = 0

    # Step the environment until the episode reports completion.
    # NOTE(review): n_steps is incremented here but not initialised in this
    # excerpt, and observation is never advanced to observation_ in the
    # visible loop body - presumably both are handled outside this excerpt;
    # verify.
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        n_steps += 1
        score += reward

        # Transitions are stored only when not rendering.
        if not render:
            agent.store_transition(
                observation, action, reward, observation_, int(done))