Example #1
import time
from statistics import mean  # assumed source of mean() used below

# SnakeEnv, settings and get_input are provided by the surrounding project
def main():
    times = []
    env = SnakeEnv()
    for i in range(100):
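        # time each game from reset to game-over so we can report games per second below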
        st = time.time()
        done = False
        env.reset()
        score = 0
        food = 0
        while not done:
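            # "Food" is extra info handed to the environment; it appears to drive an on-screen food counter drawn at (10, 30)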
            info = {"Food": (food, (10, 30))}
            state, reward, done = env.step(get_input(), info=info)
            score += reward
            if reward == settings.FOOD_REWARD:
                food += 1

            env.render(sleep=False)
            # print(reward)
            if done:
                et = time.time()
                times.append(et - st)
                # quit()
                break
    print(f"{1 / mean(times)} games per second on average")
    print(f"{1 / max(times)} games per second for the slowest game")
    print(f"{1 / min(times)} games per second for the fastest game")
Example #2
class Application:
    def __init__(self, args):
        self.args = args
        self.env = SnakeEnv(args.snake_head_x, args.snake_head_y, args.food_x,
                            args.food_y)
        self.agent = Agent(self.env.get_actions(), args.Ne, args.C, args.gamma)

    def execute(self):
        if not self.args.human:
            if self.args.train_eps != 0:
                self.train()
            return self.test()
        self.show_games()

    def train(self):
        print("Train Phase:")
        self.agent.train()
        window = self.args.window
        self.points_results = []
        first_eat = True
        start = time.time()

        for game in range(1, self.args.train_eps + 1):
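            # play one training episode; the environment is reset at the bottom of this loop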
            state = self.env.get_state()
            dead = False
            action = self.agent.act(state, 0, dead)
            while not dead:
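                # act() is assumed to fold the observed points/death into its Q-update before returning the next action while in train mode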
                state, points, dead = self.env.step(action)

                # For debugging convenience, you can check whether your Q-table matches ours for a given parameter setting
                # (see the Debug Convenience part on the homework 4 web page)
                if first_eat and points == 1:
                    self.agent.save_model(utils.CHECKPOINT)
                    first_eat = False

                action = self.agent.act(state, points, dead)

            points = self.env.get_points()
            self.points_results.append(points)
            if game % self.args.window == 0:
                print(
                    "Games:",
                    len(self.points_results) - window,
                    "-",
                    len(self.points_results),
                    "Points (Average:",
                    sum(self.points_results[-window:]) / window,
                    "Max:",
                    max(self.points_results[-window:]),
                    "Min:",
                    min(self.points_results[-window:]),
                    ")",
                )
            self.env.reset()
        print("Training takes", time.time() - start, "seconds")
        self.agent.save_model(self.args.model_name)

    def test(self):
        print("Test Phase:")
        self.agent.eval()
        self.agent.load_model(self.args.model_name)
        points_results = []
        start = time.time()

        for game in range(1, self.args.test_eps + 1):
            state = self.env.get_state()
            dead = False
            action = self.agent.act(state, 0, dead)
            while not dead:
                state, points, dead = self.env.step(action)
                action = self.agent.act(state, points, dead)
            points = self.env.get_points()
            points_results.append(points)
            self.env.reset()

        print("Testing takes", time.time() - start, "seconds")
        print("Number of Games:", len(points_results))
        print("Average Points:", sum(points_results) / len(points_results))
        print("Max Points:", max(points_results))
        print("Min Points:", min(points_results))
        return sum(points_results) / len(points_results)

    def show_games(self):
        print("Display Games")
        self.env.display()
        pygame.event.pump()
        self.agent.eval()
        points_results = []
        end = False
        for game in range(1, self.args.show_eps + 1):
            state = self.env.get_state()
            dead = False
            action = self.agent.act(state, 0, dead)
            count = 0
            while not dead:
                count += 1
                pygame.event.pump()
                keys = pygame.key.get_pressed()
                if keys[K_ESCAPE] or self.check_quit():
                    end = True
                    break
                state, points, dead = self.env.step(action)
                # Qlearning agent
                if not self.args.human:
                    action = self.agent.act(state, points, dead)
                # for human player
                else:
                    print((state[0] + 1) // 40, (state[1] + 1) // 40)
                    for event in pygame.event.get():
                        if event.type == pygame.KEYDOWN:
                            if event.key == pygame.K_UP:
                                action = 0
                            elif event.key == pygame.K_DOWN:
                                action = 1
                            elif event.key == pygame.K_LEFT:
                                action = 2
                            elif event.key == pygame.K_RIGHT:
                                action = 3
            if end:
                break
            self.env.reset()
            points_results.append(points)
            print("Game:",
                  str(game) + "/" + str(self.args.show_eps), "Points:", points)
        if len(points_results) == 0:
            return
        print("Average Points:", sum(points_results) / len(points_results))

    def check_quit(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                return True
        return False
Example #3
            if k == key.RIGHT: a = 3

        env.render()
        env.viewer.window.on_key_press = key_press
    else:
        size = (args.dim + 2) * args.zoom
        model = DQN(size, size, batch_norm=True)
        model.load_state_dict(torch.load(args.filename))
        policy = PurePolicy(model)
    try:
        while True:
            state = env.reset()
            total_reward = 0.0
            steps = 0
            restart = False
            while True:
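                # tick pyglet's clock so scheduled callbacks and frame timing keep running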
                pyglet.clock.tick()
                if policy is not None:
                    state_ten = tensorize(state)
                    a = policy.get(state_ten)
                state, r, done, info = env.step(a)
                total_reward += r
                steps += 1
                env.render()
                if steps % 20 == 0:
                    print('step {0} score: {1:.2f}'.format(
                        steps, total_reward))
                if restart or done:
                    break
    except KeyboardInterrupt:
        env.close()
Example #4
            # Take random action
            action = np.random.choice(4)
        else:
            # Predict action Q-values
            # From environment state
            state_tensor = tf.convert_to_tensor(state)
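            # add a batch dimension so the model receives shape (1, *state.shape)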
            state_tensor = tf.expand_dims(state_tensor, 0)
            action_probability = model(state_tensor, training=False)
            # Take best action
            action = tf.argmax(action_probability[0]).numpy()

        # Decay probability of taking random action
        epsilon -= epsilon_interval / epsilon_greedy_frames
        epsilon = max(epsilon, epsilon_min)

        state_next, reward, done = snake.step(action)
        state_next = np.array(state_next)

        episode_reward += reward

        # Save actions and states in replay buffer
        action_history.append(action)
        state_history.append(state)
        state_next_history.append(state_next)
        done_history.append(done)
        rewards_history.append(reward)
        state = state_next

        if frame_count % update_after_actions == 0 and len(
                done_history) > batch_size:
            # Get indices of samples for replay buffers
Example #5
env = SnakeEnv()
agent = DQNAgent(env)
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episode"):
    agent.tensorboard.step = episode
    episode_reward = 0
    step = 1
    current_state = env.reset()

    done = False
    while not done:
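        # epsilon-greedy: exploit the current Q-estimates, otherwise explore with a random action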
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, env.ACTION_SPACE_SIZE)

        new_state, reward, done = env.step(action)
        episode_reward += reward

        if SHOW_PREVIEW and episode % AGGREGATE_STATS_EVERY == 0:
            env.render()

        agent.update_replay_memory(
            (current_state, action, reward, new_state, done))
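        # one training step; the agent is assumed to sample a minibatch from its replay memory internally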
        agent.train(done)

        current_state = new_state
        step += 1

    ep_rewards.append(episode_reward)
    if SAVE_MODELS and episode % AGGREGATE_STATS_EVERY == 0:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(