# Example 1
    def move(self, environment):
        """Pick the snake's next action with an epsilon-greedy DDQN policy.

        With probability 0.01 a random action is explored; otherwise the
        action with the highest predicted Q-value is chosen. Either way the
        result is normalized against the snake's current heading.
        """
        BaseDDQNGameModel.move(self, environment)

        if np.random.rand() < 0.01:
            # BUG FIX: this branch previously handed a raw integer index to
            # Action.normalized_action, while the greedy branch below hands
            # it an action vector. Map the random index through
            # Action.action_from_vector so both branches agree.
            action_vector = Action.action_from_vector(
                random.randrange(self.action_space))
        else:
            state = environment.state()
            q_values = self.ddqn.predict(np.expand_dims(
                np.asarray(state).astype(np.float64), axis=0),
                                         batch_size=1)
            action_vector = Action.action_from_vector(np.argmax(q_values[0]))
        return Action.normalized_action(environment.snake_action,
                                        action_vector)
# Example 2
 def _predict(self, environment, model):
     """Return the candidate action whose model prediction scores highest.

     The candidates are the left neighbor of the snake's current heading,
     the current heading itself, and its right neighbor.
     """
     heading = environment.snake_action
     candidates = [
         Action.left_neighbor(heading),
         heading,
         Action.right_neighbor(heading),
     ]
     scores = [
         model.model.predict(
             np.array(environment.observation(candidate)).reshape(
                 -1, Constants.MODEL_FEATURE_COUNT, 1))
         for candidate in candidates
     ]
     return candidates[np.argmax(np.array(scores))]
# Example 3
    def _ddqn(self,
              total_step_limit=10000000,
              total_run_limit=None,
              clip=True):
        """Main DDQN training loop.

        Runs episodes until a limit is hit, storing
        (state, action, reward, next_state, terminal) transitions via
        self._remember and logging the mean score every
        SCORE_LOGGING_FREQUENCY finished episodes.

        :param total_step_limit: hard cap on cumulative environment steps.
        :param total_run_limit: optional cap on the number of episodes.
        :param clip: when True, clip each reward to its sign (-1, 0, +1).
        """
        run = 0
        total_step = 0
        scores = []
        while True:
            if total_run_limit is not None and run >= total_run_limit:
                print("Reached total run limit of: " + str(total_run_limit))
                exit(0)

            run += 1
            env = self.prepare_training_environment()
            current_state = env.state()
            step = 0
            score = env.reward()
            while True:
                if total_step >= total_step_limit:
                    print("Reached total step limit of: " +
                          str(total_step_limit))
                    exit(0)
                total_step += 1
                step += 1

                action = self._predict_move(current_state)
                action_vector = Action.action_from_vector(action)
                normalized_action = Action.normalized_action(
                    env.snake_action, action_vector)
                next_state, reward, terminal = env.full_step(normalized_action)
                if clip:
                    # BUG FIX: the original computed np.sign(reward) and
                    # discarded the result, so rewards were never actually
                    # clipped. Assign it back.
                    reward = np.sign(reward)
                score += reward
                self._remember(current_state, action, reward, next_state,
                               terminal)
                current_state = next_state

                self._step_update(total_step)

                if terminal:
                    scores.append(score)
                    if len(scores) % SCORE_LOGGING_FREQUENCY == 0:
                        self.log_score(mean(scores))
                        print('{{"metric": "score", "value": {}}}'.format(
                            mean(scores)))
                        print('{{"metric": "run", "value": {}}}'.format(run))
                        scores = []
                    break
# Example 4
    def longest_path(self, start, end, environment):
        """Build an approximate longest path from ``start`` to ``end``.

        Starts from the (reversed) shortest BFS path and repeatedly extends
        it in place: a single step a->b is replaced by a three-step detour
        (turn left or right from b's direction, then move in b's direction)
        whenever both detour cells are accessible and not already on the
        path. Results are memoized per ``end`` in ``self.transposition_table``.
        """
        # Reuse a previously computed path to this target, if any.
        longest_path_from_transposition_table = self._path_from_transposition_table(end)
        if longest_path_from_transposition_table:
            return longest_path_from_transposition_table
        shortest_path_solver = ShortestPathBFSSolver()
        path = shortest_path_solver.shortest_path(environment, start, end)
        path.reverse()

        # A path of one node (or no path at all) cannot be extended.
        if not path or len(path) <= 1:
            return []
        index = 0
        while True:
            a = path[index]
            b = path[index+1]

            extended_nodes = []

            # Candidate detours: sidestep perpendicular to b's direction,
            # then continue in b's direction.
            rotated_actions = [Action.left_neighbor(b.action), Action.right_neighbor(b.action)]
            for rotated_action in rotated_actions:
                # Never sidestep straight back against a's own action.
                inverse_a_action = (a.action[0] * -1, a.action[1] * -1)
                if rotated_action == inverse_a_action:
                    continue
                rotated_neighbor = self._neighbor(a, rotated_action, environment)
                if rotated_neighbor:
                    directed_neighbor = self._neighbor(rotated_neighbor, b.action, environment)
                    if directed_neighbor:
                        # Only cells not already on the path may be spliced in.
                        # NOTE(review): if both rotations qualify, the second
                        # (right) candidate overwrites the first — presumably
                        # either valid detour is acceptable; confirm intent.
                        if rotated_neighbor not in path and directed_neighbor not in path:
                            extended_nodes = [rotated_neighbor, directed_neighbor]

            if len(extended_nodes) == 2:
                x = extended_nodes[0]
                y = extended_nodes[1]

                # Splice the two detour nodes in right after `a`.
                path.insert(index+1, x)
                path.insert(index+2, y)

                # The old `b` now follows detour node `y`; recompute the
                # action that leads into it.
                b = path[index+3]
                b.action = (b.point.x - y.point.x, b.point.y - y.point.y)
                path[index+3] = b
                # Re-examine the same index: the new segment may itself be
                # extendable again.
                continue

            index += 1
            if index == len(path)-1:
                break
        self.transposition_table[end] = path
        return path
# Example 5
 def set_snake(self):
     """Reset the snake to a single segment at a random free position.

     Keeps the current heading when one exists, otherwise picks a random
     one. Returns the new list of snake points.
     """
     self._clear_environment_for(Tile.snake)
     spawn = self._random_available_position()
     self.tiles[spawn.x][spawn.y] = Tile.snake
     self.snake = self._points_of(Tile.snake)
     self.snake_length, self.snake_moves = 1, 0
     if self.snake_action is None:
         self.snake_action = random.choice(Action.all())
     return self.snake
# Example 6
 def _angle_from_fruit(self):
     """Return the head-to-fruit angle adjusted for the current heading.

     Of the clockwise and counter-clockwise adjustments, the one with the
     smaller magnitude is returned.
     """
     head = self.snake[0]
     target = self.fruit[0]
     raw_angle = math.atan2(target.y - head.y, target.x - head.x)
     offsets = Action.adjusted_angles(self.snake_action)
     clockwise = raw_angle + offsets[0]
     counter_clockwise = raw_angle - offsets[1]
     if abs(clockwise) < abs(counter_clockwise):
         return clockwise
     return counter_clockwise
# Example 7
 def print_path(self, path):
     environment_string = ""
     for y in range(0, self.height):
         environment_string += "\n"
         for x in range(0, self.width):
             tile = self.tiles[y][x]
             for p in path:
                 if tile == Tile.empty and p.point == Point(x, y):
                     tile = Action.description(p.action)
             environment_string += " " + tile + " "
     print environment_string
# Example 8
 def observation(self, new_action):
     """Build the observation vector for a candidate action.

     Returns [action encoding, left-tile-free, ahead-tile-free,
     right-tile-free, angle-to-fruit] relative to the snake's head and
     current heading.
     """
     head = self.snake[0]
     heading = self.snake_action

     def _accessible(delta):
         # Whether the tile reached by applying `delta` to the head is free.
         return self._is_point_accessible(
             Point(head.x + delta[0], head.y + delta[1]))

     left_free = _accessible(Action.left_neighbor(heading))
     ahead_free = _accessible(heading)
     right_free = _accessible(Action.right_neighbor(heading))
     return [
         Action.vector(heading, new_action),
         left_free,
         ahead_free,
         right_free,
         self._angle_from_fruit(),
     ]
# Example 9
    def __init__(self, long_name, short_name, abbreviation):
        """Set up the DQN model directory (wiping any previous contents),
        build the DDQN network and load saved weights if present.
        """
        BaseGameModel.__init__(self, long_name, short_name, abbreviation)

        self.model_path = self.model_dir_path + Constants.DQN_MODEL_NAME

        # Start from a clean model directory on every construction.
        model_dir = os.path.dirname(self.model_path)
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir, ignore_errors=True)
        os.makedirs(model_dir)

        self.action_space = len(Action.possible())
        self.ddqn = DDQNModel(self.model_input_shape, self.action_space).model
        self._load_model()
# Example 10
 def step(self, action):
     """Advance the snake one tile in `action`'s direction.

     Returns True on a successful move, False on a collision with the
     snake's body or a wall, and None (bare return) when `action` would
     reverse the current heading — that move is simply ignored.
     """
     if Action.is_reverse(self.snake_action, action):
         # Reversing straight back is never allowed.
         return
     self.snake_action = action
     head = self.snake[0]
     dx, dy = self.snake_action
     new_head = Point(x=(head.x + dx), y=(head.y + dy))
     if new_head in self.snake:
         return False
     if new_head in self.wall:
         return False
     self.snake_moves += 1
     self.snake.insert(0, new_head)
     self.tiles[new_head.y][new_head.x] = Tile.snake
     # Pop the tail unless the snake just grew; presumably self.reward()
     # is the target length — verify against the environment.
     if len(self.snake) > self.reward():
         tail = self.snake.pop()
         self.tiles[tail.y][tail.x] = Tile.empty
     self._update_frames()
     return True
# Example 11
 def possible_actions_for_current_action(self, current_action):
     """Return every action except the reverse of `current_action`."""
     candidates = Action.all()
     candidates.remove((-current_action[0], -current_action[1]))
     return candidates