def move(self, environment):
    """Pick the snake's next action via epsilon-greedy over the DDQN's Q-values.

    With probability 0.01 a random action index is explored; otherwise the
    index of the highest predicted Q-value for the current state is exploited.
    The chosen index is converted to an action vector and normalized against
    the snake's current heading.

    :param environment: game environment providing `state()` and `snake_action`
    :return: a normalized action, as produced by `Action.normalized_action`
    """
    BaseDDQNGameModel.move(self, environment)
    if np.random.rand() < 0.01:
        # Explore: uniform random action index.
        action_index = random.randrange(self.action_space)
    else:
        # Exploit: argmax over Q-values predicted for the current state.
        state = environment.state()
        q_values = self.ddqn.predict(
            np.expand_dims(np.asarray(state).astype(np.float64), axis=0),
            batch_size=1)
        action_index = np.argmax(q_values[0])
    # Bug fix: the exploration branch previously passed the raw integer index
    # straight through as `action_vector`, while the exploitation branch
    # converted it with Action.action_from_vector — so normalized_action
    # received inconsistent types. Convert the index in both branches.
    action_vector = Action.action_from_vector(action_index)
    return Action.normalized_action(environment.snake_action, action_vector)
def _predict(self, environment, model):
    """Score the three candidate actions (turn left, go straight, turn right)
    with the given model and return the highest-scoring one.

    :param environment: environment providing `snake_action` and `observation`
    :param model: wrapper exposing a Keras-style `model.predict`
    :return: the candidate action with the maximal predicted value
    """
    candidate_actions = [
        Action.left_neighbor(environment.snake_action),
        environment.snake_action,
        Action.right_neighbor(environment.snake_action),
    ]
    scores = []
    for candidate in candidate_actions:
        # Build the model's input features for this hypothetical action.
        features = np.array(environment.observation(candidate)).reshape(
            -1, Constants.MODEL_FEATURE_COUNT, 1)
        scores.append(model.model.predict(features))
    return candidate_actions[np.argmax(np.array(scores))]
def _ddqn(self, total_step_limit=10000000, total_run_limit=None, clip=True):
    """Main DDQN training loop.

    Runs episodes until `total_run_limit` runs or `total_step_limit` steps
    are reached (either limit calls exit(0)). Each step predicts a move,
    applies it, optionally clips the reward to {-1, 0, 1}, stores the
    transition in replay memory, and periodically logs the mean score.

    :param total_step_limit: hard cap on cumulative environment steps
    :param total_run_limit: optional cap on the number of episodes
    :param clip: when True, clip rewards to their sign
    """
    run = 0
    total_step = 0
    scores = []
    while True:
        if total_run_limit is not None and run >= total_run_limit:
            print("Reached total run limit of: " + str(total_run_limit))
            exit(0)
        run += 1
        env = self.prepare_training_environment()
        current_state = env.state()
        step = 0
        score = env.reward()
        while True:
            if total_step >= total_step_limit:
                print("Reached total step limit of: " + str(total_step_limit))
                exit(0)
            total_step += 1
            step += 1

            action = self._predict_move(current_state)
            action_vector = Action.action_from_vector(action)
            normalized_action = Action.normalized_action(
                env.snake_action, action_vector)
            next_state, reward, terminal = env.full_step(normalized_action)
            if clip:
                # Bug fix: np.sign's return value was previously discarded,
                # so clipping silently never happened. Assign it back.
                reward = np.sign(reward)
            score += reward
            self._remember(current_state, action, reward, next_state, terminal)
            current_state = next_state
            self._step_update(total_step)
            if terminal:
                scores.append(score)
                # Log and reset the score window every SCORE_LOGGING_FREQUENCY
                # episodes.
                if len(scores) % SCORE_LOGGING_FREQUENCY == 0:
                    self.log_score(mean(scores))
                    print('{{"metric": "score", "value": {}}}'.format(
                        mean(scores)))
                    print('{{"metric": "run", "value": {}}}'.format(run))
                    scores = []
                break
def longest_path(self, start, end, environment):
    """Build a long path from `start` to `end` by taking the shortest BFS path
    and repeatedly extending straight segments into sideways detours.

    Results are memoized per `end` in self.transposition_table.

    :param start: starting node
    :param end: target node (also the transposition-table key)
    :param environment: game environment used for neighbor/accessibility checks
    :return: list of path nodes, or [] if no usable shortest path exists
    """
    # Return a cached result for this destination if one exists.
    longest_path_from_transposition_table = self._path_from_transposition_table(end)
    if longest_path_from_transposition_table:
        return longest_path_from_transposition_table
    shortest_path_solver = ShortestPathBFSSolver()
    path = shortest_path_solver.shortest_path(environment, start, end)
    # BFS yields the path end-to-start; reverse to walk start-to-end.
    path.reverse()
    if not path or len(path) <= 1:
        return []
    index = 0
    while True:
        # Consider the edge between consecutive path nodes a -> b.
        a = path[index]
        b = path[index+1]
        extended_nodes = []
        # Try detouring sideways (left/right of b's direction) for two steps.
        rotated_actions = [Action.left_neighbor(b.action),
                           Action.right_neighbor(b.action)]
        for rotated_action in rotated_actions:
            # Skip a detour that would immediately reverse a's direction.
            inverse_a_action = (a.action[0] * -1, a.action[1] * -1)
            if rotated_action == inverse_a_action:
                continue
            rotated_neighbor = self._neighbor(a, rotated_action, environment)
            if rotated_neighbor:
                # Second detour step continues in b's original direction.
                directed_neighbor = self._neighbor(rotated_neighbor, b.action,
                                                   environment)
                if directed_neighbor:
                    # Only extend through cells not already on the path.
                    if rotated_neighbor not in path and directed_neighbor not in path:
                        extended_nodes = [rotated_neighbor, directed_neighbor]
        if len(extended_nodes) == 2:
            # Splice the two detour nodes between a and b, then repair b's
            # incoming action so it points from the new predecessor y.
            x = extended_nodes[0]
            y = extended_nodes[1]
            path.insert(index+1, x)
            path.insert(index+2, y)
            b = path[index+3]
            b.action = (b.point.x - y.point.x, b.point.y - y.point.y)
            path[index+3] = b
            # Re-examine the same index: the new segment may extend again.
            continue
        index += 1
        if index == len(path)-1:
            break
    self.transposition_table[end] = path
    return path
def set_snake(self):
    """Spawn a fresh one-tile snake at a random available position.

    Clears any existing snake tiles first, resets length/move counters, and
    assigns a random initial heading if none is set yet.

    :return: the snake's point list (self.snake)
    """
    self._clear_environment_for(Tile.snake)
    spawn = self._random_available_position()
    # NOTE(review): indexed [x][y] here, but step()/print_path() use [y][x] —
    # this is only equivalent on a square board; confirm intended order.
    self.tiles[spawn.x][spawn.y] = Tile.snake
    self.snake = self._points_of(Tile.snake)
    self.snake_length = 1
    self.snake_moves = 0
    if self.snake_action is None:
        self.snake_action = random.choice(Action.all())
    return self.snake
def _angle_from_fruit(self):
    """Angle from the snake's head to the fruit, adjusted for the current
    heading; returns whichever adjusted angle has the smaller magnitude.

    :return: float angle in radians
    """
    head = self.snake[0]
    target = self.fruit[0]
    raw_angle = math.atan2(target.y - head.y, target.x - head.x)
    # Heading-dependent offsets: [0] applied clockwise, [1] counter-clockwise.
    offsets = Action.adjusted_angles(self.snake_action)
    clockwise = raw_angle + offsets[0]
    counter_clockwise = raw_angle - offsets[1]
    return clockwise if abs(clockwise) < abs(counter_clockwise) else counter_clockwise
def print_path(self, path):
    """Print the board to stdout, overlaying each path node's action symbol
    on the empty tile it occupies.

    :param path: iterable of nodes with `.point` and `.action` attributes
    """
    environment_string = ""
    for y in range(0, self.height):
        environment_string += "\n"
        for x in range(0, self.width):
            tile = self.tiles[y][x]
            # Replace empty tiles that lie on the path with the action glyph.
            for p in path:
                if tile == Tile.empty and p.point == Point(x, y):
                    tile = Action.description(p.action)
            environment_string += " " + tile + " "
    # Fix: was a Python-2-only `print` statement; use the function-call form
    # already used elsewhere in this file (identical output for one argument).
    print(environment_string)
def observation(self, new_action):
    """Build the model's feature vector for a hypothetical action.

    :param new_action: the candidate action being evaluated
    :return: [action encoding, left-clear, ahead-clear, right-clear,
              angle to fruit]
    """
    head = self.snake[0]

    def accessible(direction):
        # Probe the tile one step away from the head in `direction`.
        probe = Point(head.x + direction[0], head.y + direction[1])
        return self._is_point_accessible(probe)

    # Accessibility of the three tiles adjacent to the current heading,
    # checked in left / ahead / right order.
    left_clear = accessible(Action.left_neighbor(self.snake_action))
    ahead_clear = accessible(self.snake_action)
    right_clear = accessible(Action.right_neighbor(self.snake_action))
    return [
        Action.vector(self.snake_action, new_action),
        left_clear,
        ahead_clear,
        right_clear,
        self._angle_from_fruit(),
    ]
def __init__(self, long_name, short_name, abbreviation):
    """Initialize the DQN game model: reset the model directory, build the
    DDQN network, and attempt to load saved weights.

    :param long_name: full model name
    :param short_name: short model name
    :param abbreviation: model abbreviation
    """
    BaseGameModel.__init__(self, long_name, short_name, abbreviation)
    self.model_path = self.model_dir_path + Constants.DQN_MODEL_NAME
    model_dir = os.path.dirname(self.model_path)
    # Start from a clean model directory on every construction.
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir, ignore_errors=True)
    os.makedirs(model_dir)
    self.action_space = len(Action.possible())
    self.ddqn = DDQNModel(self.model_input_shape, self.action_space).model
    # NOTE(review): the directory was just wiped above, so _load_model
    # presumably tolerates a missing checkpoint — confirm.
    self._load_model()
def step(self, action):
    """Advance the snake one step in the given direction.

    Moves the head, grows the snake when the current reward allows it,
    otherwise pops the tail. Updates the tile grid and frame state.

    :param action: (dx, dy) direction tuple for this step
    :return: True on a successful move, False on a fatal collision
             (snake body or wall), and None when a reverse move is rejected.
             NOTE(review): the bare `return` yields None, which is falsy like
             the collision case — callers that treat any falsy result as death
             would misread a rejected reverse; confirm intended.
    """
    if Action.is_reverse(self.snake_action, action):
        #print "Forbidden reverse action attempt!"
        return
    self.snake_action = action
    head = self.snake[0]
    x, y = self.snake_action
    new = Point(x=(head.x + x), y=(head.y + y))
    if new in self.snake:
        #print "Hit snake"
        return False
    elif new in self.wall:
        #print "Hit wall"
        return False
    else:
        self.snake_moves += 1
        # Advance: new head first, grid indexed [row=y][col=x].
        self.snake.insert(0, new)
        self.tiles[new.y][new.x] = Tile.snake
        # Trim the tail unless the reward allows the snake to grow.
        if len(self.snake) > self.reward():
            last = self.snake.pop()
            self.tiles[last.y][last.x] = Tile.empty
        self._update_frames()
        return True
def possible_actions_for_current_action(self, current_action):
    """All actions except the direct reversal of `current_action`.

    :param current_action: (dx, dy) direction tuple
    :return: list of allowed actions
    """
    dx, dy = current_action
    candidates = Action.all()
    # Drop the exact opposite direction; reversing into oneself is forbidden.
    candidates.remove((dx * -1, dy * -1))
    return candidates