def execute_move(self, spot):
    """Play ``spot`` for the current player and hand the turn over.

    The spot is wrapped in ``GameBoard.GameMove`` and submitted to the
    board through ``make_a_move``. When the board accepts the move, this
    object adopts the resulting board, sets ``current_player`` to whoever
    the new board's ``get_next_player`` names, and copies the board's
    ``game_ended`` flag.

    Args:
        spot: Board location to play. Its exact format is defined by
            ``GameBoard.GameMove`` and is not visible in this block.

    Returns:
        This same object, so calls can be chained.

    Raises:
        ValueError: If the board reports the move as invalid. Nothing on
            ``self`` is modified by this method in that case.
    """
    # NOTE(review): a second ``execute_move`` definition follows in this
    # source; if both belong to the same class the later one shadows this
    # one -- confirm which is intended to be live.
    mover = self.current_player
    outcome = self.game_board.make_a_move(mover, GameBoard.GameMove(spot))

    if outcome.is_move_valid:
        self.game_board = outcome.new_game_board
        # The successor is chosen by the *new* board, not the old one.
        self.current_player = self.game_board.get_next_player(mover)
        self.game_ended = self.game_board.game_ended
        return self

    raise ValueError('Invalid move.')
def execute_move(
        self, action: int) -> Tuple[np.ndarray, float, int, GameWrapper, bool]:
    """Apply one agent action and report the resulting transition.

    Actions below ``self.PASS_TURN_ACTION`` place a piece; the action is
    translated with ``convert_action_to_spot``. Any other action passes
    the turn. (Earlier notes on this method give the range as 0..63 for
    placing and 64 for passing; the constant's value is not visible here.)

    NOTE(review): the comments previously attached to this method
    described a different reward scheme (flipped-piece count, -1, -2,
    100 x margin). The values listed below are what the code computes.

    Reward for placing a piece:
        * Board reports the game ended after the move: with
          ``margin = mover's count - opponent's count``, the reward is
          ``10 * margin + 10`` when ``margin > 0`` and ``10 * margin - 10``
          otherwise (so a tie yields -10). This applies whether or not the
          move was valid.
        * Game not ended, move valid: ``1.5``.
        * Game not ended, move invalid: ``-0.1``.

    Reward for passing:
        ``-2 *`` the number of valid spots the passing player still had
        (zero when there were none).

    State changes:
        * Placing: ``self.game_board`` is always replaced by
          ``move_result.new_game_board``. Only a valid move switches
          ``current_player`` and refreshes ``self.game_ended``.
        * Passing: ``current_player`` is switched; ``self.game_ended`` is
          not touched and the move is always reported as valid.

    Args:
        action: Action index chosen by the agent.

    Returns:
        A tuple ``(observation, reward, game_ended, self, is_move_valid)``:
        ``observation`` is ``self.observe(...)`` for whoever is the
        current player *after* this call (presumably a flat board of
        shape (64,) -- confirm against ``observe``); ``game_ended`` is the
        board's ``game_ended`` value as-is; ``self`` is this same object.
    """
    if action >= self.PASS_TURN_ACTION:
        # Pass: nothing is placed, the turn simply moves on.
        spot = None
        is_move_valid = True
        open_spots = self.game_board.get_valid_spots(self.current_player)
        reward = -2 * len(open_spots)
        game_ended = self.game_board.game_ended
        self.current_player = self.game_board.get_next_player(
            self.current_player)
        observation = self.observe(self.current_player)
    else:
        # Place a piece on the spot this action maps to.
        spot = self.convert_action_to_spot(action)
        outcome = self.game_board.make_a_move(
            self.current_player, GameBoard.GameMove(spot))
        is_move_valid = outcome.is_move_valid

        # NOTE(review): the board is adopted even when the move is
        # invalid; presumably ``new_game_board`` is then the unchanged
        # board -- confirm against ``make_a_move``.
        self.game_board = outcome.new_game_board
        board = self.game_board
        game_ended = board.game_ended

        if game_ended:
            # Score from the mover's point of view (player not yet
            # switched at this point).
            if self.current_player == GameBoard.PLAYER_1:
                mine, theirs = board.player_1_count, board.player_2_count
            else:
                mine, theirs = board.player_2_count, board.player_1_count
            margin = mine - theirs
            if margin > 0:
                reward = 10 * margin + 10
            else:
                reward = 10 * margin - 10
        else:
            reward = 1.5 if is_move_valid else -0.1

        # NOTE(review): source indentation was lost; the ``game_ended``
        # refresh is assumed to sit inside the valid-move guard together
        # with the player switch -- confirm.
        if is_move_valid:
            self.current_player = board.get_next_player(self.current_player)
            self.game_ended = board.game_ended

        # Observed from the side of whoever moves next (unchanged player
        # if the move was rejected).
        observation = self.observe(self.current_player)

    if self.is_log_play:
        self.log_move(spot, self)

    return (observation, reward, game_ended, self, is_move_valid)