コード例 #1
0
 def _get_diagonal_actions_in_center(self, actions, center_x, center_y, x_diff, y_diff):
     """Append the diagonal move (center + diff) to *actions* when playable.

     The target square is playable if it is empty or holds a piece of the
     opposing color; a friendly piece blocks the move.
     """
     target_x, target_y = center_x + x_diff, center_y + y_diff
     occupant = self.board.get(target_x, target_y)
     # A friendly occupant makes the square unreachable; anything else is fine.
     if occupant is not None and occupant.color == self.color:
         return
     actions.append(Action(self.x, self.y, target_x, target_y))
コード例 #2
0
def _raw_to_examples(line_iterator, proba=None):
    """Parse a raw game log line by line and yield training examples.

    Two per-game formats are handled:
      * a line containing "{": a JSON-encoded game, delegated whole to
        ``json_to_examples``;
      * plain-text games: an optional FEN start line (contains "/"),
        otherwise a blue starting line then a red starting line, followed
        by one move per line encoded as four digits ("XXXX" = no action).
    A blank line terminates a plain-text game; the side to move at that
    point is treated as the loser. NOTE(review): this loser-by-turn
    convention is inferred from the code — confirm against the log producer.

    :param line_iterator: iterable of raw text lines (e.g. an open file).
    :param proba: optional value forwarded to ``json_to_examples`` and
        ``update_examples``; its semantics are defined by those helpers.
    :return: generator of example objects produced by the helpers.
    """
    game_number = 1  # counts games; only written here, no visible reader in this chunk
    blue_starting = None
    red_starting = None
    fen_starting = None
    board = None
    is_blue = True  # True while blue is the side to move
    round = 0  # move index within the current game (shadows builtin `round`)
    examples = []
    for line in line_iterator:
        line = line.strip()
        if "{" in line:
            # JSON game: parse in one shot and flush its examples immediately.
            examples = []
            json_to_examples(line, examples, proba)
            game_number += 1
            for example in examples:
                yield example
            examples = []
        elif line == "":
            # Blank line ends the current plain-text game; the player whose
            # turn it is now is assumed to have lost.
            if is_blue:
                winner = Color.RED
            else:
                winner = Color.BLUE
            set_winner(examples, winner)
            for example in examples:
                yield example
            # End game
            blue_starting = None
            red_starting = None
            fen_starting = None
            board = None
            is_blue = True
            round = 0
            examples = []
            game_number += 1
        elif "/" in line:
            # A "/" marks a FEN start-position line.
            fen_starting = line
        elif fen_starting is None and blue_starting is None:
            blue_starting = line
        elif fen_starting is None and red_starting is None:
            red_starting = line
        else:
            # Move line: the board is built lazily on the first move.
            if board is None:
                if fen_starting is None:
                    board = Board(start_blue=blue_starting,
                                  start_red=red_starting)
                else:
                    board = Board.from_fen(fen_starting)
            if line == "XXXX":
                # Sentinel for "no action"; use the dedicated null-action policy.
                action = None
                get_policy = get_none_action_policy
            else:
                # Four single digits: from-x, from-y, to-x, to-y.
                action = Action(int(line[0]), int(line[1]), int(line[2]),
                                int(line[3]))
                get_policy = action.get_policy
            update_examples(board, examples, get_policy, is_blue, proba, round)
            try:
                board.apply_action(action)
            except AttributeError:
                # Best-effort: report the unplayable move and keep parsing.
                print("Wrong action", action, ", round:", round)
            round += 1
            is_blue = not is_blue
    # Flush the final game when the log does not end with a blank line.
    if is_blue:
        winner = Color.RED
    else:
        winner = Color.BLUE
    set_winner(examples, winner)
    for example in examples:
        yield example
コード例 #3
0
 def test_read_strange(self):
     """A tricky FEN position must still produce a real move list for red."""
     fen = "1bnaa1bn1/R8/5k1cr/1p2p1B1p/2p6/9/1PP2P2P/4CCN2/1N2K4/2BA1A2R w - - 0 1"
     board = Board.from_fen(fen)
     red_actions = board.get_actions(Color.RED)
     self.assertNotEqual(red_actions, [Action(0, 0, 0, 0)])
コード例 #4
0
 def test_strange_move_chariot(self):
     """The piece at (2, 5) must not offer the illegal move to (1, 4)."""
     board = Board.from_fen("2b1akb1B/2r6/4C2c1/5p3/1p1P1Pp2/9/1PnN5/5R3/2B1K4/5AN2 w - - 1 67")
     piece = board.get(2, 5)
     illegal = Action(2, 5, 1, 4)
     self.assertNotIn(illegal, piece.get_actions())
コード例 #5
0
    def run_simulation(self, current_node, game, predictor):
        """Run one MCTS simulation from *current_node* on *game*.

        Descends the tree by maximizing the PUCT score, expands the first
        unexpanded node with *predictor*, backs the value up through the
        visited edge, and returns it negated (players alternate, so each
        level flips the sign). Designed for multithreaded use: a per-node
        lock guards statistics and a virtual loss discourages concurrent
        threads from descending the same edge.

        :param current_node: node holding q/N/priors/virtual_loss and a lock.
        :param game: mutable game state; the chosen action is applied, then
            reversed after the recursive call.
        :param predictor: object whose ``predict(game, actions)`` returns
            ``(probabilities, value)``.
        :return: value of this node from the parent's perspective (negated).
        """
        if game.is_finished():
            reward = game.get_reward()
            # Reward is from the mover's perspective; flip for the parent.
            return -reward

        current_node.lock.acquire()
        if current_node.probabilities is None:
            # Unexpanded leaf: install priors/value from the predictor and stop.
            possible_actions = game.get_current_actions()
            probabilities, predicted_value = predictor.predict(game, possible_actions)
            current_node.set_up(probabilities, game.current_player, possible_actions, predicted_value)
            current_node.lock.release()
            return -predicted_value
        else:
            possible_actions = list(current_node.q.keys())

        # Shuffle so ties in the PUCT score are broken at random.
        random.shuffle(possible_actions)

        u_max, best_action = -float("inf"), None
        for action in possible_actions:
            if action is None:
                continue
            try:
                # PUCT: mean value + exploration bonus, minus the virtual loss
                # added by other in-flight simulations on this edge.
                u = current_node.q[action] + \
                    self.c_puct * current_node.probabilities[action] * \
                    math.sqrt(current_node.total_N) / (1 + current_node.N[action]) - current_node.virtual_loss[action]
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
                # are not intercepted; dump diagnostic state, then re-raise.
                print(current_node.probabilities)
                print(repr(game.board))
                print(current_node.total_N)
                raise
            if u > u_max:
                u_max = u
                best_action = action
        # Best action is None when there is no legal move

        if best_action is not None:
            # We have to duplicate it in case of multithreading as apply_action modifies the action (eaten)
            best_action = Action(best_action.x_from, best_action.y_from, best_action.x_to, best_action.y_to)

        game.apply_action(best_action, invalidate_cache=False)
        if best_action not in current_node.next_nodes:
            next_node = MCTSNode()
            current_node.next_nodes[best_action] = next_node
        else:
            next_node = current_node.next_nodes[best_action]

        # Mark the edge as in-flight, then release the lock around the recursion.
        current_node.virtual_loss[best_action] += 1
        current_node.lock.release()

        value = self.run_simulation(next_node, game, predictor)
        game.reverse_action()

        current_node.lock.acquire()
        # Might be a problem if not enough simulations
        # Incremental-mean update of q, then bump visit counts and clear the
        # virtual loss now that this simulation's result is folded in.
        current_node.q[best_action] = (current_node.N[best_action] * current_node.q[best_action] + value) \
                                      / (current_node.N[best_action] + 1)
        current_node.N[best_action] += 1
        current_node.total_N += 1
        current_node.virtual_loss[best_action] -= 1
        current_node.lock.release()

        return -value