Beispiel #1
0
 def get_actions(self):
     """Return the Action objects this piece can play from (self.x, self.y).

     A destination is accepted when the board square is empty or holds a
     piece whose color differs from this piece's color. Candidates are
     examined in this order: y - 1, y + 1, one step along x in the
     direction given by ``self.color.value``, then the two forward
     diagonals. Diagonals are only considered from a few hard-coded
     squares that depend on the piece color (presumably the diagonal
     lines of the opposing palace -- confirm against the board layout).
     """
     def is_open(occupant):
         # Enterable: nothing there, or a piece of another color.
         return occupant is None or occupant.color != self.color

     moves = []

     left = self.y - 1
     if left >= 0 and is_open(self.board.get(self.x, left)):
         moves.append(Action(self.x, self.y, self.x, left))

     right = self.y + 1
     if right < BOARD_WIDTH and is_open(self.board.get(self.x, right)):
         moves.append(Action(self.x, self.y, self.x, right))

     top = self.x + self.color.value
     if not 0 <= top < BOARD_HEIGHT:
         # No forward square, hence no forward or diagonal move either.
         return moves

     if is_open(self.board.get(top, self.y)):
         moves.append(Action(self.x, self.y, top, self.y))

     # Squares from which a diagonal step is allowed, per color.
     if self.color == Color.BLUE:
         mid_row, corner_row = 8, 7
     else:
         mid_row, corner_row = 1, 2
     at_mid = self.x == mid_row and self.y == 4
     on_corner_row = self.x == corner_row

     diagonal_columns = []
     if at_mid or (on_corner_row and self.y == 3):
         diagonal_columns.append(right)
     if at_mid or (on_corner_row and self.y == 5):
         diagonal_columns.append(left)

     for column in diagonal_columns:
         if is_open(self.board.get(top, column)):
             moves.append(Action(self.x, self.y, top, column))
     return moves
Beispiel #2
0
 def _get_normal_actions_y(self, actions, x_to, y_tos):
     """Append sliding moves along the squares (x_to, y) for y in ``y_tos``.

     Squares are visited in the order given. Every empty square yields a
     move; the first occupied square ends the scan and yields a move only
     when its piece has a different color than this piece.

     :param actions: list that receives the generated Action objects.
     :param x_to: x coordinate shared by all destinations.
     :param y_tos: iterable of y coordinates, ordered outward from the piece.
     """
     for y_to in y_tos:
         occupant = self.board.get(x_to, y_to)
         if occupant is None or occupant.color != self.color:
             actions.append(Action(self.x, self.y, x_to, y_to))
         if occupant is not None:
             # Any piece, friend or foe, blocks everything behind it.
             return
 def test_hash(self):
     """Equal actions hash equally and all 10*10*9*9 actions hash distinctly."""
     expected_count = 10 * 10 * 9 * 9
     coordinates = [(x_from, y_from, x_to, y_to)
                    for x_from in range(10)
                    for x_to in range(10)
                    for y_from in range(9)
                    for y_to in range(9)]
     seen_actions = set()
     seen_hashes = set()
     for coords in coordinates:
         first = Action(*coords)
         twin = Action(*coords)
         # Two separately built but identical actions must share a hash.
         self.assertEqual(hash(first), hash(twin))
         seen_actions.add(first)
         seen_hashes.add(hash(first))
     self.assertEqual(len(seen_actions), expected_count)
     self.assertEqual(len(seen_hashes), expected_count)
Beispiel #4
0
 def _get_diagonal_actions_sub(self, actions, center_x, center_y, x_diff, y_diff):
     """Append diagonal moves through the center (center_x, center_y).

     Nothing happens unless this piece stands at
     (center_x + x_diff, center_y + y_diff). From there:

     * center holds a piece of another color: the capture is appended;
     * center holds a piece of the same color: nothing is appended;
     * center is empty: the move to the center is appended and, if the
       square on the far side (center_x - x_diff, center_y - y_diff) is
       empty or holds a piece of another color, that move is appended too.
     """
     if self.x - x_diff != center_x or self.y - y_diff != center_y:
         return

     center_piece = self.board.get(center_x, center_y)
     if center_piece is not None:
         if center_piece.color != self.color:
             actions.append(Action(self.x, self.y, center_x, center_y))
         return

     actions.append(Action(self.x, self.y, center_x, center_y))

     # The center is free, so the piece may continue to the opposite corner.
     far_x = center_x - x_diff
     far_y = center_y - y_diff
     far_piece = self.board.get(far_x, far_y)
     if far_piece is None or far_piece.color != self.color:
         actions.append(Action(self.x, self.y, far_x, far_y))
Beispiel #5
0
 def _get_actions_one_direction(self, actions, x_tos, y_tos):
     """Append jump moves along one scan direction.

     Squares are visited for each y in ``y_tos`` and, inside that, each x
     in ``x_tos``. Nothing is appended until one piece has been passed
     over. After that, every empty square yields a move, and the next
     occupied square ends the scan, yielding a capture only if its piece
     has a different color. Meeting a piece whose ``get_index()`` equals
     this piece's ends the scan immediately, before or after the jump.
     """
     jumped = False
     for y_to in y_tos:
         for x_to in x_tos:
             occupant = self.board.get(x_to, y_to)
             if occupant is None:
                 if jumped:
                     actions.append(Action(self.x, self.y, x_to, y_to))
                 continue
             if occupant.get_index() == self.get_index():
                 # Same kind of piece: can neither be jumped nor captured.
                 return
             if not jumped:
                 # First piece met becomes the one that is jumped over.
                 jumped = True
                 continue
             if occupant.color != self.color:
                 actions.append(Action(self.x, self.y, x_to, y_to))
             return
Beispiel #6
0
    def _get_actions_sub(self, actions, fix_x, fix_y):
        """Append the two-step jumps that lean towards (fix_x, fix_y).

        Two destinations are considered: (x + 2*fix_x, y + fix_y), which
        needs (x + fix_x, y) to be empty, and (x + fix_x, y + 2*fix_y),
        which needs (x, y + fix_y) to be empty. A destination must be on
        the board and either empty or held by a piece of another color.
        """
        near_x, far_x = self.x + fix_x, self.x + 2 * fix_x
        near_y, far_y = self.y + fix_y, self.y + 2 * fix_y

        # (destination, square that must be empty for the jump to be possible)
        candidates = (
            ((far_x, near_y), (near_x, self.y)),
            ((near_x, far_y), (self.x, near_y)),
        )
        for (to_x, to_y), (gate_x, gate_y) in candidates:
            if not self.board.is_in(to_x, to_y):
                continue
            target = self.board.get(to_x, to_y)
            landing_ok = target is None or target.color != self.color
            if landing_ok and self.board.get(gate_x, gate_y) is None:
                actions.append(Action(self.x, self.y, to_x, to_y))
 def play_action(self):
     """Ask the external engine process for a move and return it as an Action.

     Writes ``go`` to the engine's stdin. When ``self.think_time`` is not
     -1, sleeps that many seconds and then writes ``d`` (presumably the
     engine's "stop thinking / answer now" command -- confirm against the
     engine protocol). Output lines are then read until one contains
     ``move``.

     :return: ``Action(0, 0, 0, 0)`` if that line also contains ``pass``;
         otherwise an Action built from the second whitespace-separated
         token of the line, whose characters 0 and 2 are translated
         through ``UCI_USI_CONVERSIONS`` and characters 1 and 3 are read
         as digits.
     :raises EOFError: if the engine's stdout reaches end of file before a
         line containing ``move`` is read (for example the engine exited).
     """
     engine_in = self.process_engine.stdin
     engine_out = self.process_engine.stdout

     engine_in.write("go\n")
     engine_in.flush()
     if self.think_time != -1:
         time.sleep(self.think_time)
         engine_in.write("d\n")
         engine_in.flush()

     while True:
         raw_line = engine_out.readline()
         if not raw_line:
             # readline() yields an empty value only at EOF; without this
             # check the loop would spin forever on a dead engine.
             raise EOFError("engine output ended before a move was received")
         line = raw_line.strip()
         if "move" not in line:
             continue
         if "pass" in line:
             return Action(0, 0, 0, 0)
         move = line.split()[1].strip()
         return Action(int(move[1]),
                       int(UCI_USI_CONVERSIONS[move[0]]),
                       int(move[3]),
                       int(UCI_USI_CONVERSIONS[move[2]]))
Beispiel #8
0
    def _get_actions_sub(self, actions, fix_x, fix_y):
        """Append the three-step jumps that lean towards (fix_x, fix_y).

        Two routes are considered, each listed as destination, second
        intermediate square, first intermediate square:

        * (x + 3*fix_x, y + 2*fix_y) via (x + 2*fix_x, y + fix_y) and (x + fix_x, y)
        * (x + 2*fix_x, y + 3*fix_y) via (x + fix_x, y + 2*fix_y) and (x, y + fix_y)

        The destination must be on the board and empty or held by a piece
        of another color; both intermediate squares must be empty.
        """
        x1, x2, x3 = self.x + fix_x, self.x + 2 * fix_x, self.x + 3 * fix_x
        y1, y2, y3 = self.y + fix_y, self.y + 2 * fix_y, self.y + 3 * fix_y

        routes = (
            ((x3, y2), (x2, y1), (x1, self.y)),
            ((x2, y3), (x1, y2), (self.x, y1)),
        )
        for destination, second_step, first_step in routes:
            if not self.board.is_in(*destination):
                continue
            target = self.board.get(*destination)
            landing_ok = target is None or target.color != self.color
            # Checked from the far end inwards, stopping at the first failure.
            if (landing_ok
                    and self.board.get(*second_step) is None
                    and self.board.get(*first_step) is None):
                actions.append(Action(self.x, self.y, destination[0], destination[1]))
Beispiel #9
0
 def _get_diagonal_actions(self, actions, center_x, center_y):
     """Append diagonal jumps over an occupied center (center_x, center_y).

     For each diagonal offset (x_diff, y_diff) in {-1, 1} x {-1, 1}, a move
     is generated only when this piece stands at
     (center_x + x_diff, center_y + y_diff) and the center square is
     occupied. The landing square is the opposite corner
     (center_x - x_diff, center_y - y_diff); it must be empty or hold a
     piece of another color.

     NOTE(review): no check is made on the kind of piece being jumped over
     or captured -- confirm whether a piece-type restriction is expected here.
     """
     for x_diff in (-1, 1):
         for y_diff in (-1, 1):
             if (self.x - x_diff, self.y - y_diff) != (center_x, center_y):
                 continue
             if self.board.get(center_x, center_y) is None:
                 # Nothing to jump over.
                 continue
             landing_x = center_x - x_diff
             landing_y = center_y - y_diff
             occupant = self.board.get(landing_x, landing_y)
             if occupant is None or occupant.color != self.color:
                 actions.append(Action(self.x, self.y, landing_x, landing_y))
Beispiel #10
0
 def _get_action_per_fortress(self, actions, x_min, x_max, y_min, y_max):
     """Append single-step moves confined to the rectangle [x_min, x_max] x [y_min, y_max].

     All eight neighbouring squares are candidates. When the piece stands
     on exactly one of the two middle lines (x == x_min + 1 or
     y == y_min + 1, but not both), diagonal steps are skipped. A
     destination is kept when it lies inside the rectangle and is empty or
     holds a piece of another color. No check detection is performed here.
     """
     mid_x = x_min + 1
     mid_y = y_min + 1
     # True on the middle of an edge: exactly one coordinate is on a middle line.
     on_edge_middle = (self.x == mid_x) != (self.y == mid_y)

     steps = [(dx, dy) for dx in (-1, 0, 1) for dy in (-1, 0, 1) if (dx, dy) != (0, 0)]
     for dx, dy in steps:
         if on_edge_middle and dx != 0 and dy != 0:
             continue
         new_x, new_y = self.x + dx, self.y + dy
         if not (x_min <= new_x <= x_max and y_min <= new_y <= y_max):
             continue
         occupant = self.board.get(new_x, new_y)
         if occupant is None or occupant.color != self.color:
             actions.append(Action(self.x, self.y, new_x, new_y))
 def test_pass(self):
     """Action(0, 0, 0, 0) is reported as a pass; its features are printed for inspection."""
     pass_action = Action(0, 0, 0, 0)
     self.assertTrue(pass_action.is_pass())
     features = pass_action.get_features()
     print(features)
Beispiel #12
0
 def _get_diagonal_actions_in_center(self, actions, center_x, center_y, x_diff, y_diff):
     """Append the move to (center_x + x_diff, center_y + y_diff) if it is enterable.

     The destination is enterable when it is empty or holds a piece whose
     color differs from this piece's color.
     """
     target_x, target_y = center_x + x_diff, center_y + y_diff
     occupant = self.board.get(target_x, target_y)
     can_land = occupant is None or occupant.color != self.color
     if can_land:
         actions.append(Action(self.x, self.y, target_x, target_y))
Beispiel #13
0
def _raw_to_examples(line_iterator, proba=None):
    """Yield training examples parsed from a line-oriented game record.

    Each stripped line is interpreted as follows:

    * contains "{": handed to ``json_to_examples``; the examples it
      produces are yielded at once (examples collected so far for an
      unfinished game are discarded);
    * empty: ends the current game. The winner is the color that is NOT
      to move, ``set_winner`` is applied, the examples are yielded and
      all per-game state is reset;
    * contains "/": remembered as the starting FEN;
    * otherwise, while no FEN is known, the first two such lines are the
      blue and then the red starting set-ups;
    * any other line is a move: "XXXX" means no action, else four digits
      passed to ``Action`` in order.

    After the input is exhausted the pending game is closed the same way
    as on an empty line.

    :param line_iterator: iterable of text lines.
    :param proba: forwarded unchanged to ``json_to_examples`` and
        ``update_examples``.
    """
    blue_starting = red_starting = fen_starting = None
    board = None
    is_blue = True
    turn = 0
    examples = []

    for raw_line in line_iterator:
        line = raw_line.strip()

        if "{" in line:
            examples = []
            json_to_examples(line, examples, proba)
            for example in examples:
                yield example
            examples = []
            continue

        if line == "":
            # The side that would have to move next is the loser.
            set_winner(examples, Color.RED if is_blue else Color.BLUE)
            for example in examples:
                yield example
            # Reset everything for the next game.
            blue_starting = red_starting = fen_starting = None
            board = None
            is_blue = True
            turn = 0
            examples = []
            continue

        if "/" in line:
            fen_starting = line
            continue

        if fen_starting is None and blue_starting is None:
            blue_starting = line
            continue

        if fen_starting is None and red_starting is None:
            red_starting = line
            continue

        # From here on the line is a move.
        if board is None:
            if fen_starting is not None:
                board = Board.from_fen(fen_starting)
            else:
                board = Board(start_blue=blue_starting,
                              start_red=red_starting)

        if line == "XXXX":
            action = None
            get_policy = get_none_action_policy
        else:
            x_from, y_from, x_to, y_to = (int(line[i]) for i in range(4))
            action = Action(x_from, y_from, x_to, y_to)
            get_policy = action.get_policy

        update_examples(board, examples, get_policy, is_blue, proba, turn)
        try:
            board.apply_action(action)
        except AttributeError:
            print("Wrong action", action, ", round:", turn)
        turn += 1
        is_blue = not is_blue

    # Close whatever game was still in progress at end of input.
    set_winner(examples, Color.RED if is_blue else Color.BLUE)
    for example in examples:
        yield example
Beispiel #14
0
 def test_strange_move_chariot(self):
     """The piece at (2, 5) in this position must not be offered the move to (1, 4)."""
     fen = "2b1akb1B/2r6/4C2c1/5p3/1p1P1Pp2/9/1PnN5/5R3/2B1K4/5AN2 w - - 1 67"
     position = Board.from_fen(fen)
     piece = position.get(2, 5)
     legal_moves = piece.get_actions()
     self.assertNotIn(Action(2, 5, 1, 4), legal_moves)
Beispiel #15
0
 def test_read_strange(self):
     """Red must have something other than the lone pass action in this position."""
     fen = "1bnaa1bn1/R8/5k1cr/1p2p1B1p/2p6/9/1PP2P2P/4CCN2/1N2K4/2BA1A2R w - - 0 1"
     position = Board.from_fen(fen)
     red_actions = position.get_actions(Color.RED)
     self.assertNotEqual(red_actions, [Action(0, 0, 0, 0)])
Beispiel #16
0
    def run_simulation(self, current_node, game, predictor):
        """Run one tree-search simulation from ``current_node`` and back up its value.

        * If the game is finished, the negated reward is returned.
        * If the node has no probabilities yet, it is set up from
          ``predictor.predict`` and the negated predicted value is returned.
        * Otherwise the non-None action with the highest score
          ``q + c_puct * p * sqrt(total_N) / (1 + N) - virtual_loss`` is
          applied to ``game``, the simulation recurses into the matching
          child node, the action is reverted, and the node's ``q``, ``N``
          and ``total_N`` are updated with the value returned by the child.

        ``current_node.lock`` is held while the node is read or modified and
        released during the recursive call. It is used as a context manager
        so that it is released even when an exception escapes.

        :param current_node: tree node to simulate from.
        :param game: game state; mutated by ``apply_action`` and restored
            by ``reverse_action`` before returning.
        :param predictor: object whose ``predict(game, actions)`` returns
            ``(probabilities, predicted_value)``.
        :return: the negated value obtained for this node.
        """
        if game.is_finished():
            return -game.get_reward()

        with current_node.lock:
            if current_node.probabilities is None:
                # First visit: expand the node instead of descending further.
                possible_actions = game.get_current_actions()
                probabilities, predicted_value = predictor.predict(game, possible_actions)
                current_node.set_up(probabilities, game.current_player, possible_actions, predicted_value)
                return -predicted_value

            possible_actions = list(current_node.q.keys())
            # Shuffle so that ties between equal scores are broken randomly.
            random.shuffle(possible_actions)

            u_max, best_action = -float("inf"), None
            for action in possible_actions:
                if action is None:
                    continue
                try:
                    u = current_node.q[action] + \
                        self.c_puct * current_node.probabilities[action] * \
                        math.sqrt(current_node.total_N) / (1 + current_node.N[action]) - \
                        current_node.virtual_loss[action]
                except Exception:
                    # Dump the node state to help diagnose the failure, then propagate.
                    print(current_node.probabilities)
                    print(repr(game.board))
                    print(current_node.total_N)
                    raise
                if u > u_max:
                    u_max = u
                    best_action = action
            # Best action is None when there is no legal move

            if best_action is not None:
                # We have to duplicate it in case of multithreading as apply_action modifies the action (eaten)
                best_action = Action(best_action.x_from, best_action.y_from, best_action.x_to, best_action.y_to)

            game.apply_action(best_action, invalidate_cache=False)
            if best_action not in current_node.next_nodes:
                next_node = MCTSNode()
                current_node.next_nodes[best_action] = next_node
            else:
                next_node = current_node.next_nodes[best_action]

            # Marked as in flight until the value is backed up below.
            current_node.virtual_loss[best_action] += 1

        value = self.run_simulation(next_node, game, predictor)
        game.reverse_action()

        with current_node.lock:
            # Might be a problem if not enough simulations
            visits = current_node.N[best_action]
            current_node.q[best_action] = (visits * current_node.q[best_action] + value) / (visits + 1)
            current_node.N[best_action] += 1
            current_node.total_N += 1
            current_node.virtual_loss[best_action] -= 1

        return -value