def _get_diagonal_actions_in_center(self, actions, center_x, center_y, x_diff, y_diff):
    """Append the diagonal move from the palace center offset by (x_diff, y_diff).

    The move is added to *actions* only when the destination square is empty
    or occupied by an opposing piece (i.e. a capture); a friendly piece blocks it.
    """
    target_x = center_x + x_diff
    target_y = center_y + y_diff
    occupant = self.board.get(target_x, target_y)
    # A square holding one of our own pieces is the only thing that blocks the move.
    if occupant is not None and occupant.color == self.color:
        return
    actions.append(Action(self.x, self.y, target_x, target_y))
def _raw_to_examples(line_iterator, proba=None):
    """Yield training examples parsed from a raw game log, one game after another.

    Each game in the stream consists of (in order): an optional FEN line
    (detected by "/"), otherwise a blue starting line then a red starting line,
    followed by one move per line ("XXXX" meaning a pass / no-move), and is
    terminated by a blank line. Lines containing "{" are treated as a complete
    JSON-encoded game and delegated to json_to_examples.

    :param line_iterator: iterable of raw text lines (e.g. an open file).
    :param proba: forwarded to json_to_examples / update_examples —
        presumably a sampling probability; confirm against those helpers.
    """
    game_number = 1
    # Per-game parser state; reset after every finished game.
    blue_starting = None
    red_starting = None
    fen_starting = None
    board = None
    is_blue = True        # whose turn it is for the next move line
    round = 0             # NOTE(review): shadows the builtin `round`
    examples = []
    for line in line_iterator:
        line = line.strip()
        if "{" in line:
            # Whole game encoded as JSON on a single line.
            examples = []
            json_to_examples(line, examples, proba)
            game_number += 1
            for example in examples:
                yield example
            examples = []
        elif line == "":
            # Blank line ends the current game. The side to move next lost:
            # if it is blue's turn, red won, and vice versa.
            if is_blue:
                winner = Color.RED
            else:
                winner = Color.BLUE
            set_winner(examples, winner)
            for example in examples:
                yield example
            # End game
            blue_starting = None
            red_starting = None
            fen_starting = None
            board = None
            is_blue = True
            round = 0
            examples = []
            game_number += 1
        elif "/" in line:
            # FEN starting position (ranks separated by "/").
            fen_starting = line
        elif fen_starting is None and blue_starting is None:
            # First non-FEN header line: blue's starting setup.
            blue_starting = line
        elif fen_starting is None and red_starting is None:
            # Second non-FEN header line: red's starting setup.
            red_starting = line
        else:
            # Move line — lazily build the board from whichever header we got.
            if board is None:
                if fen_starting is None:
                    board = Board(start_blue=blue_starting, start_red=red_starting)
                else:
                    board = Board.from_fen(fen_starting)
            if line == "XXXX":
                # Sentinel for "no move" / pass.
                action = None
                get_policy = get_none_action_policy
            else:
                # Moves are encoded as 4 digit characters: x_from y_from x_to y_to.
                action = Action(int(line[0]), int(line[1]), int(line[2]), int(line[3]))
                get_policy = action.get_policy
            update_examples(board, examples, get_policy, is_blue, proba, round)
            try:
                board.apply_action(action)
            except AttributeError:
                # Best-effort: log and keep parsing rather than abort the stream.
                print("Wrong action", action, ", round:", round)
            round += 1
            is_blue = not is_blue
    # Flush a final game that was not terminated by a blank line.
    # NOTE(review): placement after the loop is inferred from the mangled
    # source; it mirrors the blank-line branch above — confirm against VCS.
    if is_blue:
        winner = Color.RED
    else:
        winner = Color.BLUE
    set_winner(examples, winner)
    for example in examples:
        yield example
def test_read_strange(self):
    """A tricky FEN position must parse and produce a genuine move list for red."""
    fen = "1bnaa1bn1/R8/5k1cr/1p2p1B1p/2p6/9/1PP2P2P/4CCN2/1N2K4/2BA1A2R w - - 0 1"
    position = Board.from_fen(fen)
    red_actions = position.get_actions(Color.RED)
    # The action list must not degenerate to a single dummy move.
    self.assertNotEqual(red_actions, [Action(0, 0, 0, 0)])
def test_strange_move_chariot(self):
    """The piece at (2, 5) must not be offered the illegal move to (1, 4)."""
    fen = "2b1akb1B/2r6/4C2c1/5p3/1p1P1Pp2/9/1PnN5/5R3/2B1K4/5AN2 w - - 1 67"
    position = Board.from_fen(fen)
    piece_actions = position.get(2, 5).get_actions()
    self.assertNotIn(Action(2, 5, 1, 4), piece_actions)
def run_simulation(self, current_node, game, predictor):
    """Run one MCTS simulation from *current_node*, returning the value for the parent.

    Performs PUCT selection down the tree, expands a leaf with the predictor,
    and backs the value up while holding the node lock; a virtual loss
    discourages concurrent threads from exploring the same branch.

    :param current_node: MCTSNode for the current game state.
    :param game: mutable game object; actions applied here are reversed on unwind.
    :param predictor: object with predict(game, actions) -> (probabilities, value).
    :return: negated value from the current player's perspective (parent's view).
    """
    if game.is_finished():
        reward = game.get_reward()
        return -reward
    current_node.lock.acquire()
    if current_node.probabilities is None:
        # Leaf node: expand with the predictor and stop this simulation here.
        possible_actions = game.get_current_actions()
        probabilities, predicted_value = predictor.predict(game, possible_actions)
        current_node.set_up(probabilities, game.current_player, possible_actions, predicted_value)
        current_node.lock.release()
        return -predicted_value
    else:
        # PUCT selection: Q + c_puct * P * sqrt(N_total) / (1 + N) - virtual loss.
        # Shuffle so ties are broken randomly rather than by dict order.
        possible_actions = list(current_node.q.keys())
        random.shuffle(possible_actions)
        u_max, best_action = -float("inf"), None
        for action in possible_actions:
            if action is None:
                continue
            try:
                u = current_node.q[action] + \
                    self.c_puct * current_node.probabilities[action] * \
                    math.sqrt(current_node.total_N) / (1 + current_node.N[action]) - current_node.virtual_loss[action]
            except Exception:
                # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
                # are not intercepted; dump state for debugging, then re-raise.
                print(current_node.probabilities)
                print(repr(game.board))
                print(current_node.total_N)
                raise
            if u > u_max:
                u_max = u
                best_action = action
        # Best action is None when there is no legal move
        if best_action is not None:
            # We have to duplicate it in case of multithreading as apply_action modifies the action (eaten)
            best_action = Action(best_action.x_from, best_action.y_from, best_action.x_to, best_action.y_to)
        game.apply_action(best_action, invalidate_cache=False)
        if best_action not in current_node.next_nodes:
            next_node = MCTSNode()
            current_node.next_nodes[best_action] = next_node
        else:
            next_node = current_node.next_nodes[best_action]
        # Virtual loss: make this branch less attractive to concurrent threads
        # until the recursive simulation below returns.
        current_node.virtual_loss[best_action] += 1
        current_node.lock.release()
        value = self.run_simulation(next_node, game, predictor)
        game.reverse_action()
        current_node.lock.acquire()
        # Might be a problem if not enough simulations
        # Incremental mean update of Q, then bump visit counts and undo the virtual loss.
        current_node.q[best_action] = (current_node.N[best_action] * current_node.q[best_action] + value) \
            / (current_node.N[best_action] + 1)
        current_node.N[best_action] += 1
        current_node.total_N += 1
        current_node.virtual_loss[best_action] -= 1
        current_node.lock.release()
        return -value