def test_state_to_numpy_without_augmentation():
    state = State.empty()
    state = state.take_action(Action(0, 0))
    state = state.take_action(Action(0, 1))

    arr = state.to_numpy()

    expected_white = np.zeros((4, 4), dtype=bool)
    expected_white[0, 0] = True
    expected_black = np.zeros((4, 4), dtype=bool)
    expected_black[0, 1] = True
    assert expected_white.tolist() == arr[:, :, 0, 0].tolist()
    assert expected_black.tolist() == arr[:, :, 0, 1].tolist()
def read_data(data_path, max_games=None):
    game_files = sorted(list_files(data_path, '.json'))
    augmentations = list(Augmentation.iter_augmentations())

    if max_games is not None:
        game_files = game_files[-max_games:]
    print('Using game files from %s to %s' % (game_files[0], game_files[-1]))

    x = []
    y_policy = []
    y_reward = []
    for game_path in tqdm(game_files):
        with open(game_path, 'r') as fin:
            game_data = json.load(fin)
        winner, starter, actions, policies = AlphaConnectSerializer.deserialize(game_data)

        # Replay the game to recover every intermediate state
        state = State.empty()
        states = [state]
        for action in actions:
            state = state.take_action(action)
            states.append(state)
        states, final_state = states[:-1], states[-1]

        # Sample at most one state per augmentation, so a single game cannot
        # dominate the training set
        n_samples = min(len(states), len(augmentations))
        game_samples = sample(range(len(states)), n_samples)
        for augmentation, i in zip(augmentations, game_samples):
            # Policy targets must be reordered to match the augmented board
            augmented_action_order = sorted(Action.iter_actions(),
                                            key=lambda a: a.augment(augmentation).to_int())
            x.append(states[i].to_numpy(augmentation))
            y_policy.append([policies[i].get(action, 0.0) for action in augmented_action_order])
            y_reward.append(winner_value(final_state.winner, states[i]))

    return np.array(x), np.array(y_policy), np.array(y_reward)
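# Usage sketch for read_data, with hypothetical paths and sizes (not from the
# original code). Assuming each state encodes as a 4x4x4 board with one boolean
# channel per color, as the to_numpy tests below suggest:
#
#     x, y_policy, y_reward = read_data('data/games/', max_games=1000)
#     assert x.shape[1:] == (4, 4, 4, 2)
#     assert y_policy.shape[1] == 16  # one probability per Action(x, y)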
def test_rotating_four_quarters_is_same():
    old_action = Action(1, 3)

    action = old_action
    for _ in range(4):
        action = action.augment(Augmentation(Rotation.QUARTER, False))

    assert old_action == action
def test_state_to_numpy_with_three_quarter_rotation_and_x_flip():
    state = State.empty()
    state = state.take_action(Action(0, 0))

    arr = state.to_numpy(Augmentation(Rotation.THREE_QUARTER, True))

    expected = np.zeros((4, 4), dtype=bool)
    expected[3, 3] = True
    assert expected.tolist() == arr[:, :, 0, 1].tolist()
def test_state_to_numpy_with_quarter_rotation():
    state = State.empty()
    state = state.take_action(Action(0, 0))

    arr = state.to_numpy(Augmentation(Rotation.QUARTER, False))

    expected = np.zeros((4, 4), dtype=bool)
    expected[3, 0] = True
    assert expected.tolist() == arr[:, :, 0, 1].tolist()
def decide(self, state: State):
    while True:
        print('Possible actions:')
        print(format_in_action_grid({action: str(action) for action in Action.iter_actions()},
                                    cell_format='{:.2s}', default_value=' '))
        user_input = input('Choose your action: ')
        try:
            action = Action.from_hex(user_input)
            if action in state.allowed_actions:
                print()
                return action
            else:
                print('Action %s not allowed' % action)
        except ValueError:
            print('User input is not an action')
def test_simple_state_has_winner():
    # White stacks four stones in column (0, 0) for a vertical connect four
    state = State.empty()
    state = state.take_action(Action(0, 0))
    state = state.take_action(Action(1, 0))
    state = state.take_action(Action(0, 0))
    state = state.take_action(Action(1, 0))
    state = state.take_action(Action(0, 0))
    state = state.take_action(Action(1, 0))
    state = state.take_action(Action(0, 0))

    assert state.has_winner
def test_white_lines_is_updated_after_action():
    state = State.empty()
    white_action = Action(2, 3)
    white_position = Position.from_action_and_height(white_action, 0)
    brown_action = Action(1, 2)
    brown_position = Position.from_action_and_height(brown_action, 0)
    intersection_position = Position(white_position.x, brown_position.y, 0)

    state = state.take_action(white_action)
    state = state.take_action(brown_action)

    # white position
    for line_i, _ in State.POSITION_TO_LINES[white_position]:
        assert 1 == state.white_lines[line_i]
        assert 0 == state.brown_lines[line_i]
    assert 4 == state.white_lines_free[white_position]
    assert 0 == state.brown_lines_free[white_position]
    assert 1 == state.white_max_line[white_position]
    assert 0 == state.brown_max_line[white_position]

    # brown position
    for line_i, _ in State.POSITION_TO_LINES[brown_position]:
        assert 1 == state.brown_lines[line_i]
        assert 0 == state.white_lines[line_i]
    assert 4 == state.brown_lines_free[brown_position]
    assert 0 == state.white_lines_free[brown_position]
    assert 1 == state.brown_max_line[brown_position]
    assert 0 == state.white_max_line[brown_position]

    # intersection
    assert 3 == state.brown_lines_free[intersection_position]
    assert 3 == state.white_lines_free[intersection_position]
    assert 1 == state.brown_max_line[intersection_position]
    assert 1 == state.white_max_line[intersection_position]
@pytest.fixture
def other_win_in_one_move():
    state = State.empty()
    state = state.take_action(Action(0, 0))  # white
    state = state.take_action(Action(3, 3))  # brown
    state = state.take_action(Action(1, 2))  # white
    state = state.take_action(Action(3, 2))  # brown
    state = state.take_action(Action(2, 1))  # white
    state = state.take_action(Action(3, 1))  # brown
    # Brown now threatens to complete four in a row with Action(3, 0)
    return state
def simulate(self, node: 'AlphaConnectNode', callback):
    if node.state.is_end_of_game():
        state_value = self.evaluate_final_state(node)
        callback(state_value, None)
    else:
        # Defer evaluation until a full batch is queued, then run a single
        # batched network prediction for all pending leaves
        self.queue.append((node, callback))
        if len(self.queue) >= self.batch_size:
            nodes, callbacks = zip(*self.queue)
            array = np.concatenate([n.state.to_numpy(batch=True) for n in nodes])
            pred_actions, pred_value = self.model.predict(array)
            for i, cb in enumerate(callbacks):
                state_value = pred_value[i].item()
                action_probs = dict(zip(Action.iter_actions(), pred_actions[i]))
                cb(state_value, action_probs)
            self.queue = []
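# Usage sketch (names here are illustrative, not from the original code):
# simulate() defers leaf evaluation so the network sees batch_size positions
# per predict() call instead of one at a time. A caller passes a callback that
# fires once the batch flushes:
#
#     def backpropagate(state_value, action_probs):
#         ...  # update visit counts and priors for this leaf
#
#     search.simulate(leaf_node, backpropagate)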
def _play_game(args):
    human_player = ConsolePlayer('You')
    computer_player = AlphaConnectPlayer(args.model_path, 'Computer', time_budget=14500)
    observers = [AlphaConnectPrinter(), GameStatePrinter(show_action_history=True)]

    if args.human_first:
        game = TwoPlayerGame(State.empty(), human_player, computer_player, observers)
    else:
        game = TwoPlayerGame(State.empty(), computer_player, human_player, observers)

    if args.actions is not None:
        # Replay any opening moves given on the command line before play starts
        for action_hex in args.actions:
            game.play_action(game.next_player(), Action.from_hex(action_hex))

    game.play()
def test_winner_on_diagonal_line_along_side():
    state = State.empty()
    state = state.take_action(Action(0, 3))  # white
    state = state.take_action(Action(0, 2))  # brown
    state = state.take_action(Action(0, 2))  # white
    state = state.take_action(Action(0, 1))  # brown
    state = state.take_action(Action(0, 0))  # white
    state = state.take_action(Action(0, 1))  # brown
    state = state.take_action(Action(0, 1))  # white
    state = state.take_action(Action(0, 0))  # brown
    state = state.take_action(Action(1, 0))  # white
    state = state.take_action(Action(0, 0))  # brown
    state = state.take_action(Action(0, 0))  # white

    assert state.is_end_of_game()
    assert state.winner is Color.WHITE
def test_action_in_empty_state_has_single_stone():
    state = State.empty()
    action = Action(0, 0)

    new_state = state.take_action(action)

    assert 1 == sum(stone is Color.WHITE for stone in new_state.stones.values())
def test_regression_str_with_top_plane_stones():
    actions_history = [Action.from_hex(i) for i in '0cf35aa55ae9699663cb8c7447f8ec']
    state = State.empty().take_actions(actions_history)

    # Regression test: rendering a state with stones on the top plane must not raise
    str(state)
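# Note on the hex string above (an inference from Action.from_hex and to_int
# usage elsewhere, not confirmed by the original code): each character appears
# to be the hex digit of an action's integer index, i.e. one of the 16 columns
# of the 4x4 board, so Action.from_hex('f') would be the last action in
# to_int order.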
def test_flipping_twice_is_same():
    old_action = Action(0, 0)

    action = old_action
    action = action.augment(Augmentation(Rotation.NO, True))
    action = action.augment(Augmentation(Rotation.NO, True))

    assert old_action == action
def test_action_changes_next_player():
    state = State.empty()
    state = state.take_action(Action(3, 3))
    state = state.take_action(Action(3, 3))

    assert Color.WHITE is state.next_color
def test_player_prevents_other_from_winning(players: List[Player], other_win_in_one_move: State):
    for player in players:
        action = player.decide(other_win_in_one_move)

        assert Action(3, 0) == action, '%s does not prevent other from winning' % player
def test_action_to_int():
    action = Action(2, 3)

    assert action == Action.from_int(action.to_int())