Esempio n. 1
0
def subgame_cfr(state, hidden_state, perspectives, me, regrets, strats, observations, strategy_probability, t):
    """Recursively evaluate `state` for player `me`, updating `regrets` and `strats` in place.

    At a terminal state the result is `state.terminal_value(hidden_state)[me]`.
    Otherwise one move is fixed for every moving player other than `me`:

    * non-evil players on a mission ('run') always pass it,
    * non-assassins in the 'merlin' phase pick a uniformly random seat,
    * everyone else samples from `calculate_strategy` applied to their regret
      entry, keyed by `(perspective, observation history)`.

    If `me` is not among the players who still need a choice, the function
    recurses once. Otherwise every action index of `me` is tried in turn, the
    per-action values are combined with the current strategy `p`, and the
    regret / strategy tables for this key are incremented (both scaled by `t`).

    `observations` is extended during each recursive call and restored
    afterwards, so the caller's list is unchanged on return.

    Returns the value of the current strategy at this node for `me`.
    """
    if state.is_terminal():
        return state.terminal_value(hidden_state)[me]

    status = state.status
    history_key = tuple(observations)

    movers = state.moving_players()
    joint_moves = [None] * len(movers)
    my_slot = None

    for slot, mover in enumerate(movers):
        role = hidden_state[mover]
        if status == 'run' and role not in EVIL_ROLES:
            # Good players have no real choice on a mission.
            joint_moves[slot] = MissionAction(fail=False)
        elif status == 'merlin' and role != 'assassin':
            joint_moves[slot] = PickMerlinAction(merlin=np.random.choice(len(hidden_state)))
        elif mover == me:
            # Our own move is enumerated below rather than sampled.
            my_slot = slot
        else:
            sampling_probs = calculate_strategy(regrets[status][(perspectives[mover], history_key)])
            options = state.legal_actions(mover, hidden_state)
            joint_moves[slot] = options[get_action_index(sampling_probs)]

    def descend(reach_probability):
        # Apply the joint move, record what was observed, recurse, then undo
        # the record so sibling branches see the same history.
        next_state, _, seen = state.transition(joint_moves, hidden_state)
        if status == 'vote':
            seen = tuple([ballot.up for ballot in seen])
        observations.append(seen)
        result = subgame_cfr(next_state, hidden_state, perspectives, me, regrets, strats,
                             observations, reach_probability, t)
        observations.pop()
        return result

    if my_slot is None:
        return descend(strategy_probability)

    info_key = (perspectives[me], history_key)
    p = calculate_strategy(regrets[status][info_key])
    values = np.zeros(len(p))

    # NOTE(review): assumes len(p) matches the number of legal actions - confirm.
    my_options = state.legal_actions(me, hidden_state)
    for idx in range(len(values)):
        joint_moves[my_slot] = my_options[idx]
        values[idx] = descend(strategy_probability * p[idx])

    expected = np.dot(values, p)
    regrets[status][info_key] += (values - expected) * t
    strats[status][info_key] += p * strategy_probability * t
    return expected
Esempio n. 2
0
def handle_round(tree_roots, state, hidden_state, round_):
    """Replay one logged round through `deal_with_transition`.

    `round_` maps '1'..'5' to proposal records (each with 'proposer', 'team'
    and 'votes'), carries the mission results under 'mission', and may carry
    a 'findMerlin' record. Proposals are replayed in order until the state
    reports status 'run'; the mission is then replayed for the last proposal,
    followed by the Merlin guess if the state reaches status 'merlin'.

    Returns the `(tree_roots, state)` pair produced by the last transition.
    """
    accepted = None
    for attempt in range(1, 6):
        accepted = round_[str(attempt)]
        assert state.proposer == accepted['proposer'], "idk"
        assert state.propose_count == attempt - 1, "idk2"

        team_move = ProposeAction(proposal=tuple(sorted(accepted['team'])))
        tree_roots, state = deal_with_transition(tree_roots, state, [team_move], hidden_state)
        assert state.status == 'vote'

        ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in accepted['votes']]
        tree_roots, state = deal_with_transition(tree_roots, state, ballots, hidden_state)
        if state.status == 'run':
            break

    # Pair each team member with their mission card and order by seat.
    seat_results = sorted(zip(accepted['team'], round_['mission']))
    mission_moves = [MissionAction(fail=(outcome == "Fail")) for _, outcome in seat_results]
    tree_roots, state = deal_with_transition(tree_roots, state, mission_moves, hidden_state)

    if state.status != 'merlin':
        return tree_roots, state

    assert 'findMerlin' in round_, "wat"
    guess_record = round_['findMerlin']
    assert hidden_state[guess_record['assassin']] == 'assassin', "wat"
    # One identical pick per entry in hidden_state.
    guesses = [PickMerlinAction(merlin=guess_record['merlin_guess']) for _ in hidden_state]
    tree_roots, state = deal_with_transition(tree_roots, state, guesses, hidden_state)
    return tree_roots, state
Esempio n. 3
0
    def get_move_probabilities(self, state, legal_actions):
        """Return a probability vector over `legal_actions` for the current phase.

        Weights per phase (`state.status`):

        * 'propose': only proposals containing `self.player` get weight.
        * 'vote': every vote gets weight 1, and one vote gets 5 extra: approve
          when this player is on the proposal, or when it is the proposal
          with `propose_count == 4` and this player is not evil; reject
          otherwise.
        * 'run': every action gets weight 1; evil players add 5 to passing
          the first mission and to failing any later one.
        * 'merlin': each entry of `self.hidden_states` adds one vote for the
          seat it lists as 'merlin'.

        If no action receives any weight (for example the player appears in
        no proposal, `self.hidden_states` is empty, or the status is not one
        of the above), a uniform distribution is returned instead of dividing
        by zero and producing NaNs.

        Returns a numpy array with one entry per legal action.
        """
        result = np.zeros(len(legal_actions))
        if state.status == 'propose':
            # Only consider proposals with yourself
            for i, act in enumerate(legal_actions):
                if self.player in act.proposal:
                    result[i] += 1
        elif state.status == 'vote':
            result += 1
            on_team = self.player in state.proposal
            # Good players also approve the proposal with propose_count == 4.
            if on_team or (state.propose_count == 4 and not self.is_evil):
                favoured = VoteAction(up=True)
            else:
                favoured = VoteAction(up=False)
            result[legal_actions.index(favoured)] += 5
        elif state.status == 'run':
            result += 1
            if self.is_evil:
                # Fail most missions unless it's the first one
                first_mission = state.fails + state.succeeds == 0
                result[legal_actions.index(MissionAction(fail=not first_mission))] += 5
        elif state.status == 'merlin':
            # Try to pick merlin based off of hidden states
            for hidden_state in self.hidden_states:
                merlin = hidden_state.index('merlin')
                result[legal_actions.index(
                    PickMerlinAction(merlin=merlin))] += 1

        total = np.sum(result)
        if total == 0:
            if len(result) == 0:
                return result
            # Nothing was favoured: fall back to uniform rather than NaN.
            return np.ones(len(result)) / len(result)
        return result / total
Esempio n. 4
0
def move_index_to_move(move_index, state):
    """Translate a numeric move index into the action object for the state's phase.

    * 'merlin'  -> `PickMerlinAction` with the index used as the seat.
    * 'vote'    -> `VoteAction`, approving when the index is truthy.
    * 'propose' -> `ProposeAction` built from the bitstring table for the
      current mission's team size (the size-2 table when the size is 2,
      the size-3 table for anything else).
    * any other status -> `MissionAction`, failing when the index is truthy.
    """
    phase = state.status
    if phase == 'merlin':
        return PickMerlinAction(merlin=move_index)
    if phase == 'vote':
        return VoteAction(up=bool(move_index))
    if phase != 'propose':
        return MissionAction(fail=bool(move_index))

    # The mission number is the count of missions already resolved.
    team_size, _ = state.MISSION_SIZES[state.succeeds + state.fails]
    if team_size == 2:
        lookup = INDEX_TO_PROPOSAL_2
    else:
        lookup = INDEX_TO_PROPOSAL_3
    return ProposeAction(proposal=bitstring_to_proposal(lookup[move_index]))
Esempio n. 5
0
def game_state_generator(avalon_start, human_game, hidden_state):
    """Replay a logged human game, yielding each state with the moves made from it.

    Starting from `avalon_start`, every round in `human_game['log']` is
    replayed: proposals '1'..'5' (each with 'proposer', 'team' and 'votes')
    until the state reports status 'run', then the mission cards stored
    under 'mission'. If the state then has status 'merlin', the logged
    Merlin guess is yielded as well (without applying it).

    Yields:
        `(state, moves)` pairs, where `state` is the state *before* the
        moves are applied and `moves` is the list of actions taken from it.

    Raises:
        AssertionError: if the log disagrees with the simulated state
            (wrong proposer, wrong proposal count, or a missing
            'findMerlin' record).
    """
    state = avalon_start

    for round_ in human_game['log']:
        last_proposal = None
        for proposal_num in ['1', '2', '3', '4', '5']:
            proposal = last_proposal = round_[proposal_num]
            assert state.proposer == proposal['proposer']
            assert state.propose_count == int(proposal_num) - 1
            moves = [ProposeAction(proposal=tuple(sorted(proposal['team'])))]
            new_state, _, _ = state.transition(moves, hidden_state)
            yield state, moves
            state = new_state

            assert state.status == 'vote'
            moves = [
                VoteAction(up=(vote == 'Approve'))
                for vote in proposal['votes']
            ]
            new_state, _, _ = state.transition(moves, hidden_state)
            yield state, moves
            state = new_state

            if state.status == 'run':
                break

        # Pair each team member with their mission card, ordered by seat.
        secret_votes = sorted(zip(last_proposal['team'], round_['mission']))
        moves = [
            MissionAction(fail=(vote == "Fail"))
            for player, vote in secret_votes
        ]
        new_state, _, _ = state.transition(moves, hidden_state)
        yield state, moves
        state = new_state

        if state.status == 'merlin':
            assert 'findMerlin' in round_
            # One pick per seat, sized from hidden_state instead of a fixed 5.
            yield state, [
                PickMerlinAction(merlin=round_['findMerlin']['merlin_guess'])
            ] * len(hidden_state)
Esempio n. 6
0
    def legal_actions(self, player, hidden_state):
        """
        List the actions `player` may take in the current phase.

        `hidden_state` is only consulted in the 'run' phase, where players
        whose role is in EVIL_ROLES may also fail the mission. Asserts that
        `player` is one of the moving players and that the status is one of
        'merlin', 'propose', 'vote' or 'run'.
        """
        assert player in self.moving_players(), "Asked a non-moving player legal actions"

        phase = self.status
        if phase == 'vote':
            return [VoteAction(up=True), VoteAction(up=False)]

        if phase == 'run':
            # Everyone may pass; only evil roles get the extra fail option.
            options = [MissionAction(fail=False)]
            if hidden_state[player] in EVIL_ROLES:
                options.append(MissionAction(fail=True))
            return options

        if phase == 'merlin':
            return [PickMerlinAction(merlin=seat) for seat in range(self.NUM_PLAYERS)]

        if phase == 'propose':
            # Team size is looked up by the number of missions already played.
            team_size, _ = self.MISSION_SIZES[self.succeeds + self.fails]
            seats = range(self.NUM_PLAYERS)
            return [ProposeAction(proposal=team) for team in itertools.combinations(seats, r=team_size)]

        assert False, "Not sure how we got here"
Esempio n. 7
0
    def cfr_search(self, me, state, hidden_state, fails, strategy_probability,
                   t):
        """Recursively evaluate `state` for player `me`, updating this object's regret tables.

        At a terminal state returns `state.terminal_value(hidden_state)[me]`.
        Otherwise a move is chosen for each moving player:

        * non-evil players on a mission always pass it,
        * non-assassins in the 'merlin' phase pick a random seat,
        * other opponents sample from `calculate_observebot_move_probs`,
        * `me` is, with probability 0.15, marked for full enumeration of its
          actions; otherwise it samples one action from its own
          positive-regret strategy.

        When `me` is enumerated, each action is tried, the values are
        combined with the current strategy, and `self.cfr_regret` /
        `self.cfr_strat` are incremented for the key
        `(state.as_key(), perspective, perspective_bucket)`, scaled by `t`.

        `fails` gains a `(proposal, observation)` entry while recursing past
        a mission with a positive observation and is restored afterwards.

        Returns the value for `me` of the branch or strategy that was evaluated.
        """
        if state.is_terminal():
            return state.terminal_value(hidden_state)[me]

        def descend(reach_probability):
            # Apply the joint move and recurse, keeping `fails` balanced.
            next_state, _, observation = state.transition(moves, hidden_state)
            mission_failed = state.status == 'run' and observation > 0
            if mission_failed:
                fails.append((state.proposal, observation))
            result = self.cfr_search(me, next_state, hidden_state, fails,
                                     reach_probability, t)
            if mission_failed:
                fails.pop()
            return result

        movers = state.moving_players()
        moves = [None] * len(movers)
        my_move_index = None
        for slot, player in enumerate(movers):
            role = hidden_state[player]
            if role not in EVIL_ROLES and state.status == 'run':
                moves[slot] = MissionAction(fail=False)
                continue

            if role != 'assassin' and state.status == 'merlin':
                moves[slot] = PickMerlinAction(
                    merlin=np.random.choice(len(hidden_state)))
                continue

            perspective = get_python_perspective(hidden_state, player)
            bucket = get_hidden_states_bucket(perspective, fails)

            if player != me:
                move_probs = calculate_observebot_move_probs(
                    state, perspective, bucket)
            elif np.random.random() < 0.15:
                # Enumerate all of our actions at this node instead of sampling.
                my_move_index = slot
                continue
            else:
                regret_key = (state.as_key(), perspective, bucket)
                weights = np.clip(self.cfr_regret[state.status][regret_key],
                                  0.0, None)
                if np.sum(weights) == 0:
                    weights = np.ones(len(weights))
                move_probs = weights / np.sum(weights)

            options = state.legal_actions(player, hidden_state)
            moves[slot] = options[np.random.choice(len(options), p=move_probs)]

        if my_move_index is None:
            return descend(strategy_probability)

        perspective = get_python_perspective(hidden_state, me)
        bucket = get_hidden_states_bucket(perspective, fails)
        key = (state.as_key(), perspective, bucket)

        # Play in proportion to positive regret; uniform if there is none.
        positive = np.clip(self.cfr_regret[state.status][key], 0, None)
        total = np.sum(positive)
        if total == 0:
            p = np.ones(len(positive)) / len(positive)
        else:
            p = positive / total

        values = np.zeros(len(positive))
        my_options = state.legal_actions(me, hidden_state)
        for action_index in range(len(values)):
            moves[my_move_index] = my_options[action_index]
            values[action_index] = descend(strategy_probability * p[action_index])

        strategy_value = np.dot(values, p)
        self.cfr_regret[state.status][key] += (values - strategy_value) * t
        self.cfr_strat[state.status][key] += p * strategy_probability * t
        return strategy_value
Esempio n. 8
0
    def cfr_search_fast(self, state, hidden_state, fails, strategy_probability, t, cache):
        """Cached recursive search from `state` for `self.player`, updating `self.cfr_regret`.

        Results are memoised in `cache` under `(state.as_key(), tuple(fails))`,
        so a repeated state/fail-history pair returns the stored value without
        re-sampling.

        At a terminal state returns
        `state.terminal_value(hidden_state)[self.player]`. Otherwise a move is
        fixed for every moving player other than `self.player`:

        * non-evil players on a mission always pass it,
        * non-assassins in the 'merlin' phase pick a random seat,
        * other opponents sample from their recorded bucket counts in
          `self.opponent_buckets`, mixed with a uniform "tremble" whose weight
          shrinks as the counts grow (uniform when there are no counts).

        If `self.player` is moving, every action index is tried, the values
        are combined with the positive-regret strategy, and the regret entry
        for this bucket is incremented, scaled by `t`.

        `fails` gains a `(proposal, observation)` entry while recursing past a
        mission with a positive observation and is restored afterwards.

        Returns the value for `self.player` of the evaluated branch/strategy.
        """
        if state.is_terminal():
            return state.terminal_value(hidden_state)[self.player]

        cache_key = (state.as_key(), tuple(fails))
        if cache_key in cache:
            return cache[cache_key]

        # Rare progress output; print() with one argument behaves the same on
        # Python 2 and Python 3, unlike the print statement.
        if np.random.random() < 0.0001:
            print(len(cache))

        # player_statuses[p][q] is raised for every member q of a failed
        # proposal: to 2 if p was on that proposal too, otherwise to 1.
        player_statuses = [
            [0, 0, 0, 0, 0]
            for _ in hidden_state
        ]
        for proposal, _ in fails:
            for p in range(len(hidden_state)):
                p_on = p in proposal
                for player in proposal:
                    player_statuses[p][player] = max(player_statuses[p][player], 2 if p_on else 1)

        moving_players = state.moving_players()
        my_move_index = None
        moves = [None] * len(moving_players)
        for i, player in enumerate(moving_players):
            if hidden_state[player] not in EVIL_ROLES and state.status == 'run':
                moves[i] = MissionAction(fail=False)
                continue

            if hidden_state[player] != 'assassin' and state.status == 'merlin':
                moves[i] = PickMerlinAction(merlin=np.random.choice(len(hidden_state)))
                continue

            bucket_type, bucket = history_to_bucket(hidden_state, player, [(None, state)], player_statuses[player])
            if player == self.player:
                # Our own move is enumerated below rather than sampled.
                my_move_index = i
                continue

            bucket_data = self.opponent_buckets[player][bucket_type][bucket]
            uniform_prob = np.ones(len(bucket_data)) / len(bucket_data)
            observed = np.sum(bucket_data)
            if observed == 0:
                move_probs = uniform_prob
            else:
                tremble_prob = 1.0 / np.sqrt(3 * observed + 1)
                move_probs = tremble_prob * uniform_prob + (1.0 - tremble_prob) * bucket_data / observed

            moves[i] = move_index_to_move(np.random.choice(len(move_probs), p=move_probs), state)

        if my_move_index is None:
            new_state, _, observation = state.transition(moves, hidden_state)
            if state.status == 'run' and observation > 0:
                fails.append((state.proposal, observation))
            value = self.cfr_search_fast(new_state, hidden_state, fails, strategy_probability, t, cache)
            if state.status == 'run' and observation > 0:
                fails.pop()
            cache[cache_key] = value
            return value

        bucket_type, bucket = history_to_bucket(hidden_state, self.player, [(None, state)], player_statuses[self.player])
        my_strategy = np.clip(self.cfr_regret[bucket_type][bucket], 0, None)

        # Play in proportion to positive regret; uniform if there is none.
        if np.sum(my_strategy) == 0:
            p = np.ones(len(my_strategy)) / len(my_strategy)
        else:
            p = my_strategy / np.sum(my_strategy)
        values = np.zeros(len(my_strategy))

        for action_index in range(len(values)):
            moves[my_move_index] = move_index_to_move(action_index, state)
            new_state, _, observation = state.transition(moves, hidden_state)
            if state.status == 'run' and observation > 0:
                fails.append((state.proposal, observation))
            values[action_index] = self.cfr_search_fast(new_state, hidden_state, fails, strategy_probability * p[action_index], t, cache)
            if state.status == 'run' and observation > 0:
                fails.pop()

        strategy_value = np.dot(values, p)
        regrets = values - strategy_value
        self.cfr_regret[bucket_type][bucket] += regrets * t

        cache[cache_key] = strategy_value
        return strategy_value
Esempio n. 9
0
    def single_mcts_search(self, state):
        """Run one simulated playout from `state` and back its value up the visited nodes.

        A hidden state is drawn at random from `self.hidden_states`. The
        playout then repeatedly picks a move for every moving player:

        * non-evil players on a mission always pass it,
        * non-assassins in the 'merlin' phase pick a random seat,
        * opponents sample from their recorded bucket counts in
          `self.opponent_buckets`, mixed with a uniform "tremble" (uniform
          when there are no counts),
        * `self.player` asks its node in `self.my_buckets` to select a move.

        The playout stops when `self.player` reaches a bucket that was not yet
        in `self.my_buckets` (scored with `heuristic_value_func`) or when the
        state is terminal (scored with `terminal_value`). Every previously
        known node visited along the way then has the chosen action's count
        incremented by one and its payoff total incremented by that score.

        `self.history` is extended during the playout and truncated back to
        its original length before returning. Returns None.
        """
        history_len = len(self.history)
        hidden_state = self.hidden_states[np.random.choice(len(self.hidden_states))]
        player_statuses = [
            [0, 0, 0, 0, 0]
            for _ in hidden_state
        ]
        # Set up player statuses: for each failed proposal, mark its members
        # as 2 from the viewpoint of players on it and 1 from everyone else's.
        for proposal, _ in self.fails:
            for p in range(len(hidden_state)):
                p_on = p in proposal
                for player in proposal:
                    player_statuses[p][player] = max(player_statuses[p][player], 2 if p_on else 1)

        visited_nodes = []
        chosen_actions = []

        value = None
        while True:
            moves = []
            for player in state.moving_players():
                if hidden_state[player] not in EVIL_ROLES and state.status == 'run':
                    moves.append(MissionAction(fail=False))
                    continue

                if hidden_state[player] != 'assassin' and state.status == 'merlin':
                    moves.append(PickMerlinAction(merlin=np.random.choice(len(hidden_state))))
                    continue

                bucket_type, bucket = history_to_bucket(hidden_state, player, self.history, player_statuses[player])

                if player != self.player:
                    bucket_data = self.opponent_buckets[self.bot_ids[player]][bucket_type][bucket]

                    uniform_prob = np.ones(len(bucket_data)) / len(bucket_data)
                    observed = np.sum(bucket_data)
                    if observed == 0:
                        move_probs = uniform_prob
                    else:
                        tremble_prob = 1.0 / np.sqrt(4 * observed + 1)
                        move_probs = tremble_prob * uniform_prob + (1.0 - tremble_prob) * bucket_data / observed

                    move_index = np.random.choice(len(move_probs), p=move_probs)
                else:
                    # Check membership before indexing: the lookup below may
                    # create the entry (presumably a defaultdict - confirm).
                    is_new_node = bucket not in self.my_buckets[bucket_type]
                    node = self.my_buckets[bucket_type][bucket]
                    move_index = node.select_move()
                    if is_new_node:
                        value = heuristic_value_func(state, hidden_state, self.player)
                    else:
                        visited_nodes.append(node)
                        chosen_actions.append(move_index)

                moves.append(move_index_to_move(move_index, state))

            if value is not None:
                break

            state, _, observation = state.transition(moves, hidden_state)
            self.history.append((observation, state))

            if state.is_terminal():
                value = state.terminal_value(hidden_state)[self.player]
                break

        for node, action in zip(visited_nodes, chosen_actions):
            node.choose_counts[action] += 1
            # Credit the action with the playout's score; adding a constant
            # here would make the payoff total a copy of the visit count.
            node.total_payoffs[action] += value

        self.history = self.history[:history_len]
Esempio n. 10
0
def handle_round(game, state, hidden_state, bots, round_, stats):
    """Replay one logged round, recording for each logged move the `get_prob` result.

    For every proposal, vote and mission card in `round_` (and for the
    assassin's Merlin guess, if the round reaches that phase), a row is
    appended to `stats` with the game id, seat, role, player id, move type,
    a text form of the move, the bot's class name, the value returned by
    `get_prob` (presumably the probability the bot assigns to the logged
    move - confirm), and the number of players. The state is advanced with
    `handle_transition` after each phase.

    Returns the state after the last transition of the round.
    """
    def record(seat, kind, move_text, prob):
        # One stats row; key order matches the other row builders.
        stats.append({
            'game': game['id'],
            'seat': seat,
            'role': hidden_state[seat],
            'player': game['players'][seat]['player_id'],
            'type': kind,
            'move': move_text,
            'bot': bots[seat].__class__.__name__,
            'prob': prob,
            'num_players': len(hidden_state)
        })

    chosen = None
    for attempt in range(1, 6):
        chosen = round_[str(attempt)]
        assert state.proposer == chosen['proposer']
        assert state.propose_count == attempt - 1

        moves = [ProposeAction(proposal=tuple(sorted(chosen['team'])))]
        for seat, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, seat, bots[seat], move)
            record(seat, 'propose', ','.join(map(str, sorted(move.proposal))), prob)
        state = handle_transition(state, hidden_state, moves, bots)

        assert state.status == 'vote'
        moves = [VoteAction(up=(ballot == 'Approve')) for ballot in chosen['votes']]
        for seat, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, seat, bots[seat], move)
            record(seat, 'vote', 'up' if move.up else 'down', prob)
        state = handle_transition(state, hidden_state, moves, bots)

        if state.status == 'run':
            break

    # Pair each team member with their mission card, ordered by seat.
    seat_results = sorted(zip(chosen['team'], round_['mission']))
    moves = [MissionAction(fail=(outcome == "Fail")) for _, outcome in seat_results]
    for seat, move in zip(state.moving_players(), moves):
        prob = get_prob(state, hidden_state, seat, bots[seat], move)
        record(seat, 'mission', 'fail' if move.fail else 'succeed', prob)
    state = handle_transition(state, hidden_state, moves, bots)

    if state.status != 'merlin':
        return state

    assert 'findMerlin' in round_
    guess_record = round_['findMerlin']
    assert hidden_state[guess_record['assassin']] == 'assassin'
    moves = [PickMerlinAction(merlin=guess_record['merlin_guess']) for _ in hidden_state]
    for seat, move in zip(state.moving_players(), moves):
        # get_prob is called for every mover, but only the assassin is recorded.
        prob = get_prob(state, hidden_state, seat, bots[seat], move)
        if hidden_state[seat] == 'assassin':
            record(seat, 'merlin', move.merlin, prob)
    state = handle_transition(state, hidden_state, moves, bots)
    return state
Esempio n. 11
0
def replay_game(game):
    """Replay a recorded game dict, yielding the state transitions it implies.

    Roles come from `game['game_info']['roles']` (first entry kept, the rest
    reversed) mapped through `PRO_TO_HS`. Moves are reconstructed from the
    per-player `voteHistory` markers ('VHleader', 'VHpicked', 'VHapprove'),
    the per-round `numFailsHistory`, and the assassin's recorded shot.

    Yields:
        First `(None, initial_state, hidden_state)`, then
        `(state, new_state, observation)` for every transition until the
        state is terminal.

    Raises:
        AssertionError: if the recorded leader disagrees with the state's
            proposer, if recorded fails cannot be assigned to the movers,
            or if the number of actions does not match the moving players.
    """
    roles = game['game_info']['roles']
    roles = roles[:1] + roles[1:][::-1]
    hidden_state = [PRO_TO_HS[r] for r in roles]
    players = game['session_info']['players']

    def markers(player, rnd, attempt):
        # Marker collection recorded for one player at one proposal.
        return game['game_info']['voteHistory'][player][rnd][attempt]

    def leader_seat(rnd, attempt):
        # Seat of the first player flagged as leader for that proposal.
        flags = ['VHleader' in markers(player, rnd, attempt) for player in players]
        return flags.index(True)

    state = AvalonState(
        proposer=leader_seat(0, 0),
        propose_count=0,
        succeeds=0,
        fails=0,
        status='propose',
        proposal=None,
        game_end=None,
        num_players=5
    )

    yield None, state, hidden_state

    while not state.is_terminal():
        rnd = state.succeeds + state.fails
        phase = state.status

        if phase != 'merlin':
            proposer = leader_seat(rnd, state.propose_count)
            assert proposer == state.proposer, "{} != {}".format(proposer, state.proposer)

        if phase == 'propose':
            picked = [
                players.index(player)
                for player in players
                if 'VHpicked' in markers(player, rnd, state.propose_count)
            ]
            actions = [ProposeAction(proposal=tuple(sorted(picked)))]
        elif phase == 'vote':
            actions = [
                VoteAction(up=('VHapprove' in markers(player, rnd, state.propose_count)))
                for player in players
            ]
        elif phase == 'run':
            # Only the number of fails is recorded, so they are attributed to
            # the first movers (in order) who are neither merlin nor servant.
            remaining_fails = game['game_info']['numFailsHistory'][rnd]
            actions = []
            for player in state.moving_players():
                passes = hidden_state[player] in ('merlin', 'servant') or remaining_fails == 0
                actions.append(MissionAction(fail=not passes))
                if not passes:
                    remaining_fails -= 1
            assert remaining_fails == 0
        elif phase == 'merlin':
            shot_player = players.index(game['game_info']['publicData']['roles']['assassinShotUsername'])
            actions = [PickMerlinAction(merlin=shot_player) for _ in range(5)]

        assert len(actions) == len(state.moving_players())

        new_state, _, obs = state.transition(actions, hidden_state)
        yield state, new_state, obs
        state = new_state
Esempio n. 12
0
def handle_round(data, state, hidden_state, game, round_):
    """Replay one logged round, appending one descriptive row per logged move to `data`.

    Rows are written for each proposal (by the proposer), each vote, each
    mission card and, if the round reaches the 'merlin' phase, the
    assassin's guess. Every row carries the game id, player id, seat, role,
    an evil flag, the move type and text, counters read from the state at
    the time the row is written, whether the mover is on the proposed team,
    and the number of players.

    Returns the state after the last transition of the round.
    """
    def record(current, seat, kind, move_text, has_self):
        # One row; `current` supplies the counters at the time of the move.
        data.append({
            'game': game['id'],
            'player': game['players'][seat]['player_id'],
            'seat': seat,
            'role': hidden_state[seat],
            'is_evil': hidden_state[seat] in EVIL_ROLES,
            'type': kind,
            'move': move_text,
            'propose_count': current.propose_count,
            'round': current.fails + current.succeeds,
            'succeeds': current.succeeds,
            'fails': current.fails,
            'propose_has_self': has_self,
            'num_players': len(hidden_state)
        })

    chosen = None
    for attempt in range(1, 6):
        chosen = round_[str(attempt)]
        leader = chosen['proposer']
        assert state.proposer == leader, "idk"
        assert state.propose_count == attempt - 1, "idk2"
        record(state, leader, 'propose',
               ','.join(map(str, sorted(chosen['team']))),
               leader in chosen['team'])

        team_move = ProposeAction(proposal=tuple(sorted(chosen['team'])))
        state, _, _ = state.transition([team_move], hidden_state)
        assert state.status == 'vote'

        # Vote rows are written against the post-proposal state.
        for seat, ballot in enumerate(chosen['votes']):
            record(state, seat, 'vote', ballot, seat in chosen['team'])
        ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in chosen['votes']]
        state, _, _ = state.transition(ballots, hidden_state)
        if state.status == 'run':
            break

    # Pair each team member with their mission card, ordered by seat.
    seat_results = sorted(zip(chosen['team'], round_['mission']))
    for seat, outcome in seat_results:
        record(state, seat, 'mission', outcome, True)

    mission_moves = [MissionAction(fail=(outcome == "Fail")) for _, outcome in seat_results]
    state, _, _ = state.transition(mission_moves, hidden_state)
    if state.status != 'merlin':
        return state

    assert 'findMerlin' in round_, "wat"
    guess_record = round_['findMerlin']
    assert hidden_state[guess_record['assassin']] == 'assassin', "wat"
    guesses = [PickMerlinAction(merlin=guess_record['merlin_guess']) for _ in hidden_state]
    record(state, hidden_state.index('assassin'), 'merlin',
           str(guess_record['merlin_guess']), True)
    state, _, _ = state.transition(guesses, hidden_state)
    return state