def subgame_cfr(state, hidden_state, perspectives, me, regrets, strats, observations, strategy_probability, t):
    """Run one CFR traversal below ``state`` and return its value for ``me``.

    Every legal action of ``me`` is expanded recursively, while the moves of
    all other players are drawn from the strategy implied by ``regrets``.
    ``regrets`` and ``strats`` are updated in place for the information-set
    key ``(perspective, tuple(observations))``, weighted by ``t``.

    Args:
        state: current game state.
        hidden_state: role of every player, indexed by player.
        perspectives: per-player perspective, used in information-set keys.
        me: index of the player whose regrets are being updated.
        regrets: mapping ``status -> key -> regret array`` (mutated).
        strats: mapping ``status -> key -> cumulative strategy`` (mutated).
        observations: list of observations so far; pushed to and popped from
            around each recursive call, so it is unchanged on return.
        strategy_probability: probability of ``me`` reaching this state.
        t: weight applied to this iteration's updates.

    Returns:
        The terminal payoff for ``me`` at a terminal state, otherwise the
        expected value of the current strategy of ``me``.
    """
    if state.is_terminal():
        return state.terminal_value(hidden_state)[me]

    phase = state.status
    history_key = tuple(observations)
    movers = state.moving_players()
    moves = [None] * len(movers)
    my_slot = None

    for slot, player in enumerate(movers):
        role = hidden_state[player]
        if role not in EVIL_ROLES and phase == 'run':
            # Non-evil players always play the mission card "success".
            moves[slot] = MissionAction(fail=False)
        elif role != 'assassin' and phase == 'merlin':
            # Only the assassin's pick is strategic; everyone else picks at random.
            moves[slot] = PickMerlinAction(merlin=np.random.choice(len(hidden_state)))
        elif player == me:
            my_slot = slot
        else:
            opponent_key = (perspectives[player], history_key)
            move_probs = calculate_strategy(regrets[phase][opponent_key])
            options = state.legal_actions(player, hidden_state)
            moves[slot] = options[get_action_index(move_probs)]

    def descend(reach):
        """Apply the current ``moves`` and recurse with reach probability ``reach``."""
        next_state, _, seen = state.transition(moves, hidden_state)
        if phase == 'vote':
            # Votes are recorded as a hashable tuple of booleans.
            seen = tuple([ballot.up for ballot in seen])
        observations.append(seen)
        result = subgame_cfr(next_state, hidden_state, perspectives, me, regrets,
                             strats, observations, reach, t)
        observations.pop()
        return result

    if my_slot is None:
        return descend(strategy_probability)

    perspective = perspectives[me]
    key = (perspective, history_key)
    p = calculate_strategy(regrets[phase][key])
    values = np.zeros(len(p))
    my_options = state.legal_actions(me, hidden_state)
    for idx in range(len(values)):
        moves[my_slot] = my_options[idx]
        values[idx] = descend(strategy_probability * p[idx])

    strategy_value = np.dot(values, p)
    regrets[phase][key] += (values - strategy_value) * t
    strats[phase][key] += p * strategy_probability * t
    return strategy_value
def handle_round(tree_roots, state, hidden_state, round_):
    """Replay one logged round, feeding every move to ``deal_with_transition``.

    The round is replayed as proposal/vote pairs (attempts '1' to '5') until
    the state reaches the 'run' phase, then the mission cards, then, if the
    state reaches the 'merlin' phase, the logged Merlin guess.

    Args:
        tree_roots: value threaded through ``deal_with_transition``.
        state: game state at the start of the round.
        hidden_state: role of every player, indexed by player.
        round_: log entry for the round, with keys '1'..'5', 'mission' and
            optionally 'findMerlin'.

    Returns:
        The ``(tree_roots, state)`` pair after the round has been replayed.
    """
    last_proposal = None
    for attempt in range(1, 6):
        last_proposal = entry = round_[str(attempt)]
        assert state.proposer == entry['proposer'], "idk"
        assert state.propose_count == attempt - 1, "idk2"

        team = tuple(sorted(entry['team']))
        tree_roots, state = deal_with_transition(
            tree_roots, state, [ProposeAction(proposal=team)], hidden_state)
        assert state.status == 'vote'

        ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
        tree_roots, state = deal_with_transition(tree_roots, state, ballots, hidden_state)
        if state.status == 'run':
            break

    # Pair mission cards with team members and order them by seat.
    cards_by_seat = sorted(zip(last_proposal['team'], round_['mission']))
    mission_moves = [MissionAction(fail=(card == "Fail")) for _, card in cards_by_seat]
    tree_roots, state = deal_with_transition(tree_roots, state, mission_moves, hidden_state)

    if state.status == 'merlin':
        assert 'findMerlin' in round_, "wat"
        find_merlin = round_['findMerlin']
        assert hidden_state[find_merlin['assassin']] == 'assassin', "wat"
        # One identical pick per player.
        picks = [
            PickMerlinAction(merlin=find_merlin['merlin_guess'])
            for _ in hidden_state
        ]
        tree_roots, state = deal_with_transition(tree_roots, state, picks, hidden_state)

    return tree_roots, state
def get_move_probabilities(self, state, legal_actions):
    """Return this bot's heuristic probability for each legal action.

    Args:
        state: current game state; ``status``, ``proposal``,
            ``propose_count``, ``fails`` and ``succeeds`` are read.
        legal_actions: list of actions available to ``self.player``.

    Returns:
        A numpy array, aligned with ``legal_actions``, that sums to 1. If the
        heuristics assign no weight to any action (for example the 'merlin'
        phase with no candidate hidden states), a uniform distribution is
        returned instead of dividing by zero.
    """
    weights = np.zeros(len(legal_actions))
    status = state.status

    if status == 'propose':
        # Only consider proposals that include this player.
        for i, act in enumerate(legal_actions):
            if self.player in act.proposal:
                weights[i] += 1
    elif status == 'vote':
        weights += 1
        if self.player in state.proposal:
            # Vote up most proposals that include this player.
            approve = True
        else:
            # Otherwise vote down, unless this is the final proposal
            # attempt and this player is good.
            approve = state.propose_count == 4 and not self.is_evil
        weights[legal_actions.index(VoteAction(up=approve))] += 5
    elif status == 'run':
        weights += 1
        if self.is_evil:
            # Fail most missions unless it is the first one.
            first_mission = state.fails + state.succeeds == 0
            weights[legal_actions.index(MissionAction(fail=not first_mission))] += 5
    elif status == 'merlin':
        # Weight each candidate by how many hidden states name them Merlin.
        for hidden_state in self.hidden_states:
            merlin = hidden_state.index('merlin')
            weights[legal_actions.index(PickMerlinAction(merlin=merlin))] += 1

    total = np.sum(weights)
    if total == 0 and len(weights) > 0:
        # No heuristic applied: fall back to uniform rather than return NaN.
        return np.ones(len(weights)) / len(weights)
    return weights / total
def move_index_to_move(move_index, state):
    """Translate a numeric move index into the action for the state's phase.

    Args:
        move_index: index of the move within the current phase.
        state: current game state; ``status`` selects the action type.

    Returns:
        A ``PickMerlinAction``, ``ProposeAction``, ``VoteAction`` or, for any
        other status, a ``MissionAction``.
    """
    phase = state.status
    if phase == 'merlin':
        return PickMerlinAction(merlin=move_index)
    if phase == 'vote':
        return VoteAction(up=bool(move_index))
    if phase != 'propose':
        return MissionAction(fail=bool(move_index))

    # Proposal indices are looked up in a table chosen by the mission's team size.
    team_size, _ = state.MISSION_SIZES[state.succeeds + state.fails]
    if team_size == 2:
        table = INDEX_TO_PROPOSAL_2
    else:
        table = INDEX_TO_PROPOSAL_3
    return ProposeAction(proposal=bitstring_to_proposal(table[move_index]))
def game_state_generator(avalon_start, human_game, hidden_state):
    """Replay a logged human game, yielding every decision point.

    Args:
        avalon_start: initial game state.
        human_game: game record; ``human_game['log']`` is iterated as a
            sequence of rounds, each with keys '1'..'5', 'mission' and
            optionally 'findMerlin'.
        hidden_state: role of every player, indexed by player.

    Yields:
        ``(state, moves)`` pairs, where ``state`` is the state *before* the
        moves are applied and ``moves`` is the list of actions logged for it.
        The final Merlin pick, if any, is yielded without being applied.
    """
    state = avalon_start
    for round_ in human_game['log']:
        last_proposal = None
        for proposal_num in ['1', '2', '3', '4', '5']:
            proposal = last_proposal = round_[proposal_num]
            assert state.proposer == proposal['proposer']
            assert state.propose_count == int(proposal_num) - 1

            moves = [ProposeAction(proposal=tuple(sorted(proposal['team'])))]
            # Transition first so an invalid logged move fails before it is yielded.
            new_state, _, _ = state.transition(moves, hidden_state)
            yield state, moves
            state = new_state
            assert state.status == 'vote'

            moves = [
                VoteAction(up=(vote == 'Approve'))
                for vote in proposal['votes']
            ]
            new_state, _, _ = state.transition(moves, hidden_state)
            yield state, moves
            state = new_state
            if state.status == 'run':
                break

        # Pair mission cards with team members and order them by seat.
        secret_votes = sorted(zip(last_proposal['team'], round_['mission']))
        moves = [
            MissionAction(fail=(vote == "Fail"))
            for _, vote in secret_votes
        ]
        new_state, _, _ = state.transition(moves, hidden_state)
        yield state, moves
        state = new_state

        if state.status == 'merlin':
            assert 'findMerlin' in round_
            # One identical pick per player rather than a hard-coded five.
            yield state, [
                PickMerlinAction(merlin=round_['findMerlin']['merlin_guess'])
            ] * len(hidden_state)
def legal_actions(self, player, hidden_state):
    """
    Returns the legal actions of the player from this state, given a hidden state

    Args:
        player: index of the player to move; must be in ``self.moving_players()``.
        hidden_state: role of every player, indexed by player. Only consulted
            in the 'run' phase, where evil roles may also fail the mission.

    Returns:
        A list of action objects for the current ``self.status``.

    Raises:
        AssertionError: if ``player`` is not a moving player, or if
            ``self.status`` is not one of 'merlin', 'propose', 'vote', 'run'.
    """
    assert player in self.moving_players(), "Asked a non-moving player legal actions"

    if self.status == 'merlin':
        return [PickMerlinAction(merlin=p) for p in range(self.NUM_PLAYERS)]

    if self.status == 'propose':
        proposal_size, _ = self.MISSION_SIZES[self.succeeds + self.fails]
        return [
            ProposeAction(proposal=p)
            for p in itertools.combinations(range(self.NUM_PLAYERS), r=proposal_size)
        ]

    if self.status == 'vote':
        return [VoteAction(up=True), VoteAction(up=False)]

    if self.status == 'run':
        if hidden_state[player] in EVIL_ROLES:
            return [MissionAction(fail=False), MissionAction(fail=True)]
        return [MissionAction(fail=False)]

    # Raised explicitly: ``assert False`` is stripped under ``python -O`` and
    # would make this function silently return None.
    raise AssertionError("Not sure how we got here")
def cfr_search(self, me, state, hidden_state, fails, strategy_probability, t):
    """Run one sampled CFR traversal below ``state`` and return its value for ``me``.

    At each of its own decision points ``me`` is expanded over all legal
    actions with probability 0.15 and otherwise samples a single action from
    its current regret-matching strategy. Other players' moves are sampled
    from ``calculate_observebot_move_probs``. ``self.cfr_regret`` and
    ``self.cfr_strat`` are updated in place at expanded decision points.

    Args:
        me: index of the player whose regrets are being updated.
        state: current game state.
        hidden_state: role of every player, indexed by player.
        fails: list of ``(proposal, observation)`` pairs for missions whose
            observation was positive; pushed to and popped from around each
            recursive call, so it is unchanged on return.
        strategy_probability: probability of ``me`` reaching this state.
        t: weight applied to this iteration's updates.

    Returns:
        The terminal payoff for ``me`` at a terminal state, otherwise the
        value of the sampled or expanded continuation.
    """
    if state.is_terminal():
        return state.terminal_value(hidden_state)[me]

    movers = state.moving_players()
    moves = [None] * len(movers)
    my_slot = None

    for slot, player in enumerate(movers):
        role = hidden_state[player]
        if role not in EVIL_ROLES and state.status == 'run':
            # Non-evil players always play the mission card "success".
            moves[slot] = MissionAction(fail=False)
            continue
        if role != 'assassin' and state.status == 'merlin':
            # Only the assassin's pick is strategic; everyone else picks at random.
            moves[slot] = PickMerlinAction(
                merlin=np.random.choice(len(hidden_state)))
            continue

        perspective = get_python_perspective(hidden_state, player)
        bucket = get_hidden_states_bucket(perspective, fails)

        if player != me:
            move_probs = calculate_observebot_move_probs(state, perspective, bucket)
        elif np.random.random() < 0.15:
            # Expand this decision point fully further down.
            my_slot = slot
            continue
        else:
            # Sample own move by regret matching (uniform if no positive regret).
            weights = np.clip(
                self.cfr_regret[state.status][(state.as_key(), perspective, bucket)],
                0.0, None)
            if np.sum(weights) == 0:
                weights = np.ones(len(weights))
            move_probs = weights / np.sum(weights)

        options = state.legal_actions(player, hidden_state)
        moves[slot] = options[np.random.choice(len(options), p=move_probs)]

    def descend(reach):
        """Apply the current ``moves`` and recurse with reach probability ``reach``."""
        next_state, _, observation = state.transition(moves, hidden_state)
        record_fail = state.status == 'run' and observation > 0
        if record_fail:
            fails.append((state.proposal, observation))
        outcome = self.cfr_search(me, next_state, hidden_state, fails, reach, t)
        if record_fail:
            fails.pop()
        return outcome

    if my_slot is None:
        return descend(strategy_probability)

    perspective = get_python_perspective(hidden_state, me)
    bucket = get_hidden_states_bucket(perspective, fails)
    key = (state.as_key(), perspective, bucket)

    positive = np.clip(self.cfr_regret[state.status][key], 0, None)
    if np.sum(positive) == 0:
        p = np.ones(len(positive)) / len(positive)
    else:
        p = positive / np.sum(positive)

    values = np.zeros(len(positive))
    my_options = state.legal_actions(me, hidden_state)
    for idx in range(len(values)):
        moves[my_slot] = my_options[idx]
        values[idx] = descend(strategy_probability * p[idx])

    strategy_value = np.dot(values, p)
    self.cfr_regret[state.status][key] += (values - strategy_value) * t
    self.cfr_strat[state.status][key] += p * strategy_probability * t
    return strategy_value
def cfr_search_fast(self, state, hidden_state, fails, strategy_probability, t, cache):
    """Run one cached CFR traversal below ``state`` for ``self.player``.

    Every legal action of ``self.player`` is expanded; opponents' moves are
    sampled from their ``self.opponent_buckets`` counts, mixed with a uniform
    "tremble" that shrinks as the counts grow. ``self.cfr_regret`` is updated
    in place. Results are memoised in ``cache`` under
    ``(state.as_key(), tuple(fails))``.

    Args:
        state: current game state.
        hidden_state: role of every player, indexed by player.
        fails: list of ``(proposal, observation)`` pairs for missions whose
            observation was positive; pushed to and popped from around each
            recursive call, so it is unchanged on return.
        strategy_probability: probability of ``self.player`` reaching this
            state; only propagated to the recursive calls.
        t: weight applied to this iteration's regret update.
        cache: dict memoising values for this traversal (mutated).

    Returns:
        The terminal payoff for ``self.player``, the cached value, or the
        expected value of the current strategy.
    """
    if state.is_terminal():
        return state.terminal_value(hidden_state)[self.player]

    cache_key = (state.as_key(), tuple(fails))
    if cache_key in cache:
        return cache[cache_key]

    # Occasional progress output. The call form of print is valid on both
    # Python 2 and Python 3; the statement form is a SyntaxError on Python 3.
    if np.random.random() < 0.0001:
        print(len(cache))

    # player_statuses[p][q]: 0 if q was on no failed team, 2 if p and q shared
    # a failed team, 1 if q was on a failed team without p.
    player_statuses = [
        [0, 0, 0, 0, 0] for _ in hidden_state
    ]
    for proposal, _ in fails:
        for p in range(len(hidden_state)):
            mark = 2 if p in proposal else 1
            for player in proposal:
                player_statuses[p][player] = max(player_statuses[p][player], mark)

    moving_players = state.moving_players()
    my_move_index = None
    moves = [None] * len(moving_players)
    for i, player in enumerate(moving_players):
        if hidden_state[player] not in EVIL_ROLES and state.status == 'run':
            # Non-evil players always play the mission card "success".
            moves[i] = MissionAction(fail=False)
            continue
        if hidden_state[player] != 'assassin' and state.status == 'merlin':
            # Only the assassin's pick is strategic; everyone else picks at random.
            moves[i] = PickMerlinAction(merlin=np.random.choice(len(hidden_state)))
            continue

        bucket_type, bucket = history_to_bucket(
            hidden_state, player, [(None, state)], player_statuses[player])
        if player == self.player:
            my_move_index = i
            continue

        bucket_data = self.opponent_buckets[player][bucket_type][bucket]
        uniform_prob = np.ones(len(bucket_data)) / len(bucket_data)
        tremble_prob = 1.0 / np.sqrt(3 * np.sum(bucket_data) + 1)
        if np.sum(bucket_data) == 0:
            move_probs = uniform_prob
        else:
            move_probs = (tremble_prob * uniform_prob
                          + (1.0 - tremble_prob) * bucket_data / np.sum(bucket_data))
        moves[i] = move_index_to_move(
            np.random.choice(len(move_probs), p=move_probs), state)

    def descend(reach):
        """Apply the current ``moves`` and recurse with reach probability ``reach``."""
        new_state, _, observation = state.transition(moves, hidden_state)
        record_fail = state.status == 'run' and observation > 0
        if record_fail:
            fails.append((state.proposal, observation))
        outcome = self.cfr_search_fast(new_state, hidden_state, fails, reach, t, cache)
        if record_fail:
            fails.pop()
        return outcome

    if my_move_index is None:
        value = descend(strategy_probability)
        cache[cache_key] = value
        return value

    bucket_type, bucket = history_to_bucket(
        hidden_state, self.player, [(None, state)], player_statuses[self.player])
    # Regret matching: play in proportion to positive regret, else uniformly.
    my_strategy = np.clip(self.cfr_regret[bucket_type][bucket], 0, None)
    if np.sum(my_strategy) == 0:
        p = np.ones(len(my_strategy)) / len(my_strategy)
    else:
        p = my_strategy / np.sum(my_strategy)

    values = np.zeros(len(my_strategy))
    for action_index in range(len(values)):
        moves[my_move_index] = move_index_to_move(action_index, state)
        values[action_index] = descend(strategy_probability * p[action_index])

    strategy_value = np.dot(values, p)
    regrets = values - strategy_value
    self.cfr_regret[bucket_type][bucket] += regrets * t
    cache[cache_key] = strategy_value
    return strategy_value
def single_mcts_search(self, state):
    """Run one MCTS simulation from ``state`` and back up its value.

    A hidden state is sampled from ``self.hidden_states``. Opponents' moves
    are sampled from their ``self.opponent_buckets`` counts mixed with a
    uniform "tremble"; this player's moves come from ``select_move()`` on the
    node stored in ``self.my_buckets``. The simulation stops at the first
    node of this player that was not yet in ``self.my_buckets`` (valued with
    ``heuristic_value_func``) or at a terminal state. The resulting value is
    then added to ``total_payoffs`` of every previously known node visited,
    and ``choose_counts`` is incremented for the action chosen there.

    ``self.history`` is extended during the simulation and truncated back to
    its original length before returning.

    Args:
        state: game state to simulate from.

    Returns:
        None. Node statistics are updated in place.
    """
    history_len = len(self.history)
    hidden_state = self.hidden_states[np.random.choice(len(self.hidden_states))]

    # player_statuses[p][q]: 0 if q was on no failed team, 2 if p and q shared
    # a failed team, 1 if q was on a failed team without p.
    player_statuses = [
        [0, 0, 0, 0, 0] for _ in hidden_state
    ]
    for proposal, _ in self.fails:
        for p in range(len(hidden_state)):
            mark = 2 if p in proposal else 1
            for player in proposal:
                player_statuses[p][player] = max(player_statuses[p][player], mark)

    visited_nodes = []
    chosen_actions = []
    value = None
    while True:
        moves = []
        for player in state.moving_players():
            if hidden_state[player] not in EVIL_ROLES and state.status == 'run':
                # Non-evil players always play the mission card "success".
                moves.append(MissionAction(fail=False))
                continue
            if hidden_state[player] != 'assassin' and state.status == 'merlin':
                # Only the assassin's pick is strategic; everyone else picks at random.
                moves.append(PickMerlinAction(merlin=np.random.choice(len(hidden_state))))
                continue

            bucket_type, bucket = history_to_bucket(
                hidden_state, player, self.history, player_statuses[player])
            if player != self.player:
                bucket_data = self.opponent_buckets[self.bot_ids[player]][bucket_type][bucket]
                uniform_prob = np.ones(len(bucket_data)) / len(bucket_data)
                tremble_prob = 1.0 / np.sqrt(4 * np.sum(bucket_data) + 1)
                if np.sum(bucket_data) == 0:
                    move_probs = uniform_prob
                else:
                    move_probs = (tremble_prob * uniform_prob
                                  + (1.0 - tremble_prob) * bucket_data / np.sum(bucket_data))
                move_index = np.random.choice(len(move_probs), p=move_probs)
            else:
                # The membership test must precede the lookup.
                # NOTE(review): presumably my_buckets auto-creates nodes on
                # lookup (defaultdict-like) - confirm.
                is_new_node = bucket not in self.my_buckets[bucket_type]
                node = self.my_buckets[bucket_type][bucket]
                move_index = node.select_move()
                if is_new_node:
                    value = heuristic_value_func(state, hidden_state, self.player)
                else:
                    visited_nodes.append(node)
                    chosen_actions.append(move_index)
            moves.append(move_index_to_move(move_index, state))

        if value is not None:
            break

        state, _, observation = state.transition(moves, hidden_state)
        self.history.append((observation, state))
        if state.is_terminal():
            value = state.terminal_value(hidden_state)[self.player]
            break

    # Back up the simulated value. Adding a constant 1 here would leave the
    # computed value unused and make every action's mean payoff equal to 1.
    for node, action in zip(visited_nodes, chosen_actions):
        node.choose_counts[action] += 1
        node.total_payoffs[action] += value

    self.history = self.history[:history_len]
def handle_round(game, state, hidden_state, bots, round_, stats):
    """Replay one logged round, recording each bot's probability for every move.

    For each logged move, ``get_prob`` is asked for the probability the seat's
    bot assigns to it and a row is appended to ``stats``. In the 'merlin'
    phase only the assassin's pick is recorded. The state is advanced with
    ``handle_transition`` after each phase.

    Args:
        game: game record; ``game['id']`` and ``game['players']`` are read.
        state: game state at the start of the round.
        hidden_state: role of every player, indexed by seat.
        bots: per-seat bot objects.
        round_: log entry for the round, with keys '1'..'5', 'mission' and
            optionally 'findMerlin'.
        stats: list that receives one dict per recorded move (mutated).

    Returns:
        The game state after the round has been replayed.
    """
    def record(seat, kind, move_text, prob):
        """Append one stats row for ``seat``."""
        stats.append({
            'game': game['id'],
            'seat': seat,
            'role': hidden_state[seat],
            'player': game['players'][seat]['player_id'],
            'type': kind,
            'move': move_text,
            'bot': bots[seat].__class__.__name__,
            'prob': prob,
            'num_players': len(hidden_state)
        })

    last_proposal = None
    for attempt in range(1, 6):
        last_proposal = entry = round_[str(attempt)]
        assert state.proposer == entry['proposer']
        assert state.propose_count == attempt - 1

        moves = [ProposeAction(proposal=tuple(sorted(entry['team'])))]
        for seat, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, seat, bots[seat], move)
            record(seat, 'propose', ','.join(map(str, sorted(move.proposal))), prob)
        state = handle_transition(state, hidden_state, moves, bots)
        assert state.status == 'vote'

        moves = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
        for seat, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, seat, bots[seat], move)
            record(seat, 'vote', 'up' if move.up else 'down', prob)
        state = handle_transition(state, hidden_state, moves, bots)

        if state.status == 'run':
            break

    # Pair mission cards with team members and order them by seat.
    cards_by_seat = sorted(zip(last_proposal['team'], round_['mission']))
    moves = [MissionAction(fail=(card == "Fail")) for _, card in cards_by_seat]
    for seat, move in zip(state.moving_players(), moves):
        prob = get_prob(state, hidden_state, seat, bots[seat], move)
        record(seat, 'mission', 'fail' if move.fail else 'succeed', prob)
    state = handle_transition(state, hidden_state, moves, bots)

    if state.status == 'merlin':
        assert 'findMerlin' in round_
        find_merlin = round_['findMerlin']
        assert hidden_state[find_merlin['assassin']] == 'assassin'
        moves = [
            PickMerlinAction(merlin=find_merlin['merlin_guess'])
            for _ in hidden_state
        ]
        for seat, move in zip(state.moving_players(), moves):
            # get_prob is still called for every seat; only the assassin's row is kept.
            prob = get_prob(state, hidden_state, seat, bots[seat], move)
            if hidden_state[seat] == 'assassin':
                record(seat, 'merlin', move.merlin, prob)
        state = handle_transition(state, hidden_state, moves, bots)

    return state
def replay_game(game):
    """Replay a recorded game, yielding every state transition.

    The role list is reordered (first entry kept, the rest reversed) and
    mapped through ``PRO_TO_HS`` to form the hidden state. Actions are
    reconstructed from the per-player ``voteHistory`` markers ('VHleader',
    'VHpicked', 'VHapprove'), the per-round fail counts and the recorded
    assassin shot.

    Args:
        game: game record with 'game_info' and 'session_info' entries.

    Yields:
        First ``(None, initial_state, hidden_state)``, then one
        ``(state, new_state, observation)`` triple per transition until the
        state is terminal.
    """
    info = game['game_info']
    roles = info['roles']
    roles = roles[:1] + roles[1:][::-1]
    hidden_state = [PRO_TO_HS[role] for role in roles]
    players = game['session_info']['players']
    good_roles = set(['merlin', 'servant'])

    def marked(tag, rnd, attempt):
        """Per player, whether ``tag`` appears in their vote-history cell."""
        return [tag in info['voteHistory'][name][rnd][attempt] for name in players]

    state = AvalonState(
        proposer=marked('VHleader', 0, 0).index(True),
        propose_count=0,
        succeeds=0,
        fails=0,
        status='propose',
        proposal=None,
        game_end=None,
        num_players=5
    )
    yield None, state, hidden_state

    while not state.is_terminal():
        rnd = state.succeeds + state.fails
        status = state.status

        if status != 'merlin':
            # The logged leader must agree with the simulated proposer.
            proposer = marked('VHleader', rnd, state.propose_count).index(True)
            assert proposer == state.proposer, "{} != {}".format(proposer, state.proposer)

        if status == 'propose':
            picked = marked('VHpicked', rnd, state.propose_count)
            proposal = tuple(sorted([
                players.index(name)
                for name, on_team in zip(players, picked)
                if on_team
            ]))
            actions = [ProposeAction(proposal=proposal)]
        elif status == 'vote':
            approvals = marked('VHapprove', rnd, state.propose_count)
            actions = [VoteAction(up=approved) for approved in approvals]
        elif status == 'run':
            # Only the number of fails is logged, so they are assigned to the
            # first players on the mission whose role is not merlin/servant.
            remaining_fails = info['numFailsHistory'][rnd]
            actions = []
            for seat in state.moving_players():
                plays_fail = hidden_state[seat] not in good_roles and remaining_fails != 0
                actions.append(MissionAction(fail=plays_fail))
                if plays_fail:
                    remaining_fails -= 1
            assert remaining_fails == 0
        elif status == 'merlin':
            shot_player = players.index(info['publicData']['roles']['assassinShotUsername'])
            actions = [PickMerlinAction(merlin=shot_player) for _ in range(5)]

        assert len(actions) == len(state.moving_players())
        new_state, _, obs = state.transition(actions, hidden_state)
        yield state, new_state, obs
        state = new_state
def handle_round(data, state, hidden_state, game, round_):
    """Replay one logged round, appending one row per logged move to ``data``.

    Rows are written for the proposer of every proposal, every player's vote,
    every mission card and, if the state reaches the 'merlin' phase, the
    assassin's Merlin guess. Each row captures the state's counters as they
    are at the time the row is written.

    Args:
        data: list that receives one dict per move (mutated).
        state: game state at the start of the round.
        hidden_state: role of every player, indexed by seat.
        game: game record; ``game['id']`` and ``game['players']`` are read.
        round_: log entry for the round, with keys '1'..'5', 'mission' and
            optionally 'findMerlin'.

    Returns:
        The game state after the round has been replayed.
    """
    def record(current, seat, kind, move, has_self):
        """Append one row for ``seat`` using the counters of ``current``."""
        data.append({
            'game': game['id'],
            'player': game['players'][seat]['player_id'],
            'seat': seat,
            'role': hidden_state[seat],
            'is_evil': hidden_state[seat] in EVIL_ROLES,
            'type': kind,
            'move': move,
            'propose_count': current.propose_count,
            'round': current.fails + current.succeeds,
            'succeeds': current.succeeds,
            'fails': current.fails,
            'propose_has_self': has_self,
            'num_players': len(hidden_state)
        })

    last_proposal = None
    for attempt in range(1, 6):
        last_proposal = entry = round_[str(attempt)]
        assert state.proposer == entry['proposer'], "idk"
        assert state.propose_count == attempt - 1, "idk2"

        leader = entry['proposer']
        record(state, leader, 'propose',
               ','.join(map(str, sorted(entry['team']))),
               leader in entry['team'])
        propose_moves = [ProposeAction(proposal=tuple(sorted(entry['team'])))]
        state, _, _ = state.transition(propose_moves, hidden_state)
        assert state.status == 'vote'

        for seat, ballot in enumerate(entry['votes']):
            record(state, seat, 'vote', ballot, seat in entry['team'])
        vote_moves = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
        state, _, _ = state.transition(vote_moves, hidden_state)

        if state.status == 'run':
            break

    # Pair mission cards with team members and order them by seat.
    cards_by_seat = sorted(zip(last_proposal['team'], round_['mission']))
    for seat, card in cards_by_seat:
        record(state, seat, 'mission', card, True)
    mission_moves = [MissionAction(fail=(card == "Fail")) for _, card in cards_by_seat]
    state, _, _ = state.transition(mission_moves, hidden_state)

    if state.status == 'merlin':
        assert 'findMerlin' in round_, "wat"
        find_merlin = round_['findMerlin']
        assert hidden_state[find_merlin['assassin']] == 'assassin', "wat"
        picks = [
            PickMerlinAction(merlin=find_merlin['merlin_guess'])
            for _ in hidden_state
        ]
        assassin_seat = hidden_state.index('assassin')
        record(state, assassin_seat, 'merlin', str(find_merlin['merlin_guess']), True)
        state, _, _ = state.transition(picks, hidden_state)

    return state