def get_move_probabilities(self, state, legal_actions):
    """Return a normalised heuristic distribution over ``legal_actions``.

    Reads ``self.player``, ``self.is_evil`` and ``self.hidden_states``.

    * ``'propose'``: equal weight on every proposal that contains this
      player, zero on the rest.
    * ``'vote'``: every vote gets weight 1 and the favoured vote gets 5 more.
      Approval is favoured when this player is on the proposal, or when
      ``state.propose_count == 4`` and the player is not evil; otherwise
      rejection is favoured.
    * ``'run'``: every action gets weight 1; an evil player adds 5 to
      succeeding on the first mission and to failing on later ones.
    * ``'merlin'``: one unit of weight per entry of ``self.hidden_states``,
      placed on the seat that entry labels ``'merlin'``.

    The weights are divided by their sum before being returned.
    """
    weights = np.zeros(len(legal_actions))
    status = state.status

    if status == 'propose':
        # Only consider proposals with yourself
        for idx, action in enumerate(legal_actions):
            if self.player in action.proposal:
                weights[idx] += 1
    elif status == 'vote':
        weights += 1
        # Vote up proposals containing you, and (when good) anything on the
        # final proposal attempt; vote down everything else.
        approve = (self.player in state.proposal
                   or (state.propose_count == 4 and not self.is_evil))
        weights[legal_actions.index(VoteAction(up=approve))] += 5
    elif status == 'run':
        weights += 1
        if self.is_evil:
            # Fail most missions unless it's the first one
            is_first_mission = state.fails + state.succeeds == 0
            preferred = MissionAction(fail=not is_first_mission)
            weights[legal_actions.index(preferred)] += 5
    elif status == 'merlin':
        # Try to pick merlin based off of hidden states
        for world in self.hidden_states:
            guess = PickMerlinAction(merlin=world.index('merlin'))
            weights[legal_actions.index(guess)] += 1

    return weights / np.sum(weights)
def get_opponent_moves_and_probs(state, hidden_state, player, no_tremble=False):
    """Return ``(legal_actions, probabilities)`` for a modelled opponent.

    The legal actions come from ``state.legal_actions(player, hidden_state)``.
    When only one action is legal it is returned with probability 1.
    Otherwise weight is placed as follows, then normalised:

    * ``'propose'``: proposals that include ``player``.
    * ``'vote'``: approve exactly when ``player`` is on ``state.proposal``.
    * ``'run'``: always fail.
    * ``'merlin'``: seats whose role in ``hidden_state`` is in ``GOOD_ROLES``.

    Unless ``no_tremble`` is true, the result is blended with a uniform
    distribution using the ``OPPONENT_TREMBLE`` weight.
    """
    actions = state.legal_actions(player, hidden_state)
    n_actions = len(actions)
    if n_actions == 1:
        return actions, np.ones(1)

    weights = np.zeros(n_actions)
    status = state.status
    if status == 'propose':
        for idx, action in enumerate(actions):
            if player in action.proposal:
                weights[idx] += 1
    elif status == 'vote':
        approve = player in state.proposal
        weights[actions.index(VoteAction(up=approve))] = 1
    elif status == 'run':
        # must be evil since we have two options here.
        weights[actions.index(MissionAction(fail=True))] = 1
    elif status == 'merlin':
        for idx, action in enumerate(actions):
            if hidden_state[action.merlin] in GOOD_ROLES:
                weights[idx] += 1

    weights /= np.sum(weights)
    if no_tremble:
        return actions, weights

    uniform = np.ones(n_actions) / n_actions
    blended = (1 - OPPONENT_TREMBLE) * weights + OPPONENT_TREMBLE * uniform
    return actions, blended
def get_move_probabilities(self, state, legal_actions):
    """Return move probabilities derived from the propose/vote models.

    * ``'propose'``: ``PROPOSE_MODEL`` is evaluated on
      ``self.propose_nn_input``; each proposal is scored by the product of
      the per-player outputs of its members (computed in log space) and the
      scores are normalised.
    * ``'vote'``: a feature row for the current proposal is pushed onto
      ``self.vote_nn_input`` for the duration of the ``VOTE_MODEL`` call and
      popped afterwards; the model output is used as the approve probability.
    * any other status: uniform over ``legal_actions``.
    """
    n_actions = len(legal_actions)
    status = state.status

    if status == 'propose':
        player_probs = PROPOSE_MODEL.predict(np.array([self.propose_nn_input]))[0]
        scores = np.zeros(n_actions)
        for idx, action in enumerate(legal_actions):
            log_score = sum([np.log(player_probs[p]) for p in action.proposal])
            scores[idx] = np.exp(log_score)
        return scores / np.sum(scores)

    if status == 'vote':
        features = np.concatenate([
            self.perception.flat,
            onehot(self.player),
            onehot(state.proposal),
            np.zeros(5),
        ])
        # The row is only needed for this prediction, so remove it again
        # straight after the model call.
        self.vote_nn_input.append(features)
        approve_prob = VOTE_MODEL.predict(np.array([self.vote_nn_input]))[0]
        self.vote_nn_input.pop()

        votes = np.zeros(n_actions)
        approve_at = legal_actions.index(VoteAction(up=True))
        votes[approve_at] = approve_prob
        votes[1 - approve_at] = 1 - approve_prob
        return votes / np.sum(votes)

    return np.ones(n_actions) / n_actions
def human_game_state_generator(avalon_start, human_game, hidden_state):
    """Replay a logged game, yielding one tuple per transition.

    Starting from ``avalon_start``, each round in ``human_game['log']`` is
    replayed: proposal attempts ``'1'`` to ``'5'`` are read in order, each as
    a propose transition followed by a vote transition, stopping once the
    state reaches ``'run'``. The mission is then played from
    ``round_['mission']``, paired with the accepted team and sorted.

    Yields ``(old_state, new_state, observation)`` for every transition.
    """
    current = avalon_start
    for round_ in human_game['log']:
        accepted = None
        for attempt in range(1, 6):
            entry = accepted = round_[str(attempt)]
            assert current.proposer == entry['proposer']
            assert current.propose_count == attempt - 1

            team = tuple(sorted(entry['team']))
            following, _, observation = current.transition(
                [ProposeAction(proposal=team)], hidden_state)
            yield current, following, observation
            current = following

            assert current.status == 'vote'
            ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
            following, _, observation = current.transition(ballots, hidden_state)
            yield current, following, observation
            current = following

            if current.status == 'run':
                break

        # Pair each team member with their mission card, ordered by seat.
        cards = sorted(zip(accepted['team'], round_['mission']))
        mission_moves = [MissionAction(fail=(card == "Fail")) for _, card in cards]
        following, _, observation = current.transition(mission_moves, hidden_state)
        yield current, following, observation
        current = following
def get_action(self, state, legal_actions):
    """Pick an action: approve every vote, fail missions only when evil.

    For any status other than ``'vote'`` or ``'run'`` a uniformly random
    element of ``legal_actions`` is returned.
    """
    status = state.status
    if status == 'vote':
        return VoteAction(up=True)
    if status == 'run':
        return MissionAction(fail=self.is_evil)
    return random.choice(legal_actions)
def get_move_probabilities(self, state, legal_actions):
    """Return probability 1 on approving during ``'vote'``, else uniform."""
    n_actions = len(legal_actions)
    if state.status != 'vote':
        return np.ones(n_actions) / n_actions

    probs = np.zeros(n_actions)
    probs[legal_actions.index(VoteAction(up=True))] = 1.
    return probs
def get_move_probabilities(self, state, legal_actions):
    """Look up this player's strategy in ``self.node`` and align it to ``legal_actions``.

    A single legal action is returned with probability 1 without consulting
    the node. Otherwise the strategy vector for the current status is read
    from ``self.node`` (indexed by ``self.perspective`` and, except for
    proposals, ``self.player``) and each entry is written at the index of
    the corresponding action in ``legal_actions``. Vote and mission
    strategies are ordered ``[False, True]`` (reject/approve and
    succeed/fail). The ``'merlin'`` strategy is returned as stored.

    Returns ``None`` for any other status.
    """
    if len(legal_actions) == 1:
        return np.array([1.0])

    status = state.status

    if status == 'propose':
        probs = np.zeros(len(legal_actions))
        strategy = self.node['propose_strat'][self.perspective]
        options = self.node['propose_options']
        for weight, bits in zip(strategy, options):
            team = bitstring_to_proposal(bits)
            probs[legal_actions.index(ProposeAction(proposal=team))] = weight
        return probs

    if status == 'vote':
        probs = np.zeros(len(legal_actions))
        strategy = self.node['vote_strat'][self.player][self.perspective]
        for weight, approve in zip(strategy, [False, True]):
            probs[legal_actions.index(VoteAction(up=approve))] = weight
        return probs

    if status == 'run':
        probs = np.zeros(len(legal_actions))
        strategy = self.node['mission_strat'][self.player][self.perspective]
        for weight, sabotage in zip(strategy, [False, True]):
            probs[legal_actions.index(MissionAction(fail=sabotage))] = weight
        return probs

    if status == 'merlin':
        return np.array(self.node['merlin_strat'][self.player][self.perspective])

    # Unrecognised status: nothing to report.
    return None
def handle_round(game, state, hidden_state, assassin_bot, assassin_player, round_):
    """Replay one logged round through ``handle_transition`` and return the final state.

    Proposal attempts ``'1'`` to ``'5'`` of ``round_`` are replayed as a
    propose transition followed by a vote transition, stopping as soon as
    the state reaches ``'run'``. The mission is then played from
    ``round_['mission']``, paired with the accepted team and sorted.
    ``game`` is accepted but not used here.
    """
    def advance(current, moves):
        # Every transition in this round goes through the same handler.
        return handle_transition(current, hidden_state, moves,
                                 assassin_bot, assassin_player)

    accepted = None
    for attempt in range(1, 6):
        entry = accepted = round_[str(attempt)]
        assert state.proposer == entry['proposer']
        assert state.propose_count == attempt - 1

        team = tuple(sorted(entry['team']))
        state = advance(state, [ProposeAction(proposal=team)])
        assert state.status == 'vote'

        ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
        state = advance(state, ballots)
        if state.status == 'run':
            break

    cards = sorted(zip(accepted['team'], round_['mission']))
    mission_moves = [MissionAction(fail=(card == "Fail")) for _, card in cards]
    return advance(state, mission_moves)
def handle_round(tree_roots, state, hidden_state, round_):
    """Replay one logged round via ``deal_with_transition``.

    Proposal attempts ``'1'`` to ``'5'`` are replayed (propose, then vote)
    until the state reaches ``'run'``, then the mission is played. If the
    state is then ``'merlin'``, the logged ``round_['findMerlin']`` guess is
    submitted once per entry of ``hidden_state``.

    Returns the updated ``(tree_roots, state)`` pair.
    """
    accepted = None
    for attempt in range(1, 6):
        entry = accepted = round_[str(attempt)]
        assert state.proposer == entry['proposer'], "idk"
        assert state.propose_count == attempt - 1, "idk2"

        team = tuple(sorted(entry['team']))
        tree_roots, state = deal_with_transition(
            tree_roots, state, [ProposeAction(proposal=team)], hidden_state)
        assert state.status == 'vote'

        ballots = [VoteAction(up=(ballot == 'Approve')) for ballot in entry['votes']]
        tree_roots, state = deal_with_transition(tree_roots, state, ballots, hidden_state)
        if state.status == 'run':
            break

    cards = sorted(zip(accepted['team'], round_['mission']))
    mission_moves = [MissionAction(fail=(card == "Fail")) for _, card in cards]
    tree_roots, state = deal_with_transition(tree_roots, state, mission_moves, hidden_state)

    if state.status != 'merlin':
        return tree_roots, state

    assert 'findMerlin' in round_, "wat"
    find_merlin = round_['findMerlin']
    assert hidden_state[find_merlin['assassin']] == 'assassin', "wat"
    # One identical guess is submitted per entry of hidden_state.
    guesses = [
        PickMerlinAction(merlin=find_merlin['merlin_guess'])
        for _ in hidden_state
    ]
    tree_roots, state = deal_with_transition(tree_roots, state, guesses, hidden_state)
    return tree_roots, state
def get_action(self, state, legal_actions):
    """Return the most-counted move of ``self.current_node``, with a fallback.

    When ``self.current_node`` is ``None`` the fallback applies: approve
    every vote, fail a mission only when evil, and otherwise choose
    uniformly at random from ``legal_actions``.
    """
    node = self.current_node
    if node is not None:
        counts = node['move_counts']
        return max(counts, key=counts.get)

    status = state.status
    if status == 'run':
        return MissionAction(fail=self.is_evil)
    if status == 'vote':
        return VoteAction(up=True)
    return random.choice(legal_actions)
def get_move_probabilities(self, state, legal_actions):
    """Return the probability of each legal action under this bot's policy.

    * ``'run'``: all mass on ``MissionAction(fail=self.is_evil)``.
    * ``'vote'``: all mass on ``VoteAction(up=True)``.
    * any other status: uniform over ``legal_actions``.

    The returned array sums to 1.
    """
    result = np.zeros(len(legal_actions))
    if state.status == 'run':
        result[legal_actions.index(MissionAction(fail=self.is_evil))] = 1
    elif state.status == 'vote':
        result[legal_actions.index(VoteAction(up=True))] = 1
    else:
        result[:] = 1
    # Normalise by the total mass, not the number of actions: dividing a
    # one-hot vector by len(legal_actions) would give the chosen action only
    # 1/n probability and leave the distribution summing to less than 1.
    return result / np.sum(result)
def move_index_to_move(move_index, state):
    """Translate an integer move index into the action object for ``state.status``.

    * ``'merlin'``: the index is the seat being picked.
    * ``'propose'``: the index selects a bitstring from
      ``INDEX_TO_PROPOSAL_2`` when the current mission size is 2, else from
      ``INDEX_TO_PROPOSAL_3``.
    * ``'vote'``: a truthy index means approve.
    * anything else: treated as a mission move, where a truthy index means fail.
    """
    status = state.status
    if status == 'vote':
        return VoteAction(up=bool(move_index))
    if status == 'merlin':
        return PickMerlinAction(merlin=move_index)
    if status != 'propose':
        return MissionAction(fail=bool(move_index))

    team_size, _ = state.MISSION_SIZES[state.succeeds + state.fails]
    if team_size == 2:
        table = INDEX_TO_PROPOSAL_2
    else:
        table = INDEX_TO_PROPOSAL_3
    return ProposeAction(proposal=bitstring_to_proposal(table[move_index]))
def legal_actions(self, player, hidden_state):
    """
    Returns the legal actions of the player from this state, given a hidden state.

    ``player`` must be one of ``self.moving_players()``. Votes are always
    ``[up, down]``. On a mission only players whose role is in
    ``EVIL_ROLES`` get the fail option. Proposals are every combination of
    seats of the current mission size, and the merlin pick is any seat.
    """
    assert player in self.moving_players(), "Asked a non-moving player legal actions"

    status = self.status
    if status == 'merlin':
        return [PickMerlinAction(merlin=seat) for seat in range(self.NUM_PLAYERS)]

    if status == 'propose':
        team_size, _ = self.MISSION_SIZES[self.succeeds + self.fails]
        seats = range(self.NUM_PLAYERS)
        return [
            ProposeAction(proposal=team)
            for team in itertools.combinations(seats, r=team_size)
        ]

    if status == 'vote':
        return [VoteAction(up=True), VoteAction(up=False)]

    if status == 'run':
        can_fail = hidden_state[player] in EVIL_ROLES
        options = [MissionAction(fail=False)]
        if can_fail:
            options.append(MissionAction(fail=True))
        return options

    assert False, "Not sure how we got here"
def get_action(self, state, legal_actions, role_guess=None, return_all=False):
    """Choose an action consistent with a guessed role assignment.

    ``role_guess`` defaults to a random element of ``self.hidden_states``.

    * ``'vote'``: always approve when ``state.propose_count == 4``.
      Otherwise a good player approves exactly when the proposer and every
      proposed player are in ``GOOD_ROLES`` under the guess; an evil player
      votes the opposite way.
    * ``'propose'`` (good players only): propose a team drawn from the
      players guessed to be good. With ``return_all`` every such team is
      returned as a list.
    * ``'run'`` (evil players only): fail the mission.
    * otherwise: a random legal action, or all of ``legal_actions`` when
      ``return_all`` is set.

    Note that the vote and evil-mission branches return a single action
    even when ``return_all`` is true.
    """
    role_guess = role_guess or random.choice(self.hidden_states)
    status = state.status

    if status == 'vote':
        if state.propose_count == 4:
            return VoteAction(up=True)
        looks_clean = role_guess[state.proposer] in GOOD_ROLES and all(
            role_guess[seat] in GOOD_ROLES for seat in state.proposal)
        approve = (not looks_clean) if self.is_evil else looks_clean
        return VoteAction(up=approve)

    if status == 'propose' and not self.is_evil:
        team_size = len(legal_actions[0].proposal)
        trusted = [
            seat for seat, role in enumerate(role_guess) if role in GOOD_ROLES
        ]
        if return_all:
            return [
                ProposeAction(proposal=team)
                for team in itertools.combinations(trusted, team_size)
            ]
        random.shuffle(trusted)
        return ProposeAction(proposal=tuple(sorted(trusted[:team_size])))

    if status == 'run' and self.is_evil:
        return MissionAction(fail=True)

    return legal_actions if return_all else random.choice(legal_actions)
def _move_row(game, state, hidden_state, seat, move_type, move, propose_has_self):
    """Build one row describing the move made by ``seat`` in ``state``."""
    return {
        'game': game['id'],
        'player': game['players'][seat]['player_id'],
        'seat': seat,
        'role': hidden_state[seat],
        'is_evil': hidden_state[seat] in EVIL_ROLES,
        'type': move_type,
        'move': move,
        'propose_count': state.propose_count,
        'round': state.fails + state.succeeds,
        'succeeds': state.succeeds,
        'fails': state.fails,
        'propose_has_self': propose_has_self,
        'num_players': len(hidden_state),
    }


def handle_round(data, state, hidden_state, game, round_):
    """Replay one logged round, appending a row to ``data`` for every move.

    Proposal attempts ``'1'`` to ``'5'`` of ``round_`` are replayed in order.
    Each adds one ``'propose'`` row for the proposer and one ``'vote'`` row
    per seat, until the state reaches ``'run'``. The mission then adds one
    ``'mission'`` row per team member. If the state is then ``'merlin'``,
    one ``'merlin'`` row is added for the assassin's guess. Each row is
    built from the state as it was before that move's transition.

    Args:
        data: list that receives the row dicts (mutated in place).
        state: game state at the start of the round.
        hidden_state: sequence of role names indexed by seat.
        game: game record providing ``'id'`` and ``'players'``.
        round_: log entry for this round.

    Returns:
        The state after the round's final transition.
    """
    last_proposal = None
    for proposal_num in ['1', '2', '3', '4', '5']:
        proposal = last_proposal = round_[proposal_num]
        proposer = proposal['proposer']
        assert state.proposer == proposer, "idk"
        assert state.propose_count == int(proposal_num) - 1, "idk2"

        data.append(_move_row(
            game, state, hidden_state, proposer, 'propose',
            ','.join(map(str, sorted(proposal['team']))),
            proposer in proposal['team']))
        moves = [ProposeAction(proposal=tuple(sorted(proposal['team'])))]
        state, _, _ = state.transition(moves, hidden_state)
        assert state.status == 'vote'

        for seat, vote in enumerate(proposal['votes']):
            data.append(_move_row(
                game, state, hidden_state, seat, 'vote', vote,
                seat in proposal['team']))
        moves = [VoteAction(up=(vote == 'Approve')) for vote in proposal['votes']]
        state, _, _ = state.transition(moves, hidden_state)
        if state.status == 'run':
            break

    # Pair each team member with their mission card, ordered by seat.
    secret_votes = sorted(zip(last_proposal['team'], round_['mission']))
    for seat, vote in secret_votes:
        data.append(_move_row(
            game, state, hidden_state, seat, 'mission', vote, True))
    moves = [MissionAction(fail=(vote == "Fail")) for player, vote in secret_votes]
    state, _, _ = state.transition(moves, hidden_state)

    if state.status == 'merlin':
        assert 'findMerlin' in round_, "wat"
        find_merlin = round_['findMerlin']
        assert hidden_state[find_merlin['assassin']] == 'assassin', "wat"
        moves = [
            PickMerlinAction(merlin=find_merlin['merlin_guess'])
            for _ in hidden_state
        ]
        # Only the assassin's guess is recorded, although one move is
        # submitted per entry of hidden_state.
        seat = hidden_state.index('assassin')
        data.append(_move_row(
            game, state, hidden_state, seat, 'merlin',
            str(find_merlin['merlin_guess']), True))
        state, _, _ = state.transition(moves, hidden_state)

    return state
def calculate_observation_ll(hidden_state, bot_classes, observation_history, tremble=0.0):
    """Return the log-likelihood of ``observation_history`` under the given bots.

    One bot is instantiated per entry of ``bot_classes`` and reset with its
    seat, role and starting beliefs. The history is then replayed from the
    start state. For each observation the probability the acting bots
    assign to what was observed is added (in log space) to the total. Each
    bot's move distribution is first mixed with a uniform distribution
    using weight ``tremble``.

    * ``'propose'``: ``obs`` is the proposed team.
    * ``'vote'``: ``obs`` holds one boolean per moving player.
    * ``'run'``: ``obs`` is the number of fails. The probability is summed
      over every size-``obs`` subset of the evil team members being the
      ones who failed. If there are fewer evil members than fails,
      ``np.log(0.0)`` is returned immediately.

    After every observation all bots are told about the transition.
    """
    num_players = len(hidden_state)
    all_hidden_states = possible_hidden_states(set(hidden_state), num_players=num_players)
    beliefs = [
        starting_hidden_states(seat, hidden_state, all_hidden_states)
        for seat in range(num_players)
    ]
    state = AvalonState.start_state(num_players)
    bots = [bot_class() for bot_class in bot_classes]
    for seat, bot in enumerate(bots):
        bot.reset(state, seat, hidden_state[seat], beliefs[seat])

    def trembled_probability(current, seat, action):
        # Probability that the bot in `seat` plays `action`, after mixing
        # its distribution with a uniform one.
        options = current.legal_actions(seat, hidden_state)
        slot = options.index(action)
        raw = bots[seat].get_move_probabilities(current, options)
        uniform = np.ones(len(options)) / len(options)
        return ((1.0 - tremble) * raw + tremble * uniform)[slot]

    log_likelihood = 0.0
    for obs_type, obs in observation_history:
        assert obs_type == state.status, "Incorrect matchup {} != {}".format(obs_type, state.status)
        moving_players = state.moving_players()
        moves = []

        if obs_type == 'propose':
            move = ProposeAction(proposal=obs)
            log_likelihood += np.log(trembled_probability(state, moving_players[0], move))
            moves.append(move)
        elif obs_type == 'vote':
            for seat, vote_up in zip(moving_players, obs):
                move = VoteAction(up=vote_up)
                log_likelihood += np.log(trembled_probability(state, seat, move))
                moves.append(move)
        elif obs_type == 'run':
            evil_on_team = [p for p in state.proposal if hidden_state[p] in EVIL_ROLES]
            if len(evil_on_team) < obs:
                # Impossible - fewer bad than failed
                return np.log(0.0)

            fail_probability = {}
            for seat in evil_on_team:
                fail_probability[seat] = trembled_probability(
                    state, seat, MissionAction(fail=True))

            outcome_prob = 0.0
            moves = [MissionAction(fail=False)] * len(state.proposal)
            for failers in itertools.combinations(evil_on_team, r=obs):
                combo_prob = 1.0
                for seat in evil_on_team:
                    did_fail = seat in failers
                    # `moves` is overwritten for each subset, so it ends up
                    # reflecting the last subset enumerated.
                    moves[state.proposal.index(seat)] = MissionAction(fail=did_fail)
                    if did_fail:
                        combo_prob *= fail_probability[seat]
                    else:
                        combo_prob *= (1.0 - fail_probability[seat])
                outcome_prob += combo_prob
            log_likelihood += np.log(outcome_prob)

        new_state, _, observation = state.transition(moves, hidden_state)
        for player, bot in enumerate(bots):
            if player in moving_players:
                own_move = moves[moving_players.index(player)]
            else:
                own_move = None
            bot.handle_transition(state, new_state, observation, move=own_move)
        state = new_state

    return log_likelihood
def get_action(self, state, legal_actions):
    """Approve every vote; otherwise pick a uniformly random legal action."""
    if state.status != 'vote':
        return random.choice(legal_actions)
    return VoteAction(up=True)
def _stat_row(game, hidden_state, bots, seat, move_type, move_label, prob):
    """Build one stats row for the move made by ``seat``, with the bot's probability."""
    return {
        'game': game['id'],
        'seat': seat,
        'role': hidden_state[seat],
        'player': game['players'][seat]['player_id'],
        'type': move_type,
        'move': move_label,
        'bot': bots[seat].__class__.__name__,
        'prob': prob,
        'num_players': len(hidden_state),
    }


def handle_round(game, state, hidden_state, bots, round_, stats):
    """Replay one logged round, recording the probability each bot gave the observed move.

    Before every transition ``get_prob`` is called for each moving player,
    and a row is appended to ``stats``. In the ``'merlin'`` phase
    ``get_prob`` is still called for every moving player, but only the
    assassin's row is kept. Transitions go through ``handle_transition``.

    Args:
        game: game record providing ``'id'`` and ``'players'``.
        state: game state at the start of the round.
        hidden_state: sequence of role names indexed by seat.
        bots: one bot per seat.
        round_: log entry for this round.
        stats: list that receives the row dicts (mutated in place).

    Returns:
        The state after the round's final transition.
    """
    last_proposal = None
    for proposal_num in ['1', '2', '3', '4', '5']:
        proposal = last_proposal = round_[proposal_num]
        assert state.proposer == proposal['proposer']
        assert state.propose_count == int(proposal_num) - 1

        moves = [ProposeAction(proposal=tuple(sorted(proposal['team'])))]
        for player, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, player, bots[player], move)
            stats.append(_stat_row(
                game, hidden_state, bots, player, 'propose',
                ','.join(map(str, sorted(move.proposal))), prob))
        state = handle_transition(state, hidden_state, moves, bots)
        assert state.status == 'vote'

        moves = [
            VoteAction(up=(vote == 'Approve')) for vote in proposal['votes']
        ]
        for player, move in zip(state.moving_players(), moves):
            prob = get_prob(state, hidden_state, player, bots[player], move)
            stats.append(_stat_row(
                game, hidden_state, bots, player, 'vote',
                'up' if move.up else 'down', prob))
        state = handle_transition(state, hidden_state, moves, bots)
        if state.status == 'run':
            break

    # Pair each team member with their mission card, ordered by seat.
    secret_votes = sorted(zip(last_proposal['team'], round_['mission']))
    moves = [
        MissionAction(fail=(vote == "Fail")) for player, vote in secret_votes
    ]
    for player, move in zip(state.moving_players(), moves):
        prob = get_prob(state, hidden_state, player, bots[player], move)
        stats.append(_stat_row(
            game, hidden_state, bots, player, 'mission',
            'fail' if move.fail else 'succeed', prob))
    state = handle_transition(state, hidden_state, moves, bots)

    if state.status == 'merlin':
        assert 'findMerlin' in round_
        find_merlin = round_['findMerlin']
        assert hidden_state[find_merlin['assassin']] == 'assassin'
        moves = [
            PickMerlinAction(merlin=find_merlin['merlin_guess'])
            for _ in hidden_state
        ]
        for player, move in zip(state.moving_players(), moves):
            # get_prob is called for every moving player, but only the
            # assassin's guess is recorded.
            prob = get_prob(state, hidden_state, player, bots[player], move)
            if hidden_state[player] == 'assassin':
                stats.append(_stat_row(
                    game, hidden_state, bots, player, 'merlin',
                    move.merlin, prob))
        state = handle_transition(state, hidden_state, moves, bots)

    return state
def replay_game(game):
    """Replay a recorded five-player game, yielding each transition.

    The role list is reordered (first entry kept, the rest reversed) and
    mapped through ``PRO_TO_HS`` to form the hidden state. The first
    proposer is the player whose first vote-history cell contains
    ``'VHleader'``.

    First yields ``(None, start_state, hidden_state)``. Then, until the
    state is terminal, yields ``(state, new_state, observation)`` for each
    transition, with actions reconstructed from the record:

    * proposals from the ``'VHpicked'`` markers,
    * votes from the ``'VHapprove'`` markers,
    * mission moves from ``numFailsHistory``: the observed fails are
      assigned to the first moving players whose role is neither
      ``'merlin'`` nor ``'servant'``,
    * the merlin pick from ``assassinShotUsername``.
    """
    roles = game['game_info']['roles']
    roles = roles[:1] + roles[1:][::-1]
    hidden_state = [PRO_TO_HS[role] for role in roles]
    players = game['session_info']['players']

    def history_cell(name, rnd, attempt):
        # Marker string(s) recorded for `name` at this round and attempt.
        return game['game_info']['voteHistory'][name][rnd][attempt]

    proposer = [
        'VHleader' in history_cell(name, 0, 0) for name in players
    ].index(True)
    state = AvalonState(
        proposer=proposer,
        propose_count=0,
        succeeds=0,
        fails=0,
        status='propose',
        proposal=None,
        game_end=None,
        num_players=5
    )
    yield None, state, hidden_state

    while not state.is_terminal():
        rnd = state.succeeds + state.fails
        status = state.status

        if status != 'merlin':
            # Check that the recorded leader matches the simulated one.
            proposer = [
                'VHleader' in history_cell(name, rnd, state.propose_count)
                for name in players
            ].index(True)
            assert proposer == state.proposer, "{} != {}".format(proposer, state.proposer)

        if status == 'propose':
            picked = [
                players.index(name)
                for name in players
                if 'VHpicked' in history_cell(name, rnd, state.propose_count)
            ]
            actions = [ProposeAction(proposal=tuple(sorted(picked)))]
        elif status == 'vote':
            actions = [
                VoteAction(up=('VHapprove' in history_cell(name, rnd, state.propose_count)))
                for name in players
            ]
        elif status == 'run':
            observed_fails = game['game_info']['numFailsHistory'][rnd]
            actions = []
            for seat in state.moving_players():
                is_good = hidden_state[seat] in ('merlin', 'servant')
                if is_good or observed_fails == 0:
                    actions.append(MissionAction(fail=False))
                else:
                    actions.append(MissionAction(fail=True))
                    observed_fails -= 1
            # Every observed fail must have been assigned to someone.
            assert observed_fails == 0
        elif status == 'merlin':
            shot_player = players.index(
                game['game_info']['publicData']['roles']['assassinShotUsername'])
            actions = [PickMerlinAction(merlin=shot_player) for _ in range(5)]

        assert len(actions) == len(state.moving_players())
        new_state, _, obs = state.transition(actions, hidden_state)
        yield state, new_state, obs
        state = new_state