def dict_to_flatten_board_state(state, map_object):
    """ Builds the flat (list) board state representation of a game state given as a dictionary.

        :param state: A game state.
        :param map_object: The instantiated Map object
        :return: A flatten (list) representation of the phase (81*35 = 2835)
    """
    # The dictionary is first converted to a StateProto, which is what the encoder consumes
    board_state = proto_to_board_state(dict_to_proto(state, StateProto), map_object)
    flat_board_state = board_state.flatten()
    return flat_board_state.tolist()
def dict_to_flatten_prev_orders_state(phase, map_object):
    """ Builds the flat (list) prev orders state representation of a phase given as a dictionary.

        :param phase: A phase from a saved game.
        :param map_object: The instantiated Map object
        :return: A flatten (list) representation of the prev orders (81*40 = 3240)
    """
    # The dictionary is first converted to a PhaseHistoryProto, which is what the encoder consumes
    prev_orders_state = proto_to_prev_orders_state(dict_to_proto(phase, PhaseHistoryProto), map_object)
    flat_prev_orders_state = prev_orders_state.flatten()
    return flat_prev_orders_state.tolist()
def extract_state_proto(game):
    """ Builds the state_proto of a diplomacy.Game object

        The dictionary returned by `game.get_state()` is extended with the game id, the map name
        and the list of rules before being converted.

        :param game: The diplomacy.Game object
        :return: The result of converting the extended state with `dict_to_proto` and `StateProto`
        :type game: diplomacy.Game
    """
    current_state = game.get_state()
    current_state.update({'game_id': game.game_id,
                          'map': game.map.name,
                          'rules': list(game.rules)})
    return dict_to_proto(current_state, StateProto)
def process_game(line):
    """ Process a line in the .jsonl file

        The line is parsed as JSON, extended with cached states and possible orders, converted to a
        SavedGameProto, extended with rewards (using DefaultRewardFunction), and finally compressed.

        :param line: A line read from the .jsonl file. Empty and whitespace-only lines are skipped.
        :return: A tuple (game_id, saved_game_zlib), or (None, None) if the line is empty or blank
    """
    if not line:
        return None, None

    # A blank line (e.g. a trailing '\n' at the end of the file) carries no game and would make
    # json.loads raise, so it is skipped exactly like an empty line.
    if isinstance(line, (str, bytes, bytearray)) and not line.strip():
        return None, None

    saved_game = json.loads(line)
    saved_game = add_cached_states_to_saved_game(saved_game)
    saved_game = add_possible_orders_to_saved_game(saved_game)

    saved_game_proto = dict_to_proto(saved_game, SavedGameProto)
    saved_game_proto = add_rewards_to_saved_game_proto(saved_game_proto, DefaultRewardFunction())

    saved_game_zlib = proto_to_zlib(saved_game_proto)
    return saved_game['id'], saved_game_zlib
def extract_phase_history_proto(game, nb_previous_phases=NB_PREV_ORDERS_HISTORY):
    """ Builds the list of phase history protos of a diplomacy.Game object

        :param game: The diplomacy.Game object
        :param nb_previous_phases: Integer. If set, only the last x phases will be returned.
                                   If None, the full history since the beginning of the game is returned.
        :return: A list of `.proto.game.PhaseHistory` proto.
        :type game: diplomacy.Game
    """
    # A negative `from_phase` is used to request the last x phases
    if nb_previous_phases is None:
        from_phase = None
    else:
        from_phase = -1 * nb_previous_phases

    phase_history_protos = []
    for hist in Game.get_phase_history(game, from_phase=from_phase):
        phase_history_protos.append(dict_to_proto(hist.to_dict(), PhaseHistoryProto))
    return phase_history_protos
def get_saved_game_proto(env, players, stored_board_state, stored_prev_orders_state, stored_possible_orders,
                         power_variables, start_phase_ix, reward_fn, advantage_fn, is_partial_game):
    """ Builds the saved game proto from the environment to send back to the learner

        As a side effect, a 'returns' entry is written in `power_variables` for every power of the map.

        :param env: The gym environment (needs to implement a SaveGame wrapper)
        :param players: A list of instantiated players
        :param stored_board_state: A dictionary with phase_name as key and board_state as value
        :param stored_prev_orders_state: A dictionary with phase_name as key and prev_orders_state as value
        :param stored_possible_orders: A dictionary with phase_name as key and possible orders as value
        :param power_variables: A dict containing orders, policy details, values, rewards, returns for each power
        :param start_phase_ix: For partial game, the index of the phase from which to start learning
        :param reward_fn: The reward function to use to calculate rewards
        :param advantage_fn: An instance of `.models.self_play.advantages`
        :param is_partial_game: Boolean that indicates that we are processing an incomplete game
        :return: The saved game in proto format
    """
    # pylint: disable=too-many-arguments
    powers = sorted(get_map_powers(env.game.map))
    assigned_powers = env.get_all_powers_name()

    # Computing the returns of each power from its rewards and state values
    for power_name in powers:
        variables = power_variables[power_name]
        variables['returns'] = advantage_fn.get_returns(variables['rewards'],
                                                        variables['state_values'],
                                                        variables['last_state_value'])

    # Retrieving saved game
    saved_game = env.get_saved_game()
    phases = saved_game['phases']
    last_phase_ix = len(phases) - 1

    # Restoring stored variables on the saved game before converting to proto
    for phase_ix, phase in enumerate(phases):

        # Last phase - Only the (final) state value is stored, then we are done
        if phase_ix == last_phase_ix:
            phase['state_value'] = {power_name: float(power_variables[power_name]['state_values'][-1])
                                    for power_name in powers}
            break

        phase_name = phase['name']

        # Shared fields (board_state, prev_orders_state, possible_orders)
        # prev_orders_state is only restored for phases whose name ends with 'M'
        phase['state']['board_state'] = stored_board_state[phase_name]
        if phase_name[-1] == 'M':
            phase['prev_orders_state'] = stored_prev_orders_state[phase_name]
        phase_possible_orders = stored_possible_orders[phase_name]
        phase['possible_orders'] = {loc: phase_possible_orders[loc].value for loc in phase_possible_orders}

        # Per-power fields (orders, policy_details, state_values)
        orders, policy, state_value = {}, {}, {}
        for power_name in powers:
            orders[power_name] = power_variables[power_name]['orders'][phase_ix]
        for power_name in powers:
            policy[power_name] = power_variables[power_name]['policy_details'][phase_ix]
        for power_name in powers:
            state_value[power_name] = float(power_variables[power_name]['state_values'][phase_ix])
        phase['orders'] = orders
        phase['policy'] = policy
        phase['state_value'] = state_value

    # Adding power assignments, done reason, and kwargs
    if env.done_reason is not None:
        done_reason = env.done_reason.value
    else:
        done_reason = ''
    saved_game['done_reason'] = done_reason
    saved_game['assigned_powers'] = assigned_powers
    saved_game['players'] = [player.name for player in players]

    # The kwargs of a power are the ones of the player found at the same index in assigned_powers
    saved_game['kwargs'] = {power_name: players[assigned_powers.index(power_name)].kwargs
                            for power_name in powers}
    saved_game['is_partial_game'] = is_partial_game
    saved_game['start_phase_ix'] = start_phase_ix if is_partial_game else 0
    saved_game['reward_fn'] = reward_fn.name
    saved_game['rewards'] = {power_name: power_variables[power_name]['rewards'] for power_name in powers}
    saved_game['returns'] = {power_name: power_variables[power_name]['returns'] for power_name in powers}

    # Returning
    return dict_to_proto(saved_game, SavedGameProto)
def extract_possible_orders_proto(game):
    """ Builds the possible_orders_proto of a diplomacy.Game object

        :param game: The diplomacy.Game object
        :return: The result of converting `game.get_all_possible_orders()` with `dict_to_proto` and `MapStringList`
        :type game: diplomacy.Game
    """
    return dict_to_proto(game.get_all_possible_orders(), MapStringList)
def test_to_from_dict():
    """ Tests that a message survives a proto_to_dict -> dict_to_proto round trip """
    original_proto = _get_message()
    round_trip_proto = dict_to_proto(proto_to_dict(original_proto), Message)
    _compare_messages(original_proto, round_trip_proto)