Example #1
0
def dict_to_flatten_board_state(state, map_object):
    """ Builds the flattened (list) board state representation of a game state.
        :param state: A game state (converted to a `StateProto` through `dict_to_proto`).
        :param map_object: The instantiated Map object
        :return: A flat list representation of the board state (81*35 = 2835)
    """
    board_state = proto_to_board_state(dict_to_proto(state, StateProto), map_object)
    flat_board_state = board_state.flatten()
    return flat_board_state.tolist()
Example #2
0
def dict_to_flatten_prev_orders_state(phase, map_object):
    """ Builds the flattened (list) prev orders state representation of a phase.
        :param phase: A phase from a saved game (converted to a `PhaseHistoryProto` through `dict_to_proto`).
        :param map_object: The instantiated Map object
        :return: A flat list representation of the prev orders state (81*40 = 3240)
    """
    prev_orders_state = proto_to_prev_orders_state(dict_to_proto(phase, PhaseHistoryProto), map_object)
    flat_prev_orders_state = prev_orders_state.flatten()
    return flat_prev_orders_state.tolist()
Example #3
0
def extract_state_proto(game):
    """ Builds a state proto from the current state of a diplomacy.Game object
        The state returned by `game.get_state()` is augmented with the game id, the map name
        and the list of rules before being converted with `dict_to_proto`.
        :param game: The game object to read the state from
        :return: The augmented state converted with `dict_to_proto` using `StateProto`
        :type game: diplomacy.Game
    """
    state = game.get_state()

    # Fields that are added on top of the state returned by the game
    extra_fields = (('game_id', game.game_id),
                    ('map', game.map.name),
                    ('rules', list(game.rules)))
    for field_name, field_value in extra_fields:
        state[field_name] = field_value

    return dict_to_proto(state, StateProto)
Example #4
0
def process_game(line):
    """ Process a line in the .jsonl file
        :param line: One line of the .jsonl file, holding a JSON-encoded saved game.
        :return: A tuple (game_id, saved_game_zlib), or (None, None) if the line is empty or only whitespace.
    """
    # Blank lines (e.g. a trailing newline at the end of the file) carry no game,
    # and json.loads would raise on them, so they are treated like an empty line.
    if not line or not line.strip():
        return None, None

    # Decoding the saved game and enriching it before the conversion to proto
    saved_game = json.loads(line)
    saved_game = add_cached_states_to_saved_game(saved_game)
    saved_game = add_possible_orders_to_saved_game(saved_game)

    # Converting to proto, adding rewards (default reward function), and compressing
    saved_game_proto = dict_to_proto(saved_game, SavedGameProto)
    saved_game_proto = add_rewards_to_saved_game_proto(saved_game_proto, DefaultRewardFunction())
    saved_game_zlib = proto_to_zlib(saved_game_proto)
    return saved_game['id'], saved_game_zlib
Example #5
0
def extract_phase_history_proto(game,
                                nb_previous_phases=NB_PREV_ORDERS_HISTORY):
    """ Builds the list of phase history protos of a diplomacy.Game object
        :param game: The diplomacy.Game object
        :param nb_previous_phases: Integer. If set, the history is requested starting from the
                                            negated value (i.e. the last x phases).
                                            If None, no starting phase is given and the full history is requested.
        :return: A list with one `PhaseHistoryProto` conversion per phase of the retrieved history.
        :type game: diplomacy.Game
    """
    # A negative starting index is used to count the phases from the end of the history
    if nb_previous_phases is None:
        from_phase = None
    else:
        from_phase = -1 * nb_previous_phases

    phase_history_protos = []
    for hist in Game.get_phase_history(game, from_phase=from_phase):
        phase_history_protos.append(dict_to_proto(hist.to_dict(), PhaseHistoryProto))
    return phase_history_protos
Example #6
0
def get_saved_game_proto(env, players, stored_board_state,
                         stored_prev_orders_state, stored_possible_orders,
                         power_variables, start_phase_ix, reward_fn,
                         advantage_fn, is_partial_game):
    """ Builds the saved game proto from the environment, to be sent back to the learner
        The returns of each power are computed first (and stored in `power_variables`), then the saved game
        retrieved from the environment is completed phase by phase and with game-level fields.
        :param env: The gym environment (needs to implement a SaveGame wrapper)
        :param players: A list of instantiated players
        :param stored_board_state: A dictionary with phase_name as key and board_state as value
        :param stored_prev_orders_state: A dictionary with phase_name as key and prev_orders_state as value
        :param stored_possible_orders: A dictionary with phase_name as key and possible orders as value
        :param power_variables: A dict containing orders, policy details, values, rewards, returns for each power
        :param start_phase_ix: For partial game, the index of the phase from which to start learning
        :param reward_fn: The reward function used to calculate rewards (only its name is stored)
        :param advantage_fn: An instance of `.models.self_play.advantages`
        :param is_partial_game: Boolean that indicates that we are processing an incomplete game
        :return: The saved game in proto format
    """
    # pylint: disable=too-many-arguments
    powers = sorted(get_map_powers(env.game.map))
    assigned_powers = env.get_all_powers_name()

    # Computing the returns of every power from its rewards and state values
    for power_name in powers:
        variables = power_variables[power_name]
        rewards = variables['rewards']
        state_values = variables['state_values']
        last_state_value = variables['last_state_value']
        variables['returns'] = advantage_fn.get_returns(rewards,
                                                        state_values,
                                                        last_state_value)

    # Retrieving the saved game and completing each of its phases with the stored variables
    saved_game = env.get_saved_game()
    phases = saved_game['phases']
    last_phase_ix = len(phases) - 1

    for phase_ix, phase in enumerate(phases):

        # The last phase only receives the final state value of each power
        if phase_ix == last_phase_ix:
            phase['state_value'] = {
                power_name: float(power_variables[power_name]['state_values'][-1])
                for power_name in powers
            }
            break

        phase_name = phase['name']

        # Shared fields - prev_orders_state is only set when the phase name ends with 'M'
        phase['state']['board_state'] = stored_board_state[phase_name]
        if phase_name[-1] == 'M':
            phase['prev_orders_state'] = stored_prev_orders_state[phase_name]
        phase_possible_orders = stored_possible_orders[phase_name]
        phase['possible_orders'] = {
            loc: phase_possible_orders[loc].value
            for loc in phase_possible_orders
        }

        # Per-power fields, indexed by the position of the phase in the saved game
        phase['orders'] = {
            power_name: power_variables[power_name]['orders'][phase_ix]
            for power_name in powers
        }
        phase['policy'] = {
            power_name: power_variables[power_name]['policy_details'][phase_ix]
            for power_name in powers
        }
        phase['state_value'] = {
            power_name: float(power_variables[power_name]['state_values'][phase_ix])
            for power_name in powers
        }

    # Game-level fields: done reason, power assignments, players and their kwargs
    saved_game['done_reason'] = '' if env.done_reason is None else env.done_reason.value
    saved_game['assigned_powers'] = assigned_powers
    saved_game['players'] = [player.name for player in players]
    saved_game['kwargs'] = {
        power_name: players[assigned_powers.index(power_name)].kwargs
        for power_name in powers
    }

    # Game-level fields: partial game information (start index is forced to 0 for full games)
    saved_game['is_partial_game'] = is_partial_game
    saved_game['start_phase_ix'] = start_phase_ix if is_partial_game else 0

    # Game-level fields: reward function name, rewards and returns of every power
    saved_game['reward_fn'] = reward_fn.name
    saved_game['rewards'] = {
        power_name: power_variables[power_name]['rewards']
        for power_name in powers
    }
    saved_game['returns'] = {
        power_name: power_variables[power_name]['returns']
        for power_name in powers
    }

    # Converting to proto
    return dict_to_proto(saved_game, SavedGameProto)
Example #7
0
def extract_possible_orders_proto(game):
    """ Builds the possible orders proto of a diplomacy.Game object
        :param game: The game object queried through `get_all_possible_orders`
        :return: All the possible orders converted with `dict_to_proto` using `MapStringList`
        :type game: diplomacy.Game
    """
    return dict_to_proto(game.get_all_possible_orders(), MapStringList)
Example #8
0
def test_to_from_dict():
    """ Checks that a message survives a proto -> dict -> proto round trip """
    original_proto = _get_message()
    round_trip_proto = dict_to_proto(proto_to_dict(original_proto), Message)
    _compare_messages(original_proto, round_trip_proto)