Example #1
0
def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number=3, repeat_number=1000):
    """Run repeated short rounds of the game, updating a Q-table handler.

    Every phase, orders chosen by the ``QtableHandler`` are set for each power
    and the game is processed. After ``turn_number`` processed phases a round
    ends: per-power influence and supply-center counts are recorded, and the
    game is reloaded from ``"game.json"`` for the next round. Once
    ``repeat_number`` rounds have completed, the Q-table and the collected
    stats are saved and the function returns.

    If the game reports ``is_game_done`` before a round completes, the loop
    simply ends and nothing further is saved.

    :param game: Game to play; replaced by a freshly loaded copy after each round.
    :param save_game: When true, the game is saved through ``GameSaver`` before
                      every call to ``game.process()``.
    :param agent_nation: Passed through to ``QtableHandler``.
    :param label: Passed through to ``save_stat``.
    :param turn_number: Number of processed phases that make up one round.
    :param repeat_number: Number of rounds after which results are saved and
                          the function returns.
    :return: None
    """
    set_starting_influence(game)
    # Always bind the name; it is only used when save_game is true.
    saver = GameSaver() if save_game else None
    q_table_Handler = QtableHandler(game, agent_nation)
    iterator = 0    # phases processed in the current round
    state = 0       # rounds completed so far
    stats = {"centers": defaultdict(list), "influence": defaultdict(list)}

    while not game.is_game_done:
        iterator += 1
        q_table_Handler.set_turn_info()

        # Remember the phase type now: game.process() advances the phase.
        phase = game.get_current_phase()[-1]
        for power_name in game.powers:
            power_orders = q_table_Handler.chose_orders(power_name)
            game.set_orders(power_name, power_orders)

        if save_game:
            saver.save_game(game, "gierka")

        game.process()

        # Rewards are only set for phases whose name ends in 'M'.
        if phase == 'M':
            q_table_Handler.set_reward()
        adjust_influence(game)

        if iterator != turn_number:
            continue

        # --- End of a round
        state += 1
        if state % repeat_number == 0:
            q_table_Handler.save()
        if state == repeat_number:
            # NOTE(review): stats are written before this round's counts are
            # appended below, so the final round is absent from the saved
            # stats - confirm this is intended.
            save_stat(stats, turn_number, label)
            return

        iterator = 0
        print("State: ", state)
        print("Accuracy: {0}".format(q_table_Handler.get_accuracy()))
        print("Number of Germany centers: ",
              len(game.get_centers("GERMANY")),
              len(game.get_power("GERMANY").influence))
        for power_name in game.powers:
            stats["influence"][power_name].append(len(game.get_power(power_name).influence))
            stats["centers"][power_name].append(len(game.get_centers(power_name)))

        # Start the next round from the saved game and reset the handler's counters.
        game = load_saved_games_from_disk("game.json")[0]
        q_table_Handler.game = game
        q_table_Handler.attempts = 0
        q_table_Handler.miss_hits = 0
Example #2
0
def test_now():
    """ Checks that a NOW response built from a fresh standard game serializes to the expected DAIDE bytes """
    # Expected DAIDE string: the phase clause followed by one clause per unit, grouped by power
    expected_daide = (
        'NOW ( SPR #1901 )'
        ' ( AUS AMY BUD ) ( AUS AMY VIE ) ( AUS FLT TRI )'
        ' ( ENG FLT EDI ) ( ENG FLT LON ) ( ENG AMY LVP )'
        ' ( FRA FLT BRE ) ( FRA AMY MAR ) ( FRA AMY PAR )'
        ' ( GER FLT KIE ) ( GER AMY BER ) ( GER AMY MUN )'
        ' ( ITA FLT NAP ) ( ITA AMY ROM ) ( ITA AMY VEN )'
        ' ( RUS AMY WAR ) ( RUS AMY MOS ) ( RUS FLT SEV ) ( RUS FLT ( STP SCS ) )'
        ' ( TUR FLT ANK ) ( TUR AMY CON ) ( TUR AMY SMY )'
    )

    game = Game(map_name='standard')

    # Collecting units and retreats per power name
    units_by_power = {}
    retreats_by_power = {}
    for power in game.powers.values():
        units_by_power[power.name] = power.units
        retreats_by_power[power.name] = power.retreats

    now_response = responses.NOW(phase_name=game.get_current_phase(),
                                 powers_units=units_by_power,
                                 powers_retreats=retreats_by_power)

    assert isinstance(now_response, responses.NOW), 'Expected a NOW response'
    assert bytes(now_response) == str_to_bytes(expected_daide)
Example #3
0
class DiplomacyEnv(gym.Env):
    """ Gym environment that wraps a single Diplomacy `Game` instance. """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        """ Constructor - No game is attached until `reset` is called """
        self.curr_seed = 0
        self.game = None
        self._last_known_phase = 'S1901M'

    @property
    def current_year(self):
        """ Returns the year of the current phase, counted from the map's first year
            e.g. S1901M = year 1
                 F1903M = year 3
                 COMPLETED = year of the last phase recorded before completion
        """
        phase = self.game.get_current_phase()
        # A completed game no longer exposes a dated phase, so fall back on the last one seen
        phase = self._last_known_phase if phase == 'COMPLETED' else phase
        calendar_year = int(phase[1:5])
        return calendar_year - self.game.map.first_year + 1

    @property
    def game_id(self):
        """ Returns the game_id of the attached game, or an empty string if there is no game """
        return self.game.game_id if self.game else ''

    @property
    def players(self):
        """ Returns a list of players instances playing the game (not implemented on this class) """
        raise NotImplementedError()

    @property
    def is_done(self):
        """ Indicates whether the attached game reports itself as done """
        return self.game.is_game_done

    @property
    def done_reason(self):
        """ Returns DoneReason.GAME_ENGINE when the game is done, otherwise None """
        return DoneReason.GAME_ENGINE if self.is_done else None

    def process(self):
        """ Asks the game to process the current orders and records the resulting phase """
        self.game.process()
        new_phase = self.game.get_current_phase()
        if new_phase == 'COMPLETED':
            # Keep the last dated phase so that current_year can still be computed
            return
        self._last_known_phase = new_phase

    def seed(self, seed=None):
        """ Stores a seed derived from `seeding.hash_seed(seed)` modulo 2**32
            :return: A one-element list containing the stored seed
        """
        hashed_seed = seeding.hash_seed(seed)
        self.curr_seed = hashed_seed % 2**32
        return [self.curr_seed]

    def step(self, action):
        """ Submits the orders of one power to the game.
            :param action: Tuple containing the POWER name and its corresponding list of orders.
                        (e.g. ('FRANCE', ['A PAR H', 'A MAR - BUR', ...])
            :return: Nothing
        """
        power_name, orders = action
        in_retreat_phase = self.game.get_current_phase()[-1] == 'R'

        submitted_orders = []
        for order in orders:
            # In retreat phases, moves written with ' - ' are rewritten with ' R '
            if in_retreat_phase:
                order = order.replace(' - ', ' R ')
            # 'WAIVE' orders are not forwarded to the game
            if order != 'WAIVE':
                submitted_orders.append(order)

        self.game.set_orders(power_name, submitted_orders, expand=False)

    def reset(self):
        """ Replaces the attached game with a brand new one
            :return: ** None. This is a deviation from the standard Gym API. **
        """
        new_game = Game(game_id=get_game_id())
        self.game = new_game
        self._last_known_phase = new_game.get_current_phase()

    def get_all_powers_name(self):
        """ Returns the powers of the map (a default Game is built if none is attached) """
        return get_map_powers((self.game or Game()).map)

    def get_player_seeds(self):
        """ Returns a dictionary of power_name: seed to use for all powers
            Every seed is 0 when no game is attached.
        """
        map_object = (self.game or Game()).map
        power_names = get_map_powers(map_object)
        if self.game:
            return {power_name: get_player_seed(self.game.game_id, power_name)
                    for power_name in power_names}
        return {power_name: 0 for power_name in power_names}

    @staticmethod
    def get_saved_game():
        """ Returns the last saved game """
        # NOTE(review): no body is present here, so this currently returns None.
def test_custom_int_unit_reward():
    """ Tests CustomIntUnitReward over three consecutive processed phases """
    all_powers = ('AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY')
    game = Game()
    rew_fn = CustomIntUnitReward()

    def check_rewards(prev_proto, curr_proto, expected_changes):
        """ Asserts the reward of every power for the transition prev_proto -> curr_proto
            :param expected_changes: Dict of power_name: expected reward (powers not listed expect 0.)
        """
        thrashed_rewards = {power_name: -18. for power_name in all_powers}
        scenarios = (
            (False, None, expected_changes),                        # --- Not in terminal state
            (True, DoneReason.GAME_ENGINE, expected_changes),       # --- In terminal state
            (True, DoneReason.THRASHED, thrashed_rewards),          # --- Thrashing
        )
        for is_terminal, done_reason, expected in scenarios:
            for power_name in all_powers:
                reward = rew_fn.get_reward(prev_proto,
                                           curr_proto,
                                           power_name,
                                           is_terminal_state=is_terminal,
                                           done_reason=done_reason)
                assert reward == expected.get(power_name, 0.)

    # Issuing orders
    before_proto = extract_state_proto(game)
    game.set_orders('FRANCE', ['A MAR - SPA', 'A PAR - PIC'])
    game.set_orders('AUSTRIA', ['A VIE - TYR'])
    game.process()
    after_proto = extract_state_proto(game)
    assert game.get_current_phase() == 'F1901M'

    # +1 for FRANCE for conquering SPA
    check_rewards(before_proto, after_proto, {'FRANCE': 1.})

    # Issuing orders
    before_proto = after_proto
    game.set_orders('FRANCE', ['A PIC - BEL', 'A SPA - POR'])
    game.set_orders('AUSTRIA', ['F TRI - VEN', 'A TYR S F TRI - VEN'])
    game.process()
    after_proto = extract_state_proto(game)

    # +1 for FRANCE for conquering POR
    # -1 for FRANCE for losing SPA
    # +1 for FRANCE for conquering BEL
    # +1 for AUSTRIA for conquering VEN
    # -1 for ITALY for losing VEN
    check_rewards(before_proto, after_proto, {'AUSTRIA': 1., 'FRANCE': 1., 'ITALY': -1.})

    # Issuing orders (same orders as in the previous phase)
    before_proto = after_proto
    game.set_orders('FRANCE', ['A PIC - BEL', 'A SPA - POR'])
    game.set_orders('AUSTRIA', ['F TRI - VEN', 'A TYR S F TRI - VEN'])
    game.process()
    after_proto = extract_state_proto(game)

    # +0 - No new SCs
    check_rewards(before_proto, after_proto, {})