def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number=3, repeat_number=1000):
    """ Plays repeated short episodes in which a QtableHandler chooses the orders of every power.

        An episode lasts `turn_number` processed phases. At the end of an episode the accuracy and the
        center / influence counts are printed and recorded, the game is reloaded from "game.json" and the
        handler counters are reset. Once `repeat_number` episodes have been played, the Q-table and the
        collected statistics are saved and the function returns.

        The loop also stops (without saving anything) if the game reports that it is done.

        :param game: The game to play. It is processed in place until the first reload from disk.
        :param save_game: If true, the game is saved through a GameSaver before every phase is processed.
        :param agent_nation: Forwarded to the QtableHandler constructor.
        :param label: Forwarded to save_stat together with the statistics.
        :param turn_number: Number of processed phases that make up one episode.
        :param repeat_number: Number of episodes after which the results are saved and the function returns.
        :return: Nothing
    """
    set_starting_influence(game)
    saver = GameSaver() if save_game else None
    q_table_handler = QtableHandler(game, agent_nation)
    stats = {"centers": defaultdict(list), "influence": defaultdict(list)}
    iterator = 0                    # Phases processed in the current episode
    state = 0                       # Episodes completed so far

    while not game.is_game_done:
        iterator += 1
        q_table_handler.set_turn_info()

        # Setting orders
        phase = game.get_current_phase()[-1]
        for power_name in game.powers:
            game.set_orders(power_name, q_table_handler.chose_orders(power_name))

        if save_game:
            saver.save_game(game, "gierka")
        game.process()

        # Rewards are only assigned for the phase type that was just processed when it is a movement phase
        if phase == 'M':
            q_table_handler.set_reward()
        # NOTE(review): the nesting of the two statements below was reconstructed from a source whose
        # indentation was lost; they are assumed to run after every processed phase - confirm.
        adjust_influence(game)

        if iterator == turn_number:
            state += 1
            if state % repeat_number == 0:
                q_table_handler.save()
            if state == repeat_number:
                # NOTE(review): the last episode is not appended to `stats` before saving - confirm intended.
                save_stat(stats, turn_number, label)
                return

            iterator = 0
            print("State: ", state)
            print("Accuracy: {0}".format(q_table_handler.get_accuracy()))
            print("Number of Germany centers: ",
                  len(game.get_centers("GERMANY")),
                  len(game.get_power("GERMANY").influence))
            for power_name in game.powers:
                stats["influence"][power_name].append(len(game.get_power(power_name).influence))
                stats["centers"][power_name].append(len(game.get_centers(power_name)))

            # Starting the next episode from the position stored on disk
            game = load_saved_games_from_disk("game.json")[0]
            q_table_handler.game = game
            q_table_handler.attempts = 0
            q_table_handler.miss_hits = 0
def test_now():
    """ Checks that a NOW response built from a new standard game serializes to the expected DAIDE bytes """
    expected_daide = (
        'NOW ( SPR #1901 ) ( AUS AMY BUD ) ( AUS AMY VIE ) ( AUS FLT TRI ) ( ENG FLT EDI )'
        ' ( ENG FLT LON ) ( ENG AMY LVP ) ( FRA FLT BRE ) ( FRA AMY MAR ) ( FRA AMY PAR )'
        ' ( GER FLT KIE ) ( GER AMY BER ) ( GER AMY MUN ) ( ITA FLT NAP ) ( ITA AMY ROM )'
        ' ( ITA AMY VEN ) ( RUS AMY WAR ) ( RUS AMY MOS ) ( RUS FLT SEV )'
        ' ( RUS FLT ( STP SCS ) ) ( TUR FLT ANK ) ( TUR AMY CON ) ( TUR AMY SMY )'
    )

    game = Game(map_name='standard')
    current_phase = game.get_current_phase()

    # Collecting the units and the retreats of every power, keyed by power name
    units_by_power = {}
    for power in game.powers.values():
        units_by_power[power.name] = power.units
    retreats_by_power = {}
    for power in game.powers.values():
        retreats_by_power[power.name] = power.retreats

    response = responses.NOW(phase_name=current_phase,
                             powers_units=units_by_power,
                             powers_retreats=retreats_by_power)

    assert isinstance(response, responses.NOW), 'Expected a NOW response'
    assert bytes(response) == str_to_bytes(expected_daide)
class DiplomacyEnv(gym.Env):
    """ Gym environment that wraps a game of Diplomacy. """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        """ Builds an environment without a game attached (reset() creates the game) """
        self.game = None
        self.curr_seed = 0
        self._last_known_phase = 'S1901M'

    @property
    def current_year(self):
        """ Returns the year of the game relative to the first year of the map
            e.g. S1901M = year 1
                 F1903M = year 3
                 COMPLETED = year of the last phase seen before completion
        """
        phase = self.game.get_current_phase()
        # A completed game has no year in its phase name, so the last real phase is used instead
        phase_with_year = self._last_known_phase if phase == 'COMPLETED' else phase
        absolute_year = int(phase_with_year[1:5])
        return absolute_year - self.game.map.first_year + 1

    @property
    def game_id(self):
        """ Returns the game_id of the current game, or an empty string if there is no game """
        return self.game.game_id if self.game else ''

    @property
    def players(self):
        """ Returns a list of players instances playing the game (not implemented in this class) """
        raise NotImplementedError()

    @property
    def is_done(self):
        """ Indicates if the underlying game is done """
        return self.game.is_game_done

    @property
    def done_reason(self):
        """ Returns DoneReason.GAME_ENGINE if the game is done, otherwise None """
        return DoneReason.GAME_ENGINE if self.is_done else None

    def process(self):
        """ Processes the current orders of the game and remembers the resulting phase """
        self.game.process()
        new_phase = self.game.get_current_phase()
        if new_phase == 'COMPLETED':
            # Keeping the previous value, so current_year can still be computed
            return
        self._last_known_phase = new_phase

    def seed(self, seed=None):
        """ Hashes the seed, stores the result modulo 2**32 and returns it inside a list """
        hashed_seed = seeding.hash_seed(seed)
        self.curr_seed = hashed_seed % 2**32
        return [self.curr_seed]

    def step(self, action):
        """ Submits the orders of one power to the game.
            :param action: Tuple containing the POWER name and its corresponding list of orders.
                            (e.g. ('FRANCE', ['A PAR H', 'A MAR - BUR', ...])
            :return: Nothing
        """
        power_name, orders = action
        is_retreat_phase = self.game.get_current_phase()[-1] == 'R'

        submitted_orders = []
        for order in orders:
            # During retreats, the move separator ' - ' is rewritten as the retreat separator ' R '
            if is_retreat_phase:
                order = order.replace(' - ', ' R ')
            # WAIVE orders are dropped before being sent to the game
            if order != 'WAIVE':
                submitted_orders.append(order)

        self.game.set_orders(power_name, submitted_orders, expand=False)

    def reset(self):
        """ Replaces the current game with a new one
            :return: ** None. This is a deviation from the standard Gym API. **
        """
        new_game = Game(game_id=get_game_id())
        self.game = new_game
        self._last_known_phase = new_game.get_current_phase()

    def get_all_powers_name(self):
        """ Returns the powers of the map (using the map of a new Game if no game is attached) """
        source_game = self.game or Game()
        return get_map_powers(source_game.map)

    def get_player_seeds(self):
        """ Returns a dictionary of power_name: seed to use for all powers (all seeds are 0 without a game) """
        current_game = self.game
        power_names = get_map_powers((current_game or Game()).map)
        if current_game:
            return {name: get_player_seed(current_game.game_id, name) for name in power_names}
        return {name: 0 for name in power_names}

    @staticmethod
    def get_saved_game():
        """ Returns the last saved game """
        # NOTE(review): only the docstring is visible for this method, so it returns None as written - confirm.
def test_custom_int_unit_reward():
    """ Tests for CustomInterimUnitReward """
    power_names = ('AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY')
    thrashed_rewards = dict.fromkeys(power_names, -18.)

    game = Game()
    rew_fn = CustomIntUnitReward()

    def issue_orders():
        """ Sets the orders used by the second and third rounds, processes the game, returns the new state """
        game.set_orders('FRANCE', ['A PIC - BEL', 'A SPA - POR'])
        game.set_orders('AUSTRIA', ['F TRI - VEN', 'A TYR S F TRI - VEN'])
        game.process()
        return extract_state_proto(game)

    def check_case(previous_proto, current_proto, is_terminal, done_reason, expected_rewards):
        """ Asserts the reward of every power (a power missing from expected_rewards must get 0.) """
        for power_name in power_names:
            reward = rew_fn.get_reward(previous_proto,
                                       current_proto,
                                       power_name,
                                       is_terminal_state=is_terminal,
                                       done_reason=done_reason)
            assert reward == expected_rewards.get(power_name, 0.)

    def check_transition(previous_proto, current_proto, expected_rewards):
        """ Asserts the rewards of a transition in the three supported situations """
        # --- Not in terminal state
        check_case(previous_proto, current_proto, False, None, expected_rewards)
        # --- In terminal state
        check_case(previous_proto, current_proto, True, DoneReason.GAME_ENGINE, expected_rewards)
        # --- Thrashing
        check_case(previous_proto, current_proto, True, DoneReason.THRASHED, thrashed_rewards)

    # Issuing orders
    before_proto = extract_state_proto(game)
    game.set_orders('FRANCE', ['A MAR - SPA', 'A PAR - PIC'])
    game.set_orders('AUSTRIA', ['A VIE - TYR'])
    game.process()
    after_proto = extract_state_proto(game)
    assert game.get_current_phase() == 'F1901M'

    # +1 for FRANCE for conquering SPA
    check_transition(before_proto, after_proto, {'FRANCE': 1.})

    # Issuing orders
    before_proto = after_proto
    after_proto = issue_orders()

    # +1 for FRANCE for conquering POR
    # -1 for FRANCE for losing SPA
    # +1 for FRANCE for conquering BEL
    # +1 for AUSTRIA for conquering VEN
    # -1 for ITALY for losing VEN
    check_transition(before_proto, after_proto, {'AUSTRIA': 1., 'FRANCE': 1., 'ITALY': -1.})

    # Issuing orders
    before_proto = after_proto
    after_proto = issue_orders()

    # +0 - No new SCs
    check_transition(before_proto, after_proto, {})