def play_best(game: Game, save_game: bool, agent_nation: list, turn_number=3, repeat_number=1000):
    """ Plays the game until it is done or `turn_number` phases were processed

        Powers listed in `agent_nation` get their orders from `QtableHandler.chose_best`, every other
        power gets its orders from `QtableHandler.chose_orders`.

        :param game: The game to play. It is processed in place.
        :param save_game: If True, the game is saved with a GameSaver before each phase is processed.
        :param agent_nation: The power names whose orders are selected with `chose_best`.
        :param turn_number: The number of processed phases after which the function returns.
        :param repeat_number: Not used by this function.
        :return: Nothing
    """
    set_starting_influence(game)
    # The saver is always bound, so the save step below never depends on a conditionally defined name
    saver = GameSaver() if save_game else None
    q_table_handler = QtableHandler(game, agent_nation)
    phases_played = 0

    while not game.is_game_done:
        phases_played += 1
        q_table_handler.set_turn_info()

        # Setting orders
        for power_name in game.powers:
            if power_name in agent_nation:
                power_orders = q_table_handler.chose_best(power_name)
            else:
                power_orders = q_table_handler.chose_orders(power_name)
            game.set_orders(power_name, power_orders)

        if saver is not None:
            saver.save_game(game, "gierka")
        game.process()
        adjust_influence(game)

        if phases_played == turn_number:
            return
def add_possible_orders_to_saved_game(saved_game):
    """ Stores the possible orders of every phase under the 'possible_orders' key of that phase

        Only saved games whose map name starts with 'standard' are processed, the others are returned untouched.

        :param saved_game: The saved game (dictionary). Its phases are updated in place.
        :return: The saved game received as argument
    """
    if not saved_game['map'].startswith('standard'):
        return saved_game

    # A fresh game is built for every phase and set to the state of that phase
    for phase_dict in saved_game['phases']:
        phase_game = Game(map_name=saved_game['map'], rules=saved_game['rules'])
        phase_game.set_state(phase_dict['state'])
        phase_dict['possible_orders'] = phase_game.get_all_possible_orders()
    return saved_game
def test_mis_003():
    """ Checks the MIS response of FRANCE for phase W1901A on a new standard game """
    expected_daide = 'MIS ( #0 )'
    france = Game(map_name='standard').get_power('FRANCE')
    mis_response = responses.MIS(phase_name='W1901A', power=france)
    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    assert bytes(mis_response) == str_to_bytes(expected_daide)
def test_mis_010():
    """ Checks the MIS response of FRANCE for phase S1901M when an order is stored under the 'INVALID' key """
    expected_daide = 'MIS ( FRA FLT BRE ) ( FRA AMY MAR )'
    france = Game(map_name='standard').get_power('FRANCE')
    france.orders['INVALID'] = 'A PAR - BUR'
    mis_response = responses.MIS(phase_name='S1901M', power=france)
    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    assert bytes(mis_response) == str_to_bytes(expected_daide)
def test_get_nb_centers():
    """ Compares the number of centers returned by the player with the centers held in the game, for every power """
    game = Game()
    player = FakePlayer()
    state_proto = extract_state_proto(game)

    # The power names are copied in a list before looping over them
    for name in list(game.powers):
        nb_centers = player.get_nb_centers(state_proto, name)
        assert nb_centers == len(game.get_power(name).centers)
def test_mis_012():
    """ Checks the MIS response of FRANCE for phase W1901A after adding LON to its centers
        and keeping only its first two units
    """
    expected_daide = 'MIS ( #-1 )'
    france = Game(map_name='standard').get_power('FRANCE')
    france.centers += ['LON']
    france.units = france.units[:2]
    mis_response = responses.MIS(phase_name='W1901A', power=france)
    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    assert bytes(mis_response) == str_to_bytes(expected_daide)
def test_mis_002():
    """ Checks the MIS response of TURKEY for phase S1901R when its three units have retreat locations """
    expected_daide = ('MIS ( TUR FLT ANK MRT ( ARM ) ) '
                      '( TUR FLT CON MRT ( BLA SMY ( BUL ECS ) ( BUL SCS ) ) ) '
                      '( TUR AMY SMY MRT ( SYR ) )')
    turkey = Game(map_name='standard').get_power('TURKEY')
    turkey.units = ['F ANK', 'F CON', 'A SMY']

    # Retreat locations, set in the same order as the units above
    retreat_options = (('F ANK', ['ARM']),
                       ('F CON', ['BLA', 'SMY', 'BUL/EC', 'BUL/SC']),
                       ('A SMY', ['SYR']))
    for unit, retreat_locs in retreat_options:
        turkey.retreats[unit] = retreat_locs

    mis_response = responses.MIS(phase_name='S1901R', power=turkey)
    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    assert bytes(mis_response) == str_to_bytes(expected_daide)
def test_get_state_value(self):
    """ Makes sure .get_state_value returns a non-zero result, with and without prefetching """
    game = Game()
    state_proto = extract_state_proto(game)
    phase_history_proto = extract_phase_history_proto(game)
    query_kwargs = {'player_seed': 0,
                    'noise': 0.,
                    'temperature': 0.,
                    'dropout_rate': 0.}

    for use_prefetching in (False, True):
        if use_prefetching:
            # Prefetching: the fetches are requested, processed, then passed back to the adapter
            fetches = yield self.adapter.get_state_value(state_proto,
                                                         'FRANCE',
                                                         phase_history_proto,
                                                         prefetch=True,
                                                         **query_kwargs)
            fetches = yield process_fetches_dict(self.queue_dataset, fetches)
            results = yield self.adapter.get_state_value(state_proto,
                                                         'FRANCE',
                                                         phase_history_proto,
                                                         fetches=fetches,
                                                         **query_kwargs)
        else:
            results = yield self.adapter.get_state_value(state_proto,
                                                         'FRANCE',
                                                         phase_history_proto,
                                                         **query_kwargs)
        assert results != 0.
def test_board_state():
    """ Rebuilds a game from its state proto and checks that both games give the same board state and hash """
    original_game = Game()
    game_map = original_game.map
    original_proto = state_space.extract_state_proto(original_game)
    rebuilt_game = state_space.build_game_from_state_proto(original_proto)

    # Board states of the original game and of the rebuilt game
    rebuilt_proto = state_space.extract_state_proto(rebuilt_game)
    original_board = state_space.proto_to_board_state(original_proto, game_map)
    rebuilt_board = state_space.proto_to_board_state(rebuilt_proto, game_map)

    expected_shape = (state_space.NB_NODES, state_space.NB_FEATURES)
    assert np.allclose(original_board, rebuilt_board)
    assert original_board.shape == expected_shape
    assert original_game.get_hash() == rebuilt_game.get_hash()
def get_opening_orders(self):
    """ Computes the orders the policy adapter returns for every power on a new game

        :return: A dictionary of power_name: [orders] for each power
    """
    game = Game()
    state_proto = extract_state_proto(game)
    phase_history_proto = extract_phase_history_proto(game)
    possible_orders_proto = extract_possible_orders_proto(game)

    # One request per power, all yielded together
    # No kwargs are passed, so the default player_seed, noise, temperature, and dropout_rate are used
    pending_requests = []
    for power_name in game.powers:
        pending_requests.append(
            self.policy_adapter.get_orders(self.get_orderable_locations(state_proto, power_name),
                                           state_proto,
                                           power_name,
                                           phase_history_proto,
                                           possible_orders_proto,
                                           retry_on_failure=False))
    power_orders = yield pending_requests

    # Each result is a tuple (orders, policy_details), only the orders are kept
    opening_orders = {}
    for power_name, result in zip(game.powers.keys(), power_orders):
        opening_orders[power_name] = result[0]
    return opening_orders
def test_now():
    """ Checks the NOW response built from the units and retreats of a new standard game """
    expected_daide = ('NOW ( SPR #1901 ) ( AUS AMY BUD ) ( AUS AMY VIE ) ( AUS FLT TRI ) ( ENG FLT EDI )'
                      ' ( ENG FLT LON ) ( ENG AMY LVP ) ( FRA FLT BRE ) ( FRA AMY MAR ) ( FRA AMY PAR )'
                      ' ( GER FLT KIE ) ( GER AMY BER ) ( GER AMY MUN ) ( ITA FLT NAP ) ( ITA AMY ROM )'
                      ' ( ITA AMY VEN ) ( RUS AMY WAR ) ( RUS AMY MOS ) ( RUS FLT SEV )'
                      ' ( RUS FLT ( STP SCS ) ) ( TUR FLT ANK ) ( TUR AMY CON ) ( TUR AMY SMY )')
    game = Game(map_name='standard')
    current_phase = game.get_current_phase()

    # Units and retreats of every power, indexed by power name
    units_by_power = {}
    retreats_by_power = {}
    for power in game.powers.values():
        units_by_power[power.name] = power.units
    for power in game.powers.values():
        retreats_by_power[power.name] = power.retreats

    now_response = responses.NOW(phase_name=current_phase,
                                 powers_units=units_by_power,
                                 powers_retreats=retreats_by_power)
    assert isinstance(now_response, responses.NOW), 'Expected a NOW response'
    assert bytes(now_response) == str_to_bytes(expected_daide)
def get_player_seeds(self):
    """ Builds the seed to use for each power of the map

        :return: A dictionary of power_name: seed. Every seed is 0 when self.game is not set, otherwise
                 the seed is computed by get_player_seed from the game id and the power name.
    """
    # A default game is only created to retrieve its map when no game is set
    map_object = (self.game or Game()).map
    power_names = get_map_powers(map_object)

    if self.game:
        return {name: get_player_seed(self.game.game_id, name) for name in power_names}
    return {name: 0 for name in power_names}
def test_ord_004():
    """ Checks the ORD response of a support order with the BOUNCE and DISLODGED results """
    order_daide = '( ENG FLT NWG ) SUP ( ENG AMY YOR ) MTO NWY'
    expected_daide = 'ORD ( SPR #1901 ) ( ( ENG FLT NWG ) SUP ( ENG AMY YOR ) MTO NWY ) ( NSO )'
    game = Game(map_name='standard')
    phase_abbr = game.map.phase_abbr(game.phase)
    ord_response = responses.ORD(phase_name=phase_abbr,
                                 order_bytes=str_to_bytes(order_daide),
                                 results=[BOUNCE.code, DISLODGED])
    assert isinstance(ord_response, responses.ORD), 'Expected a ORD response'
    assert bytes(ord_response) == str_to_bytes(expected_daide)
def test_get_orderable_locations():
    """ Compares the orderable locations returned by the player with the unit locations
        and the build homes found in the state proto, for every power
    """
    game = Game()
    player = FakePlayer()
    state_proto = extract_state_proto(game)

    for name in list(game.powers):
        # Unit locations (the '*' marker is removed, then characters 2 to 4 are kept), followed by the homes
        expected_locs = []
        for unit in state_proto.units[name].value:
            expected_locs.append(unit.replace('*', '')[2:5])
        expected_locs.extend(state_proto.builds[name].homes)

        returned_locs = player.get_orderable_locations(state_proto, name)
        assert sorted(returned_locs) == sorted(expected_locs)
def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number = 3, repeat_number= 1000):
    """ Repeatedly plays rounds of `turn_number` phases, updating the Q-table handler along the way

        Every phase, each power gets its orders from `QtableHandler.chose_orders`, the game is processed,
        and `QtableHandler.set_reward` is called when the phase that was just played had a name ending with 'M'.
        After each round, statistics are collected and the game is reloaded from "game.json".
        The function returns once `repeat_number` rounds have been completed.

        :param game: The game to play. It is processed in place, then replaced locally by the reloaded game.
        :param save_game: If True, the game is saved with a GameSaver before each phase is processed.
        :param agent_nation: Forwarded to the QtableHandler constructor.
        :param label: Forwarded to `save_stat` with the collected statistics.
        :param turn_number: The number of phases in one round.
        :param repeat_number: The number of rounds to play before saving the statistics and returning.
        :return: Nothing
    """
    # NOTE(review): the nesting below was reconstructed from a flattened source line - confirm it
    # against the project history before relying on it.
    set_starting_influence(game)
    if save_game:
        saver = GameSaver()
    q_table_Handler = QtableHandler(game, agent_nation)
    iterator = 0                # Number of phases played in the current round
    state = 0                   # Number of completed rounds
    finish = False              # NOTE(review): never set to True in this function
    stats = {"centers":defaultdict(list), "influence":defaultdict(list)}
    while not game.is_game_done and not finish:
        iterator += 1
        q_table_Handler.set_turn_info()

        # Setting orders
        # The last character of the phase name is read before processing, since processing changes the phase
        phase = game.get_current_phase()[-1]
        for power_name, power in game.powers.items():
            power_orders = q_table_Handler.chose_orders(power_name)
            game.set_orders(power_name, power_orders)
        #visualizer.paint_orders(game)
        if save_game:
            saver.save_game(game, "gierka")
        game.process()
        if phase == 'M':
            q_table_Handler.set_reward()
        adjust_influence(game)

        # End of a round
        if iterator == turn_number:
            state += 1
            if state % repeat_number == 0:
                q_table_Handler.save()
            if state == repeat_number:
                save_stat(stats,turn_number,label)
                # NOTE(review): this local assignment has no visible effect, the function returns right after
                game = load_saved_games_from_disk("game.json")[0]
                return
            iterator = 0
            print("State: ", state)
            print("Accuracy: {0}".format(q_table_Handler.get_accuracy()))
            print("Number of Germany centers: ", game.get_centers("GERMANY").__len__(), game.get_power("GERMANY").influence.__len__())

            # Recording the number of influence locations and centers of each power for this round
            for power_name, _ in game.powers.items():
                stats["influence"][power_name].append(game.get_power(power_name).influence.__len__())
                stats["centers"][power_name].append(game.get_centers(power_name).__len__())

            # Starting the next round from the first game stored in "game.json", with the handler counters reset
            game = load_saved_games_from_disk("game.json")[0]
            q_table_Handler.game = game
            q_table_Handler.attempts = 0
            q_table_Handler.miss_hits = 0
def test_mis_008():
    """ Checks the MIS response of FRANCE for phase S1901M after setting the order 'A PAR - BUR'
        on a game with the NO_CHECK rule
    """
    expected_daide = 'MIS ( FRA FLT BRE ) ( FRA AMY MAR )'
    game = Game(map_name='standard')
    game.add_rule('NO_CHECK')
    game.set_orders('FRANCE', ['A PAR - BUR'])
    france = game.get_power('FRANCE')
    mis_response = responses.MIS(phase_name='S1901M', power=france)
    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    assert bytes(mis_response) == str_to_bytes(expected_daide)
def test_sco():
    """ Checks the SCO response built from the centers of every power of a new standard game """
    expected_daide = ('SCO ( AUS BUD TRI VIE ) ( ENG EDI LON LVP ) ( FRA BRE MAR PAR ) '
                      '( GER BER KIE MUN ) ( ITA NAP ROM VEN ) ( RUS MOS SEV STP WAR ) '
                      '( TUR ANK CON SMY ) ( UNO BEL BUL DEN GRE HOL NWY POR RUM SER SPA SWE TUN )')
    game = Game(map_name='standard')

    # Centers of every power, indexed by power name
    centers_by_power = {}
    for power in game.powers.values():
        centers_by_power[power.name] = power.centers

    sco_response = responses.SCO(centers_by_power, map_name='standard')
    assert isinstance(sco_response, responses.SCO), 'Expected a SCO response'
    assert bytes(sco_response) == str_to_bytes(expected_daide)
def extract_phase_history_proto(game, nb_previous_phases=NB_PREV_ORDERS_HISTORY):
    """ Converts the phase history of a diplomacy.Game object to a list of protos

        :param game: The diplomacy.Game object
        :param nb_previous_phases: Integer. If set, the history is requested from phase -nb_previous_phases.
                                   If None, the history is requested without a starting phase.
        :return: A list of `.proto.game.PhaseHistory` proto.
        :type game: diplomacy.Game
    """
    if nb_previous_phases is None:
        from_phase = None
    else:
        from_phase = -1 * nb_previous_phases

    # Each history item is converted to a dict, then to a proto
    phase_history_protos = []
    for phase_hist in Game.get_phase_history(game, from_phase=from_phase):
        phase_history_protos.append(dict_to_proto(phase_hist.to_dict(), PhaseHistoryProto))
    return phase_history_protos
def test_int_norm_centers_reward():
    """ Checks IntNormNbCentersReward after moving the PAR center from FRANCE to GERMANY """
    game = Game()
    rew_fn = IntNormNbCentersReward()

    # The previous state is extracted before the transfer, the current state after it
    prev_state_proto = extract_state_proto(game)
    for power in game.powers.values():
        if power.name == 'FRANCE':
            power.centers.remove('PAR')
        if power.name == 'GERMANY':
            power.centers.append('PAR')
    state_proto = extract_state_proto(game)
    assert rew_fn.name == 'int_norm_nb_centers_reward'

    def get_reward(power_name, is_terminal, done_reason):
        """ Computes the reward of a power between the previous state and the current state """
        return rew_fn.get_reward(prev_state_proto,
                                 state_proto,
                                 power_name,
                                 is_terminal_state=is_terminal,
                                 done_reason=done_reason)

    # Expected reward per power (same values in terminal and non-terminal states)
    expected_rewards = (('AUSTRIA', 0.),
                        ('ENGLAND', 0.),
                        ('FRANCE', -1. / 18),
                        ('GERMANY', 1. / 18),
                        ('ITALY', 0.),
                        ('RUSSIA', 0.),
                        ('TURKEY', 0.))

    # --- Not in terminal state
    for power_name, expected in expected_rewards:
        assert get_reward(power_name, False, None) == expected

    # --- In terminal state
    for power_name, expected in expected_rewards:
        assert get_reward(power_name, True, DoneReason.GAME_ENGINE) == expected

    # --- Thrashing
    for power_name, _ in expected_rewards:
        assert get_reward(power_name, True, DoneReason.THRASHED) == -1.
def main():
    """ Plays a game with random orders on the map given on the command line, then appends it to disk

        The first positional command line argument is the map name ("shiftLeft" if there is none).
        The path "<map name>.map" must exist, otherwise the program exits with an error message.
        The program also exits with an error message if the command line options cannot be parsed.
        When the game is done, it is appended to 'collected_autoplay_games.json'.
    """
    try:
        _, arguments = getopt.getopt(sys.argv[1:], 'h')
    except getopt.error:
        sys.exit("Unknown input parameter.")
    if not arguments:
        arguments = ["shiftLeft"]
    map_name = arguments[0]
    if not os.path.exists(map_name + ".map"):
        sys.exit("%s.map could not be opened" % (map_name, ))

    game = Game(map_name=map_name)
    while not game.is_game_done:

        # Getting the list of possible orders for all locations
        possible_orders = game.get_all_possible_orders()

        # For each power, randomly sampling a valid order
        for power_name in game.powers:
            power_orders = []
            for loc in game.get_orderable_locations(power_name):
                # A trailing '/' is removed before the lookup. endswith also accepts an empty location,
                # whereas indexing the last character would raise an IndexError.
                if loc.endswith('/'):
                    loc = loc[:-1]
                loc_orders = possible_orders[loc]
                if loc_orders:
                    power_orders.append(random.choice(loc_orders))
            game.set_orders(power_name, power_orders)

        # Messages can be sent locally with game.add_message

        # Processing the game to move to the next phase
        game.process()

    # Exporting the game to disk to visualize (game is appended)
    # NOTE(review): file.write needs a str - confirm that to_saved_game_format returns one here
    # (if it returns a dict, it has to be serialized first, e.g. with json.dumps).
    with open('collected_autoplay_games.json', 'a') as outp:
        outp.write(to_saved_game_format(game))
def __init__(self, env, players, power_assignments):
    """ Constructor

        :param env: The wrapped env
        :param players: A list of player instances
        :param power_assignments: The list of powers (in order) to assign to each player.
                                  It must contain exactly the powers of the game.
        :type env: gym.core.Env
        :type players: List[diplomacy_research.players.player.Player]
    """
    super(AssignPlayers, self).__init__(env)
    game = self.game or Game()
    game_powers = game.powers.keys()

    # The assignments must list every power of the game exactly once
    assert len(power_assignments) == len(game_powers)
    assert sorted(power_assignments) == sorted(game_powers)

    # The ordering is fixed
    self._powers = power_assignments
    self._players = players
def test_get_feedable_item(self):
    """ Makes sure .get_feedable_item returns a truthy item for FRANCE on a new game """
    game = Game()
    state_proto = extract_state_proto(game)
    phase_history_proto = extract_phase_history_proto(game)
    possible_orders_proto = extract_possible_orders_proto(game)
    locs = ['PAR', 'MAR', 'BUR']
    query_kwargs = {'player_seed': 0,
                    'noise': 0.,
                    'temperature': 0.,
                    'dropout_rate': 0.}
    feedable_item = self.dataset_builder.get_feedable_item(locs,
                                                           state_proto,
                                                           'FRANCE',
                                                           phase_history_proto,
                                                           possible_orders_proto,
                                                           **query_kwargs)
    assert feedable_item
def check_serving(player, serving_port):
    """ Queries orders from the player and relaunches the serving process if none can be retrieved

        :param player: A player object to query the server
        :param serving_port: The port to use for TF serving
    """
    test_game = Game()

    # Up to MAX_SENTINEL_CHECKS attempts, stopping at the first attempt that returns orders
    nb_attempts = 0
    while nb_attempts < MAX_SENTINEL_CHECKS:
        orders = yield player.get_orders(test_game, 'FRANCE')
        if orders:
            return
        nb_attempts += 1

    # Every attempt failed, the serving process is launched again
    LOGGER.warning('Could not retrieve orders from the serving process after %d attempts.', MAX_SENTINEL_CHECKS)
    LOGGER.warning('Restarting TF serving server.')
    launch_serving(None, serving_port)
def test_get_draw_prob(self):
    """ Makes sure the policy details returned by .get_orders hold a draw action and a draw probability,
        with and without prefetching
    """
    game = Game()
    state_proto = extract_state_proto(game)
    phase_history_proto = extract_phase_history_proto(game)
    possible_orders_proto = extract_possible_orders_proto(game)
    locs = ['PAR', 'MAR', 'BUR']

    # Temperature == 1.
    query_kwargs = {'player_seed': 0,
                    'noise': 0.,
                    'temperature': 1.,
                    'dropout_rate': 0.}

    for use_prefetching in (False, True):
        if use_prefetching:
            # Prefetching: the fetches are requested, processed, then passed back to the adapter
            fetches = yield self.adapter.get_orders(locs,
                                                    state_proto,
                                                    'FRANCE',
                                                    phase_history_proto,
                                                    possible_orders_proto,
                                                    prefetch=True,
                                                    **query_kwargs)
            fetches = yield process_fetches_dict(self.queue_dataset, fetches)
            _, policy_details = yield self.adapter.get_orders(locs,
                                                              state_proto,
                                                              'FRANCE',
                                                              phase_history_proto,
                                                              possible_orders_proto,
                                                              fetches=fetches,
                                                              **query_kwargs)
        else:
            _, policy_details = yield self.adapter.get_orders(locs,
                                                              state_proto,
                                                              'FRANCE',
                                                              phase_history_proto,
                                                              possible_orders_proto,
                                                              **query_kwargs)

        assert policy_details['draw_action'] in (True, False)
        assert 0. < policy_details['draw_prob'] < 1.
def test_norm_centers_reward():
    """ Checks NormNbCentersReward when the previous state and the current state are both a new game """
    game = Game()
    rew_fn = NormNbCentersReward()
    prev_state_proto = extract_state_proto(game)
    state_proto = extract_state_proto(game)
    assert rew_fn.name == 'norm_nb_centers_reward'

    def get_reward(power_name, is_terminal, done_reason):
        """ Computes the reward of a power between the previous state and the current state """
        return rew_fn.get_reward(prev_state_proto,
                                 state_proto,
                                 power_name,
                                 is_terminal_state=is_terminal,
                                 done_reason=done_reason)

    # Expected reward per power in a terminal state
    terminal_rewards = (('AUSTRIA', 3. / 18),
                        ('ENGLAND', 3. / 18),
                        ('FRANCE', 3. / 18),
                        ('GERMANY', 3. / 18),
                        ('ITALY', 3. / 18),
                        ('RUSSIA', 4. / 18),
                        ('TURKEY', 3. / 18))

    # --- Not in terminal state
    for power_name, _ in terminal_rewards:
        assert get_reward(power_name, False, None) == 0.

    # --- In terminal state
    for power_name, expected in terminal_rewards:
        assert get_reward(power_name, True, DoneReason.GAME_ENGINE) == expected

    # --- Thrashing
    for power_name, _ in terminal_rewards:
        assert get_reward(power_name, True, DoneReason.THRASHED) == 0.
def get_opening_orders(self):
    """ Computes the orders returned by .get_orders_with_proto for every power on a new game

        :return: A dictionary of power_name: [orders] for each power
    """
    game = Game()
    state_proto = extract_state_proto(game)
    phase_history_proto = extract_phase_history_proto(game)
    possible_orders_proto = extract_possible_orders_proto(game)

    # One request per power, all yielded together
    # No kwargs are passed, so the default player_seed, noise, temperature, and dropout_rate are used
    pending_requests = []
    for power_name in game.powers:
        pending_requests.append(self.get_orders_with_proto(state_proto,
                                                           power_name,
                                                           phase_history_proto,
                                                           possible_orders_proto,
                                                           retry_on_failure=False))
    power_orders = yield pending_requests

    opening_orders = {}
    for power_name, orders in zip(game.powers.keys(), power_orders):
        opening_orders[power_name] = orders
    return opening_orders
def __init__(self, env, players, clusters=None):
    """ Constructor

        :param env: The wrapped env
        :param players: A list of player instances (all 7 players if no clusters, otherwise 1 player per cluster)
        :param clusters: Optional. Contains a list of clusters, where each cluster is a tuple with
                            - 1) the number of players inside it, 2) a boolean that indicates if the players
                              need to be merged
                         e.g. [(1,False), (3, False), (3, False)] would create a 1 vs 3 vs 3 game (3 clusters),
                         where players[0] is assigned to the first cluster, players[1] is copied 3 times and
                         assigned to the second cluster, and players[2] is copied 3 times and assigned to the
                         third cluster.
                         If None, one cluster (1, False) is used per power.
        :raises ValueError: If there are fewer players than clusters.
        :type env: gym.core.Env
        :type players: List[diplomacy_research.players.player.Player]
    """
    super(RandomizePlayers, self).__init__(env)
    game = self.game or Game()
    self._powers = list(game.powers)

    if clusters is None:
        clusters = [(1, False)] * len(self._powers)
    self._clusters = clusters

    # At least one player is needed per cluster
    nb_clusters = len(self._clusters)
    if len(players) < nb_clusters:
        LOGGER.error('The nb of players (%d) must be greater or equal to %d.', len(players), nb_clusters)
        raise ValueError()

    # The player of each cluster is repeated as many times as there are players in the cluster
    self._players = []
    for cluster_ix in range(nb_clusters):
        nb_players_in_cluster = self._clusters[cluster_ix][0]
        self._players.extend([players[cluster_ix]] * nb_players_in_cluster)

    # Ordering used to shuffle players (initially the identity ordering)
    self._ordering = list(range(len(self._players)))
def _set_units_and_centers(game, phase):
    """ Replaces the units and centers of the game with those of a processed phase (skipping 'GLOBAL') """
    # Units
    game.clear_units()
    for power_name, power_units in phase['units'].items():
        if power_name != 'GLOBAL':
            game.set_units(power_name, power_units)

    # Centers
    game.clear_centers()
    for power_name, power_centers in phase['centers'].items():
        if power_name != 'GLOBAL':
            game.set_centers(power_name, power_centers)


def state_dict_to_game_and_power(state_dict, country_id, max_phases=None):
    """ Converts a game state from the dictionary format to an actual diplomacy.Game object with the related power.

        :param state_dict: The game state in dictionary format from webdiplomacy.net
        :param country_id: The country id we want to convert.
        :param max_phases: Optional. If set, improve speed by only keeping the last 'x' phases to regenerate the game.
        :return: A tuple of
                    1) None, None       - if the state dict is None, misses a required field, or has no phases
                    2) game, power_name - on successful conversion
    """
    if state_dict is None:
        return None, None

    req_fields = ('gameID', 'variantID', 'turn', 'phase', 'gameOver', 'phases', 'standoffs', 'occupiedFrom')
    if any(field not in state_dict for field in req_fields):
        LOGGER.error('The required fields for state dict are %s. Cannot translate %s', req_fields, state_dict)
        return None, None

    # Extracting information
    game_id = str(state_dict['gameID'])
    map_id = int(state_dict['variantID'])
    standoffs = state_dict['standoffs']
    occupied_from = state_dict['occupiedFrom']

    # Parsing all phases
    state_dict_phases = state_dict.get('phases', [])
    if max_phases is not None and isinstance(max_phases, int):
        state_dict_phases = state_dict_phases[-1 * max_phases:]

    # Without any phase there is no current phase to set, so the conversion is not possible.
    # Returning here avoids an IndexError when the last phase is read below.
    if not state_dict_phases:
        return None, None
    all_phases = [process_phase_dict(phase_dict, map_id=map_id) for phase_dict in state_dict_phases]

    # Building game - Replaying the last phases
    game = Game(game_id=game_id, map_name=CACHE['ix_to_map'][map_id])

    for phase_to_replay in all_phases[:-1]:
        game.set_current_phase(phase_to_replay['name'])
        _set_units_and_centers(game, phase_to_replay)

        # Orders
        for power_name, power_orders in phase_to_replay['orders'].items():
            if power_name != 'GLOBAL':
                game.set_orders(power_name, power_orders)

        # Processing
        game.process()

    # Setting the current phase (its orders are not set, and it is not processed)
    current_phase = all_phases[-1]
    game.set_current_phase(current_phase['name'])
    _set_units_and_centers(game, current_phase)

    # Setting retreat locs
    if current_phase['name'][-1] == 'R':
        invalid_retreat_locs = set()
        attack_source = {}

        # Loc is occupied
        for power in game.powers.values():
            for unit in power.units:
                invalid_retreat_locs.add(unit[2:5])

        # Loc was in standoff
        if standoffs:
            for loc_dict in standoffs:
                _, loc = center_dict_to_str(loc_dict, map_id=map_id)
                invalid_retreat_locs.add(loc[:3])

        # Loc was attacked from
        if occupied_from:
            for loc_id, occupied_from_id in occupied_from.items():
                loc_name = CACHE[map_id]['ix_to_loc'][int(loc_id)][:3]
                from_loc_name = CACHE[map_id]['ix_to_loc'][int(occupied_from_id)][:3]
                attack_source[loc_name] = from_loc_name

        # Removing invalid retreat locs
        # A unit can neither retreat to an invalid loc, nor to the loc its own loc was attacked from
        for power in game.powers.values():
            for retreat_unit in power.retreats:
                attacked_from = attack_source.get(retreat_unit[2:5], '')
                power.retreats[retreat_unit] = [loc for loc in power.retreats[retreat_unit]
                                                if loc[:3] not in invalid_retreat_locs
                                                and loc[:3] != attacked_from]

    # Returning
    power_name = CACHE[map_id]['ix_to_power'][country_id]
    return game, power_name
def render_saved_game(saved_game, output_dir, prefix=''):
    """ Renders every phase of a saved game to SVG files (two files per phase, without then with orders)

        The rendering is skipped if the target folder already holds two .svg files per phase.
        Otherwise the target folder is deleted and generated again.

        :param saved_game: The saved game to render
        :param output_dir: The output directory where to save the rendering
        :param prefix: An optional prefix to add before the game id
    """
    game_id = saved_game['id']
    folder_name = prefix + '_' + game_id if prefix else game_id
    output_dir = os.path.join(output_dir, folder_name)
    phases = saved_game['phases']

    # Checking if already generated
    # Otherwise, regenerating completely
    if os.path.exists(output_dir):
        nb_svg = sum(1 for file_name in os.listdir(output_dir) if file_name.endswith('.svg'))
        if nb_svg == 2 * len(phases):
            print('Rendered {} (Skipped)'.format(game_id))
            return
        shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # One game replays all the orders, and a new game per phase is used to validate it
    entire_game = Game()
    if phases:
        entire_game.set_state(phases[0]['state'])

    svg_count = 0
    for phase in phases:
        phase_game = Game()

        # Setting state
        phase_game.set_state(phase['state'])
        entire_game.note = phase_game.note

        # Setting orders
        phase_game.clear_orders()
        phase_orders = phase['orders']
        for power_name in phase_orders:
            phase_game.set_orders(power_name, phase_orders[power_name])
            entire_game.set_orders(power_name, phase_orders[power_name])

        # Validating that both games have the same units and centers
        for power_name in phase_orders:
            assert sorted(phase_game.get_units(power_name)) == sorted(entire_game.get_units(power_name))
            assert sorted(phase_game.get_centers(power_name)) == sorted(entire_game.get_centers(power_name))

        # Rendering without orders, then with orders
        for incl_orders in (False, True):
            svg_path = os.path.join(output_dir, '%03d%s' % (svg_count, '.svg'))
            with open(svg_path, 'w') as svg_file:
                svg_file.write(entire_game.render(incl_orders=incl_orders))
            svg_count += 1

        # Processing (for entire game)
        entire_game.process()

    print('Rendered {}'.format(game_id))
def generate_trajectory(self):
    """ Plays a game until it is done, with the actor selecting the orders of every power,
        and records the data of one power (the first power after shuffling the power names)

        :return: A tuple (action_probs, returns, values, rewards) where action_probs and values hold one
                 item per processed phase, and returns and rewards are computed from the supply centers
                 of the recorded power by get_returns and get_average_reward.
    """
    game = Game()
    shuffled_powers = list(game.powers)
    np.random.shuffle(shuffled_powers)
    power1, powers_others = shuffled_powers[0], shuffled_powers[1:]

    action_probs, orders, values = [], [], []
    supply_centers = [{power1: game.get_centers(power1)}]

    while not game.is_game_done:
        # Orders of the recorded power first, then of every other power
        order, action_prob = self.actor.get_orders(game, [power1])
        orders_others = {}
        for other_power in powers_others:
            orders_others[other_power] = self.actor.get_orders(game, [other_power])

        # Value of the current board, which is flattened and reshaped to (1, 81 * 35)
        flat_board = state_space.dict_to_flatten_board_state(game.get_state(), game.map)
        board = tf.reshape(tf.convert_to_tensor(flat_board, dtype=tf.float32), (1, 81 * 35))
        print("TEST")
        state_value = self.critic.call(board)

        # Indexing because get_orders can return a list of lists orders for multiple powers
        game.set_orders(power1, order[0])
        for other_power, (orders_list, _) in orders_others.items():
            print(orders_list)
            game.set_orders(other_power, orders_list[0])
        game.process()

        # Collect data
        supply_centers.append({power1: game.get_centers(power1)})
        action_probs.append(action_prob)
        orders.append(order)
        values.append(state_value)

    # The supply centers are put in a list to match the shape [bs, game_length, dict]
    rewards = get_average_reward([supply_centers])
    returns = get_returns([supply_centers])
    return action_probs, returns, values, rewards