def play_best(game: Game, save_game: bool, agent_nation: list, turn_number=3, repeat_number=1000):
    """ Plays at most `turn_number` phases where the agent powers use their best known orders.

        :param game: The game to play. It is processed in place.
        :param save_game: If true, the game is saved through a GameSaver before each phase is processed.
        :param agent_nation: Names of the powers whose orders come from `chose_best`;
                             every other power gets its orders from `chose_orders`.
        :param turn_number: Number of processed phases after which the function returns.
        :param repeat_number: Not used by this function. Kept so the signature mirrors `play_game`.
        :return: Nothing. Returns once `turn_number` phases were processed or the game is done.
    """
    set_starting_influence(game)
    saver = GameSaver() if save_game else None
    q_table_handler = QtableHandler(game, agent_nation)

    turns_played = 0
    while not game.is_game_done:
        turns_played += 1
        q_table_handler.set_turn_info()

        # Setting orders - agent powers exploit the Q-table, the others use the regular policy
        for power_name in game.powers:
            if power_name in agent_nation:
                power_orders = q_table_handler.chose_best(power_name)
            else:
                power_orders = q_table_handler.chose_orders(power_name)
            game.set_orders(power_name, power_orders)

        if saver is not None:
            saver.save_game(game, "gierka")

        game.process()
        adjust_influence(game)

        if turns_played == turn_number:
            return
def test_mis_007():
    """ Checks that a MIS response built for FRANCE serializes to the expected DAIDE string """
    france_game = Game(map_name='standard')
    france_game.set_orders('FRANCE', ['A PAR - BUR'])

    # Only A PAR received an order, so the response is expected to list the two other French units
    mis_response = responses.MIS(phase_name='S1901M', power=france_game.get_power('FRANCE'))

    assert isinstance(mis_response, responses.MIS), 'Expected a MIS response'
    expected_bytes = str_to_bytes('MIS ( FRA FLT BRE ) ( FRA AMY MAR )')
    assert bytes(mis_response) == expected_bytes
def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number=3, repeat_number=1000):
    """ Runs training episodes of `turn_number` phases each and collects per-episode statistics.

        After every episode the number of centers and the influence size of each power are recorded,
        the game is reloaded from 'game.json' and the handler's attempt counters are reset.
        When `repeat_number` episodes were played, the Q-table and the statistics are saved and
        the function returns.

        :param game: The game to play. It is processed in place until the first reload.
        :param save_game: If true, the game is saved through a GameSaver before each phase is processed.
        :param agent_nation: Forwarded to QtableHandler.
        :param label: Label forwarded to `save_stat`.
        :param turn_number: Number of processed phases that make up one episode.
        :param repeat_number: Number of episodes to play before saving and returning.
        :return: Nothing.
    """
    set_starting_influence(game)
    saver = GameSaver() if save_game else None
    q_table_handler = QtableHandler(game, agent_nation)

    turns_played = 0
    episode = 0
    stats = {"centers": defaultdict(list), "influence": defaultdict(list)}

    while not game.is_game_done:
        turns_played += 1
        q_table_handler.set_turn_info()

        # Last character of the phase name, read before processing so it describes the orders just set
        phase = game.get_current_phase()[-1]
        for power_name in game.powers:
            game.set_orders(power_name, q_table_handler.chose_orders(power_name))

        if saver is not None:
            saver.save_game(game, "gierka")

        game.process()
        # Rewards are only assigned for phases whose name ends with 'M'
        if phase == 'M':
            q_table_handler.set_reward()
        adjust_influence(game)

        if turns_played != turn_number:
            continue

        # --- End of an episode
        episode += 1
        if episode % repeat_number == 0:
            q_table_handler.save()
        if episode == repeat_number:
            # NOTE(review): the statistics of this final episode are not appended before saving;
            # confirm whether that is intended.
            save_stat(stats, turn_number, label)
            return

        turns_played = 0
        print("State: ", episode)
        print("Accuracy: {0}".format(q_table_handler.get_accuracy()))
        print("Number of Germany centers: ",
              len(game.get_centers("GERMANY")),
              len(game.get_power("GERMANY").influence))
        for power_name in game.powers:
            stats["influence"][power_name].append(len(game.get_power(power_name).influence))
            stats["centers"][power_name].append(len(game.get_centers(power_name)))

        # Restarting from the game stored on disk for the next episode
        game = load_saved_games_from_disk("game.json")[0]
        q_table_handler.game = game
        q_table_handler.attempts = 0
        q_table_handler.miss_hits = 0
def generate_trajectory(self):
    """ Plays one full game with the actor controlling every power and records one power's trajectory.

        One power is picked at random as the tracked power. At every phase the actor's action
        probabilities and the critic's value of the board are stored for that power, and its
        supply centers are recorded after the phase is processed.

        :return: A tuple (action_probs, returns, values, rewards) where
                 - action_probs: list with the second element returned by `actor.get_orders` per phase
                 - returns: result of `get_returns` on the recorded supply centers
                 - values: list with the critic output per phase
                 - rewards: result of `get_average_reward` on the recorded supply centers
    """
    game = Game()
    powers = list(game.powers)
    np.random.shuffle(powers)
    power1 = powers[0]
    powers_others = powers[1:]

    action_probs = []
    values = []
    supply_centers = [{power1: game.get_centers(power1)}]

    while not game.is_game_done:
        order, action_prob = self.actor.get_orders(game, [power1])
        orders_others = {power_name: self.actor.get_orders(game, [power_name])
                         for power_name in powers_others}

        # Flattened board state reshaped to a single row of 81 * 35 features for the critic
        board = tf.convert_to_tensor(
            state_space.dict_to_flatten_board_state(game.get_state(), game.map),
            dtype=tf.float32)
        board = tf.reshape(board, (1, 81 * 35))
        state_value = self.critic.call(board)

        # Indexing because get_orders can return a list of lists orders for multiple powers
        game.set_orders(power1, order[0])
        for power_name, (orders_list, _) in orders_others.items():
            game.set_orders(power_name, orders_list[0])
        game.process()

        # Collect data
        supply_centers.append({power1: game.get_centers(power1)})
        action_probs.append(action_prob)
        values.append(state_value)

    # Put in list to match shape of [bs, game_length, dict]
    rewards = get_average_reward([supply_centers])
    returns = get_returns([supply_centers])
    return action_probs, returns, values, rewards
def main():
    """ Plays a game with random valid orders on the map named on the command line.

        The first positional argument is the map name (default: 'shiftLeft'); the file
        '<map name>.map' must exist relative to the current directory. When the game is done,
        it is appended as one JSON line to 'collected_autoplay_games.json'.

        Exits through `sys.exit` with a message on an unknown option or a missing map file.
    """
    import json     # Local import: only needed to serialize the finished game

    try:
        _, arguments = getopt.getopt(sys.argv[1:], 'h')
    except getopt.error:
        sys.exit("Unknown input parameter.")

    if not arguments:
        arguments = ["shiftLeft"]
    map_name = arguments[0]
    if not os.path.exists(map_name + ".map"):
        sys.exit("%s.map could not be opened" % (map_name, ))

    game = Game(map_name=map_name)
    while not game.is_game_done:

        # Getting the list of possible orders for all locations
        possible_orders = game.get_all_possible_orders()

        # For each power, randomly sampling a valid order
        for power_name in game.powers:
            power_orders = []
            for loc in game.get_orderable_locations(power_name):
                # Dropping a trailing '/' so the location matches the keys of possible_orders
                if loc.endswith('/'):
                    loc = loc[:-1]
                if possible_orders[loc]:
                    power_orders.append(random.choice(possible_orders[loc]))
            game.set_orders(power_name, power_orders)

        # Messages can be sent locally with game.add_message
        # e.g. game.add_message(Message(sender='FRANCE',
        #                               recipient='ENGLAND',
        #                               message='This is a message',
        #                               phase=self.get_current_phase(),
        #                               time_sent=int(time.time())))

        # Processing the game to move to the next phase
        game.process()

    # Exporting the game to disk to visualize (game is appended)
    # The saved game format is not a string, so it must be serialized before being written
    with open('collected_autoplay_games.json', 'a') as outp:
        outp.write(json.dumps(to_saved_game_format(game)) + '\n')
def _restore_units_and_centers(game, phase):
    """ Replaces the units and centers of `game` with the ones of `phase` (the 'GLOBAL' entry is skipped) """
    # Units
    game.clear_units()
    for power_name, power_units in phase['units'].items():
        if power_name != 'GLOBAL':
            game.set_units(power_name, power_units)

    # Centers
    game.clear_centers()
    for power_name, power_centers in phase['centers'].items():
        if power_name != 'GLOBAL':
            game.set_centers(power_name, power_centers)


def state_dict_to_game_and_power(state_dict, country_id, max_phases=None):
    """ Converts a game state from the dictionary format to an actual diplomacy.Game object with the related power.
        :param state_dict: The game state in dictionary format from webdiplomacy.net
        :param country_id: The country id we want to convert.
        :param max_phases: Optional. If set, improve speed by only keeping the last 'x' phases to regenerate the game.
        :return: A tuple of
            1) None, None       - on error or if the conversion is not possible, or game is invalid / not-started / done
            2) game, power_name - on successful conversion
    """
    if state_dict is None:
        return None, None

    req_fields = ('gameID', 'variantID', 'turn', 'phase', 'gameOver', 'phases', 'standoffs', 'occupiedFrom')
    if any(field not in state_dict for field in req_fields):
        LOGGER.error('The required fields for state dict are %s. Cannot translate %s', req_fields, state_dict)
        return None, None

    # Extracting information
    game_id = str(state_dict['gameID'])
    map_id = int(state_dict['variantID'])
    standoffs = state_dict['standoffs']
    occupied_from = state_dict['occupiedFrom']

    # Parsing all phases
    state_dict_phases = state_dict.get('phases', [])
    if isinstance(max_phases, int):
        state_dict_phases = state_dict_phases[-1 * max_phases:]
    all_phases = [process_phase_dict(phase_dict, map_id=map_id) for phase_dict in state_dict_phases]

    # Without any phase there is no current phase to restore, so the conversion is not possible
    if not all_phases:
        LOGGER.error('The state dict of game %s has no phases. Cannot translate it.', game_id)
        return None, None

    # Building game - Replaying the last phases
    game = Game(game_id=game_id, map_name=CACHE['ix_to_map'][map_id])

    for phase_to_replay in all_phases[:-1]:
        game.set_current_phase(phase_to_replay['name'])
        _restore_units_and_centers(game, phase_to_replay)

        # Orders
        for power_name, power_orders in phase_to_replay['orders'].items():
            if power_name == 'GLOBAL':
                continue
            game.set_orders(power_name, power_orders)

        # Processing
        game.process()

    # Setting the current phase
    current_phase = all_phases[-1]
    game.set_current_phase(current_phase['name'])
    _restore_units_and_centers(game, current_phase)

    # Setting retreat locs
    if current_phase['name'][-1] == 'R':
        invalid_retreat_locs = set()
        attack_source = {}

        # Loc is occupied
        for power in game.powers.values():
            for unit in power.units:
                invalid_retreat_locs.add(unit[2:5])

        # Loc was in standoff
        if standoffs:
            for loc_dict in standoffs:
                _, loc = center_dict_to_str(loc_dict, map_id=map_id)
                invalid_retreat_locs.add(loc[:3])

        # Loc was attacked from
        if occupied_from:
            for loc_id, occupied_from_id in occupied_from.items():
                loc_name = CACHE[map_id]['ix_to_loc'][int(loc_id)][:3]
                from_loc_name = CACHE[map_id]['ix_to_loc'][int(occupied_from_id)][:3]
                attack_source[loc_name] = from_loc_name

        # Removing invalid retreat locs
        # A unit cannot retreat to an invalid loc, nor to the loc its attacker came from
        for power in game.powers.values():
            for retreat_unit in power.retreats:
                attacker_origin = attack_source.get(retreat_unit[2:5], '')
                power.retreats[retreat_unit] = [loc for loc in power.retreats[retreat_unit]
                                                if loc[:3] not in invalid_retreat_locs
                                                and loc[:3] != attacker_origin]

    # Returning
    power_name = CACHE[map_id]['ix_to_power'][country_id]
    return game, power_name
def render_saved_game(saved_game, output_dir, prefix=''):
    """ Renders every phase of a saved game as SVG files (two per phase: without and with orders)
        :param saved_game: The saved game to render
        :param output_dir: The parent directory; the files are written in a sub-directory named after the game id
        :param prefix: An optional prefix to add (followed by '_') before the game id in the sub-directory name
    """
    game_id = saved_game['id']
    folder_name = prefix + '_' + game_id if prefix else game_id
    output_dir = os.path.join(output_dir, folder_name)
    phases = saved_game['phases']

    # An existing folder is reused only if it already holds exactly two SVGs per phase
    # Otherwise it is wiped and everything is rendered again
    if os.path.exists(output_dir):
        nb_svg = sum(1 for file_name in os.listdir(output_dir) if file_name.endswith('.svg'))
        if nb_svg == 2 * len(phases):
            print('Rendered {} (Skipped)'.format(game_id))
            return
        shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # 'replayed_game' is processed phase after phase, while a fresh Game is loaded from each
    # saved phase to validate that the replay is still at the same place
    replayed_game = Game()
    if phases:
        replayed_game.set_state(phases[0]['state'])

    svg_index = 0
    for phase in phases:
        reference_game = Game()
        reference_game.set_state(phase['state'])
        replayed_game.note = reference_game.note

        # Same orders on both games
        reference_game.clear_orders()
        phase_orders = phase['orders']
        for power_name, power_orders in phase_orders.items():
            reference_game.set_orders(power_name, power_orders)
            replayed_game.set_orders(power_name, power_orders)

        # Both games must agree on units and centers
        for power_name in phase_orders:
            assert sorted(reference_game.get_units(power_name)) == sorted(replayed_game.get_units(power_name))
            assert sorted(reference_game.get_centers(power_name)) == sorted(replayed_game.get_centers(power_name))

        # First file without the orders, second file with the orders
        for incl_orders in (False, True):
            svg_path = os.path.join(output_dir, '%03d%s' % (svg_index, '.svg'))
            with open(svg_path, 'w') as svg_file:
                svg_file.write(replayed_game.render(incl_orders=incl_orders))
            svg_index += 1

        # Moving the replayed game to the next phase
        replayed_game.process()

    print('Rendered {}'.format(game_id))
def main(sl_model, other_agent):
    """ Plays a local game with 7 bots and reports how the power played by `sl_model` finished.

        One randomly chosen power is played by `sl_model`, the 6 others by `other_agent`.
        After the game, support statistics of the main power are printed and the game is
        written to 'game.json'.

        NOTE(review): the `yield` below makes this a generator function. It presumably runs under
        a coroutine decorator / driver that resolves the yielded dict of orders - confirm against
        the caller.

        :param sl_model: The main player. `get_orders(game, [power_name])` must return (orders, action_prob).
        :param other_agent: The player used for the 6 other powers.
        :return: One of
            - "defeated": the main power has a terminal reward of 0 during the game, or another power won
            - "won": the game outcome has 2 items and ends with the main power
            - "most_sc": no single winner and the main power has the highest terminal reward
            - "survived": no single winner and another power has the highest terminal reward
    """
    player1 = sl_model          # Main player x1
    player2 = other_agent       # Other player x6

    game = Game()
    reward_class = Reward(game)

    # For randomly choosing the power of the special player
    powers = list(game.powers)
    random.shuffle(powers)
    powers1 = powers[0]
    powers2 = powers[1:7]

    # Playing game
    while not game.is_game_done:
        if reward_class.get_terminal_reward(powers1) == 0:
            return "defeated"

        orders1, _ = player1.get_orders(game, [powers1])
        orders2 = yield {power_name: player2.get_orders(game, power_name) for power_name in powers2}

        if reward_class.get_terminal_reward(powers1) != 0:
            game.set_orders(powers1, orders1[0])
        for power_name, power_orders in orders2.items():
            game.set_orders(power_name, power_orders)
        game.process()
        print(reward_class.get_local_reward_all_powers())

    print(reward_class.get_terminal_reward_all_powers())
    print(game.outcome)

    # Calculating support
    # support_count:       support orders issued by the main power
    # x_support_count:     ... whose supported unit is not a unit of the main power
    # eff_x_support_count: ... and whose supporting unit has an empty result list
    support_count, x_support_count, eff_x_support_count = 0, 0, 0
    for phase in game.get_phase_history():
        for order in phase.orders[powers1]:
            order_split = order.split()
            if 'S' not in order_split:
                continue
            support_count += 1
            s_loc = order_split.index('S')
            supported = order_split[s_loc + 1] + " " + order_split[s_loc + 2]
            if supported in phase.state['units'][powers1]:
                continue
            x_support_count += 1
            supporter = order_split[s_loc - 2] + " " + order_split[s_loc - 1]
            if phase.results[supporter] == []:
                eff_x_support_count += 1

    # A game without (cross-power) support orders has no defined ratio: report nan instead of crashing
    x_support_ratio = x_support_count / support_count if support_count else float('nan')
    eff_x_support_ratio = eff_x_support_count / x_support_count if x_support_count else float('nan')
    print("X-Support Ratio: " + str(x_support_ratio))
    print("Eff-X-Support Ratio: " + str(eff_x_support_ratio))

    # Saving to disk
    with open('game.json', 'w') as file:
        file.write(json.dumps(to_saved_game_format(game)))

    # An outcome of 2 items means there is a single winner, stored last
    if len(game.outcome) == 2:
        return "won" if game.outcome[-1] == powers1 else "defeated"

    # No single winner - comparing against the power with the highest terminal reward
    # (the first one in dict order when several powers are tied)
    sc_dict = reward_class.get_terminal_reward_all_powers()
    leading_power = max(sc_dict, key=sc_dict.get)
    return "most_sc" if leading_power == powers1 else "survived"
def test_custom_int_unit_reward():
    """ Tests for CustomInterimUnitReward """
    all_powers = ('AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY')
    game = Game()
    rew_fn = CustomIntUnitReward()

    def check_rewards(prev_proto, curr_proto, expected):
        """ Asserts the reward of every power for the transition prev_proto -> curr_proto
            :param expected: Dict of power name -> expected interim reward. Missing powers are expected to get 0.
        """
        def reward_of(power_name, is_terminal, done_reason):
            return rew_fn.get_reward(prev_proto,
                                     curr_proto,
                                     power_name,
                                     is_terminal_state=is_terminal,
                                     done_reason=done_reason)

        # --- Not in terminal state
        for power_name in all_powers:
            assert reward_of(power_name, False, None) == expected.get(power_name, 0.)

        # --- In terminal state
        for power_name in all_powers:
            assert reward_of(power_name, True, DoneReason.GAME_ENGINE) == expected.get(power_name, 0.)

        # --- Thrashing
        for power_name in all_powers:
            assert reward_of(power_name, True, DoneReason.THRASHED) == -18.

    # Issuing orders
    before_proto = extract_state_proto(game)
    game.set_orders('FRANCE', ['A MAR - SPA', 'A PAR - PIC'])
    game.set_orders('AUSTRIA', ['A VIE - TYR'])
    game.process()
    after_proto = extract_state_proto(game)
    assert game.get_current_phase() == 'F1901M'

    # +1 for FRANCE for conquering SPA
    check_rewards(before_proto, after_proto, {'FRANCE': 1.})

    # Issuing orders
    before_proto = after_proto
    game.set_orders('FRANCE', ['A PIC - BEL', 'A SPA - POR'])
    game.set_orders('AUSTRIA', ['F TRI - VEN', 'A TYR S F TRI - VEN'])
    game.process()
    after_proto = extract_state_proto(game)

    # +1 for FRANCE for conquering POR
    # -1 for FRANCE for losing SPA
    # +1 for FRANCE for conquering BEL
    # +1 for AUSTRIA for conquering VEN
    # -1 for ITALY for losing VEN
    check_rewards(before_proto, after_proto, {'AUSTRIA': 1., 'FRANCE': 1., 'ITALY': -1.})

    # Issuing the same orders again
    before_proto = after_proto
    game.set_orders('FRANCE', ['A PIC - BEL', 'A SPA - POR'])
    game.set_orders('AUSTRIA', ['F TRI - VEN', 'A TYR S F TRI - VEN'])
    game.process()
    after_proto = extract_state_proto(game)

    # +0 - No new SCs
    check_rewards(before_proto, after_proto, {})
import random
from diplomacy import Game
from diplomacy.utils.export import to_saved_game_format

# Creating a game
# Alternatively, a map_name can be specified as an argument. e.g. Game(map_name='pure')
game = Game(map_name='standard')
while not game.is_game_done:

    # Possible orders, indexed by location
    possible_orders = game.get_all_possible_orders()

    # Each power gets one random order per orderable location that has at least one possible order
    for power_name in game.powers:
        power_orders = []
        for loc in game.get_orderable_locations(power_name):
            candidates = possible_orders[loc]
            if candidates:
                power_orders.append(random.choice(candidates))
        game.set_orders(power_name, power_orders)

    # Messages can be sent locally with game.add_message
    # e.g. game.add_message(Message(sender='FRANCE',
    #                               recipient='ENGLAND',
    #                               message='This is a message',
    #                               phase=self.get_current_phase(),
    #                               time_sent=int(time.time())))

    # Moving to the next phase
    game.process()

# Exporting the game to disk to visualize (game is appended to file)
# Alternatively, we can do >> file.write(json.dumps(to_saved_game_format(game)))
to_saved_game_format(game, output_path='collected_game_litanies.json')