def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number=3, repeat_number=1000):
    """ Runs repeated fixed-length Q-table episodes on a game
        An episode lasts `turn_number` processed phases. After every episode the handler accuracy and
        the GERMANY center / influence counts are printed, per-power statistics are recorded, and the
        game is reloaded from "game.json". Once `repeat_number` episodes are complete the Q-table is
        saved, the statistics are written with `save_stat` and the function returns.
        The loop also stops (without saving anything) if the game finishes in the middle of an episode.

        :param game: The game to play; replaced internally by the reloaded game after each episode
        :param save_game: If true, every phase is passed to a `GameSaver` before being processed
        :param agent_nation: Forwarded unchanged to `QtableHandler`
        :param label: Forwarded unchanged to `save_stat`
        :param turn_number: Number of phases that make up one episode
        :param repeat_number: Number of episodes to run before saving and returning
        :return: None
    """
    set_starting_influence(game)
    saver = GameSaver() if save_game else None
    q_table_handler = QtableHandler(game, agent_nation)

    iterator = 0    # Phases processed in the current episode
    state = 0       # Episodes completed so far
    stats = {"centers": defaultdict(list), "influence": defaultdict(list)}

    while not game.is_game_done:
        iterator += 1
        q_table_handler.set_turn_info()

        # Setting orders
        # Last character of the phase name, read before processing ('M' is presumably a movement phase)
        phase = game.get_current_phase()[-1]
        for power_name in game.powers:
            game.set_orders(power_name, q_table_handler.chose_orders(power_name))

        if save_game:
            saver.save_game(game, "gierka")
        game.process()

        if phase == 'M':
            q_table_handler.set_reward()
        # NOTE(review): indentation was lost in the source; this call is assumed to run after every
        # phase rather than only under the `phase == 'M'` check above - confirm.
        adjust_influence(game)

        if iterator != turn_number:
            continue

        # End of an episode
        state += 1
        if state % repeat_number == 0:
            q_table_handler.save()
        if state == repeat_number:
            # NOTE: returning here means the final episode is never appended to `stats`.
            save_stat(stats, turn_number, label)
            return

        iterator = 0
        print("State: ", state)
        print("Accuracy: {0}".format(q_table_handler.get_accuracy()))
        print("Number of Germany centers: ",
              len(game.get_centers("GERMANY")),
              len(game.get_power("GERMANY").influence))
        for power_name in game.powers:
            stats["influence"][power_name].append(len(game.get_power(power_name).influence))
            stats["centers"][power_name].append(len(game.get_centers(power_name)))

        # Restarting from the saved position and resetting the handler's per-episode counters
        game = load_saved_games_from_disk("game.json")[0]
        q_table_handler.game = game
        q_table_handler.attempts = 0
        q_table_handler.miss_hits = 0
def generate_trajectory(self):
    """ Plays one complete game with the actor and collects per-phase training data
        One power is picked at random as the tracked power; `self.actor` also produces the orders
        of every other power. Before each phase is processed, the board state is flattened and
        evaluated by `self.critic`.

        :return: A tuple (action_probs, returns, values, rewards) where
                 - action_probs: second element returned by `self.actor.get_orders` for the
                   tracked power, one entry per phase
                 - returns: result of `get_returns` on the tracked power's supply-center history
                 - values: critic output for the pre-processing board of each phase
                 - rewards: result of `get_average_reward` on the same supply-center history
    """
    game = Game()
    powers = list(game.powers)
    np.random.shuffle(powers)
    power1, powers_others = powers[0], powers[1:]

    action_probs = []
    values = []
    # Supply centers of the tracked power: initial position, then one entry per processed phase
    supply_centers = [{power1: game.get_centers(power1)}]

    while not game.is_game_done:
        order, action_prob = self.actor.get_orders(game, [power1])
        orders_others = {
            power_name: self.actor.get_orders(game, [power_name])
            for power_name in powers_others
        }

        board = tf.convert_to_tensor(
            state_space.dict_to_flatten_board_state(game.get_state(), game.map),
            dtype=tf.float32)
        # Single-row batch for the critic; 81 * 35 is the flattened board length expected here
        board = tf.reshape(board, (1, 81 * 35))
        # NOTE(review): if the critic is a Keras model, `self.critic(board)` is the documented
        # way to invoke it - confirm before changing.
        state_value = self.critic.call(board)

        # Indexing because get_orders can return a list of lists orders for multiple powers
        game.set_orders(power1, order[0])
        for power_name, (orders_list, _) in orders_others.items():
            game.set_orders(power_name, orders_list[0])
        game.process()

        # Collect data
        supply_centers.append({power1: game.get_centers(power1)})
        action_probs.append(action_prob)
        values.append(state_value)

    # Wrapped in a list to match the shape [bs, game_length, dict]
    rewards = get_average_reward([supply_centers])
    returns = get_returns([supply_centers])
    return action_probs, returns, values, rewards
def render_saved_game(saved_game, output_dir, prefix=''):
    """ Renders every phase of a saved game as numbered SVG files
        Two files are written per phase (without orders, then with orders) in a folder named
        after the game id. If the folder already holds exactly that many .svg files the game is
        reported as skipped; otherwise the folder is wiped and rendered again from scratch.

        :param saved_game: The saved game to render (uses its 'id' and 'phases' entries)
        :param output_dir: The parent directory in which the game folder is created
        :param prefix: An optional prefix to add before the game id (joined with '_')
    """
    game_id = saved_game['id']
    phases = saved_game['phases']
    folder_name = prefix + '_' + game_id if prefix else game_id
    output_dir = os.path.join(output_dir, folder_name)

    # Skipping when a complete rendering is already on disk, otherwise starting over
    if os.path.exists(output_dir):
        nb_svg = sum(1 for name in os.listdir(output_dir) if name.endswith('.svg'))
        if nb_svg == 2 * len(phases):
            print('Rendered {} (Skipped)'.format(game_id))
            return
        shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # One game replays all the orders; a fresh game per phase is used to validate it
    entire_game = Game()
    if phases:
        entire_game.set_state(phases[0]['state'])

    svg_index = 0
    for phase in phases:
        # Loading the recorded state of this phase
        phase_game = Game()
        phase_game.set_state(phase['state'])
        entire_game.note = phase_game.note

        # Submitting the recorded orders to both games
        phase_game.clear_orders()
        orders = phase['orders']
        for power_name in orders:
            power_orders = orders[power_name]
            phase_game.set_orders(power_name, power_orders)
            entire_game.set_orders(power_name, power_orders)

        # The replayed game must match the recorded phase
        for power_name in orders:
            assert sorted(phase_game.get_units(power_name)) == sorted(
                entire_game.get_units(power_name))
            assert sorted(phase_game.get_centers(power_name)) == sorted(
                entire_game.get_centers(power_name))

        # Rendering without orders first, then with orders
        for incl_orders in (False, True):
            svg_path = os.path.join(output_dir, '%03d%s' % (svg_index, '.svg'))
            with open(svg_path, 'w') as file:
                file.write(entire_game.render(incl_orders=incl_orders))
            svg_index += 1

        # Processing (for entire game)
        entire_game.process()

    print('Rendered {}'.format(game_id))
def main(sl_model, other_agent):
    """ Plays a local game with 7 bots
        `sl_model` controls one randomly chosen power and `other_agent` controls the other six.
        After the game, support statistics for the main power are printed and the game is written
        to 'game.json' in the current directory.

        Because the body contains a `yield`, this is a generator function: each phase it yields a
        dict {power_name: other_agent.get_orders(game, power_name)} and expects the resolved
        {power_name: orders} dict to be sent back. The string result is delivered as the
        generator's return value (presumably unwrapped by a coroutine runner that is not visible
        in this file - confirm against the caller).

        :param sl_model: Player for the main power; its get_orders(game, [power]) must return (orders, action_prob)
        :param other_agent: Player used for every other power
        :return: "defeated" as soon as the main power's terminal reward is 0 during play (in that
                 case no statistics are printed and nothing is saved), otherwise
                 "won" / "defeated" when game.outcome has two entries and the last one is / is not the main power,
                 "most_sc" / "survived" when the main power does / does not have the highest terminal reward
    """
    player1 = sl_model
    player2 = other_agent

    game = Game()
    reward_class = Reward(game)

    # For randomly choosing the power of the special player
    powers = list(game.powers)
    random.shuffle(powers)
    powers1 = powers[0]
    powers2 = powers[1:7]

    # Playing game
    while not game.is_game_done:
        if reward_class.get_terminal_reward(powers1) == 0:
            return "defeated"

        orders1, _ = player1.get_orders(game, [powers1])
        orders2 = yield {
            power_name: player2.get_orders(game, power_name)
            for power_name in powers2
        }

        # Checked again because control left this function at the yield above
        if reward_class.get_terminal_reward(powers1) != 0:
            game.set_orders(powers1, orders1[0])
        for power_name, power_orders in orders2.items():
            game.set_orders(power_name, power_orders)
        game.process()
        # NOTE(review): indentation was lost in the source; the per-phase placement of this
        # print is assumed - confirm.
        print(reward_class.get_local_reward_all_powers())

    print(reward_class.get_terminal_reward_all_powers())
    print(game.outcome)

    # Calculating support
    #   support_count        - support orders issued by the main power
    #   x_support_count      - of those, supports of a unit the main power does not own
    #   eff_x_support_count  - of those, supports whose result list is empty
    support_count, x_support_count, eff_x_support_count = 0, 0, 0
    for phase in game.get_phase_history():
        own_units = phase.state['units'][powers1]
        for order in phase.orders[powers1]:
            order_split = order.split()
            if 'S' not in order_split:
                continue
            support_count += 1
            s_loc = order_split.index('S')
            supported = order_split[s_loc + 1] + " " + order_split[s_loc + 2]
            if supported in own_units:
                continue
            x_support_count += 1
            supporter = order_split[s_loc - 2] + " " + order_split[s_loc - 1]
            if phase.results[supporter] == []:
                eff_x_support_count += 1

    # A game without any (cross-power) support must not crash before being saved
    x_support_ratio = x_support_count / support_count if support_count else 0.0
    eff_x_support_ratio = eff_x_support_count / x_support_count if x_support_count else 0.0
    print("X-Support Ratio: " + str(x_support_ratio))
    print("Eff-X-Support Ratio: " + str(eff_x_support_ratio))

    # Saving to disk
    with open('game.json', 'w') as file:
        file.write(json.dumps(to_saved_game_format(game)))

    sc_dict = reward_class.get_terminal_reward_all_powers()
    if len(game.outcome) == 2:
        return "won" if game.outcome[-1] == powers1 else "defeated"

    # Stable descending sort: on ties the power listed first in sc_dict is ranked first
    top_power = sorted(sc_dict, key=sc_dict.get, reverse=True)[0]
    return "most_sc" if top_power == powers1 else "survived"