コード例 #1
0
ファイル: main.py プロジェクト: nne-nne/Diplomacy_SI
def play_game(game: Game, save_game: bool, agent_nation: list, label="", turn_number=3, repeat_number=1000):
    """Run repeated short episodes in which the Q-table handler orders every power.

    An episode lasts ``turn_number`` processed phases. After each episode the
    supply-centre and influence counts of every power are recorded, and the
    game is reset by reloading the first game stored in ``game.json``. After
    ``repeat_number`` episodes the Q-table and the collected statistics are
    saved and the function returns. The loop also stops, without saving
    anything, if the game reports that it is done.

    :param game: The game to play; replaced by the reloaded game between episodes
    :param save_game: When true, the game is saved before every processed phase
    :param agent_nation: Passed through to ``QtableHandler``
    :param label: Passed through to ``save_stat``
    :param turn_number: Number of processed phases per episode
    :param repeat_number: Number of episodes to run before saving and returning
    """
    set_starting_influence(game)
    saver = GameSaver() if save_game else None
    q_table_Handler = QtableHandler(game, agent_nation)
    iterator = 0  # phases processed in the current episode
    state = 0     # episodes completed so far
    stats = {"centers": defaultdict(list), "influence": defaultdict(list)}

    while not game.is_game_done:
        iterator += 1
        q_table_Handler.set_turn_info()

        # The last character of the phase name is compared against 'M' below
        # to decide whether a reward is assigned for this phase.
        phase = game.get_current_phase()[-1]
        for power_name in game.powers:
            game.set_orders(power_name, q_table_Handler.chose_orders(power_name))

        if save_game:
            saver.save_game(game, "gierka")

        game.process()

        if phase == 'M':
            q_table_Handler.set_reward()
        adjust_influence(game)

        if iterator != turn_number:
            continue

        # ---- End of an episode ----
        state += 1

        # Record the episode result before the exit check so that the final
        # episode is part of the saved statistics as well.
        for power_name in game.powers:
            stats["influence"][power_name].append(len(game.get_power(power_name).influence))
            stats["centers"][power_name].append(len(game.get_centers(power_name)))

        if state % repeat_number == 0:
            q_table_Handler.save()
        if state == repeat_number:
            save_stat(stats, turn_number, label)
            return

        iterator = 0
        print("State: ", state)
        print("Accuracy: {0}".format(q_table_Handler.get_accuracy()))
        print("Number of Germany centers: ", len(game.get_centers("GERMANY")), len(game.get_power("GERMANY").influence))

        # Reset to the stored starting position and clear the handler's
        # per-episode counters.
        game = load_saved_games_from_disk("game.json")[0]
        q_table_Handler.game = game
        q_table_Handler.attempts = 0
        q_table_Handler.miss_hits = 0
コード例 #2
0
    def generate_trajectory(self):
        """Play one complete game with the actor and collect data for one power.

        One randomly chosen power is tracked; every other power is also given
        orders by ``self.actor``. For each processed phase the tracked power's
        action probabilities, orders, critic value and supply centres are
        collected.

        :return: Tuple ``(action_probs, returns, values, rewards)`` where
                 ``returns`` and ``rewards`` come from ``get_returns`` and
                 ``get_average_reward`` applied to the supply-centre history
        """
        game = Game()
        shuffled_powers = list(game.powers)
        np.random.shuffle(shuffled_powers)
        tracked_power, other_powers = shuffled_powers[0], shuffled_powers[1:]

        action_probs, orders, values = [], [], []
        centers_history = [{tracked_power: game.get_centers(tracked_power)}]

        while not game.is_game_done:
            tracked_orders, tracked_prob = self.actor.get_orders(game, [tracked_power])
            other_results = {}
            for name in other_powers:
                other_results[name] = self.actor.get_orders(game, [name])

            # Flatten the current board and give it a leading batch dimension.
            # NOTE(review): 81 * 35 is presumably locations x features per
            # location - confirm against state_space.
            flat_board = state_space.dict_to_flatten_board_state(
                game.get_state(), game.map)
            board = tf.reshape(
                tf.convert_to_tensor(flat_board, dtype=tf.float32),
                (1, 81 * 35))
            print("TEST")
            state_value = self.critic.call(board)

            # get_orders is called with a list of powers and its first result
            # is indexed with [0] to get the orders of the single power asked for.
            game.set_orders(tracked_power, tracked_orders[0])
            for name, (orders_list, _) in other_results.items():
                print(orders_list)
                game.set_orders(name, orders_list[0])
            game.process()

            # Collect the data of this phase
            centers_history.append(
                {tracked_power: game.get_centers(tracked_power)})
            action_probs.append(tracked_prob)
            orders.append(tracked_orders)
            values.append(state_value)

        # Both helpers receive the history wrapped in a list, i.e. a batch
        # holding this single game.
        rewards = get_average_reward([centers_history])
        returns = get_returns([centers_history])
        return action_probs, returns, values, rewards
コード例 #3
0
ファイル: render.py プロジェクト: zhanpengfang/research
def render_saved_game(saved_game, output_dir, prefix=''):
    """ Renders every phase of a saved game to numbered SVG files
        Two files are written per phase (without and with orders) in a sub-directory
        named after the game id. If that directory already holds exactly two SVG files
        per phase, nothing is rendered; otherwise it is wiped and regenerated.

        :param saved_game: The saved game to render (uses its 'id' and 'phases' entries)
        :param output_dir: The directory under which the game's sub-directory is created
        :param prefix: An optional prefix joined to the game id with an underscore
    """
    game_id = saved_game['id']
    folder_name = prefix + '_' + game_id if prefix else game_id
    output_dir = os.path.join(output_dir, folder_name)
    phases = saved_game['phases']

    # A complete earlier rendering is left untouched, a partial one is discarded
    if os.path.exists(output_dir):
        nb_svg = sum(1 for entry in os.listdir(output_dir)
                     if entry.endswith('.svg'))
        if nb_svg == 2 * len(phases):
            print('Rendered {} (Skipped)'.format(game_id))
            return
        shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # One Game replays all the orders; a fresh Game per phase is used to cross-check it
    entire_game = Game()
    if phases:
        entire_game.set_state(phases[0]['state'])

    for phase_index, phase in enumerate(phases):
        phase_game = Game()
        phase_game.set_state(phase['state'])
        entire_game.note = phase_game.note

        # Applying the recorded orders to both games
        phase_game.clear_orders()
        orders = phase['orders']
        for power_name in orders:
            power_orders = orders[power_name]
            phase_game.set_orders(power_name, power_orders)
            entire_game.set_orders(power_name, power_orders)

        # The replayed game must agree with the recorded state
        for power_name in orders:
            replayed_units = sorted(entire_game.get_units(power_name))
            assert sorted(phase_game.get_units(power_name)) == replayed_units
            replayed_centers = sorted(entire_game.get_centers(power_name))
            assert sorted(phase_game.get_centers(power_name)) == replayed_centers

        # Frame 2*i is drawn without orders, frame 2*i + 1 with orders
        for offset, with_orders in enumerate((False, True)):
            frame_number = 2 * phase_index + offset
            frame_path = os.path.join(output_dir,
                                      '%03d%s' % (frame_number, '.svg'))
            with open(frame_path, 'w') as svg_file:
                svg_file.write(entire_game.render(incl_orders=with_orders))

        # Moving the replayed game to the next phase
        entire_game.process()

    print('Rendered {}'.format(game_id))
コード例 #4
0
def main(sl_model, other_agent):
    """ Plays a local game with 7 bots and classifies the result for one random power

        `sl_model` controls one randomly chosen power, `other_agent` controls the six
        others. After the game, support statistics for the chosen power are printed and
        the game is written to 'game.json'.

        NOTE(review): the `yield` below makes this function a generator; it presumably
        relies on a coroutine decorator (outside this view) that resolves the yielded
        dict of order requests and sends the result back - confirm with the caller.

        :param sl_model: Player for the chosen power; `get_orders(game, [power])` must
                         return an `(orders, action_prob)` pair
        :param other_agent: Player used for the remaining powers
        :return: "defeated" if the chosen power's terminal reward reaches 0 during play
                 or another power wins alone, "won" if the chosen power wins alone,
                 otherwise "most_sc" or "survived" depending on whether the chosen power
                 has the highest terminal reward
    """
    player1 = sl_model
    player2 = other_agent

    game = Game()
    reward_class = Reward(game)

    # Randomly choosing the power of the special player
    powers = list(game.powers)
    random.shuffle(powers)
    powers1 = powers[0]
    powers2 = powers[1:7]

    # Playing game
    while not game.is_game_done:
        if reward_class.get_terminal_reward(powers1) == 0:
            return "defeated"
        orders1, _action_prob = player1.get_orders(game, [powers1])
        orders2 = yield {
            power_name: player2.get_orders(game, power_name)
            for power_name in powers2
        }

        if reward_class.get_terminal_reward(powers1) != 0:
            # get_orders was called with a list of powers; [0] selects the first entry
            game.set_orders(powers1, orders1[0])
        for power_name, power_orders in orders2.items():
            game.set_orders(power_name, power_orders)
        game.process()
        print(reward_class.get_local_reward_all_powers())
    print(reward_class.get_terminal_reward_all_powers())

    print(game.outcome)

    # Calculating support
    #   support:       any order of the chosen power containing 'S'
    #   x-support:     the supported unit is not listed among the chosen power's units
    #   eff-x-support: an x-support whose supporting unit has an empty result list
    support_count, x_support_count, eff_x_support_count = 0, 0, 0
    for phase in game.get_phase_history():
        for order in phase.orders[powers1]:
            order_split = order.split()
            if 'S' not in order_split:
                continue
            support_count += 1
            s_loc = order_split.index('S')
            supported = order_split[s_loc + 1] + " " + order_split[s_loc + 2]
            if supported in phase.state['units'][powers1]:
                continue
            x_support_count += 1
            supporter = order_split[s_loc - 2] + " " + order_split[s_loc - 1]
            if phase.results[supporter] == []:
                eff_x_support_count += 1

    # A game without any (cross-)support must not abort the run before the game is
    # saved, so an undefined ratio is reported as 0.0 instead of dividing by zero.
    x_support_ratio = x_support_count / support_count if support_count else 0.0
    eff_x_support_ratio = (eff_x_support_count / x_support_count
                           if x_support_count else 0.0)
    print("X-Support Ratio: " + str(x_support_ratio))
    print("Eff-X-Support Ratio: " + str(eff_x_support_ratio))

    # Saving to disk
    with open('game.json', 'w') as file:
        file.write(json.dumps(to_saved_game_format(game)))

    sc_dict = reward_class.get_terminal_reward_all_powers()

    # An outcome of length 2 is treated as a solo finish whose last entry is the winner
    if len(game.outcome) == 2:
        return "won" if game.outcome[-1] == powers1 else "defeated"

    # Otherwise rank by terminal reward; on ties max() keeps the first power in the
    # dict's order, which matches a stable descending sort.
    leader = max(sc_dict, key=sc_dict.get)
    return "most_sc" if leader == powers1 else "survived"