Beispiel #1
0
def sim_and_modifiy_Series():
    """Simulate the newest strategy against all others and grow the payoff data.

    The game is loaded from ``<cwd>/data/game.pkl``. Every cell of the last
    column and of the last row of the ``new_dim x new_dim`` strategy grid is
    simulated serially with ``series_sim``. The rewards are rounded to two
    decimals and handed to the game through ``add_col_att`` / ``add_col_def``
    / ``add_row_att`` / ``add_row_def``. The updated game is then pickled back
    to the same path.

    Raises:
        ValueError: if ``game.dim_payoff_def()`` or ``game.num_str()`` returns
            two values that differ from each other.
    """
    # TODO: make sure this is correct
    print('Begin simulation and modify payoff matrix.')
    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)

    env = game.env
    num_episodes = game.num_episodes

    # TODO: add str first and then calculate payoff
    old_dim, old_dim1 = game.dim_payoff_def()
    new_dim, new_dim1 = game.num_str()
    if old_dim != old_dim1 or new_dim != new_dim1:
        raise ValueError("Payoff dimension does not match.")

    def_str_list = game.def_str
    att_str_list = game.att_str

    # Positions are (defender index, attacker index). The new column holds
    # every defender except the last one; the new row then spans all
    # attackers, so the corner cell is simulated exactly once (in the row).
    last = new_dim - 1
    col_positions = [(i, last) for i in range(last)]
    row_positions = [(last, j) for j in range(new_dim)]

    def _simulate(positions):
        """Run series_sim for each position; return rounded (att, def) arrays."""
        att_rewards = []
        def_rewards = []
        # TODO: check the path is correct
        # NOTE: an MPI variant (do_MPI_sim) used to be selectable here; only
        # the serial simulation is active.
        for idx_def, idx_att in positions:
            aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                          def_str_list[idx_def], num_episodes)
            att_rewards.append(aReward)
            def_rewards.append(dReward)
        return (np.round(np.array(att_rewards), 2),
                np.round(np.array(def_rewards), 2))

    att_col, def_col = _simulate(col_positions)
    att_row, def_row = _simulate(row_positions)

    # The shape is passed positionally: the ``newshape=`` keyword of
    # np.reshape is deprecated in recent NumPy releases. Each column is sized
    # from its own data rather than from the attacker column.
    game.add_col_att(np.reshape(att_col, (len(att_col), 1)))
    game.add_col_def(np.reshape(def_col, (len(def_col), 1)))
    game.add_row_att(att_row[None])
    game.add_row_def(def_row[None])

    fp.save_pkl(game, path=path)
    print("Done simulation and modify payoff matrix.")
Beispiel #2
0
def initialize(load_env=None, env_name=None):
    """Build the environment and game data, then run the first epoch.

    :param load_env: if a string, the environment is loaded from
        ``<cwd>/env_data/<load_env>.pkl``; otherwise a new environment is
        generated through ``dag.env_rand_gen_and_save(env_name)``.
    :param env_name: name forwarded to ``dag.env_rand_gen_and_save`` when no
        environment is loaded.
    :return: the initialized game object, which is also pickled to
        ``<cwd>/game_data/game.pkl``.
    :raises ValueError: if ``load_env`` is a string but the file is missing.
    """
    banner = "======================================================="
    print(banner)
    print("=======Begin Initialization and first epoch============")
    print(banner)

    # Obtain the environment: generate a fresh one unless a name was given.
    if not isinstance(load_env, str):
        # env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)
    else:
        env_file = os.getcwd() + '/env_data/' + load_env + '.pkl'
        if not fp.isExist(env_file):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(env_file)

    # Keep copies of the graph and the mask.
    env.save_graph_copy()
    env.save_mask_copy()

    # Players and action space live on the environment.
    env.create_players()
    env.create_action_space()

    # Network / training parameters come from a JSON file.
    param = jp.load_json_data(os.getcwd() + '/network_parameters/param.json')

    # Set up the game data container.
    game = game_data.Game_data(env,
                               num_episodes=param['num_episodes'],
                               threshold=param['threshold'])
    game.set_hado_param(param=param['hado_param'])
    game.set_hado_time_step(param['retrain_timesteps'])

    # Point both players at their environment, first via the game's env and
    # then via the local env reference (same order as before).
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)
    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # uniform strategy has been produced ahead of time
    print("epoch 1:", datetime.datetime.now())
    epoch = 1

    first_att_str = 'att_str_epoch1.pkl'
    first_def_str = 'def_str_epoch1.pkl'
    game.add_att_str(first_att_str)
    game.add_def_str(first_def_str)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()
    # Simulate the initial strategies serially to seed the payoff matrix
    # (an MPI variant, do_MPI_sim, is not active here).
    aReward, dReward = series_sim(game.env, game, first_att_str,
                                  first_def_str, game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    game.init_payoffmatrix(dReward, aReward)

    # With one strategy per player each equilibrium entry is the vector [1].
    nash = {
        0: np.array([1], dtype=np.float32),
        1: np.array([1], dtype=np.float32),
    }
    game.add_nasheq(epoch, nash)

    # save a copy of game data
    fp.save_pkl(game, os.getcwd() + '/game_data/game.pkl')

    sys.stdout.flush()
    return game
Beispiel #3
0
def simulate_payoff(co_payoff_matrix_def,
                    co_payoff_matrix_att,
                    str_book_def,
                    str_book_att,
                    new_def,
                    new_att,
                    save_path='./payoff_data/'):
    """
    Extend the partial payoff matrices of the combined game with newly produced strategies.

    The game is loaded from ``<cwd>/data/game.pkl``. New attacker strategies
    become new columns and new defender strategies become new rows; each new
    cell is filled by a serial ``series_sim`` run, rounded to two decimals.

    :param co_payoff_matrix_def: defender's partial payoff matrix; its shape
        (defender strategies x attacker strategies) fixes the current sizes.
    :param co_payoff_matrix_att: attacker's partial payoff matrix
        (presumably the same shape as the defender's -- not checked here).
    :param str_book_def: dict {position: str_name} of defender strategies.
        Updated in place with the new defender strategies.
    :param str_book_att: dict {position: str_name} of attacker strategies.
        Updated in place with the new attacker strategies.
    :param new_def: newly produced defender's strategies.
    :param new_att: newly produced attacker's strategies.
    :param save_path: currently unused; kept for interface compatibility.
    :return: (co_payoff_matrix_def, co_payoff_matrix_att, str_book_def, str_book_att)
    :raises ValueError: if a position for a new strategy is already present
        in the corresponding strategy book.
    """

    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)

    env = game.env
    num_episodes = game.num_episodes

    # set positions.
    new_att = list(new_att)
    new_def = list(new_def)

    num_str_def, num_str_att = np.shape(co_payoff_matrix_def)
    num_new_str_def = len(new_def)
    num_new_str_att = len(new_att)
    new_dim_att = num_str_att + num_new_str_att

    # Register the new strategies, each in the book of its own player.
    # (Defender strategies were previously written into the attacker book.)
    for offset, strategy in enumerate(new_def):
        idx = num_str_def + offset
        if idx in str_book_def:
            raise ValueError("idx already exists.")
        str_book_def[idx] = strategy

    for offset, strategy in enumerate(new_att):
        idx = num_str_att + offset
        if idx in str_book_att:
            raise ValueError("idx already exists.")
        str_book_att[idx] = strategy

    # Positions are (defender index, attacker index).
    # Columns first: one per new attacker strategy, over the existing defenders.
    position_col = [
        [(k, num_str_att + i) for k in range(num_str_def)]
        for i in range(num_new_str_att)
    ]
    # Then rows: one per new defender strategy, over all attackers (old and
    # new). The row index starts after the existing defender strategies.
    position_row = [
        [(num_str_def + i, k) for k in range(new_dim_att)]
        for i in range(num_new_str_def)
    ]

    def _simulate(positions):
        """Simulate every position; return rounded (attacker, defender) reward arrays."""
        att_rewards = []
        def_rewards = []
        for idx_def, idx_att in positions:
            # TODO: reset the load path
            aReward, dReward = series_sim(env, game, str_book_att[idx_att],
                                          str_book_def[idx_def], num_episodes)
            att_rewards.append(aReward)
            def_rewards.append(dReward)
        return (np.round(np.array(att_rewards), 2),
                np.round(np.array(def_rewards), 2))

    col_results = [_simulate(positions) for positions in position_col]
    row_results = [_simulate(positions) for positions in position_row]

    # Attacker matrix: columns are appended before rows so that each new row
    # already has the widened column count.
    for att_col, _ in col_results:
        co_payoff_matrix_att = add_col(co_payoff_matrix_att,
                                       np.reshape(att_col, (num_str_def, 1)))
    for att_row, _ in row_results:
        co_payoff_matrix_att = add_row(co_payoff_matrix_att, att_row[None])

    # Defender matrix, same order.
    for _, def_col in col_results:
        co_payoff_matrix_def = add_col(co_payoff_matrix_def,
                                       np.reshape(def_col, (num_str_def, 1)))
    for _, def_row in row_results:
        co_payoff_matrix_def = add_row(co_payoff_matrix_def, def_row[None])

    return co_payoff_matrix_def, co_payoff_matrix_att, str_book_def, str_book_att