Example #1
def partial_matrix_ne_search(payoff_matrix_att, payoff_matrix_def,
                             child_partition):
    ne_dict = {}
    heuristic_pos = find_heuristic_position(child_partition)
    for method in child_partition:
        ne_dict[method] = {}
        # find the position of heuristic.
        h_pos = heuristic_pos[method]

        # find the NE of the partial matrix restricted to this heuristic's
        # block of strategies.
        subgame_def = payoff_matrix_def[h_pos[0]:h_pos[1], h_pos[0]:h_pos[1]]
        subgame_att = payoff_matrix_att[h_pos[0]:h_pos[1], h_pos[0]:h_pos[1]]
        nash_att, nash_def = ga.do_gambit_analysis(subgame_def,
                                                   subgame_att,
                                                   maxent=False,
                                                   minent=False)

        # add a zero for uniform strategy.
        ne_dict[method][0] = np.insert(nash_def, 0, 0)
        ne_dict[method][1] = np.insert(nash_att, 0, 0)

    return ne_dict
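
Several of these examples rely on a find_heuristic_position(child_partition) helper that is not shown. A minimal sketch of what it is assumed to do, given that child_partition maps each heuristic to its number of strategies and the heuristics' blocks are stacked consecutively in the combined payoff matrices (this is an assumption, not the original implementation):

# Hypothetical sketch; not part of the original code base.
def find_heuristic_position(child_partition):
    # child_partition: {method name: number of strategies of that heuristic}.
    # Returns {method name: (start, end)} index ranges, assuming the blocks of
    # the combined payoff matrices are stacked in iteration order.
    positions = {}
    start = 0
    for method in child_partition:
        end = start + child_partition[method]
        positions[method] = (start, end)
        start = end
    return positions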
Example #2
def find_all_NE(payoffmatrix_def, payoffmatrix_att):
    # Paired NE.
    # nash_att_list = [np.array([0.5,0.2,0.3]), ...]
    # nash_def_list = [np.array([0.5,0.2,0.3]), ...]
    nash_att_list, nash_def_list = do_gambit_analysis(payoffmatrix_def,
                                                      payoffmatrix_att,
                                                      return_list=True)
    return nash_att_list, nash_def_list
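
A hypothetical usage sketch for find_all_NE, assuming the gambit-based do_gambit_analysis used throughout these examples is importable and two small random payoff matrices stand in for the real game:

import numpy as np

np.random.seed(0)
payoffmatrix_def = np.round(np.random.normal(size=(3, 3)), 2)
payoffmatrix_att = np.round(np.random.normal(size=(3, 3)), 2)

# Each pair (nash_att_list[i], nash_def_list[i]) is one equilibrium.
nash_att_list, nash_def_list = find_all_NE(payoffmatrix_def, payoffmatrix_att)
for nash_att, nash_def in zip(nash_att_list, nash_def_list):
    print("attacker NE:", nash_att, "defender NE:", nash_def)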
Example #3
def regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    """
    Calculate the epsilon of each subgame.
    :param ne_dict: {"baseline": game.nasheq}
    :return:
    """
    curves_att = {}
    curves_def = {}
    num_str, _ = np.shape(payoffmatrix_att)
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        curves_att[method] = []
        curves_def[method] = []
        start, end = positions[method]
        # Blocks of the combined matrices for this heuristic: the square
        # subgame plus the full rows/columns needed to evaluate deviations
        # over the whole combined game.
        submatrix_def = payoffmatrix_def[:, start:end]
        submatrix_att = payoffmatrix_att[start:end, :]
        subgame_def = payoffmatrix_def[start:end, start:end]
        subgame_att = payoffmatrix_att[start:end, start:end]

        num_method_str = end - start
        for epoch in np.arange(1, num_method_str + 1):
            subsubgame_def = subgame_def[:epoch, :epoch]
            subsubgame_att = subgame_att[:epoch, :epoch]

            nash_att, nash_def = do_gambit_analysis(subsubgame_def,
                                                    subsubgame_att,
                                                    maxent=False,
                                                    minent=True)

            # Pad the NE of the sub-subgame to the size of this heuristic's block.
            padded_def = np.zeros(num_method_str)
            padded_att = np.zeros(num_method_str)
            padded_def[:len(nash_def)] = nash_def
            padded_att[:len(nash_att)] = nash_att
            nash_def = padded_def
            nash_att = padded_att

            nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))

            # Payoff of every pure-strategy deviation in the combined game
            # against the current NE of the sub-subgame.
            payoff_vect_att = np.sum(nash_def * submatrix_att, axis=0)
            payoff_vect_def = np.sum(submatrix_def * nash_att, axis=1)

            nash_payoff_att = np.round(np.sum(nash_def * subgame_att *
                                              nash_att),
                                       decimals=2)
            nash_payoff_def = np.round(np.sum(nash_def * subgame_def *
                                              nash_att),
                                       decimals=2)

            deviation_att = np.max(payoff_vect_att)
            deviation_def = np.max(payoff_vect_def)
            regret_att = np.maximum(deviation_att - nash_payoff_att, 0)
            regret_def = np.maximum(deviation_def - nash_payoff_def, 0)

            curves_att[method].append(regret_att)
            curves_def[method].append(regret_def)

    return curves_att, curves_def
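
The deviation-versus-equilibrium computation above reappears in several later examples. A compact, self-contained restatement of that pattern for a full-length mixed profile (the function name and signature here are illustrative, not from the original code):

import numpy as np

def profile_regret(nash_def, nash_att, payoffmatrix_def, payoffmatrix_att):
    # Regret of the profile (nash_def, nash_att) in the full bimatrix game:
    # best pure-deviation payoff minus the profile's expected payoff, clipped at 0.
    nash_def_col = np.reshape(nash_def, newshape=(len(nash_def), 1))
    ne_payoff_def = np.sum(nash_def_col * payoffmatrix_def * nash_att)
    ne_payoff_att = np.sum(nash_def_col * payoffmatrix_att * nash_att)
    dev_def = np.max(np.sum(payoffmatrix_def * nash_att, axis=1))
    dev_att = np.max(np.sum(nash_def_col * payoffmatrix_att, axis=0))
    return (np.maximum(dev_def - ne_payoff_def, 0),
            np.maximum(dev_att - ne_payoff_att, 0))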
Example #4
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    sys.stdout.flush()
    arg_path = os.getcwd() + '/inner_egta_arg/'

    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')
        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
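
ga.add_new_NE is not shown in these examples. Based on how game.nasheq is read later (defender strategy at index 0, attacker at index 1), an assumed minimal sketch could look like this; the exact container used by the real code base is an assumption:

# Hypothetical sketch of ga.add_new_NE.
def add_new_NE(game, nash_att, nash_def, epoch):
    game.nasheq[epoch] = {0: nash_def, 1: nash_att}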
Example #5
def EGTA(start_hado=2,
         retrain=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    sys.stdout.flush()
    arg_path = os.getcwd() + '/inner_egta_arg/'

    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 18
    while count != 0:
        # while True:
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        fp.save_pkl(game, game_path)
        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #6
def formal_regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    positions = find_heuristic_position(child_partition)
    curves_dict_def = {}
    curves_dict_att = {}
    for method in child_partition:
        curves_dict_def[method] = []
        curves_dict_att[method] = []
    for epoch in np.arange(40):
        for method in child_partition:
            if method == 'RM':
                continue

            start, end = positions[method]
            print(start, end)

            submatrix_att = payoffmatrix_att[start:start + epoch + 1,
                                             start:start + epoch + 1]
            submatrix_def = payoffmatrix_def[start:start + epoch + 1,
                                             start:start + epoch + 1]

            # print('X:', start, start+epoch+1)

            nash_att, nash_def = do_gambit_analysis(submatrix_def,
                                                    submatrix_att,
                                                    maxent=True)

            nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))

            ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
            ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

            dev_def = np.max(
                np.sum(payoffmatrix_def[:, start:start + epoch + 1] * nash_att,
                       axis=1))
            dev_att = np.max(
                np.sum(nash_def * payoffmatrix_att[start:start + epoch + 1, :],
                       axis=0))

            curves_dict_def[method].append(
                np.maximum(dev_def - ne_payoff_def, 0))
            curves_dict_att[method].append(
                np.maximum(dev_att - ne_payoff_att, 0))

    return curves_dict_def, curves_dict_att
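
A hypothetical way to plot the curves returned by formal_regret_curves with matplotlib, using synthetic random payoffs and a two-heuristic partition purely for illustration (the partition sizes and matrix shapes are assumptions):

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)
payoffmatrix_def = np.round(np.random.normal(size=(80, 80)), 2)
payoffmatrix_att = np.round(np.random.normal(size=(80, 80)), 2)
child_partition = {'baseline': 40, 'RS': 40}

curves_def, curves_att = formal_regret_curves(payoffmatrix_def,
                                              payoffmatrix_att,
                                              child_partition)
for method in curves_def:
    plt.plot(curves_def[method], label=method)
plt.xlabel('number of strategies in the subgame')
plt.ylabel('defender regret')
plt.legend()
plt.show()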
Example #7
def NE_regret(regret_vect_att, regret_vect_def, payoffmatrix_att,
              payoffmatrix_def, child_partition):
    """
    Calculate the regret of each heuristic with respect to the combined game. The strategies of each heuristic only\
    include those in the NE of each heuristic.
    :param regret_vect: regret vector calculated from combined game.
    :param ne_dict: {"baseline": {0: np.array([1,0,1,0...]), 1: np.array([1,0,1,0...])},
    "RS": np.array([0,0,1,0...])} when a strategy is in a NE, that strategy is indicated by 1.
    :return:
    """

    regret_dict = {}
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        submatrix_att = payoffmatrix_att[start:end, start:end]
        submatrix_def = payoffmatrix_def[start:end, start:end]

        # submatrix_att = payoffmatrix_att[start:start+32, start:start+32]
        # submatrix_def = payoffmatrix_def[start:start+32, start:start+32]

        nash_att, nash_def = do_gambit_analysis(submatrix_def,
                                                submatrix_att,
                                                maxent=True)

        nash_att[nash_att > 0] = 1
        nash_def[nash_def > 0] = 1

        regret_dict[method] = {
            0:
            np.sum(regret_vect_def[start:end] * nash_def) / np.sum(nash_def),
            1: np.sum(regret_vect_att[start:end] * nash_att) / np.sum(nash_att)
        }

        # regret_dict[method] = {0: np.sum(regret_vect_def[start:start+30] * nash_def) / np.sum(nash_def),
        #                        1: np.sum(regret_vect_att[start:start+30] * nash_att) / np.sum(nash_att)}

    return regret_dict
Example #8
def regret_fixed_matrix(payoffmatrix_def, payoffmatrix_att, child_partition):
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        # submatrix_att = payoffmatrix_att[start:end, start:end]
        # submatrix_def = payoffmatrix_def[start:end, start:end]

        submatrix_att = payoffmatrix_att[start:start + 32, start:start + 32]
        submatrix_def = payoffmatrix_def[start:start + 32, start:start + 32]

        nash_att, nash_def = do_gambit_analysis(submatrix_def,
                                                submatrix_att,
                                                maxent=True)

        nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))

        ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
        ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

        # dev_def = np.max(np.sum(payoffmatrix_def[:, start:end] * nash_att, axis=1))
        # dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:end, :], axis=0))

        dev_def = np.max(
            np.sum(payoffmatrix_def[:, start:start + 32] * nash_att, axis=1))
        # print(np.argmax(np.sum(payoffmatrix_def[:, start:end] * nash_att, axis=1)))
        dev_att = np.max(
            np.sum(nash_def * payoffmatrix_att[start:start + 32, :], axis=0))
        # print(np.argmax(np.sum(nash_def * payoffmatrix_att[start:end, :], axis=0)))

        print('------------------------------------------')
        print("The current method is ", method)
        print("The defender's regret is", np.maximum(dev_def - ne_payoff_def,
                                                     0))
        print("The attacker's regret is", np.maximum(dev_att - ne_payoff_att,
                                                     0))
    print("==================================================")
Example #9
def ne_search_wo_etrace(payoff_matrix_def, payoff_matrix_att, child_partition):
    position = find_heuristic_position(child_partition)

    total_num_str = 0
    init_flag = False

    # Assume 2 methods. Find candidate NE in the first subgame.
    for method in child_partition:
        if not init_flag:
            nash_att, nash_def = do_gambit_analysis(
                payoff_matrix_def[:child_partition[method], :
                                  child_partition[method]],
                payoff_matrix_att[:child_partition[method], :
                                  child_partition[method]],
                maxent=False,
                minent=False)
            # Strategies of current game
            strategy_set_def = list(range(child_partition[method]))
            strategy_set_att = list(range(child_partition[method]))
            init_flag = True

        total_num_str += child_partition[method]

    # Extend the NE to the length of the combined game.
    zeros_def = np.zeros(total_num_str)
    zeros_att = np.zeros(total_num_str)
    zeros_def[:len(nash_def)] = nash_def
    zeros_att[:len(nash_att)] = nash_att
    nash_def = zeros_def
    nash_att = zeros_att

    # indicator_matrix records which cell has been simulated in the payoff matrix.
    indicator_matrix = np.zeros((total_num_str, total_num_str))
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 1

    nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))

    payoff_def = np.sum(nash_def_T * payoff_matrix_def * nash_att)
    payoff_att = np.sum(nash_def_T * payoff_matrix_att * nash_att)

    support_idx_def = np.where(nash_def > 0)[0]
    support_idx_att = np.where(nash_att > 0)[0]

    # Change to simulation mode when simulation is needed.
    while True:

        for x in support_idx_def:
            indicator_matrix[x, :] = 1
        for y in support_idx_att:
            indicator_matrix[:, y] = 1

        dev_payoff_def = np.max(np.sum(payoff_matrix_def * nash_att, axis=1))
        dev_payoff_att = np.max(np.sum(nash_def_T * payoff_matrix_att, axis=0))

        dev_def = np.argmax(np.sum(payoff_matrix_def * nash_att, axis=1))
        dev_att = np.argmax(np.sum(nash_def_T * payoff_matrix_att, axis=0))

        if dev_payoff_def <= payoff_def and dev_payoff_att <= payoff_att:
            break

        # Add the best deviations to the strategy sets and mark their
        # rows/columns as simulated.
        strategy_set_def.append(dev_def)
        strategy_set_def.sort()
        indicator_matrix[dev_def, :] = 1

        strategy_set_att.append(dev_att)
        strategy_set_att.sort()
        indicator_matrix[:, dev_att] = 1

        subgame_def = es(strategy_set_def, strategy_set_att, payoff_matrix_def)
        subgame_att = es(strategy_set_def, strategy_set_att, payoff_matrix_att)

        # print(strategy_set_def, strategy_set_att)
        # print(np.shape(subgame_def), np.shape(subgame_att))

        nash_att, nash_def = do_gambit_analysis(subgame_def,
                                                subgame_att,
                                                maxent=False,
                                                minent=False)
        nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))

        payoff_def = np.sum(nash_def_T * subgame_def * nash_att)
        payoff_att = np.sum(nash_def_T * subgame_att * nash_att)

        zeros_def = np.zeros(total_num_str)
        zeros_att = np.zeros(total_num_str)
        for pos, value in zip(strategy_set_att, nash_att):
            zeros_att[pos] = value
        for pos, value in zip(strategy_set_def, nash_def):
            zeros_def[pos] = value

        nash_def = zeros_def
        nash_att = zeros_att

        support_idx_def = np.where(nash_def > 0)[0]
        support_idx_att = np.where(nash_att > 0)[0]

    # Mark the heuristics' own subgame blocks with 5 in the indicator matrix.
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 5

    return nash_def, nash_att, indicator_matrix
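
The es(...) helper used above to build the subgames is not shown. A minimal sketch under the assumption that it simply extracts the submatrix whose rows and columns are the given defender and attacker strategy indices:

import numpy as np

# Hypothetical sketch of es(...); np.ix_ builds the row/column cross product.
def es(strategy_set_def, strategy_set_att, payoff_matrix):
    return payoff_matrix[np.ix_(strategy_set_def, strategy_set_att)]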
Example #10
def run(p1_payoff, p2_payoff):
    np.random.seed(0)
    regret_list = []
    str_p1 = []
    str_p2 = []
    epoch = 0
    x1, x2 = 0, 0
    str_p1.append(x1)
    str_p2.append(x2)
    subgame_u1 = extract_submatrix(
        np.array(str_p1) * 2,
        np.array(str_p2) * 2, p1_payoff)
    subgame_u2 = extract_submatrix(
        np.array(str_p1) * 2,
        np.array(str_p2) * 2, p2_payoff)
    is_terminal = True
    switch = False
    while is_terminal:
        epoch += 1
        nelist = do_gambit_analysis(subgame_u1, subgame_u2, return_list=True)
        # nash_2, nash_1 = do_gambit_analysis(subgame_u1, subgame_u2, maxent=False, minent=True)
        nash_2, nash_1 = do_gambit_analysis(subgame_u1,
                                            subgame_u2,
                                            maxent=True,
                                            minent=False)
        regret_list.append(
            regret(nash_1, nash_2, np.array(str_p1), np.array(str_p2),
                   subgame_u1, subgame_u2, p1_payoff, p2_payoff))

        # DO solver
        if switch:
            x1 = BR(np.array(str_p2) * 2, nash_2, p1_payoff)
            x2 = BR(np.array(str_p1) * 2, nash_1, p1_payoff)

        # Beneficial Deviation
        if not switch:
            x1 = beneficial_dev(np.array(str_p2) * 2, nash_2, p1_payoff)
            x2 = beneficial_dev(np.array(str_p1) * 2, nash_1, p1_payoff)

        # random
        # x1 = rand(np.array(str_p1))
        # x2 = rand(np.array(str_p2))

        if epoch == 10:
            switch = True

        str_p1.append(x1)
        str_p2.append(x2)

        print("--------------------------------")
        print("Current Epoch is ", epoch)
        print("ne_list:", nelist)
        print("Current NE is ", nash_1, nash_2)
        print("x1:", str_p1)
        print("x2:", str_p2)

        # if x1 not in str_p1:
        #     str_p1.append(x1)
        # if x2 not in str_p2:
        #     str_p2.append(x2)

        subgame_u1 = extract_submatrix(
            np.array(str_p1) * 2,
            np.array(str_p2) * 2, p1_payoff)
        subgame_u2 = extract_submatrix(
            np.array(str_p1) * 2,
            np.array(str_p2) * 2, p2_payoff)

        if epoch == 20:
            is_terminal = False
            print(regret_list)
            print("x1:", str_p1)
            print("x2:", str_p2)
Example #11
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
Example #12
def EGTA(env,
         game,
         start_hado=2,
         retrain=False,
         transfer=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    proc = psutil.Process(os.getpid())

    count = 18
    while count != 0:
        # while True:
        mem0 = proc.memory_info().rss
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        #TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        if epoch == 2 and transfer:
            transfer_flag = False
        elif transfer:
            transfer_flag = True
        else:
            transfer_flag = False

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")
        mem2 = proc.memory_info().rss

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss
        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0,
              (mem3 - mem0) / mem0)
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #13
def _run(env,
         game,
         meta_method_name,
         epoch: int = 1,
         game_path: str = None,
         n_processes: int = 1):
    assert n_processes > 0, "Invalid number of processors."
    if game_path is None:
        game_path = osp.join(settings.get_run_dir(), "game.pkl")

    logger.info("=======================================================")
    logger.info("===============Begin Running DO-EGTA===================")
    logger.info("=======================================================")

    proc = psutil.Process(os.getpid())
    result_dir = settings.get_run_dir()

    selector = meta_method_selector(meta_method_name)

    count = 80
    while count != 0:
        mem0 = proc.memory_info().rss

        # Fix opponent strategy.
        mix_str_def, mix_str_att = selector.sample(game, epoch)

        # Save mixed strategies.
        # with open(osp.join(result_dir, f"mix_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_def, outfile)
        # with open(osp.join(result_dir, f"mix_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_att, outfile)
        # with open(osp.join(result_dir, f"payoff_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_def, outfile)
        # with open(osp.join(result_dir, f"payoff_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_att, outfile)

        # Equilibrium pay-off.
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        logger.info("Epoch " + str(epoch))
        epoch_dir = osp.join(result_dir, f"epoch_{epoch}")

        # Summary writer for each epoch.
        writer = SummaryWriter(logdir=epoch_dir)

        # train and save RL agents

        # Train new best-response policies.
        if n_processes > 1:
            logger.info("Begining training attacker and defender in parallel.")
            time_training = time.time()
            job_queue = multiprocessing.SimpleQueue()
            result_queue = multiprocessing.SimpleQueue()

            attacker_trainer = LearnerWorker(job_queue, result_queue, 1,
                                             mix_str_def, epoch)
            defender_trainer = LearnerWorker(job_queue, result_queue, 0,
                                             mix_str_att, epoch)

            attacker_trainer.start()
            defender_trainer.start()

            # Submit training jobs on our game.
            for _ in range(2):
                job_queue.put(CloudpickleWrapper(game))
            # Send sentinel values to tell processes to cleanly shutdown (1 per worker).
            for _ in range(2):
                job_queue.put(None)

            attacker_trainer.join()
            defender_trainer.join()

            # Collect and report results. We need to sort the results because they may appear in any order.
            results = []
            for _ in range(2):
                results += [result_queue.get()]
            # Put defender first then attacker.
            results = results if not results[0][0] else results[::-1]

            # Process results into expected variables for non-distributed.
            a_BD = results[1][1]
            d_BD = results[0][1]

            logger.info("Done training attacker and defender.")
            logger.info(f"Defender training report: \n{results[0][2]}")
            logger.info(f"Attacker training report: \n{results[1][2]}")
            time_training = time.time() - time_training

        else:
            logger.info("Begin training attacker......")
            time_train_attacker = time.time()
            a_BD, report = training.train(game, 1, mix_str_def, epoch, writer)
            time_train_attacker = time.time() - time_train_attacker
            logger.info(f"\n{report}")
            logger.info("Attacker training done......")

            logger.info("Begin training defender......")
            time_train_defender = time.time()
            d_BD, report = training.train(game, 0, mix_str_att, epoch, writer)
            time_train_defender = time.time() - time_train_defender
            logger.info(f"\n{report}")
            logger.info("Defender training done......")

        mem1 = proc.memory_info().rss

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        mem2 = proc.memory_info().rss

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        time_extend_game = time.time()
        game = simulation.simulate_expanded_game(game=game,
                                                 n_processes=n_processes,
                                                 save_dir=epoch_dir,
                                                 summary_writer=writer)
        time_extend_game = time.time() - time_extend_game
        mem3 = proc.memory_info().rss

        # find nash equilibrium using gambit analysis
        time_gambit = time.time()
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        logger.info("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        time_gambit = time.time() - time_gambit

        logger.info("RESULTS:")
        logger.info('  - a_BD_list: {}'.format(game.att_BD_list))
        logger.info('  - aPayoff: {}'.format(game.att_payoff))
        logger.info('  - d_BD_list: {}'.format(game.def_BD_list))
        logger.info('  - dPayoff: {}'.format(game.def_payoff))
        logger.info("MEM: {}, {}, {}.".format(
            (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0))
        logger.info("TIME: ")
        if n_processes == 1:
            logger.info(f"  - Training attacker: {time_train_attacker}")
            logger.info(f"  - Training defender: {time_train_defender}")
        else:
            logger.info(f"  - Training: {time_training}")
        logger.info(f"  - Extend game: {time_extend_game}")
        logger.info(f"  - Gambit: {time_gambit}")
        logger.info("Round_" + str(epoch) + " has done and game was saved.")
        logger.info("=======================================================")

        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    logger.info("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #14
# ga.encode_gambit_file(poDef,poAtt)
#
# ga.gambit_analysis()

# ga.decode_gambit_file()

n = 10
poDef = np.random.normal(size=(n,n))
poAtt = np.random.normal(size=(n,n))
poDef = np.round(poDef,2)
poAtt = np.round(poAtt,2)

# print(poAtt)

t1 = time.time()
nash_att, nash_def = ga.do_gambit_analysis(poDef, poAtt)
t2 = time.time()

print("time:",t2-t1)
print(nash_att, nash_def)

# nash_att, nash_def = ga.decode_gambit_file()
# print(nash_att, nash_def)

# a = '19/30,0,11/30,0,0,0,0,0,0,0,34/101,0,0,0,67/101,0,0,0,0,0'
# b = a.split(',')
# b = float(b)
# c = np.array(b,dtype=np.float)
# print(c)
# print(c[0])
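
The commented-out lines above try to decode a gambit strategy string with fractional entries, but float() on the whole list fails. A working sketch of that decoding using fractions.Fraction:

from fractions import Fraction
import numpy as np

a = '19/30,0,11/30,0,0,0,0,0,0,0,34/101,0,0,0,67/101,0,0,0,0,0'
# Parse each entry (e.g. '19/30' or '0') as an exact fraction, then to float.
c = np.array([float(Fraction(x)) for x in a.split(',')])
print(c)
print(c[0])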