Example #1
def env_rand_gen_and_save(env_name,
                          num_attr_N=11,
                          num_attr_E=4,
                          T=10,
                          graphid=1,
                          numNodes=30,
                          numEdges=100,
                          numRoot=4,
                          numGoals=6,
                          history=3):
    env = Environment(num_attr_N=num_attr_N,
                      num_attr_E=num_attr_E,
                      T=T,
                      graphid=graphid,
                      numNodes=numNodes,
                      numEdges=numEdges,
                      numRoot=numRoot,
                      numGoals=numGoals,
                      history=history)
    env.randomDAG()
    path = os.getcwd() + "/env_data/" + env_name + ".pkl"
    print("env path is ", path)
    # if fp.isExist(path):
    #     raise ValueError("Env with such name already exists.")
    fp.save_pkl(env, path)
    print(env_name + " has been saved.")
    return env
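A minimal usage sketch (the environment name is hypothetical; it assumes the module-level imports of os, fp, and Environment that the snippet relies on):

# Generates a random DAG environment and pickles it under ./env_data/test_env.pkl.
env = env_rand_gen_and_save('test_env', numNodes=30, numEdges=100)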
Example #2
def sim_and_modifiy_Series():
    #TODO: make sure this is correct
    print('Begin simulation and modify payoff matrix.')
    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)

    env = game.env
    num_episodes = game.num_episodes

    #TODO: add str first and then calculate payoff
    old_dim, old_dim1 = game.dim_payoff_def()
    new_dim, new_dim1 = game.num_str()
    if old_dim != old_dim1 or new_dim != new_dim1:
        raise ValueError("Payoff dimension does not match.")

    def_str_list = game.def_str
    att_str_list = game.att_str

    position_col_list = []
    position_row_list = []
    for i in range(new_dim - 1):
        position_col_list.append((i, new_dim - 1))
    for j in range(new_dim):
        position_row_list.append((new_dim - 1, j))

    att_col = []
    att_row = []
    def_col = []
    def_row = []
    #TODO: check the path is correct
    for pos in position_col_list:
        idx_def, idx_att = pos
        # if MPI_flag:
        #     aReward, dReward = do_MPI_sim(att_str_list[idx_att], def_str_list[idx_def])
        # else:
        aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                      def_str_list[idx_def], num_episodes)
        att_col.append(aReward)
        def_col.append(dReward)

    for pos in position_row_list:
        idx_def, idx_att = pos
        # if MPI_flag:
        #     aReward, dReward = do_MPI_sim(att_str_list[idx_att], def_str_list[idx_def])
        # else:
        aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                      def_str_list[idx_def], num_episodes)
        att_row.append(aReward)
        def_row.append(dReward)

    game.add_col_att(
        np.reshape(np.round(np.array(att_col), 2), newshape=(len(att_col), 1)))
    game.add_col_def(
        np.reshape(np.round(np.array(def_col), 2), newshape=(len(def_col), 1)))
    game.add_row_att(np.round(np.array(att_row), 2)[None])
    game.add_row_def(np.round(np.array(def_row), 2)[None])

    fp.save_pkl(game, path=path)
    print("Done simulation and modify payoff matrix.")
Example #3
def do_MPI_sim_retrain(nn_att, nn_def):
    path = os.getcwd()
    path_att = path + '/sim_arg/nn_att.pkl'
    path_def = path + '/sim_arg/nn_def.pkl'
    fp.save_pkl(nn_att, path_att)
    fp.save_pkl(nn_def, path_def)

    command_line = "mpirun python " + path + "/sim_MPI_retrain.py"
    aReward_list = []
    dReward_list = []
    num_mpirun = 5
    for i in range(num_mpirun):
        call_and_wait(command_line)
        aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')
        aReward_list.append(aReward)
        dReward_list.append(dReward)

    # return aReward, dReward
    return np.sum(aReward_list) / num_mpirun, np.sum(dReward_list) / num_mpirun
Example #4
def sim_and_modifiy_MPI():
    #TODO: load game
    path = os.getcwd()
    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    comm = MPI.COMM_WORLD
    rank = comm.rank
    size = comm.size

    nn_att = fp.load_pkl(path + '/sim_arg/nn_att.pkl')
    nn_def = fp.load_pkl(path + '/sim_arg/nn_def.pkl')

    # aReward, dReward = series_sim(game.env, game, nn_att, nn_def, size)
    aReward, dReward = series_sim_single(game.env, game, nn_att, nn_def)
    reward_tuple = (aReward, dReward)
    data = comm.gather(reward_tuple, root=0)
    if rank == 0:
        data = np.array(data)
        fp.save_pkl(np.round(np.sum(data, 0) / size, 1),
                    path=path + '/sim_arg/result.pkl')
Example #5
def do_MPI_sim(nn_att, nn_def):
    path = os.getcwd()
    path_att = path + '/sim_arg/nn_att.pkl'
    path_def = path + '/sim_arg/nn_def.pkl'
    fp.save_pkl(nn_att, path_att)
    fp.save_pkl(nn_def, path_def)

    command_line = "mpirun python " + path + "/sim_MPI.py"

    # aReward_list = []
    # dReward_list = []
    # num_mpirun = 5
    # for i in range(num_mpirun):
    #     call_and_wait(command_line)
    #     aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')
    #     aReward_list.append(aReward)
    #     dReward_list.append(dReward)

    call_and_wait(command_line)
    # sim_and_modifiy_MPI()
    aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')

    return aReward, dReward
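A round-trip sketch: the two strategy file names are pickled for the worker script, mpirun launches sim_MPI.py, and the averaged result is read back (the strategy file names below are illustrative):

# Blocks until sim_MPI.py has written ./sim_arg/result.pkl.
aReward, dReward = do_MPI_sim('att_str_epoch2.pkl', 'def_str_epoch2.pkl')
print('attacker payoff:', aReward, 'defender payoff:', dReward)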
Example #6
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    sys.stdout.flush()
    arg_path = os.getcwd() + '/inner_egta_arg/'

    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')
        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
Example #7
def EGTA(start_hado=2,
         retrain=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    sys.stdout.flush()
    arg_path = os.getcwd() + '/inner_egta_arg/'

    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 18
    while count != 0:
        # while True:
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        fp.save_pkl(game, game_path)
        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)
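A minimal driver sketch, assuming initialize() (Example #9) has already created game_data/game.pkl and the epoch-1 uniform strategies exist:

# Run plain DO-EGTA for 18 rounds starting from epoch 1; set retrain=True for HADO-EGTA.
EGTA(start_hado=2, retrain=False, epoch=1)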
Example #8
def whole_payoff_matrix(num_str, child_partition, env_name='run_env_B', save_path=None, matrix_name=None):
    """
    Simulate the complete payoff matrix.
    :param num_str: number of total strategies.
    :param child_partition: a dict recording number of strategies for each child game. {"baseline": 40, "RS":40}
    :param env_name: name of envrionment
    :param str_range: the range of strategies to be simulated. {"start": 3, "end": 20}
    :param str_path_dict: {0/1:{'baselines': '/home/wangyzh/baselines/attackgraph/attacker_strategies/'}}
    :return: NE
    """
    print('Begin simulating payoff matrix of combined game.')
    print("*********************************************")
    print("*********************************************")
    game = initialize(load_env=env_name, env_name=None)
    print("*********************************************")
    print("*********************************************")
    sys.stdout.flush()

    env = game.env
    num_episodes = game.num_episodes

    # Assume two players have the same number of strategies.
    payoff_matrix_att = np.zeros((num_str, num_str))
    payoff_matrix_def = np.zeros((num_str, num_str))

    att_str_dict = load_policies(game, child_partition, identity=1)
    def_str_dict = load_policies(game, child_partition, identity=0)

    ## method_pos_def records the starting idx of each method when combined.
    method_pos_def = 0
    for key_def in child_partition:
        for i in np.arange(1, child_partition[key_def]+1):
            def_str = key_def + '/defender_strategies/def_str_epoch' + str(i+1) + '.pkl'
            entry_pos_def = method_pos_def + i
            method_pos_att = 0
            for key_att in child_partition:
                print('Current Method is ', (key_def, key_att), "Defender's pos is ", i+1, '# attacker strategies is ', child_partition[key_att])
                sys.stdout.flush()
                for j in np.arange(1,child_partition[key_att]+1):
                    att_str = key_att + '/attacker_strategies/att_str_epoch' + str(j+1) + '.pkl'
                    entry_pos_att = method_pos_att + j
                    # print current simulation info.
                    # if j == child_partition[key_att]:
                    #     print("----------------------------------------------------")
                    #     print('Current position:', (i+1,j+1), 'Pos:', (entry_pos_def-1, entry_pos_att-1))
                    #     sys.stdout.flush()

                    att_nn = att_str_dict[att_str]
                    def_nn = def_str_dict[def_str]

                    aReward, dReward = series_sim_combined(env, att_nn, def_nn, num_episodes=num_episodes)

                    payoff_matrix_att[entry_pos_def-1, entry_pos_att-1] = aReward
                    payoff_matrix_def[entry_pos_def-1, entry_pos_att-1] = dReward

                # update the starting position.
                method_pos_att += child_partition[key_att]

        ## Periodically saving the payoff matrix.
        if save_path is None:
            save_path = os.getcwd() + '/combined_game/matrice/'
        if matrix_name is None:
            fp.save_pkl(payoff_matrix_att, save_path + 'payoff_matrix_att.pkl')
            fp.save_pkl(payoff_matrix_def, save_path + 'payoff_matrix_def.pkl')
        else:
            fp.save_pkl(payoff_matrix_att, save_path + 'payoff_matrix_att_' + matrix_name + '.pkl')
            fp.save_pkl(payoff_matrix_def, save_path + 'payoff_matrix_def_' + matrix_name + '.pkl')
        method_pos_def += child_partition[key_def]

    print('Done simulating payoff matrix of combined game.')
    return payoff_matrix_att, payoff_matrix_def
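The method_pos_def / method_pos_att counters map a per-method strategy index onto a row or column of the combined matrix. A toy sketch of that index arithmetic with a hypothetical two-method partition:

child_partition = {'baseline': 2, 'RS': 3}   # hypothetical strategy counts
method_pos = 0
for key in child_partition:
    for i in range(1, child_partition[key] + 1):
        entry_pos = method_pos + i           # 1-based position in the combined matrix
        print(key, i, '->', entry_pos - 1)   # 0-based row/column index
    method_pos += child_partition[key]
# 'baseline' strategies map to indices 0-1, 'RS' strategies to indices 2-4.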
Example #9
def initialize(load_env=None, env_name=None):
    print("=======================================================")
    print("=======Begin Initialization and first epoch============")
    print("=======================================================")

    # Create Environment
    if isinstance(load_env, str):
        path = os.getcwd() + '/env_data/' + load_env + '.pkl'
        if not fp.isExist(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        # env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)

    # save graph copy
    env.save_graph_copy()
    env.save_mask_copy()

    # create players and point to their env
    env.create_players()
    env.create_action_space()

    # load param
    param_path = os.getcwd() + '/network_parameters/param.json'
    param = jp.load_json_data(param_path)

    # initialize game data
    game = game_data.Game_data(env,
                               num_episodes=param['num_episodes'],
                               threshold=param['threshold'])
    game.set_hado_param(param=param['hado_param'])
    game.set_hado_time_step(param['retrain_timesteps'])
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # uniform strategy has been produced ahead of time
    print("epoch 1:", datetime.datetime.now())
    epoch = 1

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'

    game.add_att_str(act_att)
    game.add_def_str(act_def)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()
    # simulate using random strategies and initialize payoff matrix
    # if MPI_flag:
    #     aReward, dReward = do_MPI_sim(act_att, act_def)
    # else:
    aReward, dReward = series_sim(game.env, game, act_att, act_def,
                                  game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    game.init_payoffmatrix(dReward, aReward)
    ne = {}
    ne[0] = np.array([1], dtype=np.float32)
    ne[1] = np.array([1], dtype=np.float32)
    game.add_nasheq(epoch, ne)

    # save a copy of game data
    game_path = os.getcwd() + '/game_data/game.pkl'
    fp.save_pkl(game, game_path)

    sys.stdout.flush()
    return game
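A usage sketch, assuming an environment pickle named run_env_B already exists under ./env_data/ (as used in Example #8); pass env_name instead to generate a fresh environment:

# Loads the saved environment and runs the first (uniform-strategy) epoch.
game = initialize(load_env='run_env_B', env_name=None)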
Example #10
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
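A restart sketch, assuming a previous run was interrupted and game_data/game.pkl holds results up to epoch 3 (the epoch number is hypothetical):

# Resumes at epoch 4: internally epoch starts at restart_epoch - 1 and is
# incremented at the top of each round.
EGTA_restart(restart_epoch=4, start_hado=2, retrain=False, transfer=False)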
Example #11
def EGTA(env,
         game,
         start_hado=2,
         retrain=False,
         transfer=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    proc = psutil.Process(os.getpid())

    count = 18
    while count != 0:
        # while True:
        mem0 = proc.memory_info().rss
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        #TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents

        if retrain and epoch > start_hado:
            retrain_start = True

        if epoch == 2 and transfer:
            transfer_flag = False
        elif transfer:
            transfer_flag = True
        else:
            transfer_flag = False

        print("Begin training attacker......")
        a_BD = training.training_att(game,
                                     mix_str_def,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game,
                                     mix_str_att,
                                     epoch,
                                     retrain=retrain_start,
                                     transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def,
                                     epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation
        #     # one plays nn and another plays ne strategy
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")
        mem2 = proc.memory_info().rss

        # #TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break
        #
        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss
        #
        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def,
                                                   payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        # break
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0,
              (mem3 - mem0) / mem0)
        count -= 1

        sys.stdout.flush()  #TODO: make sure this is correct.

    print("END: " + str(epoch))
    os._exit(os.EX_OK)
Example #12
import numpy as np
from attackgraph import file_op as fp
import os

DIR_def = os.getcwd() + '/defender_strategies/'
DIR_att = os.getcwd() + '/attacker_strategies/'


def act_att(ob, mask, training_flag, stochastic=True, update_eps=-1):
    if training_flag != 1:
        raise ValueError("training flag for uniform att str is not 1")
    legal_action = np.where(mask[0] == 0)[0]
    return [np.random.choice(legal_action)]


def act_def(ob, mask, training_flag, stochastic=True, update_eps=-1):
    if training_flag != 0:
        raise ValueError("training flag for uniform def str is not 0")
    legal_action = np.where(mask[0] == 0)[0]
    return [np.random.choice(legal_action)]


fp.save_pkl(act_att, DIR_att + "att_str_epoch" + str(1) + ".pkl")
fp.save_pkl(act_def, DIR_def + "def_str_epoch" + str(1) + ".pkl")
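A small sketch of how the pickled uniform strategies behave: the mask marks legal actions with 0, and the strategy samples uniformly among them (the mask below is illustrative):

mask = np.array([[0, 1, 0, 1, 0]])              # actions 0, 2 and 4 are legal
action = act_att(ob=None, mask=mask, training_flag=1)
print(action)                                   # e.g. [2], one uniformly sampled legal action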
Example #13
def train_and_sim():
    arg_path = os.getcwd() + '/inner_egta_arg/'

    start_hado, retrain = fp.load_pkl(arg_path + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    mix_str_def = game.nasheq[epoch][0]
    mix_str_att = game.nasheq[epoch][1]
    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # train and save RL agents

    if retrain and epoch > start_hado:
        retrain_start = True

    print("Begin training attacker......")
    a_BD = training.training_att(game,
                                 mix_str_def,
                                 epoch,
                                 retrain=retrain_start)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game,
                                 mix_str_att,
                                 epoch,
                                 retrain=retrain_start)
    print("Defender training done......")

    if retrain and epoch > start_hado:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulate the retrained strategies and choose the best one as the player's strategy.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # else:
    #
    #     # Judge beneficial deviation
    #     # one plays nn and another plays ne strategy
    #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
    #     nn_def = mix_str_def
    #     # if MPI_flag:
    #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for a_BD.")
    #
    #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = mix_str_att
    #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
    #     # if MPI_flag:
    #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for d_BD.")

    # #TODO: This may lead to early stop.
    # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
    #     print("*************************")
    #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
    #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
    #     print("*************************")
    #     break
    #
    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # simulate and extend the payoff matrix.
    # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, arg_path + 'epoch_arg.pkl')
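A driver sketch, assuming hado_arg.pkl and epoch_arg.pkl have already been pickled under ./inner_egta_arg/ (as in Examples #6 and #7):

# Each call reads the current epoch, trains both agents once, extends the
# payoff matrix, and writes the incremented epoch and game back to disk.
train_and_sim()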