def _load_policy_entry(load_path, scope_source, nn, game, identity):
    """Load one strategy file and return it as ``(policy, session, graph)``.

    Files whose name contains ``"epoch1.pkl"`` are read directly with
    ``fp.load_pkl`` and carry no session or graph (both are ``None``).
    Every other file is loaded through ``load_action_class`` using the scope
    that ``scope_finder`` derives from ``scope_source``.
    """
    if "epoch1.pkl" in nn:
        return fp.load_pkl(load_path), None, None
    scope = scope_finder(scope_source)
    nn_act, sess, graph = load_action_class(
        load_path, scope, game, training_flag=identity)
    return nn_act, sess, graph


def load_policies(game, child_partition, identity, path_dict=None):
    """Load every stored strategy of one player for each partition key.

    Args:
        game: Passed through unchanged to ``load_action_class``.
        child_partition: Mapping from a key to the number of strategy files
            to load for that key.
        identity: ``0`` for the defender, ``1`` for the attacker. Also
            forwarded as ``training_flag`` to ``load_action_class``.
        path_dict: Optional nested mapping where ``path_dict[identity][key]``
            is the directory prefix of the files for ``key``. When ``None``,
            the module-level ``str_path_def`` / ``str_path_att`` prefix is
            used for every key.

    Returns:
        When ``path_dict`` is ``None``: a flat dict mapping each file name to
        a ``(policy, session, graph)`` tuple. Otherwise: a dict mapping each
        key to such a dict.

    Raises:
        ValueError: If ``identity`` is neither ``0`` nor ``1``.
    """
    if identity == 0:
        # Defender's policies.
        mid_name = '_def_str_epoch'
        if path_dict is None:
            path = str_path_def
    elif identity == 1:
        # Attacker's policies.
        mid_name = '_att_str_epoch'
        if path_dict is None:
            path = str_path_att
    else:
        raise ValueError("identity is not correct")

    nested = path_dict is not None
    str_dict = {}
    for key in child_partition:
        if nested:
            str_dict[key] = {}
            target = str_dict[key]
        else:
            target = str_dict

        # NOTE(review): file indices run from 2 to count + 1, so the
        # "epoch1.pkl" (uniform strategy) file is never requested unless the
        # key itself contains that text -- confirm this is intended.
        for i in np.arange(1, child_partition[key] + 1):
            nn = key + mid_name + str(i + 1) + '.pkl'
            if nested:
                # Looked up lazily so a key with zero strategies does not
                # need an entry in path_dict.
                path = path_dict[identity][key]
            # Strategies are kept as a tuple with parameters, session, graph.
            target[nn] = _load_policy_entry(path + nn, path, nn, game, identity)
    return str_dict
def load_policies(game, child_partition, identity):
    """Load one player's stored strategies from the ``combined_game`` folder.

    Args:
        game: Passed through unchanged to ``load_action_class``.
        child_partition: Mapping from a key to the number of strategy files
            to load for that key.
        identity: ``0`` selects the defender, ``1`` the attacker. Also
            forwarded as ``training_flag`` to ``load_action_class``.

    Returns:
        Dict mapping each relative file name to a
        ``(policy, session, graph)`` tuple; session and graph are ``None``
        for files whose name contains ``"epoch1.pkl"``.

    Raises:
        ValueError: If ``identity`` is neither ``0`` nor ``1``.
    """
    if identity != 0 and identity != 1:
        raise ValueError("identity is not correct")

    if identity == 0:
        # Defender's policies.
        file_stem = def_str_abs_path + 'def_str_epoch'
    else:
        # Attacker's policies.
        file_stem = att_str_abs_path + 'att_str_epoch'

    root = os.getcwd() + '/combined_game/'
    loaded = {}
    for key in child_partition:
        stop = child_partition[key] + 1
        for idx in np.arange(1, stop):
            # e.g. "RS/attacker_strategies/def_str_epoch2.pkl"
            rel_name = key + file_stem + str(idx + 1) + '.pkl'
            full_path = root + rel_name

            # Each strategy is stored as (parameters, session, graph).
            if "epoch1.pkl" in rel_name:
                loaded[rel_name] = (fp.load_pkl(full_path), None, None)
                continue

            scope = scope_finder(full_path)
            policy, session, tf_graph = load_action_class(
                full_path, scope, game, training_flag=identity)
            loaded[rel_name] = (policy, session, tf_graph)
    return loaded
def series_sim(env, game, nn_att, nn_def, num_episodes):
    """Simulate ``num_episodes`` episodes in series and average the payoffs.

    ``nn_att`` and ``nn_def`` can each be either a ``str`` or an
    ``np.ndarray``. A ``str`` is the file name of a strategy; an
    ``np.ndarray`` is a mixed strategy, i.e. the probabilities used to sample
    a name from ``game.att_str`` / ``game.def_str``.

    Args:
        env: Environment providing ``T``, ``G``, ``attacker``, ``defender``
            and ``reset_everything()``.
        game: Provides ``att_str`` / ``def_str`` and is forwarded to
            ``load_action_class``.
        nn_att: Attacker strategy (name or mixed strategy).
        nn_def: Defender strategy (name or mixed strategy).
        num_episodes: Number of episodes to simulate.

    Returns:
        Tuple ``(mean attacker reward, mean defender reward)``, each rounded
        to two decimals.
    """
    # Collected in plain lists: np.append copies the whole array on every
    # call, which made the accumulation quadratic in num_episodes.
    aReward_list = []
    dReward_list = []

    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    T = env.T

    # Test if nn_att and nn_def point to one single strategy. A mixed
    # strategy counts as "single" when exactly one probability exceeds 0.95;
    # in that case the strategy is sampled and loaded once (episode 0) and
    # reused for all remaining episodes.
    single_str_att = True
    single_str_def = True
    if isinstance(nn_att, np.ndarray):
        if len(np.where(nn_att > 0.95)[0]) != 1:
            single_str_att = False
    if isinstance(nn_def, np.ndarray):
        if len(np.where(nn_def > 0.95)[0]) != 1:
            single_str_def = False

    for i in range(num_episodes):
        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        if i == 0 or not single_str_att:
            nn_att = copy.copy(nn_att_saved)
            if isinstance(nn_att, np.ndarray):
                nn_att = np.random.choice(game.att_str, p=nn_att)
            # "epoch1.pkl" is stored as a plain pickle and needs no session.
            att_uniform_flag = "epoch1.pkl" in nn_att

            path = os.getcwd() + "/attacker_strategies/" + nn_att
            if att_uniform_flag:
                nn_att_act = fp.load_pkl(path)
            else:
                att_scope = 'att_str_epoch' + str(1) + '.pkl'
                training_flag = 1
                nn_att_act, sess1, graph1 = load_action_class(
                    path, att_scope, game, training_flag)

        if i == 0 or not single_str_def:
            nn_def = copy.copy(nn_def_saved)
            if isinstance(nn_def, np.ndarray):
                nn_def = np.random.choice(game.def_str, p=nn_def)
            def_uniform_flag = "epoch1.pkl" in nn_def

            path = os.getcwd() + "/defender_strategies/" + nn_def
            if def_uniform_flag:
                nn_def_act = fp.load_pkl(path)
            else:
                def_scope = "def_str_epoch" + str(0) + '.pkl'
                training_flag = 0
                nn_def_act, sess2, graph2 = load_action_class(
                    path, def_scope, game, training_flag)

        for t in range(T):
            timeleft = T - t

            # Both players choose their actions for this time step.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)

            att_action_set = attacker.attact
            def_action_set = defender.defact

            # Attacker's actions: each one adds its cost and activates the
            # target node with probability actProb.
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # check OR node (the action is an edge)
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node (the action is the node itself)
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1

            # Defender's actions are applied after the attacker's, so a
            # defended node ends the step inactive.
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # Payoffs for every target node that is active after this step.
            _, targetset = get_Targets(G)
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']

            # Update players' observations.
            # Update defender's observation.
            defender.update_obs(defender.get_def_hadAlert(G))
            defender.save_defact2prev()
            defender.defact.clear()
            # Update attacker's observation.
            attacker.update_obs(attacker.get_att_isActive(G))
            attacker.attact.clear()

        aReward_list.append(aReward)
        dReward_list.append(dReward)

    return np.round(np.mean(aReward_list), 2), np.round(np.mean(dReward_list), 2)
def series_sim_retrain(env, game, nn_att, nn_def, num_episodes):
    """Simulate episodes in series for a retrained strategy and average payoffs.

    ``nn_att`` and ``nn_def`` can each be either a ``str`` or an
    ``np.ndarray``. An ``np.ndarray`` is a mixed strategy: a name is sampled
    from ``game.att_str`` / ``game.def_str`` and loaded from the regular
    ``attacker_strategies`` / ``defender_strategies`` folder, using the
    sampled name as scope. A ``str`` is loaded from ``retrain_att`` /
    ``retrain_def`` with the fixed retrain scope.

    Args:
        env: Environment providing ``T``, ``G``, ``attacker``, ``defender``
            and ``reset_everything()``.
        game: Provides ``att_str`` / ``def_str`` and is forwarded to
            ``load_action_class``.
        nn_att: Attacker strategy (name or mixed strategy).
        nn_def: Defender strategy (name or mixed strategy).
        num_episodes: Number of episodes to simulate.

    Returns:
        Tuple ``(mean attacker reward, mean defender reward)``, not rounded.
    """
    # Collected in plain lists: np.append copies the whole array on every
    # call, which made the accumulation quadratic in num_episodes.
    aReward_list = []
    dReward_list = []

    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    T = env.T

    # A mixed strategy counts as "single" when exactly one probability
    # exceeds 0.95; it is then sampled and loaded only once (episode 0).
    single_str_att = True
    single_str_def = True
    if isinstance(nn_att, np.ndarray):
        if len(np.where(nn_att > 0.95)[0]) != 1:
            single_str_att = False
    if isinstance(nn_def, np.ndarray):
        if len(np.where(nn_def > 0.95)[0]) != 1:
            single_str_def = False

    for i in range(num_episodes):  # can be run parallel
        env.reset_everything()
        G = env.G
        attacker = env.attacker
        defender = env.defender

        aReward = 0
        dReward = 0

        if i == 0 or not single_str_att:
            nn_att = copy.copy(nn_att_saved)
            att_mixed_flag = isinstance(nn_att, np.ndarray)
            if att_mixed_flag:
                nn_att = np.random.choice(game.att_str, p=nn_att)
            # "epoch1.pkl" is stored as a plain pickle and needs no session.
            att_uniform_flag = "epoch1.pkl" in nn_att

            if att_mixed_flag:
                path = os.getcwd() + "/attacker_strategies/" + nn_att
                scope_att = nn_att
            else:
                path = os.getcwd() + "/retrain_att/" + nn_att
                scope_att = 'att_str_retrain' + str(0) + '.pkl'

            if att_uniform_flag:
                nn_att_act = fp.load_pkl(path)
            else:
                training_flag = 1
                nn_att_act, sess1, graph1 = load_action_class(
                    path, scope_att, game, training_flag)

        if i == 0 or not single_str_def:
            nn_def = copy.copy(nn_def_saved)
            def_mixed_flag = isinstance(nn_def, np.ndarray)
            if def_mixed_flag:
                nn_def = np.random.choice(game.def_str, p=nn_def)
            def_uniform_flag = "epoch1.pkl" in nn_def

            if def_mixed_flag:
                path = os.getcwd() + "/defender_strategies/" + nn_def
                scope_def = nn_def
            else:
                path = os.getcwd() + "/retrain_def/" + nn_def
                scope_def = 'def_str_retrain' + str(0) + '.pkl'

            if def_uniform_flag:
                nn_def_act = fp.load_pkl(path)
            else:
                training_flag = 0
                nn_def_act, sess2, graph2 = load_action_class(
                    path, scope_def, game, training_flag)

        # NOTE(review): unlike the num_episodes-based series_sim in this
        # file, no observation update or action-set clearing happens after
        # each time step here -- confirm whether that is intentional.
        for t in range(T):
            timeleft = T - t

            # Both players choose their actions for this time step.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)

            att_action_set = attacker.attact
            def_action_set = defender.defact

            # Attacker's actions: each one adds its cost and activates the
            # target node with probability actProb.
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # check OR node (the action is an edge)
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node (the action is the node itself)
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1

            # Defender's actions are applied after the attacker's.
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # Payoffs for every target node that is active after this step.
            _, targetset = get_Targets(G)
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']

        aReward_list.append(aReward)
        dReward_list.append(dReward)

    return np.mean(aReward_list), np.mean(dReward_list)
def series_sim(env, game, nn_att, nn_def, size):
    """Simulate a retraining match-up in series and average the payoffs.

    ``nn_att`` and ``nn_def`` can each be either a ``str`` or an
    ``np.ndarray``. An ``np.ndarray`` is a mixed strategy: a name is sampled
    from ``game.att_str`` / ``game.def_str`` every episode and loaded from
    ``attacker_strategies`` / ``defender_strategies``. A ``str`` is loaded
    from ``retrain_att`` / ``retrain_def`` with the fixed retrain scope.

    Args:
        env: Environment providing ``T``, ``G_reserved``, ``attacker`` and
            ``defender``; these are deep-copied for every episode.
        game: Provides ``att_str`` / ``def_str`` and is forwarded to
            ``load_action_class``.
        nn_att: Attacker strategy (name or mixed strategy).
        nn_def: Defender strategy (name or mixed strategy).
        size: Selects the number of episodes: 10 when ``size > 20``, 20 when
            ``10 < size <= 20``, otherwise 30.

    Returns:
        Tuple ``(mean attacker reward, mean defender reward)``, not rounded.

    Raises:
        ValueError: If neither player plays a mixed strategy.
    """
    # Collected in plain lists: np.append copies the whole array on every
    # call.
    aReward_list = []
    dReward_list = []

    nn_att_saved = copy.copy(nn_att)
    nn_def_saved = copy.copy(nn_def)

    if size > 20:
        num_epi = 10
    elif size > 10 and size <= 20:
        num_epi = 20
    else:
        num_epi = 30

    T = env.T

    # Fixed: the loop used a hard-coded range(2), leaving num_epi unused.
    for i in range(num_epi):
        G = copy.deepcopy(env.G_reserved)
        attacker = copy.deepcopy(env.attacker)
        defender = copy.deepcopy(env.defender)

        aReward = 0
        dReward = 0

        nn_att = copy.copy(nn_att_saved)
        nn_def = copy.copy(nn_def_saved)

        # Fixed: each player's mixed flag is set independently. Previously
        # the both-mixed case sampled both strategies but left both flags
        # False and therefore always raised the error below.
        att_mixed_flag = isinstance(nn_att, np.ndarray)
        def_mixed_flag = isinstance(nn_def, np.ndarray)

        if att_mixed_flag:
            nn_att = np.random.choice(game.att_str, p=nn_att)
        if def_mixed_flag:
            nn_def = np.random.choice(game.def_str, p=nn_def)

        if not att_mixed_flag and not def_mixed_flag:
            raise ValueError(
                "One player should play mixed strategy in retraining simulation."
            )

        # Fixed: match the full "epoch1.pkl" suffix; the bare "epoch1"
        # substring also matched epoch10 .. epoch19.
        att_uniform_flag = "epoch1.pkl" in nn_att
        def_uniform_flag = "epoch1.pkl" in nn_def

        if att_mixed_flag:
            path = os.getcwd() + "/attacker_strategies/" + nn_att
            scope_att = nn_att
        else:
            path = os.getcwd() + "/retrain_att/" + nn_att
            scope_att = 'att_str_retrain' + str(0) + '.pkl'

        if att_uniform_flag:
            nn_att_act = fp.load_pkl(path)
        else:
            training_flag = 1
            nn_att_act, sess1, graph1 = load_action_class(
                path, scope_att, game, training_flag)

        if def_mixed_flag:
            path = os.getcwd() + "/defender_strategies/" + nn_def
            scope_def = nn_def
        else:
            path = os.getcwd() + "/retrain_def/" + nn_def
            scope_def = 'def_str_retrain' + str(0) + '.pkl'

        if def_uniform_flag:
            nn_def_act = fp.load_pkl(path)
        else:
            training_flag = 0
            nn_def_act, sess2, graph2 = load_action_class(
                path, scope_def, game, training_flag)

        for t in range(T):
            timeleft = T - t

            # Both players choose their actions for this time step.
            if att_uniform_flag:
                attacker.att_greedy_action_builder_single(
                    G, timeleft, nn_att_act)
            else:
                with graph1.as_default():
                    with sess1.as_default():
                        attacker.att_greedy_action_builder_single(
                            G, timeleft, nn_att_act)

            if def_uniform_flag:
                defender.def_greedy_action_builder_single(
                    G, timeleft, nn_def_act)
            else:
                with graph2.as_default():
                    with sess2.as_default():
                        defender.def_greedy_action_builder_single(
                            G, timeleft, nn_def_act)

            att_action_set = attacker.attact
            def_action_set = defender.defact

            # Attacker's actions: each one adds its cost and activates the
            # target node with probability actProb.
            for attack in att_action_set:
                if isinstance(attack, tuple):
                    # check OR node (the action is an edge)
                    aReward += G.edges[attack]['cost']
                    if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                        G.nodes[attack[-1]]['state'] = 1
                else:
                    # check AND node (the action is the node itself)
                    aReward += G.nodes[attack]['aCost']
                    if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                        G.nodes[attack]['state'] = 1

            # Defender's actions are applied after the attacker's.
            for node in def_action_set:
                G.nodes[node]['state'] = 0
                dReward += G.nodes[node]['dCost']

            # Payoffs for every target node that is active after this step.
            _, targetset = get_Targets(G)
            for node in targetset:
                if G.nodes[node]['state'] == 1:
                    aReward += G.nodes[node]['aReward']
                    dReward += G.nodes[node]['dPenalty']

        aReward_list.append(aReward)
        dReward_list.append(dReward)

    return np.mean(aReward_list), np.mean(dReward_list)
def single_sim(param):
    """Simulate a single episode and return both players' total rewards.

    ``nn_att`` and ``nn_def`` can each be either a ``str`` or an
    ``np.ndarray``. A ``str`` is the file name of a strategy; an
    ``np.ndarray`` is a mixed strategy used to sample a name from
    ``game.att_str`` / ``game.def_str``.

    Args:
        param: Tuple ``(G, game, attacker, nn_att, defender, nn_def, T)``.
            ``G`` is modified in place while the episode is played.

    Returns:
        Tuple ``(aReward, dReward)`` accumulated over the ``T`` time steps.
    """
    # TODO: Dealing with uniform str
    aReward = 0
    dReward = 0

    G, game, attacker, nn_att, defender, nn_def, T = param

    # Sample a concrete strategy name for every player that is mixed.
    if isinstance(nn_att, np.ndarray):
        nn_att = np.random.choice(game.att_str, p=nn_att)
    if isinstance(nn_def, np.ndarray):
        nn_def = np.random.choice(game.def_str, p=nn_def)

    # Fixed: match the full "epoch1.pkl" suffix; the bare "epoch1" substring
    # also matched epoch10 .. epoch19.
    att_uniform_flag = "epoch1.pkl" in nn_att
    def_uniform_flag = "epoch1.pkl" in nn_def

    # Fixed: load_action_class is called as (path, scope, game,
    # training_flag) everywhere else in this file; the scope argument was
    # missing here.
    # NOTE(review): the scope names are copied from series_sim -- confirm
    # they are the right ones for this entry point.
    path = os.getcwd() + "/attacker_strategies/" + nn_att
    if att_uniform_flag:
        nn_att_act = fp.load_pkl(path)
    else:
        att_scope = 'att_str_epoch' + str(1) + '.pkl'
        training_flag = 1
        nn_att_act, sess1, graph1 = load_action_class(
            path, att_scope, game, training_flag)

    path = os.getcwd() + "/defender_strategies/" + nn_def
    if def_uniform_flag:
        nn_def_act = fp.load_pkl(path)
    else:
        def_scope = "def_str_epoch" + str(0) + '.pkl'
        training_flag = 0
        nn_def_act, sess2, graph2 = load_action_class(
            path, def_scope, game, training_flag)

    for t in range(T):
        timeleft = T - t

        # Both players choose their actions for this time step.
        if att_uniform_flag:
            attacker.att_greedy_action_builder_single(G, timeleft, nn_att_act)
        else:
            with graph1.as_default():
                with sess1.as_default():
                    attacker.att_greedy_action_builder_single(
                        G, timeleft, nn_att_act)

        if def_uniform_flag:
            defender.def_greedy_action_builder_single(G, timeleft, nn_def_act)
        else:
            with graph2.as_default():
                with sess2.as_default():
                    defender.def_greedy_action_builder_single(
                        G, timeleft, nn_def_act)

        att_action_set = attacker.attact
        def_action_set = defender.defact

        # Attacker's actions: each one adds its cost and activates the
        # target node with probability actProb.
        for attack in att_action_set:
            if isinstance(attack, tuple):
                # check OR node (the action is an edge)
                aReward += G.edges[attack]['cost']
                if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                    G.nodes[attack[-1]]['state'] = 1
            else:
                # check AND node (the action is the node itself)
                aReward += G.nodes[attack]['aCost']
                if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                    G.nodes[attack]['state'] = 1

        # Defender's actions are applied after the attacker's.
        for node in def_action_set:
            G.nodes[node]['state'] = 0
            dReward += G.nodes[node]['dCost']

        # Payoffs for every target node that is active after this step.
        _, targetset = get_Targets(G)
        for node in targetset:
            if G.nodes[node]['state'] == 1:
                aReward += G.nodes[node]['aReward']
                dReward += G.nodes[node]['dPenalty']

    return aReward, dReward