def action(self, game_state: GameState) -> int:
    """
    Samples an action from the policy.

    :param game_state: the game state
    :return: action_id
    """
    # Feature engineering
    attacker_obs = game_state.get_attacker_observation(
        self.game_config.network_config, local_view=True,
        reconnaissance=self.game_config.reconnaissance_actions)
    defender_obs = game_state.get_defender_observation(self.game_config.network_config)
    neighbor_defense_attributes = np.zeros((attacker_obs.shape[0], defender_obs.shape[1]))
    for node in range(attacker_obs.shape[0]):
        if int(attacker_obs[node][-1]) == 1:
            id = int(attacker_obs[node][-2])
            neighbor_defense_attributes[node] = defender_obs[id]
    node_ids = attacker_obs[:, -2]
    node_reachable = attacker_obs[:, -1]
    det_values = neighbor_defense_attributes[:, -1]
    temp = neighbor_defense_attributes[:, 0:-1] - attacker_obs[:, 0:-2]
    features = []
    for idx, row in enumerate(temp):
        t = row.tolist()
        t.append(node_ids[idx])
        t.append(node_reachable[idx])
        t.append(det_values[idx])
        features.append(t)
    features = np.array(features)
    state = torch.from_numpy(features.flatten()).float()

    # Move to GPU if using GPU
    if torch.cuda.is_available() and self.config.gpu:
        device = torch.device("cuda:" + str(self.config.gpu_id))
        state = state.to(device)

    legal_actions, non_legal_actions = self.get_legal_attacker_actions(attacker_obs, game_state)

    # Forward pass using the current policy network to predict P(a|s)
    action_probs = self.attacker_policy_network(state)

    # Set probability of non-legal actions to 0
    action_probs_1 = action_probs.clone()
    if len(legal_actions) > 0 and len(non_legal_actions) < len(action_probs_1):
        action_probs_1[non_legal_actions] = 0

    # Use torch.distributions package to create a parameterizable probability distribution of the learned policy
    policy_dist = Categorical(action_probs_1)

    # Sample an action from the probability distribution
    action = policy_dist.sample()

    global_action = PolicyGradientAgent.convert_local_attacker_action_to_global(action.item(), attacker_obs)
    return global_action
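# --- Illustrative example (not part of the library) ---
# A minimal, self-contained sketch of the action-masking pattern used above:
# probabilities of non-legal actions are zeroed before constructing the
# Categorical distribution, which renormalizes the remaining mass before
# sampling. The probability values and illegal-action indices are made up.
import torch
from torch.distributions import Categorical

action_probs = torch.tensor([0.10, 0.25, 0.30, 0.20, 0.15])  # hypothetical policy output over 5 actions
non_legal_actions = [1, 3]                                    # hypothetical indices of illegal actions

masked_probs = action_probs.clone()
masked_probs[non_legal_actions] = 0.0   # zero out illegal actions

dist = Categorical(masked_probs)        # Categorical normalizes the unnormalized probabilities
action = dist.sample()                  # only legal actions can be drawn
log_prob = dist.log_prob(action)        # log-probability used later by the policy-gradient loss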
def test_copy(self):
    state = GameState()
    num_attack_types = 10
    rows = 4
    cols = 4
    network_config = NetworkConfig(rows, cols)
    num_nodes = len(network_config.node_list)
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    copy = state.copy()
    assert copy.num_hacks == state.num_hacks
    assert np.array_equal(copy.attack_values, state.attack_values)
    assert np.array_equal(copy.defense_det, state.defense_det)
    assert np.array_equal(copy.defense_values, state.defense_values)
def set_load_initial_state(self, initial_state_path: str) -> None:
    """
    Sets the initial state by loading it from disk

    :param initial_state_path: path to the initial state saved on disk
    :return: None
    """
    self.initial_state = GameState.load(initial_state_path)
def test_default_state(self):
    state = GameState()
    rows = 4
    cols = 4
    network_config = NetworkConfig(rows, cols)
    num_nodes = len(network_config.node_list)
    num_attack_types = 10
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    assert state.attack_values.shape == (num_nodes, 10)
    assert state.defense_values.shape == (num_nodes, 10)
    assert state.defense_det.shape == (num_nodes,)
    assert state.attacker_pos == (3, 1)
    assert state.done == False
    assert state.hacked == False
    assert state.num_hacks == 0
    assert state.detected == False
    assert state.num_games == 0
def test_simulate_attack(self):
    rows = 4
    cols = 4
    network_config = NetworkConfig(rows, cols)
    num_nodes = len(network_config.node_list)
    state = GameState()
    num_attack_types = 10
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    attack_node_id = 3
    attack_type = 4
    state.defense_values[attack_node_id][attack_type] = 5
    state.attack_values[attack_node_id][attack_type] = 5
    assert not state.simulate_attack(attack_node_id, attack_type, network_config)
    state.defense_values[attack_node_id][attack_type] = 5
    state.attack_values[attack_node_id][attack_type] = 6
    assert state.simulate_attack(attack_node_id, attack_type, network_config)
def test_defend(self):
    state = GameState()
    rows = 4
    cols = 4
    network_config = NetworkConfig(rows, cols)
    num_nodes = len(network_config.node_list)
    num_attack_types = 10
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    defend_node_id = 3
    defense_type = 4
    max_value = 10
    old_count = state.defense_values[defend_node_id][defense_type]
    state.defend(defend_node_id, defense_type, max_value, network_config)
    assert state.defense_values[defend_node_id][defense_type] < max_value
    assert state.defense_values[defend_node_id][defense_type] == old_count + 1
    state.defense_values[defend_node_id][defense_type] = 10
    state.defend(defend_node_id, defense_type, max_value, network_config)
    assert state.defense_values[defend_node_id][defense_type] == max_value
def set_state(self, game_state: GameState) -> None:
    """
    Updates the current state

    :param game_state: the new state
    :return: None
    """
    self.game_state = game_state.copy()
    self.game_panel.update_state_text(self.game_state)
    self.attacker_sprite.move_to_pos(self.game_state.attacker_pos)
    if game_state.detected:
        self.attacker_sprite.detected()
    else:
        self.attacker_sprite.undetect()
    self.resource_network.set_node_states(self.game_state)
def action(self, game_state: GameState) -> int:
    """
    Samples an action from the policy

    :param game_state: the game state
    :return: action_id
    """
    from gym_idsgame.envs.util import idsgame_util
    actions = list(range(self.game_config.num_attack_actions))
    if not self.game_config.reconnaissance_actions:
        legal_actions = list(filter(
            lambda action: idsgame_util.is_attack_id_legal(
                action, self.game_config, game_state.attacker_pos, game_state), actions))
        if len(legal_actions) > 0:
            action = np.random.choice(legal_actions)
        else:
            action = np.random.choice(actions)
    else:
        attacker_obs = game_state.get_attacker_observation(
            self.game_config.network_config,
            local_view=self.idsgame_env.local_view_features(),
            reconnaissance=self.game_config.reconnaissance_actions,
            reconnaissance_bool_features=self.idsgame_env.idsgame_config.reconnaissance_bool_features)
        legal_actions = list(filter(
            lambda action: self.is_attack_legal(action, attacker_obs, game_state), actions))
        if len(legal_actions) > 0:
            action = np.random.choice(legal_actions)
        else:
            action = np.random.choice(actions)
    if self.idsgame_env.local_view_features():
        action = self.convert_local_attacker_action_to_global(action, attacker_obs)
    return action
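# --- Illustrative example (not part of the library) ---
# A minimal sketch of the uniform-random baseline above: filter the action ids
# through a legality predicate and sample uniformly among the legal ones,
# falling back to all actions when none is legal. The predicate below is a toy
# stand-in for is_attack_id_legal / is_attack_legal.
import numpy as np

def sample_legal_action(num_actions: int, is_legal) -> int:
    actions = list(range(num_actions))
    legal_actions = [a for a in actions if is_legal(a)]
    if len(legal_actions) > 0:
        return int(np.random.choice(legal_actions))
    return int(np.random.choice(actions))

# Toy legality rule: only even action ids are legal
sampled_action = sample_legal_action(10, lambda a: a % 2 == 0)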
def action(self, game_state: GameState) -> int:
    """
    Samples an action from the policy.

    :param game_state: the game state
    :return: action_id
    """
    try:
        # Feature engineering
        attacker_obs = game_state.get_attacker_observation(
            self.game_config.network_config,
            local_view=self.idsgame_env.local_view_features(),
            reconnaissance=self.game_config.reconnaissance_actions,
            reconnaissance_bool_features=self.idsgame_env.idsgame_config.reconnaissance_bool_features)
        defender_obs = game_state.get_defender_observation(self.game_config.network_config)
        defender_state = self.update_state(attacker_obs=attacker_obs, defender_obs=defender_obs,
                                           state=[], attacker=False)
        if not self.config.ar_policy:
            actions = list(range(self.idsgame_env.num_defense_actions))
            non_legal_actions = list(filter(
                lambda action: not self.idsgame_env.is_defense_legal(action), actions))
            obs_tensor_d = torch.as_tensor(defender_state.flatten()).to(self.device)
            defender_actions, defender_values, defender_log_probs = self.model.defender_policy.forward(
                obs_tensor_d, self.idsgame_env, device=self.device, attacker=False,
                non_legal_actions=non_legal_actions)
            defender_actions = defender_actions.item()
        else:
            actions = list(range(self.config.defender_node_net_output_dim))
            non_legal_actions = list(filter(
                lambda action: not self.is_defense_legal(action, node=True, game_state=game_state), actions))
            if len(non_legal_actions) == len(actions):
                non_legal_actions = []
            obs_tensor_d = torch.as_tensor(defender_state.flatten()).to(self.device)
            defender_node_actions, defender_node_values, defender_node_log_probs, defender_node_lstm_state = \
                self.model.defender_node_policy.forward(
                    obs_tensor_d, self.idsgame_env, device=self.device, attacker=False,
                    non_legal_actions=non_legal_actions)
            defender_node_actions = defender_node_actions.cpu().numpy()
            node = defender_node_actions[0]
            obs_tensor_d_1 = obs_tensor_d.reshape(
                self.idsgame_env.idsgame_config.game_config.num_nodes,
                self.config.defender_at_net_input_dim)
            obs_tensor_d_at = obs_tensor_d_1[node]
            actions = list(range(self.config.defender_at_net_output_dim))
            non_legal_actions = list(filter(
                lambda action: not self.is_defense_legal(action, node=False, game_state=game_state,
                                                         obs=obs_tensor_d_at), actions))
            if len(non_legal_actions) == len(actions):
                non_legal_actions = []
            defender_at_actions, defender_at_values, defender_at_log_probs, defender_at_lstm_state = \
                self.model.defender_at_policy.forward(
                    obs_tensor_d_at, self.idsgame_env, device=self.device, attacker=False,
                    non_legal_actions=non_legal_actions)
            defender_at_actions = defender_at_actions.cpu().numpy()
            attack_id = util.get_defense_action_id(
                node, defender_at_actions[0], self.idsgame_env.idsgame_config.game_config)
            defender_actions = attack_id
    except Exception as e:
        print(str(e))
        traceback.print_exc()
    return defender_actions
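# --- Illustrative example (not part of the library) ---
# The ar_policy branch above factors the defense action autoregressively:
# one head picks a node from the full observation, a second head picks the
# defense type from that node's features, and the two choices are combined
# into a flat action id. The network shapes, the feature slicing, and the
# flat-id formula node * num_types + type below are illustrative assumptions,
# not the library's actual policies or util.get_defense_action_id.
import torch
import torch.nn as nn
from torch.distributions import Categorical

num_nodes, feats_per_node, num_types = 4, 11, 10

node_net = nn.Linear(num_nodes * feats_per_node, num_nodes)  # hypothetical node-selection head
type_net = nn.Linear(feats_per_node, num_types)              # hypothetical type-selection head

obs = torch.randn(num_nodes * feats_per_node)                # toy flattened defender observation

# Stage 1: pick a node from the full observation
node = Categorical(logits=node_net(obs)).sample()

# Stage 2: pick a defense type conditioned on the selected node's features only
node_feats = obs.reshape(num_nodes, feats_per_node)[node]
defense_type = Categorical(logits=type_net(node_feats)).sample()

# Combine the two choices into a single flat action id (assumed encoding)
action_id = node.item() * num_types + defense_type.item()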
def test_new_game(self):
    rows = 4
    cols = 4
    network_config = NetworkConfig(rows, cols)
    state = GameState()
    num_nodes = len(network_config.node_list)
    num_attack_types = 10
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    init_state = state.copy()
    old_game_count = state.num_games
    state.new_game(init_state)
    assert state.num_games == old_game_count + 1
    assert state.done == False
    assert state.detected == False
    state.default_state(list(range(num_nodes)), (3, 1), num_attack_types, network_config)
    init_state = state.copy()
    state.hacked = True
    old_hacked_count = 0
    state.new_game(init_state)
    assert state.num_hacks == old_hacked_count + 1
def test_initialization(self):
    GameState()
def action(self, game_state: GameState) -> int:
    """
    Samples an action from the policy.

    :param game_state: the game state
    :return: action_id
    """
    try:
        # Feature engineering
        attacker_obs = game_state.get_attacker_observation(
            self.game_config.network_config,
            local_view=self.idsgame_env.local_view_features(),
            reconnaissance=self.game_config.reconnaissance_actions,
            reconnaissance_bool_features=self.idsgame_env.idsgame_config.reconnaissance_bool_features)
        defender_obs = game_state.get_defender_observation(self.game_config.network_config)
        attacker_state = self.update_state(attacker_obs=attacker_obs, defender_obs=defender_obs,
                                           state=[], attacker=True)
        if not self.config.ar_policy:
            actions = list(range(self.idsgame_env.num_attack_actions))
            non_legal_actions = list(filter(
                lambda action: not self.is_attack_legal(action, attacker_obs, game_state), actions))
            obs_tensor_a = torch.as_tensor(attacker_state.flatten()).to(self.device)
            attacker_actions, attacker_values, attacker_log_probs = self.model.attacker_policy.forward(
                obs_tensor_a, self.idsgame_env, device=self.device, attacker=True,
                non_legal_actions=non_legal_actions)
            attacker_action = attacker_actions.cpu().numpy()[0]
        else:
            actions = list(range(self.config.attacker_node_net_output_dim))
            non_legal_actions = list(filter(
                lambda action: not self.is_attack_legal(action, attacker_obs, game_state, node=True), actions))
            obs_tensor_a = torch.as_tensor(attacker_state.flatten()).to(self.device)
            attacker_node_actions, attacker_node_values, attacker_node_log_probs, attacker_node_lstm_state = \
                self.model.attacker_node_policy.forward(
                    obs_tensor_a, self.idsgame_env, device=self.device, attacker=True,
                    non_legal_actions=non_legal_actions)
            attacker_node_probs = self.model.attacker_node_policy.get_action_dist(
                obs_tensor_a, self.idsgame_env, device=self.device, attacker=True,
                non_legal_actions=non_legal_actions)
            attacker_node_actions = attacker_node_actions.cpu().numpy()
            node = attacker_node_actions[0]
            obs_tensor_a_1 = obs_tensor_a.reshape(
                self.idsgame_env.idsgame_config.game_config.num_nodes,
                self.config.attacker_at_net_input_dim)
            obs_tensor_a_at = obs_tensor_a_1[node]
            attacker_at_actions, attacker_at_values, attacker_at_log_probs, attacker_at_lstm_state = \
                self.model.attacker_at_policy.forward(
                    obs_tensor_a_at, self.idsgame_env, device=self.device, attacker=True,
                    non_legal_actions=non_legal_actions)
            attacker_at_probs = self.model.attacker_at_policy.get_action_dist(
                obs_tensor_a_at, self.idsgame_env, device=self.device, attacker=True,
                non_legal_actions=non_legal_actions)
            # print("attacker node probs:{}".format(attacker_node_probs.detach().cpu().numpy()))
            # print("attacker at probs:{}".format(attacker_at_probs.detach().cpu().numpy()))
            self.create_policy_plot(attacker_at_probs.detach().cpu().numpy(), 0, attacker=True)
            attacker_at_actions = attacker_at_actions.cpu().numpy()
            attack_id = util.get_attack_action_id(
                node, attacker_at_actions[0], self.idsgame_env.idsgame_config.game_config)
            attacker_action = attack_id
    except Exception as e:
        print(str(e))
        traceback.print_exc()
    if self.idsgame_env.local_view_features():
        attack = self.convert_local_attacker_action_to_global(attacker_action, attacker_obs)
        return attack
    else:
        return attacker_action
class GameConfig():
    """
    DTO with game configuration parameters
    """

    def __init__(self, network_config: NetworkConfig = None, manual_attacker: bool = True, num_layers: int = 1,
                 num_servers_per_layer: int = 2, num_attack_types: int = 10, max_value: int = 9,
                 initial_state: GameState = None, manual_defender: bool = False, initial_state_path: str = None,
                 dense_rewards=False, min_random_a_val: int = 0, min_random_d_val: int = 0,
                 min_random_det_val: int = 0, dense_rewards_v2=False, reconnaissance_actions: bool = False,
                 max_random_v_val: int = 1, dense_rewards_v3=False):
        """
        Class constructor, initializes the DTO

        :param network_config: the network configuration of the game (e.g. number of nodes and their connectivity)
        :param manual_attacker: whether the attacker is controlled manually or by an agent
        :param manual_defender: whether the defender is controlled manually or by an agent
        :param num_layers: the number of layers in the network
        :param num_servers_per_layer: the number of servers per layer in the network
        :param num_attack_types: the number of attack types
        :param max_value: max value for a defense/attack attribute
        :param initial_state: the initial state
        :param initial_state_path: path to the initial state saved on disk
        :param dense_rewards: if true, give hacker dense rewards (reward for each intermediate server hacked)
        :param dense_rewards_v2: if true, give defender reward only when blocking
        :param min_random_a_val: minimum attack value when randomizing the state
        :param min_random_d_val: minimum defense value when randomizing the state
        :param min_random_det_val: minimum detection value when randomizing the state
        :param reconnaissance_actions: a boolean flag that indicates whether reconnaissance activities are enabled
                                       for the attacker
        :param max_random_v_val: maximum random vulnerability value when using a randomized environment
        """
        self.reconnaissance_actions = reconnaissance_actions
        self.manual_attacker = manual_attacker
        self.manual_defender = manual_defender
        self.num_layers = num_layers
        self.num_servers_per_layer = num_servers_per_layer
        self.num_attack_types = num_attack_types
        self.max_value = max_value
        self.min_random_a_val = min_random_a_val
        self.min_random_d_val = min_random_d_val
        self.min_random_det_val = min_random_det_val
        self.num_rows = self.num_layers + 2
        self.num_nodes = self.num_layers * self.num_servers_per_layer + 2  # +2 for Start and Data Nodes
        self.num_cols = self.num_servers_per_layer
        self.set_attack_actions()
        self.num_defense_actions = (self.num_attack_types + 1) * self.num_nodes
        self.num_states = self.num_nodes
        self.network_config = network_config
        self.initial_state_path = initial_state_path
        self.defense_val = 2
        self.attack_val = 0
        self.num_vulnerabilities_per_node = 1
        self.det_val = 2
        self.dense_rewards_v2 = dense_rewards_v2
        self.dense_rewards_v3 = dense_rewards_v3
        self.vulnerabilitiy_val = 0
        self.max_random_v_val = max_random_v_val
        self.num_vulnerabilities_per_layer = None
        if network_config is None:
            self.network_config = NetworkConfig(self.num_rows, self.num_cols, connected_layers=False)
        self.initial_state = initial_state
        if self.initial_state is None and self.initial_state_path is not None:
            self.initial_state = GameState.load(self.initial_state_path)
        if self.initial_state is None and self.initial_state_path is None:
            self.initial_state = GameState(min_random_a_val=min_random_a_val,
                                           min_random_det_val=min_random_det_val,
                                           min_random_d_val=min_random_d_val,
                                           max_value=self.max_value,
                                           max_random_v_val=self.max_random_v_val)
            self.initial_state.default_state(self.network_config.node_list, self.network_config.start_pos,
                                             self.num_attack_types, network_config=self.network_config)
        self.dense_rewards = dense_rewards

    def set_attack_actions(self, local_view: bool = False):
        if not self.reconnaissance_actions:
            self.num_attack_actions = self.num_attack_types * self.num_nodes
        else:
            if not local_view:
                self.num_attack_actions = (self.num_attack_types + 1) * self.num_nodes
            else:
                self.num_attack_actions = (self.num_attack_types + 1) * self.network_config.max_neighbors

    def set_load_initial_state(self, initial_state_path: str) -> None:
        """
        Sets the initial state by loading it from disk

        :param initial_state_path: path to the initial state saved on disk
        :return: None
        """
        self.initial_state = GameState.load(initial_state_path)

    def set_initial_state(self, defense_val=2, attack_val=0, num_vulnerabilities_per_node=1, det_val=2,
                          vulnerability_val=0, num_vulnerabilities_per_layer=None,
                          randomize_visibility: bool = False, visibility_p: float = 0.5):
        """
        Utility function for setting the initial game state

        :param defense_val: defense value for defense types that are not vulnerable
        :param attack_val: attack value for attack types
        :param num_vulnerabilities_per_node: number of vulnerabilities per node
        :param det_val: detection value per node
        :param vulnerability_val: defense value for defense types that are vulnerable
        :param num_vulnerabilities_per_layer: number of vulnerabilities per layer
        :param randomize_visibility: boolean flag whether to randomize visibility for partially observed envs
        :param visibility_p: probability that a node is visible when randomizing visibility
        :return: None
        """
        if num_vulnerabilities_per_layer is None:
            num_vulnerabilities_per_layer = self.num_servers_per_layer
        self.defense_val = defense_val
        self.attack_val = attack_val
        self.num_vulnerabilities_per_layer = num_vulnerabilities_per_layer
        self.det_val = det_val
        self.vulnerabilitiy_val = vulnerability_val
        self.num_vulnerabilities_per_node = num_vulnerabilities_per_node
        self.initial_state.set_state(self.network_config.node_list, self.num_attack_types,
                                     defense_val=defense_val, attack_val=attack_val,
                                     num_vulnerabilities_per_node=num_vulnerabilities_per_node,
                                     det_val=det_val, vulnerability_val=vulnerability_val,
                                     network_config=self.network_config,
                                     num_vulnerabilities_per_layer=num_vulnerabilities_per_layer,
                                     randomize_visibility=randomize_visibility,
                                     visibility_p=visibility_p)

    def get_attacker_observation_space(self) -> gym.spaces.Box:
        """
        Creates an OpenAI-Gym space for the attacker's observation

        :return: observation space
        """
        if not self.reconnaissance_actions:
            high_row = np.array([self.max_value] * (self.num_attack_types + 1))
            low = np.zeros((self.num_nodes, self.num_attack_types + 1))
        else:
            high_row = np.array([self.max_value] * (self.num_attack_types * 2 + 1))
            low = np.zeros((self.num_nodes, self.num_attack_types * 2 + 1))
        high = np.array([high_row] * self.num_nodes)
        observation_space = gym.spaces.Box(low=low, high=high, dtype=np.int32)
        return observation_space

    def get_defender_observation_space(self) -> gym.spaces.Box:
        """
        Creates an OpenAI-Gym space for the defender's observation

        :return: observation space
        """
        high_row = np.array([self.max_value] * (self.num_attack_types + 1))
        high = np.array([high_row] * 1)
        low = np.zeros((1, self.num_attack_types + 1))
        observation_space = gym.spaces.Box(low=low, high=high, dtype=np.int32)
        return observation_space

    def get_action_space(self, defender: bool = False) -> gym.spaces.Discrete:
        """
        Creates an OpenAI-Gym space for the actions in the environment

        :param defender: boolean flag, True for the defender's action space and False for the attacker's
        :return: action space
        """
        if defender:
            return gym.spaces.Discrete(self.num_defense_actions)
        else:
            return gym.spaces.Discrete(self.num_attack_actions)
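# --- Illustrative example (not part of the library) ---
# Short usage sketch of GameConfig as defined above: construct a config with
# the default topology and inspect the derived dimensions and Gym spaces.
# The commented values follow directly from the constructor arithmetic shown above.
config = GameConfig(num_layers=1, num_servers_per_layer=2, num_attack_types=10, max_value=9)

print(config.num_nodes)                                # 4: 1 layer * 2 servers + start node + data node
print(config.num_attack_actions)                       # 40: 10 attack types * 4 nodes (no reconnaissance)
print(config.num_defense_actions)                      # 44: (10 attack types + 1) * 4 nodes
print(config.get_action_space(defender=True))          # Discrete(44)
print(config.get_attacker_observation_space().shape)   # (4, 11) without reconnaissance actions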