def __init__(self, goal_velocity=0):
    """Set up physical constants, observation/action spaces and episode logs.

    Args:
        goal_velocity: Stored unchanged as ``self.goal_velocity``
            (defaults to 0).
    """
    # Position/speed limits and the goal definition.
    self.min_position, self.max_position = -1.2, 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5
    self.goal_velocity = goal_velocity

    # Dynamics constants.
    self.force = 0.001
    self.gravity = 0.0025

    # Observation bounds, laid out as [position, speed].
    lower_bound = [self.min_position, -self.max_speed]
    upper_bound = [self.max_position, self.max_speed]
    self.low = np.array(lower_bound, dtype=np.float32)
    self.high = np.array(upper_bound, dtype=np.float32)

    self.viewer = None

    # Three discrete actions; observations are boxed by the bounds above.
    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(
        low=self.low, high=self.high, dtype=np.float32
    )

    # Seed with no explicit value, exactly as before.
    self.seed()

    # Per-instance logs, empty at construction time.
    self.rewards, self.infos = [], []
def __init__(self, seed):
    """Initialise pickling support, seeding, the Box2D world and the spaces.

    The constructor finishes by calling ``self.reset()``.

    Args:
        seed: Forwarded unchanged to ``self.seed``.
    """
    EzPickle.__init__(self)
    self.seed(seed)
    self.viewer = None

    # Only the physics world is created here; the remaining simulation
    # objects start out empty.
    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    # Observation is a vector of 8 floats. The useful range is -1 .. +1,
    # but spikes can be higher, hence the unbounded box.
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=(8,), dtype=np.float32
    )

    if not self.continuous:
        # Discrete control: nop, fire left engine, main engine, right engine.
        self.action_space = spaces.Discrete(4)
    else:
        # Continuous control: two floats [main engine, left-right engines].
        #   Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power
        #   (the engine can't work with less than 50% power).
        #   Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right
        #   engine, -0.5..0.5 off.
        self.action_space = spaces.Box(
            low=-1, high=+1, shape=(2,), dtype=np.float32
        )

    self.reset()
def __init__(self, seed):
    """Set up cart/pole constants, spaces, seeding and episode bookkeeping.

    Args:
        seed: Forwarded unchanged to ``self.seed``.
    """
    # Physical constants.
    self.gravity = 9.8
    self.masscart, self.masspole = 1.0, 0.1
    self.total_mass = self.masspole + self.masscart
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = self.masspole * self.length
    self.force_mag = 10.0
    self.tau = 0.02  # seconds between state updates
    self.kinematics_integrator = 'euler'

    # Episode failure thresholds: 12 degrees (in radians) and 2.4 for x.
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 2.4

    # The position and angle limits are doubled so that a failing
    # observation is still within bounds; the other two entries are
    # bounded only by the float32 maximum.
    float32_max = np.finfo(np.float32).max
    obs_limit = np.array(
        [
            self.x_threshold * 2,
            float32_max,
            self.theta_threshold_radians * 2,
            float32_max,
        ],
        dtype=np.float32,
    )

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(
        low=-obs_limit, high=obs_limit, dtype=np.float32
    )

    self.seed(seed)

    # Rendering and episode state start out unset.
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None

    # Per-instance logs, empty at construction time.
    self.rewards, self.infos = [], []
def __init__(self, env_id, label, attack_data, aug, seed=None, policy=None):
    """Build the environment: load topology logs, configure IDS VMs, define spaces.

    Besides setting attributes, construction has external side effects, in
    this order: it reads the VM / node / ofport JSON logs, calls
    ``set_seed`` for the OVS VM, calls ``restart_ids`` for every IDS VM,
    cleans and re-initialises the IDS tables through the controller, sets
    the ``'dscp'`` VNF parameter on each IDS VM, starts a daemon thread
    running ``self._act``, writes the action table to ``actions_fpath`` and
    prints the observation shape and the number of actions.

    Args:
        env_id: Environment id. VMs are selected by comparing it with the
            integer in the second ``_``-separated field of their ``'vm'``
            name.
        label: A single attack label or a list of labels. Every label must
            be a key of ``attack_data``.
        attack_data: Mapping keyed by attack label; stored as-is.
        aug: Stored as ``self.aug`` (data augmentation setting).
        seed: Stored as ``self.seed`` and passed to ``set_seed``.
        policy: Optional mapping with ``'reset'`` and ``'step'`` entries
            used as default actions; ``None`` disables both.

    Raises:
        AssertionError: If there is not exactly one OVS VM for ``env_id``,
            not exactly one SDN controller VM, not exactly one VXLAN tunnel
            per IDS VM, or a label is missing from ``attack_data``.
    """
    # id
    self.id = env_id

    # augment the data
    self.aug = aug

    # seed
    # NOTE(review): this is an instance attribute named ``seed``; if the
    # enclosing class inherits a ``seed()`` method it is shadowed -- confirm.
    self.seed = seed

    # debug
    self.debug = False
    self.max_obs_time = 0

    # load logs
    with open(vms_fpath, 'r') as f:
        self.vms = json.load(f)
    with open(nodes_fpath, 'r') as f:
        self.nodes = json.load(f)
    with open(ofports_fpath, 'r') as f:
        self.ofports = json.load(f)

    # check ids model weights: one model per '*.tflite' file
    models = [
        item.split('.tflite')[0]
        for item in os.listdir(ids_model_weights_dir)
        if item.endswith('.tflite')
    ]
    self.n_models = len(models)
    self.n_thrs = len(fpr_levels)

    # ovs vm: exactly one is expected for this environment id
    ovs_vms = [
        vm for vm in self.vms
        if vm['role'] == 'ovs' and int(vm['vm'].split('_')[1]) == self.id
    ]
    assert len(ovs_vms) == 1
    self.ovs_vm = ovs_vms[0]
    self.ovs_node = self.nodes[self.ovs_vm['vm']]
    set_seed(self.ovs_vm['mgmt'], flask_port, self.seed)

    # ids vms
    self.ids_vms = [
        vm for vm in self.vms
        if vm['role'] == 'ids' and int(vm['vm'].split('_')[1]) == self.id
    ]
    self.n_ids = len(self.ids_vms)
    self.ids_nodes = [self.nodes[vm['vm']] for vm in self.ids_vms]
    for vm in self.ids_vms:
        restart_ids(vm)
    self.delay = [[] for _ in range(self.n_ids)]

    # controller: exactly one SDN controller VM is expected
    controller_vm = [vm for vm in self.vms if vm['role'] == 'sdn']
    assert len(controller_vm) == 1
    self.controller_vm = controller_vm[0]
    self.controller = Odl(self.controller_vm['ip'])

    # tunnels and veth pairs
    self.internal_hosts = sorted([
        item for item in os.listdir(spl_dir)
        if osp.isdir(osp.join(spl_dir, item))
    ])
    self.ovs_vxlans = [
        item for item in self.ofports
        if item['vm'] == self.ovs_vm['vm'] and item['type'] == 'vxlan'
    ]
    self.ovs_veths = [
        item for item in self.ofports
        if item['vm'] == self.ovs_vm['vm'] and item['type'] == 'veth'
    ]

    # ids tunnels: the OVS-side ofport of the single vxlan to each IDS VM
    self.ids_tunnels = []
    for ids_vm in self.ids_vms:
        tunnel_to_ids = [
            ofport['ofport'] for ofport in self.ofports
            if ofport['type'] == 'vxlan'
            and ofport['vm'] == self.ovs_vm['vm']
            and ofport['remote'] == ids_vm['vm']
        ]
        assert len(tunnel_to_ids) == 1
        tunnel_to_ids = tunnel_to_ids[0]
        self.ids_tunnels.append(tunnel_to_ids)

    # configure ids
    for ids_vm, ids_node in zip(self.ids_vms, self.ids_nodes):
        ids_vxlan = [
            item for item in self.ofports
            if item['type'] == 'vxlan' and item['vm'] == ids_vm['vm']
        ]
        assert len(ids_vxlan) == 1
        ids_vxlan = ids_vxlan[0]
        ids_veths = [
            item for item in self.ofports
            if item['type'] == 'veth' and item['vm'] == ids_vm['vm']
        ]
        clean_ids_tables(self.controller, ids_node)
        init_ids_tables(self.controller, ids_node, ids_vxlan, ids_veths)
        # The third '_'-separated field of the VM name is sent as 'dscp'.
        idx = int(ids_vm['vm'].split('_')[2])
        set_vnf_param(ids_vm['ip'], flask_port, 'dscp', idx)

    # time
    self.tstart = None
    self.tstep = None
    self.step_duration = episode_duration / nsteps

    # traffic
    # isinstance (not an exact type check) so list subclasses are treated
    # as a list of labels instead of being wrapped as one unhashable label.
    if not isinstance(label, list):
        label = [label]
    for attack_label in label:
        assert attack_label in attack_data.keys(), f'No data found for attack {attack_label}!'
    self.profiles = calculate_probs(stats_dir, [0, *label])
    self.label = cycle(label)
    self.attack_data = attack_data

    # obs
    self.stack_size = obs_stack_size
    self.n_apps = len(applications)
    self.n_flags = len(tcp_flags)
    self.n_dscps = 2 ** self.n_ids
    self.sdn_on_off_frame_shape = (self.n_dscps, self.n_ids + 1)
    self.sdn_on_off_frame = np.zeros(self.sdn_on_off_frame_shape)
    self.nfv_on_off_frame_shape = (self.n_ids, self.n_models + self.n_thrs)
    self.nfv_on_off_frame = np.zeros(self.nfv_on_off_frame_shape)
    self.nfv_on_off_frame[:, 0] = 1  # default model idx is 0
    self.nfv_on_off_frame[:, self.n_models] = 1  # default thr idx is 0
    # Each stacked frame concatenates: per-app and per-flag sample pairs,
    # per-app-per-IDS values, and the flattened SDN and NFV on/off frames.
    obs_shape = (
        self.stack_size,
        self.n_apps * 2
        + (self.n_flags + 1) * 2
        + self.n_apps * self.n_ids
        + np.prod(self.sdn_on_off_frame_shape)
        + np.prod(self.nfv_on_off_frame_shape),
    )
    self.samples_by_app = np.zeros((self.n_apps, 2))
    self.samples_by_flag = np.zeros((self.n_flags + 1, 2))
    self.app_counts_stack = deque(maxlen=self.stack_size)
    self.in_samples_by_attacker_stack = deque(maxlen=self.stack_size)
    self.out_samples_by_attacker_stack = deque(maxlen=self.stack_size)
    self.intrusion_ips = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids)]
    # One more row than there are IDS VMs.
    self.ips_to_check_or_block = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids + 1)]
    self.intrusion_numbers = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids)]

    # actions
    self.n_forward_actions = self.n_dscps * self.n_ids
    self.n_unforward_actions = self.n_dscps * self.n_ids
    self.n_block_actions = self.n_dscps
    self.n_unblock_actions = self.n_dscps
    self.n_ids_actions = (self.n_models + self.n_thrs) * self.n_ids
    # The unforward/unblock counts are kept as attributes but deliberately
    # left out of the action space; the extra +1 is one additional action
    # (presumably a no-op -- confirm against _action_mapper).
    act_dim = self.n_forward_actions + self.n_block_actions + self.n_ids_actions + 1
    self.actions_queue = deque()

    # start acting
    actor = Thread(target=self._act, daemon=True)
    actor.start()

    # log actions: one "index;function;idx_val;args" line per action
    with open(actions_fpath, 'w') as f:
        for i in range(act_dim):
            fun, args, idx_val, _ = self._action_mapper(i)
            args_str = ','.join([str(item) for item in args])
            line = f"{i};{fun.__name__};{idx_val};{args_str}\n"
            f.write(line)

    # default policy
    if policy is not None:
        self.default_reset_actions = policy['reset']
        self.default_step_actions = policy['step']
    else:
        self.default_reset_actions = None
        self.default_step_actions = None

    # reward
    self.n_attackers = len(attackers)
    self.samples_by_attacker = np.zeros((self.n_attackers + 1, 2))

    # spaces
    self.observation_space = spaces.Box(low=0, high=np.inf, shape=obs_shape, dtype=np.float32)
    self.action_space = spaces.Discrete(act_dim)

    print('Observation shape: {0}'.format(obs_shape))
    print('Number of actions: {0}'.format(act_dim))

    self.in_samples = 0
    self.out_samples = 0