예제 #1
0
    def __init__(self, seed):
        EzPickle.__init__(self)
        self.seed(seed)
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-np.inf,
                                            np.inf,
                                            shape=(8, ),
                                            dtype=np.float32)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32)
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self.reset()
예제 #2
0
    def __init__(self, goal_velocity=0):
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        self.goal_velocity = goal_velocity
        self.power = 0.0015

        self.low_state = np.array([self.min_position, -self.max_speed],
                                  dtype=np.float32)
        self.high_state = np.array([self.max_position, self.max_speed],
                                   dtype=np.float32)

        self.viewer = None

        self.action_space = spaces.Box(low=self.min_action,
                                       high=self.max_action,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=self.low_state,
                                            high=self.high_state,
                                            dtype=np.float32)

        self.seed()
        self.reset()
예제 #3
0
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.terrain = None
        self.hull = None

        self.prev_shaping = None

        self.fd_polygon = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
            friction=FRICTION)

        self.fd_edge = fixtureDef(
            shape=edgeShape(vertices=[(0, 0), (1, 1)]),
            friction=FRICTION,
            categoryBits=0x0001,
        )

        self.reset()

        high = np.array([np.inf] * 24)
        self.action_space = spaces.Box(np.array([-1, -1, -1, -1]),
                                       np.array([1, 1, 1, 1]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
예제 #4
0
    def __init__(self, level, conf=None, simple_obs=False):
        print("starting DonkeyGym env")
        self.simple_obs = simple_obs
        self.viewer = None
        self.proc = None

        if conf is None:
            conf = {}

        conf["level"] = level

        # ensure defaults are supplied if missing.
        supply_defaults(conf)

        # set logging level
        logging.basicConfig(level=conf["log_level"])  # pytype: disable=key-error

        logger.debug("DEBUG ON")
        logger.debug(conf)

        # start Unity simulation subprocess
        self.proc = None
        if "exe_path" in conf:
            self.proc = DonkeyUnityProcess()
            # the unity sim server will bind to the host ip given
            self.proc.start(conf["exe_path"],
                            host="0.0.0.0",
                            port=conf["port"])

            # wait for simulator to startup and begin listening
            time.sleep(conf["start_delay"])

        # start simulation com
        self.viewer = DonkeyUnitySimContoller(conf=conf)

        # steering and throttle
        self.action_space = spaces.Box(
            low=np.array([self.STEER_LIMIT_LEFT, self.THROTTLE_MIN]),
            high=np.array([self.STEER_LIMIT_RIGHT, self.THROTTLE_MAX]),
            dtype=np.float32,
        )

        # camera sensor data
        #self.observation_space = spaces.Box(0, self.VAL_PER_PIXEL, self.viewer.get_sensor_size(), dtype=np.uint8)
        self.observation_space = spaces.Box(-np.inf,
                                            np.inf, (1, 17),
                                            dtype=np.float)

        # simulation related variables.
        self.seed()

        # Frame Skipping
        self.frame_skip = conf["frame_skip"]  # pytype: disable=key-error

        # wait until loaded
        self.viewer.wait_until_loaded()
예제 #5
0
    def __init__(self, goal_velocity=0):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed],
                            dtype=np.float32)
        self.high = np.array([self.max_position, self.max_speed],
                             dtype=np.float32)

        self.viewer = None

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low,
                                            self.high,
                                            dtype=np.float32)

        self.seed()

        self.rewards = []
        self.infos = []
예제 #6
0
    def __init__(self, seed):
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max, self.theta_threshold_radians * 2,
            np.finfo(np.float32).max
        ],
                        dtype=np.float32)

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed(seed)
        self.viewer = None
        self.state = None

        self.steps_beyond_done = None

        self.rewards = []
        self.infos = []
예제 #7
0
    def __init__(self, env):
        super(FlattenObservation, self).__init__(env)

        flatdim = spaces.flatdim(env.observation_space)
        self.observation_space = spaces.Box(low=-float('inf'),
                                            high=float('inf'),
                                            shape=(flatdim, ),
                                            dtype=np.float32)
 def __init__(self, venv, n_stack):
     self.venv = venv
     self.n_stack = n_stack
     wrapped_obs_space = venv.observation_space
     low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=-1)
     high = np.repeat(wrapped_obs_space.high, self.n_stack, axis=-1)
     self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
     observation_space = spaces.Box(low=low,
                                    high=high,
                                    dtype=venv.observation_space.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
예제 #9
0
파일: pendulum.py 프로젝트: mizolotu/izi
    def __init__(self, g=10.0):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.g = g
        self.m = 1.
        self.l = 1.
        self.viewer = None

        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-high,
                                            high=high,
                                            dtype=np.float32)

        self.seed()

        self.rewards = []
        self.infos = []
    def __init__(self, env):
        """
        Warp frames to 84x84 as done in the Nature paper and later work.

        :param env: (Gym Environment) the environment
        """
        gym.ObservationWrapper.__init__(self, env)
        self.width = 84
        self.height = 84
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width, 1),
                                            dtype=env.observation_space.dtype)
예제 #11
0
    def __init__(self, env):
        super(HERGoalEnvWrapper, self).__init__()
        self.env = env
        self.metadata = self.env.metadata
        self.action_space = env.action_space
        self.spaces = list(env.observation_space.spaces.values())
        # Check that all spaces are of the same type
        # (current limitation of the wrapper)
        space_types = [
            type(env.observation_space.spaces[key]) for key in KEY_ORDER
        ]
        assert len(set(space_types)) == 1, "The spaces for goal and observation"\
                                           " must be of the same type"

        if isinstance(self.spaces[0], spaces.Discrete):
            self.obs_dim = 1
            self.goal_dim = 1
        else:
            goal_space_shape = env.observation_space.spaces[
                'achieved_goal'].shape
            self.obs_dim = env.observation_space.spaces['observation'].shape[0]
            self.goal_dim = goal_space_shape[0]

            if len(goal_space_shape) == 2:
                assert goal_space_shape[
                    1] == 1, "Only 1D observation spaces are supported yet"
            else:
                assert len(
                    goal_space_shape
                ) == 1, "Only 1D observation spaces are supported yet"

        if isinstance(self.spaces[0], spaces.MultiBinary):
            total_dim = self.obs_dim + 2 * self.goal_dim
            self.observation_space = spaces.MultiBinary(total_dim)

        elif isinstance(self.spaces[0], spaces.Box):
            lows = np.concatenate([space.low for space in self.spaces])
            highs = np.concatenate([space.high for space in self.spaces])
            self.observation_space = spaces.Box(lows, highs, dtype=np.float32)

        elif isinstance(self.spaces[0], spaces.Discrete):
            dimensions = [
                env.observation_space.spaces[key].n for key in KEY_ORDER
            ]
            self.observation_space = spaces.MultiDiscrete(dimensions)

        else:
            raise NotImplementedError("{} space is not supported".format(
                type(self.spaces[0])))
 def __init__(self, env, a, b):
     assert isinstance(
         env.action_space,
         spaces.Box), ("expected Box action space, got {}".format(
             type(env.action_space)))
     assert np.less_equal(a, b).all(), (a, b)
     super(RescaleAction, self).__init__(env)
     self.a = np.zeros(env.action_space.shape,
                       dtype=env.action_space.dtype) + a
     self.b = np.zeros(env.action_space.shape,
                       dtype=env.action_space.dtype) + b
     self.action_space = spaces.Box(low=a,
                                    high=b,
                                    shape=env.action_space.shape,
                                    dtype=env.action_space.dtype)
    def __init__(self, env, n_frames):
        """Stack n_frames last frames.

        Returns lazy array, which is much more memory efficient.

        See Also
        --------
        stable_baselines.common.atari_wrappers.LazyFrames

        :param env: (Gym Environment) the environment
        :param n_frames: (int) the number of frames to stack
        """
        gym.Wrapper.__init__(self, env)
        self.n_frames = n_frames
        self.frames = deque([], maxlen=n_frames)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(shp[0], shp[1],
                                                   shp[2] * n_frames),
                                            dtype=env.observation_space.dtype)
예제 #14
0
    def __init__(self, env_id, label, attack_data, aug, seed=None, policy=None):

        # id

        self.id = env_id

        # augment the data

        self.aug = aug

        # seed

        self.seed = seed

        # debug

        self.debug = False
        self.max_obs_time = 0

        # load logs

        with open(vms_fpath, 'r') as f:
            self.vms = json.load(f)

        with open(nodes_fpath, 'r') as f:
            self.nodes = json.load(f)

        with open(ofports_fpath, 'r') as f:
            self.ofports = json.load(f)

        # check ids model weights

        models = [item.split('.tflite')[0] for item in os.listdir(ids_model_weights_dir) if item.endswith('.tflite')]
        self.n_models = len(models)
        self.n_thrs = len(fpr_levels)

        # ovs vm

        ovs_vms = [vm for vm in self.vms if vm['role'] == 'ovs' and int(vm['vm'].split('_')[1]) == self.id]
        assert len(ovs_vms) == 1
        self.ovs_vm = ovs_vms[0]
        self.ovs_node = self.nodes[self.ovs_vm['vm']]
        set_seed(self.ovs_vm['mgmt'], flask_port, self.seed)

        # ids vms

        self.ids_vms = [vm for vm in self.vms if vm['role'] == 'ids' and int(vm['vm'].split('_')[1]) == self.id]
        self.n_ids = len(self.ids_vms)
        self.ids_nodes = [self.nodes[vm['vm']] for vm in self.ids_vms]
        for vm in self.ids_vms:
            restart_ids(vm)
        self.delay = [[] for _ in range(self.n_ids)]

        # controller

        controller_vm = [vm for vm in self.vms if vm['role'] == 'sdn']
        assert len(controller_vm) == 1
        self.controller_vm = controller_vm[0]
        self.controller = Odl(self.controller_vm['ip'])

        # tunnels and veth pairs

        self.internal_hosts = sorted([item for item in os.listdir(spl_dir) if osp.isdir(osp.join(spl_dir, item))])
        self.ovs_vxlans = [item for item in self.ofports if item['vm'] == self.ovs_vm['vm'] and item['type'] == 'vxlan']
        self.ovs_veths = [item for item in self.ofports if item['vm'] == self.ovs_vm['vm'] and item['type'] == 'veth']

        # ids tunnels

        self.ids_tunnels = []
        for ids_vm in self.ids_vms:
            tunnel_to_ids = [ofport['ofport'] for ofport in self.ofports if ofport['type'] == 'vxlan' and ofport['vm'] == self.ovs_vm['vm'] and ofport['remote'] == ids_vm['vm']]
            assert len(tunnel_to_ids) == 1
            tunnel_to_ids = tunnel_to_ids[0]
            self.ids_tunnels.append(tunnel_to_ids)

        # configure ids

        for ids_vm, ids_node in zip(self.ids_vms, self.ids_nodes):
            ids_vxlan = [item for item in self.ofports if item['type'] == 'vxlan' and item['vm'] == ids_vm['vm']]
            assert len(ids_vxlan) == 1
            ids_vxlan = ids_vxlan[0]
            ids_veths = [item for item in self.ofports if item['type'] == 'veth' and item['vm'] == ids_vm['vm']]
            clean_ids_tables(self.controller, ids_node)
            init_ids_tables(self.controller, ids_node, ids_vxlan, ids_veths)
            idx = int(ids_vm['vm'].split('_')[2])
            set_vnf_param(ids_vm['ip'], flask_port, 'dscp', idx)

        # time

        self.tstart = None
        self.tstep = None
        self.step_duration = episode_duration / nsteps

        # traffic

        if type(label) is not list:
            label = [label]
        for l in label:
            assert l in attack_data.keys(), f'No data found for attack {l}!'
        self.profiles = calculate_probs(stats_dir, [0, *label])
        self.label = cycle(label)
        self.attack_data = attack_data

        # obs

        self.stack_size = obs_stack_size
        self.n_apps = len(applications)
        self.n_flags = len(tcp_flags)
        self.n_dscps = 2 ** self.n_ids

        self.sdn_on_off_frame_shape = (self.n_dscps, self.n_ids + 1)
        self.sdn_on_off_frame = np.zeros(self.sdn_on_off_frame_shape)

        self.nfv_on_off_frame_shape = (self.n_ids, self.n_models + self.n_thrs)
        self.nfv_on_off_frame = np.zeros(self.nfv_on_off_frame_shape)
        self.nfv_on_off_frame[:, 0] = 1  # default model idx is 0
        self.nfv_on_off_frame[:, self.n_models] = 1  # default thr idx is 0

        obs_shape = (self.stack_size, self.n_apps * 2 +
                     (self.n_flags + 1) * 2 +
                     self.n_apps * self.n_ids +
                     np.prod(self.sdn_on_off_frame_shape) +
                     np.prod(self.nfv_on_off_frame_shape))

        self.samples_by_app = np.zeros((self.n_apps, 2))
        self.samples_by_flag = np.zeros((self.n_flags + 1, 2))
        self.app_counts_stack = deque(maxlen=self.stack_size)

        self.in_samples_by_attacker_stack = deque(maxlen=self.stack_size)
        self.out_samples_by_attacker_stack = deque(maxlen=self.stack_size)

        self.intrusion_ips = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids)]
        self.ips_to_check_or_block = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids + 1)]
        self.intrusion_numbers = [[[] for _ in range(self.n_apps)] for __ in range(self.n_ids)]

        # actions

        self.n_forward_actions = self.n_dscps * self.n_ids
        self.n_unforward_actions = self.n_dscps * self.n_ids
        self.n_block_actions = self.n_dscps
        self.n_unblock_actions = self.n_dscps
        self.n_ids_actions = (self.n_models + self.n_thrs) * self.n_ids

        #act_dim = self.n_forward_actions + self.n_unforward_actions + self.n_block_actions + self.n_unblock_actions + self.n_ids_actions + 1
        act_dim = self.n_forward_actions + self.n_block_actions + self.n_ids_actions + 1

        self.actions_queue = deque()

        # start acting

        actor = Thread(target=self._act, daemon=True)
        actor.start()

        # log actions

        with open(actions_fpath, 'w') as f:
            for i in range(act_dim):
                fun, args, idx_val, q = self._action_mapper(i)
                line = f"{i};{fun.__name__};{idx_val};{','.join([str(item) for item in args])}\n"
                f.write(line)

        # default policy

        if policy is not None:
            self.default_reset_actions = policy['reset']
            self.default_step_actions = policy['step']
        else:
            self.default_reset_actions = None
            self.default_step_actions = None

        # reward

        self.n_attackers = len(attackers)
        self.samples_by_attacker = np.zeros((self.n_attackers + 1, 2))

        # spaces

        self.observation_space = spaces.Box(low=0, high=np.inf, shape=obs_shape, dtype=np.float32)
        self.action_space = spaces.Discrete(act_dim)

        print('Observation shape: {0}'.format(obs_shape))
        print('Number of actions: {0}'.format(act_dim))

        self.in_samples = 0
        self.out_samples = 0
 def __init__(self, env):
     gym.ObservationWrapper.__init__(self, env)
     self.observation_space = spaces.Box(low=0,
                                         high=1.0,
                                         shape=env.observation_space.shape,
                                         dtype=np.float32)