    def _request(self, req):
        """Manages the request-response exchanges with the iv4XR RL environment.

        Args:
            req (dict): request to send.

        Returns:
            received response.
        """
        self.socket.send_json(req)
        #  Get the reply.
        content = self.socket.recv_json()
        if req["command"] == "GET_SPEC":
            try:
                env_spec = EnvSpec(content['envName'])
            except gym.error.Error:
                env_spec = EnvSpec(content['envName'] + "-v0")
            action_space = parse_gym_space(content['actionSpace'])
            observation_space = parse_gym_space(content['observationSpace'])
            self._env_properties = EnvProperties(action_space,
                                                 observation_space, env_spec)
            logger.info(f"Connected to environment: {self.env_spec.id}")
            return self._env_properties
        elif req["command"] == "STEP":
            return (content["nextObservation"]["rawObservation"],
                    content["reward"], content["done"], content["info"])
        elif req["command"] == "RESET":
            return content["rawObservation"]
        else:
            raise ValueError("Invalid command: " + req["command"])
 def run(self):
     """Run function of the Thread. Manages the messaging with iv4XR."""
     while self._running:
         #  Wait for next request from client
         try:
             message = self.socket.recv_json()
         except zmq.error.ContextTerminated:
             # Forcefully closing the connection
             return
         self._connected = True
         logger.debug("[Server] Received request: %s" % message)
         #  Send reply back to client
         if message["cmd"] == "ENV_SPEC":
             content = message['arg']
             try:
                 env_spec = EnvSpec(content['envName'])
             except gym.error.Error:
                 env_spec = EnvSpec(content['envName'] + "-v0")
             action_space = parse_gym_space(content['actionSpace'])
             observation_space = parse_gym_space(
                 content['observationSpace'])
             self._env_properties = EnvProperties(action_space,
                                                  observation_space,
                                                  env_spec)
             logger.info(f"Connected to environment: {self.env_spec.id}")
             self.socket.send_json(True)
         elif message["cmd"] == "GET_ACTION":
             content = message['arg']
             with self._state_cv:
                 self._state = content["rawObservation"]
                 self._state_cv.notify()
             with self._action_cv:
                 self._action_cv.wait()
                 self.socket.send_json({"rawAction": self._action_to_send})
         elif message["cmd"] == "LOG_RETURNS":
             content = message['arg']
             with self._returns_cv:
                 self._returns = (
                     content["nextObservation"]["rawObservation"],
                     content["reward"], content["done"], content["info"])
                 self._returns_cv.notify()
             self.socket.send_json(True)
         elif message["cmd"] == "DISCONNECT":
             self._env_properties = None
             self.socket.send_json(True)
             self._connected = False
             with self._state_cv:
                 self._state_cv.notify()
         else:
             raise ValueError(
                 f"[Server] Unexpected command: {message['cmd']}")
Example #3
    def __init__(self):
        self.observation_space = gym.spaces.Box(-1, 1, shape=(10,))
        self.action_space = gym.spaces.Tuple((
            gym.spaces.Discrete(3), gym.spaces.Discrete(4), gym.spaces.Discrete(5),
            gym.spaces.Box(shape=(2,), low=-1.0, high=1.0)
        ))
        self.reward_range = (-math.inf, math.inf)
        self.metadata = None
        self.spec = EnvSpec(id="DummyEnv-v0")

        self.head_infos = [
            {"type": "categorical", "out_dim": 3},
            {"type": "categorical", "out_dim": 4},
            {"type": "categorical", "out_dim": 5},
            {"type": "normal", "out_dim": 2}
        ]
        self.autoregressive_maps = [
            [-1],
            [-1, 0],
            [-1, 0, 1],
            [-1, 0]
        ]
        self.action_type_masks = [
            [1, 1, 0],
            [1, 1, 1],
            [0, 0, 1]
        ]
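As a small usage sketch of the composite action space defined above (the class name DummyEnv is assumed from the spec id), sampling yields one entry per head:

# Hypothetical usage; DummyEnv is the class whose __init__ is shown above.
env = DummyEnv()
action = env.action_space.sample()   # e.g. (2, 0, 4, array([0.13, -0.87], dtype=float32))
assert env.action_space.contains(action)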
Example #4
    def __init__(self):
        self.qpos_cur = np.zeros([1, 7])
        self.qvel_cur = np.zeros([1, 7])
        self.impact = np.array([0, 0, 0, 0, 0, 0])
        self.fall = 0
        self.t_imp = 0
        self.set_impact = 0
        self.detect_impact_time = 10
        self.mu1 = 0.5
        self.mu2 = 0.5
        self.lfoot = 0
        self.rfoot = 0
        self.evaluate = False

        # print("Reached", id(self))
        mujoco_env.MujocoEnv.__init__(self,
                                      getResourcePath() + "/five_link.xml", 4)
        utils.EzPickle.__init__(self)
        # print("Can't reach", id(self))

        self.step_success = 0
        self.spec = EnvSpec("five_link-v3")  # TODO
        self.spec.max_episode_steps = 1000

        self.rbody_xpos = 0
        self.lbody_xpos = 0
    def meta_reset(self, seed):
        np.random.seed(seed)
        env = NormalHopperEnv()

        # Based on Hopper-v2
        spec = EnvSpec(
            'NormalHopperEnv-v0',
            entry_point='generic_rl.envs.mujoco:NormalHopperEnv',
            max_episode_steps=1000,
            reward_threshold=3800.0
        )

        env._spec = spec
        env.seed(seed)

        # Wrap the env as needed
        env = TimeLimit(
            env,
            max_episode_steps=spec.max_episode_steps,
            max_episode_seconds=spec.max_episode_seconds
        )

        self.env = env
        # Fix for done flags.
        self.env.reset()
        self.step = env.step
        self.render = env.render
        self.reset = env.reset
    def meta_reset(self, seed):
        np.random.seed(seed)

        env = RandomWeightHopperEnv(rand_mass=self.rand_mass,
                                    rand_gravity=self.rand_gravity,
                                    rand_friction=self.rand_friction,
                                    rand_thickness=self.rand_thickness)

        # Based on Hopper-v2
        spec = EnvSpec(
            'RandomWeightHopperEnv-v0',
            entry_point='generic_rl.envs.mujoco:RandomWeightHopperEnv',
            max_episode_steps=1000,
            reward_threshold=3800.0
        )

        env._spec = spec
        env.seed(seed)

        # Wrap the env as needed
        env = TimeLimit(
            env,
            max_episode_steps=spec.max_episode_steps,
            max_episode_seconds=spec.max_episode_seconds
        )

        self.env = env
        # Fix for done flags.
        self.env.reset()
        self.step = env.step
        self.render = env.render
        self.reset = env.reset
Example #7
 def __init__(self, name, img_size=84, camera_id='side', max_step=-1):
     self.env_name = name
     self.img_size = img_size
     self.camera_id = camera_id
     self.max_step = max_step
     if self.env_name == 'Humanoid_CMU':
         self.env = humanoid_CMU.run()
     else:
         domain, task = self.env_name.split('/')
         self.env = suite.load(domain_name=domain, task_name=task)
     self.control_min = self.env.action_spec().minimum[0]
     self.control_max = self.env.action_spec().maximum[0]
     self.control_shape = self.env.action_spec().shape
     self._action_space = spaces.Box(self.control_min, self.control_max,
                                     self.control_shape)
     total_size = 0
     for i, j in self.env.observation_spec().items():
         total_size += j.shape[0] if len(j.shape) > 0 else 1
     self._observation_space = spaces.Box(-np.inf, np.inf, (total_size, ))
     self.step_count = 0
     self.reward_range = (-np.inf, np.inf)
     self.metadata = {
         'render.modes': ['human', 'rgb_array'],
         'video.frames_per_second': 67
     }
     self.spec = EnvSpec('Humanoid-v2', max_episode_steps=1000)
Example #8
File: t4_env.py Project: pourhadi/serpent
    def __init__(self, dir, continuous_action=False):
        super(T4HistoryEnv, self).__init__()

        if continuous_action:
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(1, ),
                                           dtype=np.int8)
        else:
            self.action_space = spaces.Discrete(3)
        self.dir = dir

        files = process_dir(dir)

        self.files = files
        shuffle(self.files)
        #first = first[:95,:]
        self.state = ObsState(dir, self.files[0])

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=self.state.observation.shape,
                                            dtype=np.uint8)
        print(self.state.observation.shape)
        self.x = 0

        self.wins = 0
        self.episode_wins = 0
        self.action_count = 0

        self.spec = EnvSpec(id='T4History-v0', nondeterministic=True)

        self.reward_range = (-1, 1)
        self.continuous_action = continuous_action
 def __init__(self,
              process_idx=0,
              edge_penalty=0,
              width=10,
              height=16,
              easy=False,
              nohole=False,
              hidden_dist=0,
              mark_env=False,
              hrl_env=False,
              debug=False):
     self.process_idx = process_idx
     self.edge_penalty = edge_penalty
     self.action_space = gym.spaces.Discrete(4)
     self.observation_space = gym.spaces.Box(low=0.0,
                                             high=1.0,
                                             shape=(width, height, 3),
                                             dtype=np.float32)
     self.easy = easy
     self.nohole = nohole
     self.actions = []
     self.width = width
     self.height = height
     self.hidden = False
     self.hidden_dist = hidden_dist
     self.reward_range = (float(-self.edge_penalty - 1), 100.0)
     self.debug = debug
     self.spec = EnvSpec('Myenv-v0')
     self.mark_env = mark_env
     self.hrl_env = hrl_env
     self.mode_1 = False
     self.goal = (0, 0)  #true goal
     self.mark_done = False
Example #10
    def __init__(self, env, max_timestep, maze_size_scaling, random_start, low,
                 high):
        super(GoalWrapper, self).__init__(env)
        ob_space = env.observation_space
        self.maze_size_scaling = maze_size_scaling
        low = np.array(low, dtype=ob_space.dtype)
        high = np.array(high, dtype=ob_space.dtype)
        maze_low = np.array(np.array([-4, -4]) / 8 * maze_size_scaling,
                            dtype=ob_space.dtype)
        maze_high = np.array(np.array([20, 20]) / 8 * maze_size_scaling,
                             dtype=ob_space.dtype)
        self.maze_size_scaling = maze_size_scaling
        self.goal_space = gym.spaces.Box(low=low, high=high)
        self.maze_space = gym.spaces.Box(low=maze_low, high=maze_high)
        #print(self.maze_space.low, self.maze_space.high, self.goal_space.low, self.goal_space.high)
        self.goal_dim = low.size
        self.distance_threshold = 5 * maze_size_scaling / 8.
        self.spec = EnvSpec(id='PointMaze-v0', timestep_limit=max_timestep)

        self.distance = 5 * maze_size_scaling / 8.
        #print(self.goal_space, low.size)
        #exit(0)
        self.observation_space = gym.spaces.Dict(
            OrderedDict({
                'observation': ob_space,
                'desired_goal': self.goal_space,
                'achieved_goal': self.goal_space,
            }))
        self.goal = None
        self.random_start = random_start
Example #11
    def __init__(self,
                 env,
                 record_video=True,
                 video_schedule=None,
                 log_dir=None,
                 timestep_limit=9999):
        # Ensure the version saved to disk doesn't monitor into our log_dir
        locals_no_monitor = dict(locals())
        locals_no_monitor['log_dir'] = None
        locals_no_monitor['record_video'] = False
        locals_no_monitor['video_schedule'] = None
        Serializable.quick_init(self, locals_no_monitor)

        self.env = env
        self._observation_space = to_rllab_space(env.observation_space)
        self._action_space = to_rllab_space(env.action_space)
        self.env.spec = EnvSpec('GymEnv-v0')

        monitor.logger.setLevel(logging.WARNING)
        if not record_video:
            self.video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                self.video_schedule = CappedCubicVideoSchedule()
            else:
                self.video_schedule = video_schedule
        self.set_log_dir(log_dir)

        self._horizon = timestep_limit
Example #12
 def __init__(self, config):
     self.end_pos = config["corridor_length"]
     self.cur_pos = 0
     self.action_space = Discrete(2)
     self.observation_space = Box(
         0.0, self.end_pos, shape=(1, ), dtype=np.float32)
     self._spec = EnvSpec("SimpleCorridor-{}-v0".format(self.end_pos))
Example #13
class StocksEnv(gym.Env):
    metadata = {"render.modes": ['human']}
    spec = EnvSpec("StocksEnv-v0")

    @classmethod
    def from_dir(cls, data_dir, **kwargs):
        prices = {
                file: data.load_relative(file)
                for file in data.price_files(data_dir)
                }

        return StocksEnv(prices, **kwargs)

    def __init__(self, prices, bars_count=DEFAULT_BARS_COUNT,
                 commission=DEFAULT_COMMISSION_PERC, reset_on_close=True,
                 conv_1d=False, random_ofs_on_reset=True,
                 reward_on_close=False, volumes=False):
        assert isinstance(prices, dict)
        self._prices = prices
        if conv_1d:
            self._state = State1D(bars_count, commission, reset_on_close, reward_on_close=reward_on_close, volumes=volumes)
        else:
            self._state = State(bars_count, commission, reset_on_close, reward_on_close=reward_on_close, volumes=volumes)

        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=self._state.shape, dtype=np.float32)
        self.random_ofs_on_reset = random_ofs_on_reset
        self.seed()

    def reset(self):
        self._instrument = self.np_random.choice(list(self._prices.keys()))
        prices = self._prices[self._instrument]
        bars = self._state.bars_count
        if self.random_ofs_on_reset:
            offset = self.np_random.choice(prices.high.shape[0]-bars*10) + bars
        else:
            offset = bars

        self._state.reset(prices, offset)
        return self._state.encode()

    def step(self, action_idx):
        action = Actions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {
                "instrument": self._instrument,
                "offset": self._state._offset
                }

        return obs, reward, done, info

    def render(self, mode='human', close=False):
        pass

    def close(self):
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
        return [seed1, seed2]
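A minimal usage sketch for the class above (the data directory is a placeholder; from_dir and the Actions enum come from the snippet itself):

# Hypothetical usage; "data/prices" stands in for a directory of price files.
env = StocksEnv.from_dir("data/prices", bars_count=10)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())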
Example #14
def test_env_spec_tree():
    spec_tree = EnvSpecTree()

    # Add with namespace
    spec = EnvSpec("test/Test-v0")
    spec_tree["test/Test-v0"] = spec
    assert spec_tree.tree.keys() == {"test"}
    assert spec_tree.tree["test"].keys() == {"Test"}
    assert spec_tree.tree["test"]["Test"].keys() == {0}
    assert spec_tree.tree["test"]["Test"][0] == spec
    assert spec_tree["test/Test-v0"] == spec

    # Add without namespace
    spec = EnvSpec("Test-v0")
    spec_tree["Test-v0"] = spec
    assert spec_tree.tree.keys() == {"test", None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {0}
    assert spec_tree.tree[None]["Test"][0] == spec

    # Delete last version deletes entire subtree
    del spec_tree["test/Test-v0"]
    assert spec_tree.tree.keys() == {None}

    # Append second version for same name
    spec_tree["Test-v1"] = EnvSpec("Test-v1")
    assert spec_tree.tree.keys() == {None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {0, 1}

    # Deleting one version leaves other
    del spec_tree["Test-v0"]
    assert spec_tree.tree.keys() == {None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {1}

    # Add without version
    myenv = "MyAwesomeEnv"
    spec = EnvSpec(myenv)
    spec_tree[myenv] = spec
    assert spec_tree.tree.keys() == {None}
    assert myenv in spec_tree.tree[None].keys()
    assert spec_tree.tree[None][myenv].keys() == {None}
    assert spec_tree.tree[None][myenv][None] == spec
    assert spec_tree.__repr__() == "├──Test: [ v1 ]\n" + f"└──{myenv}: [  ]\n"
Example #15
    def __init__(self, goal_reaching_thresholds=np.array([0.075, 0.075, 0.75]),
                 goal_not_reached_penalty=-1, goal_reached_reward=0, terminate_on_goal_reaching=True,
                 time_limit=1000, frameskip=1, random_goals_instead_of_standing_goal=False,
                 polar_coordinates: bool=False):
        super().__init__()
        dir = os.path.dirname(__file__)
        model = load_model_from_path(dir + "/pendulum_with_goals.xml")

        self.sim = MjSim(model)
        self.viewer = None
        self.rgb_viewer = None

        self.frameskip = frameskip
        self.goal = None
        self.goal_reaching_thresholds = goal_reaching_thresholds
        self.goal_not_reached_penalty = goal_not_reached_penalty
        self.goal_reached_reward = goal_reached_reward
        self.terminate_on_goal_reaching = terminate_on_goal_reaching
        self.time_limit = time_limit
        self.current_episode_steps_counter = 0
        self.random_goals_instead_of_standing_goal = random_goals_instead_of_standing_goal
        self.polar_coordinates = polar_coordinates

        # spaces definition
        self.action_space = spaces.Box(low=-self.sim.model.actuator_ctrlrange[:, 1],
                                       high=self.sim.model.actuator_ctrlrange[:, 1],
                                       dtype=np.float32)
        if self.polar_coordinates:
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(low=np.array([-np.pi, -15]),
                                          high=np.array([np.pi, 15]),
                                          dtype=np.float32),
                "desired_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                           high=np.array([np.pi, 15]),
                                           dtype=np.float32),
                "achieved_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                            high=np.array([np.pi, 15]),
                                            dtype=np.float32)
            })
        else:
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(low=np.array([-1, -1, -15]),
                                          high=np.array([1, 1, 15]),
                                          dtype=np.float32),
                "desired_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                           high=np.array([1, 1, 15]),
                                           dtype=np.float32),
                "achieved_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                            high=np.array([1, 1, 15]),
                                            dtype=np.float32)
            })

        self.spec = EnvSpec('PendulumWithGoals-v0')
        self.spec.reward_threshold = self.goal_not_reached_penalty * self.time_limit

        self.reset()
Example #16
 def __init__(self, config):
     self.config = config
     self.size_px = (config['res'], config['res'])
     env_args = dict(map_name=config['map'],
                     step_mul=config['step_mul'],
                     game_steps_per_episode=0,
                     screen_size_px=self.size_px,
                     minimap_size_px=self.size_px)
     self.env = sc2_env.SC2Env(**env_args)
     self._spec = EnvSpec("Sc2-{}-v0".format(config['map']))
Example #17
 def make_timed_env(cls,
                    power_scalar,
                    max_episode_steps=None,
                    max_episode_seconds=None):
     base_env = Continuous_MountainCarEnv_Editted(power_scalar)
     base_env.spec = EnvSpec(base_env.get_name())
     env = TimeLimit(base_env,
                     max_episode_seconds=max_episode_seconds,
                     max_episode_steps=max_episode_steps)
     return env
Example #18
 def __init__(self, simulator=ShowdownSimulator()):
     self.__version__ = "0.1.0"
     self._spec = EnvSpec('PokeBattleEnv-v0')
     self.simulator = simulator
     num_actions = len(self.simulator.get_available_actions()) + len(self.simulator.get_available_modifiers())
     self.action_space = Box(low=0.0, high=1.0, shape=(num_actions,), dtype=np.float32)
     state_dimensions = len(self.simulator.state.to_array())
     self.observation_space = Box(low=0, high=1000, shape=(state_dimensions,), dtype=np.float32)
     self.reward_range = (-1, 1)
     self.metadata['render.modes'] = ['human']
     self.metadata['semantics.autoreset'] = False
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0,2)
        else:
            self.difficulty = difficulty

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space

        self._spec = EnvSpec("RunEnv-diff{}-v1".format(difficulty))
Example #20
    def __init__(self, config):
        """Define the environment properties
        :param config (object): the environment's configuration settings
        """
        self.config = config

        # Define the action and observation spaces
        self.action_space = spaces.Discrete(self.config.n_actions)
        self.observation_space = spaces.Discrete(self.config.n_rows * self.config.n_columns)

        # Define the environment id
        self.spec = EnvSpec('GridWorld-v0')
Example #21
 def __init__(self, max_episode_steps_coeff=1, scale=20, goal_padding=2.0):
     super(PointMass, self).__init__()
      # define scale such that each square in the grid is 1 x 1
     self.scale = int(scale)
     self.grid_size = self.scale * self.scale
     self.observation_space = gym.spaces.Box(low=np.array([0.0, 0.0]),
                                             high=np.array([1.0, 1.0]))
     self.action_space = gym.spaces.Box(low=np.array([-np.inf, -np.inf]),
                                        high=np.array([np.inf, np.inf]))
     self.goal_padding = goal_padding
     self.spec = EnvSpec(id='PointMass-v0',
                         max_episode_steps=int(max_episode_steps_coeff *
                                               self.scale))
Example #22
    def __init__(self, config=None):
        """
        Simple go-to-goal environment where a non-holonomic agent is
        required to move to a goal. The goal can be fixed or random;
        during training it is advised to provide a random goal every
        time. Rewards are binary.

        config includes:
        max_episode_steps (int): maximum number of timesteps in an episode
        reward_max (int): reward when goal is achieved.
        seed (int): seed for the numpy random number generator.
        her (bool): whether to use the HER compatible variant or not
        dt (float): dt in kinematic update equation
        num_iter (int): num of iterations of kinematic update equation

        """
        # Default values. Will be overridden if specified in config
        self.dt = 1e-2
        # self.her = True
        self.her = True
        # self.seed = None
        self.thresh = np.array([0.05, 0.05, 0.1])[:-1]
        self.num_iter = 50
        self.reward_max = 1
        self.max_episode_steps = 25
        self._max_episode_steps = 25
        self.step_penalty = 1.0  # (self.max_episode_steps)

        self.action_low = np.array([0.0, -np.pi / 4])
        self.action_high = np.array([0.3, np.pi / 4])
        self.action_space = Box(self.action_low, self.action_high, dtype="f")

        # so that the goals are within the range of performing actions
        self.d_clip = self.action_high[0] * self.num_iter * self.dt * 1.35

        self.observation_space = Box(low=-1, high=1, shape=(5, ), dtype="f")

        self.limits = np.array([1, 1, np.pi])
        self.agent = Agent(0)
        if config is not None:
            self.__dict__.update(config)

        # if self.seed is not None:
        #     np.random.seed(self.seed)

        self.goal = None
        if not self.her:
            self.dMax = self.action_high[0] * self.dt * self.num_iter
            self.dRange = 2 * self.dMax
        self.viewer = None
        self._spec = EnvSpec("Go2Goal-v0")
Example #23
    def __init__(self,
                 seed=None,
                 room_size=2,
                 gap_size=0.0,
                 decore_option: DecoreOption = DecoreOption.NONE,
                 wall_decore_height=None,
                 num_chars_on_wall=1,
                 invert_chars=True,
                 non_terminate=False,
                 **kwargs):
        params = DEFAULT_PARAMS
        params.set('turn_step', 5, 3, 7)
        params.set('forward_step', 0.2, 0.15, 0.25)

        self.num_rows = 6
        self.num_cols = 6
        self.room_size = room_size
        self.gap_size = gap_size
        self.decore_option = decore_option
        self.wall_decore_height = wall_decore_height
        self.num_chars_on_wall = num_chars_on_wall
        self.invert_chars = invert_chars
        self.non_terminate = non_terminate

        self.height = self.num_rows * room_size + (self.num_rows -
                                                   1) * gap_size
        self.width = self.num_cols * room_size + (self.num_cols - 1) * gap_size

        self.M = None

        # Decoration stuff
        self.text_decores = []
        if DecoreOption.DIGIT in self.decore_option:
            self.text_decores.extend(DIGITS)
        if DecoreOption.CHARACTER in self.decore_option:
            self.text_decores.extend(CHARACTERS)

        self.image_decores = PORTRAIT_NAMES if DecoreOption.PORTRAIT in self.decore_option else []

        super().__init__(seed=seed, params=params, **kwargs)

        self.spec = EnvSpec(id="WestWorld-v1",
                            entry_point=None,
                            reward_threshold=None,
                            nondeterministic=False,
                            max_episode_steps=self.max_episode_steps,
                            kwargs=None)

        # Allow only the movement actions
        self.action_space = spaces.Discrete(self.actions.move_back + 1)
Example #24
class StocksEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    spec = EnvSpec("StocksEnv-v0")

    # Constructor: Initialize price, state, observation space, and action space
    def __init__(self,
                 prices,
                 bar_count=DEFAULT_BAR_COUNT,
                 commision=DEFAULT_COMMISION):
        assert isinstance(prices, dict)
        self._prices = prices
        self._state = State(bar_count, commision)
        self.observation_space = gym.spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=self._state.shape,
                                                dtype=np.float32)
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.seed()

    # Take a step in the environment. Return the next observation, the reward, the done flag, and other info.

    def step(self, action_idx):
        action = Actions(action_idx)
        done, reward = self._state.step(action)
        obs = self._state.encode()
        info = {"choice": self._random_choice, "offset": self._state._offset}

        return obs, reward, done, info

    # Reset to give one observation to the Agent.
    def reset(self):
        self._random_choice = random.randrange(
            len(self._prices['date']) - self._state.bars_count - 1)
        # prices = {'open': [self._prices['open'][self._random_choice]], 'high': [self._prices['high'][self._random_choice]],
        #           'low': [self._prices['low'][self._random_choice]], 'close': [self._prices['close'][self._random_choice]]}
        offset = self._random_choice
        self._state.reset(self._prices, offset)
        return self._state.encode()

    def render(self, mode='human'):
        pass

    def close(self):
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]
Example #25
    def __init__(self, env):
        self.env = env
        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second':
            int(np.round(1.0 / self.env.control_timestep()))
        }

        self.observation_space = convert_dm_control_to_gym_space(
            env.observation_spec())
        self.action_space = convert_dm_control_to_gym_space(env.action_spec())
        max_episode_steps = None if env._step_limit == float('inf') else int(
            env._step_limit)
        self.spec = EnvSpec('DM-v0', max_episode_steps=max_episode_steps)
        self.viewer = None
Example #26
def register(id: str, cyberbattle_env_identifiers: model.Identifiers,
             **kwargs):
    """ same as gym.envs.registry.register, but adds CyberBattle specs to env.spec  """
    if id in registry.env_specs:
        raise Error('Cannot re-register id: {}'.format(id))
    spec = EnvSpec(id, **kwargs)
    # Map from port number to port names : List[model.PortName]
    spec.ports = cyberbattle_env_identifiers.ports
    # Array of all possible node properties (not necessarily all used in the network) : List[model.PropertyName]
    spec.properties = cyberbattle_env_identifiers.properties
    # Array defining an index for every possible local vulnerability name : List[model.VulnerabilityID]
    spec.local_vulnerabilities = cyberbattle_env_identifiers.local_vulnerabilities
    # Array defining an index for every possible remote vulnerability name : List[model.VulnerabilityID]
    spec.remote_vulnerabilities = cyberbattle_env_identifiers.remote_vulnerabilities

    registry.env_specs[id] = spec
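A usage sketch for the helper above (the identifier values and the entry point are hypothetical; the attribute names are exactly the ones the function copies onto the spec, and it is assumed that model.Identifiers can be constructed from them):

# Hypothetical registration; all values below are placeholders.
identifiers = model.Identifiers(
    properties=["Windows", "Linux"],
    ports=["SSH", "HTTP"],
    local_vulnerabilities=["SearchCredentials"],
    remote_vulnerabilities=["ProbeSSH"],
)
register(
    id="MyCyberBattleEnv-v0",
    cyberbattle_env_identifiers=identifiers,
    entry_point="my_package.envs:MyCyberBattleEnv",  # hypothetical entry point
    max_episode_steps=2000,
)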
    def __init__(self, config=None):

        if config is None:
            config = self._get_default_config()

        self.rewardCriteria = config['rewardCriteria']
        self.environment = config['environment']
        self.behavior = config['behavior']
        self.verbose = config['verbose']
        self.episodeLengthDay = config['episodeLengthDay']
        self.stepSizeMinute = config['stepSizeMinute']

        self.action_space = Discrete(2)
        self.observation_space = self.get_observation_space()
        self._spec = EnvSpec("EngagementGym-v0")

        self.masterNumDayPassed = 0
Example #28
    def __init__(self, config):
        self.env = MaintenanceEnv(config)
        self.config = config
        self.n_worker = config["number_of_workers"]
        self.action_space = Discrete(self.n_worker + 1)
        self.observation_space = Box(0,
                                     np.inf,
                                     shape=[
                                         config["number_of_machines"] * 4,
                                     ],
                                     dtype='float32')

        self.model_expert = tf.keras.models.load_model(
            config["path_to_keras_expert_model"])
        self._spec = EnvSpec("WorkerMaintenanceEnv-Feudal-{}-v0".format(
            self.n_worker))
        self.ranking = 0
Example #29
    def __init__(self, config=CMOTPConfig(), max_steps=10000):
        """
        :param config: an object containing the configuration to use
        :param max_steps: maximum length of an episode
        """
        self.config = config
        self.max_steps = max_steps

        self.n_agents = self.config.n_agents

        # Define the action and observation spaces
        self.action_space = spaces.Discrete(self.config.n_actions)
        obs_shape = self.config.grid_dimensions + (1,)
        self.observation_space = spaces.Box(low=0, high=255, shape=obs_shape, dtype=np.uint)

        # Define the environment id
        self.spec = EnvSpec('CMOTP-v0')
Example #30
    def __init__(self, config=ENV_CONFIG):
        self.__version__ = '0.0.1'
        logger.info('RawMaintenanceEnv - Version {}'.format(self.__version__))
        self._spec = EnvSpec("RawMaintenanceEnv-Worker-{}-v0".format(
            config["number_of_workers"]))

        self.time = 1
        self.time_horizon = config["time_horizon"]

        # initialize machines
        self.machine_park = MachineParkVecSimple(config)

        self.action_space = self.machine_park.action_space
        self.observation_space = self.machine_park.observation_space

        self.number_of_machines = self.machine_park.number_of_machines
        self.number_of_workers = self.machine_park.number_of_workers