def _request(self, req):
    """Manages the request-response exchanges with the iv4XR RL environment.

    Args:
        req (dict): request to send.

    Returns:
        received response.
    """
    self.socket.send_json(req)
    # Get the reply.
    content = self.socket.recv_json()
    if req["command"] == "GET_SPEC":
        try:
            env_spec = EnvSpec(content['envName'])
        except gym.error.Error:
            env_spec = EnvSpec(content['envName'] + "-v0")
        action_space = parse_gym_space(content['actionSpace'])
        observation_space = parse_gym_space(content['observationSpace'])
        self._env_properties = EnvProperties(action_space, observation_space, env_spec)
        logger.info(f"Connected to environment: {self.env_spec.id}")
        return self._env_properties
    elif req["command"] == "STEP":
        return (content["nextObservation"]["rawObservation"],
                content["reward"], content["done"], content["info"])
    elif req["command"] == "RESET":
        return content["rawObservation"]
    else:
        raise AttributeError("Invalid command: " + req["command"])
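# A minimal sketch of the three request types handled by _request() above,
# assuming `client` is an instance of the surrounding class; the "arg" key
# used for the action payload is an assumption, not shown in the source.
props = client._request({"command": "GET_SPEC"})                  # -> EnvProperties
obs = client._request({"command": "RESET"})                       # -> raw observation
obs, reward, done, info = client._request({"command": "STEP", "arg": action})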
def run(self):
    """Run function of the Thread. Manages the messaging with iv4XR."""
    while self._running:
        # Wait for the next request from the client.
        try:
            message = self.socket.recv_json()
        except zmq.error.ContextTerminated:
            # The connection was forcefully closed.
            return
        self._connected = True
        logger.debug("[Server] Received request: %s" % message)
        # Send the reply back to the client.
        if message["cmd"] == "ENV_SPEC":
            content = message['arg']
            try:
                env_spec = EnvSpec(content['envName'])
            except gym.error.Error:
                env_spec = EnvSpec(content['envName'] + "-v0")
            action_space = parse_gym_space(content['actionSpace'])
            observation_space = parse_gym_space(content['observationSpace'])
            self._env_properties = EnvProperties(action_space, observation_space, env_spec)
            logger.info(f"Connected to environment: {self.env_spec.id}")
            self.socket.send_json(True)
        elif message["cmd"] == "GET_ACTION":
            content = message['arg']
            with self._state_cv:
                self._state = content["rawObservation"]
                self._state_cv.notify()
            with self._action_cv:
                self._action_cv.wait()
            self.socket.send_json({"rawAction": self._action_to_send})
        elif message["cmd"] == "LOG_RETURNS":
            content = message['arg']
            with self._returns_cv:
                self._returns = (content["nextObservation"]["rawObservation"],
                                 content["reward"], content["done"], content["info"])
                self._returns_cv.notify()
            self.socket.send_json(True)
        elif message["cmd"] == "DISCONNECT":
            self._env_properties = None
            self.socket.send_json(True)
            self._connected = False
            with self._state_cv:
                self._state_cv.notify()
        else:
            raise ValueError(f"[Server] Unexpected command: {message['cmd']}")
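# A minimal sketch of the agent-side counterpart to the GET_ACTION /
# LOG_RETURNS handshake above. The condition variables and _action_to_send
# are the same attributes the server thread uses; the step() wrapper itself
# is hypothetical, and the notify/wait race between the two condition
# variables is ignored for brevity.
def step(self, action):
    # Hand the chosen action to the server thread blocked in GET_ACTION.
    with self._action_cv:
        self._action_to_send = action
        self._action_cv.notify()
    # Block until the environment reports the transition via LOG_RETURNS.
    with self._returns_cv:
        self._returns_cv.wait()
    return self._returns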
def __init__(self):
    self.observation_space = gym.spaces.Box(-1, 1, shape=(10,))
    self.action_space = gym.spaces.Tuple((
        gym.spaces.Discrete(3),
        gym.spaces.Discrete(4),
        gym.spaces.Discrete(5),
        gym.spaces.Box(shape=(2,), low=-1.0, high=1.0)
    ))
    self.reward_range = (-math.inf, math.inf)
    self.metadata = None
    self.spec = EnvSpec(id="DummyEnv-v0")
    # One head per component of the Tuple action space.
    self.head_infos = [
        {"type": "categorical", "out_dim": 3},
        {"type": "categorical", "out_dim": 4},
        {"type": "categorical", "out_dim": 5},
        {"type": "normal", "out_dim": 2}
    ]
    self.autoregressive_maps = [
        [-1],
        [-1, 0],
        [-1, 0, 1],
        [-1, 0]
    ]
    self.action_type_masks = [
        [1, 1, 0],
        [1, 1, 1],
        [0, 0, 1]
    ]
def __init__(self):
    self.qpos_cur = np.zeros([1, 7])
    self.qvel_cur = np.zeros([1, 7])
    self.impact = np.array([0, 0, 0, 0, 0, 0])
    self.fall = 0
    self.t_imp = 0
    self.set_impact = 0
    self.detect_impact_time = 10
    self.mu1 = 0.5
    self.mu2 = 0.5
    self.lfoot = 0
    self.rfoot = 0
    self.evaluate = False
    mujoco_env.MujocoEnv.__init__(self, getResourcePath() + "/five_link.xml", 4)
    utils.EzPickle.__init__(self)
    self.step_success = 0
    self.spec = EnvSpec("five_link-v3")  # TODO
    self.spec.max_episode_steps = 1000
    self.rbody_xpos = 0
    self.lbody_xpos = 0
def meta_reset(self, seed):
    np.random.seed(seed)

    env = NormalHopperEnv()  # Based on Hopper-v2
    spec = EnvSpec(
        'NormalHopperEnv-v0',
        entry_point='generic_rl.envs.mujoco:NormalHopperEnv',
        max_episode_steps=1000,
        reward_threshold=3800.0,
    )
    env._spec = spec
    env.seed(seed)

    # Wrap the env as needed.
    env = TimeLimit(
        env,
        max_episode_steps=spec.max_episode_steps,
        max_episode_seconds=spec.max_episode_seconds,
    )

    self.env = env

    # Fix for done flags.
    self.env.reset()
    self.step = env.step
    self.render = env.render
    self.reset = env.reset
def meta_reset(self, seed):
    np.random.seed(seed)

    env = RandomWeightHopperEnv(rand_mass=self.rand_mass,
                                rand_gravity=self.rand_gravity,
                                rand_friction=self.rand_friction,
                                rand_thickness=self.rand_thickness)  # Based on Hopper-v2
    spec = EnvSpec(
        'RandomWeightHopperEnv-v0',
        entry_point='generic_rl.envs.mujoco:RandomWeightHopperEnv',
        max_episode_steps=1000,
        reward_threshold=3800.0,
    )
    env._spec = spec
    env.seed(seed)

    # Wrap the env as needed.
    env = TimeLimit(
        env,
        max_episode_steps=spec.max_episode_steps,
        max_episode_seconds=spec.max_episode_seconds,
    )

    self.env = env

    # Fix for done flags.
    self.env.reset()
    self.step = env.step
    self.render = env.render
    self.reset = env.reset
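# Hypothetical usage of the meta_reset() pattern above: `task` is an
# instance of the surrounding wrapper class (its name is not shown in the
# source). meta_reset() rebuilds the env, its spec, and the TimeLimit
# wrapper, then rebinds step/render/reset onto the wrapper.
task.meta_reset(seed=42)
obs = task.reset()
obs, reward, done, info = task.step(task.env.action_space.sample())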
def __init__(self, name, img_size=84, camera_id='side', max_step=-1):
    self.env_name = name
    self.img_size = img_size
    self.camera_id = camera_id
    self.max_step = max_step

    if self.env_name == 'Humanoid_CMU':
        self.env = humanoid_CMU.run()
    else:
        domain, task = self.env_name.split('/')
        self.env = suite.load(domain_name=domain, task_name=task)

    self.control_min = self.env.action_spec().minimum[0]
    self.control_max = self.env.action_spec().maximum[0]
    self.control_shape = self.env.action_spec().shape
    self._action_space = spaces.Box(self.control_min, self.control_max, self.control_shape)

    total_size = 0
    for i, j in self.env.observation_spec().items():
        total_size += j.shape[0] if len(j.shape) > 0 else 1
    self._observation_space = spaces.Box(-np.inf, np.inf, (total_size,))

    self.step_count = 0
    self.reward_range = (-np.inf, np.inf)
    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 67
    }
    self.spec = EnvSpec('Humanoid-v2', max_episode_steps=1000)
def __init__(self, dir, continuous_action=False):
    super(T4HistoryEnv, self).__init__()
    if continuous_action:
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.int8)
    else:
        self.action_space = spaces.Discrete(3)
    self.dir = dir
    files = process_dir(dir)
    self.files = files
    shuffle(self.files)
    self.state = ObsState(dir, self.files[0])
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=self.state.observation.shape,
                                        dtype=np.uint8)
    print(self.state.observation.shape)
    self.x = 0
    self.wins = 0
    self.episode_wins = 0
    self.action_count = 0
    self.spec = EnvSpec(id='T4History-v0', nondeterministic=True)
    self.reward_range = (-1, 1)
    self.continuous_action = continuous_action
def __init__(self, process_idx=0, edge_penalty=0, width=10, height=16,
             easy=False, nohole=False, hidden_dist=0, mark_env=False,
             hrl_env=False, debug=False):
    self.process_idx = process_idx
    self.edge_penalty = edge_penalty
    self.action_space = gym.spaces.Discrete(4)
    self.observation_space = gym.spaces.Box(low=0.0, high=1.0,
                                            shape=(width, height, 3),
                                            dtype=np.float32)
    self.easy = easy
    self.nohole = nohole
    self.actions = []
    self.width = width
    self.height = height
    self.hidden = False
    self.hidden_dist = hidden_dist
    self.reward_range = (float(-self.edge_penalty - 1), 100.0)
    self.debug = debug
    self.spec = EnvSpec('Myenv-v0')
    self.mark_env = mark_env
    self.hrl_env = hrl_env
    self.mode_1 = False
    self.goal = (0, 0)  # true goal
    self.mark_done = False
def __init__(self, env, max_timestep, maze_size_scaling, random_start, low, high):
    super(GoalWrapper, self).__init__(env)
    ob_space = env.observation_space
    self.maze_size_scaling = maze_size_scaling
    low = np.array(low, dtype=ob_space.dtype)
    high = np.array(high, dtype=ob_space.dtype)
    maze_low = np.array(np.array([-4, -4]) / 8 * maze_size_scaling, dtype=ob_space.dtype)
    maze_high = np.array(np.array([20, 20]) / 8 * maze_size_scaling, dtype=ob_space.dtype)
    self.goal_space = gym.spaces.Box(low=low, high=high)
    self.maze_space = gym.spaces.Box(low=maze_low, high=maze_high)
    self.goal_dim = low.size
    self.distance_threshold = 5 * maze_size_scaling / 8.
    self.spec = EnvSpec(id='PointMaze-v0', timestep_limit=max_timestep)
    self.distance = 5 * maze_size_scaling / 8.
    self.observation_space = gym.spaces.Dict(
        OrderedDict({
            'observation': ob_space,
            'desired_goal': self.goal_space,
            'achieved_goal': self.goal_space,
        }))
    self.goal = None
    self.random_start = random_start
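# Sketch of constructing the GoalWrapper above; the inner env and the bounds
# passed here are illustrative values. Per the Dict observation_space,
# observations become goal-conditioned dicts (the overriding reset()/step()
# methods are not shown in the source).
env = GoalWrapper(env, max_timestep=500, maze_size_scaling=8,
                  random_start=True, low=[-4, -4], high=[20, 20])
obs = env.reset()  # expected keys: 'observation', 'desired_goal', 'achieved_goal'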
def __init__(self, env, record_video=True, video_schedule=None, log_dir=None,
             timestep_limit=9999):
    # Ensure the version saved to disk doesn't monitor into our log_dir.
    locals_no_monitor = dict(locals())
    locals_no_monitor['log_dir'] = None
    locals_no_monitor['record_video'] = False
    locals_no_monitor['video_schedule'] = None
    Serializable.quick_init(self, locals_no_monitor)

    self.env = env
    self._observation_space = to_rllab_space(env.observation_space)
    self._action_space = to_rllab_space(env.action_space)
    self.env.spec = EnvSpec('GymEnv-v0')

    monitor.logger.setLevel(logging.WARNING)
    if not record_video:
        self.video_schedule = NoVideoSchedule()
    else:
        if video_schedule is None:
            self.video_schedule = CappedCubicVideoSchedule()
        else:
            self.video_schedule = video_schedule
    self.set_log_dir(log_dir)
    self._horizon = timestep_limit
def __init__(self, config):
    self.end_pos = config["corridor_length"]
    self.cur_pos = 0
    self.action_space = Discrete(2)
    self.observation_space = Box(0.0, self.end_pos, shape=(1,), dtype=np.float32)
    self._spec = EnvSpec("SimpleCorridor-{}-v0".format(self.end_pos))
class StocksEnv(gym.Env):
    metadata = {"render.modes": ['human']}
    spec = EnvSpec("StocksEnv-v0")

    @classmethod
    def from_dir(cls, data_dir, **kwargs):
        prices = {
            file: data.load_relative(file)
            for file in data.price_files(data_dir)
        }
        return cls(prices, **kwargs)

    def __init__(self, prices, bars_count=DEFAULT_BARS_COUNT,
                 commission=DEFAULT_COMMISSION_PERC, reset_on_close=True,
                 conv_1d=False, random_ofs_on_reset=True,
                 reward_on_close=False, volumes=False):
        assert isinstance(prices, dict)
        self._prices = prices
        if conv_1d:
            self._state = State1D(bars_count, commission, reset_on_close,
                                  reward_on_close=reward_on_close, volumes=volumes)
        else:
            self._state = State(bars_count, commission, reset_on_close,
                                reward_on_close=reward_on_close, volumes=volumes)
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf,
                                                shape=self._state.shape,
                                                dtype=np.float32)
        self.random_ofs_on_reset = random_ofs_on_reset
        self.seed()

    def reset(self):
        self._instrument = self.np_random.choice(list(self._prices.keys()))
        prices = self._prices[self._instrument]
        bars = self._state.bars_count
        if self.random_ofs_on_reset:
            offset = self.np_random.choice(prices.high.shape[0] - bars * 10) + bars
        else:
            offset = bars
        self._state.reset(prices, offset)
        return self._state.encode()

    def step(self, action_idx):
        action = Actions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {"instrument": self._instrument, "offset": self._state._offset}
        return obs, reward, done, info

    def render(self, mode='human', close=False):
        pass

    def close(self):
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
        return [seed1, seed2]
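# Minimal usage sketch for the StocksEnv above; the data directory path is
# hypothetical.
env = StocksEnv.from_dir("data/prices", bars_count=10)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())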
def test_env_spec_tree():
    spec_tree = EnvSpecTree()

    # Add with namespace.
    spec = EnvSpec("test/Test-v0")
    spec_tree["test/Test-v0"] = spec
    assert spec_tree.tree.keys() == {"test"}
    assert spec_tree.tree["test"].keys() == {"Test"}
    assert spec_tree.tree["test"]["Test"].keys() == {0}
    assert spec_tree.tree["test"]["Test"][0] == spec
    assert spec_tree["test/Test-v0"] == spec

    # Add without namespace.
    spec = EnvSpec("Test-v0")
    spec_tree["Test-v0"] = spec
    assert spec_tree.tree.keys() == {"test", None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {0}
    assert spec_tree.tree[None]["Test"][0] == spec

    # Deleting the last version deletes the entire subtree.
    del spec_tree["test/Test-v0"]
    assert spec_tree.tree.keys() == {None}

    # Append a second version for the same name.
    spec_tree["Test-v1"] = EnvSpec("Test-v1")
    assert spec_tree.tree.keys() == {None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {0, 1}

    # Deleting one version leaves the other.
    del spec_tree["Test-v0"]
    assert spec_tree.tree.keys() == {None}
    assert spec_tree.tree[None].keys() == {"Test"}
    assert spec_tree.tree[None]["Test"].keys() == {1}

    # Add without a version.
    myenv = "MyAwesomeEnv"
    spec = EnvSpec(myenv)
    spec_tree[myenv] = spec
    assert spec_tree.tree.keys() == {None}
    assert myenv in spec_tree.tree[None].keys()
    assert spec_tree.tree[None][myenv].keys() == {None}
    assert spec_tree.tree[None][myenv][None] == spec

    assert spec_tree.__repr__() == "├──Test: [ v1 ]\n" + f"└──{myenv}: [ ]\n"
def __init__(self, goal_reaching_thresholds=np.array([0.075, 0.075, 0.75]),
             goal_not_reached_penalty=-1, goal_reached_reward=0,
             terminate_on_goal_reaching=True, time_limit=1000, frameskip=1,
             random_goals_instead_of_standing_goal=False,
             polar_coordinates: bool = False):
    super().__init__()

    dir = os.path.dirname(__file__)
    model = load_model_from_path(dir + "/pendulum_with_goals.xml")

    self.sim = MjSim(model)
    self.viewer = None
    self.rgb_viewer = None

    self.frameskip = frameskip
    self.goal = None
    self.goal_reaching_thresholds = goal_reaching_thresholds
    self.goal_not_reached_penalty = goal_not_reached_penalty
    self.goal_reached_reward = goal_reached_reward
    self.terminate_on_goal_reaching = terminate_on_goal_reaching
    self.time_limit = time_limit
    self.current_episode_steps_counter = 0
    self.random_goals_instead_of_standing_goal = random_goals_instead_of_standing_goal
    self.polar_coordinates = polar_coordinates

    # Spaces definition
    self.action_space = spaces.Box(low=-self.sim.model.actuator_ctrlrange[:, 1],
                                   high=self.sim.model.actuator_ctrlrange[:, 1],
                                   dtype=np.float32)
    if self.polar_coordinates:
        self.observation_space = spaces.Dict({
            "observation": spaces.Box(low=np.array([-np.pi, -15]),
                                      high=np.array([np.pi, 15]), dtype=np.float32),
            "desired_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                       high=np.array([np.pi, 15]), dtype=np.float32),
            "achieved_goal": spaces.Box(low=np.array([-np.pi, -15]),
                                        high=np.array([np.pi, 15]), dtype=np.float32)
        })
    else:
        self.observation_space = spaces.Dict({
            "observation": spaces.Box(low=np.array([-1, -1, -15]),
                                      high=np.array([1, 1, 15]), dtype=np.float32),
            "desired_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                       high=np.array([1, 1, 15]), dtype=np.float32),
            "achieved_goal": spaces.Box(low=np.array([-1, -1, -15]),
                                        high=np.array([1, 1, 15]), dtype=np.float32)
        })

    self.spec = EnvSpec('PendulumWithGoals-v0')
    self.spec.reward_threshold = self.goal_not_reached_penalty * self.time_limit

    self.reset()
def __init__(self, config):
    self.config = config
    self.size_px = (config['res'], config['res'])
    env_args = dict(
        map_name=config['map'],
        step_mul=config['step_mul'],
        game_steps_per_episode=0,
        screen_size_px=self.size_px,
        minimap_size_px=self.size_px,
    )
    self.env = sc2_env.SC2Env(**env_args)
    self._spec = EnvSpec("Sc2-{}-v0".format(config['map']))
@classmethod
def make_timed_env(cls, power_scalar, max_episode_steps=None, max_episode_seconds=None):
    base_env = Continuous_MountainCarEnv_Editted(power_scalar)
    base_env.spec = EnvSpec(base_env.get_name())
    env = TimeLimit(base_env,
                    max_episode_seconds=max_episode_seconds,
                    max_episode_steps=max_episode_steps)
    return env
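# Hypothetical usage of make_timed_env(), assuming it is exposed as a
# classmethod on its owning class (the class name EnvFactory is assumed).
env = EnvFactory.make_timed_env(power_scalar=1.0, max_episode_steps=999)
obs = env.reset()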
def __init__(self, simulator=None):
    # Avoid a mutable default argument: create the simulator per instance.
    if simulator is None:
        simulator = ShowdownSimulator()
    self.__version__ = "0.1.0"
    self._spec = EnvSpec('PokeBattleEnv-v0')
    self.simulator = simulator

    num_actions = (len(self.simulator.get_available_actions())
                   + len(self.simulator.get_available_modifiers()))
    self.action_space = Box(low=0.0, high=1.0, shape=(num_actions,), dtype=np.float32)

    state_dimensions = len(self.simulator.state.to_array())
    self.observation_space = Box(low=0, high=1000, shape=(state_dimensions,), dtype=np.float32)

    self.reward_range = (-1, 1)
    self.metadata['render.modes'] = ['human']
    self.metadata['semantics.autoreset'] = False
def __init__(self, visualize=False, difficulty=None):
    super(LearnToRunEnv, self).__init__()
    if difficulty is None:
        self.difficulty = random.randint(0, 2)
    else:
        self.difficulty = difficulty
    self.learntorun_env = RunEnv(visualize=visualize)
    self.observation_space = self.learntorun_env.observation_space
    self.action_space = self.learntorun_env.action_space
    self._spec = EnvSpec("RunEnv-diff{}-v1".format(self.difficulty))
def __init__(self, config):
    """Define the environment properties.

    :param config (object): the environment's configuration settings
    """
    self.config = config

    # Define the action and observation spaces.
    self.action_space = spaces.Discrete(self.config.n_actions)
    self.observation_space = spaces.Discrete(self.config.n_rows * self.config.n_columns)

    # Define the environment id.
    self.spec = EnvSpec('GridWorld-v0')
def __init__(self, max_episode_steps_coeff=1, scale=20, goal_padding=2.0):
    super(PointMass, self).__init__()
    # Define scale such that each square in the grid is 1 x 1.
    self.scale = int(scale)
    self.grid_size = self.scale * self.scale
    self.observation_space = gym.spaces.Box(low=np.array([0.0, 0.0]),
                                            high=np.array([1.0, 1.0]))
    self.action_space = gym.spaces.Box(low=np.array([-np.inf, -np.inf]),
                                       high=np.array([np.inf, np.inf]))
    self.goal_padding = goal_padding
    self.spec = EnvSpec(id='PointMass-v0',
                        max_episode_steps=int(max_episode_steps_coeff * self.scale))
def __init__(self, config=None):
    """Simple go-to-goal environment in which a non-holonomic agent must
    move to a goal. The goal can be fixed or random, but during training it
    is advisable to provide a random goal every time. Rewards are binary.

    config includes:
        max_episode_steps (int): maximum number of timesteps in an episode
        reward_max (int): reward when the goal is achieved
        seed (int): seed of the numpy random process
        her (bool): whether to use the HER-compatible variant or not
        dt (float): dt in the kinematic update equation
        num_iter (int): number of iterations of the kinematic update equation
    """
    # Default values. Will be overridden if specified in config.
    self.dt = 1e-2
    self.her = True
    self.thresh = np.array([0.05, 0.05, 0.1])[:-1]
    self.num_iter = 50
    self.reward_max = 1
    self.max_episode_steps = 25
    self._max_episode_steps = 25
    self.step_penalty = 1.0
    self.action_low = np.array([0.0, -np.pi / 4])
    self.action_high = np.array([0.3, np.pi / 4])
    self.action_space = Box(self.action_low, self.action_high, dtype="f")
    # Clip so that goals stay within the range reachable by actions.
    self.d_clip = self.action_high[0] * self.num_iter * self.dt * 1.35
    self.observation_space = Box(low=-1, high=1, shape=(5,), dtype="f")
    self.limits = np.array([1, 1, np.pi])
    self.agent = Agent(0)
    if config is not None:
        self.__dict__.update(config)
    self.goal = None
    if not self.her:
        self.dMax = self.action_high[0] * self.dt * self.num_iter
        self.dRange = 2 * self.dMax
    self.viewer = None
    self._spec = EnvSpec("Go2Goal-v0")
def __init__(self, seed=None, room_size=2, gap_size=0.0,
             decore_option: DecoreOption = DecoreOption.NONE,
             wall_decore_height=None, num_chars_on_wall=1,
             invert_chars=True, non_terminate=False, **kwargs):
    params = DEFAULT_PARAMS
    params.set('turn_step', 5, 3, 7)
    params.set('forward_step', 0.2, 0.15, 0.25)

    self.num_rows = 6
    self.num_cols = 6
    self.room_size = room_size
    self.gap_size = gap_size
    self.decore_option = decore_option
    self.wall_decore_height = wall_decore_height
    self.num_chars_on_wall = num_chars_on_wall
    self.invert_chars = invert_chars
    self.non_terminate = non_terminate
    self.height = self.num_rows * room_size + (self.num_rows - 1) * gap_size
    self.width = self.num_cols * room_size + (self.num_cols - 1) * gap_size
    self.M = None

    # Decoration stuff
    self.text_decores = []
    if DecoreOption.DIGIT in self.decore_option:
        self.text_decores.extend(DIGITS)
    if DecoreOption.CHARACTER in self.decore_option:
        self.text_decores.extend(CHARACTERS)
    self.image_decores = PORTRAIT_NAMES if DecoreOption.PORTRAIT in self.decore_option else []

    super().__init__(seed=seed, params=params, **kwargs)

    self.spec = EnvSpec(id="WestWorld-v1",
                        entry_point=None,
                        reward_threshold=None,
                        nondeterministic=False,
                        max_episode_steps=self.max_episode_steps,
                        kwargs=None)

    # Allow only the movement actions.
    self.action_space = spaces.Discrete(self.actions.move_back + 1)
class StocksEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    spec = EnvSpec("StocksEnv-v0")

    def __init__(self, prices, bar_count=DEFAULT_BAR_COUNT, commision=DEFAULT_COMMISION):
        """Initialize prices, state, observation space, and action space."""
        assert isinstance(prices, dict)
        self._prices = prices
        self._state = State(bar_count, commision)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf,
                                                shape=self._state.shape,
                                                dtype=np.float32)
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.seed()

    def step(self, action_idx):
        """Take a step in the environment; return the next observation, the
        reward, the done flag, and extra info."""
        action = Actions(action_idx)
        done, reward = self._state.step(action)
        obs = self._state.encode()
        info = {"choice": self._random_choice, "offset": self._state._offset}
        return obs, reward, done, info

    def reset(self):
        """Reset and return the first observation for the agent."""
        self._random_choice = random.randrange(
            len(self._prices['date']) - self._state.bars_count - 1)
        offset = self._random_choice
        self._state.reset(self._prices, offset)
        return self._state.encode()

    def render(self, mode='human'):
        pass

    def close(self):
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]
def __init__(self, env):
    self.env = env
    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': int(np.round(1.0 / self.env.control_timestep()))
    }

    self.observation_space = convert_dm_control_to_gym_space(env.observation_spec())
    self.action_space = convert_dm_control_to_gym_space(env.action_spec())

    max_episode_steps = None if env._step_limit == float('inf') else int(env._step_limit)
    self.spec = EnvSpec('DM-v0', max_episode_steps=max_episode_steps)
    self.viewer = None
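# Minimal usage sketch for the dm_control wrapper above; the wrapper class
# name DmControlWrapper is assumed.
from dm_control import suite

dm_env = suite.load(domain_name="cartpole", task_name="swingup")
env = DmControlWrapper(dm_env)
print(env.spec.id, env.spec.max_episode_steps)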
def register(id: str, cyberbattle_env_identifiers: model.Identifiers, **kwargs):
    """Same as gym.envs.registry.register, but adds CyberBattle specs to env.spec."""
    if id in registry.env_specs:
        raise Error('Cannot re-register id: {}'.format(id))
    spec = EnvSpec(id, **kwargs)
    # Map from port number to port names : List[model.PortName]
    spec.ports = cyberbattle_env_identifiers.ports
    # Array of all possible node properties (not necessarily all used in the network) : List[model.PropertyName]
    spec.properties = cyberbattle_env_identifiers.properties
    # Array defining an index for every possible local vulnerability name : List[model.VulnerabilityID]
    spec.local_vulnerabilities = cyberbattle_env_identifiers.local_vulnerabilities
    # Array defining an index for every possible remote vulnerability name : List[model.VulnerabilityID]
    spec.remote_vulnerabilities = cyberbattle_env_identifiers.remote_vulnerabilities
    registry.env_specs[id] = spec
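# Hypothetical usage of the custom register() above: the id and entry_point
# are illustrative, and `env_identifiers` stands in for a concrete
# model.Identifiers instance.
import gym

register(
    'MyCyberBattle-v0',
    cyberbattle_env_identifiers=env_identifiers,
    entry_point='my_package.envs:MyCyberBattleEnv',
    max_episode_steps=2000,
)
env = gym.make('MyCyberBattle-v0')
print(env.spec.ports)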
def __init__(self, config=None):
    if config is None:
        config = self._get_default_config()
    self.rewardCriteria = config['rewardCriteria']
    self.environment = config['environment']
    self.behavior = config['behavior']
    self.verbose = config['verbose']
    self.episodeLengthDay = config['episodeLengthDay']
    self.stepSizeMinute = config['stepSizeMinute']
    self.action_space = Discrete(2)
    self.observation_space = self.get_observation_space()
    self._spec = EnvSpec("EngagementGym-v0")
    self.masterNumDayPassed = 0
def __init__(self, config):
    self.env = MaintenanceEnv(config)
    self.config = config
    self.n_worker = config["number_of_workers"]
    self.action_space = Discrete(self.n_worker + 1)
    self.observation_space = Box(0, np.inf,
                                 shape=[config["number_of_machines"] * 4],
                                 dtype='float32')
    self.model_expert = tf.keras.models.load_model(config["path_to_keras_expert_model"])
    self._spec = EnvSpec("WorkerMaintenanceEnv-Feudal-{}-v0".format(self.n_worker))
    self.ranking = 0
def __init__(self, config=CMOTPConfig(), max_steps=10000):
    """
    :param config: an object containing the configuration to use
    :param max_steps: maximum length of an episode
    """
    self.config = config
    self.max_steps = max_steps
    self.n_agents = self.config.n_agents

    # Define the action and observation spaces.
    self.action_space = spaces.Discrete(self.config.n_actions)
    obs_shape = self.config.grid_dimensions + (1,)
    self.observation_space = spaces.Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)

    # Define the environment id.
    self.spec = EnvSpec('CMOTP-v0')
def __init__(self, config=ENV_CONFIG):
    self.__version__ = '0.0.1'
    logger.info('RawMaintenanceEnv - Version {}'.format(self.__version__))
    self._spec = EnvSpec("RawMaintenanceEnv-Worker-{}-v0".format(config["number_of_workers"]))
    self.time = 1
    self.time_horizon = config["time_horizon"]

    # Initialize machines.
    self.machine_park = MachineParkVecSimple(config)
    self.action_space = self.machine_park.action_space
    self.observation_space = self.machine_park.observation_space
    self.number_of_machines = self.machine_park.number_of_machines
    self.number_of_workers = self.machine_park.number_of_workers