def __init__(self) -> None: super().__init__() self.state = None self.current_goal = None self.iter = 0 self._action_space = FloatBox(low=-0.01, high=0.01, shape=2) self._observation_space = FloatBox(low=-1., high=1., shape=4) # current state and goal self.goal_space = FloatBox(low=-1., high=1., shape=2)
def get_obs(self, get_space=False): """ Get observation of state for RL. If get_space is true return space description. Space is defined as: 0: time [1] 1:7 hand pos + rotation vector [6] 7: gripper state [1] 8:14 obj pos + rotation vector [6], 14: obj height 15: obj radius In case of two objects: 16:22 obj pos + rotation vector [6], 22: obj height 23: obj radius """ if get_space: if self.two_objects: return FloatBox(low=[0.] + [-1.] * 6 + [0.] + [-1.] * 6 + [0.] * 2 + [-1.] * 6 + [0.] * 2, high=[1.] + [1.] * 6 + [self.FINGER_OPEN_POS] + [1.] * 6 + [0.2] * 2 + [1.] * 6 + [0.2] * 2) return FloatBox(low=[0.] + [-1.] * 6 + [0.] + [-1.] * 6 + [0.] * 2, high=[1.] + [1.] * 6 + [self.FINGER_OPEN_POS] + [1.] * 6 + [0.2] * 2) t = self.scene.simulation_time hand_pose = self.hand_actors[0].get_global_pose() grip = self.get_grip() obj_pose = self.obj.get_global_pose() if self.two_objects: obj2_pose = self.obj2.get_global_pose() return np.concatenate([ [t / 10.], hand_pose[0], npq.as_rotation_vector(hand_pose[1]), [grip], obj_pose[0], npq.as_rotation_vector(obj_pose[1]), [self.obj_height, self.obj_radius], obj2_pose[0], npq.as_rotation_vector(obj2_pose[1]), [self.obj2_height, self.obj2_radius], ]).astype(np.float32) return np.concatenate([ [t / 10.], hand_pose[0], npq.as_rotation_vector(hand_pose[1]), [grip], obj_pose[0], npq.as_rotation_vector(obj_pose[1]), [self.obj_height, self.obj_radius], ]).astype(np.float32)
def __init__(self): self.end_pos = 10 self.cur_pos = 0 # self._action_space = IntBox(low=0, high=2, shape=(2,)) self._action_space = IntBox(low=0, high=2) self._observation_space = Composite( [ FloatBox(low=0, high=self.end_pos), FloatBox(low=0, high=self.end_pos) ], Obs, )
def __init__(self, horizon=100, weight_are_actions=False) -> None: super().__init__() self.weight_are_actions = weight_are_actions self._horizon = horizon self.state = None self.start_space = FloatBox(low=0., high=1., shape=2) self.start_state = np.zeros(2, dtype=np.float32) self.goal = np.zeros(2, dtype=np.float32) self.iter = 0 self._action_space = FloatBox(low=-10, high=10, shape=2) # new state self._observation_space = self.get_obs(shape=True)
def __init__(self) -> None: super().__init__() self.state = None self.current_goal = None self.iter = 0 self.action_discrete_mapping = [ np.array([0.0, 0.0]), np.array([-0.1, 0.0]), np.array([0.0, -0.1]), np.array([0.1, 0.0]), np.array([0.0, 0.1]), ] self._action_space = IntBox(low=0, high=len(self.action_discrete_mapping)) self._observation_space = FloatBox(low=-1., high=1., shape=4) # current state and goal self.goal_space = FloatBox(low=-1., high=1., shape=2)
def __init__(self, id: str, exp_kwargs: dict = None, external_logging: str = 'none', save_path: str = '', overwrite: bool = True): assert (id in VALID_ENV_SWEEP_IDS) or ( id in VALID_ENV_IDS and exp_kwargs is not None ) # Either using one of presets or using base experiment with other settings aug_path = osp.join(LOG_DIR, save_path) # LOG_DIR + save_path if id in VALID_ENV_SWEEP_IDS: # Pre-parameterized experiments if external_logging == 'none': env = bsuite.load_from_id(id) # No recording else: env = bsuite.load_and_record( id, aug_path, external_logging, overwrite=overwrite ) # Record in sql or csv. same sql for each id self.num_episodes = env.bsuite_num_episodes else: noise_scale = exp_kwargs.pop('noise_scale', 0.) noise_scale_seed = exp_kwargs.pop('noise_scale_seed', 0.) reward_scale = exp_kwargs.pop('reward_scale', 0.) env = bsuite.load(id, **exp_kwargs) if noise_scale: env = RewardNoise(env, noise_scale, noise_scale_seed) if reward_scale: env = RewardScale(env, reward_scale) self.num_episodes = 1e4 # Default self.env = env self._action_space = IntBox(low=0, high=self.env.action_spec().num_values) o_spec = self.env.observation_spec() if isinstance(o_spec, specs.BoundedArray): self._observation_space = FloatBox(low=o_spec.minimum.item(), high=o_spec.maximum.item(), shape=o_spec.shape, dtype=o_spec.dtype) else: self._observation_space = FloatBox(low=-float('inf'), high=float('inf'), shape=o_spec.shape, dtype=o_spec.dtype) self._last_observation = None self.game_over = False, self.viewer = None
def observation_space(self): img_obs = IntBox(low=0, high=255, shape=(3,) + self._size, dtype="uint8") if self.use_state: state_obs = self._env.observation_spec() total_shape = np.sum([v.shape[0] for k, v in state_obs.items()]) state_obs_box = FloatBox(low=-float('inf'), high=float('inf'), shape=total_shape) return StateObs(img_obs, state_obs_box) else: return img_obs
def __init__(self, batch_T, batch_B) -> None: super().__init__() self.batch_T = batch_T self.batch_B = batch_B self.state = None self.current_goal = None self.iter = 0 self.action_discrete_mapping = np.array([ [0.0, 0.0], [-0.1, 0.0], [0.0, -0.1], [0.1, 0.0], [0.0, 0.1], ]) self._action_space = IntBox(low=0, high=len(self.action_discrete_mapping)) self._observation_space = FloatBox(low=-1., high=1., shape=4) # current state and goal self.goal_space = FloatBox(low=-1., high=1., shape=(self.batch_B, 2))
def __init__( self, vsrl_env: VSRLEnv, oracle_safety: bool = False, log_unsafe_transitions: bool = False, ): """ :param log_unsafe_transitions: only used when sym_features is given at `step` """ super().__init__() self._env = vsrl_env action_space = vsrl_env.action_space if isinstance(action_space, FiniteSpace): # rlpyt discrete spaces can only be `IntBox`s, so I just make them # IntBox(0, n_actions) and then map the action back to our space in step n_actions = len(action_space.elements) self._action_space = IntBox(0, n_actions) # self._action_space = IntBox(0, n_actions, shape=(1,)) self._actions = np.stack(action_space.elements) self.constrained_sample = partial( constrained_sample_finite, constraint=self._env.constraint_func, actions=action_space.elements[:], ) elif isinstance(action_space, CompactSet): low, high = zip(*action_space.bounds.values()) ones = np.ones_like(low) self._action_space = FloatBox(-ones, ones) self._actions = None self._action_lb = np.array(low, dtype=np.float32) self._action_ub = np.array(high, dtype=np.float32) self._action_range = self._action_ub - self._action_lb if hasattr(self._env, "constrained_sample"): self.constrained_sample = self._env.constrained_sample else: self.constrained_sample = partial( constrained_sample_continuous, constraint=self._env.constraint_func, lower_bounds=self._action_lb, upper_bounds=self._action_ub, ) else: raise ValueError( f"Do not know how to convert action space {action_space} to rlpyt." ) self._observation_space = vsrl_env.observation_space self.log_unsafe_transitions = log_unsafe_transitions self.oracle_safety = oracle_safety self._fallback_action = None
def __init__(self): self.cur_step = 0 self.width = CANVAS_WIDTH self.obj_wh = np.array([OBJ_WIDTH, OBJ_WIDTH], dtype=np.float32) / self.width self.n_items = N_ITEMS self._action_space = IntBox(low=0, high=N_ACTIONS) self._observation_space = Composite( [ FloatBox(low=-1, high=1, shape=(self.width, self.width, 1)), FloatBox(low=-1, high=1, shape=(self.width, self.width, 1)), FloatBox(low=-10, high=10, shape=(self.n_items, 4)), ], Obs, ) # self._observation_space = FloatBox( # low=-1, high=1, shape=(self.width, self.width, 1) # ) self.target_im = np.zeros(shape=(self.width, self.width, 1), dtype=np.float32) self.target_coord = None # (n_obj, 4=(x0, y0, x1, y1)) self.cur_im = None self.cur_coord = None # (n_obj, 4=(x0, y0, x1, y1)) self.item = None
def __init__(self, low, high, shape=None, dtype="float32", null_value=None): """ low and high are scalars, applied across all dimensions of shape. """ dtype = np.dtype(dtype) if dtype.kind == 'i' or dtype.kind == 'u': self.box = IntBox(low, high, shape=shape, dtype=dtype, null_value=None) elif dtype.kind == 'f': self.box = FloatBox(low, high, shape=shape, dtype=dtype, null_value=None) else: raise NotImplementedError(dtype)
def __init__(self, scenario_name='', scenario_cfg=None, max_steps=200, gif_freq=500, steps_per_action=4, image_dir='images/test', viewport=False): # Load holodeck environment if scenario_cfg is not None and \ scenario_cfg['package_name'] not in holodeck.installed_packages(): holodeck.install(scenario_cfg['package_name']) self._env = holodeck.make(scenario_name=scenario_name, scenario_cfg=scenario_cfg, show_viewport=viewport) # Get action space from holodeck env and store for use with rlpyt if self.is_action_continuous: self._action_space = FloatBox(-1, 1, self._env.action_space.shape) else: self._action_space = IntBox(self._env.action_space.get_low(), self._env.action_space.get_high(), ()) # Calculate observation space with all sensor data max_width = 0 max_height = 0 num_img = 0 num_lin = 0 for sensor in self._env._agent.sensors.values(): if 'Task' in sensor.name: continue shape = sensor.sensor_data.shape if len(shape) == 3: max_width = max(max_width, shape[0]) max_height = max(max_height, shape[1]) num_img += shape[2] else: num_lin += np.prod(shape) if num_img > 0 and num_lin == 0: self.has_img = True self.has_lin = False self._observation_space = FloatBox(0, 1, (num_img, max_width, max_height)) elif num_lin > 0 and num_img == 0: self.has_img = False self.has_lin = True self._observation_space = FloatBox(-256, 256, (num_lin,)) else: self.has_img = True self.has_lin = True self._observation_space = Composite([ FloatBox(0, 1, (num_img, max_width, max_height)), FloatBox(-256, 256, (num_lin,))], HolodeckObservation) # Set data members self._max_steps = max_steps self._image_dir = image_dir self._steps_per_action = steps_per_action self.curr_step = 0 self.gif_freq = gif_freq self.rollout_count = -1 self.gif_images = []
def action_space(self): spec = self._env.action_spec() return FloatBox(low=spec.minimum, high=spec.maximum)
def __init__( self, assortment_size=1000, # number of items to train max_stock=1000, # Size of maximum stock clip_reward=False, episodic_lives=True, repeat_action_probability=0.0, horizon=10000, seed=None, substep_count=4, bucket_customers=torch.tensor([800., 400., 500., 900.]), bucket_cov=torch.eye(4) / 100, forecastBias=0.0, forecastVariance=0.0, freshness=1, utility_function='homogeneous', utility_weights={ 'alpha': 1., 'beta': 1., 'gamma': 1. }, characDim=4, lead_time=1, # Defines how quickly the orders goes through the buffer - also impacts the relevance of the observation lead_time_fast=0, symmetric_action_space=False, ): save__init__args(locals(), underscore=True) logging.info("Creating new StoreEnv") self.bucket_customers = bucket_customers # Spaces if symmetric_action_space: self._action_space = FloatBox(low=-max_stock / 2, high=max_stock / 2, shape=[assortment_size]) else: self._action_space = IntBox(low=0, high=max_stock, shape=[assortment_size]) self.stock = torch.zeros(assortment_size, max_stock, requires_grad=False) # correct high with max shelf life self._observation_space = FloatBox( low=0, high=1000, shape=(assortment_size, max_stock + characDim + lead_time + lead_time_fast + 1)) self._horizon = int(horizon) self.assortment = Assortment(assortment_size, freshness, seed) self._repeater = torch.stack( (self.assortment.shelf_lives, torch.zeros( self._assortment_size))).transpose(0, 1).reshape(-1).detach() self.forecast = torch.zeros(assortment_size, 1) # DAH forecast. self._step_counter = 0 # Needs to move towards env parameters self._customers = \ d.multivariate_normal.MultivariateNormal(bucket_customers, bucket_cov) self.assortment.base_demand = \ self.assortment.base_demand.detach() \ / bucket_customers.sum() self._bias = d.normal.Normal(forecastBias, forecastVariance) # We want a yearly seasonality - We have a cosinus argument and a phase. # Note that, as we take the absolute value, 2*pi/365 becomes pi/365. self._year_multiplier = torch.arange(0.0, horizon, pi / 365) self._week_multiplier = torch.arange(0.0, horizon, pi / 7) self._phase = 2 * pi * torch.rand(assortment_size) self._phase2 = 2 * pi * torch.rand(assortment_size) self.create_buffers(lead_time, lead_time_fast) if utility_function == 'linear': self.utility_function = LinearUtility(**utility_weights) elif utility_function == 'loglinear': self.utility_function = LogLinearUtility(**utility_weights) elif utility_function == 'cobbdouglas': self.utility_function = CobbDouglasUtility(**utility_weights) elif utility_function == 'homogeneous': self.utility_function = HomogeneousReward(**utility_weights) else: self.utility_function = utility_function self._updateEnv() for i in range(self._lead_time): units_to_order = torch.as_tensor( self.forecast.squeeze() * bucket_customers[i]).round().clamp( 0, self._max_stock) self._addStock(units_to_order)
def action_space(self): shape = (self.env.action_space.n, ) space = FloatBox(low=0, high=1, shape=shape, dtype=self._dtype) space.sample = self._sample_action return space
def get_obs(self, shape=False): if shape: return FloatBox(low=-10, high=10, shape=3) return np.concatenate([[self.get_time()], self.state]).astype(np.float32)
def action_space(self): low = np.where(self._mask, -np.ones_like(self._low), self._low) high = np.where(self._mask, np.ones_like(self._low), self._high) return FloatBox(low, high, dtype=np.float32)
def __init__(self, horizon=100, render=False, realtime=False, reward_params=None, reset_state_sampler=None, state_trajectories: List[StateTrajectory] = None, video_filename=None, reset_on_singularity=True, reset_on_plane_hit=True, action_start_time=0., action_end_time=0., two_objects=False, dz=0.2, open_gripper_on_leave=True, close_gripper_on_leave=False, use_max_reward=False, add_obstacle=False) -> None: super().__init__() self.use_max_reward = use_max_reward self.close_gripper_on_leave = close_gripper_on_leave self.open_gripper_on_leave = open_gripper_on_leave assert not (self.close_gripper_on_leave and self.open_gripper_on_leave) self.action_end_time = action_end_time self.action_start_time = action_start_time self.two_objects = two_objects self.reset_on_singularity = reset_on_singularity self.reset_on_plane_hit = reset_on_plane_hit self.realtime = realtime self.reset_state_sampler = reset_state_sampler self.reward_params = reward_params or GripperCylinderEnv.get_default_reward_params( ) self.state_trajectories = state_trajectories or [] self.render = render self._horizon = horizon self.iter = 0 self.num_sub_steps = 10 self.control_frequency = Rate(12) """ Define action and observation space. """ self._action_space = FloatBox(low=-1., high=1., shape=7) self._observation_space = self.get_obs(get_space=True) """ Create scene. """ self.scene = Scene(scene_flags=[ SceneFlag.ENABLE_FRICTION_EVERY_ITERATION, SceneFlag.ENABLE_STABILIZATION ]) self.plane_mat = Material(static_friction=0, dynamic_friction=0) self.obj_mat = Material(static_friction=1., dynamic_friction=1., restitution=0) self.plane_actor = RigidStatic.create_plane(material=self.plane_mat) self.scene.add_actor(self.plane_actor) self.hand_actors = self.create_hand_actors(use_mesh_fingers=False, use_mesh_base=False) self.joints = self.create_hand_joints() self.obj = self.create_obj() self.obj_height = 0.1 self.obj_radius = 0.03 if self.two_objects: self.obj2 = self.create_obj() self.obj2_height = 0.1 self.obj2_radius = 0.03 if add_obstacle: actor = RigidStatic() actor.attach_shape( Shape.create_box([0.02, 0.02, 0.2], self.obj_mat)) actor.set_global_pose([0.075, 0.015, 0.1]) self.scene.add_actor(actor) if self.render: from pyphysx_render.pyrender import PyPhysxViewer, RoboticTrackball import pyrender render_scene = pyrender.Scene() render_scene.bg_color = np.array([0.75] * 3) cam = pyrender.PerspectiveCamera(yfov=np.deg2rad(60), aspectRatio=1.414, znear=0.005) cam_pose = np.eye(4) cam_pose[:3, 3] = RoboticTrackball.spherical_to_cartesian( 0.75, np.deg2rad(-90), np.deg2rad(60)) cam_pose[:3, :3] = RoboticTrackball.look_at_rotation( eye=cam_pose[:3, 3], target=np.zeros(3), up=[0, 0, 1]) nc = pyrender.Node(camera=cam, matrix=cam_pose) render_scene.add_node(nc) render_scene.main_camera_node = nc self.renderer = PyPhysxViewer(video_filename=video_filename, render_scene=render_scene, viewer_flags={ 'axes_scale': 0.2, 'plane_grid_spacing': 0.1, }) self.renderer.add_physx_scene(self.scene) # from pyphysx_render.renderer import PyPhysXParallelRenderer # self.renderer = PyPhysXParallelRenderer(render_window_kwargs=dict( # video_filename=video_filename, coordinates_scale=0.2, coordinate_lw=1., # cam_pos_distance=0.75, cam_pos_elevation=np.deg2rad(60), cam_pos_azimuth=np.deg2rad(-90.), # )) """ Compute trajectory caches. Stores quantities in MxN arrays [# of time steps, # of trajectories] """ timesteps = np.arange( 0., self.control_frequency.period() * (1 + self.horizon), self.control_frequency.period()) self.demo_opos = np.zeros( (len(timesteps), len(self.state_trajectories), 3)) self.demo_hpos = np.zeros( (len(timesteps), len(self.state_trajectories), 3)) self.demo_grip = np.zeros( (len(timesteps), len(self.state_trajectories))) self.demo_hrot = np.zeros( (len(timesteps), len(self.state_trajectories), 2)) if self.two_objects: self.demo_opos2 = np.zeros( (len(timesteps), len(self.state_trajectories), 3)) for i, st in enumerate(self.state_trajectories): half_height = 0.5 * st.get_property_at_time('o0szz', timesteps) self.demo_opos[:, i, 0] = st.get_property_at_time('o0posx', timesteps) self.demo_opos[:, i, 1] = st.get_property_at_time('o0posy', timesteps) self.demo_opos[:, i, 2] = np.maximum( st.get_property_at_time('o0posz', timesteps) - half_height, 0.) if self.two_objects: half_height2 = 0.5 * st.get_property_at_time( 'o1szz', timesteps) self.demo_opos2[:, i, 0] = st.get_property_at_time( 'o1posx', timesteps) self.demo_opos2[:, i, 1] = st.get_property_at_time( 'o1posy', timesteps) self.demo_opos2[:, i, 2] = np.maximum( st.get_property_at_time('o1posz', timesteps) - half_height2, 0.) self.demo_hpos[:, i, 0] = st.get_property_at_time('hposx', timesteps) self.demo_hpos[:, i, 1] = st.get_property_at_time('hposy', timesteps) self.demo_hpos[:, i, 2] = np.maximum( st.get_property_at_time('hposz', timesteps) - half_height, 0.) self.demo_grip[:, i] = (1 - st.get_property_at_time( 'touch', timesteps)) * self.FINGER_OPEN_POS self.demo_hrot[:, i, 0] = st.get_property_at_time('hazi', timesteps) self.demo_hrot[:, i, 1] = st.get_property_at_time('hele', timesteps) self.demo_hpos[int(action_end_time * 12):, :, :] = self.demo_opos[int(action_end_time * 12):, :, :].copy() if self.two_objects: self.demo_hpos[int(action_end_time * 12):, :, :] = self.demo_opos2[ int(action_end_time * 12):, :, :].copy() self.demo_hpos[int(action_end_time * 12):, :, 2] += dz self.demo_grip_weak_closed = np.zeros( (len(timesteps), len(self.state_trajectories)), dtype=np.bool) self.demo_grip_weak_closed[int(action_start_time * 12):int(action_end_time * 12 )] = True # those inside action self.demo_grip_weak_closed &= self.demo_grip < self.FINGER_OPEN_POS / 2 # that are also closed # self.demo_grip_strong_open = np.ones((len(timesteps), len(self.state_trajectories)), dtype=np.bool) # self.demo_grip_strong_open[int(action_start_time * 12):int(action_end_time * 12)] = False tmp_rot_vec = np.zeros( (len(timesteps), len(self.state_trajectories), 3)) for i, st in enumerate(self.state_trajectories): for j in range(3): tmp_rot_vec[:, i, j] = st.get_property_at_time( 'o0rot{}'.format(j), timesteps) self.demo_orot = npq.from_rotation_vector(tmp_rot_vec) if self.two_objects: tmp_rot_vec = np.zeros( (len(timesteps), len(self.state_trajectories), 3)) for i, st in enumerate(self.state_trajectories): for j in range(3): tmp_rot_vec[:, i, j] = st.get_property_at_time( 'o1rot{}'.format(j), timesteps) self.demo_orot2 = npq.from_rotation_vector(tmp_rot_vec)