def forward_dynamics(self, action): if len(action) != flat_dim(self.action_space) + 1: raise ValueError('incorrect action dimension: expected %d but got ' '%d' % (flat_dim(self.action_space), len(action))) lb, ub = self.action_bounds action = np.clip(action, lb, ub) for ctrl, act in zip(self.extra_data.controls, action): if ctrl.typ == "force": for name in ctrl.bodies: body = find_body(self.world, name) direction = np.array(ctrl.direction) direction = direction / np.linalg.norm(direction) world_force = body.GetWorldVector(direction * act) world_point = body.GetWorldPoint(ctrl.anchor) body.ApplyForce(world_force, world_point, wake=True) elif ctrl.typ == "torque": assert ctrl.joint joint = find_joint(self.world, ctrl.joint) joint.motorEnabled = True # forces the maximum allowed torque to be taken if act > 0: joint.motorSpeed = 1e5 else: joint.motorSpeed = -1e5 joint.maxMotorTorque = abs(act) else: raise NotImplementedError self.before_world_step(action) self.world.Step(self.extra_data.timeStep, self.extra_data.velocityIterations, self.extra_data.positionIterations)
def clear_patch(hfield, box): '''Clears a patch shaped like box, assuming robot is placed in center of hfield @param box: metaworlds.spaces.Box-like ''' if flat_dim(box) > 2: raise ValueError("Provide 2dim box") # clear patch h_center = int(0.5 * hfield.shape[0]) w_center = int(0.5 * hfield.shape[1]) fromrow, torow = w_center + int(box.low[0] / STEP), w_center + int( box.high[0] / STEP) fromcol, tocol = h_center + int(box.low[1] / STEP), h_center + int( box.high[1] / STEP) hfield[fromrow:torow, fromcol:tocol] = 0.0 # convolve to smoothen edges somewhat, in case hills were cut off k = np.ones((10, 10)) / 100.0 s = convolve2d(hfield[fromrow - 9:torow + 9, fromcol - 9:tocol + 9], k, mode='same', boundary='symm') hfield[fromrow - 9:torow + 9, fromcol - 9:tocol + 9] = s return hfield
def log_diagnostics(self, paths, *args, **kwargs): # we call here any logging related to the maze, strip the maze # obs and call log_diag with the stripped paths we need to log # the purely gather reward!! with logger.tabular_prefix('Maze_'): gather_undiscounted_returns = [ sum(path['env_infos']['outer_rew']) for path in paths ] logger.record_tabular_misc_stat('Return', gather_undiscounted_returns, placement='front') stripped_paths = [] for path in paths: stripped_path = {} for k, v in path.items(): stripped_path[k] = v stripped_path['observations'] = stripped_path[ 'observations'][:, :flat_dim(self.env.observation_space)] # this breaks if the obs of the robot are d>1 dimensional (not a # vector) stripped_paths.append(stripped_path) with logger.tabular_prefix('wrapped_'): wrapped_undiscounted_return = np.mean( [np.sum(path['env_infos']['inner_rew']) for path in paths]) logger.record_tabular('AverageReturn', wrapped_undiscounted_return) self.env.log_diagnostics(stripped_paths, *args, **kwargs)
def __init__( self, env, action_delay=3, ): assert action_delay > 0, "Should not use this env transformer" Serializable.quick_init(self, locals()) super(DelayedActionEnv, self).__init__(env) self.action_delay = action_delay self._action_flat_dim = flat_dim(self.action_space) self._queued_actions = None
def __init__( self, env, obs_noise=1e-1, ): super().__init__(env) self.obs_noise = obs_noise self._action_flat_dim = flat_dim(self.action_space) # Always call Serializable constructor last Serializable.quick_init(self, locals())
def _set_sensor_mask(self, env, sensor_idx): obsdim = flat_dim(env.observation_space) if len(sensor_idx) > obsdim: raise ValueError( ("Length of sensor mask ({0}) cannot be greater " "than observation dim ({1})").format(len(sensor_idx), obsdim)) if len(sensor_idx) == obsdim and not np.any(np.array(sensor_idx) > 1): sensor_mask = np.array(sensor_idx, dtype=np.bool) elif np.any(np.unique(sensor_idx, return_counts=True)[1] > 1): raise ValueError(("Double entries or boolean mask " "with dim ({0}) < observation dim ({1})").format( len(sensor_idx), obsdim)) else: sensor_mask = np.zeros((obsdim, ), dtype=np.bool) sensor_mask[sensor_idx] = 1 self._sensor_mask = sensor_mask