class WindySlope(gym.Env): def __init__(self, model, mode, hertz=25, should_render=True, should_screenshot=False): self.hertz = hertz self.steps = 0 self.should_render = should_render self.should_screenshot = should_screenshot self.nsubsteps = int(MAX_TIME / model.opt.timestep / 100) self.viewer = None self.model = model self.mode = mode self.enabled = True self.metadata = {'render.modes': 'rgb_array'} self.should_record = False def close(self): pass @property def observation_space(self): return Box(low=-np.inf, high=np.inf, shape=(18, )) @property def action_space(self): return Box(low=-np.inf, high=np.inf, shape=(0, )) @property def model(self): return self._model @model.setter def model(self, model): self._model = model self.sim = MjSim(model) self.data = self.sim.data if self.should_render: if self.viewer: self.viewer.sim = sim return self.viewer = MjViewer(self.sim) self.viewer.cam.azimuth = 45 self.viewer.cam.elevation = -20 self.viewer.cam.distance = 25 self.viewer.cam.lookat[:] = [0, 0, -2] self.viewer.scn.flags[3] = 0 def reset(self): self.sim.reset() self.steps = 0 self.sim.forward() obs = self.get_observations(self.model, self.data) return obs def get_observations(self, model, data): self.sim.forward() obs = [] name = 'box' pos = data.get_body_xpos(name) xmat = data.get_body_xmat(name).reshape(-1) velp = data.get_body_xvelp(name) velr = data.get_body_xvelr(name) for x in [pos, xmat, velp, velr]: obs.extend(x.copy()) obs = np.array(obs, dtype=np.float32) return obs def screenshot(self, image_path): self.viewer.hide_overlay = True self.viewer.render() width, height = 2560, 1440 #width, height = 1,1 img = self.viewer.read_pixels(width, height, depth=False) # original image is upside-down, so flip it img = img[::-1, :, :] imageio.imwrite(image_path, img) def step(self, action): nsubsteps = self.nsubsteps for _ in range(nsubsteps): self.sim.step() self.render() self.steps += 1 return self.get_observations(self.model, self.data), 1, self.steps == 100, {} def render(self, mode=None): if self.should_render: self.viewer._overlay.clear() def nothing(): return self.viewer._create_full_overlay = nothing wind = model.opt.wind[0] self.viewer.add_overlay(1, "Wind", "{:.2f}".format(wind)) self.viewer.render() if self.should_record: width, height = 2560, 1440 img = self.viewer.read_pixels(width, height, depth=False) # original image is upside-down, so flip it img = img[::-1, :, :] return img def euler2quat(self, euler): euler = np.asarray(euler, dtype=np.float64) assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) cc, cs = ci * ck, ci * sk sc, ss = si * ck, si * sk quat = np.empty(euler.shape[:-1] + (4, ), dtype=np.float64) quat[..., 0] = cj * cc + sj * ss quat[..., 3] = cj * sc - sj * cs quat[..., 2] = -(cj * ss + sj * cc) quat[..., 1] = cj * cs - sj * sc return quat def degrees2radians(self, degrees): return degrees * np.pi / 180
class YuMi(gym.GoalEnv): metadata = {'render.modes': ['human']} def __init__(self, path, dynamics, task, goal_env=False, hertz=25, render=True, seed=0, logging_level=logging.INFO): logging.basicConfig(level=logging_level) tf.set_random_seed(seed) np.random.seed(seed) random.seed(seed) self.goal_env = goal_env self.task = task self.path = path model = load_model_from_path(path) self.joint_idx = [] for name in sorted(model.joint_names): if 'yumi' not in name: continue self.joint_idx.append(model.joint_name2id(name)) self.joint_states_pos = None self.joint_states_vel = None self.previous_action = np.zeros(14) self.target_hist = deque(maxlen=2) self.hertz = hertz self.steps = 0 self.should_render = render self.steps_per_action = int(1.0 / hertz / model.opt.timestep) model.nuserdata = 14 self.viewer = None self.model = model self._set_joint_limits() self.generate_dynamics = dynamics @property def model(self): return self._model @model.setter def model(self, model): self._model = model self.sim = MjSim(model, udd_callback=self.callback) self.data = self.sim.data if self.should_render and not self.viewer: self.viewer = MjViewer(self.sim) self.viewer.cam.azimuth = 180 self.viewer.cam.elevation = -15 self.viewer._hide_overlay = True @property def horizon(self): return int(self.hertz * MAX_TIME) def get_horizon(self): return self.horizon @property def action_space(self): return spaces.Box(low=-0.1, high=0.1, shape=(14, ), dtype=np.float32) @property def observation_space(self): d = spaces.Dict({ 'observation': spaces.Box(low=-np.inf, high=np.inf, shape=(91, ), dtype=np.float32), 'desired_goal': spaces.Box(low=-np.inf, high=np.inf, shape=(6, ), dtype=np.float32), 'achieved_goal': spaces.Box(low=-np.inf, high=np.inf, shape=(6, ), dtype=np.float32) }) if self.goal_env: return d else: return d['observation'] def get_step(self): return self.steps def simstep(self): try: if self.sim.nsubsteps == self.steps_per_action: terminal = self.sim.step() else: # TODO: randomize substeps for step in range(self.steps_per_action): terminal = self.sim.step() if terminal: break except Exception as e: print('Caught exception: ', e) with open('exception.txt', 'w') as f: f.write(str(e)) terminal = True self.render() return terminal def callback(self, state): if self.joint_states_pos is None: return model = state.model nu = model.nu data = state.data dt = 1 / self.hertz if VELOCITY_CONTROLLER: action = np.empty(nu) action[:] = data.userdata[:nu] ctrl = action - self.joint_states_vel data.qacc[self.joint_idx] = ctrl * 1.0 / dt else: action = np.empty(model.nu) for i in range(len(self.joint_idx)): pid = self.pids[i] qpos = self.joint_states_pos[i] action[i] = pid(qpos, data.time) res = action res -= self.joint_states_vel * dt data.qacc[self.joint_idx] = (1.0 / dt**2) * res functions.mj_inverse(model, data) data.qfrc_applied[self.joint_idx] = data.qfrc_inverse[self.joint_idx] def reset(self): self.steps = 0 self.joint_states_pos = None self.joint_states_vel = None self.data.qacc[:] = 0 self.data.qvel[:] = 0 self.pids = [] for i in range(self.model.nu): pid = PID(Kp=1.0, Kd=0.001, sample_time=0) pid.output_limits = (self.action_space.low[i], self.action_space.high[i]) self.pids.append(pid) _ = self.randomize_dynamics(self.model) self.sim.reset() self.set_initial_pose() self.sim.forward() obs = self.get_observations() return obs def set_initial_pose(self): self.data.set_joint_qpos('yumi_joint_1_l', 1.) self.data.set_joint_qpos('yumi_joint_1_r', -1.) self.data.set_joint_qpos('yumi_joint_2_l', 0.1) self.data.set_joint_qpos('yumi_joint_2_r', 0.1) ''' randomize goal goal_id = self.model.body_name2id('goal') pos = self.model.body_pos[goal_id] pos[0:2] = [0.5, 0] pos[0:2] += np.random.uniform(-0.05, 0.05, size=(2,)) self.model.body_pos[goal_id] = pos ''' self.sim.forward() target_start, target_end = self.model.get_joint_qpos_addr('target') target_qpos = self.data.qpos[target_start:target_end] target_quat = target_qpos[3:] if self.task == 0: # rotate y=-90 deg quat = rotations.euler2quat(np.array([0, -np.pi / 2, 0])) z_idx = 0 elif self.task == 1: # rotate x=90 deg quat = rotations.euler2quat(np.array([np.pi / 2, 0, 0])) z_idx = 1 elif self.task == 2: # do nothing quat = rotations.euler2quat(np.array([0, 0, 0])) z_idx = 2 elif self.task == 3: # rotate z=-90 deg quat = rotations.euler2quat(np.array([0, 0, -np.pi / 2])) z_idx = 2 else: raise Exception('Additional tasks not implemented.') target_id = self.model.geom_name2id('target') target_qpos[0] = 0.5 + np.random.uniform(-0.01, 0.01) target_qpos[1] = np.random.uniform(0.14, 0.15) height = self.model.geom_size[target_id][z_idx] target_qpos[2] = 0.051 + height goal_id = self.model.body_name2id('goal') body_pos = self.model.body_pos[goal_id] body_quat = self.model.body_quat[goal_id] body_pos[2] = target_qpos[2] body_quat[:] = quat perturbation = np.zeros(3) perturbation[z_idx] = np.random.uniform(-0.2, 0.2) euler = rotations.quat2euler(quat) euler = rotations.subtract_euler(euler, perturbation) target_qpos[3:] = rotations.euler2quat(euler) def quat2mat(self, quat): result = np.empty(9, dtype=np.double) functions.mju_quat2Mat(result, np.asarray(quat)) return result def mat2quat(self, mat): result = np.empty(4, dtype=np.double) functions.mju_mat2Quat(result, np.asarray(mat)) return result def get_observations(self): """ This functions looks a bit awkward since it is trying to replicate the order of observations when listening to transforms using ROS TF. """ obs = [] observations = {} pos = [] vel = [] model = self.model data = self.data # Previous policy in ROS used translations between yumi_base_link # and requested frames. # We subtract the xpos of yumi_base_link to mimic that behavior. base_link_xpos = data.get_body_xpos('yumi_base_link') # TODO Look at this for i, joint in enumerate(self.joint_idx): pos.append(data.qpos[joint]) vel.append(data.qvel[joint]) name = model.joint_names[joint] assert model.get_joint_qpos_addr(name) == joint obs.extend(pos) obs.extend(vel) self.joint_states_pos = np.array(pos) + np.random.normal( 0, 0.001, size=len(pos)) self.joint_states_vel = np.array(vel) + np.random.normal( 0, 0.001, size=len(vel)) name = 'site:goal' pos = data.get_site_xpos(name) pos -= base_link_xpos mat = data.get_site_xmat(name) mat = mat.reshape(-1) obs.extend(pos) obs.extend(mat) names = ['left_gripper_base', 'right_gripper_base'] for name in names: pos = data.get_body_xpos(name) pos -= base_link_xpos quat = data.get_body_xquat(name) mat = self.quat2mat(quat) obs.extend(pos) obs.extend(mat) name = 'site:target' pos = data.get_site_xpos(name) pos -= base_link_xpos mat = data.get_site_xmat(name) mat = mat.reshape(-1) observations['desired_goal'] = self.get_desired_goal() observations['achieved_goal'] = self.get_achieved_goal() while len(self.target_hist) < self.target_hist.maxlen: self.target_hist.append(np.hstack([pos, mat])) self.target_hist.append(np.hstack([pos, mat])) obs.extend(self.target_hist[-1]) obs.extend( (self.target_hist[-1] - self.target_hist[-2]) / (1 / self.hertz)) target_id = model.geom_name2id('target') size = model.geom_size[target_id] obs.extend(size) obs = np.array(obs) observations['observation'] = obs if self.goal_env: return observations else: return obs def render(self): if self.should_render: if False and self.sim.nsubsteps == self.steps_per_action and self.steps % 10 != 0: return self.viewer.render() def step(self, action): action = .1 * action self.steps += 1 reward = 0 terminal = False # Check for early termination terminal, force_penalty = self.bad_collision() if terminal: reward = -10 # Eventhough limits are specified in action_space, they # are not honored by baselines so we clip them action = np.clip(action, self.action_space.low, self.action_space.high) idx = 0 if self.joint_states_pos[idx] > 1.2: action[0] = min(action[idx], 0) elif self.joint_states_pos[0] < 0.8: action[0] = max(action[0], 0) idx = 1 if self.joint_states_pos[idx] < -1.2: action[idx] = max(action[idx], 0) elif self.joint_states_pos[idx] > -0.8: action[idx] = min(action[idx], 0) for idx in [2, 3, 4, 5]: if self.joint_states_pos[idx] > 0.4: action[idx] = min(action[idx], 0) if self.joint_states_pos[idx] < -0.3: action[idx] = max(action[idx], 0) idx = -2 if self.joint_states_pos[idx] > 0.2: action[idx] = min(action[idx], 0) elif self.joint_states_pos[idx] < -0.4: action[idx] = max(action[idx], 0) idx = -1 if self.joint_states_pos[idx] < -0.2: action[idx] = max(action[idx], 0) elif self.joint_states_pos[idx] > 0.4: action[idx] = min(action[idx], 0) if VELOCITY_CONTROLLER: action += self.joint_states_vel else: action += self.joint_states_pos for i, a in enumerate(action): pid = self.pids[i] pid.setpoint = a # Perturbate action #action += np.random.normal(0, 0.001, size=len(action)) stop_early = self.simstep() terminal = terminal or stop_early obs = self.get_observations() if not terminal: reward = self.compute_reward(self.get_achieved_goal(), self.get_desired_goal(), {}) #reward -= force_penalty #logger.debug('force penalty: %f' % force_penalty) terminal = self.steps == self.horizon if self.steps % self.hertz == 0: logging.info('Step: {}, reward: {}'.format(self.steps, reward)) if 0.8 < reward and self.steps == self.horizon: logging.info('**** LOOKING GOOD ****') return obs, reward, terminal, {} def get_distance(self, a, b): return np.linalg.norm(a - b, axis=-1) def get_goal_distance(self, achieved_goal, desired_goal): pos1, pos2 = achieved_goal[:], desired_goal[:] pos_distance = self.get_distance(pos1, pos2) return pos_distance def compute_reward(self, achieved_goal, desired_goal, info): pos_distance = self.get_goal_distance(achieved_goal, desired_goal) pos_reward = self.get_pos_reward(pos_distance) euler1, euler2 = achieved_goal[3:], desired_goal[3:] ang_distance = np.linalg.norm(rotations.subtract_euler(euler1, euler2), axis=-1) ang_distance_ratio = 0.5 ang_distance_penalty = ang_distance_ratio * ang_distance reward = pos_reward - ang_distance_penalty if self.steps % 10 == 0: logging.debug('Reward: %f, pos reward: %f, ang penalty: %f' % (reward, pos_reward, ang_distance_penalty)) return reward def get_pos_reward(self, distance, close=0.01, margin=0.2): return max(0, 1 - distance / margin) def _set_joint_limits(self): xml_str = self.model.get_xml() tree = EE.fromstring(xml_str) low, high = [], [] for name in sorted(self.model.joint_names): if 'yumi' not in name: continue limit = tree.find(".//joint[@name='{}']".format(name)) limit_range = [float(x) for x in limit.get('range').split(' ')] low.append(limit_range[0]) high.append(limit_range[1]) self.joint_limits = spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32) def bad_collision(self): bad_collision = False force_penalty = 0 for i in range(self.data.ncon): contact = self.data.contact[i] geom1 = contact.geom1 geom2 = contact.geom2 bodyid1 = self.model.geom_bodyid[geom1] bodyid2 = self.model.geom_bodyid[geom2] bodyname1 = self.model.body_id2name(bodyid1) bodyname2 = self.model.body_id2name(bodyid2) is_target = 'target' in bodyname1 or 'target' in bodyname2 is_target = is_target or 'box-composite' in bodyname1 or 'box-composite' in bodyname2 is_table = 'table' in bodyname1 or 'table' in bodyname2 if is_target and is_table: continue elif is_target: continue sim = self.sim body1_cfrc = sim.data.cfrc_ext[bodyid1] body1_contact_force_norm = np.sqrt( np.sum(np.square(body1_cfrc))) body2_cfrc = sim.data.cfrc_ext[bodyid2] body2_contact_force_norm = np.sqrt( np.sum(np.square(body2_cfrc))) force_penalty = body1_contact_force_norm + body2_contact_force_norm else: bad_collision = True if bad_collision: print('body is: ', bodyname1, bodyname2) break return bad_collision, force_penalty def get_dynamics(self): return self.dynamics def randomize_dynamics(self, model): self.dynamics = self.generate_dynamics() fri, icom = self.dynamics try: id = model.body_name2id('insidebox') model.body_pos[id][-1] = icom for pair in range(model.npair): tableid = model.geom_name2id('table') targetid = model.geom_name2id('target') if ((model.pair_geom1 == tableid and model.pair_geom2 == targetid) or (model.pair_geom2 == tableid and model.pair_geom1 == targetid)): pair_friction = model.pair_friction[pair] pair_friction[:2] = [fri, fri] logging.debug('Dynamics: {}'.format(self.dynamics)) except: pass self.sim.forward() return model def get_desired_goal(self): return self.get_site_pose('site:goal') def get_achieved_goal(self): return self.get_site_pose('site:target') def get_site_pose(self, site): xpos = self.data.get_site_xpos(site) euler = rotations.mat2euler(self.data.get_site_xmat(site)) return np.hstack([xpos, euler]) def screenshot(self, image_path='image.png'): import imageio self.viewer._hide_overlay = True self.viewer.render() width, height = 2560, 1440 img = self.viewer.read_pixels(width, height, depth=False) # original image is upside-down, so flip it img = img[::-1, :, :] imageio.imwrite(image_path, img)