def find_contact_height(sim: mj.MjSim, iterations: int = 10) -> float: """ Find the height we should put our walker at so we are just barely in contact with the ground. Runs a binary search on height (assumed to be state[1][2]) such that the body with the minimum z coordinate on the ground is just barely in contact with the ground. Args: sim: the instantiated sim MjSim object you want to find the correct height for iterations: number of times to run_util the binary search Returns: just the height as a float, you'll need to set it yourself """ state = sim.get_state() height_guess = state[1][1] step = height_guess * 2 # upper range for how much we will search for for _ in range(iterations): if sim.data.ncon: # ncon is the number of collisions height_guess += step else: height_guess -= step state[1][1] = height_guess sim.set_state(state) sim.forward() step /= 2 return height_guess
def play(self, mocap_filepath): from mujoco_py import load_model_from_xml, MjSim, MjViewer curr_path = getcwd() xmlpath = '/mujoco/humanoid_deepmimic/envs/asset/dp_env_v2.xml' with open(curr_path + xmlpath) as fin: MODEL_XML = fin.read() model = load_model_from_xml(MODEL_XML) sim = MjSim(model) viewer = MjViewer(sim) self.read_raw_data(mocap_filepath) self.convert_raw_data() from time import sleep phase_offset = np.array([0.0, 0.0, 0.0]) while True: for k in range(len(self.data)): tmp_val = self.data_config[k] sim_state = sim.get_state() sim_state.qpos[:] = tmp_val[:] sim_state.qpos[:3] += phase_offset[:] sim.set_state(sim_state) sim.forward() viewer.render() sim_state = sim.get_state() phase_offset = sim_state.qpos[:3] phase_offset[2] = 0
def test_tableBoundViolation(): model = load_model_from_path("robot.xml") sim = MjSim(model) test1 = np.array([-0.0031, -0.9718, -0.7269, -2.4357, -0.0157, 1.5763, 0.7303]) #False test2 = np.array([0.0264, -0.0772, 0.1924, -2.8478, -0.0273, 2.8339, 0.7566]) #True test3 = np.array([-1.4870, -1.7289, 1.6138, -1.9814, -0.9856, 1.9304, 0.9981]) #True test4 = np.array([-0.5250, -0.6410, 0.1893, -1.3827, -0.2573, 2.1356, 0.7116]) #False test5 = np.array([-0.0133, 0.9846, 0.0365, -1.5491, 2.8629, 0.7630, 0.6254]) #True qd = np.zeros(7) state = MjSimState(time=0,qpos=test1,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test1:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test2,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test2:", "Passed" if bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test3,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test3:", "Passed" if bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test4,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test4:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed") state = MjSimState(time=0,qpos=test5,qvel=qd,act=None,udd_state={}) sim.set_state(state) sim.step() print("Test5:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
def run_activations_fcn(est_activations, model_ver=0, timestep=0.005, Mj_render=False): #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! the q0 is now the chasis pos. needs to be fixed """ this function runs the predicted activations generatred from running the inverse map on the desired task kinematics """ #loading data #print("loading data") #task_kinematics=np.load("task_kinematics.npy") #est_task_activations=np.load("est_task_activations.npy") model = load_model_from_path( "./models/nmi_leg_w_chassis_v{}.xml".format(model_ver)) sim = MjSim(model) if Mj_render: viewer = MjViewer(sim) # viewer.cam.fixedcamid += 1 # viewer.cam.type = const.CAMERA_FIXED sim_state = sim.get_state() control_vector_length = sim.data.ctrl.__len__() print("control_vector_length: " + str(control_vector_length)) number_of_task_samples = est_activations.shape[0] real_attempt_positions = np.zeros((number_of_task_samples, 2)) real_attempt_activations = np.zeros((number_of_task_samples, 3)) chassis_pos = np.zeros(number_of_task_samples, ) sim.set_state(sim_state) for ii in range(number_of_task_samples): sim.data.ctrl[:] = est_activations[ii, :] sim.step() current_positions_array = sim.data.qpos[-2:] # current_kinematics_array=np.array( # [sim.data.qpos[0], # sim.data.qvel[0], # sim.data.qacc[0], # sim.data.qpos[1], # sim.data.qvel[1], # sim.data.qacc[1]] # ) chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0] real_attempt_positions[ii, :] = current_positions_array real_attempt_activations[ii, :] = sim.data.ctrl if Mj_render: viewer.render() real_attempt_kinematics = positions_to_kinematics_fcn( real_attempt_positions[:, 0], real_attempt_positions[:, 1], timestep=0.005) return real_attempt_kinematics, real_attempt_activations, chassis_pos
def run_activations_fcn(MuJoCo_model_name, est_activations, timestep=0.01, Mj_render=False): #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! the q0 is now the chasis pos. needs to be fixed """ this function runs the predicted activations generatred from running the inverse map on the desired task kinematics """ #loading data #print("loading data") #task_kinematics=np.load("task_kinematics.npy") #est_task_activations=np.load("est_task_activations.npy") MuJoCo_model = load_model_from_path("./models/"+MuJoCo_model_name) sim = MjSim(MuJoCo_model) if Mj_render: viewer = MjViewer(sim) # to move it to the mounted camera viewer.cam.fixedcamid += 1 viewer.cam.type = const.CAMERA_FIXED # # to record the video # viewer._record_video = True # # recording path # viewer._video_path = "~/Documents/"+str(time.localtime()[3])+str(time.localtime()[4])+str(time.localtime()[5]) sim_state = sim.get_state() control_vector_length=sim.data.ctrl.__len__() #print("control_vector_length: "+str(control_vector_length)) number_of_task_samples=est_activations.shape[0] real_attempt_positions = np.zeros((number_of_task_samples,2)) real_attempt_activations = np.zeros((number_of_task_samples,3)) chassis_pos=np.zeros(number_of_task_samples,) sim.set_state(sim_state) for ii in range(number_of_task_samples): sim.data.ctrl[:] = est_activations[ii,:] sim.step() current_positions_array = sim.data.qpos[-2:] # current_kinematics_array=np.array( # [sim.data.qpos[0], # sim.data.qvel[0], # sim.data.qacc[0], # sim.data.qpos[1], # sim.data.qvel[1], # sim.data.qacc[1]] # ) chassis_pos[ii]=sim.data.get_geom_xpos("Chassis_frame")[0] real_attempt_positions[ii,:] = current_positions_array real_attempt_activations[ii,:] = sim.data.ctrl if Mj_render: viewer.render() real_attempt_kinematics = positions_to_kinematics_fcn( real_attempt_positions[:,0], real_attempt_positions[:,1], timestep = 0.01) return real_attempt_kinematics, real_attempt_activations, chassis_pos
def test_dampingControl(): model = load_model_from_path("robot.xml") sim = MjSim(model) viewer = MjViewer(sim) timestep = generatePatternedTrajectories.TIMESTEP control_freq = 1/timestep total_time = 2 num_cycles = int(total_time * control_freq) plt.ion() LW = 1.0 fig = plt.figure(figsize=(4,15)) axes = [] lines = [] goals = [] for i in range(7): axes.append(fig.add_subplot(7,1,i+1)) lines.append(axes[i].plot([],[],'b-', lw=LW)[0]) goals.append(axes[i].plot([],[],'r-', lw=LW)[0]) axes[i].set_ylim([-1, 1]) axes[i].set_xlim([0,total_time]) axes[i].set_ylabel("Angle{}(rad)".format(i), fontsize=8) axes[i].set_xlabel("Time(s)", fontsize=8) for test in range(5): q_init = bounds.getRandPosInBounds() qd_goal = np.zeros(7) qd_init = np.random.rand(7) for g in range(7): goals[g].set_ydata(np.ones(num_cycles) * qd_goal[g]) goals[g].set_xdata(np.linspace(0,3,num_cycles)) sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={})) sim.step() sim_time = 0 for i in range(num_cycles): state = sim.get_state() q = state[1] qd = state[2] sim.data.ctrl[:] = controllers.dampingControl(qd=qd) sim.step() viewer.render() if i % 35 == 0: for a in range(7): lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time)) lines[a].set_ydata(np.append(lines[a].get_ydata(), qd[a])) fig.canvas.draw() fig.canvas.flush_events() sim_time += timestep if bounds.outOfBounds(q): break for i in range(7): lines[i].set_xdata([]) lines[i].set_ydata([]) time.sleep(1)
class MyEnv(gym.Env): metadata = {'render.modes': ['human']} def __init__(self): self.model = load_model_from_path("xmls/Tesrt.xml") self.sim = MjSim(self.model) self.viewer = None self.sim_state = self.sim.get_state() self.bodynames = [ 'torso1', 'head', 'uwaist', 'lwaist', 'butt', 'right_thigh1', 'right_shin1', 'right_foot_cap1', 'right_foot_cap2', 'left_thigh1', 'left_shin1', 'left_foot_cap1', 'left_foot_cap2', 'right_uarm1', 'right_larm', 'right_hand', 'left_uarm1', 'left_larm', 'left_hand' ] ones_act = np.ones(len(self.sim.data.ctrl)) ones_obs = np.ones(self._get_state().shape[0]) self.action_space = spaces.Box(-ones_act, ones_act) self.observation_space = spaces.Box(-ones_obs, ones_obs) def _get_state(self): torso = [] ret = [] for i in range(len(self.bodynames)): vec = self.sim.data.get_geom_xpos(self.bodynames[i]) if i==0: ret = vec torso = vec if i!=0: ret = np.append(ret, vec-torso) return ret def _get_reward(self): return self.sim.data.get_geom_xpos('head')[2] def _step(self, action): for i in range(len(action)): self.sim.data.ctrl[i] = action[i] * 0.5 self.sim.step() self.sim.step() return self._get_state(),self._get_reward(),False,{} def _reset(self): self.sim.set_state(self.sim_state) return self._get_state() def _render(self, mode = 'human', close = False): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer.render()
def test_cycle_through_orientations(): panda_kinematics = ikpy_panda_kinematics.panda_kinematics() model = load_model_from_path("robot.xml") sim = MjSim(model) viewer = MjViewer(sim) x_target, y_target, z_target = 0.2, 0.0, 0.5 step1 = np.linspace(0,np.pi/2, 100) step2 = np.linspace(np.pi/2, -np.pi/2, 200) step3 = np.linspace(-np.pi/2, 0, 100) sweep = np.concatenate((step1, step2, step3)) roll = 0 pitch = 0 yaw = np.pi ee_goal = [x_target, y_target, z_target, roll, pitch, yaw] qinit = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6]) sim.set_state(MjSimState(time=0,qpos=qinit,qvel=np.zeros(7),act=None,udd_state={})) sim.step() qgoal = qinit q = qinit qd = np.zeros(7) count = -1 num_steps_per_change = 4 for i in range(num_steps_per_change * 400): if i % num_steps_per_change == 0: count += 1 ee_goal = [x_target, y_target, z_target, roll, pitch, sweep[count] + yaw] qgoal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q) R1 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(rpy=[roll, pitch, sweep[count] + yaw]) R2 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(panda_kinematics.forward_kinematics(qgoal)[3:6]) # R3 = sim.data.body_xmat[sim.model.body_name2id("right_hand")].reshape(3, 3) print("R1:\n", R1) print("R2:\n", R2) # print("R3:\n", R3) print("EE:", np.around(ee_goal, 3)) print("q: ", np.around(qgoal, 3)) state = sim.get_state() q = state[1] qd = state[2] sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=qgoal) sim.step() viewer.render() time.sleep(1)
def solve_disc_value_iteration(self, model: MjSim, cost_func): [row, col] = np.shape(states) for iter in range(6): print(f"iteration: {iter}") for s_1 in range(col): for s_2 in range(col): for ctrl in range(self._ctrls.shape[0]): # reinitialise the state model.set_state( State(time=0, qpos=np.array([states[0][s_1]]), qvel=np.array([states[1][s_2]]), act=0, udd_state={})) model.data.ctrl[0] = self._ctrls[ctrl] value_curr = cost_func( np.append(model.data.xipos[1], model.data.qvel[0]), model.data.ctrl) model.step() # solve for the instantaneous cost and interpolate the value at the next state if (self._states[0][0] < model.data.qpos[0] < self._states[0][-1]) and \ self._states[1][0] < model.data.qvel[0] < self._states[1][-1]: value_curr += interpolate.griddata( np.array([self.P.ravel(), self.V.ravel()]).T, self._values.ravel(), np.array( [model.data.qpos[0], model.data.qvel[0]]).T) else: rbf_net = interpolate.Rbf(self.P.ravel(), self.V.ravel(), self._values.ravel(), function="linear") value_curr += rbf_net(model.data.qpos[0], model.data.qvel[0]) # Hacky fix for the first iteration of vi if ctrl == 0 and iter == 0: self._values[s_1][s_2] = value_curr if value_curr < self._values[s_1][s_2]: # print(f"Difference = {value_curr - self._values[s_1][s_2]} index {s_1}{s_2}") self._values[s_1][s_2] = value_curr return self._values
class Mujoco_Renderer(): def __init__(self, mujoco_xml, hp): from mujoco_py import load_model_from_path, MjSim mujoco_xml = '/'.join(str.split(gcp.__file__, '/')[:-1]) + '/' + mujoco_xml self.sim = MjSim(load_model_from_path(mujoco_xml)) self._hp = hp def render(self, qpos): sim_state = self.sim.get_state() sim_state.qpos[:2] = qpos sim_state.qvel[:] = np.zeros_like(self.sim.data.qvel) self.sim.set_state(sim_state) self.sim.forward() subgoal_image = self.sim.render(self._hp.mpar.img_sz, self._hp.mpar.img_sz, camera_name='maincam') # plt.imshow(subgoal_image) # plt.savefig('test.png') return subgoal_image
def __init__(self, model: MujocoModel, fileName): self.FileName = fileName self.Model = model self.Config = TaskConfig(fileName) sim = MjSim(self.Model.MujocoModel) self.StartState = self.MakeState(sim, self.Config.StartConfig) self.EndState = self.MakeState(sim, self.Config.EndConfig) self.TargetPos = {} self.TargetAngle = {} joints = self.Model.JointList sim.set_state(self.EndState) sim.step() for joint in joints: self.TargetPos[joint.Site] = np.array( sim.data.get_site_xpos(joint.Site)) self.TargetAngle[joint.Site] = np.array( self.MatToAngle(sim.data.get_site_xmat(joint.Site)))
class MujocoSimulation(Simulation): def __init__(self, name, total_time=1000, recording_time=[0, 1000]): super(MujocoSimulation, self).__init__(name, total_time=total_time, recording_time=recording_time) self.model = load_model_from_path('mujoco_xmls/' + name + '.xml') self.sim = MjSim(self.model) self.initial_state = self.sim.get_state() self.input_size = len(self.sim.data.ctrl) self.reset() self.viewer = None def reset(self): super(MujocoSimulation, self).reset() self.sim.set_state(self.initial_state) def run(self, reset=True): if reset: self.reset() self.sim.set_state(self.initial_state) for i in range(self.total_time): self.sim.data.ctrl[:] = self.ctrl_array[i] self.sim.step() self.trajectory.append(self.sim.get_state()) self.alreadyRun = True def get_trajectory(self, all_info=True): if not self.alreadyRun: self.run() if all_info: return self.trajectory.copy() else: return [x.qpos for x in self.trajectory] def get_recording(self, all_info=True): traj = self.get_trajectory(all_info=all_info) return traj[self.recording_time[0]:self.recording_time[1]] def watch(self, repeat_count=4): if self.viewer is None: self.viewer = MjViewer(self.sim) for _ in range(repeat_count): self.sim.set_state(self.initial_state) for i in range(self.total_time): self.sim.data.ctrl[:] = self.ctrl_array[i] self.sim.step() self.viewer.render() self.run( reset=False ) # so that the trajectory will be compatible with what user watches
class BaseMujocoHexRobot(HexRobot): """The hexapod robot in mujoco. Child classes need to define the joint names.""" @classmethod @abc.abstractproperty def joint_names(cls) -> Tuple[str, ...]: """The names of the joints in the mujoco xml.""" raise NotImplementedError def __init__(self, load_path: str, viewer: bool = True): """Initialize a hex robot in mujoco. Parameters --------- load_path : str Path to the xml file. viewer : bool Flag to create a MjViewer for the robot. By default a viewer will be created. """ model = load_model_from_path(load_path) self._sim = MjSim(model) self.num_joints = len(self.joint_names) self._joint_qpos_ids = [ self._sim.model.get_joint_qpos_addr(x) for x in self.joint_names ] self._joint_qvel_ids = [ self._sim.model.get_joint_qvel_addr(x) for x in self.joint_names ] self._joint_actuator_id_map = dict( zip(self.joint_names, range(self.num_joints))) if viewer: self._viewer = MjViewer(self._sim) else: self._viewer = None def get_state(self) -> HexState: """Retrieve the current state of the robot. Returns ------- HexState The current state of the robot. """ sim_state = self._sim.get_state() joint_pos = sim_state.qpos[self._joint_qpos_ids] joint_vel = sim_state.qvel[self._joint_qvel_ids] return HexState( qpos=OrderedDict(zip(self.joint_names, joint_pos)), qvel=OrderedDict(zip(self.joint_names, joint_vel)), acceleration=np.zeros(3), ) # TODO get the acceleration. def set_joint_positions(self, joints: Dict[str, float]) -> None: """Sets the joint positions to the specified values Parameters ---------- joints : Dict[str, float] A mapping between the joint name and its value to be set. """ sim_state = self._sim.get_state() for name, value in joints.items(): joint_id = self._sim.model.get_joint_qpos_addr(name) sim_state.qpos[joint_id] = value self._sim.set_state(sim_state) self._sim.forward() def step(self) -> None: """Steps the simulation forward by one step. Updates the visualizer if one is available. """ self._sim.step() if self._viewer is not None: self._viewer.render() def set_command(self, command: Dict[str, float]) -> None: """Set the command of the robot. Parameters ---------- command : Dict[str, float] The commands to the robot. """ for name, value in command.items(): self._sim.data.ctrl[self._joint_actuator_id_map[name]] = value
class MujocoEnv(gym.Env): def __init__(self, model_path, frame_skip=1, action_noise=0.0, random_init_state=True): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() self.init_qacc = self.data.qacc.ravel().copy() self.init_ctrl = self.data.ctrl.ravel().copy() self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.random_init_state = random_init_state """ if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos """ self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property def action_space(self): bounds = self.model.actuator_ctrlrange.copy() lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.low, self.action_space.high def reset_mujoco(self, init_state=None): if init_state is None: if self.random_init_state: qp = self.init_qpos.copy() + \ np.random.normal(size=self.init_qpos.shape) * 0.01 qv = self.init_qvel.copy() + \ np.random.normal(size=self.init_qvel.shape) * 0.1 else: qp = self.init_qpos.copy() qv = self.init_qvel.copy() qacc = self.init_qacc.copy() ctrl = self.init_ctrl.copy() else: pass """ start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start: start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim """ self.set_state(qp, qv) def reset(self, init_state=None): # self.reset_mujoco(init_state) self.sim.reset() self.sim.forward() self.current_com = self.data.subtree_com[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def set_state(self, qpos, qvel, qacc): assert qpos.shape == (self.qpos_dim, ) and qvel.shape == ( self.qvel_dim, ) and qacc.shape == (self.qacc_dim, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate([self.data.qpos.flat, self.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.data.qpos, self.data.qvel, self.data.qacc, self.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): ctrl = self.inject_action_noise(action) for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(self.frame_skip): self.sim.step() new_com = self.data.subtree_com[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self, config=None): if self.viewer is None: self.viewer = MjViewer(self.sim) # self.viewer.start() # self.viewer.set_model(self.model) if config is not None: pass # self.viewer.set_window_pose(config["xpos"], config["ypos"]) # self.viewer.set_window_size(config["width"], config["height"]) # self.viewer.set_window_title(config["title"]) return self.viewer def render(self, close=False, mode='human', config=None): if mode == 'human': # viewer = self.get_viewer(config=config) try: self.viewer.render() except: self.get_viewer(config=config) self.viewer.render() elif mode == 'rgb_array': viewer = self.get_viewer(config=config) viewer.loop_once() # self.get_viewer(config=config).render() data, width, height = self.get_viewer(config=config).get_image() return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :] if close: self.stop_viewer() # def start_viewer(self): # viewer = self.get_viewer() # if not viewer.running: # viewer.start() # # def stop_viewer(self): # if self.viewer: # self.viewer.finish() # self.viewer = None # def release(self): # # temporarily alleviate the issue (but still some leak) # from learning_to_adapt.mujoco_py.mjlib import mjlib # mjlib.mj_deleteModel(self.model._wrapped) # mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.data.ximat[idx].reshape((3, 3)) def get_body_com(self, body_name): return self.data.get_body_xpos(body_name) def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) ## _compute_subtree body_vels = np.zeros((self.model.nbody, 6)) # bodywise quantities mass = self.model.body_mass.flatten() for i in range(self.model.nbody): # body velocity # Compute object 6D velocity in object-centered frame, world/local orientation. # mj_objectVelocity(const mjModel* m, const mjData* d, int objtype, int objid, mjtMum* res, int flg_local) mujoco_py.cymj._mj_objectVelocity(self.model, self.data, 1, i, body_vels[i], 0) lin_moms = body_vels[:, 3:] * mass.reshape((-1, 1)) # init subtree mass body_parentid = self.model.body_parentid # subtree com and com_vel for i in range(self.model.nbody - 1, -1, -1): if i > 0: parent = body_parentid[i] # add scaled velocities lin_moms[parent] += lin_moms[i] # accumulate mass mass[parent] += mass[i] return_ = lin_moms / mass.reshape((-1, 1)) return return_[idx] # return self.model.body_comvels[idx] # def get_body_comvel(self, body_name): # idx = self.model.body_names.index(body_name) # # return self.model.body_comvels[idx] # def print_stats(self): # super(MujocoEnv, self).print_stats() # print("qpos dim:\t%d" % len(self.data.qpos)) def action_from_key(self, key): raise NotImplementedError # def set_state_tmp(self, state, restore=True): # if restore: # prev_pos = self.data.qpos # prev_qvel = self.data.qvel # prev_ctrl = self.data.ctrl # prev_act = self.data.act # qpos, qvel = self.decode_state(state) # self.model.data.qpos = qpos # self.model.data.qvel = qvel # self.model.forward() # yield # if restore: # self.data.qpos = prev_pos # self.data.qvel = prev_qvel # self.data.ctrl = prev_ctrl # self.data.act = prev_act # self.model.forward() def get_param_values(self): return {} def set_param_values(self, values): pass
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip=1, xml_string=""): """ @param model_path path of the default model @param xml_string if given, the model will be reset using these values """ fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.model = load_model_from_path(fullpath) with open(fullpath, 'r') as f: self.model_xml = f.read() self.default_xml = self.model_xml if xml_string != "": self.model = load_model_from_xml(xml_string) self.model_xml = xml_string self.frame_skip = frame_skip self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None self._viewers = {} self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([ o.size for o in observation ]) if type(observation) is tuple else observation.size high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() self.set_param_space() def get_params(self): """ Returns a dict of (param_name, param_value) """ return MujocoUpdater(self.model_xml).get_params() def set_params(self, params): """ @param params: dict(param_name, param_value) param_name should be a string of bodyname__type__paramname where type is either geom or joint, e.g. thigh__joint__friction, and param_value is a numpy array """ # invalidate cached properties self.__dict__.pop('action_space', None) self.__dict__.pop('observation_space', None) new_xml = MujocoUpdater.set_params(self.model_xml, params) self.__init__(xml_string=new_xml) self.reset() return self def set_param_space(self, param_space=None, eps_scale=0.5, replace=True): """ Set param_space @param param_space: dict(string, gym.space.base.Space) @param eps_scale: scale of variation applied to all params @param replace: if true, param_space overwrites default param_space. Default behavior is to merge. """ if param_space is not None: if replace: self._param_space = param_space else: self._param_space = {**self._param_space, **param_space} else: params = MujocoUpdater(self.model_xml).get_params() self._param_space = dict() for param, value in params.items(): eps = np.abs(value) * eps_scale ub = value + eps lb = value - eps for name in positive_params: if name in param: lb = np.clip(lb, 1e-3, ub) break space = spaces.Box(lb, ub) self._param_space[param] = space def get_geom_params(self, body_name): geom = MujocoUpdater(self.model_xml).get_geom(body_name) return { k: v for k, v in geom.attrib.items() if k not in MujocoUpdater.ignore_params } def get_joint_params(self, body_name): joint = MujocoUpdater(self.model_xml).get_joint(body_name) return { k: v for k, v in joint.attrib.items() if k not in MujocoUpdater.ignore_params } def get_body_names(self): return MujocoUpdater(self.model_xml).get_body_names() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.sim.data cdists = np.copy(self.model.geom_margin).flat for c in self.sim.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.sim.data.qpos.flat, self.sim.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.sim.data.qpos, self.sim.data.qvel, self.sim.data.qacc, self.sim.data.ctrl, ]).ravel() # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def reset(self, randomize=True): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob # Added for bayesian_rl def get_sim_state(self): return self.sim.get_state() # Added for bayesian_rl def set_sim_state(self, state): self.sim.set_state(state) # Added for bayesian_rl def set_state_vector(self, state_vector): qpos = state_vector[:self.model.nq] qvel = state_vector[self.model.nq:] self.set_state(qpos, qvel) # Added for bayesian_rl def get_state_vector(self): return self.state_vector() def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self, mode): self.viewer = self._viewers.get(mode) if self.viewer is None: if mode == 'human': self.viewer = mujoco_py.MjViewer(self.sim) elif mode == 'rgb_array' or mode == 'depth_array': self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) self.viewer_setup() self._viewers[mode] = self.viewer return self.viewer def close(self): if self.viewer is not None: # self.viewer.finish() self.viewer = None self._viewers = {} # def step(self, a): # return self._step(a) # Added for bayesian_rl def take_action(self, a): self.step(a) return self.get_sim_state() def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['mean'] o, r, d, _ = self.step(a) t = t + 1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1, :, :]) print(t, end=', ', flush=True) file_name = save_loc + filename + '_' + str(ep) + ".mp4" skvideo.io.vwrite(file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f" % (t1 - t0)) def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE): if mode == 'rgb_array': self._get_viewer(mode).render(width, height) # window size used for old mujoco-py: data = self._get_viewer(mode).read_pixels(width, height, depth=False) # original image is upside-down, so flip it return data[::-1, :, :] elif mode == 'depth_array': self._get_viewer(mode).render(width, height) # window size used for old mujoco-py: # Extract depth part of the read_pixels() tuple data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1] # original image is upside-down, so flip it return data[::-1, :] elif mode == 'human': self._get_viewer(mode).render()
class DeterministicGraspPolicy(Policy): def __init__(self, agentparams, policyparams): Policy.__init__(self) self.agentparams = agentparams self.min_lift = agentparams.get('min_z_lift', 0.05) self.policyparams = policyparams self.adim = agentparams['adim'] self.n_actions = policyparams['nactions'] if 'num_samples' in self.policyparams: self.M = self.policyparams['num_samples'] else: self.M = 20 # number of CEM Samples if 'best_to_take' in self.policyparams: self.K = self.policyparams['best_to_take'] else: self.K = 5 # best samples to average for next sampling assert self.adim >= 4, 'must have at least x, y, z + gripper actions' self.moveto = True self.drop = False self.grasp = False self.lift = False if 'iterations' in self.policyparams: self.niter = self.policyparams['iterations'] else: self.niter = 10 # number of iterations self.imgs = [] self.iter = 0 def setup_CEM_model(self, t, init_model): if t == 0: if 'gen_xml' in self.agentparams: self.CEM_model = MjSim( load_model_from_path(self.agentparams['gen_xml_fname'])) else: self.CEM_model = MjSim( load_model_from_path(self.agentparams['filename'])) self.initial_qpos = init_model.data.qpos.copy() self.initial_qvel = init_model.data.qvel.copy() self.reset_CEM_model() def reset_CEM_model(self): if len(self.imgs) > 0: print('saving iter', self.iter, 'with frames:', len(self.imgs)) npy_to_gif( self.imgs, os.path.join(self.agentparams['record'], 'iter_{}'.format(self.iter))) self.iter += 1 sim_state = self.CEM_model.get_state() sim_state.qpos[:] = self.initial_qpos.copy() sim_state.qvel[:] = self.initial_qvel.copy() self.CEM_model.set_state(sim_state) self.prev_target = self.CEM_model.data.qpos[:self.adim].squeeze().copy( ) self.target = self.CEM_model.data.qpos[:self.adim].squeeze().copy() for _ in range(5): self.step_model(self.target) self.imgs = [] def viewer_refresh(self): if 'debug_viewer' in self.policyparams and self.policyparams[ 'debug_viewer']: large_img = self.CEM_model.render( 640, 480, camera_name="maincam")[::-1, :, :] self.imgs.append(large_img) def perform_CEM(self, targetxy): self.reset_CEM_model() if 'object_meshes' in self.agentparams: targetxy = self.CEM_model.data.sensordata[:2].squeeze().copy() print('Beginning CEM') ang_dis_mean = self.policyparams['init_mean'].copy() ang_dis_cov = self.policyparams['init_cov'].copy() scores = np.zeros(self.M) best_score, best_ang, best_xy = -1, None, None for n in range(self.niter): ang_disp_samps = np.random.multivariate_normal(ang_dis_mean, ang_dis_cov, size=self.M) for s in range(self.M): #print('On iter', n, 'sample', s) self.reset_CEM_model() move = True drop = False grasp = False g_start = 0 lift = False angle_delta = ang_disp_samps[s, 0] targetxy_delta = targetxy + ang_disp_samps[s, 1:] print('init iter') print(targetxy) print(angle_delta, targetxy_delta) for t in range(self.n_actions): angle_action = np.zeros(self.adim) cur_xy = self.CEM_model.data.qpos[:2].squeeze() if move and np.linalg.norm( targetxy_delta - cur_xy, 2) <= self.policyparams['drop_thresh']: move = False drop = True if drop and self.CEM_model.data.qpos[2] <= -0.079: drop = False grasp = True g_start = t if grasp and t - g_start > 2: grasp = False lift = True if move: angle_action[:2] = targetxy_delta angle_action[2] = self.agentparams['ztarget'] angle_action[3] = angle_delta angle_action[4] = -100 elif drop: angle_action[:2] = targetxy_delta angle_action[2] = -0.08 angle_action[3] = angle_delta angle_action[4] = -100 elif grasp: angle_action[:2] = targetxy_delta angle_action[2] = -0.08 angle_action[3] = angle_delta angle_action[4] = 21 elif lift: angle_action[:2] = targetxy_delta angle_action[2] = self.agentparams['ztarget'] angle_action[3] = angle_delta angle_action[4] = 21 self.step_model(angle_action) # print 'final z', self.CEM_model.data.qpos[8].squeeze(), 'with angle', angle_samps[s] if 'object_meshes' in self.agentparams: obj_z = self.CEM_model.data.sensordata[2].squeeze() else: obj_z = self.CEM_model.data.qpos[8].squeeze() print('obj_z at iter', n * self.M + s, 'is', obj_z) scores[s] = obj_z if 'stop_iter_thresh' in self.policyparams and scores[ s] > self.policyparams['stop_iter_thresh']: return ang_disp_samps[s, 0], ang_disp_samps[s, 1:] # print 'score',scores[s] best_scores = np.argsort(-scores)[:self.K] if scores[best_scores[0]] > best_score or best_ang is None: #print('best', scores[best_scores[0]]) best_score = scores[best_scores[0]] best_ang = ang_disp_samps[best_scores[0], 0] best_xy = ang_disp_samps[best_scores[0], 1:] ang_dis_mean = np.mean(ang_disp_samps[best_scores, :], axis=0) ang_dis_cov = np.cov(ang_disp_samps[best_scores, :].T) return best_ang, best_xy def step_model(self, input_actions): pos_clip = self.agentparams['targetpos_clip'] self.prev_target = self.target.copy() self.target = input_actions.copy() self.target = np.clip(self.target, pos_clip[0], pos_clip[1]) for s in range(self.agentparams['substeps']): step_action = s / float(self.agentparams['substeps']) * ( self.target - self.prev_target) + self.prev_target self.CEM_model.data.ctrl[:] = step_action self.CEM_model.step() self.viewer_refresh() #print "end", self.CEM_model.data.qpos[:4].squeeze() def act(self, traj, t, init_model=None, goal_object_pose=None, hyperparams=None, goal_image=None): """ Scripted pick->place->wiggle trajectory. There's probably a better way to script this but booleans will suffice for now. """ self.setup_CEM_model(t, init_model) if t == 0: self.moveto = True self.drop = False self.lift = False self.grasp = False self.second_moveto = False self.final_drop = False self.wiggle = False self.switchTime = 0 print('start pose', traj.Object_pose[t, 0, :3]) self.targetxy = traj.Object_pose[t, 0, :2] self.angle, self.disp = self.perform_CEM(self.targetxy) print('best angle', self.angle, 'best target', self.targetxy) self.targetxy += self.disp traj.desig_pos = np.zeros((2, 2)) traj.desig_pos[0] = self.targetxy.copy() if self.lift and traj.X_full[t, 2] >= self.min_lift: self.lift = False if traj.Object_full_pose[t, 0, 2] > -0.01: #lift occursed self.second_moveto = True self.targetxy = np.random.uniform(-0.2, 0.2, size=2) print("LIFTED OBJECT!") print('dropping at', self.targetxy) traj.desig_pos[1] = self.targetxy.copy() else: self.wiggle = True if self.grasp and self.switchTime > 0: print('lifting at time', t, '!', 'have z', traj.X_full[t, 2]) self.grasp = False self.lift = True if self.drop and (traj.X_full[t, 2] <= -0.079 or self.switchTime >= 2): print('grasping at time', t, '!', 'have z', traj.X_full[t, 2]) self.drop = False self.grasp = True self.switchTime = 0 if self.moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy, 2) <= self.policyparams['drop_thresh']): if self.switchTime > 0: print('stopping moveto at time', t, '!') print(traj.X_full[t, :2], self.targetxy) self.moveto = False self.drop = True self.switchTime = 0 else: self.switchTime += 1 if self.second_moveto and np.linalg.norm( traj.X_full[t, :2] - self.targetxy, 2) <= self.policyparams['drop_thresh']: self.second_moveto = False self.final_drop = True self.switchTime = 0 actions = np.zeros(self.adim) if self.moveto or self.second_moveto: delta = np.zeros(3) delta[:2] = self.targetxy - traj.target_qpos[t, :2] if 'xyz_std' in self.policyparams: delta += self.policyparams['xyz_std'] * np.random.normal( size=3) norm = np.sqrt(np.sum(np.square(delta))) if norm > self.policyparams['max_norm']: delta *= self.policyparams['max_norm'] / norm actions[:3] = traj.target_qpos[t, :3] + delta actions[2] = self.agentparams['ztarget'] actions[3] = self.angle if self.moveto: actions[-1] = -1 else: actions[-1] = 1 self.switchTime += 1 elif self.drop: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 elif self.lift: actions[:2] = self.targetxy actions[2] = self.agentparams['ztarget'] actions[3] = self.angle actions[-1] = 1 elif self.grasp: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = 1 self.switchTime += 1 elif self.final_drop: if self.switchTime > 0: print('opening') actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 if self.switchTime > 1: print('up') actions[:2] = self.targetxy actions[2] = self.agentparams['ztarget'] + np.random.uniform( -0.03, 0.05) actions[3] = self.angle + np.random.uniform( -np.pi / 4., np.pi / 4.) actions[-1] = -1 self.final_drop = False self.wiggle = True self.switchTime = 0 else: actions[:2] = self.targetxy actions[2] = -0.08 actions[3] = self.angle actions[-1] = -1 self.switchTime += 1 elif self.wiggle: delta_vec = np.random.normal(size=2) norm = np.sqrt(np.sum(np.square(delta_vec))) if norm > self.policyparams['max_norm']: delta_vec *= self.policyparams['max_norm'] / norm actions[:2] = np.clip(traj.target_qpos[t, :2] + delta_vec, -0.15, 0.15) delta_z = np.random.uniform(-0.08 - traj.target_qpos[t, 2], 0.3 - traj.target_qpos[t, 2]) actions[2] = traj.target_qpos[t, 2] + delta_z actions[3] = traj.target_qpos[t, 3] + np.random.uniform(-0.1, 0.1) if np.random.uniform() > 0.5: actions[4] = -1 else: actions[4] = 1 if 'angle_std' in self.policyparams: actions[3] += self.policyparams['angle_std'] * np.random.normal() return actions - traj.target_qpos[t, :] * traj.mask_rel
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30 } def __init__(self, model_path, frame_skip): print(model_path) if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = mujoco_py.load_model_from_path(fullpath) self.timestep = self.model.opt.timestep * self.frame_skip self.sim = MjSim(self.model) self.data = self.sim.data self.viewer = None # changed a place of setting action space # sometimes use self.action_space at first call self._step() bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = observation.size high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def viewer_setup(self): """ This method is called when the viewer is initialized and after every reset Optionally implement this method, if you need to tinker with camera position and so forth. """ pass # ----------------------------- def _reset(self): self.sim.reset() ob = self.reset_model() if self.viewer is not None: # TODO: # self.viewer.autoscale() self.viewer_setup() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) sim_state = self.sim.get_state() sim_state.qpos[:] = qpos[:] sim_state.qvel[:] = qvel[:] self.sim.set_state(sim_state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): assert len(ctrl) is len(self.sim.data.ctrl) self.data.ctrl[:] = ctrl[:] for _ in range(n_frames): self.sim.step() def _render(self, mode='human', close=False): if close: if self.viewer is not None: self.viewer = None return if mode == 'rgb_array': data = self._get_viewer()._read_pixels_as_in_window() return data elif mode == 'human': self._get_viewer().render() return def _get_viewer(self): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer_setup() return self.viewer def get_body_com(self, body_name): return self.sim.data.get_body_xipos(body_name) def get_body_comvel(self, body_name): return self.sim.data.get_body_cvel(self.model.body_name2id(body_name)) def get_body_xmat(self, body_name): return self.sim.data.get_body_xmat(body_name).reshape((3, 3)) def state_vector(self): return np.concatenate([ self.data.qpos.flat, self.data.qvel.flat ])
model = load_model_from_xml(MODEL_XML) sim = MjSim(model) viewer = MjViewer(sim) states = [{'box:x': +0.8, 'box:y': +0.8}, {'box:x': -0.8, 'box:y': +0.8}, {'box:x': -0.8, 'box:y': -0.8}, {'box:x': +0.8, 'box:y': -0.8}, {'box:x': +0.0, 'box:y': +0.0}] # MjModel.joint_name2id returns the index of a joint in # MjData.qpos. x_joint_i = sim.model.get_joint_qpos_addr("box:x") y_joint_i = sim.model.get_joint_qpos_addr("box:y") print_box_xpos(sim) while True: for state in states: sim_state = sim.get_state() sim_state.qpos[x_joint_i] = state["box:x"] sim_state.qpos[y_joint_i] = state["box:y"] sim.set_state(sim_state) sim.forward() print("updated state to", state) print_box_xpos(sim) viewer.render() if os.getenv('TESTING') is not None: break
class RobotSimulatorMuJoCo: def __init__(self, config_file_name, config_folder_path=''): # reading config file and mujoco general info config_data = util.read_config_file(config_file_name,config_folder_path) model_xml = config_data['MuJoCo']['model_xml'] model_xml_path = os.path.join(os.environ[util.SIM_ENV_VAR],'xml/',model_xml) model = load_model_from_path(model_xml_path) self.sim = MjSim(model) self.render = config_data['MuJoCo']['render'] # create robot and environment self.robot = Robot(config_data,self.sim.model) self.environment = Environment(config_data,self.sim.model) # simulator parameters self.param = { "mj_xml": model_xml_path, "total_time_limit": float('Inf') } # initialize and update from the configuration file self.initialize_from_config(config_data) # initialize render self.init_render() def initialize_from_config(self, config_data): # extract the init states from the configuration file and assign them to init_state init_state = self.sim.get_state() if not config_data.has_option('Robot','ignore_init'): init_state = self.robot.read_init_state_from_config(init_state) if not config_data.has_option('Environment','ignore_init'): init_state = self.environment.read_init_state_from_config(init_state) # set the init_state to the simulator self.sim.set_state(init_state) self.sim.forward() print("Simulator init states are set.") self.update_state() self.robot.enable_actuator() # update total time to stop the simulation if specified total_time_limit = config_data.getfloat('MuJoCo', 'total_time_limit') if (total_time_limit>0): self.param.update({'total_time_limit': total_time_limit}) # update the experiment name and the sample ID if defined self.param.update(record_sampling_decimation=1) self.param.update(record_sampling_idx=1) if (config_data.has_section('Uncertainty')): current_sample_id = config_data['Uncertainty']['current_sample_id'] if (int(current_sample_id)>0): self.param.update({ 'experiment_name' : config_data['Uncertainty']['experiment_name'], 'current_sample_id': current_sample_id }) if ('record_sampling_decimation' in config_data['Uncertainty']): self.param.update( record_sampling_decimation=int(config_data['Uncertainty']['record_sampling_decimation'])) def init_render(self): # [optional] init render if (self.render=="yes"): self.viewer = MjViewer(self.sim) print("viewer initialized") def step(self): self.sim.step() if (self.render=="yes"): self.viewer.render() def update_state(self): current_state = self.sim.get_state() self.robot.get_joint(current_state) self.environment.get_joint(current_state) def update_robot_control(self): ctrl_pos = self.robot.compute_control() for i, joint_name in enumerate(ctrl_pos): self.sim.data.ctrl[i]=ctrl_pos[i] def record_state(self): if (self.param['record_sampling_idx']==self.param['record_sampling_decimation']): self.robot.record_joint() self.environment.record_joint() self.param.update(record_sampling_idx=1) else: self.param.update(record_sampling_idx=self.param['record_sampling_idx']+1) def check_time_limit(self): if (self.sim.data.time<self.param['total_time_limit']): return False else: return True def dump_data(self): # this func dumps data only, because the model can be loaded using the xml of the simulation. if ('experiment_name' in self.param.keys()): # prepare save path export_folder_path = os.path.join(os.environ[util.SIM_DATA_ENV_VAR], self.param['experiment_name'],'data') if (not os.path.isdir(export_folder_path)): os.makedirs(export_folder_path) # save the simulator object without the MuJoCo model self.sim=[] file_name = os.path.join(export_folder_path,self.param['current_sample_id']+'.joblib') joblib.dump(self, file_name) print(f'Simulation result saved at [{file_name}]')
class World: # Default configuration (this should not be nested since it gets copied) # *NOTE:* Changes to this configuration should also be reflected in `Engine` configuration DEFAULT = { 'robot_base': 'xmls/car.xml', # Which robot XML to use as the base 'robot_xy': np.zeros(2), # Robot XY location 'robot_rot': 0, # Robot rotation about Z axis 'floor_size': [3.5, 3.5, .1], # Used for displaying the floor # Objects -- this is processed and added by the Engine class 'objects': {}, # map from name -> object dict # Geoms -- similar to objects, but they are immovable and fixed in the scene. 'geoms': {}, # map from name -> geom dict # Mocaps -- mocap objects which are used to control other objects 'mocaps': {}, # Determine whether we create render contexts 'observe_vision': False, } def __init__(self, config={}, render_context=None, render_device_id=-1): ''' config - JSON string or dict of configuration. See self.parse() ''' self.parse(config) # Parse configuration self.first_reset = True self.viewer = None self.render_context = render_context self.render_device_id = render_device_id self.update_viewer_sim = False self.robot = Robot(self.robot_base) def parse(self, config): ''' Parse a config dict - see self.DEFAULT for description ''' self.config = deepcopy(self.DEFAULT) self.config.update(deepcopy(config)) for key, value in self.config.items(): assert key in self.DEFAULT, f'Bad key {key}' setattr(self, key, value) @property def data(self): ''' Helper to get the simulation data instance ''' return self.sim.data # TODO: remove this when mujoco-py fix is merged and a new version is pushed # https://github.com/openai/mujoco-py/pull/354 # Then all uses of `self.world.get_sensor()` should change to `self.data.get_sensor`. def get_sensor(self, name): id = self.model.sensor_name2id(name) adr = self.model.sensor_adr[id] dim = self.model.sensor_dim[id] return self.data.sensordata[adr:adr + dim].copy() def build(self): ''' Build a world, including generating XML and moving objects ''' # Read in the base XML (contains robot, camera, floor, etc) self.robot_base_path = os.path.join(BASE_DIR, self.robot_base) with open(self.robot_base_path) as f: self.robot_base_xml = f.read() self.xml = xmltodict.parse( self.robot_base_xml) # Nested OrderedDict objects # Convenience accessor for xml dictionary worldbody = self.xml['mujoco']['worldbody'] # Move robot position to starting position worldbody['body']['@pos'] = convert(np.r_[self.robot_xy, self.robot.z_height]) worldbody['body']['@quat'] = convert(rot2quat(self.robot_rot)) # We need this because xmltodict skips over single-item lists in the tree worldbody['body'] = [worldbody['body']] if 'geom' in worldbody: worldbody['geom'] = [worldbody['geom']] else: worldbody['geom'] = [] # Add equality section if missing if 'equality' not in self.xml['mujoco']: self.xml['mujoco']['equality'] = OrderedDict() equality = self.xml['mujoco']['equality'] if 'weld' not in equality: equality['weld'] = [] # Add asset section if missing if 'asset' not in self.xml['mujoco']: # old default rgb1: ".4 .5 .6" # old default rgb2: "0 0 0" # light pink: "1 0.44 .81" # light blue: "0.004 0.804 .996" # light purple: ".676 .547 .996" # med blue: "0.527 0.582 0.906" # indigo: "0.293 0 0.508" asset = xmltodict.parse(''' <asset> <texture type="skybox" builtin="gradient" rgb1="0.527 0.582 0.906" rgb2="0.1 0.1 0.35" width="800" height="800" markrgb="1 1 1" mark="random" random="0.001"/> <texture name="texplane" builtin="checker" height="100" width="100" rgb1="0.7 0.7 0.7" rgb2="0.8 0.8 0.8" type="2d"/> <material name="MatPlane" reflectance="0.1" shininess="0.1" specular="0.1" texrepeat="10 10" texture="texplane"/> </asset> ''') self.xml['mujoco']['asset'] = asset['asset'] # Add light to the XML dictionary light = xmltodict.parse('''<b> <light cutoff="100" diffuse="1 1 1" dir="0 0 -1" directional="true" exponent="1" pos="0 0 0.5" specular="0 0 0" castshadow="false"/> </b>''') worldbody['light'] = light['b']['light'] # Add floor to the XML dictionary if missing if not any(g.get('@name') == 'floor' for g in worldbody['geom']): floor = xmltodict.parse(''' <geom name="floor" type="plane" condim="6"/> ''') worldbody['geom'].append(floor['geom']) # Make sure floor renders the same for every world for g in worldbody['geom']: if g['@name'] == 'floor': g.update({ '@size': convert(self.floor_size), '@rgba': '1 1 1 1', '@material': 'MatPlane' }) # Add cameras to the XML dictionary cameras = xmltodict.parse('''<b> <camera name="fixednear" pos="0 -2 2" zaxis="0 -1 1"/> <camera name="fixedfar" pos="0 -5 5" zaxis="0 -1 1"/> <camera name="fixedtop" pos="0 0 5" zaxis="0 0 1"/> </b>''') worldbody['camera'] = cameras['b']['camera'] # Build and add a tracking camera (logic needed to ensure orientation correct) theta = self.robot_rot xyaxes = dict(x1=np.cos(theta), x2=-np.sin(theta), x3=0, y1=np.sin(theta), y2=np.cos(theta), y3=1) pos = dict(xp=0 * np.cos(theta) + (-2) * np.sin(theta), yp=0 * (-np.sin(theta)) + (-2) * np.cos(theta), zp=2) track_camera = xmltodict.parse('''<b> <camera name="track" mode="track" pos="{xp} {yp} {zp}" xyaxes="{x1} {x2} {x3} {y1} {y2} {y3}"/> </b>'''.format(**pos, **xyaxes)) # support multiple cameras on 'body' if not isinstance(worldbody['body'][0]['camera'], list): worldbody['body'][0]['camera'] = [worldbody['body'][0]['camera']] worldbody['body'][0]['camera'].append(track_camera['b']['camera']) # Add objects to the XML dictionary for name, object in self.objects.items(): assert object['name'] == name, f'Inconsistent {name} {object}' object = object.copy() # don't modify original object object['quat'] = rot2quat(object['rot']) if name == 'box': dim = object['size'][0] object['height'] = object['size'][-1] object['width'] = dim / 2 object['x'] = dim object['y'] = dim body = xmltodict.parse( ''' <body name="{name}" pos="{pos}" quat="{quat}"> <freejoint name="{name}"/> <geom name="{name}" type="{type}" size="{size}" density="{density}" rgba="{rgba}" group="{group}"/> <geom name="col1" type="{type}" size="{width} {width} {height}" density="{density}" rgba="{rgba}" group="{group}" pos="{x} {y} 0"/> <geom name="col2" type="{type}" size="{width} {width} {height}" density="{density}" rgba="{rgba}" group="{group}" pos="-{x} {y} 0"/> <geom name="col3" type="{type}" size="{width} {width} {height}" density="{density}" rgba="{rgba}" group="{group}" pos="{x} -{y} 0"/> <geom name="col4" type="{type}" size="{width} {width} {height}" density="{density}" rgba="{rgba}" group="{group}" pos="-{x} -{y} 0"/> </body> '''.format(**{k: convert(v) for k, v in object.items()})) else: body = xmltodict.parse( ''' <body name="{name}" pos="{pos}" quat="{quat}"> <freejoint name="{name}"/> <geom name="{name}" type="{type}" size="{size}" density="{density}" rgba="{rgba}" group="{group}"/> </body> '''.format(**{k: convert(v) for k, v in object.items()})) # Append new body to world, making it a list optionally # Add the object to the world worldbody['body'].append(body['body']) # Add mocaps to the XML dictionary for name, mocap in self.mocaps.items(): # Mocap names are suffixed with 'mocap' assert mocap['name'] == name, f'Inconsistent {name} {object}' assert name.replace( 'mocap', 'obj') in self.objects, f'missing object for {name}' # Add the object to the world mocap = mocap.copy() # don't modify original object mocap['quat'] = rot2quat(mocap['rot']) body = xmltodict.parse(''' <body name="{name}" mocap="true"> <geom name="{name}" type="{type}" size="{size}" rgba="{rgba}" pos="{pos}" quat="{quat}" contype="0" conaffinity="0" group="{group}"/> </body> '''.format(**{k: convert(v) for k, v in mocap.items()})) worldbody['body'].append(body['body']) # Add weld to equality list mocap['body1'] = name mocap['body2'] = name.replace('mocap', 'obj') weld = xmltodict.parse(''' <weld name="{name}" body1="{body1}" body2="{body2}" solref=".02 1.5"/> '''.format(**{k: convert(v) for k, v in mocap.items()})) equality['weld'].append(weld['weld']) # Add geoms to XML dictionary for name, geom in self.geoms.items(): assert geom['name'] == name, f'Inconsistent {name} {geom}' geom = geom.copy() # don't modify original object geom['quat'] = rot2quat(geom['rot']) geom['contype'] = geom.get('contype', 1) geom['conaffinity'] = geom.get('conaffinity', 1) body = xmltodict.parse(''' <body name="{name}" pos="{pos}" quat="{quat}"> <geom name="{name}" type="{type}" size="{size}" rgba="{rgba}" group="{group}" contype="{contype}" conaffinity="{conaffinity}"/> </body> '''.format(**{k: convert(v) for k, v in geom.items()})) # Append new body to world, making it a list optionally # Add the object to the world worldbody['body'].append(body['body']) # Instantiate simulator # print(xmltodict.unparse(self.xml, pretty=True)) self.xml_string = xmltodict.unparse(self.xml) self.model = load_model_from_xml(self.xml_string) self.sim = MjSim(self.model) # Add render contexts to newly created sim if self.render_context is None and self.observe_vision: render_context = MjRenderContextOffscreen( self.sim, device_id=self.render_device_id, quiet=True) render_context.vopt.geomgroup[:] = 1 self.render_context = render_context if self.render_context is not None: self.render_context.update_sim(self.sim) # Recompute simulation intrinsics from new position self.sim.forward() def rebuild(self, config={}, state=True): ''' Build a new sim from a model if the model changed ''' if state: old_state = self.sim.get_state() #self.config.update(deepcopy(config)) #self.parse(self.config) self.parse(config) self.build() if state: self.sim.set_state(old_state) self.sim.forward() def reset(self, build=True): ''' Reset the world (sim is accessed through self.sim) ''' if build: self.build() # set flag so that renderer knows to update sim self.update_viewer_sim = True def render(self, mode='human'): ''' Render the environment to the screen ''' if self.viewer is None: self.viewer = MjViewer(self.sim) # Turn all the geom groups on self.viewer.vopt.geomgroup[:] = 1 # Set camera if specified if mode == 'human': self.viewer.cam.fixedcamid = -1 self.viewer.cam.type = const.CAMERA_FREE else: self.viewer.cam.fixedcamid = self.model.camera_name2id(mode) self.viewer.cam.type = const.CAMERA_FIXED if self.update_viewer_sim: self.viewer.update_sim(self.sim) self.update_viewer_sim = False self.viewer.render() def robot_com(self): ''' Get the position of the robot center of mass in the simulator world reference frame ''' return self.body_com('robot') def robot_pos(self): ''' Get the position of the robot in the simulator world reference frame ''' return self.body_pos('robot') def robot_mat(self): ''' Get the rotation matrix of the robot in the simulator world reference frame ''' return self.body_mat('robot') def robot_vel(self): ''' Get the velocity of the robot in the simulator world reference frame ''' return self.body_vel('robot') def body_com(self, name): ''' Get the center of mass of a named body in the simulator world reference frame ''' return self.data.subtree_com[self.model.body_name2id(name)].copy() def body_pos(self, name): ''' Get the position of a named body in the simulator world reference frame ''' return self.data.get_body_xpos(name).copy() def body_mat(self, name): ''' Get the rotation matrix of a named body in the simulator world reference frame ''' return self.data.get_body_xmat(name).copy() def body_vel(self, name): ''' Get the velocity of a named body in the simulator world reference frame ''' return self.data.get_body_xvelp(name).copy()
model = load_model_from_xml(MODEL_LEG_XML) sim = MjSim(model) viewer = MjViewer(sim) qpos = np.zeros(2) qvel = np.zeros(2) old_state = sim.get_state() angHip = 120.0 / 180 * np.pi angKne = (180.0 - 40.0) / 180 * np.pi qpos[0] = angHip # hip joint initial angle qpos[1] = np.pi - angKne # knee joint initial angle qvel[0] = 0.0 # hip joint initial velocity qvel[1] = 0.0 # knee joint initial velocity new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel, old_state.act, old_state.udd_state) sim.set_state(new_state) sim.forward() # viewer.render() # create muscle timestep = 5e-4 # Series elastic element (SE) force-length relationship eref = 0.04 # [lslack] tendon reference strain # excitation-contraction coupling preAct = 0.01 # [] preactivation tau = 0.01 # [s] delay time constant # contractile element (CE) force-length relationship w = 0.56 # [lopt] width c = 0.05 # []; remaining force at +/- width # CE force-velocity relationship
class Environment(): def __init__(self, model_name, goal_space_train, goal_space_test, project_state_to_end_goal, end_goal_thresholds, initial_state_space, subgoal_bounds, project_state_to_subgoal, subgoal_thresholds, max_actions=1200, num_frames_skip=10, show=False): self.name = model_name # Create Mujoco Simulation self.model = load_model_from_path("./mujoco_files/" + model_name) self.sim = MjSim(self.model) # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks if model_name == "pendulum.xml": self.state_dim = 2 * len(self.sim.data.qpos) + len( self.sim.data.qvel) else: self.state_dim = len(self.sim.data.qpos) + len( self.sim.data.qvel ) # State will include (i) joint angles and (ii) joint velocities self.action_dim = len( self.sim.model.actuator_ctrlrange) # low-level action dim self.action_bounds = self.sim.model.actuator_ctrlrange[:, 1] # low-level action bounds self.action_offset = np.zeros((len( self.action_bounds))) # Assumes symmetric low-level action ranges self.end_goal_dim = len(goal_space_test) self.subgoal_dim = len(subgoal_bounds) self.subgoal_bounds = subgoal_bounds # Projection functions self.project_state_to_end_goal = project_state_to_end_goal self.project_state_to_subgoal = project_state_to_subgoal # Convert subgoal bounds to symmetric bounds and offset. Need these to properly configure subgoal actor networks self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds))) self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds))) for i in range(len(self.subgoal_bounds)): self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2 self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][ 1] - self.subgoal_bounds_symmetric[i] # End goal/subgoal thresholds self.end_goal_thresholds = end_goal_thresholds self.subgoal_thresholds = subgoal_thresholds # Set inital state and goal state spaces self.initial_state_space = initial_state_space self.goal_space_train = goal_space_train self.goal_space_test = goal_space_test self.subgoal_colors = [ "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon", "Gray", "White", "Black" ] self.max_actions = max_actions # Implement visualization if necessary self.visualize = show # Visualization boolean if self.visualize: self.viewer = MjViewer(self.sim) self.num_frames_skip = num_frames_skip # The following variables are hardcoded for the tower building env: self.n_objects = 1 # try out 2-3 later self.min_tower_height = 1 # try out 2 later self.max_tower_height = 4 # try out 2-3 later self.initial_state = copy.deepcopy(self.sim.get_state()) self.block_gripper = False self.n_substeps = 20 self.gripper_extra_height = 0.0 self.target_in_the_air = True # maybe make dynamic? self.target_offset = 0.0 self.obj_range = 0.15 self.target_range = 0.15 self.distance_threshold = 0.05 self.initial_qpos = { 'robot0:slide0': 0.0, 'robot0:slide1': 0.0, 'robot0:slide2': 0.0, 'object0:joint': [0.1, 0.0, 0.05, 1., 0., 0., 0.], 'object1:joint': [0.2, 0.0, 0.05, 1., 0., 0., 0.], 'object2:joint': [0.3, 0.0, 0.05, 1., 0., 0., 0.], 'object3:joint': [0.4, 0.0, 0.05, 1., 0., 0., 0.], 'object4:joint': [0.5, 0.0, 0.05, 1., 0., 0., 0.], } self.reward_type = 'sparse' self.gripper_goal = 'gripper_none' # can be 'gripper_none', 'gripper_random' and 'gripper_above' self.goal_size = (self.n_objects * 3) if self.gripper_goal != 'gripper_none': self.goal_size += 3 gripperSkipIdx = 0 if self.gripper_goal == "gripper_none" else 3 objectIdx = gripperSkipIdx + self.n_objects * 3 self.table_height = 0.5 self.obj_height = 0.05 if self.name == "assets/fetch/build_tower.xml": self._env_setup(self.initial_qpos) self.state_dim = self._get_obs_len() self.action_dim = 4 #self.action_bounds = np.array([[-1, 1], [-1, 1], [-1, 1], [-1, 1]])#self.sim.model.actuator_ctrlrange[:, 1] self.action_bounds = np.ones((self.action_dim)) self.action_offset = np.zeros( (self.action_dim)) # Assumes symmetric low-level action ranges self.end_goal_dim = self.goal_size self.project_state_to_end_goal = self._obs2goal_tower self.end_goal_thresholds = [0.05 for i in range(self.goal_size)] self.use_full_state_space_as_subgoal_space = True if not self.use_full_state_space_as_subgoal_space: self.subgoal_dim = self.goal_size self.project_state_to_subgoal = self._obs2goal_tower self.subgoal_bounds_offset = np.concatenate( [[1, 0.25, 0.55] for i in range(self.subgoal_dim // 3)]) self.subgoal_bounds_symmetric = np.ones((self.subgoal_dim)) self.subgoal_bounds = [([self.table_height, 1.5] if (i + 1) % 3 == 0 else [0, 1.5]) for i in range(self.subgoal_dim)] else: #self.subgoal_dim = self.state_dim self.subgoal_dim = 3 + self.n_objects * 3 self.project_state_to_subgoal = self._obs2goal_subgoal self.subgoal_bounds = [[0, 1.5] for _ in range(self.subgoal_dim)] #self.subgoal_bounds_offset = np.zeros((self.subgoal_dim)) #objects_offset = np.concatenate([[1, 0.25, 0.55] for i in range(self.n_objects)]) #self.subgoal_bounds_offset = np.zeros(self.subgoal_dim) #self.subgoal_bounds_offset[3:3+3*self.n_objects] = objects_offset #self.subgoal_bounds_offset = np.concatenate([[1, 0.25, 0.55] if gripperSkipIdx <= i <= objectIdx # else [0, 0, 0] for i in range(0, self.subgoal_dim, 3)]) #self.subgoal_bounds_symmetric = np.ones((self.subgoal_dim)) * 2 #self.subgoal_bounds = [([self.table_height, 1.5] if (i+1) % 3 == 0 else [0, 1.5]) if # 3 <= i < 3+3*self.n_objects else [-7, 7] for i in range(self.subgoal_dim)] # in ur5 subgoal bound: np.array([[-2 * np.pi, 2 * np.pi], [-2 * np.pi, 2 * np.pi], [-2 * np.pi, 2 * np.pi], [-4, 4], [-4, 4], [-4, 4]]) # Try the following, borrowed from original code self.subgoal_bounds_symmetric = np.zeros( (len(self.subgoal_bounds))) self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds))) for i in range(len(self.subgoal_bounds)): self.subgoal_bounds_symmetric[i] = ( self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2 self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][ 1] - self.subgoal_bounds_symmetric[i] print("Subgoal offset:", self.subgoal_bounds_offset) print("Subgoal bounds:", self.subgoal_bounds) #pos_threshold = 0.05 #angle_threshold = np.deg2rad(10) #velo_threshold = 2 #objects_threshold = np.concatenate([[pos_threshold for _ in range(3)] for _ in range(self.n_objects * 2)]) #objects_rotation = np.concatenate([[angle_threshold for _ in range(3)] for _ in range(self.n_objects)]) #objects_velo_pos = np.concatenate([[velo_threshold for _ in range(3)] for _ in range(self.n_objects)]) #self.subgoal_thresholds = np.concatenate((np.array([pos_threshold for i in range(3)]), objects_threshold, # np.array([pos_threshold for i in range(2)]), objects_rotation, # objects_velo_pos, objects_rotation * 4, # np.array([velo_threshold for i in range(3)]), # np.array([velo_threshold for i in range(2)]))) self.subgoal_thresholds = [0.05 for i in range(self.subgoal_dim)] print("Subgoal thresholds: ", self.subgoal_thresholds) print("Shape thresholds:", np.shape(self.subgoal_thresholds)) if __debug__: print("Action bounds: ", self.action_bounds) def _is_success(self, achieved_goal, desired_goal): d = goal_distance(achieved_goal, desired_goal) return (d < self.distance_threshold).astype(np.float32) def get_agent_params(self, agent_params): agent_params["subgoal_noise"] = [0.03 for i in range(self.subgoal_dim)] return agent_params def _obs2goal_tower(self, sim, obs): if self.gripper_goal != 'gripper_none': goal = obs[:self.goal_size] else: goal = obs[3:self.goal_size + 3] return goal def _obs2goal_subgoal(self, sim, obs): #return np.concatenate((np.array([bound_angle(sim.data.qpos[i]) for i in range(len(sim.data.qpos))]), # np.array([4 if sim.data.qvel[i] > 4 else -4 if sim.data.qvel[i] < -4 else sim.data.qvel[i] for i in # range(len(sim.data.qvel))]))) #return obs return obs[:3 + self.n_objects * 3] # Get state for tower building env: def _get_obs(self): dt = self.sim.nsubsteps * self.sim.model.opt.timestep # positions grip_pos = self.sim.data.get_site_xpos('robot0:grip') grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt robot_qpos, robot_qvel = self.sim.data.qpos, self.sim.data.qvel object_pos, object_rot, object_velp, object_velr = ([] for _ in range(4)) object_rel_pos = [] if self.n_objects > 0: for n_o in range(self.n_objects): oname = 'object{}'.format(n_o) this_object_pos = self.sim.data.get_site_xpos(oname) # rotations this_object_rot = rotations.mat2euler( self.sim.data.get_site_xmat(oname)) # velocities this_object_velp = self.sim.data.get_site_xvelp(oname) * dt this_object_velr = self.sim.data.get_site_xvelr(oname) * dt # gripper state this_object_rel_pos = this_object_pos - grip_pos this_object_velp -= grip_velp object_pos = np.concatenate([object_pos, this_object_pos]) object_rot = np.concatenate([object_rot, this_object_rot]) object_velp = np.concatenate([object_velp, this_object_velp]) object_velr = np.concatenate([object_velr, this_object_velr]) object_rel_pos = np.concatenate( [object_rel_pos, this_object_rel_pos]) else: object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.array( np.zeros(3)) gripper_state = robot_qpos[-2:] gripper_vel = robot_qvel[ -2:] * dt # change to a scalar if the gripper is made symmetric obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, ]) return obs # Get state len: def _get_obs_len(self): dt = self.sim.nsubsteps * self.sim.model.opt.timestep # positions grip_pos = self.sim.data.get_site_xpos('robot0:grip') grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt robot_qpos, robot_qvel = self.sim.data.qpos, self.sim.data.qvel object_pos, object_rot, object_velp, object_velr = ([] for _ in range(4)) object_rel_pos = [] if self.n_objects > 0: for n_o in range(self.n_objects): oname = 'object{}'.format(n_o) this_object_pos = self.sim.data.get_site_xpos(oname) # rotations this_object_rot = rotations.mat2euler( self.sim.data.get_site_xmat(oname)) # velocities this_object_velp = self.sim.data.get_site_xvelp(oname) * dt this_object_velr = self.sim.data.get_site_xvelr(oname) * dt # gripper state this_object_rel_pos = this_object_pos - grip_pos this_object_velp -= grip_velp object_pos = np.concatenate([object_pos, this_object_pos]) object_rot = np.concatenate([object_rot, this_object_rot]) object_velp = np.concatenate([object_velp, this_object_velp]) object_velr = np.concatenate([object_velr, this_object_velr]) object_rel_pos = np.concatenate( [object_rel_pos, this_object_rel_pos]) else: object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.array( np.zeros(3)) gripper_state = robot_qpos[-2:] gripper_vel = robot_qvel[ -2:] * dt # change to a scalar if the gripper is made symmetric obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, ]) print("obs len: ", len(obs)) return len(obs) # Get state, which concatenates joint positions and velocities def get_state(self): if self.name == "pendulum.xml": return np.concatenate([ np.cos(self.sim.data.qpos), np.sin(self.sim.data.qpos), self.sim.data.qvel ]) elif self.name == "assets/fetch/build_tower.xml": return self._get_obs() else: return np.concatenate((self.sim.data.qpos, self.sim.data.qvel)) def _env_setup(self, initial_qpos): for name, value in initial_qpos.items(): self.sim.data.set_joint_qpos(name, value) utils.reset_mocap_welds(self.sim) self.sim.forward() # Move end effector into position. gripper_target = np.array([ -0.498, 0.005, -0.431 + self.gripper_extra_height ]) + self.sim.data.get_site_xpos('robot0:grip') gripper_rotation = np.array([1., 0., 1., 0.]) self.sim.data.set_mocap_pos('robot0:mocap', gripper_target) self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation) for _ in range(10): self.sim.step() # Extract information for sampling goals. self.initial_gripper_xpos = self.sim.data.get_site_xpos( 'robot0:grip').copy() print("inital gripper xpos", self.initial_gripper_xpos) if self.n_objects > 0: self.height_offset = self.sim.data.get_site_xpos('object0')[2] def _reset_sim_tower(self): self.step_ctr = 0 self.sim.set_state(self.initial_state) # Randomize start position of objects. for o in range(self.n_objects): oname = 'object{}'.format(o) object_xpos = self.initial_gripper_xpos[:2] close = True while close: # self.initial_gripper_xpos[:2] object_xpos = [1, 0.25] + np.random.uniform( -self.obj_range, self.obj_range, size=2) close = False dist_to_nearest = np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) # Iterate through all previously placed boxes and select closest: for o_other in range(o): other_xpos = self.sim.data.get_joint_qpos( 'object{}:joint'.format(o_other))[:2] dist = np.linalg.norm(object_xpos - other_xpos) dist_to_nearest = min(dist, dist_to_nearest) if dist_to_nearest < 0.1: close = True object_qpos = self.sim.data.get_joint_qpos( '{}:joint'.format(oname)) assert object_qpos.shape == (7, ) object_qpos[:2] = object_xpos object_qpos[2] = self.table_height + (self.obj_height / 2) * self.obj_height * 1.05 self.sim.data.set_joint_qpos('{}:joint'.format(oname), object_qpos) self.sim.forward() return True # Reset simulation to state within initial state specified by user def reset_sim(self): if self.name == "assets/fetch/build_tower.xml": self._reset_sim_tower() else: # Reset joint positions and velocities for i in range(len(self.sim.data.qpos)): self.sim.data.qpos[i] = np.random.uniform( self.initial_state_space[i][0], self.initial_state_space[i][1]) for i in range(len(self.sim.data.qvel)): self.sim.data.qvel[i] = np.random.uniform( self.initial_state_space[len(self.sim.data.qpos) + i][0], self.initial_state_space[len(self.sim.data.qpos) + i][1]) self.sim.step() # Return state return self.get_state() # Set action for tower building env: def _set_action(self, action): assert action.shape == (4, ) action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.step_ctr += 1 # Execute low-level action for number of frames specified by num_frames_skip def execute_action(self, action): if __debug__: print("action: ", action) if self.name == "assets/fetch/build_tower.xml": self._set_action(action) #self.sim.data.ctrl[:] = action else: self.sim.data.ctrl[:] = action for _ in range(self.num_frames_skip): self.sim.step() if self.visualize: self.viewer.render() return self.get_state() # Visualize end goal. This function may need to be adjusted for new environments. def display_end_goal(self, end_goal): # Goal can be visualized by changing the location of the relevant site object. if self.name == "pendulum.xml": self.sim.data.mocap_pos[0] = np.array([ 0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6 ]) elif self.name == "assets/fetch/build_tower.xml": for i in range(self.n_objects): self.model.body_pos[-13 + i] = end_goal[i * 3:(i * 3) + 3] elif self.name == "ur5.xml": theta_1 = end_goal[0] theta_2 = end_goal[1] theta_3 = end_goal[2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) # Determine joint position relative to original reference frame # shoulder_pos = T_1_0.dot(shoulder_pos_1) upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3] forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos] """ print("\nEnd Goal Joint Pos: ") print("Upper Arm Pos: ", joint_pos[0]) print("Forearm Pos: ", joint_pos[1]) print("Wrist Pos: ", joint_pos[2]) """ for i in range(3): self.sim.data.mocap_pos[i] = joint_pos[i] else: assert False, "Provide display end goal function in environment.py file" def _sample_goal_tower(self): obs = self._get_obs() target_goal = None if obs is not None: # I changed obs['observation'] into obs because it is just a numpy array if self.gripper_goal != 'gripper_none': goal = obs.copy()[:self.goal_size] else: goal = obs.copy()[3:(self.goal_size + 3)] if self.gripper_goal != 'gripper_none' and self.n_objects > 0: target_goal_start_idx = 3 else: target_goal_start_idx = 0 stack_tower = (self.max_tower_height - self.min_tower_height + 1) == self.n_objects if not stack_tower: if self.n_objects > 0: target_range = self.n_objects else: target_range = 1 for n_o in range(target_range): # too_close = True while True: # self.initial_gripper_xpos[:3] target_goal = [ 1, 0.25, self.table_height + 0.05 ] + np.random.uniform( -self.target_range, self.target_range, size=3) target_goal += self.target_offset rnd_height = random.randint(self.min_tower_height, self.max_tower_height) self.goal_tower_height = rnd_height target_goal[2] = self.table_height + ( rnd_height * self.obj_height) - (self.obj_height / 2) too_close = False for i in range(0, target_goal_start_idx, 3): other_loc = goal[i:i + 3] dist = np.linalg.norm(other_loc[:2] - target_goal[:2], axis=-1) if dist < 0.1: too_close = True if too_close is False: break goal[target_goal_start_idx:target_goal_start_idx + 3] = target_goal.copy() target_goal_start_idx += 3 else: target_goal_xy = [1, 0.25] + np.random.uniform( -self.target_range, self.target_range, size=2) self.goal_tower_height = self.n_objects for n_o in range(self.n_objects): height = n_o + 1 target_z = self.table_height + ( height * self.obj_height) - (self.obj_height / 2) target_goal = np.concatenate((target_goal_xy, [target_z])) goal[target_goal_start_idx:target_goal_start_idx + 3] = target_goal.copy() target_goal_start_idx += 3 # Final gripper position if self.gripper_goal != 'gripper_none': gripper_goal_pos = goal.copy()[-3:] if self.gripper_goal == 'gripper_above': gripper_goal_pos[2] += (3 * self.obj_height) elif self.gripper_goal == 'gripper_random': too_close = True while too_close: gripper_goal_pos = [1, 0.25, self.table_height + 0.05] + \ np.random.uniform(-self.target_range, self.target_range, size=3) # gripper_goal_pos[0] += 0.25 gripper_goal_pos[2] += 0.14 if np.linalg.norm(gripper_goal_pos - target_goal, axis=-1) >= 0.1: too_close = False goal[:3] = gripper_goal_pos return goal.copy() else: return [] # Function returns an end goal def get_next_goal(self, test): end_goal = np.zeros((len(self.goal_space_test))) if self.name == "ur5.xml": goal_possible = False while not goal_possible: end_goal = np.zeros(shape=(self.end_goal_dim, )) end_goal[0] = np.random.uniform(self.goal_space_test[0][0], self.goal_space_test[0][1]) end_goal[1] = np.random.uniform(self.goal_space_test[1][0], self.goal_space_test[1][1]) end_goal[2] = np.random.uniform(self.goal_space_test[2][0], self.goal_space_test[2][1]) # Next need to ensure chosen joint angles result in achievable task (i.e., desired end effector position is above ground) theta_1 = end_goal[0] theta_2 = end_goal[1] theta_3 = end_goal[2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot( forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] # Make sure wrist 1 pos is above ground so can actually be reached if np.absolute(end_goal[0]) > np.pi / 4 and forearm_pos[ 2] > 0.05 and wrist_1_pos[2] > 0.15: goal_possible = True elif self.name == "assets/fetch/build_tower.xml": end_goal = self._sample_goal_tower() elif not test and self.goal_space_train is not None: for i in range(len(self.goal_space_train)): end_goal[i] = np.random.uniform(self.goal_space_train[i][0], self.goal_space_train[i][1]) else: assert self.goal_space_test is not None, "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file" for i in range(len(self.goal_space_test)): end_goal[i] = np.random.uniform(self.goal_space_test[i][0], self.goal_space_test[i][1]) # Visualize End Goal self.display_end_goal(end_goal) return end_goal # Visualize all subgoals def display_subgoals(self, subgoals): # Display up to 10 subgoals and end goal if len(subgoals) <= 11: subgoal_ind = 0 else: subgoal_ind = len(subgoals) - 11 for i in range(1, min(len(subgoals), 11)): if self.name == "pendulum.xml": self.sim.data.mocap_pos[i] = np.array([ 0.5 * np.sin(subgoals[subgoal_ind][0]), 0, 0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6 ]) # Visualize subgoal self.sim.model.site_rgba[i][3] = 1 subgoal_ind += 1 elif self.name == "assets/fetch/build_tower.xml": # display only the goal positions of the blocks if self.use_full_state_space_as_subgoal_space: #for n in range(self.n_objects): # self.model.body_pos[-9 + (3 * (i-1)) + n] = self._obs2goal_tower(self.sim, subgoals[i-1])[n*3:(n*3)+3] # Gripper pos: self.model.body_pos[-1] = subgoals[i - 1][0:3] # Objects pos: for n in range(self.n_objects): self.model.body_pos[-10 + (3 * (i - 1)) + n] = subgoals[i - 1][3 + n * 3:(n * 3) + 6] else: for n in range(self.n_objects): self.model.body_pos[-9 + (3 * (i - 1)) + n] = subgoals[i - 1][n * 3:(n * 3) + 3] subgoal_ind += 1 elif self.name == "ur5.xml": theta_1 = subgoals[subgoal_ind][0] theta_2 = subgoals[subgoal_ind][1] theta_3 = subgoals[subgoal_ind][2] # shoulder_pos_1 = np.array([0,0,0,1]) upper_arm_pos_2 = np.array([0, 0.13585, 0, 1]) forearm_pos_3 = np.array([0.425, 0, 0, 1]) wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1]) # Transformation matrix from shoulder to base reference frame T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159], [0, 0, 0, 1]]) # Transformation matrix from upper arm to shoulder reference frame T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0], [np.sin(theta_1), np.cos(theta_1), 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) # Transformation matrix from forearm to upper arm reference frame T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0], [0, 1, 0, 0.13585], [-np.sin(theta_2), 0, np.cos(theta_2), 0], [0, 0, 0, 1]]) # Transformation matrix from wrist 1 to forearm reference frame T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425], [0, 1, 0, 0], [-np.sin(theta_3), 0, np.cos(theta_3), 0], [0, 0, 0, 1]]) # Determine joint position relative to original reference frame # shoulder_pos = T_1_0.dot(shoulder_pos_1) upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3] forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot( forearm_pos_3)[:3] wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot( wrist_1_pos_4)[:3] joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos] """ print("\nSubgoal %d Joint Pos: " % i) print("Upper Arm Pos: ", joint_pos[0]) print("Forearm Pos: ", joint_pos[1]) print("Wrist Pos: ", joint_pos[2]) """ # Designate site position for upper arm, forearm and wrist for j in range(3): self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy( joint_pos[j]) self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1 # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3]) subgoal_ind += 1 else: # Visualize desired gripper position, which is elements 18-21 in subgoal vector self.sim.data.mocap_pos[i] = subgoals[subgoal_ind] # Visualize subgoal self.sim.model.site_rgba[i][3] = 1 subgoal_ind += 1
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
def main(data_dir): model = load_model_from_path("robot.xml") sim = MjSim(model) viewer = MjViewer(sim) initial_state = MjSimState(time=0, qpos=np.array([ 0, -np.pi / 4, 0, -3 * np.pi / 4 + 1, 0, np.pi / 2, np.pi / 4 ]), qvel=np.zeros(7), act=None, udd_state={}) sim.set_state(initial_state) sim.step() traj_count = 0 control_state = None while (traj_count < NUM_TRAJECTORIES and control_state != FINISHED): control_state = ACCELERATING outputFile = None initial_q = sim.get_state()[q_INDEX] velGen = randVelGen.RandVelGenerator() qd_des = velGen.generatePatternVel() coming_back_time = 0.0 time = 0 while control_state != FINISHED: state = sim.get_state() q = state[q_INDEX] qd = state[qd_INDEX] boundViolations = bounds.getBoundViolations(q) # RD = TB = bounds.tableBoundViolation(sim) OB = bounds.outOfBounds(q) DA = helperFun.moving(qd) DC = helperFun.stopped(qd) DD = DC DB = coming_back_time > RETURN_TIME FN = traj_count >= NUM_TRAJECTORIES prev_state = control_state # transition block if control_state == ACCELERATING: if not TB and not OB and not DA: control_state = ACCELERATING elif TB: control_state = COMING_BACK_IN_BOUNDS coming_back_time = 0.0 elif not TB and OB: control_state = DAMPING curBoundViolations = bounds.getBoundViolations(q) velGen.setJointDirections(curBoundViolations) elif not TB and not OB and DA: control_state = COASTING traj_count += 1 outputFile = open(data_dir + helperFun.getUniqueFileName("traj"), mode='x') outputWriter = csv.writer(outputFile, delimiter=',') print_count(traj_count) else: control_state = FINISHED print("Unknown transistion! ACCELERATING") elif control_state == COASTING: if not FN and not TB and not OB and DC: control_state = ACCELERATING qd_des = velGen.generatePatternVel() outputFile.close() elif not FN and TB: control_state = COMING_BACK_IN_BOUNDS coming_back_time = 0 outputFile.close() elif not FN and not TB and OB: control_state = DAMPING outputFile.close() curBoundViolations = bounds.getBoundViolations(q) velGen.setJointDirections(curBoundViolations) elif FN: control_state = FINISHED outputFile.close() elif not FN and not TB and not OB and not DC: control_state = COASTING else: control_state = FINISHED print("Unknown transition! COASTING") outputFile.close() elif control_state == DAMPING: if not TB and not DD: control_state = DAMPING elif TB: control_state = COMING_BACK_IN_BOUNDS coming_back_time = 0.0 elif not TB and DD: control_state = ACCELERATING qd_des = velGen.generatePatternVel() else: control_state = FINISHED print("Unknow transition! DAMPING") elif control_state == COMING_BACK_IN_BOUNDS: if not DB: control_state = COMING_BACK_IN_BOUNDS elif DB and OB: control_state = DAMPING curBoundViolations = bounds.getBoundViolations(q) velGen.setJointDirections(curBoundViolations) elif DB and not OB: control_state = ACCELERATING qd_des = velGen.generatePatternVel() else: control_state = FINISHED print("Unknown transition! COMING_BACK_IN_BOUNDS") elif control_state == FINISHED: control_state = FINISHED else: control_state = FINISHED print("Got to an invalid state!") # debug states if prev_state != control_state: if control_state == ACCELERATING: print("ACCELERATING") elif control_state == COASTING: print("COASTING") elif control_state == DAMPING: print("DAMPING") elif control_state == COMING_BACK_IN_BOUNDS: print("COMING_BACK_IN_BOUNDS") elif control_state == "FINISHED": print("FINISHED") else: print("In a bad state!") torques = np.zeros(7) if control_state == ACCELERATING: torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd) elif control_state == COASTING: data = np.concatenate( (q, qd, data_calc.get_3D_data(sim), [time])) outputWriter.writerow(data) torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd) elif control_state == DAMPING: torques = controllers.dampingControl(qd) elif control_state == COMING_BACK_IN_BOUNDS: coming_back_time += TIMESTEP torques = controllers.moveAwayFromTable(q=q, qd=qd) elif control_state == FINISHED: outputFile.close() break else: print("Got to an invalid state!") control_state = FINISHED break sim.data.ctrl[:] = torques sim.step() viewer.render() time += TIMESTEP
def test_sim_state(): model = load_model_from_xml(BASIC_MODEL_XML) foo = 10 d = {"foo": foo, "foo_array": np.array([foo, foo, foo]), "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)), } def udd_callback(sim): return d sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback) state = sim.get_state() assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) for k in state.udd_state.keys(): if (isinstance(state.udd_state[k], Number)): assert state.udd_state[k] == sim.udd_state[k] else: assert np.array_equal(state.udd_state[k], sim.udd_state[k]) # test flatten, unflatten a = state.flatten() assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8) state2 = MjSimState.from_flattened(a, sim) assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) for k in state2.udd_state.keys(): if (isinstance(state2.udd_state[k], Number)): assert state2.udd_state[k] == sim.udd_state[k] else: assert np.array_equal(state2.udd_state[k], sim.udd_state[k]) assert state2 == state assert not state2 != state # test equality with deleting keys state2 = state2._replace(udd_state={"foo": foo}) assert state2 != state assert not (state2 == state) # test equality with changing contents of array state2 = state2._replace( udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])}) assert state2 != state assert not (state2 == state) # test equality with adding keys d2 = dict(d) d2.update({"not_foo": foo}) state2 = state2._replace(udd_state=d2) assert state2 != state assert not (state2 == state) # test defensive copy sim.set_state(state) state.qpos[0] = -1 assert not np.array_equal(state.qpos, sim.data.qpos) state3 = sim.get_state() state3.qpos[0] = -1 assert not np.array_equal(state3.qpos, sim.data.qpos) state3.udd_state["foo_array"][0] = -1 assert not np.array_equal( state3.udd_state["foo_array"], sim.udd_state["foo_array"]) # test no callback sim = MjSim(model, nsubsteps=2) state = sim.get_state() print("state.udd_state = %s" % state.udd_state) assert state.udd_state == {} # test flatten, unflatten a = state.flatten() assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na state2 = MjSimState.from_flattened(a, sim) assert np.array_equal(state.time, sim.data.time) assert np.array_equal(state.qpos, sim.data.qpos) assert np.array_equal(state.qvel, sim.data.qvel) assert np.array_equal(state.act, sim.data.act) assert state.udd_state == sim.udd_state
def closeloop_run_fcn(model, desired_kinematics, P, I, delay_timesteps=0, model_ver=0, plot_outputs=True, Mj_render=False, timestep=.005): est_activations = estimate_activations_fcn(model, desired_kinematics) number_of_task_samples = desired_kinematics.shape[0] chassis_pos = np.zeros(number_of_task_samples, ) input_kinematics = np.zeros(desired_kinematics.shape) real_attempt_positions = np.zeros([number_of_task_samples, 2]) real_attempt_activations = np.zeros([number_of_task_samples, 3]) q_error_cum = np.zeros([number_of_task_samples, 2]) # sample error history Mj_model = load_model_from_path( "./models/nmi_leg_w_chassis_v{}.xml".format(model_ver)) sim = MjSim(Mj_model) if Mj_render: viewer = MjViewer(sim) viewer.cam.fixedcamid += 1 viewer.cam.type = const.CAMERA_FIXED sim_state = sim.get_state() control_vector_length = sim.data.ctrl.__len__() print("control_vector_length: " + str(control_vector_length)) sim.set_state(sim_state) gradient_edge_order = 1 for ii in range(number_of_task_samples): if ii < max(gradient_edge_order, delay_timesteps + 1): #print(ii) input_kinematics[ii, :] = desired_kinematics[ii, :] else: [input_kinematics[ii, :], q_error_cum] = calculate_closeloop_inputkinematics( step_number=ii, real_attempt_positions=real_attempt_positions, desired_kinematics=desired_kinematics, q_error_cum=q_error_cum, P=P, I=I, delay_timesteps=delay_timesteps, gradient_edge_order=gradient_edge_order, timestep=timestep) est_activations[ii, :] = model.predict([input_kinematics[ii, :]])[0, :] sim.data.ctrl[:] = est_activations[ii, :] sim.step() chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0] current_positions_array = sim.data.qpos[-2:] real_attempt_positions[ii, :] = current_positions_array real_attempt_activations[ii, :] = sim.data.ctrl if Mj_render: viewer.render() real_attempt_kinematics = positions_to_kinematics_fcn( real_attempt_positions[:, 0], real_attempt_positions[:, 1], timestep=timestep) error0 = error_cal_fcn(desired_kinematics[:, 0], real_attempt_kinematics[:, 0]) error1 = error_cal_fcn(desired_kinematics[:, 3], real_attempt_kinematics[:, 3]) average_error = 0.5 * (error0 + error1) if plot_outputs: #plt.figure() alpha = .8 plot_t = np.linspace(timestep, desired_kinematics.shape[0] * timestep, desired_kinematics.shape[0]) plt.subplot(2, 1, 1) plt.plot(plot_t, desired_kinematics[:, 0], 'k', plot_t, real_attempt_kinematics[:, 0], 'C1', alpha=.9) plt.ylabel("$q_1$ (rads)") plt.subplot(2, 1, 2) plt.plot(plot_t, desired_kinematics[:, 3], 'k', plot_t, real_attempt_kinematics[:, 3], 'C1', alpha=.9) plt.ylabel("$q_2$ (rads)") plt.xlabel("time (s)") plt.show(block=True) return average_error, real_attempt_kinematics, real_attempt_activations
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, env_name, rand_seed, maximum_length, model_path, frame_skip, misc_info={}): self._env_name = env_name self._rand_seed = rand_seed self._maximum_length = maximum_length self._misc_info = misc_info if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() # Why is this here? Causes errors #observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) #assert not done #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) # annoying should replace high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed(self._rand_seed) def _seed(self, seed=None): self.np_random, seed = seeding.np_random(self._rand_seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def get_env_state(self): """ Get full state of the environment beyond qpos and qvel For example, if targets are defined using sites, this function should also contain location of the sites (which are not included in qpos). Must return a dictionary that can be used in the set_env_state function """ raise NotImplementedError def set_env_state(self, state): """ Uses the state dictionary to set the state of the world """ raise NotImplementedError # ----------------------------- def reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob, 0, False, {} def reset_soft(self, seed=None): return self._old_obs, 0, False, {} def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq, ) and qvel.shape == ( self.model.nv, ) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action( o)[0] if mode == 'exploration' else policy.get_action( o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t + 1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640, 480), mode='exploration', save_loc='./tmp/', filename='newvid', it=0, camera_name=None): for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o, *_ = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy(o) o, r, d, _ = self.step(a) t = t + 1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1, :, :]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + str(it) + ".mp4" if not os.path.exists(save_loc): os.makedirs(save_loc) imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0) print("saved", file_name) t1 = timer.time() print("time taken = %f" % (t1 - t0))
def initial_config_from_mjcf(file, ee_list, verbose=False): """ Parameters ---------- file (str): mjcf file ee_list (list of str): End-effector body names Returns ------- ret (dict): nq (tf.Tensor) Number of joint nu (tf.Tensor) Number of input S (tf.Tensor): Selection Matrix. This includes actuator gear. (1,nu, nq) dt (tf.Tensor): Timestep joint_armature (tf.Tensor): (nq,) g (tf.Tensor): Gravity Mlist (tf.Tensor): Link frame i relative to p(i) at the home position (nbody,6,6) Slist (tf.Tensor): Screw axes Si of the joints in a space frame. (nq,6) init_ee_SE3 (dict of tf.Tensor): Initial end-effector SE3 Blist (dict of tf.Tensor): Joint screw axes in the end-effector frame when the manipulator is at the home position (nq_branch,6) Bidlist (dict of tf.Tensor): List of the jonit index related to the end-effector branch (nq_branch,) init_ee_SE3 (dict of tf.Tensor): (4,4) Glist (tf.Tensor): Spatial inertia matrices Gi of the links. (nbody,6,6) pidlist (tf.Tensor): Parent body index. (nbody,) """ ret = dict() model = load_model_from_path(file) sim = MjSim(model) sim_state = sim.get_state() for i in range(sim.model.nq): sim_state.qpos[i] = 0. sim_state.qvel[i] = 0. sim.set_state(sim_state) sim.forward() nq, nv, nu = sim.model.nq, sim.model.nv, sim.model.nu n_ee = len(ee_list) njoint, nbody = sim.model.njnt, sim.model.nbody-n_ee-1 assert sim.model.nq + 1 + n_ee == sim.model.nbody assert nq == nv assert nq == njoint assert nq == nbody S = None if nu is not 0: S = np.zeros((1,nu,nq)) S[0,0:nu, nq-nu:nq] = np.diag(sim.model.actuator_gear[:,0]) muj_world_id = 0 muj_ee_id, muj_body_id, muj_body_name = [], [], [] muj_global_body_pos, muj_global_body_SO3 = np.zeros((sim.model.nbody, 3)), np.zeros((sim.model.nbody, 3, 3)) muj_global_joint_pos, muj_global_joint_SO3 = np.zeros((sim.model.nq, 3)), np.zeros((sim.model.nq, 3, 3)) muj_global_body_SO3[0] = np.eye(3) muj_global_inertial_pos, muj_global_inertial_SO3 = np.zeros((sim.model.nbody, 3)), np.zeros((sim.model.nbody, 3, 3)) for i in range(sim.model.nbody): if sim.model.body_names[i] == "world": muj_world_id = i elif sim.model.body_names[i] in ee_list: muj_ee_id.append(sim.model.body_name2id(sim.model.body_names[i])) else: muj_body_id.append(i) muj_body_name.append(sim.model.body_names[i]) muj_global_body_SO3[i] = sim.data.get_body_xmat(sim.model.body_names[i]) muj_global_body_pos[i] = sim.data.get_body_xpos(sim.model.body_names[i]) muj_global_inertial_pos[i] = muj_global_body_pos[i] + sim.model.body_ipos[i] muj_global_inertial_SO3[i] = np.dot(muj_global_body_SO3[i], quat_to_SO3(sim.model.body_iquat[i])) for i in range(sim.model.nq): body_id = sim.model.jnt_bodyid[i] muj_global_joint_SO3[i] = muj_global_body_SO3[body_id] muj_global_joint_pos[i] = muj_global_body_pos[body_id] + np.dot(muj_global_body_SO3[body_id], sim.model.jnt_pos[i]) Bidlist = dict() for ee in ee_list: idlist_reverse = [] ee_parent = sim.model.body_parentid[sim.model.body_name2id(ee)] while True: j_st_id = sim.model.body_jntadr[ee_parent] nj = sim.model.body_jntnum[ee_parent] idlist_reverse.extend([*range(j_st_id, j_st_id+nj)][::-1]) ee_parent = sim.model.body_parentid[ee_parent] if (sim.model.body_names[ee_parent]=='world'): break Bidlist[ee] = idlist_reverse[::-1] if verbose: print("="*80) print("Infos aboue Mujoco Model") print("-"*80) print("Global Body Pos") print(muj_global_body_pos) print("-"*80) print("Global Body Ori") print(muj_global_body_SO3) print("-"*80) print("Global Inertia Pos") print(muj_global_inertial_pos) print("-"*80) print("Global Inertia Ori") print(muj_global_inertial_SO3) print("-"*80) print("Global Joint Pos") print(muj_global_joint_pos) print("-"*80) print("Global Joint Ori") print(muj_global_joint_SO3) for ee in ee_list: print("-"*80) print("{} Related Joint Id".format(ee)) print(Bidlist[ee]) print("="*80) ret['nq'] = tf.convert_to_tensor(nq, tf.int32) ret['nu'] = tf.convert_to_tensor(nu, tf.int32) if S is None: ret['S'] = None else: ret['S'] = tf.convert_to_tensor(S, tf.float32) ret['dt'] = tf.convert_to_tensor(sim.model.opt.timestep, tf.float32) ret['joint_armature'] = tf.convert_to_tensor(sim.model.dof_armature, tf.float32) ret['g'] = tf.convert_to_tensor(sim.model.opt.gravity, tf.float32) pidlist = [] for i in range(nbody): pname = sim.model.body_names[sim.model.body_parentid[sim.model.body_name2id(muj_body_name[i])]] if pname == "world": pidlist.append(-1) else: pidlist.append(muj_body_name.index(pname)) ret['pidlist'] = tf.convert_to_tensor(np.array(pidlist), tf.int32) ret['Mlist'] = [None]*nbody ret['Glist'] = [None]*nbody for i in range(nbody): muj_id = muj_body_id[i] muj_pid = sim.model.body_parentid[muj_id] rel_pos = np.dot(muj_global_inertial_SO3[muj_pid].T, muj_global_inertial_pos[muj_id] - muj_global_inertial_pos[muj_pid]) rel_SO3 = np.dot(muj_global_inertial_SO3[muj_pid].T, muj_global_inertial_SO3[muj_id]) rel_SE3 = tf.squeeze(Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(rel_SO3,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(rel_pos,tf.float32),0)), 0) ret['Mlist'][i] = rel_SE3 G = np.diag(sim.model.body_inertia[muj_id]) mI = sim.model.body_mass[muj_id] * np.eye(3) Gi = np.zeros((6,6)) Gi[0:3,0:3] = G Gi[3:6,3:6] = mI ret['Glist'][i] = tf.convert_to_tensor(Gi, tf.float32) ret['Mlist'] = tf.stack(ret['Mlist'],0) ret['Glist'] = tf.stack(ret['Glist'],0) ret['Slist'] = [None]*nq for i in range(nq): R = muj_global_joint_SO3[i] p = muj_global_joint_pos[i] Tsj = Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(R,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(p,tf.float32),0)) adTsj = tf.squeeze(adjoint(Tsj),0) if sim.model.jnt_type[i] == 2: screw_axis = np.concatenate([np.zeros(3), sim.model.jnt_axis[i]], axis=0) elif sim.model.jnt_type[i] == 3: screw_axis = np.concatenate([sim.model.jnt_axis[i], np.zeros(3)], axis=0) else: raise ValueError("Wrong Joint Type") screw_axis = tf.expand_dims(tf.convert_to_tensor(screw_axis, tf.float32),axis=1) S = tf.squeeze(tf.matmul(adTsj, screw_axis),1) ret['Slist'][i] = S ret['Slist'] = tf.stack(ret['Slist'],0) ret['init_ee_SE3'] = dict() ret['Bidlist'] = dict() ret['Blist'] = dict() for ee in ee_list: p = tf.expand_dims(tf.convert_to_tensor(muj_global_body_pos[sim.model.body_name2id(ee)], tf.float32),0) R = tf.expand_dims(tf.convert_to_tensor(muj_global_body_SO3[sim.model.body_name2id(ee)], tf.float32),0) ret['init_ee_SE3'][ee] = tf.squeeze(Rp_to_SE3(R,p),0) ret['Bidlist'][ee] = [None]*len(Bidlist[ee]) ret['Blist'][ee] = [None]*len(Bidlist[ee]) for i, id in enumerate(Bidlist[ee]): ret['Bidlist'][ee][i] = id R = muj_global_joint_SO3[id] p = muj_global_joint_pos[id] Tsj = tf.squeeze(Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(R,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(p,tf.float32),0)),0) Tsb = ret['init_ee_SE3'][ee] Tbs = tf.squeeze(SE3_inv(tf.expand_dims(Tsb,0))) Tbj = tf.matmul(Tbs,Tsj) adTbj = tf.squeeze(adjoint(tf.expand_dims(Tbj,0)),0) if sim.model.jnt_type[id] == 2: screw_axis = np.concatenate([np.zeros(3), sim.model.jnt_axis[id]], axis=0) elif sim.model.jnt_type[id] == 3: screw_axis = np.concatenate([sim.model.jnt_axis[id], np.zeros(3)], axis=0) else: raise ValueError("Wrong Joint Type") screw_axis = tf.expand_dims(tf.convert_to_tensor(screw_axis, tf.float32),axis=1) B = tf.squeeze(tf.matmul(adTbj, screw_axis),1) ret['Blist'][ee][i] = B ret['Bidlist'][ee] = tf.stack(ret['Bidlist'][ee],0) ret['Blist'][ee] = tf.stack(ret['Blist'][ee],0) if verbose: print("="*80) print("Infos about Return Value") pretty_print_dictionary(ret) print("="*80) return ret
class GripperTester: """ A class that is used to test gripper Args: gripper (GripperModel): A gripper instance to be tested pos (str): (x y z) position to place the gripper in string form, e.g. '0 0 0.3' quat (str): rotation to apply to gripper in string form, e.g. '0 0 1 0' to flip z axis gripper_low_pos (float): controls the gipper y position, larger -> higher gripper_high_pos (float): controls the gipper y high position larger -> higher, must be larger than gripper_low_pos box_size (None or 3-tuple of int): the size of the box to grasp, None defaults to [0.02, 0.02, 0.02] box_density (int): the density of the box to grasp step_time (int): the interval between two gripper actions render (bool): if True, show rendering """ def __init__( self, gripper, pos, quat, gripper_low_pos, gripper_high_pos, box_size=None, box_density=10000, step_time=400, render=True, ): # define viewer self.viewer = None world = MujocoWorldBase() # Add a table arena = TableArena(table_full_size=(0.4, 0.4, 0.1), table_offset=(0, 0, 0.1), has_legs=False) world.merge(arena) # Add a gripper self.gripper = gripper # Create another body with a slider joint to which we'll add this gripper gripper_body = ET.Element("body") gripper_body.set("pos", pos) gripper_body.set("quat", quat) # flip z gripper_body.append(new_joint(name="gripper_z_joint", type="slide", axis="0 0 -1", damping="50")) # Add all gripper bodies to this higher level body for body in gripper.worldbody: gripper_body.append(body) # Merge the all of the gripper tags except its bodies world.merge(gripper, merge_body=None) # Manually add the higher level body we created world.worldbody.append(gripper_body) # Create a new actuator to control our slider joint world.actuator.append(new_actuator(joint="gripper_z_joint", act_type="position", name="gripper_z", kp="500")) # Add an object for grasping # density is in units kg / m3 TABLE_TOP = [0, 0, 0.09] if box_size is None: box_size = [0.02, 0.02, 0.02] box_size = np.array(box_size) self.cube = BoxObject( name="object", size=box_size, rgba=[1, 0, 0, 1], friction=[1, 0.005, 0.0001], density=box_density ) object_pos = np.array(TABLE_TOP + box_size * [0, 0, 1]) mujoco_object = self.cube.get_obj() # Set the position of this object mujoco_object.set("pos", array_to_string(object_pos)) # Add our object to the world body world.worldbody.append(mujoco_object) # add reference objects for x and y axes x_ref = BoxObject( name="x_ref", size=[0.01, 0.01, 0.01], rgba=[0, 1, 0, 1], obj_type="visual", joints=None ).get_obj() x_ref.set("pos", "0.2 0 0.105") world.worldbody.append(x_ref) y_ref = BoxObject( name="y_ref", size=[0.01, 0.01, 0.01], rgba=[0, 0, 1, 1], obj_type="visual", joints=None ).get_obj() y_ref.set("pos", "0 0.2 0.105") world.worldbody.append(y_ref) self.world = world self.render = render self.simulation_ready = False self.step_time = step_time self.cur_step = 0 if gripper_low_pos > gripper_high_pos: raise ValueError( "gripper_low_pos {} is larger " "than gripper_high_pos {}".format(gripper_low_pos, gripper_high_pos) ) self.gripper_low_pos = gripper_low_pos self.gripper_high_pos = gripper_high_pos def start_simulation(self): """ Starts simulation of the test world """ model = self.world.get_model(mode="mujoco_py") self.sim = MjSim(model) if self.render: self.viewer = MjViewer(self.sim) self.sim_state = self.sim.get_state() # For gravity correction gravity_corrected = ["gripper_z_joint"] self._gravity_corrected_qvels = [self.sim.model.get_joint_qvel_addr(x) for x in gravity_corrected] self.gripper_z_id = self.sim.model.actuator_name2id("gripper_z") self.gripper_z_is_low = False self.gripper_actuator_ids = [self.sim.model.actuator_name2id(x) for x in self.gripper.actuators] self.gripper_is_closed = True self.object_id = self.sim.model.body_name2id(self.cube.root_body) object_default_pos = self.sim.data.body_xpos[self.object_id] self.object_default_pos = np.array(object_default_pos, copy=True) self.reset() self.simulation_ready = True def reset(self): """ Resets the simulation to the initial state """ self.sim.set_state(self.sim_state) self.cur_step = 0 def close(self): """ Close the viewer if it exists """ if self.viewer is not None: self.viewer.close() def step(self): """ Forward the simulation by one timestep Raises: RuntimeError: if start_simulation is not yet called. """ if not self.simulation_ready: raise RuntimeError("Call start_simulation before calling step") if self.gripper_z_is_low: self.sim.data.ctrl[self.gripper_z_id] = self.gripper_low_pos else: self.sim.data.ctrl[self.gripper_z_id] = self.gripper_high_pos if self.gripper_is_closed: self._apply_gripper_action(1) else: self._apply_gripper_action(-1) self._apply_gravity_compensation() self.sim.step() if self.render: self.viewer.render() self.cur_step += 1 def _apply_gripper_action(self, action): """ Applies binary gripper action Args: action (int): Action to apply. Should be -1 (open) or 1 (closed) """ gripper_action_actual = self.gripper.format_action(np.array([action])) # rescale normalized gripper action to control ranges ctrl_range = self.sim.model.actuator_ctrlrange[self.gripper_actuator_ids] bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0]) weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0]) applied_gripper_action = bias + weight * gripper_action_actual self.sim.data.ctrl[self.gripper_actuator_ids] = applied_gripper_action def _apply_gravity_compensation(self): """ Applies gravity compensation to the simulation """ self.sim.data.qfrc_applied[self._gravity_corrected_qvels] = self.sim.data.qfrc_bias[ self._gravity_corrected_qvels ] def loop(self, total_iters=1, test_y=False, y_baseline=0.01): """ Performs lower, grip, raise and release actions of a gripper, each separated with T timesteps Args: total_iters (int): Iterations to perform before exiting test_y (bool): test if object is lifted y_baseline (float): threshold for determining that object is lifted """ seq = [(False, False), (True, False), (True, True), (False, True)] for cur_iter in range(total_iters): for cur_plan in seq: self.gripper_z_is_low, self.gripper_is_closed = cur_plan for step in range(self.step_time): self.step() if test_y: if not self.object_height > y_baseline: raise ValueError( "object is lifed by {}, ".format(self.object_height) + "not reaching the requirement {}".format(y_baseline) ) @property def object_height(self): """ Queries the height (z) of the object compared to on the ground Returns: float: Object height relative to default (ground) object position """ return self.sim.data.body_xpos[self.object_id][2] - self.object_default_pos[2]
class PushPuckBase(ABC): def __init__(self, n_substeps: int = 1, render: bool = False): self.render = render set_puck(raw_xml_path=self.raw_xml_path, xml_path=self.xml_path, puck_size=None, puck_pos=None) model = load_model_from_path(self.xml_path) self.sim = MjSim(model=model, nsubsteps=n_substeps) self.viewer = MjViewer(self.sim) if render else None self.reset() def __call__(self, weights, extra_timesteps=1000): self.reset() return self.rollout(weights, extra_timesteps) def reset(self) -> None: """Resets the environment (including the agent) to the initial conditions. """ self.sim.reset() # Set initial position and velocity self.qpos = self.sim.data.qpos.copy() self.qpos[:self.robot_init_qpos.shape[0]] = self.robot_init_qpos qvel = np.zeros(self.sim.data.qvel.shape) mjSimState = MjSimState(time=0.0, qpos=self.qpos, qvel=qvel, act=None, udd_state={}) self.sim.set_state(mjSimState) self.sim.forward() @property def xml_path(self) -> str: return str(Path(__file__).resolve().parents[0]) + '/' + 'assets/xml_model/env_model.xml' def set_target(self, target_pos) -> None: """ Sets the postion of the target. :param target_pos: Desired target position """ if target_pos is None: target_pos = [0.7, 0, 0.02] body_id = self.sim.model.body_name2id('target') self.sim.model.body_pos[body_id] = target_pos self.sim.forward() @property def target_pos(self): """ Helper for getting the current target position. :return: Current target position """ return self.sim.data.get_site_xpos('target:site1').copy() @property def puck_pos(self): """ Helper for getting the current puck position. :return: Current puck position """ return self.sim.data.get_body_xpos('puck').copy() @property def tcp_pos(self): """ Helper for getting the current end effector position. :return: Current end effector position """ return self.sim.data.get_body_xpos('tcp').copy() @property @abstractmethod def raw_xml_path(self): raise NotImplementedError @property @abstractmethod def robot_init_qpos(self): raise NotImplementedError @property @abstractmethod def robot_final_qpos(self): raise NotImplementedError @abstractmethod def rollout(self, weights, goal_pos, extra_timesteps=200): raise NotImplementedError
print("t: %5.3f" % time) print("qpos: ", qpos) print("qvel: ", qvel) print("tosser (slide,hinge): ", qpos[:2]) print("object (z,y,pitch): ", qpos[-3:]) model = load_model_from_path("../../xmls/tosser.xml") sim = MjSim(model) viewer = MjViewer(sim) sim_state = sim.get_state() while True: sim.set_state(sim_state) for i in range(1000): state = sim.get_state() # time, qpos, qvel, act, udd_state = state.time, state.qpos, state.qvel, state.act, state.udd_state # print(time, qpos, qvel) print_state(state) if i < 150: sim.data.ctrl[0] = -0.0 sim.data.ctrl[1] = -0.0 else: sim.data.ctrl[0] = -1.0 sim.data.ctrl[1] = -1.0 sim.step() viewer.render()
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass def evaluate_success(self, paths, logger=None): """ Log various success metrics calculated based on input paths into the logger """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640,480), mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))