def reset(self, reset_args=None, **kwargs):
    """Reset the environment, rebuilding the MuJoCo model when the goal changes.

    Args:
        reset_args: either ``None`` or a length-2 sequence ``(goal_num, test)``.
            When given and different from the current ``self.goal_num`` /
            ``self.test``, the demo pickle for that goal is loaded and a new
            ``MujocoEnv`` is built from the XML path stored in it. When
            ``None``, a goal is sampled only if ``self.goal_num`` is still
            ``None``; otherwise the current model is kept.
        **kwargs: accepted and ignored.

    Returns:
        Whatever ``self.get_current_obs()`` returns after ``self.reset_model()``.

    Raises:
        AssertionError: if ``reset_args`` is given but is not of length 2.
    """
    if reset_args is None:
        logger.log("Debug, warning, reset_args for env is None")

    explicit_goal = reset_args is not None
    if explicit_goal:
        assert len(reset_args) == 2, "wrong size goal"
        requested_num, requested_test = reset_args
        rebuild = (requested_num != self.goal_num) or (requested_test != self.test)
        if rebuild:
            # NOTE(review): the original indentation was lost; terminate() is
            # assumed to sit inside the viewer check together with
            # stop_viewer() -- confirm against the upstream file.
            if self.mujoco.viewer is not None:
                self.mujoco.stop_viewer()
                self.mujoco.terminate()
            self.goal_num, self.test = reset_args
    else:
        # With an existing goal_num nothing is resampled; only the model state
        # is reset further down.
        rebuild = self.goal_num is None
        if rebuild:
            self.goal_num, self.test = self.sample_goals(num_goals=1, test=False)[0]

    if rebuild:
        demo_dir = self.test_dir if self.test else self.train_dir
        demo_data = joblib.load(demo_dir + str(self.goal_num) + ".pkl")
        # The pickled path points at the original training machine; retarget
        # it to the local model directory.
        xml_file = demo_data["xml"].replace(
            "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)

        # The 5th character from the end of the path is parsed as a digit
        # (presumably the last character before ".xml" -- verify); an even
        # digit selects the inverted order unless self.debug is set.
        inverted = int(xml_file[-5]) % 2 == 0 and not self.debug
        if explicit_goal:
            if inverted:
                print("inverted_order", xml_file, self.goal_num, self.test)
            else:
                print("normal_order", xml_file, self.goal_num, self.test)
        else:
            if inverted:
                print("inverted_order, first time initializing env", xml_file)
            else:
                print("normal_order, first time initializing env", xml_file)
        # NOTE(review): an older TODO on the first-time branch said the order
        # had been forced to [0, 1] for debugging, yet the code assigns
        # [1, 0] -- confirm which is intended.
        self.shuffle_order = [1, 0] if inverted else [0, 1]
        self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file)

    # viewer_setup() is not called in this version (the call was disabled).
    self.reset_model()
    return self.get_current_obs()
def reset(self, reset_args=None, **kwargs):
    """Reset the reacher, optionally switching to another colour-order variant.

    Args:
        reset_args: ``None`` or an index (0, 1 or 2) into the table of shuffle
            orders. When it differs from ``self.goal_num`` a new ``MujocoEnv``
            is built from the matching XML file. When ``None`` and
            ``self.goal_num`` is still ``None``, an index is drawn with
            ``np.random.choice`` and the model is built; otherwise the current
            model is kept so the XML file does not change between resets.
        **kwargs: accepted and ignored.

    Returns:
        Element ``[1]`` of ``self.get_current_image_obs()``.
    """
    variant_orders = [[0, 1, 2], [1, 2, 0], [2, 0, 1]]
    xml_template = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml'

    def load_variant():
        # Build the simulator for the current shuffle_order and set up its viewer.
        xml_file = xml_template % tuple(self.shuffle_order)
        self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file)
        self.viewer_setup()

    goal_num = reset_args
    if goal_num is not None:
        if self.goal_num != goal_num:
            self.goal_num = goal_num
            self.shuffle_order = variant_orders[self.goal_num]
            # The previous simulator is not torn down on this path (the
            # teardown calls were disabled in the original).
            load_variant()
    elif self.goal_num is None:
        self.goal_num = np.random.choice([0, 1, 2])
        self.shuffle_order = variant_orders[self.goal_num]
        # NOTE(review): the original indentation was lost; release() and
        # terminate() are assumed to sit inside the viewer check -- confirm.
        if self.mujoco.viewer is not None:
            self.mujoco.stop_viewer()
            self.mujoco.release()
            self.mujoco.terminate()
        load_variant()

    # Snapshot the initial state only after any model swap, so it comes from
    # the simulator that is actually being reset rather than a discarded one.
    qpos = np.copy(self.mujoco.init_qpos)
    # The velocity noise is scaled by 0.0, i.e. effectively disabled; the draw
    # is kept so the simulator's RNG advances as before.
    qvel = np.copy(self.mujoco.init_qvel) + 0.0 * self.mujoco.np_random.uniform(
        low=-0.005, high=0.005, size=self.mujoco.model.nv)

    # Goal and both distractors are drawn independently from the same box.
    low, high = [-0.4, -0.4, -0.3], [0.4, 0.0, -0.3]
    self.goal = np.random.uniform(low=low, high=high).reshape(3, 1)
    self.distract1 = np.random.uniform(low=low, high=high).reshape(3, 1)
    self.distract2 = np.random.uniform(low=low, high=high).reshape(3, 1)

    qpos[-14:-11] = self.distract1
    qpos[-21:-18] = self.distract2
    qpos[-7:-4] = self.goal
    qvel[-7:] = 0

    self.mujoco.model.data.qpos = qpos
    self.mujoco.model.data.qvel = qvel
    self.mujoco.model._compute_subtree()
    self.mujoco.model.forward()

    self.current_com = self.mujoco.model.data.com_subtree[0]
    self.dcom = np.zeros_like(self.current_com)
    return self.get_current_image_obs()[1]
def __init__(self, xml_file=None, distance_metric_order=None, distractors=True, *args, **kwargs):
    """Build the 7-DoF reacher wrapper around a ``MujocoEnv``.

    Args:
        xml_file: path of the MuJoCo model to load. When ``None`` a default is
            chosen: the plain reacher model without distractors, otherwise the
            two-distractor model named after ``self.shuffle_order``.
        distance_metric_order: not used in this constructor.
        distractors: whether distractor objects are included.
        *args: forwarded to ``Serializable.__init__``.
        **kwargs: forwarded to ``Serializable.__init__``; an optional
            ``'noise'`` entry is also passed to ``MujocoEnv`` as
            ``action_noise`` (default ``0.0``).
    """
    self.goal = None
    self.include_distractors = distractors
    action_noise = kwargs.get('noise', 0.0)

    if distractors:
        # Fixed to the last of the three orders [0,1,2] / [1,2,0] / [2,0,1];
        # random selection was disabled in the original.
        self.shuffle_order = [2, 0, 1]

    if xml_file is None:
        if self.include_distractors:
            xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml' % tuple(
                self.shuffle_order)
        else:
            xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof.xml'
    # NOTE(review): the original indentation was lost; the print is assumed to
    # run for caller-supplied paths as well -- confirm.
    print("xml file", xml_file)

    self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file, action_noise=action_noise)

    # Re-export parts of the wrapped simulator's interface on this object.
    self.action_space = self.mujoco.action_space
    self.get_viewer = self.mujoco.get_viewer
    self.log_diagnostics = self.mujoco.log_diagnostics

    # NOTE(review): only *args/**kwargs reach Serializable; xml_file,
    # distance_metric_order and distractors are not forwarded -- confirm this
    # is what serialization should record.
    Serializable.__init__(self, *args, **kwargs)
def reset(self, reset_args=None, **kwargs):
    """Reset the environment, loading another model when the goal changes.

    Args:
        reset_args: either ``None`` or a length-2 sequence ``(goal_num, test)``.
            When given and different from the current ``self.goal_num`` /
            ``self.test``, the demo pickle for that goal is loaded and a
            ``MujocoEnv`` is created from the XML path stored in it. When
            ``None``, a goal is sampled only if ``self.goal_num`` is still
            ``None``.
        **kwargs: accepted and ignored.

    Returns:
        Whatever ``get_current_obs()`` returns after ``viewer_setup()`` and
        ``reset_model()``.

    Raises:
        AssertionError: if ``reset_args`` is given but is not of length 2.
    """
    explicit_goal = reset_args is not None
    if explicit_goal:
        assert len(reset_args) == 2, "wrong size goal"
        wanted_num, wanted_test = reset_args
        reload_model = (wanted_num != self.goal_num) or (wanted_test != self.test)
        if reload_model:
            # NOTE(review): the original indentation was lost; terminate() is
            # assumed to sit inside the viewer check -- confirm.
            if self.viewer is not None:
                self.stop_viewer()
                self.terminate()
            self.goal_num, self.test = reset_args
    else:
        # With an existing goal_num nothing is resampled; only the model state
        # is reset further down.
        reload_model = self.goal_num is None
        if reload_model:
            self.goal_num, self.test = self.sample_goals(num_goals=1, test=False)[0]

    if reload_model:
        demo_dir = self.test_dir if self.test else self.train_dir
        demo_data = joblib.load(demo_dir + str(self.goal_num) + ".pkl")
        # The pickled path points at the original training machine; retarget
        # it to the local model directory.
        xml_file = demo_data["xml"].replace(
            "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)
        if explicit_goal:
            print("debug,xml_file", xml_file)

        # The 5th character from the end of the path is parsed as a digit
        # (presumably the last character before ".xml" -- verify); an odd
        # digit flips the order.
        if int(xml_file[-5]) % 2 != 0:
            print("flipping order")
            self.shuffle_order = [1, 0]
        else:
            print("retaining order")
            self.shuffle_order = [0, 1]

        # NOTE(review): this only rebinds the local name `self`. The caller's
        # object keeps the updated goal_num/test/shuffle_order but not the new
        # simulator, and the three calls below run on the fresh MujocoEnv
        # instead. Kept as in the original; the intended fix (re-initialising
        # in place or storing the simulator on an attribute) needs the class
        # definition -- confirm.
        self = mujoco_env.MujocoEnv(file_path=xml_file)

    # NOTE(review): viewer_setup() is assumed to run on every reset rather
    # than only in the first-time branch -- confirm.
    self.viewer_setup()
    self.reset_model()
    return self.get_current_obs()
def __init__(self, xml_file=None, goal_num=None, distance_metric_order=None, distractors=True, *args, **kwargs):
    """Build the two-distractor 7-DoF reacher wrapper around a ``MujocoEnv``.

    Args:
        xml_file: path of the MuJoCo model to load. When ``None`` the
            two-distractor model named after ``self.shuffle_order`` is used.
        goal_num: index (0, 1 or 2) into the table of shuffle orders. When
            ``None`` an index is drawn with ``np.random.choice``. The argument
            used to be ignored and always overwritten by a random draw.
        distance_metric_order: not used in this constructor.
        distractors: must compare equal to ``True``; anything else is rejected.
        *args: forwarded to ``Serializable.__init__``.
        **kwargs: forwarded to ``Serializable.__init__``.

    Raises:
        AssertionError: if ``distractors`` does not compare equal to ``True``.
        IndexError: if ``goal_num`` is outside the shuffle-order table.
    """
    if goal_num is None:
        goal_num = np.random.choice([0, 1, 2])
    self.goal_num = goal_num
    self.shuffle_order = [[0, 1, 2], [1, 2, 0], [2, 0, 1]][self.goal_num]

    self.include_distractors = distractors
    # Equality (not identity) is kept so values such as 1 are still accepted.
    assert distractors == True, "not supported"

    if xml_file is None:
        xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml' % tuple(
            self.shuffle_order)
    # NOTE(review): the original indentation was lost; the print is assumed to
    # run for caller-supplied paths as well -- confirm.
    print("xml file", xml_file)

    self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file)
    self.viewer_setup()

    # Re-export parts of the wrapped simulator's interface on this object.
    self.action_space = self.mujoco.action_space
    self.get_viewer = self.mujoco.get_viewer
    self.log_diagnostics = self.mujoco.log_diagnostics

    # NOTE(review): only *args/**kwargs reach Serializable; the named
    # parameters (including goal_num) are not forwarded -- confirm this is
    # what serialization should record.
    Serializable.__init__(self, *args, **kwargs)