コード例 #1
0
    def reset(self, reset_args=None, **kwargs):
        """Reset the environment, rebuilding the simulator if the goal changed.

        Args:
            reset_args: ``None``, or a length-2 sequence ``(goal_num, test)``.
                ``goal_num`` names the demo pickle to load and ``test``
                selects ``self.test_dir`` over ``self.train_dir``.
            **kwargs: Accepted but not read by this method.

        Returns:
            The value of ``self.get_current_obs()`` taken after
            ``self.reset_model()`` has been called.
        """
        if reset_args is None:
            logger.log("Debug, warning, reset_args for env is None")
            # A goal that was already chosen is kept as-is; one is sampled
            # only when the environment has never had a goal.
            if self.goal_num is None:
                self.goal_num, self.test = self.sample_goals(num_goals=1,
                                                             test=False)[0]
                demo_dir = self.test_dir if self.test else self.train_dir
                demo = joblib.load(demo_dir + str(self.goal_num) + ".pkl")
                # The pickled path points at the machine that recorded the
                # demo; swap that prefix for the local model directory.
                model_xml = demo["xml"].replace(
                    "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)

                # The fifth character from the end (the one right before a
                # trailing ".xml") decides the object order.
                even_variant = int(model_xml[-5]) % 2 == 0
                if even_variant and not self.debug:
                    print("inverted_order, first time initializing env",
                          model_xml)
                    self.shuffle_order = [1, 0]
                else:
                    print("normal_order, first time initializing env",
                          model_xml)
                    self.shuffle_order = [0, 1]
                self.mujoco = mujoco_env.MujocoEnv(file_path=model_xml)
        else:
            assert len(reset_args) == 2, "wrong size goal"
            requested_num, requested_test = reset_args
            if (requested_num != self.goal_num) or (requested_test !=
                                                    self.test):
                # Tear down the current simulator before loading another one.
                if self.mujoco.viewer is not None:
                    self.mujoco.stop_viewer()
                self.mujoco.terminate()

                self.goal_num, self.test = reset_args
                demo_dir = self.test_dir if self.test else self.train_dir
                demo = joblib.load(demo_dir + str(self.goal_num) + ".pkl")
                model_xml = demo["xml"].replace(
                    "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)

                even_variant = int(model_xml[-5]) % 2 == 0
                if even_variant and not self.debug:
                    print("inverted_order", model_xml, self.goal_num,
                          self.test)
                    self.shuffle_order = [1, 0]
                else:
                    print("normal_order", model_xml, self.goal_num, self.test)
                    self.shuffle_order = [0, 1]
                self.mujoco = mujoco_env.MujocoEnv(file_path=model_xml)

        self.reset_model()
        return self.get_current_obs()
コード例 #2
0
 def reset(self, reset_args=None, **kwargs):
     """Reset the reacher, optionally switching to another goal variant.

     Args:
         reset_args: Index (0, 1 or 2) into the shuffle-order table that
             selects which model XML to load, or ``None`` to keep the
             current variant. If ``None`` and no variant has been chosen
             yet, one is drawn at random.
         **kwargs: Accepted but not read by this method.

     Returns:
         Element 1 of ``self.get_current_image_obs()``.
     """
     shuffle_orders = [[0, 1, 2], [1, 2, 0], [2, 0, 1]]
     xml_template = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml'

     # The initial state is copied from the simulator that is active on
     # entry, i.e. before any model swap below.
     # NOTE(review): this presumes every XML variant shares the same state
     # layout - confirm.
     qpos = np.copy(self.mujoco.init_qpos)
     # The noise term is scaled by 0.0, so it contributes nothing; the draw
     # is kept so the simulator's RNG stream advances exactly as before.
     qvel = np.copy(
         self.mujoco.init_qvel) + 0.0 * self.mujoco.np_random.uniform(
             low=-0.005, high=0.005, size=self.mujoco.model.nv)

     goal_num = reset_args
     if goal_num is not None:
         if self.goal_num != goal_num:
             self.goal_num = goal_num
             self.shuffle_order = shuffle_orders[self.goal_num]
             # NOTE(review): the previous simulator is not stopped or
             # released on this path (unlike the branch below) - confirm
             # this is intentional.
             self.mujoco = mujoco_env.MujocoEnv(
                 file_path=xml_template % tuple(self.shuffle_order))
             self.viewer_setup()
     elif self.goal_num is None:
         # Do not change the goal colour or XML file between resets: a
         # variant is only drawn when none has been chosen so far.
         self.goal_num = np.random.choice([0, 1, 2])
         self.shuffle_order = shuffle_orders[self.goal_num]
         if self.mujoco.viewer is not None:
             self.mujoco.stop_viewer()
             self.mujoco.release()
             self.mujoco.terminate()
         self.mujoco = mujoco_env.MujocoEnv(
             file_path=xml_template % tuple(self.shuffle_order))
         self.viewer_setup()

     # Sample the goal and the two distractors from the same box; the draw
     # order (goal, distract1, distract2) is kept for RNG reproducibility.
     low = [-0.4, -0.4, -0.3]
     high = [0.4, 0.0, -0.3]
     self.goal = np.random.uniform(low=low, high=high).reshape(3, 1)
     self.distract1 = np.random.uniform(low=low, high=high).reshape(3, 1)
     self.distract2 = np.random.uniform(low=low, high=high).reshape(3, 1)

     qpos[-14:-11] = self.distract1
     qpos[-21:-18] = self.distract2
     qpos[-7:-4] = self.goal
     qvel[-7:] = 0

     # Write the state once (the original assigned qvel twice in a row).
     self.mujoco.model.data.qpos = qpos
     self.mujoco.model.data.qvel = qvel
     self.mujoco.model._compute_subtree()
     self.mujoco.model.forward()

     self.current_com = self.mujoco.model.data.com_subtree[0]
     self.dcom = np.zeros_like(self.current_com)
     return self.get_current_image_obs()[1]
コード例 #3
0
    def __init__(self,
                 xml_file=None,
                 distance_metric_order=None,
                 distractors=True,
                 *args,
                 **kwargs):
        """Build the reacher environment around a ``MujocoEnv`` simulator.

        Args:
            xml_file: Path of the model XML to load. When ``None`` a default
                path is chosen depending on ``distractors``.
            distance_metric_order: Accepted but not read by this constructor.
            distractors: Whether the distractor variant of the model is used.
            *args: Forwarded to ``Serializable.__init__``.
            **kwargs: Forwarded to ``Serializable.__init__``; an optional
                ``'noise'`` entry is also passed to the simulator as
                ``action_noise`` (default ``0.0``).
        """
        self.goal = None
        action_noise = kwargs.get('noise', 0.0)

        self.include_distractors = distractors
        if self.include_distractors:
            # Fixed object ordering used to pick the distractor model file.
            self.shuffle_order = [2, 0, 1]

        if xml_file is None:
            if self.include_distractors:
                xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml' % tuple(
                    self.shuffle_order)
            else:
                xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof.xml'

        print("xml file", xml_file)
        self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file,
                                           action_noise=action_noise)

        # Expose selected simulator members directly on this object.
        self.action_space = self.mujoco.action_space
        self.get_viewer = self.mujoco.get_viewer
        self.log_diagnostics = self.mujoco.log_diagnostics

        # NOTE(review): only *args/**kwargs reach Serializable; the named
        # parameters above are not forwarded - confirm this is intended.
        Serializable.__init__(self, *args, **kwargs)
コード例 #4
0
    def reset(self, reset_args=None, **kwargs):
        """Reset the environment, reloading the model if the goal changed.

        Args:
            reset_args: ``None``, or a length-2 sequence ``(goal_num, test)``.
                ``goal_num`` names the demo pickle to load and ``test``
                selects ``self.test_dir`` over ``self.train_dir``.
            **kwargs: Accepted but not read by this method.

        Returns:
            The value of ``self.get_current_obs()`` taken after
            ``self.reset_model()`` has been called.
        """
        goal = reset_args
        if goal is not None:
            assert len(goal) == 2, "wrong size goal"
            goal_num, test = goal
            if (goal_num != self.goal_num) or (test != self.test):
                # Tear down the current simulator before loading another one.
                if self.viewer is not None:
                    self.stop_viewer()
                self.terminate()
                self.goal_num, self.test = goal
                demo_path = (self.train_dir + str(self.goal_num) +
                             ".pkl") if not self.test else (
                                 self.test_dir + str(self.goal_num) + ".pkl")
                demo_data = joblib.load(demo_path)
                xml_file = demo_data["xml"]
                # The pickled path points at the machine that recorded the
                # demo; swap that prefix for the local model directory.
                xml_file = xml_file.replace(
                    "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)
                print("debug,xml_file", xml_file)
                # The fifth character from the end (the one right before a
                # trailing ".xml") decides the object order.
                if int(xml_file[-5]) % 2 == 0:
                    print("retaining order")
                    self.shuffle_order = [0, 1]
                else:
                    print("flipping order")
                    self.shuffle_order = [1, 0]
                # Re-initialise this instance in place. The previous
                # `self = MujocoEnv(...)` only rebound the local name, so the
                # real environment never received the new model and the
                # calls below ran on a throwaway object.
                # NOTE(review): assumes this class derives from
                # mujoco_env.MujocoEnv (it uses self.viewer / self.terminate
                # directly) - confirm.
                mujoco_env.MujocoEnv.__init__(self, file_path=xml_file)
        elif self.goal_num is None:
            # If we already have a goal_num we don't sample a new one; we
            # just reset the model.
            self.goal_num, self.test = self.sample_goals(num_goals=1,
                                                         test=False)[0]
            demo_path = (self.train_dir + str(self.goal_num) +
                         ".pkl") if not self.test else (self.test_dir +
                                                        str(self.goal_num) +
                                                        ".pkl")
            demo_data = joblib.load(demo_path)
            xml_file = demo_data["xml"]
            xml_file = xml_file.replace(
                "/root/code/rllab/vendor/mujoco_models/", self.xml_dir)

            if int(xml_file[-5]) % 2 == 0:
                print("retaining order")
                self.shuffle_order = [0, 1]
            else:
                print("flipping order")
                self.shuffle_order = [1, 0]
            # In-place re-initialisation, for the same reason as above.
            mujoco_env.MujocoEnv.__init__(self, file_path=xml_file)
            self.viewer_setup()
        self.reset_model()
        return self.get_current_obs()
コード例 #5
0
    def __init__(self,
                 xml_file=None,
                 goal_num=None,
                 distance_metric_order=None,
                 distractors=True,
                 *args,
                 **kwargs):
        """Build the distractor reacher environment around a ``MujocoEnv``.

        Args:
            xml_file: Path of the model XML to load. When ``None`` the path
                is derived from the shuffle order selected by ``goal_num``.
            goal_num: Index (0, 1 or 2) into the shuffle-order table. When
                ``None`` an index is drawn at random.
            distance_metric_order: Accepted but not read by this constructor.
            distractors: Must compare equal to ``True``; the variant without
                distractors is not supported.
            *args: Forwarded to ``Serializable.__init__``.
            **kwargs: Forwarded to ``Serializable.__init__``.

        Raises:
            AssertionError: If ``distractors`` is not ``True``.
        """
        # Honour an explicitly requested goal; previously the argument was
        # silently ignored and a random goal was always drawn.
        if goal_num is None:
            goal_num = np.random.choice([0, 1, 2])
        self.goal_num = goal_num
        self.shuffle_order = [[0, 1, 2], [1, 2, 0], [2, 0, 1]][self.goal_num]

        self.include_distractors = distractors
        assert distractors == True, "not supported"

        if xml_file is None:
            xml_file = '/home/kevin/maml_rl/vendor/mujoco_models/r7dof_versions/reacher_7dof_2distr_%s%s%s.xml' % tuple(
                self.shuffle_order)

        print("xml file", xml_file)
        self.mujoco = mujoco_env.MujocoEnv(file_path=xml_file)
        self.viewer_setup()

        # Expose selected simulator members directly on this object.
        self.action_space = self.mujoco.action_space
        self.get_viewer = self.mujoco.get_viewer
        self.log_diagnostics = self.mujoco.log_diagnostics

        # NOTE(review): only *args/**kwargs reach Serializable; the named
        # parameters above are not forwarded - confirm this is intended.
        Serializable.__init__(self, *args, **kwargs)