Example #1
    def _forward_sample_open_loop(self):
        """
        Sampling method.

        Open loop forward sampling from demonstrations. Starts by
        sampling from states near the beginning of the demonstrations.
        Increases the window forwards as the number of calls to
        this sampling method increases at a fixed rate.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # sample uniformly in a window that grows forwards from the beginning of the demos
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        eps_len = states.shape[0]
        index = np.random.randint(0, min(self.open_loop_window_size, eps_len))
        state = states[index]

        # increase window size at a fixed frequency (open loop)
        self.demo_sampled += 1
        if self.demo_sampled >= self.open_loop_increment_freq:
            if self.open_loop_window_size < eps_len:
                self.open_loop_window_size += self.open_loop_window_increment
            self.demo_sampled = 0

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml

        return state
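The growing-window behaviour above is easy to check in isolation. The following is a minimal, self-contained sketch of the same logic; the class name and constructor defaults are illustrative and not part of robosuite.

import numpy as np

class OpenLoopForwardWindow:
    """Illustrative re-implementation of the window logic in _forward_sample_open_loop."""

    def __init__(self, window_size=10, increment_freq=100, window_increment=10):
        self.window_size = window_size            # current window length, in timesteps
        self.increment_freq = increment_freq      # grow the window after this many samples
        self.window_increment = window_increment  # how much the window grows each time
        self.num_sampled = 0

    def sample_index(self, episode_length):
        # sample uniformly from the first `window_size` states of the episode
        index = np.random.randint(0, min(self.window_size, episode_length))

        # grow the window at a fixed frequency, as in the example above
        self.num_sampled += 1
        if self.num_sampled >= self.increment_freq:
            if self.window_size < episode_length:
                self.window_size += self.window_increment
            self.num_sampled = 0
        return index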
Example #2
    def load_episode_xml(self, demo_num):
        """
        Loads demo episode with specified @demo_num into the simulator.

        Args:
            demo_num (int): Demonstration number to load
        """
        # Grab raw xml file
        ep = self.demos[demo_num]
        model_xml = self.f[f"data/{ep}"].attrs["model_file"]

        # Reset environment
        self.env.reset()
        xml = postprocess_model_xml(model_xml)
        xml = self.modify_xml_for_camera_movement(xml, camera_name=self.camera)
        self.env.reset_from_xml_string(xml)
        self.env.sim.reset()

        # Update camera info
        self.camera_id = self.env.sim.model.camera_name2id(self.camera)

        # Load states and actions
        self.states = self.f[f"data/{ep}/states"].value
        self.actions = np.array(self.f[f"data/{ep}/actions"].value)

        # Set initial state
        self.env.sim.set_state_from_flattened(self.states[0])

        # Reset step count and set current episode number
        self.step = 0
        self.n_steps = len(self.actions)
        self.current_ep = demo_num

        # Notify user of loaded episode
        print(f"Loaded episode {demo_num}.")
Example #3
def render(args, f, env):
    demos = list(f["data"].keys())
    for key in tqdm.tqdm(demos):
        # read the model xml, using the metadata stored in the attribute for this episode
        model_file = f["data/{}".format(key)].attrs["model_file"]
        model_path = os.path.join(args.demo_folder, "models", model_file)
        with open(model_path, "r") as model_f:
            model_xml = model_f.read()

        env.reset()
        xml = postprocess_model_xml(model_xml)
        env.reset_from_xml_string(xml)
        env.sim.reset()

        # load + subsample data
        states, _ = FixedFreqSubsampler(n_skip=args.skip_frame)(f["data/{}/states".format(key)].value)
        d_pos, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=SumAggregator()) \
                    (f["data/{}/right_dpos".format(key)].value, aggregate=True)
        d_quat, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=QuaternionAggregator()) \
                     (f["data/{}/right_dquat".format(key)].value, aggregate=True)
        gripper_actuation, _ = FixedFreqSubsampler(n_skip=args.skip_frame)(f["data/{}/gripper_actuations".format(key)].value)
        joint_velocities, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=SumAggregator()) \
                                (f["data/{}/joint_velocities".format(key)].value, aggregate=True)

        n_steps = states.shape[0]
        if args.target_length is not None and n_steps > args.target_length:
            continue

        # force the sequence of internal mujoco states one by one
        frames = []
        for i, state in enumerate(states):
            env.sim.set_state_from_flattened(state)
            env.sim.forward()
            obs = env._get_observation()
            frame = obs["image"][::-1]
            frames.append(frame)

        frames = np.stack(frames, axis=0)
        actions = np.concatenate((d_pos, d_quat, gripper_actuation), axis=-1)

        pad_mask = np.ones((n_steps,)) if n_steps == args.target_length \
                        else np.concatenate((np.ones((n_steps,)), np.zeros((args.target_length - n_steps,))))

        h5_path = os.path.join(args.output_path, "seq_{}.h5".format(key))
        with h5py.File(h5_path, 'w') as F:
            F['traj_per_file'] = 1
            F["traj0/images"] = frames
            F["traj0/actions"] = actions
            F["traj0/states"] = states
            F["traj0/pad_mask"] = pad_mask
            F["traj0/joint_velocities"] = joint_velocities
Example #4
    def _uniform_sample_half(self):
        """
        Uniformly sample states from the first half of a demo.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        state = random.choice(states[0:len(states) // 2])

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state
Example #5
    def _uniform_sample(self):
        """
        Sampling method.

        First uniformly sample a demonstration from the set of demonstrations.
        Then uniformly sample a state from the selected demonstration.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        state = random.choice(states)

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state
Example #6
def gen_gifs(args, f, env, target_length=None):
    demos = list(f["data"].keys())
    for key in tqdm.tqdm(demos):
        # load the flattened mujoco states
        states = f["data/{}/states".format(key)].value
        seq_length = steps2length(states.shape[0])
        if target_length is not None and np.abs(seq_length -
                                                target_length) > 1:
            continue

        # read the model xml, using the metadata stored in the attribute for this episode
        model_file = f["data/{}".format(key)].attrs["model_file"]
        model_path = os.path.join(args.demo_folder, "models", model_file)
        with open(model_path, "r") as model_f:
            model_xml = model_f.read()

        env.reset()
        xml = postprocess_model_xml(model_xml)
        env.reset_from_xml_string(xml)
        env.sim.reset()

        # force the sequence of internal mujoco states one by one
        frames = []
        for i in tqdm.tqdm(range(states.shape[0])):
            if i % args.skip_frame == 0:
                env.sim.set_state_from_flattened(states[i])
                env.sim.forward()
                obs = env._get_observation()
                frame = obs["image"][::-1]
                frames.append(frame)

        frames = np.stack(frames, axis=0)
        fig_file_name = os.path.join(args.output_path, "seq_{}".format(key))
        if target_length is not None:
            fig_file_name += "_len_{}".format(target_length)
        save_gif(fig_file_name + ".gif", frames, fps=15)
        if target_length is not None:
            return True
    return False if target_length is not None else True
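If the local save_gif helper is not available, imageio (already used in Example #10) can write the stacked frames directly; a hedged drop-in for the call above:

import imageio

# `frames` is the (T, H, W, 3) uint8 array stacked above; note that newer imageio
# releases expect duration= (seconds per frame) instead of fps= for GIF output
imageio.mimsave(fig_file_name + ".gif", list(frames), fps=15)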
Example #7
    def _reverse_sample_open_loop(self):
        """
        Sampling method.

        Open loop reverse sampling from demonstrations. Starts by
        sampling from states near the end of the demonstrations.
        Increases the window backwards as the number of calls to
        this sampling method increases at a fixed rate.

        Returns:
            np.array or 2-tuple: If np.array, is the state sampled from a demo file. If 2-tuple, additionally
                includes the model xml file
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # sample uniformly in a window that grows backwards from the end of the demos
        states = self.demo_file["data/{}/states".format(ep_ind)][()]
        eps_len = states.shape[0]
        index = np.random.randint(max(eps_len - self.open_loop_window_size, 0),
                                  eps_len)
        state = states[index]

        # increase window size at a fixed frequency (open loop)
        self.demo_sampled += 1
        if self.demo_sampled >= self.open_loop_increment_freq:
            if self.open_loop_window_size < eps_len:
                self.open_loop_window_size += self.open_loop_window_increment
            self.demo_sampled = 0

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml

        return state
Example #8
    def _uniform_sample(self):
        """
        Sampling method.

        First uniformly sample a demonstration from the set of demonstrations.
        Then uniformly sample a state from the selected demonstration.

        Returns:
            np.array or 2-tuple: If np.array, is the state sampled from a demo file. If 2-tuple, additionally
                includes the model xml file
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)][()]
        state = random.choice(states)

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state
Example #9
    def _load_raw_data(self, itr):
        """
        doing the reverse of save_raw_data
        :param itr:
        :return:
        """
        # NOTE: this excerpt is abridged; Example #11 below shows the full method, where
        # `traj` is the per-trajectory folder and `demo_images` is filled by reading the
        # stored .png frames before this point.
        traj = self._start_goal_confs + 'itr.hdf5'
        self._demo_images = demo_images.astype(np.float32) / 255.
        self._goal_image = self._demo_images[-1]

        obs_dict = {}
        with open('{}/obs_dict.pkl'.format(traj), 'rb') as file:
            obs_dict.update(pkl.load(file))

        self._goal = self.env.get_goal_from_obs(obs_dict)
        reset_state = self.get_reset_state(obs_dict)

        if os.path.exists(traj + '/robosuite.xml'):
            with open(traj + '/robosuite.xml', "r") as model_f:
                model_xml = model_f.read()

            xml = postprocess_model_xml(model_xml)
            reset_state['robosuite_xml'] = xml

        return reset_state
Example #10
    writer = imageio.get_writer('./test.mp4', fps=20)

    while True:
        print("Playing back random episode... (press ESC to quit)")

        # select an episode randomly
        ep = random.choice(demos)

        # read the model xml, using the metadata stored in the attribute for this episode
        model_file = f["data/{}".format(ep)].attrs["model_file"]
        model_path = os.path.join(demo_path, "models", model_file)
        with open(model_path, "r") as model_f:
            model_xml = model_f.read()

        env.reset()
        xml = postprocess_model_xml(model_xml)
        env.reset_from_xml_string(xml)
        env.sim.reset()
        env.viewer.set_camera(0)

        # load the flattened mujoco states
        states = f["data/{}/states".format(ep)].value

        if args.use_actions:

            # load the initial state
            env.sim.set_state_from_flattened(states[0])
            if not args.visualize_gripper:
                # We make the gripper site invisible
                robot = env.robots[0]
                env.sim.model.site_rgba[robot.eef_site_id] = np.zeros(4)
Example #11
    def _load_raw_data(self, itr):
        """
        doing the reverse of save_raw_data
        :param itr:
        :return:
        """
        if 'robot_name' in self._hp.env[
                1]:  # robot experiments don't have a reset state
            return None

        if 'iex' in self._hp:
            itr = self._hp.iex

        ngroup = 1000
        igrp = itr // ngroup
        group_folder = '{}/traj_group{}'.format(self._start_goal_confs, igrp)
        traj_folder = group_folder + '/traj{}'.format(itr)

        print('reading from: ', traj_folder)
        num_files = len(glob.glob("{}/images0/*.png".format(traj_folder)))
        assert num_files > 0, " no files found!"

        obs_dict = {}
        demo_images = np.zeros([
            num_files, self.ncam, self._hp.image_height, self._hp.image_width,
            3
        ])
        for t in [0, num_files - 1]:  #range(num_files):
            for c in range(self.ncam):
                image_file = '{}/images{}/im_{}.png'.format(traj_folder, c, t)
                if not os.path.isfile(image_file):
                    raise ValueError(
                        "Can't find goal image: {}".format(image_file))
                img = cv2.imread(image_file)[..., ::-1]
                if img.shape[0] != self._hp.image_height or img.shape[
                        1] != self._hp.image_width:
                    img = Image.fromarray(img)
                    # PIL's resize expects (width, height), so pass the size in that order
                    img = img.resize(
                        (self._hp.image_width, self._hp.image_height),
                        PIL.Image.BILINEAR)
                    img = np.asarray(img, dtype=np.uint8)
                demo_images[t, c] = img
        self._demo_images = demo_images.astype(np.float32) / 255.

        self._goal_image = self._demo_images[-1]

        with open('{}/obs_dict.pkl'.format(traj_folder), 'rb') as file:
            obs_dict.update(pkl.load(file))

        self._goal = self.env.get_goal_from_obs(obs_dict)
        reset_state = self.get_reset_state(obs_dict)

        if os.path.exists(traj_folder + '/robosuite.xml'):
            with open(traj_folder + '/robosuite.xml', "r") as model_f:
                model_xml = model_f.read()

            from robosuite.utils.mjcf_utils import postprocess_model_xml
            xml = postprocess_model_xml(model_xml)
            reset_state['robosuite_xml'] = xml

        return reset_state
Example #12
    def extract_states_for_episode_id(self, demo_id):
        """
        Extracts states and actions from a particular demonstration episode
        and writes it to the output hdf5 file.

        Args:
            demo_id (int): An episode index to extract states and actions for.
        """
        
        ep = self.demos[demo_id]
        ep_data_grp = self.f_sars_grp.create_group("demo_{}".format(demo_id))

        # read the model xml, using the metadata stored in the attribute for this episode
        model_file = self.f["data/{}".format(ep)].attrs["model_file"]
        model_path = os.path.join(self.demo_path, "models", model_file)
        with open(model_path, "r") as model_f:
            model_xml = model_f.read()

        # also store the location of model xml in the new hdf5 file
        ep_data_grp.attrs["model_file"] = model_file

        self.env.reset()
        xml = postprocess_model_xml(model_xml)
        self.env.reset_from_xml_string(xml)
        self.env.sim.reset()

        # load the flattened mujoco states and the actions in the environment
        states = self.f["data/{}/states".format(ep)][()]
        jvels = self.f["data/{}/joint_velocities".format(ep)][()]
        gripper_acts = self.f["data/{}/gripper_actuations".format(ep)][()]
        goal = np.array(self.f["data/{}".format(ep)].attrs["goal"])

        # assert gripper_acts.shape[1] == 1, 'gripper_acts shape: {}'.format(gripper_acts.shape)

        prev_obs = None
        prev_ac = None
        prev_rew = None
        prev_done = None
        prev_state = None
        ep_obs = []
        ep_acts = []
        ep_rews = []
        ep_next_obs = []
        ep_dones = []
        ep_states = []
        if self.use_images:
            prev_image = None
            ep_images = []
            ep_next_images = []

        # prev_eef_pos = None
        # prev_eef_rot = None

        # force the sequence of internal mujoco states one by one
        for t in range(len(states)):
            # self.env.sim.reset()
            self.env.sim.set_state_from_flattened(states[t])
            self.env.sim.forward()

            # make teleop visualization site colors transparent
            self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.])
            self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.])

            obs_dict = self.env._get_observation()
            if self.use_images or self.robot_only:
                obs = np.array(obs_dict["robot-state"])
            elif self.object_only:
                obs = np.array(obs_dict["object-state"])
            else:
                obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]])
            action = np.concatenate([jvels[t], gripper_acts[t]])
            if self.use_images:
                image = np.array(obs_dict["image"])[::-1]

            # NOTE: our tasks use reward r(s'), i.e. the reward AFTER the transition,
            # so this is the reward for the previous timestep
            prev_rew = self.env.reward(None) 
            prev_done = 0
            if self.env._check_success():
                prev_done = 1

            # skips the first iteration
            if prev_obs is not None: 
                # record (s, a) from last iteration and s' from this one
                ep_obs.append(prev_obs) 
                ep_acts.append(prev_ac)
                ep_rews.append(prev_rew)
                ep_next_obs.append(obs)
                ep_dones.append(prev_done)
                ep_states.append(prev_state)
                if self.use_images:
                    ep_images.append(prev_image)
                    ep_next_images.append(image)
                    # im = Image.fromarray(prev_image)
                    # path = pjoin("tmp", "img%06d.png"%t)
                    # im.save(path)

            prev_obs = np.array(obs)
            prev_ac = np.array(action)
            prev_state = np.array(states[t])
            if self.use_images:
                prev_image = np.array(image)

        # if len(states) == len(jvels):
        assert(len(states) == len(jvels))
        
        # play the last action to get one more additional data point.
        # this might be critical if only the last state got a reward in
        # the sparse reward setting. 
        # self.env.sim.reset()
        self.env.sim.set_state_from_flattened(states[-1])
        self.env.sim.forward()

        # make teleop visualization site colors transparent
        self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.])
        self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.])

        obs_dict = self.env._get_observation()
        if self.use_images or self.robot_only:
            obs = np.array(obs_dict["robot-state"])
        elif self.object_only:
            obs = np.array(obs_dict["object-state"])
        else:
            obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]])
        action = np.concatenate([jvels[-1], gripper_acts[-1]])
        if self.use_images:
            image = np.array(obs_dict["image"])[::-1]
        self.env.step(action)

        reward = self.env.reward(None)
        done = 0
        if self.env._check_success():
            done = 1

        # ensure consistency from loop above
        assert(np.array_equal(prev_obs, obs)) 
        if not self.use_eef_actions:
            assert(np.array_equal(prev_ac, action))
        # assert(done == 1)

        obs_dict = self.env._get_observation()
        if self.use_images or self.robot_only:
            next_obs = np.array(obs_dict["robot-state"])
        elif self.object_only:
            next_obs = np.array(obs_dict["object-state"])
        else:
            next_obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]])
        if self.use_images:
            next_image = np.array(obs_dict["image"])[::-1]

        ep_obs.append(obs)
        ep_acts.append(action)
        ep_rews.append(reward)
        ep_next_obs.append(next_obs)
        ep_dones.append(done)
        ep_states.append(states[-1])
        if self.use_images:
            ep_images.append(image)
            ep_next_images.append(next_image)

        if self.use_eef_actions:

            # re-run through transitions to rewrite actions as end effector actions, and
            # downsample the frames
            ep_eef_obs = []
            ep_eef_acts = []
            ep_eef_rews = []
            ep_eef_next_obs = []
            ep_eef_dones = []
            ep_eef_states = []
            if self.use_images:
                ep_eef_images = []
                ep_eef_next_images = []

            self.env.sim.reset()
            self.env.sim.set_state_from_flattened(ep_states[0])
            self.env.sim.forward()

            # make teleop visualization site colors transparent
            self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.])
            self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.])

            obs_dict = self.env._get_observation()
            prev_eef_pos = np.array(obs_dict["eef_pos"])
            prev_eef_rot = T.quat2mat(obs_dict["eef_quat"])
            prev_obs = np.array(ep_obs[0])
            prev_done = ep_dones[0]
            if self.use_images:
                prev_image = np.array(obs_dict["image"])[::-1]

            loop_cnt = self.eef_downsample
            num_transitions = len(ep_obs)
            while True:
                #self.env.sim.reset()
                self.env.sim.set_state_from_flattened(ep_states[loop_cnt])
                self.env.sim.forward()

                # make teleop visualization site colors transparent
                self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.])
                self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.])
                
                obs_dict = self.env._get_observation()
                cur_eef_pos = np.array(obs_dict["eef_pos"])
                cur_eef_rot = T.quat2mat(obs_dict["eef_quat"])
                dpos = cur_eef_pos - prev_eef_pos
                drotation = prev_eef_rot.T.dot(cur_eef_rot)
                dquat = T.mat2quat(drotation)

                # the current timestep lets us compute the delta pose from the last downsampled
                # timestep, which gives the action for that timestep; the gripper action is
                # taken from the last downsampled timestep
                prev_eef_action = np.concatenate([dpos, dquat, ep_acts[loop_cnt - self.eef_downsample][-1:]])
                
                # take reward from the immediate prior timestep, since rewards at s depend on s'
                prev_rew = ep_rews[loop_cnt - 1]
                cur_obs = np.array(ep_obs[loop_cnt])
                if self.use_images:
                    cur_image = np.array(ep_images[loop_cnt])

                ep_eef_obs.append(prev_obs)
                ep_eef_acts.append(prev_eef_action)
                ep_eef_rews.append(prev_rew)
                ep_eef_next_obs.append(cur_obs)
                ep_eef_dones.append(prev_done)
                if self.use_images:
                    ep_eef_images.append(prev_image)
                    ep_eef_next_images.append(cur_image)

                prev_obs = np.array(cur_obs)
                prev_done = ep_dones[loop_cnt]
                prev_eef_pos = np.array(cur_eef_pos)
                prev_eef_rot = np.array(cur_eef_rot)
                if self.use_images:
                    prev_image = np.array(cur_image)

                loop_cnt += self.eef_downsample
                if loop_cnt > num_transitions - 1:
                    if loop_cnt - self.eef_downsample != num_transitions - 1:
                        # back track to the last point to make sure we add it
                        loop_cnt = num_transitions - 1
                    else:
                        break

            # overwrite prior variables
            ep_obs = ep_eef_obs
            ep_acts = ep_eef_acts
            ep_rews = ep_eef_rews
            ep_next_obs = ep_eef_next_obs
            ep_dones = ep_eef_dones
            if self.use_images:
                ep_images = ep_eef_images
                ep_next_images = ep_eef_next_images

        # write datasets for states and actions
        ep_data_grp.create_dataset("obs", data=np.array(ep_obs))
        ep_data_grp.create_dataset("actions", data=np.array(ep_acts))
        ep_data_grp.create_dataset("rewards", data=np.array(ep_rews))
        ep_data_grp.create_dataset("next_obs", data=np.array(ep_next_obs))
        ep_data_grp.create_dataset("dones", data=np.array(ep_dones))
        ep_data_grp.create_dataset("states", data=np.array(ep_states))
        ep_data_grp.attrs["goal"] = goal
        if self.use_images:
            ep_data_grp.create_dataset("images", data=np.array(ep_images))
            ep_data_grp.create_dataset("next_images", data=np.array(ep_next_images))

        # write some metadata
        ep_data_grp.attrs["num_samples"] = len(ep_obs) # number of transitions in this episode
        print("ep {}: wrote {} transitions".format(demo_id, ep_data_grp.attrs["num_samples"]))
        return ep_data_grp.attrs["num_samples"]
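The per-episode groups written here can be consumed later with h5py. A hedged sketch, assuming the groups sit at the root of the output file (in practice they live under whatever group self.f_sars_grp points to) and that the file name below is illustrative:

import h5py

with h5py.File("extracted_sars.hdf5", "r") as f:
    grp = f["demo_0"]
    obs, actions = grp["obs"][()], grp["actions"][()]
    rewards, dones = grp["rewards"][()], grp["dones"][()]
    next_obs, states = grp["next_obs"][()], grp["states"][()]
    goal = grp.attrs["goal"]
    assert len(obs) == grp.attrs["num_samples"]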