Code Example #1
File: form2fit.py  Project: zp2388/google-research
    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations."""
        del validation_dataset

        for i in range(num_iter):
            obs, act, _ = dataset.random_sample()

            # Get heightmap from RGB-D images.
            configs = act['camera_config']
            colormap, heightmap = self.get_heightmap(obs, configs)

            # Get training labels from data sample.
            pose0, pose1 = act['params']['pose0'], act['params']['pose1']
            p0_position, p0_rotation = pose0[0], pose0[1]
            p0 = utils.position_to_pixel(p0_position, self.bounds,
                                         self.pixel_size)
            p0_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(p0_rotation)[2])
            p1_position, p1_rotation = pose1[0], pose1[1]
            p1 = utils.position_to_pixel(p1_position, self.bounds,
                                         self.pixel_size)
            p1_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(p1_rotation)[2])
            p1_theta = p1_theta - p0_theta
            p0_theta = 0

            # Concatenate color with depth images.
            input_image = np.concatenate(
                (colormap, heightmap[Ellipsis, None],
                 heightmap[Ellipsis, None], heightmap[Ellipsis, None]),
                axis=2)

            # Do data augmentation (perturb rotation and translation).
            input_image, _, roundedpixels, _ = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = roundedpixels

            # Compute training loss.
            loss0 = self.pick_model.train(input_image, p0, theta=0)
            loss1 = self.place_model.train(input_image, p1, theta=0)
            loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)
            with writer.as_default():
                tf.summary.scalar('pick_loss',
                                  self.pick_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('place_loss',
                                  self.place_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('match_loss',
                                  self.match_model.metric.result(),
                                  step=self.total_iter + i)
            print(
                f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
            )

        self.total_iter += num_iter
        self.save()
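
The p0_theta / p1_theta labels above are the negated in-plane (z) rotation read out of an xyzw quaternion via utils.get_rot_from_pybullet_quaternion. A minimal sketch of that conversion, assuming the helper simply wraps PyBullet's Euler conversion (an assumption; the helper itself is not shown in this file):

import numpy as np
import pybullet as p

def get_rot_from_pybullet_quaternion(quat_xyzw):
    # (x, y, z, w) quaternion -> (roll, pitch, yaw) in radians.
    return p.getEulerFromQuaternion(quat_xyzw)

# Example: a pure 90-degree yaw gives theta = -pi/2 after negation.
quat = p.getQuaternionFromEuler((0, 0, np.pi / 2))
theta = -get_rot_from_pybullet_quaternion(quat)[2]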
Code Example #2
  def extract_x_y_theta(self,
                        object_info,
                        t_worldaug_world=None,
                        preserve_theta=False):
    """Extract in-plane theta."""
    object_position = object_info[0]
    object_quat_xyzw = object_info[1]

    if t_worldaug_world is not None:
      object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                          object_quat_xyzw[1], object_quat_xyzw[2])
      t_world_object = transformations.quaternion_matrix(object_quat_wxyz)
      t_world_object[0:3, 3] = np.array(object_position)
      t_worldaug_object = t_worldaug_world @ t_world_object

      object_quat_wxyz = transformations.quaternion_from_matrix(
          t_worldaug_object)
      if not preserve_theta:
        object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                            object_quat_wxyz[3], object_quat_wxyz[0])
      object_position = t_worldaug_object[0:3, 3]

    object_xy = object_position[0:2]
    object_theta = -np.float32(
        utils.get_rot_from_pybullet_quaternion(object_quat_xyzw)
        [2]) / self.theta_scale
    return np.hstack(
        (object_xy,
         object_theta)).astype(np.float32), object_position, object_quat_xyzw
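
The manual reordering above is needed because PyBullet quaternions are (x, y, z, w) while the transformations module expects and returns (w, x, y, z). Two tiny helpers capture the convention; this is a sketch for clarity, not code from the project:

def xyzw_to_wxyz(q):
    # PyBullet (x, y, z, w) -> transformations (w, x, y, z).
    return (q[3], q[0], q[1], q[2])

def wxyz_to_xyzw(q):
    # transformations (w, x, y, z) -> PyBullet (x, y, z, w).
    return (q[1], q[2], q[3], q[0])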
Code Example #3
    def get_six_dof_object(self, object_info, t_worldaug_world=None):
        """Calculate the pose of 6DOF object."""
        object_position = object_info[0]
        object_quat_xyzw = object_info[1]

        if t_worldaug_world is not None:
            object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                                object_quat_xyzw[1], object_quat_xyzw[2])
            t_world_object = transformations.quaternion_matrix(
                object_quat_wxyz)
            t_world_object[0:3, 3] = np.array(object_position)
            t_worldaug_object = t_worldaug_world @ t_world_object

            object_quat_wxyz = transformations.quaternion_from_matrix(
                t_worldaug_object)
            object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                object_quat_wxyz[3], object_quat_wxyz[0])
            object_position = t_worldaug_object[0:3, 3]

        euler = utils.get_rot_from_pybullet_quaternion(object_quat_xyzw)
        roll = euler[0]
        pitch = euler[1]
        theta = -euler[2]

        return np.asarray([
            object_position[0], object_position[1], object_position[2], roll,
            pitch, theta
        ])
Code Example #4
 def base2origin(self, pos):
     pos = pos.copy()
     theta = utils.get_rot_from_pybullet_quaternion(self.base_rot)[2]
     x = np.cos(theta) * pos[0] - np.sin(theta) * pos[1] + self.base_pos[0]
     y = np.sin(theta) * pos[0] + np.cos(theta) * pos[1] + self.base_pos[1]
     z = pos[2] + self.base_pos[2]
     return np.array([x, y, z])
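
base2origin rotates the point about the z axis by the base yaw and then translates it by the base position; the same mapping can be written as one homogeneous transform. A hedged equivalent sketch (base_pos and theta are the same quantities used above):

import numpy as np

def base2origin_matrix(pos, base_pos, theta):
    t = np.eye(4)
    c, s = np.cos(theta), np.sin(theta)
    t[0:2, 0:2] = [[c, -s], [s, c]]    # in-plane rotation by the base yaw
    t[0:3, 3] = base_pos               # translation to the base position
    return (t @ np.append(pos, 1.0))[:3]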
Code Example #5
    def get_six_dof_act(self, transform_params, heightmap, pose0, pose1):
        """Adjust SE(3) poses via the in-plane SE(2) augmentation transform."""
        p1_position, p1_rotation = pose1[0], pose1[1]
        p0_position, p0_rotation = pose0[0], pose0[1]

        if transform_params is not None:
            t_world_center, t_world_centernew = utils.get_se3_from_image_transform(
                *transform_params, heightmap, self.bounds, self.pixel_size)
            t_worldnew_world = t_world_centernew @ np.linalg.inv(
                t_world_center)
        else:
            t_worldnew_world = np.eye(4)

        p1_quat_wxyz = (p1_rotation[3], p1_rotation[0], p1_rotation[1],
                        p1_rotation[2])
        t_world_p1 = transformations.quaternion_matrix(p1_quat_wxyz)
        t_world_p1[0:3, 3] = np.array(p1_position)

        t_worldnew_p1 = t_worldnew_world @ t_world_p1

        p0_quat_wxyz = (p0_rotation[3], p0_rotation[0], p0_rotation[1],
                        p0_rotation[2])
        t_world_p0 = transformations.quaternion_matrix(p0_quat_wxyz)
        t_world_p0[0:3, 3] = np.array(p0_position)
        t_worldnew_p0 = t_worldnew_world @ t_world_p0

        t_worldnew_p0theta0 = t_worldnew_p0 * 1.0
        t_worldnew_p0theta0[0:3, 0:3] = np.eye(3)

        # PLACE FRAME, adjusted for this 0 rotation on pick
        t_p0_p0theta0 = np.linalg.inv(t_worldnew_p0) @ t_worldnew_p0theta0
        t_worldnew_p1theta0 = t_worldnew_p1 @ t_p0_p0theta0

        # convert the above rotation to euler
        quatwxyz_worldnew_p1theta0 = transformations.quaternion_from_matrix(
            t_worldnew_p1theta0)
        q = quatwxyz_worldnew_p1theta0
        quatxyzw_worldnew_p1theta0 = (q[1], q[2], q[3], q[0])
        p1_rotation = quatxyzw_worldnew_p1theta0
        p1_euler = utils.get_rot_from_pybullet_quaternion(p1_rotation)

        roll_scaled = p1_euler[0] / self.theta_scale
        pitch_scaled = p1_euler[1] / self.theta_scale
        p1_theta_scaled = -p1_euler[2] / self.theta_scale

        x = p1_position[0]
        y = p1_position[1]
        z = p1_position[2]

        return np.array([x, y, p1_theta_scaled, roll_scaled, pitch_scaled, z])
Code Example #6
File: aligning.py  Project: zp2388/google-research
    def reset(self, env):
        self.num_steps = 1
        self.goal = {'places': {}, 'steps': []}

        # Generate randomly shaped box.
        box_size = self.random_size(0.05, 0.15, 0.05, 0.15, 0.01, 0.06)

        # Add corner.
        dimx = (box_size[0] / 2 - 0.025 + 0.0025, box_size[0] / 2 + 0.0025)
        dimy = (box_size[1] / 2 + 0.0025, box_size[1] / 2 - 0.025 + 0.0025)
        corner_template = 'assets/corner/corner-template.urdf'
        replace = {'DIMX': dimx, 'DIMY': dimy}
        corner_urdf = self.fill_template(corner_template, replace)
        corner_size = (box_size[0], box_size[1], 0)
        corner_pose = self.random_pose(env, corner_size)
        env.add_object(corner_urdf, corner_pose, fixed=True)
        os.remove(corner_urdf)

        # Add possible placing poses.
        theta = utils.get_rot_from_pybullet_quaternion(corner_pose[1])[2]
        flipped_rotation = utils.get_pybullet_quaternion_from_rot(
            (0, 0, theta + np.pi))
        flipped_pose = (corner_pose[0], flipped_rotation)
        alt_x = (box_size[0] / 2) - (box_size[1] / 2)
        alt_y = (box_size[1] / 2) - (box_size[0] / 2)
        alt_position = (alt_x, alt_y, 0)
        alt_rotation0 = utils.get_pybullet_quaternion_from_rot(
            (0, 0, np.pi / 2))
        alt_rotation1 = utils.get_pybullet_quaternion_from_rot(
            (0, 0, 3 * np.pi / 2))
        alt_pose0 = utils.multiply(corner_pose, (alt_position, alt_rotation0))
        alt_pose1 = utils.multiply(corner_pose, (alt_position, alt_rotation1))
        self.goal['places'] = {
            0: corner_pose,
            1: flipped_pose,
            2: alt_pose0,
            3: alt_pose1
        }

        # Add box.
        box_template = 'assets/box/box-template.urdf'
        box_urdf = self.fill_template(box_template, {'DIM': box_size})
        box_pose = self.random_pose(env, box_size)
        box_id = env.add_object(box_urdf, box_pose)
        os.remove(box_urdf)
        self.color_random_brown(box_id)
        self.goal['steps'].append({box_id: (2 * np.pi, [0, 1, 2, 3])})
Code Example #7
    def act(self, obs, info, compute_error=False, gt_act=None):
        """Run inference and return best action given visual observations."""

        act = {'camera_config': self.camera_config, 'primitive': None}
        if not obs:
            return act

        # Get heightmap from RGB-D images.
        colormap, heightmap = self.get_heightmap(obs, self.camera_config)

        # Concatenate color with depth images.
        input_image = np.concatenate(
            (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
             heightmap[Ellipsis, None]),
            axis=2)

        # Attention model forward pass.
        attention = self.attention_model.forward(input_image)
        argmax = np.argmax(attention)
        argmax = np.unravel_index(argmax, shape=attention.shape)
        p0_pixel = argmax[:2]
        p0_theta = argmax[2] * (2 * np.pi / attention.shape[2])

        # Transport model forward pass.
        transport = self.transport_model.forward(input_image, p0_pixel)
        _, z, roll, pitch = self.rpz_model.forward(input_image, p0_pixel)

        argmax = np.argmax(transport)
        argmax = np.unravel_index(argmax, shape=transport.shape)

        # Index into 3D discrete tensor, grab z, roll, pitch activations
        z_best = z[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
        roll_best = roll[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
        pitch_best = pitch[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]

        # Send through regressors for each of z, roll, pitch
        z_best = self.rpz_model.z_regressor(z_best)[0, 0]
        roll_best = self.rpz_model.roll_regressor(roll_best)[0, 0]
        pitch_best = self.rpz_model.pitch_regressor(pitch_best)[0, 0]

        p1_pixel = argmax[:2]
        p1_theta = argmax[2] * (2 * np.pi / transport.shape[2])

        # Pixels to end effector poses.
        p0_position = utils.pixel_to_position(p0_pixel, heightmap, self.bounds,
                                              self.pixel_size)
        p1_position = utils.pixel_to_position(p1_pixel, heightmap, self.bounds,
                                              self.pixel_size)

        p1_position = (p1_position[0], p1_position[1], z_best)

        p0_rotation = utils.get_pybullet_quaternion_from_rot((0, 0, -p0_theta))
        p1_rotation = utils.get_pybullet_quaternion_from_rot(
            (roll_best, pitch_best, -p1_theta))

        if compute_error:
            gt_p0_position, gt_p0_rotation = gt_act['params']['pose0']
            gt_p1_position, gt_p1_rotation = gt_act['params']['pose1']

            gt_p0_pixel = np.array(
                utils.position_to_pixel(gt_p0_position, self.bounds,
                                        self.pixel_size))
            gt_p1_pixel = np.array(
                utils.position_to_pixel(gt_p1_position, self.bounds,
                                        self.pixel_size))

            self.p0_pixel_error(
                np.linalg.norm(gt_p0_pixel - np.array(p0_pixel)))
            self.p1_pixel_error(
                np.linalg.norm(gt_p1_pixel - np.array(p1_pixel)))

            gt_p0_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(gt_p0_rotation)[2])
            gt_p1_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(gt_p1_rotation)[2])

            self.p0_theta_error(
                abs((np.rad2deg(gt_p0_theta - p0_theta) + 180) % 360 - 180))
            self.p1_theta_error(
                abs((np.rad2deg(gt_p1_theta - p1_theta) + 180) % 360 - 180))

            return None

        return self.p0_p1_position_rotations_to_act(act, p0_position,
                                                    p0_rotation, p1_position,
                                                    p1_rotation)
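
The action decoding above takes an argmax over a 3D score map (height x width x rotation bins): np.unravel_index turns the flat argmax into a pixel plus a rotation-bin index, and the bin index maps linearly to an angle. A toy illustration of that decoding (the shapes here are made up):

import numpy as np

scores = np.zeros((160, 320, 36), dtype=np.float32)
scores[40, 200, 9] = 1.0                        # pretend this is the best score

idx = np.unravel_index(np.argmax(scores), shape=scores.shape)
pixel = idx[:2]                                 # (40, 200)
theta = idx[2] * (2 * np.pi / scores.shape[2])  # 9 bins * 10 deg = pi/2 rad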
Code Example #8
    def get_data_batch(self, dataset, augment=True):
        """Use dataset to extract and preprocess data.

    Supports adding a goal image, in which case the current and goal
    images are stacked together channel-wise (first 6 for current, last 6
    for goal) before doing data augmentation, to ensure consistency.

    Args:
      dataset: a ravens.Dataset (train or validation)
      augment: if True, perform data augmentation.

    Returns:
      tuple of data for training:
        (input_image, p0, p0_theta, p1, p1_theta)
      The tuple additionally includes (z, roll, pitch) if self.six_dof.
      If self.use_goal_image, then the goal image is stacked with the
      current image in `input_image`. If splitting up current and goal
      images is desired, it should be done outside this method.
    """
        if self.use_goal_image:
            obs, act, _, goal = dataset.random_sample(goal_images=True)
        else:
            obs, act, _ = dataset.random_sample()

        # Get heightmap from RGB-D images, including goal images if specified.
        configs = act['camera_config']
        colormap, heightmap = self.get_heightmap(obs, configs)
        if self.use_goal_image:
            colormap_g, heightmap_g = self.get_heightmap(goal, configs)

        # Get training labels from data sample.
        pose0, pose1 = act['params']['pose0'], act['params']['pose1']
        p0_position, p0_rotation = pose0[0], pose0[1]
        p0 = utils.position_to_pixel(p0_position, self.bounds, self.pixel_size)
        p0_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(p0_rotation)[2])
        p1_position, p1_rotation = pose1[0], pose1[1]
        p1 = utils.position_to_pixel(p1_position, self.bounds, self.pixel_size)
        p1_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(p1_rotation)[2])

        # Concatenate color with depth images.
        input_image = self.concatenate_c_h(colormap, heightmap)

        # If using goal image, stack _with_ input_image before data augmentation.
        if self.use_goal_image:
            goal_image = self.concatenate_c_h(colormap_g, heightmap_g)
            input_image = np.concatenate((input_image, goal_image), axis=2)
            assert input_image.shape[2] == 12, input_image.shape

        # Do data augmentation (perturb rotation and translation).
        if augment:
            input_image, _, rounded_pixels, transform_params = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = rounded_pixels

        if self.six_dof:
            if not augment:
                transform_params = None
            p0_theta, p1_theta, z, roll, pitch = self.get_six_dof(
                transform_params, heightmap, pose0, pose1, augment=augment)
            return input_image, p0, p0_theta, p1, p1_theta, z, roll, pitch
        else:
            # If using a goal image, it is stacked with `input_image` and split later.
            p1_theta = p1_theta - p0_theta
            p0_theta = 0
            return input_image, p0, p0_theta, p1, p1_theta
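
In the non-six_dof branch, only the rotation of the place pose relative to the pick pose matters, so the labels are relabeled before being returned. A small worked illustration of that step:

# Absolute yaw labels (radians) extracted from the two poses.
p0_theta, p1_theta = 0.3, 1.1
p1_theta = p1_theta - p0_theta  # 0.8: place rotation relative to the pick
p0_theta = 0                    # the pick itself is treated as rotation-free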
Code Example #9
    def get_six_dof(self,
                    transform_params,
                    heightmap,
                    pose0,
                    pose1,
                    augment=True):
        """Adjust SE(3) poses via the in-plane SE(2) augmentation transform."""
        debug_visualize = False

        p1_position, p1_rotation = pose1[0], pose1[1]
        p0_position, p0_rotation = pose0[0], pose0[1]

        if debug_visualize:
            self.vis = utils.create_visualizer()
            self.transport_model.vis = self.vis

        if augment:
            t_world_center, t_world_centernew = utils.get_se3_from_image_transform(
                *transform_params, heightmap, self.bounds, self.pixel_size)

            if debug_visualize:
                label = 't_world_center'
                utils.make_frame(self.vis, label, h=0.05, radius=0.0012, o=1.0)
                self.vis[label].set_transform(t_world_center)

                label = 't_world_centernew'
                utils.make_frame(self.vis, label, h=0.05, radius=0.0012, o=1.0)
                self.vis[label].set_transform(t_world_centernew)

            t_worldnew_world = t_world_centernew @ np.linalg.inv(
                t_world_center)
        else:
            t_worldnew_world = np.eye(4)

        p1_quat_wxyz = (p1_rotation[3], p1_rotation[0], p1_rotation[1],
                        p1_rotation[2])
        t_world_p1 = transformations.quaternion_matrix(p1_quat_wxyz)
        t_world_p1[0:3, 3] = np.array(p1_position)

        t_worldnew_p1 = t_worldnew_world @ t_world_p1

        p0_quat_wxyz = (p0_rotation[3], p0_rotation[0], p0_rotation[1],
                        p0_rotation[2])
        t_world_p0 = transformations.quaternion_matrix(p0_quat_wxyz)
        t_world_p0[0:3, 3] = np.array(p0_position)
        t_worldnew_p0 = t_worldnew_world @ t_world_p0

        if debug_visualize:
            label = 't_worldnew_p1'
            utils.make_frame(self.vis, label, h=0.05, radius=0.0012, o=1.0)
            self.vis[label].set_transform(t_worldnew_p1)

            label = 't_world_p1'
            utils.make_frame(self.vis, label, h=0.05, radius=0.0012, o=1.0)
            self.vis[label].set_transform(t_world_p1)

            label = 't_worldnew_p0-0thetaoriginally'
            utils.make_frame(self.vis, label, h=0.05, radius=0.0021, o=1.0)
            self.vis[label].set_transform(t_worldnew_p0)

        # PICK FRAME, using 0 rotation due to suction rotational symmetry
        t_worldnew_p0theta0 = t_worldnew_p0 * 1.0
        t_worldnew_p0theta0[0:3, 0:3] = np.eye(3)

        if debug_visualize:
            label = 'PICK'
            utils.make_frame(self.vis, label, h=0.05, radius=0.0021, o=1.0)
            self.vis[label].set_transform(t_worldnew_p0theta0)

        # PLACE FRAME, adjusted for this 0 rotation on pick
        t_p0_p0theta0 = np.linalg.inv(t_worldnew_p0) @ t_worldnew_p0theta0
        t_worldnew_p1theta0 = t_worldnew_p1 @ t_p0_p0theta0

        if debug_visualize:
            label = 'PLACE'
            utils.make_frame(self.vis, label, h=0.05, radius=0.0021, o=1.0)
            self.vis[label].set_transform(t_worldnew_p1theta0)

        # convert the above rotation to euler
        quatwxyz_worldnew_p1theta0 = transformations.quaternion_from_matrix(
            t_worldnew_p1theta0)
        q = quatwxyz_worldnew_p1theta0
        quatxyzw_worldnew_p1theta0 = (q[1], q[2], q[3], q[0])
        p1_rotation = quatxyzw_worldnew_p1theta0
        p1_euler = utils.get_rot_from_pybullet_quaternion(p1_rotation)
        roll = p1_euler[0]
        pitch = p1_euler[1]
        p1_theta = -p1_euler[2]

        p0_theta = 0
        z = p1_position[2]
        return p0_theta, p1_theta, z, roll, pitch
Code Example #10
    def extract_x_y_theta(self,
                          object_info,
                          t_worldaug_world=None,
                          preserve_theta=False,
                          softbody=False):
        """Given either object OR action pose info, return stuff to put in GT observation.
        Note: only called from within this class and 2-step case via subclassing.

        During training, there is normally data augmentation applied, so t_worldaug_world
        is NOT None, and augmentation is applied as if the 'image' were adjusted. However,
        for actions, we preserve theta so `object_quat_xyzw` does not change, and we query
        index=2 for the z rotation. For most deformables tasks, we did not use rotations,
        hence theta=0.

        Get t_world_object which is a 4x4 homogeneous matrix, then stick the position there (why?).
        Then we do the matrix multiplication with '@' to get the homogeneous matrix representation
        of the object after augmentation is applied.
        https://stackoverflow.com/questions/34142485/difference-between-numpy-dot-and-python-3-5-matrix-multiplication
        Then the position is extracted from the resulting homogeneous matrix. (And we ignore the
        z coordinate for that, but in fact for cloth vertices I would keep it.)

        Augmentation for cloth vertices? For now I'm following what they do for positions only
        (ignoring orientation), and returning updated (x,y,z) where the last value is the z
        (height) and not a rotation (doesn't make sense with vertices anyway). Augmentations
        are all in SE(2), so the z coordinates should not change. Indeed that's what I see after
        extracting positions from `t_worldaug_object`.

        And yes, with vertices, they are flattened in the same way regardless of whether data
        augmentation is used, so every third value represents a vertex height. :D So we get
        [ v1_x, v1_y, v1_z, v2_x, v2_y, v2_z, ... ] in the flattened version.

        Args:
            t_worldaug_world: not None only if there's an augmentation and we need to change the
                ground truth pose as if we applied the augmentation. Only happens during (a)
                determining mean/stds, and (b) training. During initialization and the action
                (loading) we do not use this.
            preserve_theta: Only True when `object_info` is an action pose.
            softbody: Only True if `object_info` is a vertex mesh of a PyBullet soft body.

        Returns:
            object_x_y_theta, pos, quat: the first is an SE(2) pose and parameterized by
            three numbers, the xy position and a scalar rotation `theta`. In case we have
            a cloth, I'm not returning anything after that (no pos or quat) so that if we
            use the older API for cloth (which we should not be!), the code will crash.
        """
        # ------------------------------------------------------------------------------------------ #
        # Daniel: sanity checks to make sure we're handling the soft body ID correctly.
        # The last `if` of `softbody` helps to distinguish among actions, which are also length 2.
        # ------------------------------------------------------------------------------------------ #
        if (self.task in TASKS_SOFT) and (len(object_info) == 2) and softbody:
            nb_vertices, vert_pos_l = object_info
            assert nb_vertices == 100, f'We should be using 100 vertices but have: {nb_vertices}'
            assert nb_vertices == len(
                vert_pos_l), f'{nb_vertices}, {len(vert_pos_l)}'
            vert_pos_np = np.array(vert_pos_l)  # (100, 3)

            if t_worldaug_world is not None:
                augmented_vertices = []

                # For each vertex, apply data augmentation.
                for i in range(vert_pos_np.shape[0]):
                    vertex_position = vert_pos_np[i, :]

                    # Use identity quaternion (w=1, others 0). Otherwise, follow the normal augmentation.
                    t_world_object = transformations.quaternion_matrix(
                        (1, 0, 0, 0))
                    t_world_object[0:3, 3] = np.array(vertex_position)
                    t_worldaug_object = t_worldaug_world @ t_world_object
                    new_v_position = t_worldaug_object[0:3, 3]
                    augmented_vertices.append(new_v_position)

                # Stack everything.
                augmented_flattened = np.concatenate(
                    augmented_vertices).flatten().astype(np.float32)
                return augmented_flattened
            else:
                vert_flattened = vert_pos_np.flatten().astype(np.float32)
                return vert_flattened
        # ------------------------------------------------------------------------------------------ #
        # Now proceed as usual, untouched from normal ravens code except for documentation/assertions.

        object_position = object_info[0]
        object_quat_xyzw = object_info[1]

        if t_worldaug_world is not None:
            object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                                object_quat_xyzw[1], object_quat_xyzw[2])
            t_world_object = transformations.quaternion_matrix(
                object_quat_wxyz)
            t_world_object[0:3, 3] = np.array(object_position)

            # Daniel: pretty sure this has to be true. Upper left 3x3 is rotation, upper right 3x1 is position.
            assert t_world_object.shape == (
                4, 4), f'shape is not 4x4: {t_world_object.shape}'
            assert t_worldaug_world.shape == (
                4, 4), f'shape is not 4x4: {t_worldaug_world.shape}'

            # Daniel: data augmentation. Then, extract `object_position` from augmented object.
            t_worldaug_object = t_worldaug_world @ t_world_object
            object_quat_wxyz = transformations.quaternion_from_matrix(
                t_worldaug_object)
            if not preserve_theta:
                object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                    object_quat_wxyz[3], object_quat_wxyz[0])
            object_position = t_worldaug_object[0:3, 3]

        object_xy = object_position[0:2]
        object_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(object_quat_xyzw)
            [2]) / self.THETA_SCALE
        return np.hstack((object_xy, object_theta)).astype(
            np.float32), object_position, object_quat_xyzw
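
The per-vertex loop in the cloth branch applies the augmentation to each vertex using an identity rotation, so the same result can be obtained in one shot by transforming homogeneous points. A vectorized sketch (not part of the original code) of that equivalence:

import numpy as np

def augment_vertices(vert_pos_np, t_worldaug_world):
    # vert_pos_np: (N, 3) vertex positions; t_worldaug_world: 4x4 transform.
    ones = np.ones((vert_pos_np.shape[0], 1))
    homog = np.hstack([vert_pos_np, ones])             # (N, 4) homogeneous points
    augmented = (t_worldaug_world @ homog.T).T[:, :3]  # (N, 3) augmented positions
    return augmented.flatten().astype(np.float32)      # same flattening order as the loop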