Esempio n. 1
0
  def get_sample(self, dataset, augment=True):
    """Get a dataset sample.

    Args:
      dataset: a ravens.Dataset (train or validation)
      augment: if True, perform data augmentation.

    Returns:
      tuple of data for training:
        (input_image, p0, p0_theta, p1, p1_theta)
      tuple additionally includes (z, roll, pitch) if self.six_dof
      if self.use_goal_image, then the goal image is stacked with the
      current image in `input_image`. If splitting up current and goal
      images is desired, it should be done outside this method.
    """

    (obs, act, _, _), _ = dataset.sample()
    img = self.get_image(obs)

    # Get training labels from data sample.
    p0_xyz, p0_xyzw = act['pose0']
    p1_xyz, p1_xyzw = act['pose1']
    p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
    p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
    p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
    p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
    p1_theta = p1_theta - p0_theta
    p0_theta = 0

    # Data augmentation.
    if augment:
      img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta
Esempio n. 2
0
    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations."""
        del validation_dataset

        for i in range(num_iter):
            obs, act, _ = dataset.random_sample()

            # Get heightmap from RGB-D images.
            configs = act['camera_config']
            colormap, heightmap = self.get_heightmap(obs, configs)

            # Get training labels from data sample.
            pose0, pose1 = act['params']['pose0'], act['params']['pose1']
            p0_position, p0_rotation = pose0[0], pose0[1]
            p0 = utils.xyz_to_pix(p0_position, self.bounds, self.pixel_size)
            p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_rotation)[2])
            p1_position, p1_rotation = pose1[0], pose1[1]
            p1 = utils.xyz_to_pix(p1_position, self.bounds, self.pixel_size)
            p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_rotation)[2])
            p1_theta = p1_theta - p0_theta
            p0_theta = 0

            # Concatenate color with depth images.
            input_image = np.concatenate(
                (colormap, heightmap[Ellipsis, None],
                 heightmap[Ellipsis, None], heightmap[Ellipsis, None]),
                axis=2)

            # Do data augmentation (perturb rotation and translation).
            input_image, _, roundedpixels, _ = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = roundedpixels

            # Compute training loss.
            loss0 = self.pick_model.train(input_image, p0, theta=0)
            loss1 = self.place_model.train(input_image, p1, theta=0)
            loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)
            with writer.as_default():
                tf.summary.scalar('pick_loss',
                                  self.pick_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('place_loss',
                                  self.place_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('match_loss',
                                  self.match_model.metric.result(),
                                  step=self.total_iter + i)
            print(
                f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
            )

        self.total_iter += num_iter
        self.save()
Esempio n. 3
0
    def get_sample(self, dataset, augment=True):
        """Get a dataset sample.

        Args:
          dataset: a ravens.Dataset (train or validation)
          augment: if True, perform data augmentation.

        Returns:
          tuple of data for training:
            (input_image, p0, p0_theta, p1, p1_theta)
          tuple additionally includes (z, roll, pitch) if self.six_dof
          if self.use_goal_image, then the goal image is stacked with the
          current image in `input_image`. If splitting up current and goal
          images is desired, it should be done outside this method.
        """

        # do: get current and goal observation here.
        (obs, act, _, _), (gobs, _, _, _) = dataset.sample()

        # do: visualize.
        # import cv2
        # img = obs['color'][0, :, :, :3]
        # gimg = gobs['color'][0, :, :, :3]
        # cv2.imshow('haha', img)
        # cv2.waitKey(0)
        # cv2.imshow('haha', gimg)
        # cv2.waitKey(0)

        img = self.get_image(obs)
        gimg = self.get_image(gobs)

        # Get training labels from data sample.
        p0_xyz, p0_xyzw = act['pose0']
        p1_xyz, p1_xyzw = act['pose1']
        p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
        p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
        p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
        p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
        p1_theta = p1_theta - p0_theta
        p0_theta = 0

        # Data augmentation.
        augment = False
        print('no augment')
        if augment:
            img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
            gimg, _, _, _ = utils.perturb(gimg, [p0, p1])

        return img, p0, p0_theta, p1, p1_theta, gimg
Esempio n. 4
0
    def get_sample(self, dataset, augment=True):
        """Get a dataset sample.

        Args:
          dataset: a ravens.Dataset (train or validation)
          augment: if True, perform data augmentation.

        Returns:
          tuple of data for training:
            (input_image, p0, p0_theta, p1, p1_theta)
          tuple additionally includes (z, roll, pitch) if self.six_dof
          if self.use_goal_image, then the goal image is stacked with the
          current image in `input_image`. If splitting up current and goal
          images is desired, it should be done outside this method.
        """

        (obs, act, _, _), _ = dataset.sample()
        # do: obs here are still three multi-view images.
        # import cv2
        # for i in range(3):
        #     rgb = obs['color'][i, ...]
        #     cv2.imshow('haha', rgb)
        #     cv2.waitKey(0)
        # exit(0)
        img = self.get_image(obs)
        # do: image has changed to top-down!
        # import cv2
        # cv2.imshow('haha', img[:, :, :3])
        # cv2.waitKey(0)
        # exit(0)

        # Get training labels from data sample.
        p0_xyz, p0_xyzw = act['pose0']
        p1_xyz, p1_xyzw = act['pose1']
        p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
        p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
        p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
        p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
        p1_theta = p1_theta - p0_theta
        p0_theta = 0

        # Data augmentation.
        if augment:
            img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

        return img, p0, p0_theta, p1, p1_theta
Esempio n. 5
0
    def get_sample(self, dataset, augment=True):
        (obs, act, _, _), _ = dataset.sample()
        img = self.get_image(obs)

        # Get training labels from data sample.
        p0_xyz, p0_xyzw = act['pose0']
        p1_xyz, p1_xyzw = act['pose1']
        p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
        p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
        p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
        p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
        p1_theta = p1_theta - p0_theta
        p0_theta = 0

        # Data augmentation.
        if augment:
            img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

        return img, p0, p0_theta, p1, p1_theta
Esempio n. 6
0
    def get_sample_place(self, dataset, augment=True):
        (obs, act, _, _), _ = dataset.sample()
        img_place = self.get_image_place(obs)

        # Get training labels from data sample.
        p0_xyz_place, p0_xyzw_place = act['pose0']
        p1_xyz_place, p1_xyzw_place = act['pose1']
        p0_place = utils.xyz_to_pix(p0_xyz_place, self.bounds_place,
                                    self.pix_size)
        p0_theta_place = -np.float32(
            utils.quatXYZW_to_eulerXYZ(p0_xyzw_place)[2])
        p1_place = utils.xyz_to_pix(p1_xyz_place, self.bounds_place,
                                    self.pix_size)
        p1_theta_place = -np.float32(
            utils.quatXYZW_to_eulerXYZ(p1_xyzw_place)[2])
        p1_theta_place = p1_theta_place - p0_theta_place
        p0_theta_place = 0

        # Data augmentation.
        if augment:
            img_place, _, (p0_place, p1_place), _ = utils.perturb(
                img_place, [p0_place, p1_place])

        return img_place, p0_place, p0_theta_place, p1_place, p1_theta_place
Esempio n. 7
0
  def act(self, obs, info, compute_error=False, gt_act=None):
    """Run inference and return best action given visual observations."""

    # Get heightmap from RGB-D images.
    colormap, heightmap = self.get_heightmap(obs, self.camera_config)

    # Concatenate color with depth images.
    input_image = np.concatenate(
        (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None], heightmap[Ellipsis,
                                                                         None]),
        axis=2)

    # Attention model forward pass.
    attention = self.attention_model.forward(input_image)
    argmax = np.argmax(attention)
    argmax = np.unravel_index(argmax, shape=attention.shape)
    p0_pixel = argmax[:2]
    p0_theta = argmax[2] * (2 * np.pi / attention.shape[2])

    # Transport model forward pass.
    transport = self.transport_model.forward(input_image, p0_pixel)
    _, z, roll, pitch = self.rpz_model.forward(input_image, p0_pixel)

    argmax = np.argmax(transport)
    argmax = np.unravel_index(argmax, shape=transport.shape)

    # Index into 3D discrete tensor, grab z, roll, pitch activations
    z_best = z[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
    roll_best = roll[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
    pitch_best = pitch[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]

    # Send through regressors for each of z, roll, pitch
    z_best = self.rpz_model.z_regressor(z_best)[0, 0]
    roll_best = self.rpz_model.roll_regressor(roll_best)[0, 0]
    pitch_best = self.rpz_model.pitch_regressor(pitch_best)[0, 0]

    p1_pixel = argmax[:2]
    p1_theta = argmax[2] * (2 * np.pi / transport.shape[2])

    # Pixels to end effector poses.
    p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                   self.pixel_size)
    p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                                   self.pixel_size)

    p1_position = (p1_position[0], p1_position[1], z_best)

    p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
    p1_rotation = utils.eulerXYZ_to_quatXYZW(
        (roll_best, pitch_best, -p1_theta))

    if compute_error:
      gt_p0_position, gt_p0_rotation = gt_act['params']['pose0']
      gt_p1_position, gt_p1_rotation = gt_act['params']['pose1']

      gt_p0_pixel = np.array(
          utils.xyz_to_pix(gt_p0_position, self.bounds, self.pixel_size))
      gt_p1_pixel = np.array(
          utils.xyz_to_pix(gt_p1_position, self.bounds, self.pixel_size))

      self.p0_pixel_error(np.linalg.norm(gt_p0_pixel - np.array(p0_pixel)))
      self.p1_pixel_error(np.linalg.norm(gt_p1_pixel - np.array(p1_pixel)))

      gt_p0_theta = -np.float32(
          utils.quatXYZW_to_eulerXYZ(gt_p0_rotation)[2])
      gt_p1_theta = -np.float32(
          utils.quatXYZW_to_eulerXYZ(gt_p1_rotation)[2])

      self.p0_theta_error(
          abs((np.rad2deg(gt_p0_theta - p0_theta) + 180) % 360 - 180))
      self.p1_theta_error(
          abs((np.rad2deg(gt_p1_theta - p1_theta) + 180) % 360 - 180))

      return None

    return {
        'pose0': (np.asarray(p0_position), np.asarray(p0_rotation)),
        'pose1': (np.asarray(p1_position), np.asarray(p1_rotation))
    }