Example #1
class GoalNaiveTransporterAgent(TransporterAgent):
    """Naive version which stacks current and goal images through normal Transport."""
    def __init__(self, name, task, n_rotations=36):
        super().__init__(name, task, n_rotations)

        # Stack the goal image for the vanilla Transport module.
        t_shape = (self.in_shape[0], self.in_shape[1],
                   int(self.in_shape[2] * 2))

        self.attention = Attention(in_shape=self.in_shape,
                                   n_rotations=1,
                                   preprocess=utils.preprocess)
        self.transport = Transport(in_shape=t_shape,
                                   n_rotations=self.n_rotations,
                                   crop_size=self.crop_size,
                                   preprocess=utils.preprocess)

    def get_image(self, obs):
        """Stack color and height images image."""

        # if self.use_goal_image:
        #   colormap_g, heightmap_g = utils.get_fused_heightmap(goal, configs)
        #   goal_image = self.concatenate_c_h(colormap_g, heightmap_g)
        #   input_image = np.concatenate((input_image, goal_image), axis=2)
        #   assert input_image.shape[2] == 12, input_image.shape

        # Fuse the multi-view RGB-D observations (using the camera model in
        # self.cam_config) into orthographic top-down color and height maps.
        cmap, hmap = utils.get_fused_heightmap(obs, self.cam_config,
                                               self.bounds, self.pix_size)
        # `cmap` is now a top-down (orthographic) color image. Uncomment to
        # inspect it:
        # import cv2
        # cv2.imshow('colormap', cmap)
        # cv2.waitKey(0)
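        # Stack color (3 channels) with the height map repeated three times to
        # form a 6-channel input; with the default Ravens workspace this gives
        # shape (320, 160, 6).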
        img = np.concatenate(
            (cmap, hmap[Ellipsis, None], hmap[Ellipsis, None], hmap[Ellipsis,
                                                                    None]),
            axis=2)
        assert img.shape == self.in_shape, img.shape
        return img

    def get_sample(self, dataset, augment=True):
        """Get a dataset sample.

        Args:
          dataset: a ravens.Dataset (train or validation)
          augment: if True, perform data augmentation.

        Returns:
          tuple of data for training:
            (input_image, p0, p0_theta, p1, p1_theta, goal_image)
          the goal image is returned separately from the current image;
          stacking current and goal (when needed) is done by the caller,
          e.g. in train().
        """

        # Sample paired current and goal observations from the dataset.
        (obs, act, _, _), (gobs, _, _, _) = dataset.sample()

        # Uncomment to visualize the raw current and goal camera images:
        # import cv2
        # cv2.imshow('current', obs['color'][0, :, :, :3])
        # cv2.waitKey(0)
        # cv2.imshow('goal', gobs['color'][0, :, :, :3])
        # cv2.waitKey(0)

        img = self.get_image(obs)
        gimg = self.get_image(gobs)

        # Get training labels from data sample.
        p0_xyz, p0_xyzw = act['pose0']
        p1_xyz, p1_xyzw = act['pose1']
        p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
        p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
        p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
        p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
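        # Rotations are defined relative to the pick: keep only the
        # place-minus-pick rotation delta and treat the pick itself as
        # rotation-invariant.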
        p1_theta = p1_theta - p0_theta
        p0_theta = 0

        # Data augmentation.
        if augment:
            img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
            gimg, _, _, _ = utils.perturb(gimg, [p0, p1])
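            # Note: the goal image receives an independent random perturbation;
            # the pixel labels p0, p1 come from the current image's transform.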

        return img, p0, p0_theta, p1, p1_theta, gimg

    def train(self, dataset, writer=None):
        """Train on a dataset sample for 1 iteration.

        Args:
          dataset: a ravens.Dataset.
          writer: a TF summary writer (for tensorboard).
        """
        tf.keras.backend.set_learning_phase(1)
        # The learning problem: predict a pick pose and a place pose from the
        # image observation.
        img, p0, p0_theta, p1, p1_theta, gimg = self.get_sample(dataset)
        # Get training losses.
        step = self.total_steps + 1
        loss0 = self.attention.train(img, p0, p0_theta)
        if isinstance(self.transport, Attention):
            loss1 = self.transport.train(img, p1, p1_theta)
        else:
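            # Goal conditioning: stack current and goal images along the
            # channel axis (6 + 6 = 12 channels), matching t_shape from the
            # constructor.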
            img_stk = np.concatenate([img, gimg], axis=-1)
            loss1 = self.transport.train(img_stk, p0, p1, p1_theta)
        if writer is not None:
            with writer.as_default():
                sc = tf.summary.scalar
                sc('train_loss/attention', loss0, step)
                sc('train_loss/transport', loss1, step)
        print(f'Train Iter: {step} Loss: {loss0:.4f} {loss1:.4f}')
        self.total_steps = step

        # TODO(andyzeng) cleanup goal-conditioned model.

        # if self.use_goal_image:
        #   half = int(input_image.shape[2] / 2)
        #   img_curr = input_image[:, :, :half]  # ignore goal portion
        #   loss0 = self.attention.train(img_curr, p0, p0_theta)
        # else:
        #   loss0 = self.attention.train(input_image, p0, p0_theta)

        # if isinstance(self.transport, Attention):
        #   loss1 = self.transport.train(input_image, p1, p1_theta)
        # elif isinstance(self.transport, TransportGoal):
        #   half = int(input_image.shape[2] / 2)
        #   img_curr = input_image[:, :, :half]
        #   img_goal = input_image[:, :, half:]
        #   loss1 = self.transport.train(img_curr, img_goal, p0, p1, p1_theta)
        # else:
        #   loss1 = self.transport.train(input_image, p0, p1, p1_theta)

    def validate(self, dataset, writer=None):  # pylint: disable=unused-argument
        """Test on a validation dataset for 10 iterations."""
        print('Skipping validation.')
        # tf.keras.backend.set_learning_phase(0)
        # n_iter = 10
        # loss0, loss1 = 0, 0
        # for i in range(n_iter):
        #   img, p0, p0_theta, p1, p1_theta = self.get_sample(dataset, False)

        #   # Get validation losses. Do not backpropagate.
        #   loss0 += self.attention.train(img, p0, p0_theta, backprop=False)
        #   if isinstance(self.transport, Attention):
        #     loss1 += self.transport.train(img, p1, p1_theta, backprop=False)
        #   else:
        #     loss1 += self.transport.train(img, p0, p1, p1_theta, backprop=False)
        # loss0 /= n_iter
        # loss1 /= n_iter
        # with writer.as_default():
        #   sc = tf.summary.scalar
        #   sc('test_loss/attention', loss0, self.total_steps)
        #   sc('test_loss/transport', loss1, self.total_steps)
        # print(f'Validation Loss: {loss0:.4f} {loss1:.4f}')

    def act(self, obs, info=None, goal=None):  # pylint: disable=unused-argument
        """Run inference and return best action given visual observations."""
        tf.keras.backend.set_learning_phase(0)

        # Get heightmap from RGB-D images.
        img = self.get_image(obs)
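        # `goal` follows the same (obs, act, reward, info) tuple layout as
        # samples from dataset.sample(); element 0 is the observation.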
        gobs = goal[0]
        gimg = self.get_image(gobs)

        # Attention model forward pass.
        pick_conf = self.attention.forward(img)
        argmax = np.argmax(pick_conf)
        argmax = np.unravel_index(argmax, shape=pick_conf.shape)
        p0_pix = argmax[:2]
        p0_theta = argmax[2] * (2 * np.pi / pick_conf.shape[2])

        # Transport model forward pass (stack current and goal images along
        # channels, matching train()).
        img_stk = np.concatenate([img, gimg], axis=-1)
        place_conf = self.transport.forward(img_stk, p0_pix)
        argmax = np.argmax(place_conf)
        argmax = np.unravel_index(argmax, shape=place_conf.shape)
        p1_pix = argmax[:2]
        p1_theta = argmax[2] * (2 * np.pi / place_conf.shape[2])
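        # Each rotation bin spans 2 * pi / n_rotations radians (10 degrees at
        # the default n_rotations=36). The attention stream uses a single bin,
        # so p0_theta above is always 0.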

        # Pixels to end effector poses.
        hmap = img[:, :, 3]
        p0_xyz = utils.pix_to_xyz(p0_pix, hmap, self.bounds, self.pix_size)
        p1_xyz = utils.pix_to_xyz(p1_pix, hmap, self.bounds, self.pix_size)
        p0_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
        p1_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

        return {
            'pose0': (np.asarray(p0_xyz), np.asarray(p0_xyzw)),
            'pose1': (np.asarray(p1_xyz), np.asarray(p1_xyzw))
        }

        # TODO(andyzeng) cleanup goal-conditioned model.

        # Make a goal image if needed, and for consistency stack with input.
        # if self.use_goal_image:
        #   cmap_g, hmap_g = utils.get_fused_heightmap(goal, self.cam_config)
        #   goal_image = self.concatenate_c_h(colormap_g, heightmap_g)
        #   input_image = np.concatenate((input_image, goal_image), axis=2)
        #   assert input_image.shape[2] == 12, input_image.shape

        # if self.use_goal_image:
        #   half = int(input_image.shape[2] / 2)
        #   input_only = input_image[:, :, :half]  # ignore goal portion
        #   pick_conf = self.attention.forward(input_only)
        # else:
        # if isinstance(self.transport, TransportGoal):
        #   half = int(input_image.shape[2] / 2)
        #   img_curr = input_image[:, :, :half]
        #   img_goal = input_image[:, :, half:]
        #   place_conf = self.transport.forward(img_curr, img_goal, p0_pix)

    def load(self, n_iter):
        """Load pre-trained models."""
        print(f'Loading pre-trained model at {n_iter} iterations.')
        attention_fname = 'attention-ckpt-%d.h5' % n_iter
        transport_fname = 'transport-ckpt-%d.h5' % n_iter
        attention_fname = os.path.join(self.models_dir, attention_fname)
        transport_fname = os.path.join(self.models_dir, transport_fname)
        self.attention.load(attention_fname)
        self.transport.load(transport_fname)
        self.total_steps = n_iter

    def save(self):
        """Save models."""
        if not tf.io.gfile.exists(self.models_dir):
            tf.io.gfile.makedirs(self.models_dir)
        attention_fname = 'attention-ckpt-%d.h5' % self.total_steps
        transport_fname = 'transport-ckpt-%d.h5' % self.total_steps
        attention_fname = os.path.join(self.models_dir, attention_fname)
        transport_fname = os.path.join(self.models_dir, transport_fname)
        self.attention.save(attention_fname)
        self.transport.save(transport_fname)
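
A minimal, self-contained sketch of the confidence-map decoding used in act()
above (the toy volume and its random values are illustrative; only numpy is
assumed):

import numpy as np

# Toy confidence volume: (rows, cols, rotation bins), standing in for the
# transport model's forward-pass output.
n_rotations = 36
conf = np.random.default_rng(0).random((320, 160, n_rotations))

# Flatten-argmax, then unravel back into (row, col, rotation bin).
idx = np.unravel_index(np.argmax(conf), shape=conf.shape)
p1_pix = idx[:2]
p1_theta = idx[2] * (2 * np.pi / n_rotations)  # bin index -> angle in [0, 2*pi)
print(p1_pix, np.degrees(p1_theta))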
Example #2
class Form2FitAgent:
    """Form-2-fit Agent (https://form2fit.github.io/)."""
    def __init__(self, name, task):
        self.name = name
        self.task = task
        self.total_iter = 0
        self.num_rotations = 24
        self.descriptor_dim = 16
        self.pixel_size = 0.003125
        self.input_shape = (320, 160, 6)
        self.camera_config = cameras.RealSenseD415.CONFIG
        self.models_dir = os.path.join('checkpoints', self.name)
        self.bounds = np.array([[0.25, 0.75], [-0.5, 0.5], [0, 0.28]])

        self.pick_model = Attention(input_shape=self.input_shape,
                                    num_rotations=1,
                                    preprocess=self.preprocess,
                                    lite=True)
        self.place_model = Attention(input_shape=self.input_shape,
                                     num_rotations=1,
                                     preprocess=self.preprocess,
                                     lite=True)
        self.match_model = Matching(input_shape=self.input_shape,
                                    descriptor_dim=self.descriptor_dim,
                                    num_rotations=self.num_rotations,
                                    preprocess=self.preprocess,
                                    lite=True)

    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations."""
        del validation_dataset

        for i in range(num_iter):
            obs, act, _ = dataset.random_sample()

            # Get heightmap from RGB-D images.
            configs = act['camera_config']
            colormap, heightmap = self.get_heightmap(obs, configs)

            # Get training labels from data sample.
            pose0, pose1 = act['params']['pose0'], act['params']['pose1']
            p0_position, p0_rotation = pose0[0], pose0[1]
            p0 = utils.xyz_to_pix(p0_position, self.bounds, self.pixel_size)
            p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_rotation)[2])
            p1_position, p1_rotation = pose1[0], pose1[1]
            p1 = utils.xyz_to_pix(p1_position, self.bounds, self.pixel_size)
            p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_rotation)[2])
            p1_theta = p1_theta - p0_theta
            p0_theta = 0

            # Concatenate color with depth images.
            input_image = np.concatenate(
                (colormap, heightmap[Ellipsis, None],
                 heightmap[Ellipsis, None], heightmap[Ellipsis, None]),
                axis=2)

            # Do data augmentation (perturb rotation and translation).
            input_image, _, roundedpixels, _ = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = roundedpixels

            # Compute training loss.
            loss0 = self.pick_model.train(input_image, p0, theta=0)
            loss1 = self.place_model.train(input_image, p1, theta=0)
            loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)
            with writer.as_default():
                tf.summary.scalar('pick_loss',
                                  self.pick_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('place_loss',
                                  self.place_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('match_loss',
                                  self.match_model.metric.result(),
                                  step=self.total_iter + i)
            print(
                f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
            )

        self.total_iter += num_iter
        self.save()

    def act(self, obs, info):
        """Run inference and return best action given visual observations."""
        del info

        act = {'camera_config': self.camera_config, 'primitive': None}
        if not obs:
            return act

        # Get heightmap from RGB-D images.
        colormap, heightmap = self.get_heightmap(obs, self.camera_config)

        # Concatenate color with depth images.
        input_image = np.concatenate(
            (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
             heightmap[Ellipsis, None]),
            axis=2)

        # Get top-k pixels from pick and place heatmaps.
        k = 100
        pick_heatmap = self.pick_model.forward(input_image,
                                               apply_softmax=True).squeeze()
        place_heatmap = self.place_model.forward(input_image,
                                                 apply_softmax=True).squeeze()
        descriptors = np.float32(self.match_model.forward(input_image))

        # V4 (active): blur both heatmaps, take the best pick pixel, then pick
        # the place peak and rotation whose descriptor best matches the pick
        # descriptor. Earlier variants V1-V3 are kept below for reference.
        pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0)
        place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0)
        pick_topk = np.int32(
            np.unravel_index(
                np.argsort(pick_heatmap.reshape(-1))[-k:],
                pick_heatmap.shape)).T
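        # Rows of pick_topk are sorted by ascending score, so the last row
        # (taken below) is the argmax pixel.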
        pick_pixel = pick_topk[-1, :]
        from skimage.feature import peak_local_max  # pylint: disable=g-import-not-at-top
        place_peaks = peak_local_max(place_heatmap, num_peaks=1)
        distances = np.ones((place_peaks.shape[0], self.num_rotations)) * 10
        pick_descriptor = descriptors[0, pick_pixel[0],
                                      pick_pixel[1], :].reshape(1, -1)
        for i in range(place_peaks.shape[0]):
            peak = place_peaks[i, :]
            place_descriptors = descriptors[:, peak[0], peak[1], :]
            distances[i, :] = np.linalg.norm(place_descriptors -
                                             pick_descriptor,
                                             axis=1)
        ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
        p0_pixel = pick_pixel
        p0_theta = 0
        p1_pixel = place_peaks[ibest[0], :]
        p1_theta = ibest[1] * (2 * np.pi / self.num_rotations)

        # # V3
        # pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0)
        # place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0)
        # pick_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
        # place_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(place_heatmap.reshape(-1))[-k:],
        #         place_heatmap.shape)).T
        # pick_pixel = pick_topk[-1, :]
        # place_pixel = place_topk[-1, :]
        # pick_descriptor = descriptors[0, pick_pixel[0],
        #                               pick_pixel[1], :].reshape(1, -1)
        # place_descriptor = descriptors[:, place_pixel[0], place_pixel[1], :]
        # distances = np.linalg.norm(place_descriptor - pick_descriptor, axis=1)
        # irotation = np.argmin(distances)
        # p0_pixel = pick_pixel
        # p0_theta = 0
        # p1_pixel = place_pixel
        # p1_theta = irotation * (2 * np.pi / self.num_rotations)

        # # V2
        # pick_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
        # place_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(place_heatmap.reshape(-1))[-k:],
        #         place_heatmap.shape)).T
        # pick_pixel = pick_topk[-1, :]
        # pick_descriptor = descriptors[0, pick_pixel[0],
        #                               pick_pixel[1], :].reshape(1, 1, 1, -1)
        # distances = np.linalg.norm(descriptors - pick_descriptor, axis=3)
        # distances = np.transpose(distances, [1, 2, 0])
        # max_distance = int(np.round(np.max(distances)))
        # for i in range(self.num_rotations):
        #   distances[:, :, i] = cv2.circle(distances[:, :, i],
        #                                   (pick_pixel[1], pick_pixel[0]), 50,
        #                                   max_distance, -1)
        # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
        # p0_pixel = pick_pixel
        # p0_theta = 0
        # p1_pixel = ibest[:2]
        # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations)

        # # V1
        # pick_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
        # place_topk = np.int32(
        #     np.unravel_index(
        #         np.argsort(place_heatmap.reshape(-1))[-k:],
        #         place_heatmap.shape)).T
        # distances = np.zeros((k, k, self.num_rotations))
        # for ipick in range(k):
        #   pick_descriptor = descriptors[0, pick_topk[ipick, 0],
        #                                 pick_topk[ipick, 1], :].reshape(1, -1)
        #   for iplace in range(k):
        #     place_descriptors = descriptors[:, place_topk[iplace, 0],
        #                                     place_topk[iplace, 1], :]
        #     distances[ipick, iplace, :] = np.linalg.norm(
        #         place_descriptors - pick_descriptor, axis=1)
        # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
        # p0_pixel = pick_topk[ibest[0], :]
        # p0_theta = 0
        # p1_pixel = place_topk[ibest[1], :]
        # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations)

        # Pixels to end effector poses.
        p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                       self.pixel_size)
        p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                                       self.pixel_size)
        p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
        p1_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

        act['primitive'] = 'pick_place'
        if self.task == 'sweeping':
            act['primitive'] = 'sweep'
        elif self.task == 'pushing':
            act['primitive'] = 'push'
        params = {
            'pose0': (np.asarray(p0_position), np.asarray(p0_rotation)),
            'pose1': (np.asarray(p1_position), np.asarray(p1_rotation))
        }
        act['params'] = params
        return act

    #-------------------------------------------------------------------------
    # Helper Functions
    #-------------------------------------------------------------------------

    def preprocess(self, image):
        """Pre-process images (subtract mean, divide by std)."""
        color_mean = 0.18877631
        depth_mean = 0.00509261
        color_std = 0.07276466
        depth_std = 0.00903967
        image[:, :, :3] = (image[:, :, :3] / 255 - color_mean) / color_std
        image[:, :, 3:] = (image[:, :, 3:] - depth_mean) / depth_std
        return image

    def get_heightmap(self, obs, configs):
        """Reconstruct orthographic heightmaps with segmentation masks."""
        heightmaps, colormaps = utils.reconstruct_heightmaps(
            obs['color'], obs['depth'], configs, self.bounds, self.pixel_size)
        colormaps = np.float32(colormaps)
        heightmaps = np.float32(heightmaps)

        # Fuse maps from different views.
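        # A pixel is "valid" in a view if any color channel is nonzero; each
        # fused pixel is the mean over the views that observed it (unseen
        # pixels keep a divisor of 1 to avoid division by zero).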
        valid = np.sum(colormaps, axis=3) > 0
        repeat = np.sum(valid, axis=0)
        repeat[repeat == 0] = 1
        colormap = np.sum(colormaps, axis=0) / repeat[Ellipsis, None]
        colormap = np.uint8(np.round(colormap))
        heightmap = np.sum(heightmaps, axis=0) / repeat
        return colormap, heightmap

    def load(self, num_iter):
        """Load pre-trained models."""
        pick_fname = 'pick-ckpt-%d.h5' % num_iter
        place_fname = 'place-ckpt-%d.h5' % num_iter
        match_fname = 'match-ckpt-%d.h5' % num_iter
        pick_fname = os.path.join(self.models_dir, pick_fname)
        place_fname = os.path.join(self.models_dir, place_fname)
        match_fname = os.path.join(self.models_dir, match_fname)
        self.pick_model.load(pick_fname)
        self.place_model.load(place_fname)
        self.match_model.load(match_fname)
        self.total_iter = num_iter

    def save(self):
        """Save models."""
        if not tf.io.gfile.exists(self.models_dir):
            tf.io.gfile.makedirs(self.models_dir)
        pick_fname = 'pick-ckpt-%d.h5' % self.total_iter
        place_fname = 'place-ckpt-%d.h5' % self.total_iter
        match_fname = 'match-ckpt-%d.h5' % self.total_iter
        pick_fname = os.path.join(self.models_dir, pick_fname)
        place_fname = os.path.join(self.models_dir, place_fname)
        match_fname = os.path.join(self.models_dir, match_fname)
        self.pick_model.save(pick_fname)
        self.place_model.save(place_fname)
        self.match_model.save(match_fname)
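
A minimal, self-contained sketch of the descriptor-matching step above: given
one descriptor map per rotation, the best place rotation is the one whose
descriptor at the place pixel is closest to the pick descriptor. Shapes match
this agent's defaults; the random values are illustrative stand-ins for
match_model.forward's output:

import numpy as np

num_rotations, h, w, d = 24, 320, 160, 16
rng = np.random.default_rng(0)
# Stand-in for match_model.forward(input_image): one descriptor map per rotation.
descriptors = rng.standard_normal((num_rotations, h, w, d)).astype(np.float32)

pick_pixel, place_pixel = (100, 80), (200, 40)
# Pick descriptor from the unrotated (index 0) slice.
pick_descriptor = descriptors[0, pick_pixel[0], pick_pixel[1], :].reshape(1, -1)
# Place descriptors at the same pixel across all rotation slices.
place_descriptors = descriptors[:, place_pixel[0], place_pixel[1], :]

# The best rotation minimizes L2 distance between descriptors.
distances = np.linalg.norm(place_descriptors - pick_descriptor, axis=1)
irotation = int(np.argmin(distances))
p1_theta = irotation * (2 * np.pi / num_rotations)
print(irotation, np.degrees(p1_theta))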