コード例 #1
0
    def random_pose_6dof(self, env, object_size):
        """Get random collision-free 6-DoF pose in workspace bounds for object.

        Pixels whose mask value equals the plane id are treated as free space.
        That free-space mask has its border cleared and is eroded by the
        object's planar diagonal (in pixels); one pixel is then sampled from
        what remains. The object is placed at that pixel's x/y, lifted by half
        its height plus a random offset in [0.03, 0.13), and given a random
        roll and pitch in [-pi/4, pi/4) and yaw in [0, 2*pi).

        Args:
          env: environment handle, forwarded to `self.get_object_masks`.
          object_size: indexable with at least three entries; entries 0 and 1
            are the planar extents and entry 2 is the height. Presumably in
            the same length unit as `self.pixel_size` -- TODO confirm.

        Returns:
          `(position, rotation)` where `position` is an `(x, y, z)` tuple and
          `rotation` is the value returned by
          `utils.get_pybullet_quaternion_from_rot`, or `None` when no free
          pixel is left after erosion.
        """
        plane_id = 1  # Mask value whose pixels count as free space.
        max_size = np.linalg.norm(object_size[0:2])
        erode_size = int(np.round(max_size / self.pixel_size))
        _, heightmap, object_mask = self.get_object_masks(env)

        # Sample freespace regions in workspace. The one-pixel border is
        # cleared first so that erosion also shrinks the mask at the image
        # edges.
        mask = np.uint8(object_mask == plane_id)
        mask[0, :], mask[:, 0], mask[-1, :], mask[:, -1] = 0, 0, 0, 0
        mask = cv2.erode(mask, np.ones((erode_size, erode_size), np.uint8))
        if np.sum(mask) == 0:
            return None
        pixel = utils.sample_distribution(np.float32(mask))
        position = utils.pixel_to_position(pixel, heightmap, self.bounds,
                                           self.pixel_size)

        # Only x/y come from the sampled pixel; z is the object's half height
        # plus a random clearance.
        z_above_table = (np.random.rand(1)[0] / 10) + 0.03
        position = (position[0], position[1],
                    object_size[2] / 2 + z_above_table)

        roll = (np.random.rand() - 0.5) * 0.5 * np.pi
        pitch = (np.random.rand() - 0.5) * 0.5 * np.pi
        yaw = np.random.rand() * 2 * np.pi
        rotation = utils.get_pybullet_quaternion_from_rot((roll, pitch, yaw))

        return position, rotation
コード例 #2
0
    def train(self, in_img, p, q, theta, backprop=True):
        """Run one training step on a single (p, q, theta) example.

        The forward output is flattened to rows of two logits and indexed with
        flat indices into a label volume of shape
        `in_img.shape[:2] + (n_rotations,)`. The loss is a weighted two-class
        softmax cross-entropy over the positive entry `(q[0], q[1], itheta)`
        and 100 sampled negative entries. `self.iters` is incremented on every
        call.

        Args:
          in_img: input image, passed to `self.forward`; its first two
            dimensions set the spatial size of the label volume.
          p: passed to `self.forward` unchanged.
          q: pair of indices into the first two label axes marking the
            positive location.
          theta: angle in radians; rounded to the nearest of
            `self.n_rotations` bins, wrapping around.
          backprop: when False the loss is computed and recorded in
            `self.metric`, but no gradient step is applied.

        Returns:
          The loss converted with `np.float32`.
        """
        self.metric.reset_states()
        with tf.GradientTape() as tape:
            logits = self.forward(in_img, p, softmax=False)

            # Discretise the angle into a rotation-bin index.
            itheta = theta / (2 * np.pi / self.n_rotations)
            itheta = np.int32(np.round(itheta)) % self.n_rotations

            # One-hot volume marking the single positive entry.
            onehot = np.zeros(in_img.shape[:2] + (self.n_rotations, ))
            onehot[q[0], q[1], itheta] = 1

            # Both loss variants need the flat positive index and 2-column
            # logits.
            ipositive = np.ravel_multi_index([q[0], q[1], itheta],
                                             onehot.shape)
            logits = tf.reshape(logits, (-1, 2))

            # Get per-pixel sampling loss.
            sampling = True  # Sampling negatives seems to converge faster.
            if sampling:
                num_samples = 100
                negatives = utils.sample_distribution(1 - onehot, num_samples)
                rows = [
                    np.ravel_multi_index(idx, onehot.shape)
                    for idx in negatives
                ]
                rows.append(ipositive)  # The positive row goes last.
                logits = tf.concat(
                    [tf.reshape(logits[r, :], (1, 2)) for r in rows], axis=0)

                # Column 0 is the "negative" class, column 1 the "positive".
                is_positive = np.int32([0] * num_samples + [1])[:, None]
                label = np.hstack((1 - is_positive, is_positive))

                # Negatives share a total weight of one, matching the single
                # positive, before normalisation.
                weights = np.ones(label.shape[0])
                weights[:num_samples] = 1. / num_samples
                weights = weights / np.sum(weights)

            else:
                is_positive = np.int32(onehot.reshape(-1, 1))
                label = np.hstack((1 - is_positive, is_positive))
                weights = np.ones(label.shape[0]) * 0.0025  # Magic constant.
                weights[ipositive] = 1

            label = tf.convert_to_tensor(label, dtype=tf.int32)
            weights = tf.convert_to_tensor(weights, dtype=tf.float32)
            loss = tf.nn.softmax_cross_entropy_with_logits(labels=label,
                                                           logits=logits)
            loss = tf.reduce_mean(loss * weights)

            train_vars = self.model.trainable_variables
            if backprop:
                grad = tape.gradient(loss, train_vars)
                self.optim.apply_gradients(zip(grad, train_vars))
            self.metric(loss)

        self.iters += 1
        return np.float32(loss)
コード例 #3
0
    def train(self, input_image, p, q, theta):
        """Run one contrastive training step on a single (p, q, theta) example.

        The forward output is indexed as `output[rotation, row, col, :]` to
        read descriptors. The descriptor at `p` (rotation index 0) is pulled
        towards the descriptor at `q` (rotation index `itheta`) and pushed
        away from 100 descriptors sampled at other (row, col, rotation)
        entries, using `tfa.losses.contrastive_loss`. A gradient step is
        always applied.

        Args:
          input_image: input passed to `self.forward`; its first two
            dimensions set the spatial size of the sampling map.
          p: pair of indices (row, col) of the first location.
          q: pair of indices (row, col) of the matching location.
          theta: angle in radians; rounded to the nearest of
            `self.num_rotations` bins, wrapping around.

        Returns:
          The loss converted with `np.float32`.
        """
        self.metric.reset_states()
        with tf.GradientTape() as tape:
            output = self.forward(input_image)

            # Discretise the angle into a rotation-bin index.
            itheta = theta / (2 * np.pi / self.num_rotations)
            itheta = np.int32(np.round(itheta)) % self.num_rotations

            p_descriptor = output[0, p[0], p[1], :]
            q_descriptor = output[itheta, q[0], q[1], :]

            # Positive pair: a single distance with label 1.
            positive_distances = tf.reshape(
                tf.linalg.norm(p_descriptor - q_descriptor), (1, ))
            positive_loss = tfa.losses.contrastive_loss(
                tf.constant([1], dtype=tf.int32), positive_distances)

            # Negative pairs: sample entries other than the two positives.
            num_samples = 100
            excluded = np.zeros(input_image.shape[:2] +
                                (self.num_rotations, ))
            excluded[p[0], p[1], 0] = 1
            excluded[q[0], q[1], itheta] = 1
            inegative = utils.sample_distribution(1 - excluded, num_samples)

            # Each sample is read as (row, col, rotation); the output is
            # indexed rotation-first.
            negative_distances = tf.concat([
                tf.reshape(
                    tf.linalg.norm(p_descriptor -
                                   output[inegative[k, 2], inegative[k, 0],
                                          inegative[k, 1], :]), (1, ))
                for k in range(num_samples)
            ],
                                           axis=0)
            negative_labels = tf.concat(
                [tf.constant([0], dtype=tf.int32) for _ in range(num_samples)],
                axis=0)
            negative_loss = tfa.losses.contrastive_loss(
                negative_labels, negative_distances)

            loss = tf.reduce_mean(positive_loss) + tf.reduce_mean(
                negative_loss)

        # Backpropagate.
        train_vars = self.model.trainable_variables
        grad = tape.gradient(loss, train_vars)
        self.optim.apply_gradients(zip(grad, self.model.trainable_variables))

        self.metric(loss)
        return np.float32(loss)
コード例 #4
0
    def get_random_pose(self, env, obj_size):
        """Sample a random pose for an object over free workspace pixels.

        Every pixel whose mask value matches an id in `env.obj_ids` is marked
        occupied. The remaining free-space mask has its border cleared and is
        eroded by the object's planar diagonal (in pixels) before one pixel is
        sampled from it.

        Args:
          env: environment handle; `env.obj_ids` is read as a mapping whose
            values are iterables of object ids, and `env` is forwarded to
            `self.get_true_image`.
          obj_size: indexable with at least three entries; entries 0 and 1
            are the planar extents and entry 2 is the height.

        Returns:
          `(pos, rot)` where `pos` is `(x, y, obj_size[2] / 2)` and `rot` is
          the value of `utils.eulerXYZ_to_quatXYZW` for a random rotation
          about z only, or `None` when no free pixel is left after erosion.
        """
        # Erosion kernel size: the object's planar diagonal in pixels.
        diagonal = np.sqrt(obj_size[0]**2 + obj_size[1]**2)
        kernel_px = int(np.round(diagonal / self.pix_size))

        _, hmap, obj_mask = self.get_true_image(env)

        # Start with everything free, then knock out pixels of known objects.
        free = np.ones(obj_mask.shape, dtype=np.uint8)
        known_ids = [oid for group in env.obj_ids.values() for oid in group]
        for oid in known_ids:
            free[obj_mask == oid] = 0

        # Clear the one-pixel border so erosion also shrinks the image edges.
        free[0, :] = 0
        free[:, 0] = 0
        free[-1, :] = 0
        free[:, -1] = 0
        free = cv2.erode(free, np.ones((kernel_px, kernel_px), np.uint8))
        if not np.sum(free):
            return None

        pix = utils.sample_distribution(np.float32(free))
        xyz = utils.pix_to_xyz(pix, hmap, self.bounds, self.pix_size)
        theta = np.random.rand() * 2 * np.pi
        rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))

        # Keep the sampled x/y but use the object's half height for z.
        return (xyz[0], xyz[1], obj_size[2] / 2), rot
コード例 #5
0
        def act(obs, info):  # pylint: disable=unused-argument
            """Compute the next pick-and-place action for the current goal.

            Uses the heightmap and segmentation mask from
            `self.get_true_image(env)` and the first entry of `self.goals`.
            Objects are ranked by the distance to their nearest still-allowed
            target, farthest first, and the first one visible in the mask is
            picked.

            Args:
              obs: unused.
              info: unused.

            Returns:
              A dict with keys `'pose0'` (pick pose) and `'pose1'` (place
              pose), each a `(position, quaternion)` pair. Returns `None`,
              after clearing `self.goals`, when no candidate object is
              visible in the mask.
            """
            # Heightmap and segmentation mask; the first output is not used.
            _, hmap, obj_mask = self.get_true_image(env)

            # Unpack the goal step currently being worked on.
            objs, matches, targs, replace, rotations, _, _, _ = self.goals[0]

            # Match objects to targets without replacement.
            if not replace:
                # Edit a copy so the stored match matrix is left untouched.
                matches = matches.copy()

                # Zero the row and column of every pair that already matches.
                for i, (object_id, (symmetry, _)) in enumerate(objs):
                    pose = p.getBasePositionAndOrientation(object_id)
                    for j in np.argwhere(matches[i, :]).reshape(-1):
                        if self.is_match(pose, targs[j], symmetry):
                            matches[i, :] = 0
                            matches[:, j] = 0

            # For each object, find its nearest allowed target and distance.
            nn_dists = []
            nn_targets = []
            for i, (object_id, (_, _)) in enumerate(objs):
                xyz, _ = p.getBasePositionAndOrientation(object_id)
                candidates = np.argwhere(matches[i, :]).reshape(-1)

                # Objects with no remaining target get a zero distance so the
                # filter below drops them.
                if candidates.size == 0:
                    nn_dists.append(0)
                    nn_targets.append(-1)
                    continue

                candidate_xyz = np.float32([targs[j][0] for j in candidates])
                dists = np.linalg.norm(candidate_xyz -
                                       np.float32(xyz).reshape(1, 3),
                                       axis=1)
                nearest = np.argmin(dists)
                nn_dists.append(dists[nearest])
                nn_targets.append(candidates[nearest])

            # Farthest-first order, keeping only objects with a positive
            # distance.
            order = [i for i in np.argsort(nn_dists)[::-1] if nn_dists[i] > 0]

            # Take the first candidate with at least one pixel in the mask.
            # NOTE: no erosion is applied to the mask, so edge pixels can be
            # sampled as pick points.
            pick_mask = None
            for pick_i in order:
                pick_mask = np.uint8(obj_mask == objs[pick_i][0])
                if np.sum(pick_mask) > 0:
                    break

            # Trigger task reset if no object is visible.
            if pick_mask is None or np.sum(pick_mask) == 0:
                self.goals = []
                print(
                    'Object for pick is not visible. Skipping demonstration.')
                return None

            # Picking pose: a sampled pixel on the object, identity rotation.
            pick_pix = utils.sample_distribution(np.float32(pick_mask))
            pick_pos = utils.pix_to_xyz(pick_pix, hmap, self.bounds,
                                        self.pix_size)
            pick_pose = (pick_pos, (0, 0, 0, 1))

            # Placing pose: carry the pick-to-object offset over to the target.
            targ_pose = targs[nn_targets[pick_i]]  # pylint: disable=undefined-loop-variable
            obj_pose = p.getBasePositionAndOrientation(objs[pick_i][0])  # pylint: disable=undefined-loop-variable
            if not self.sixdof:
                # Keep only the rotation about z of the object's orientation.
                yaw = utils.quatXYZW_to_eulerXYZ(obj_pose[1])[2]
                obj_pose = (obj_pose[0],
                            utils.eulerXYZ_to_quatXYZW((0, 0, yaw)))
            obj_to_pick = utils.multiply(utils.invert(pick_pose), obj_pose)
            place_pose = utils.multiply(targ_pose, utils.invert(obj_to_pick))

            # Without rotations, place with the identity orientation.
            if not rotations:
                place_pose = (place_pose[0], (0, 0, 0, 1))

            return {'pose0': pick_pose, 'pose1': place_pose}
コード例 #6
0
    def train(self, in_img, p, q, theta):
        """Run one training step that transports pixel p to pixel q.

        A label volume of shape `in_img.shape[:2] + (num_rotations,)` is
        built with a single 1 at `(q[0], q[1], itheta)`. Two losses are
        supported:

        * `self.per_pixel_loss` set: the output is flattened to rows of two
          logits, and a weighted two-class softmax cross-entropy is taken
          over the positive entry and 100 sampled negative entries.
        * otherwise: the output and the label are each flattened to one row
          and a single softmax cross-entropy is taken over all entries.

        A gradient step is always applied.

        Args:
          in_img: input image, passed to `self.forward`; per the original
            notes it holds both colour and depth.
          p: pixel (y, x), passed to `self.forward`.
          q: pixel (y, x) marking the positive location in the label.
          theta: angle in radians; rounded to the nearest of
            `self.num_rotations` bins, wrapping around.

        Returns:
          The loss converted with `np.float32`.
        """
        self.metric.reset_states()
        with tf.GradientTape() as tape:
            logits = self.forward(in_img, p, apply_softmax=False)

            # Discretise the angle into a rotation-bin index.
            itheta = theta / (2 * np.pi / self.num_rotations)
            itheta = np.int32(np.round(itheta)) % self.num_rotations

            # One-hot volume marking the single positive entry.
            onehot = np.zeros(in_img.shape[:2] + (self.num_rotations, ))
            onehot[q[0], q[1], itheta] = 1

            if not self.per_pixel_loss:
                # One softmax over every (pixel, rotation) entry at once; the
                # leading 1 is the batch size.
                target = tf.convert_to_tensor(onehot.reshape(1, -1),
                                              dtype=tf.float32)
                logits = tf.reshape(logits, (1, -1))
                loss = tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                               logits=logits)
                loss = tf.reduce_mean(loss)

            else:
                # Both variants need the flat positive index and 2-column
                # logits.
                ipositive = np.ravel_multi_index([q[0], q[1], itheta],
                                                 onehot.shape)
                logits = tf.reshape(logits, (-1, 2))

                sampling = True  # sampling negatives seems to converge faster
                if sampling:
                    num_samples = 100
                    negatives = utils.sample_distribution(
                        1 - onehot, num_samples)
                    rows = [
                        np.ravel_multi_index(idx, onehot.shape)
                        for idx in negatives
                    ]
                    rows.append(ipositive)  # The positive row goes last.
                    logits = tf.concat(
                        [tf.reshape(logits[r, :], (1, 2)) for r in rows],
                        axis=0)

                    # Column 0 is the "negative" class, column 1 "positive".
                    is_positive = np.int32([0] * num_samples + [1])[:, None]
                    label = np.hstack((1 - is_positive, is_positive))

                    # Negatives share a total weight of one, matching the
                    # single positive, before normalisation.
                    weights = np.ones(label.shape[0])
                    weights[:num_samples] = 1. / num_samples
                    weights = weights / np.sum(weights)

                else:
                    is_positive = np.int32(onehot.reshape(-1, 1))
                    label = np.hstack((1 - is_positive, is_positive))
                    weights = np.ones(
                        label.shape[0]) * 0.0025  # magic constant
                    weights[ipositive] = 1

                label = tf.convert_to_tensor(label, dtype=tf.int32)
                weights = tf.convert_to_tensor(weights, dtype=tf.float32)
                loss = tf.nn.softmax_cross_entropy_with_logits(labels=label,
                                                               logits=logits)
                loss = tf.reduce_mean(loss * weights)

        train_vars = self.model.trainable_variables
        grad = tape.gradient(loss, train_vars)
        self.optim.apply_gradients(zip(grad, self.model.trainable_variables))

        self.metric(loss)

        return np.float32(loss)
コード例 #7
0
    def train(self,
              in_img,
              p,
              q,
              theta,
              z=None,
              roll=None,
              pitch=None,
              validate=False):
        """Run one training (or validation) step transporting pixel p to q.

        A label volume of shape `in_img.shape[:2] + (num_rotations,)` is
        built with a single 1 at `(q[0], q[1], itheta)`.

        * With `self.per_pixel_loss`, a weighted two-class softmax
          cross-entropy over the positive entry and 100 sampled negatives is
          computed.
        * Without it, and when `self.six_dof` is off, one softmax
          cross-entropy over all flattened entries is computed.
        * When `self.six_dof` is on, the forward pass also returns z, roll
          and pitch tensors. A window around `(q, itheta)` is cut from each,
          fed through the matching regressor and compared with the labels.
          The optimised loss is then the sum of these three regression
          losses only.

        Args:
          in_img: input image, passed to `self.forward`.
          p: pixel (y, x), passed to `self.forward`.
          q: pixel (y, x) marking the positive location.
          theta: angle in radians; rounded to the nearest of
            `self.num_rotations` bins, wrapping around. Also forwarded to
            `self.forward`.
          z: regression label for z; read only when `self.six_dof` is set.
          roll: regression label for roll; read only when `self.six_dof` is
            set.
          pitch: regression label for pitch; read only when `self.six_dof`
            is set.
          validate: when True the loss is computed and recorded but no
            gradient step is applied.

        Returns:
          The final loss converted with `np.float32`.
        """
        visualize_input = False
        if visualize_input and self.six_dof:  # only supported for six dof model
            self.visualize_train_input(in_img, p, q, theta, z, roll, pitch)

        self.metric.reset_states()
        if self.six_dof:
            for metric in (self.z_metric, self.roll_metric,
                           self.pitch_metric):
                metric.reset_states()

        with tf.GradientTape() as tape:
            output = self.forward(in_img, p, apply_softmax=False, theta=theta)

            if self.six_dof:
                # The six-dof forward pass returns the regression maps too.
                z_label, roll_label, pitch_label = z, roll, pitch
                output, z_tensor, roll_tensor, pitch_tensor = output

            # Discretise the angle into a rotation-bin index.
            itheta = theta / (2 * np.pi / self.num_rotations)
            itheta = np.int32(np.round(itheta)) % self.num_rotations

            # One-hot volume marking the single positive entry.
            onehot = np.zeros(in_img.shape[:2] + (self.num_rotations, ))
            onehot[q[0], q[1], itheta] = 1

            if self.per_pixel_loss:
                # Both variants need the flat positive index and 2-column
                # logits.
                ipositive = np.ravel_multi_index([q[0], q[1], itheta],
                                                 onehot.shape)
                output = tf.reshape(output, (-1, 2))

                sampling = True  # sampling negatives seems to converge faster
                if sampling:
                    num_samples = 100
                    negatives = utils.sample_distribution(
                        1 - onehot, num_samples)
                    rows = [
                        np.ravel_multi_index(idx, onehot.shape)
                        for idx in negatives
                    ]
                    rows.append(ipositive)  # The positive row goes last.
                    output = tf.concat(
                        [tf.reshape(output[r, :], (1, 2)) for r in rows],
                        axis=0)

                    # Column 0 is the "negative" class, column 1 "positive".
                    is_positive = np.int32([0] * num_samples + [1])[:, None]
                    label = np.hstack((1 - is_positive, is_positive))

                    # Negatives share a total weight of one, matching the
                    # single positive, before normalisation.
                    weights = np.ones(label.shape[0])
                    weights[:num_samples] = 1. / num_samples
                    weights = weights / np.sum(weights)

                else:
                    is_positive = np.int32(onehot.reshape(-1, 1))
                    label = np.hstack((1 - is_positive, is_positive))
                    weights = np.ones(
                        label.shape[0]) * 0.0025  # magic constant
                    weights[ipositive] = 1

                label = tf.convert_to_tensor(label, dtype=tf.int32)
                weights = tf.convert_to_tensor(weights, dtype=tf.float32)
                loss = tf.nn.softmax_cross_entropy_with_logits(labels=label,
                                                               logits=output)
                loss = tf.reduce_mean(loss * weights)
                transport_loss = loss

            elif not self.six_dof:
                # One softmax over every (pixel, rotation) entry at once.
                target = tf.convert_to_tensor(onehot.reshape(1, -1),
                                              dtype=tf.float32)
                output = tf.reshape(output, (1, -1))
                loss = tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                               logits=output)
                loss = tf.reduce_mean(loss)
                transport_loss = loss

            if self.six_dof:

                # Regress over a window around the label rather than only
                # the exact entry. The window is clipped to the tensor
                # bounds; the rotation axis is clipped, not wrapped.
                u_window = 7
                v_window = 7
                theta_window = 1
                u_lo = max(q[0] - u_window, 0)
                u_hi = min(q[0] + u_window + 1, z_tensor.shape[1])
                v_lo = max(q[1] - v_window, 0)
                v_hi = min(q[1] + v_window + 1, z_tensor.shape[2])
                theta_lo = max(itheta - theta_window, 0)
                theta_hi = min(itheta + theta_window + 1, z_tensor.shape[3])
                window = (0, slice(u_lo, u_hi), slice(v_lo, v_hi),
                          slice(theta_lo, theta_hi))

                # Flatten each window to a column of scalar inputs.
                z_est_at_xytheta = tf.reshape(z_tensor[window], (-1, 1))
                roll_est_at_xytheta = tf.reshape(roll_tensor[window], (-1, 1))
                pitch_est_at_xytheta = tf.reshape(pitch_tensor[window],
                                                  (-1, 1))

                z_est_at_xytheta = self.z_regressor(z_est_at_xytheta)
                roll_est_at_xytheta = self.roll_regressor(roll_est_at_xytheta)
                pitch_est_at_xytheta = self.pitch_regressor(
                    pitch_est_at_xytheta)

                z_weight = 10.0
                roll_weight = 10.0
                pitch_weight = 10.0

                # Add a leading axis to each label.
                z_label = tf.convert_to_tensor(z_label)[None, ...]
                roll_label = tf.convert_to_tensor(roll_label)[None, ...]
                pitch_label = tf.convert_to_tensor(pitch_label)[None, ...]

                z_loss = z_weight * self.regress_loss(z_label,
                                                      z_est_at_xytheta)
                roll_loss = roll_weight * self.regress_loss(
                    roll_label, roll_est_at_xytheta)
                pitch_loss = pitch_weight * self.regress_loss(
                    pitch_label, pitch_est_at_xytheta)

                # This replaces any loss computed above for the six-dof
                # model.
                loss = z_loss + roll_loss + pitch_loss

        # The six-dof model also trains the three regressors.
        train_vars = self.model.trainable_variables
        if self.six_dof:
            train_vars = (train_vars +
                          self.z_regressor.trainable_variables +
                          self.roll_regressor.trainable_variables +
                          self.pitch_regressor.trainable_variables)

        if not validate:
            grad = tape.gradient(loss, train_vars)
            self.optim.apply_gradients(zip(grad, train_vars))

        if self.six_dof:
            self.z_metric(z_loss)
            self.roll_metric(roll_loss)
            self.pitch_metric(pitch_loss)
        else:
            self.metric(transport_loss)

        return np.float32(loss)