Example 1
def control_point_l1_loss(pred_control_points,
                          gt_control_points,
                          confidence=None,
                          confidence_weight=None):
    """
      Computes the L1 loss between the predicted control points and the
      ground-truth control points on the gripper.
    """
    confidence_term = tf.constant(0, dtype=tf.float32)
    print('control_point_l1_loss', get_shape(pred_control_points),
          get_shape(gt_control_points))
    error = tf.reduce_sum(tf.abs(pred_control_points - gt_control_points), -1)
    error = tf.reduce_mean(error, -1)
    if confidence is not None:
        assert (confidence_weight is not None)
        error *= confidence
        confidence_term = tf.reduce_mean(tf.log(tf.maximum(
            confidence, 1e-10))) * confidence_weight
        print('confidence_term = ', get_shape(confidence_term))

    print('l1_error = {}'.format(get_shape(error)))
    if confidence is None:
        return tf.reduce_mean(error)
    else:
        return tf.reduce_mean(error), -confidence_term
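A rough reference for the reduction order used above (sum of absolute coordinate differences, mean over control points, mean over the batch); a minimal NumPy sketch with made-up shapes, not part of the original module:

import numpy as np

pred = np.zeros((2, 5, 3), dtype=np.float32)   # 2 grasps, 5 control points, xyz
gt = np.ones((2, 5, 3), dtype=np.float32)

per_point = np.abs(pred - gt).sum(axis=-1)     # L1 distance per control point
per_grasp = per_point.mean(axis=-1)            # mean over the gripper points
print(per_grasp.mean())                        # batch mean, here 3.0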
Example 2
def scaled_dot_product_attention(Q, K, V, dropout_rate=0.0):
    scaler = tf.rsqrt(tf.to_float(
        tf_utils.get_shape(Q)[2]))  # depth of the query
    logits = tf.matmul(Q, K, transpose_b=True) * scaler
    weights = tf.nn.softmax(logits)
    weights = tf.nn.dropout(weights, 1.0 - dropout_rate)
    return tf.matmul(weights, V)
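For reference, a rough NumPy equivalent of the attention computation (dropout omitted; shapes and values are made up):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

Q = np.random.rand(1, 4, 8)                    # (batch, query length, depth)
K = np.random.rand(1, 6, 8)                    # (batch, key length, depth)
V = np.random.rand(1, 6, 8)

logits = Q @ K.transpose(0, 2, 1) / np.sqrt(Q.shape[2])   # scale by sqrt(depth)
print((softmax(logits) @ V).shape)             # (1, 4, 8)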
Example 3
def control_point_l1_loss_better_than_threshold(pred_control_points,
                                                gt_control_points, confidence,
                                                confidence_threshold):
    npoints = get_shape(pred_control_points)[1]
    mask = tf.greater_equal(confidence, confidence_threshold)
    mask_ratio = tf.reduce_mean(tf.to_float(mask))
    mask = tf.tile(mask, [1, npoints])
    p1 = tf.boolean_mask(pred_control_points, mask)
    p2 = tf.boolean_mask(gt_control_points, mask)

    return control_point_l1_loss(p1, p2), mask_ratio
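A rough NumPy illustration of the confidence-masking step (hypothetical threshold and shapes; tf.boolean_mask additionally flattens the surviving rows):

import numpy as np

pred = np.random.rand(4, 5, 3)                 # 4 grasps, 5 control points each
confidence = np.array([0.9, 0.1, 0.7, 0.3])
keep = confidence >= 0.5                       # analogous to tf.greater_equal
print(pred[keep].shape)                        # (2, 5, 3): confident grasps only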
Example 4
def merge_pc_and_gripper_pc(pc,
                            gripper_pc,
                            instance_mode=0,
                            pc_latent=None,
                            gripper_pc_latent=None):
    """
    Merges the object point cloud and gripper point cloud and
    adds a binary auxiliary feature that indicates whether each point
    belongs to the object or to the gripper.
    """

    pc_shape = get_shape(pc)
    gripper_shape = get_shape(gripper_pc)
    assert (len(pc_shape) == 3)
    assert (len(gripper_shape) == 3)
    assert (pc_shape[0] == gripper_shape[0])

    npoints = get_shape(pc)[1]
    batch_size = tf.shape(pc)[0]

    if instance_mode == 1:
        assert pc_shape[-1] == 3
        latent_dist = [pc_latent, gripper_pc_latent]
        latent_dist = tf.concat(latent_dist, 1)

    l0_xyz = tf.concat((pc, gripper_pc), 1)
    labels = [
        tf.ones((get_shape(pc)[1], 1), dtype=tf.float32),
        tf.zeros((get_shape(gripper_pc)[1], 1), dtype=tf.float32)
    ]
    labels = tf.concat(labels, 0)
    labels = tf.expand_dims(labels, 0)
    labels = tf.tile(labels, [batch_size, 1, 1])

    if instance_mode == 1:
        l0_points = tf.concat([l0_xyz, latent_dist, labels], -1)
    else:
        l0_points = tf.concat([l0_xyz, labels], -1)

    return l0_xyz, l0_points
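A sketch of the binary indicator feature with made-up sizes: 1 marks object points, 0 marks gripper points, concatenated along the point axis and tiled over the batch:

import numpy as np

batch, n_obj, n_grip = 2, 6, 4
labels = np.concatenate([np.ones((n_obj, 1)), np.zeros((n_grip, 1))], axis=0)
labels = np.tile(labels[None], (batch, 1, 1))  # (2, 10, 1)
print(labels[0, :, 0])                         # [1. 1. 1. 1. 1. 1. 0. 0. 0. 0.]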
Example 5
def get_decoded_features_depth(raw_depth_and_masks, encoded_depth,
                               reg_constant):

    pyr_top_conv = normal_convolution(num_filters=196,
                                      reg_constant=reg_constant,
                                      input_shape=get_shape(
                                          encoded_depth[-1][-1]),
                                      num='_pyr_top')
    norm_top, conf_top = pyr_top_conv(encoded_depth[-1][0],
                                      encoded_depth[-1][1])
    decoder = [[norm_top, conf_top]]
    for i in range(4, 0, -1):
        norm, conf = encoded_depth[i]
        channels = norm.get_shape().as_list()[-1]
        #upsampling_lower_level
        with tf.variable_scope("upsampling" + str(i + 1) + 'to' + str(i)):
            upsample_conv = normal_convolution(
                num_filters=channels,
                reg_constant=reg_constant,
                input_shape=get_shape(decoder[-1][1]),
                num=str(i + 2) + 'to' + str(i + 1))
            upsample_norm, upsample_conf = upsample_conv(
                decoder[-1][0], decoder[-1][1])
            upsample_norm = upsample(upsample_norm)
            upsample_conf = upsample(upsample_conf)

        #refinement
        with tf.variable_scope("refine" + str(i)):
            pyramid_feature_n = upsample_norm + norm
            pyramid_feature_c = upsample_conf + conf
            refine_layer = normal_convolution(
                num_filters=channels,
                reg_constant=reg_constant,
                input_shape=get_shape(pyramid_feature_n),
                num='refinement')
            refined_n, refined_c = refine_layer(pyramid_feature_n,
                                                pyramid_feature_c)
        decoder.append([refined_n, refined_c])
    return decoder
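A minimal sketch of the top-down pyramid pattern used above (upsample the coarser level, add the skip connection, then refine); the normalized convolutions and confidences are stubbed out, and the shapes are illustrative only:

import numpy as np

def upsample2x(x):
    return x.repeat(2, axis=0).repeat(2, axis=1)

encoder = [np.random.rand(32 // 2**i, 32 // 2**i, 8) for i in range(4)]
decoder = [encoder[-1]]
for level in reversed(encoder[:-1]):
    merged = upsample2x(decoder[-1]) + level   # skip connection
    decoder.append(merged)                     # a refinement conv would go here
print([d.shape for d in decoder])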
Example 6
def get_features_encoder_depth(depth, masks, reg_constant):
    depth_encoder = []
    filters_by_level = [16, 32, 64, 96, 128, 196]
    for i, num_filters in enumerate(filters_by_level):
        if i == 0:
            data, conf = max_pool_normalized(depth, masks)
            norm_inst = normal_convolution(num_filters=num_filters,
                                           num=str(i) + 'b',
                                           input_shape=get_shape(data),
                                           reg_constant=reg_constant)
            data, conf = norm_inst(data, conf)
            depth_encoder.append([data, conf])
        else:
            data, conf = depth_encoder[-1]
            data, conf = max_pool_normalized(data, conf)
            norm_inst = normal_convolution(num_filters=num_filters,
                                           num=str(i) + 'b',
                                           input_shape=get_shape(data),
                                           reg_constant=reg_constant)
            data, conf = norm_inst(data, conf)
            depth_encoder.append([data, conf])
    return depth_encoder
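A small sketch of the encoder's level structure: the spatial resolution halves at each max-pool while the channel count follows filters_by_level (the input size is illustrative only):

filters_by_level = [16, 32, 64, 96, 128, 196]
size = 256
for i, f in enumerate(filters_by_level):
    size //= 2                                 # one pooling step per level
    print('level', i, '->', (size, size, f))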
Example 7
def verify_tensor_size(t, expected_shape):
    """
    Checks whether the input tensor t has the expected_shape.

    Args:
      t: input tensor
      expected_shape: list of int indicating the expected shape.
    """
    shape = get_shape(t)
    if len(shape) != len(expected_shape):
        raise ValueError('shapes do not match: {} != {}'.format(
            shape, expected_shape))

    if np.any(np.asarray(shape) != np.asarray(expected_shape)):
        raise ValueError('shapes do not match: {} != {}'.format(
            shape, expected_shape))
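A hypothetical usage sketch, assuming the function above is available together with a get_shape helper that returns the static shape as a list (a local stand-in is defined here):

import numpy as np

def get_shape(t):                              # stand-in for the module helper
    return list(t.shape)

verify_tensor_size(np.zeros((4, 3)), [4, 3])   # passes silently
try:
    verify_tensor_size(np.zeros((4, 3)), [4, 5])
except ValueError as e:
    print(e)                                   # shapes do not match: ...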
Example 8
def accuracy_better_than_threshold(pred_success_logits, gt, confidence,
                                   confidence_threshold):
    """
      Computes the class-balanced accuracy (mean of positive and negative
      accuracy) for the grasps with confidence > threshold.
    """
    pred_classes = tf.cast(tf.argmax(pred_success_logits, -1), tf.int32)
    correct = tf.to_float(tf.equal(pred_classes, gt))
    mask = tf.squeeze(
        tf.to_float(tf.greater_equal(confidence, confidence_threshold)), -1)

    gt = tf.to_float(gt)
    positive_acc = tf.reduce_sum(correct * mask * gt) / tf.maximum(
        tf.reduce_sum(mask * gt), 1.)
    negative_acc = tf.reduce_sum(correct * mask * (1. - gt)) / tf.maximum(
        tf.reduce_sum(mask * (1. - gt)), 1.)

    return 0.5 * (positive_acc +
                  negative_acc), tf.reduce_sum(mask) / get_shape(gt)[0]
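A NumPy sketch of the balanced-accuracy idea with made-up labels: accuracy is averaged separately over the positives and negatives that pass the confidence mask, and the two values are then averaged:

import numpy as np

correct = np.array([1., 0., 1., 1.])
gt = np.array([1., 1., 0., 0.])
mask = np.array([1., 1., 1., 0.])              # confidence above threshold

pos = (correct * mask * gt).sum() / max((mask * gt).sum(), 1.)
neg = (correct * mask * (1. - gt)).sum() / max((mask * (1. - gt)).sum(), 1.)
print(0.5 * (pos + neg), mask.sum() / len(gt)) # 0.75 0.75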
Example 9
def build_vae_ops(data_dict, args, scope='vae'):
    """
      Builds the VAE operations that are required for training/inference of
      the VAE.

      Args:
        data_dict: dict, contains the tensors for the input to the model.
        args: arguments that are set for training.
        scope: string.
      
      Returns:
        train_op, summary_op, data_dict, logger_dict, global_step
        train_op: tf op for running training.
        summary_op: tf summary op that needs to be run for populating the
            summaries.
        data_dict: dictionary of tensors. Keys are tensor names and values
            are tensors. New keys and tensors will be added to the input
            data_dict.
        logger_dict: dictionary of tensors for printing.
        global_step: the global step tensor that keeps the training step number.
    """
    losses = None
    summaries = None
    train_op = None
    logger_dict = None
    summary_op = None
    global_step = None
    first_dimension = args.num_objects_per_batch * args.num_grasps_per_object
    is_training = args.is_training

    with tf.variable_scope(scope):
        if is_training:
            assert '{}_pred/samples'.format(scope) not in data_dict
            input_pcs = data_dict['{}_pc'.format(scope)]
            losses = {}
            summaries = {}

            gt_control_points = tf_utils.transform_control_points(
                data_dict['{}_grasp_rt'.format(scope)],
                first_dimension,
                mode='rt')
            gt_control_points = tf.slice(gt_control_points, [0, 0, 0],
                                         [-1, -1, 3])
            data_dict['{}_gt_control_point'.format(scope)] = gt_control_points
            pc_input = tf.slice(input_pcs, [0, 0, 0], [-1, -1, 3])

            if not args.gan:  # Create Encoder.
                latent_input = data_dict['{}_grasp_rt'.format(scope)]
                batch_size = get_shape(pc_input)[0]
                npoints = get_shape(pc_input)[1]
                latent_input = tf.tile(
                    tf.reshape(latent_input, [batch_size, 1, -1]),
                    [1, npoints, 1])

                with tf.variable_scope('encoder'):
                    latent_mean_std = models.model.model_with_confidence(
                        pc_input,
                        latent_input,
                        is_training=tf.constant(is_training),
                        bn_decay=None,
                        is_encoder=True,
                        latent_size=args.latent_size,
                        scale=args.model_scale,
                        merge_pcs=args.merge_pcs_in_vae_encoder,
                        pointnet_radius=args.pointnet_radius,
                        pointnet_nclusters=args.pointnet_nclusters)

                    latent_mean = tf.slice(latent_mean_std, [0, 0],
                                           [-1, args.latent_size])
                    latent_std = tf.slice(latent_mean_std,
                                          [0, args.latent_size],
                                          [-1, args.latent_size])

                with tf.variable_scope('sample_from_latent'):
                    samples = latent_mean + tf.exp(
                        latent_std / 2.0) * tf.random_normal(
                            latent_mean.shape, 0, 1, dtype=tf.float32)
                    data_dict['{}_pred/samples'.format(scope)] = samples

                kl_loss = models.model.kl_divergence(latent_mean, latent_std)
                kl_loss = tf.reduce_mean(kl_loss)
                losses['kl_loss'] = kl_loss * args.kl_loss_weight
                summaries['unscaled_kl_loss'] = kl_loss
            else:  # For gan just sample random latents.
                samples = tf.random.uniform(
                    [first_dimension, args.latent_size], name='gan_latents')
        else:
            input_pcs = data_dict['{}_pc'.format(scope)]
            samples = data_dict['{}_pred/samples'.format(scope)]

        with tf.variable_scope('decoder'):
            pc_input = tf.slice(input_pcs, [0, 0, 0], [-1, -1, 3])

            latent_input = samples
            batch_size = get_shape(pc_input)[0]
            npoints = get_shape(pc_input)[1]
            latent_input = tf.tile(
                tf.reshape(latent_input, [batch_size, 1, -1]), [1, npoints, 1])

            q, t, confidence = models.model.model_with_confidence(
                pc_input,
                latent_input,
                tf.constant(is_training),
                bn_decay=None,
                is_encoder=False,
                latent_size=None,
                scale=args.model_scale,
                pointnet_radius=args.pointnet_radius,
                pointnet_nclusters=args.pointnet_nclusters)
            predicted_qt = tf.concat((q, t), -1)
            data_dict['{}_pred/grasp_qt'.format(scope)] = predicted_qt
            data_dict['{}_pred/confidence'.format(scope)] = confidence

            cp = tf_utils.transform_control_points(
                predicted_qt,
                get_shape(data_dict['{}_pc'.format(scope)])[0],
                scope='transform_predicted_qt')
            data_dict['{}_pred/cps'.format(scope)] = cp

        if is_training:
            loss_fn = None
            if args.gan:
                loss_fn = models.model.min_distance_loss
            else:
                loss_fn = models.model.control_point_l1_loss

            loss_term, confidence_term = loss_fn(
                cp,
                gt_control_points,
                confidence=confidence,
                confidence_weight=args.confidence_weight)
            data_dict['{}_loss'.format(scope)] = loss_term
            loss_key = 'gan_min_dist' if args.gan else 'L1_grasp_reconstruction'
            losses[loss_key] = loss_term
            losses['confidence'] = confidence_term

            for c in CONFIDENCES:
                qkey = 'quality_at_confidence/{}'.format(c)
                rkey = 'ratio_at_confidence/{}'.format(c)
                summary_fn = models.model.control_point_l1_loss_better_than_threshold
                if args.gan:
                    summary_fn = models.model.min_distance_better_than_threshold
                summaries[qkey], summaries[rkey] = summary_fn(
                    cp, gt_control_points, confidence, c)

            global_step = tf.train.get_or_create_global_step()
            total_loss = tf.reduce_sum(tf.stack(list(losses.values())))
            summaries['total_loss'] = total_loss
            learning_rate = tf.constant(args.lr, dtype=tf.float32)

            if args.ngpus > 1:
                optimizer = tf.train.AdamOptimizer(learning_rate * hvd.size())
                optimizer = hvd.DistributedOptimizer(optimizer)
            else:
                optimizer = tf.train.AdamOptimizer(learning_rate)

            train_op = optimizer.minimize(total_loss, global_step=global_step)
            summaries['global_step'] = global_step
            for k in losses:
                summaries['loss/{}'.format(k)] = losses[k]

            logger_dict = {}
            for k, v in summaries.items():
                logger_dict[k] = summaries[k]
                summaries[k] = tf.summary.scalar(k, v)

            summary_op = tf.summary.merge(list(summaries.values()))

        return train_op, summary_op, data_dict, logger_dict, global_step
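The sample_from_latent block above is the standard VAE reparameterization; a small NumPy sketch of just that step, reading the second encoder head as a log-variance (as exp(latent_std / 2.0) suggests), with illustrative sizes:

import numpy as np

latent_mean = np.zeros((8, 2), dtype=np.float32)
latent_logvar = np.zeros((8, 2), dtype=np.float32)
noise = np.random.normal(0., 1., latent_mean.shape).astype(np.float32)
samples = latent_mean + np.exp(latent_logvar / 2.0) * noise
print(samples.shape)                           # (8, 2)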
Example 10
def build_evaluator_ops(data_dict, args, scope='evaluator', npoints=-1):
    """
    Builds all the tf ops necessary for training/evaluating the evaluator
    network.
    
    Args:
      data_dict: dict, contains all the tensors for input and will be populated
        with more intermediate tensors.
      args: arguments that are set for training.

    Returns:
      train_op, summary_op, data_dict, logger_dict, global_step
      train_op: tf op for running training.
      summary_op: tf summary op that needs to be run for populating the
        summaries.
      data_dict: dictionary of tensors. Keys are tensor names and values
        are tensors. New keys and tensors will be added to the input
        data_dict.
      logger_dict: dictionary of tensors for printing.
      global_step: the global step tensor that keeps the training step number.
    """
    logger_dict = {}
    summary_dict = {}
    global_step = None
    pc = data_dict['{}_pc'.format(scope)]
    gripper_pc_latent = None
    pc_latent = None

    gt_cps = tf_utils.get_control_point_tensor(get_shape(pc)[0])
    ones = tf.ones((get_shape(gt_cps)[0], get_shape(gt_cps)[1], 1),
                   dtype=tf.float32)
    gt_cps = tf.concat((gt_cps, ones), -1)  # B x N x 4

    data_dict['{}_gt_cps'.format(scope)] = gt_cps
    if args.gripper_pc_npoints == -1:  # Use a pre-defined set of points on the gripper. 5 points. Used in the paper
        grasp_pc_o = gt_cps
    else:
        grasp_pc_o = tf_utils.get_gripper_pc(
            get_shape(pc)[0], args.gripper_pc_npoints)

    if '{}_grasp_eulers'.format(scope) in data_dict:  # Refinement
        assert not args.is_training
        assert '{}_grasp_translations'.format(scope) in data_dict
        assert isinstance(data_dict['{}_grasp_eulers'.format(scope)], list)
        assert len(data_dict['{}_grasp_eulers'.format(scope)]) == 3

        sample_batch_size = get_shape(pc)[0]
        sample_rotation = data_dict['{}_grasp_eulers'.format(scope)]
        sample_translation = data_dict['{}_grasp_translations'.format(scope)]

        verify_tensor_size(
            pc,
            [sample_batch_size, npoints if npoints > 0 else args.npoints, 3])
        for i in range(3):
            verify_tensor_size(sample_rotation[i], [sample_batch_size])
        verify_tensor_size(sample_translation, [sample_batch_size, 3])

        rot = tf_utils.tf_rotation_matrix(*sample_rotation, batched=True)
        grasp_pc = tf_utils.get_control_point_tensor(sample_batch_size)
        grasp_pc = tf.matmul(grasp_pc,
                             rot,
                             transpose_a=False,
                             transpose_b=True)
        grasp_pc += tf.expand_dims(sample_translation, 1)
    else:  # Training grasp generation
        assert args.is_training

        gt_cps = tf_utils.get_control_point_tensor(
            get_shape(pc)[0])  # Samples of the 3d points on the gripper
        ones = tf.ones((get_shape(gt_cps)[0], get_shape(gt_cps)[1], 1),
                       dtype=tf.float32)
        gt_cps = tf.concat((gt_cps, ones), -1)  # B x N x 4

        data_dict['{}_gt_cps'.format(scope)] = gt_cps
        if args.gripper_pc_npoints == -1:  # Use a pre-defined set of points on the gripper. 5 points. Used in the paper
            grasp_pc_o = gt_cps
        else:
            grasp_pc_o = tf_utils.get_gripper_pc(
                get_shape(pc)[0], args.gripper_pc_npoints)

        grasp_pc = tf.matmul(
            grasp_pc_o,
            data_dict['{}_grasp_rt'.format(scope)],
            transpose_a=False,
            transpose_b=True)  # apply the transformation to the gripper pc
        grasp_pc = tf.slice(grasp_pc, [0, 0, 0],
                            [-1, -1, 3])  # remove last dimension; B x N x 3
        data_dict['{}_grasp_pc'.format(scope)] = grasp_pc
        label = data_dict['{}_label'.format(scope)]

    with tf.variable_scope(scope):
        pc_input = tf.slice(pc, [0, 0, 0], [-1, -1, 3])

        # Confidence of the prediction; not used now, i.e. confidence == 1
        # (the weight of the confidence loss is set to a large number).
        # is_training is fixed to False: the evaluator model currently does not
        # work with batch norm, so it is disabled (the VAE is fine with it).
        success_logit, confidence = models.model.evaluator_model(
            pc_input,
            grasp_pc,
            is_training=tf.constant(False),
            bn_decay=None,
            scale=1,
            pc_latent=pc_latent,
            gripper_pc_latent=gripper_pc_latent)
        data_dict['{}_pred/evaluator'.format(scope)] = tf.nn.softmax(
            success_logit)  # Predicted success
        data_dict['{}_pred/confidence'.format(scope)] = confidence

        if args.is_training:
            global_step = tf.train.get_or_create_global_step()
            loss, confidence_term = models.model.classification_with_confidence_loss(
                success_logit, label, confidence, args.confidence_weight)
            total_loss = loss + confidence_term

            learning_rate = tf.constant(args.lr, tf.float32)
            if args.ngpus == 1:
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                optimizer = tf.train.AdamOptimizer(learning_rate * hvd.size())
                optimizer = hvd.DistributedOptimizer(optimizer)

            # with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss,
                                          global_step=global_step,
                                          var_list=tf.global_variables())
            confidences = [0.2, 0.4, 0.6, 0.8]
            for c in confidences:
                acc_at_confidence, ratio_at_confidence = models.model.accuracy_better_than_threshold(
                    success_logit, label, confidence, c)

                summary_dict['ratio_at_each_confidence/' +
                             str(c)] = ratio_at_confidence
                summary_dict['acc_at_each_confidence/' +
                             str(c)] = acc_at_confidence
            summary_dict['losses/classification_loss'] = loss
            summary_dict['losses/confidence_loss'] = confidence_term
            summary_dict['losses/total_loss'] = total_loss
            summary_dict['step'] = global_step
            logger_dict['predictions'] = tf.math.argmax(success_logit, -1)

            for k in summary_dict:
                logger_dict[k] = summary_dict[k]
                summary_dict[k] = tf.summary.scalar(k, summary_dict[k])

            summary_op = tf.summary.merge(list(summary_dict.values()))
        else:
            train_op = None
            summary_op = None
            logger_dict = None
            tf_success = tf.slice(data_dict['{}_pred/evaluator'.format(scope)],
                                  [0, 1], [-1, 1])  # Got the success column
            data_dict['{}_pred/success'.format(scope)] = tf_success
            data_dict['{}_gradient'.format(scope)] = tf.gradients(
                tf_success, [
                    data_dict['{}_grasp_translations'.format(scope)],
                    data_dict['{}_grasp_eulers'.format(scope)][0],
                    data_dict['{}_grasp_eulers'.format(scope)][1],
                    data_dict['{}_grasp_eulers'.format(scope)][2]
                ])

        return train_op, summary_op, data_dict, logger_dict, global_step
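A NumPy sketch of the grasp-point transform used in the refinement branch above: rotate the canonical gripper points and add the translation (batch of one, arbitrary values):

import numpy as np

gripper_pc = np.random.rand(1, 5, 3)           # canonical control points
rot = np.eye(3)[None]                          # identity rotation for the sketch
trans = np.array([[0.1, 0.0, 0.2]])

transformed = gripper_pc @ np.transpose(rot, (0, 2, 1)) + trans[:, None, :]
print(transformed.shape)                       # (1, 5, 3)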
Example 11
def model_with_confidence(pc,
                          latent,
                          is_training,
                          bn_decay,
                          is_encoder,
                          latent_size=None,
                          scale=1,
                          merge_pcs=False,
                          pointnet_radius=0.02,
                          pointnet_nclusters=128):
    """
      If is_encoder=True, it creates a model that outputs grasp score and 
      grasp confidence. Grasp confidence is the confidence of the network
      in the predicted scores.
    """

    assert not isinstance(is_training, bool)
    if not is_encoder:
        if merge_pcs:
            raise ValueError(
                'unless in encoder mode, merge_pcs should be False!!!')

    l0_xyz = pc
    l0_points = tf.concat([l0_xyz, latent], -1)

    if is_encoder and merge_pcs:
        grasp_rt = latent
        grasp_shape = get_shape(grasp_rt)

        print('encoder: merge_pc: grasp_shape: ', grasp_shape)
        if len(grasp_shape) != 3 or grasp_shape[1] != 4 or grasp_shape[2] != 4:
            raise ValueError('invalid grasp shape {}'.format(grasp_shape))

        gripper_pc = tf.matmul(tf_utils.get_gripper_pc(get_shape(pc)[0], -1),
                               grasp_rt,
                               transpose_a=False,
                               transpose_b=True)
        gripper_pc = tf.slice(gripper_pc, [0, 0, 0], [-1, -1, 3])
        print('gripper_pc = {}, pc = {}'.format(get_shape(gripper_pc),
                                                get_shape(pc)))

        l0_xyz, l0_points = merge_pc_and_gripper_pc(pc, gripper_pc)

    print('l0_xyz = {} l0_points = {}'.format(get_shape(l0_xyz),
                                              get_shape(l0_points)))

    net = base_network(l0_xyz, l0_points, is_training, bn_decay, scale,
                       pointnet_radius, pointnet_nclusters)

    if is_encoder:
        assert (latent_size is not None)
        mean = tf_util.fully_connected(net,
                                       latent_size,
                                       activation_fn=None,
                                       scope='fc_mean')
        logvar = tf_util.fully_connected(net,
                                         latent_size,
                                         activation_fn=None,
                                         scope='fc_var')

        return tf.concat((mean, logvar), -1)
    else:
        q = tf_util.fully_connected(net, 4, activation_fn=None, scope='fc_q')
        q = tf.nn.l2_normalize(q, -1)
        t = tf_util.fully_connected(net, 3, activation_fn=None, scope='fc_t')
        confidence = tf_util.fully_connected(net,
                                             1,
                                             activation_fn=None,
                                             scope='fc_conf')
        confidence = tf.nn.sigmoid(confidence)

        return q, t, confidence
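A tiny sketch of the decoder heads' post-processing: the 4-vector is L2-normalized into a unit quaternion and the confidence logit is squashed with a sigmoid (made-up values):

import numpy as np

q_raw = np.array([2.0, 0.0, 0.0, 0.0])
q = q_raw / np.linalg.norm(q_raw)              # unit quaternion, here [1 0 0 0]
confidence = 1.0 / (1.0 + np.exp(-0.0))        # sigmoid of the logit -> 0.5
print(q, confidence)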
Example 12
def base_network(l0_xyz,
                 l0_points,
                 is_training,
                 bn_decay,
                 scale,
                 pointnet_radius=0.02,
                 pointnet_nclusters=128):
    """
      Backbone model used for encoder, decoder, and evaluator.
    """
    l1_xyz, l1_points, _ = pointnet_sa_module(
        l0_xyz,
        l0_points,
        npoint=pointnet_nclusters,
        radius=pointnet_radius,
        nsample=64,
        mlp=[64 * scale, 64 * scale, 128 * scale],
        mlp2=None,
        group_all=False,
        is_training=is_training,
        bn_decay=bn_decay,
        scope='ssg-layer1')
    l2_xyz, l2_points, _ = pointnet_sa_module(
        l1_xyz,
        l1_points,
        npoint=32,
        radius=0.04,
        nsample=128,
        mlp=[128 * scale, 128 * scale, 256 * scale],
        mlp2=None,
        group_all=False,
        is_training=is_training,
        bn_decay=bn_decay,
        scope='ssg-layer2')
    _, l3_points, _ = pointnet_sa_module(
        l2_xyz,
        l2_points,
        npoint=None,
        radius=None,
        nsample=None,
        mlp=[256 * scale, 256 * scale, 512 * scale],
        mlp2=None,
        group_all=True,
        is_training=is_training,
        bn_decay=bn_decay,
        scope='ssg-layer3')

    # Fully connected layers
    batch_size = get_shape(l0_xyz)[0]
    net = tf.reshape(l3_points, [batch_size, -1])
    net = tf_util.fully_connected(net,
                                  1024 * scale,
                                  bn=True,
                                  is_training=is_training,
                                  scope='fc1',
                                  bn_decay=bn_decay)
    net = tf_util.fully_connected(net,
                                  1024 * scale,
                                  bn=True,
                                  is_training=is_training,
                                  scope='fc2',
                                  bn_decay=bn_decay)
    return net
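A sketch of the final flattening step: the group_all layer yields one global feature per example, which is reshaped to (batch, -1) before the fully connected layers (the channel count here is illustrative):

import numpy as np

l3_points = np.random.rand(4, 1, 512)          # (batch, one group, channels)
net = l3_points.reshape(4, -1)                 # (4, 512)
print(net.shape)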
Example 13
def min_distance_loss(
    pred_control_points,
    gt_control_points,
    confidence=None,
    confidence_weight=None,
    threshold=None,
):
    """
    Computes the minimum distance (L1 distance) between the control points of
    each gt grasp and those of any of the predicted grasps.

    Args: 
      pred_control_points: tensor of (N_pred, M, 4) shape. N is the number of
        grasps. M is the number of points on the gripper.
      gt_control_points: (N_gt, M, 4)
      confidence: tensor of N_pred, tensor for the confidence of each 
        prediction.
      confidence_weight: float, the weight for confidence loss.
    """
    pred_shape = get_shape(pred_control_points)
    gt_shape = get_shape(gt_control_points)

    if len(pred_shape) != 3:
        raise ValueError(
            "pred_control_point should have len of 3. {}".format(pred_shape))
    if len(gt_shape) != 3:
        raise ValueError(
            "gt_control_point should have len of 3. {}".format(gt_shape))
    if np.any([
            p != gt for i, (p, gt) in enumerate(zip(pred_shape, gt_shape))
            if i > 0
    ]):
        raise ValueError("shapes do no match {} != {}".format(
            pred_shape, gt_shape))

    # N_pred x Ngt x M x 3
    error = tf.expand_dims(pred_control_points, 1) - tf.expand_dims(
        gt_control_points, 0)
    error = tf.reduce_sum(tf.abs(error),
                          -1)  # L1 distance of error (N_pred, N_gt, M)
    error = tf.reduce_mean(
        error, -1)  # average L1 for all the control points. (N_pred, N_gt)
    min_distance_error = tf.reduce_min(
        error, 0)  # take the min distance for each gt control point. (N_gt)
    #print('min_distance_error', get_shape(min_distance_error))
    if confidence is not None:
        closest_index = tf.argmin(error, 0)  # (N_gt)
        #print('closest_index', get_shape(closest_index))
        selected_confidence = tf.one_hot(closest_index,
                                         axis=-1,
                                         depth=pred_shape[0])  # (N_gt, N_pred)
        #print('selected_confidence', selected_confidence)
        selected_confidence *= tf.expand_dims(confidence, 0)
        #print('selected_confidence', selected_confidence)
        selected_confidence = tf.reduce_sum(selected_confidence, -1)  # N_gt
        #print('selected_confidence', selected_confidence)
        min_distance_error *= selected_confidence
        confidence_term = tf.reduce_mean(tf.log(tf.maximum(
            confidence, 1e-4))) * confidence_weight
    else:
        confidence_term = 0.

    return tf.reduce_mean(min_distance_error), -confidence_term
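A NumPy sketch of the min-distance computation with tiny made-up sets: the L1 error is summed over coordinates, averaged over control points, then minimized over the predictions for each ground-truth grasp:

import numpy as np

pred = np.random.rand(3, 5, 3)                 # 3 predicted grasps
gt = np.random.rand(2, 5, 3)                   # 2 ground-truth grasps

error = np.abs(pred[:, None] - gt[None]).sum(-1).mean(-1)   # (3, 2)
print(error.min(axis=0))                       # closest prediction per gt grasp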