Example #1
    def inference(self, inputs, is_training):

        d_out = self.config.d_out
        feature = inputs['features']
        feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
        feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
        feature = tf.expand_dims(feature, axis=2)

        # ###########################Encoder############################
        f_encoder_list = []
        for i in range(self.config.num_layers):
            
            # GT_res_blocks variant of RandLA-Net's dilated_res_block; it returns
            # two feature streams (s_encoder_i, f_encoder_i) instead of one
            s_encoder_i, f_encoder_i = self.gt_res_block(
                feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                'Encoder_layer_' + str(i), is_training, i)
            f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
            s_sampled_i = self.random_sample(s_encoder_i, inputs['sub_idx'][i])

            feature = s_sampled_i
            
            if i == 0:
                f_encoder_list.append(f_encoder_i)
            f_encoder_list.append(f_sampled_i)
        # ###########################Encoder############################

        feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                        'decoder_0',
                                        [1, 1], 'VALID', True, is_training)

        # ###########################Decoder############################
        f_decoder_list = []
        for j in range(self.config.num_layers):
            f_interp_i = self.nearest_interpolation(feature, inputs['interp_idx'][-j - 1])
            f_decoder_i = helper_tf_util.conv2d_transpose(tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                                                          f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                                                          'Decoder_layer_' + str(j), [1, 1], 'VALID', bn=True,
                                                          is_training=is_training)
            feature = f_decoder_i
            f_decoder_list.append(f_decoder_i)
        # ###########################Decoder############################

        f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
        f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
        f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
        f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID', False,
                                            is_training, activation_fn=None)
        f_out = tf.squeeze(f_layer_fc3, [2])
        return f_out
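
Note: every example on this page calls self.random_sample to pool encoder features down to the sub-sampled point set, but the helper itself is not shown here. Below is a minimal sketch of how it is commonly implemented in RandLA-Net-style code (gather features at the pooled positions, then max-pool over each point's neighbors); the exact signature and shapes are assumptions, not taken from this page.

    import tensorflow as tf  # TF 1.x, matching the examples above

    def random_sample(feature, pool_idx):
        # assumed shapes: feature (B, N, 1, d), pool_idx (B, N', k) -> (B, N', 1, d)
        feature = tf.squeeze(feature, axis=2)  # (B, N, d)
        num_neigh = tf.shape(pool_idx)[-1]
        d = feature.get_shape()[-1].value
        batch_size = tf.shape(pool_idx)[0]
        pool_idx = tf.reshape(pool_idx, [batch_size, -1])  # (B, N'*k)
        pool_features = tf.batch_gather(feature, pool_idx)  # (B, N'*k, d)
        pool_features = tf.reshape(pool_features, [batch_size, -1, num_neigh, d])
        # max-pool over the k neighbors of each kept point; keepdims restores
        # the singleton dim expected by the Conv2D-based layers
        return tf.reduce_max(pool_features, axis=2, keepdims=True)  # (B, N', 1, d)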
Example #2
    def inference(self, inputs, is_training):

        d_out = self.config.d_out
        feature = inputs['features']
        feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
        feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
        feature = tf.expand_dims(feature, axis=2)

        # ###########################Encoder############################
        f_encoder_list = []
        for i in range(self.config.num_layers):
            f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                                                 'Encoder_layer_' + str(i), is_training)
            f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
            feature = f_sampled_i
            if i == 0:
                f_encoder_list.append(f_encoder_i)
            f_encoder_list.append(f_sampled_i)
        # ###########################Encoder############################

        feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                        'decoder_0',
                                        [1, 1], 'VALID', True, is_training)


        # # bboxes head
        # bboxes_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'bboxes_fc1', [1, 1], 'VALID', True, is_training)
        # bboxes_layer_fc2 = helper_tf_util.conv2d(bboxes_layer_fc1, 32, [1, 1], 'bboxes_fc2', [1, 1], 'VALID', True, is_training)
        # bboxes_layer_drop = helper_tf_util.dropout(bboxes_layer_fc2, keep_prob=0.5, is_training=is_training, scope='bboxes_dp1')
        # bboxes_layer_fc3 = helper_tf_util.conv2d(bboxes_layer_drop, self.num_target_attributes-1, [1, 1], 'bboxes_fc', [1, 1], 'VALID', False,
        #                                     is_training, activation_fn=None)
        # bboxes_out = tf.squeeze(bboxes_layer_fc3, [2])


        # # fgbg head
        # fgbg_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'fgbg_fc1', [1, 1], 'VALID', True, is_training)
        # fgbg_layer_fc2 = helper_tf_util.conv2d(fgbg_layer_fc1, 32, [1, 1], 'fgbg_fc2', [1, 1], 'VALID', True, is_training)
        # fgbg_layer_drop = helper_tf_util.dropout(fgbg_layer_fc2, keep_prob=0.5, is_training=is_training, scope='fgbg_dp1')
        # fgbg_layer_fc3 = helper_tf_util.conv2d(fgbg_layer_drop, 1, [1, 1], 'fgbg_fc', [1, 1], 'VALID', False,
        #                                     is_training, activation_fn=None)
        # fgbg_out = tf.squeeze(fgbg_layer_fc3, [2])


        # # classification head

        # cls_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'cls_fc1', [1, 1], 'VALID', True, is_training)
        # cls_layer_fc2 = helper_tf_util.conv2d(cls_layer_fc1, 32, [1, 1], 'cls_fc2', [1, 1], 'VALID', True, is_training)
        # cls_layer_drop = helper_tf_util.dropout(cls_layer_fc2, keep_prob=0.5, is_training=is_training, scope='cls_dp1')
        # cls_layer_fc3 = helper_tf_util.conv2d(cls_layer_drop, self.num_classes, [1, 1], 'cls_fc', [1, 1], 'VALID', False,
        #                                     is_training, activation_fn=None)
        # cls_out = tf.squeeze(cls_layer_fc3, [2])
        # return bboxes_out, fgbg_out, cls_out

        f_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
        f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
        f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
        # f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.num_output_attributes, [1, 1], 'fc', [1, 1], 'VALID', False,
        #                                     is_training, activation_fn=None)
        f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.num_fgbg_attributes, [1, 1], 'fc', [1, 1], 'VALID', False,
                                            is_training, activation_fn=None)
        f_out = tf.squeeze(f_layer_fc3, [2])
        return f_out
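
Note: the commented-out bboxes/fgbg/cls heads above all repeat the same stack of point-wise convolutions (64 -> 32 -> dropout -> n_out). If they were re-enabled, the repetition could be factored into one helper; the sketch below is hypothetical (prediction_head is not part of the original code) and reuses only calls that appear verbatim above.

    def prediction_head(features, n_out, name, is_training):
        # shared pattern of the heads above: two 1x1 convs with BN,
        # dropout, then a linear 1x1 conv producing n_out channels per point
        x = helper_tf_util.conv2d(features, 64, [1, 1], name + '_fc1', [1, 1], 'VALID', True, is_training)
        x = helper_tf_util.conv2d(x, 32, [1, 1], name + '_fc2', [1, 1], 'VALID', True, is_training)
        x = helper_tf_util.dropout(x, keep_prob=0.5, is_training=is_training, scope=name + '_dp1')
        x = helper_tf_util.conv2d(x, n_out, [1, 1], name + '_fc', [1, 1], 'VALID', False, is_training, activation_fn=None)
        return tf.squeeze(x, [2])  # (B, N, n_out)

    # e.g. bboxes_out = prediction_head(f_encoder_list[-1], self.num_target_attributes - 1, 'bboxes', is_training)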
Example #3
    def inference(self, inputs, is_training):
        """similar to pytorch's forward() function where the RandLA-Net architecture is implemented by an encoder-decoder structure-yc
        In the encoder, LocSE block and RandomSampling is used where LocSE consists of gather_neighbors, relative_pos_encoding, att_pooling()
        In the decoder, nearest interpolation is used w. short-cut connections

        Args:
            inputs ([type]): a dict containing all kinds of required inputs
            is_training (bool): training or not

        Returns:
            tensor: logits for segmentation scores
        """

        d_out = self.config.d_out
        feature = inputs['features']  # (B,N,6)
        feature = tf.layers.dense(feature, 8, activation=None,
                                  name='fc0')  # (B,N,8)
        feature = tf.nn.leaky_relu(
            tf.layers.batch_normalization(feature,
                                          -1,
                                          0.99,
                                          1e-6,
                                          training=is_training))
        feature = tf.expand_dims(
            feature, axis=2)  # expand 1 more dim to use Conv2D ops, (B,N,1,8)

        # ###########################Encoder############################
        # in the end this collects num_layers + 1 hierarchical point feature embeddings
        f_encoder_list = []
        for i in range(self.config.num_layers):
            # similar to the LFA module (LocSE + attentive pooling) for local feature learning
            f_encoder_i = self.dilated_res_block(
                feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                'Encoder_layer_' + str(i), is_training)
            # down-sample the features using the precomputed sub-sampling indices
            f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
            feature = f_sampled_i
            if i == 0:
                f_encoder_list.append(f_encoder_i)
            # collected shapes: (B,N,1,32), (B,N/4,1,32), (B,N/16,1,128),
            # (B,N/64,1,256), (B,N/256,1,512), (B,N/512,1,1024)
            f_encoder_list.append(f_sampled_i)
        # ###########################Encoder############################
        # transition using an MLP / pointwise Conv2D, e.g., (N/512,1024) -> (N/512,1024)
        feature = helper_tf_util.conv2d(
            f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value,
            [1, 1], 'decoder_0', [1, 1], 'VALID', True, is_training)

        # ###########################Decoder############################
        f_decoder_list = []
        for j in range(self.config.num_layers):
            # interpolate w. the idx, (B,N/512,1,1024) -> (B,N/256,1,1024)
            f_interp_i = self.nearest_interpolation(
                feature, inputs['interp_idx'][-j - 1])
            f_decoder_i = helper_tf_util.conv2d_transpose(
                tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                'Decoder_layer_' + str(j), [1, 1],
                'VALID',
                bn=True,
                is_training=is_training)  # shortcut connection
            feature = f_decoder_i
            f_decoder_list.append(f_decoder_i)  # upsampled point embeddings
        # ###########################Decoder############################
        # obtain classification scores using FC layers: 64 -> 32 (w. dropout) -> num_classes
        f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1],
                                            'fc1', [1, 1], 'VALID', True,
                                            is_training)
        f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2',
                                            [1, 1], 'VALID', True, is_training)
        f_layer_drop = helper_tf_util.dropout(f_layer_fc2,
                                              keep_prob=0.5,
                                              is_training=is_training,
                                              scope='dp1')
        f_layer_fc3 = helper_tf_util.conv2d(
            f_layer_drop,
            self.config.num_classes, [1, 1],
            'fc', [1, 1],
            'VALID',
            False,
            is_training,
            activation_fn=None)  # (B,N,1,num_classes)
        f_out = tf.squeeze(f_layer_fc3, [2])  # (B,N,num_classes)
        return f_out
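
Note: the decoder loop depends on self.nearest_interpolation, which is not shown on this page. A minimal sketch of the usual RandLA-Net implementation, assuming interp_idx holds one precomputed nearest-neighbor index per up-sampled point:

    import tensorflow as tf  # TF 1.x

    def nearest_interpolation(feature, interp_idx):
        # assumed shapes: feature (B, N, 1, d), interp_idx (B, up_num_points, 1) -> (B, up_num_points, 1, d)
        feature = tf.squeeze(feature, axis=2)  # (B, N, d)
        batch_size = tf.shape(interp_idx)[0]
        up_num_points = tf.shape(interp_idx)[1]
        interp_idx = tf.reshape(interp_idx, [batch_size, up_num_points])
        # each up-sampled point copies the feature of its nearest coarse point
        interpolated_features = tf.batch_gather(feature, interp_idx)  # (B, up_num_points, d)
        return tf.expand_dims(interpolated_features, axis=2)  # (B, up_num_points, 1, d)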
Example #4
    def inference(self, inputs, is_training):

        d_out = self.config.d_out
        ratio = self.config.sub_sampling_ratio
        k_n = self.config.k_n
        feature = inputs['features']
        og_xyz = feature[:, :, :3]
        feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
        feature = tf.nn.leaky_relu(
            tf.layers.batch_normalization(feature,
                                          -1,
                                          0.99,
                                          1e-6,
                                          training=is_training))
        feature = tf.expand_dims(feature, axis=2)

        # ###########################Encoder############################
        f_encoder_list = []
        input_xyz = og_xyz
        input_up_samples = []
        new_xyz_list = []
        xyz_list = []
        n_pts = self.config.num_points
        for i in range(self.config.num_layers):
            # Farthest Point Sampling:
            input_neigh_idx = tf.py_func(DP.knn_search,
                                         [input_xyz, input_xyz, k_n], tf.int32)
            n_pts = n_pts // ratio[i]
            sub_xyz, inputs_sub_idx = tf.cond(
                tf.equal(is_training, tf.constant(True)), lambda: sampling(
                    self.config.batch_size, n_pts, input_xyz, input_neigh_idx),
                lambda: sampling(self.config.val_batch_size, n_pts, input_xyz,
                                 input_neigh_idx))
            inputs_interp_idx = tf.py_func(DP.knn_search,
                                           [sub_xyz, input_xyz, 1], tf.int32)
            input_up_samples.append(inputs_interp_idx)

            # Bilateral Context Encoding
            f_encoder_i, new_xyz = self.bilateral_context_block(
                feature, input_xyz, input_neigh_idx, d_out[i],
                'Encoder_layer_' + str(i), is_training)
            f_sampled_i = self.random_sample(f_encoder_i, inputs_sub_idx)
            feature = f_sampled_i
            if i == 0:
                f_encoder_list.append(f_encoder_i)
            f_encoder_list.append(f_sampled_i)
            xyz_list.append(input_xyz)
            new_xyz_list.append(new_xyz)
            input_xyz = sub_xyz
        # ###########################Encoder############################

        # ###########################Decoder############################
        # Adaptive Fusion Module
        f_multi_decoder = []  # full-sized feature maps
        f_weights_decoders = []  # point-wise adaptive fusion weights
        for n in range(self.config.num_layers):
            feature = f_encoder_list[-1 - n]
            feature = helper_tf_util.conv2d(feature,
                                            feature.get_shape()[3].value,
                                            [1, 1], 'decoder_0' + str(n),
                                            [1, 1], 'VALID', True, is_training)
            f_decoder_list = []
            for j in range(self.config.num_layers - n):
                f_interp_i = self.nearest_interpolation(
                    feature, input_up_samples[-j - 1 - n])
                f_decoder_i = helper_tf_util.conv2d_transpose(
                    tf.concat([f_encoder_list[-j - 2 - n], f_interp_i],
                              axis=3),
                    f_encoder_list[-j - 2 - n].get_shape()[-1].value, [1, 1],
                    'Decoder_layer_' + str(n) + '_' + str(j), [1, 1],
                    'VALID',
                    bn=True,
                    is_training=is_training)
                feature = f_decoder_i
                f_decoder_list.append(f_decoder_i)
            # collect full-sized feature maps which are upsampled from multiple resolutions
            f_multi_decoder.append(f_decoder_list[-1])
            # summarize point-level information
            curr_weight = helper_tf_util.conv2d(f_decoder_list[-1],
                                                1, [1, 1],
                                                'Decoder_weight_' + str(n),
                                                [1, 1],
                                                'VALID',
                                                bn=False,
                                                activation_fn=None)
            f_weights_decoders.append(curr_weight)
        # regress the fusion parameters
        f_weights = tf.concat(f_weights_decoders, axis=-1)
        f_weights = tf.nn.softmax(f_weights, axis=-1)
        # adaptively fuse them by computing a weighted sum
        f_decoder_final = tf.zeros_like(f_multi_decoder[-1])
        for i in range(len(f_multi_decoder)):
            f_decoder_final = f_decoder_final + tf.tile(
                tf.expand_dims(f_weights[:, :, :, i], axis=-1),
                [1, 1, 1, f_multi_decoder[i].get_shape()[-1].value
                 ]) * f_multi_decoder[i]
        # ###########################Decoder############################

        f_layer_fc1 = helper_tf_util.conv2d(f_decoder_final, 64, [1, 1], 'fc1',
                                            [1, 1], 'VALID', True, is_training)
        f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2',
                                            [1, 1], 'VALID', True, is_training)
        f_layer_drop = helper_tf_util.dropout(f_layer_fc2,
                                              keep_prob=0.5,
                                              is_training=is_training,
                                              scope='dp1')
        f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop,
                                            self.config.num_classes, [1, 1],
                                            'fc', [1, 1],
                                            'VALID',
                                            False,
                                            is_training,
                                            activation_fn=None)
        f_out = tf.squeeze(f_layer_fc3, [2])
        return f_out, new_xyz_list, xyz_list
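
Note: the adaptive fusion at the end of this decoder can be read in isolation: softmax the per-point scalar weights across the K branches, then take a weighted sum of the K full-resolution feature maps. A standalone sketch (adaptive_fusion is a hypothetical name; it assumes all branches share the same channel width, as they do above, and uses broadcasting instead of the tf.tile in the original):

    import tensorflow as tf  # TF 1.x

    def adaptive_fusion(f_multi_decoder, f_weights_decoders):
        # f_multi_decoder: K tensors (B, N, 1, C); f_weights_decoders: K tensors (B, N, 1, 1)
        f_weights = tf.nn.softmax(tf.concat(f_weights_decoders, axis=-1), axis=-1)  # (B, N, 1, K)
        fused = tf.zeros_like(f_multi_decoder[-1])
        for k, f_k in enumerate(f_multi_decoder):
            w_k = tf.expand_dims(f_weights[:, :, :, k], axis=-1)  # (B, N, 1, 1)
            fused = fused + w_k * f_k  # weight broadcasts over the channel dim
        return fused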
Example #5
    def inference(self, inputs, is_training):
        """similar to pytorch's forward() function where the SQN model architecture is implemented by an encoder-query structure
        Args:
            inputs ([type]): a dict containing all kinds of required inputs
            is_training (bool): training or not

        Returns:
            tensor: logits for segmentation scores
        """

        d_out = self.config.d_out  # [16, 64, 128, 256], note the channels of LFA will be doubled.
        feature = inputs['features']  # (B,N,6)
        # feature = tf.layers.dense(feature, 8, activation=None, name='fc0') # (B,N,8)
        # feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
        feature = tf.expand_dims(
            feature, axis=2)  # expand 1 more dim to use Conv2D ops; (B,N,1,6) since fc0 is disabled here

        # ###########################Encoder############################
        # in the end this collects num_layers + 1 hierarchical point feature embeddings
        f_encoder_list = []
        for i in range(self.config.num_layers):
            # similar to the LFA module (LocSE + attentive pooling) for local feature learning
            f_encoder_i = self.dilated_res_block(
                feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                'Encoder_layer_' + str(i), is_training)
            # down-sample the features using the precomputed sub-sampling indices
            f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
            feature = f_sampled_i
            if i == 0:
                f_encoder_list.append(f_encoder_i)
            # collected shapes: (B,N,1,32), (B,N/4,1,32), (B,N/16,1,128),
            # (B,N/64,1,256), (B,N/256,1,512)
            f_encoder_list.append(f_sampled_i)
        # ###########################Encoder############################

        # ###########################Query Network############################
        # obtain the weakly labeled points and their labels for a batch using weak_label_masks
        # method 2: tf.gather_nd
        selected_idx = tf.where(tf.equal(self.weak_label_masks, 1))  # (n,2)
        weak_points = tf.gather_nd(self.points, selected_idx)
        weak_points_labels = tf.gather_nd(self.labels, selected_idx)  # (n,)
        # method 1 (alternative): tf.boolean_mask, e.g., one batch might have n = 26 weak points
        # weak_points = tf.boolean_mask(self.points, tf.cast(self.weak_label_masks, tf.bool))  # (n,3)
        # weak_points_labels = tf.boolean_mask(self.labels, tf.cast(self.weak_label_masks, tf.bool))  # (n,)

        # obtain batch indices denoting which batch each weakly labeled point comes from
        batch_inds = selected_idx[:, 0]

        # query features for weak points
        f_query_feature_list = []
        for i in range(self.config.num_layers):
            # index i + 1 because the first element of inputs['xyz'] is the original point cloud
            xyz_current = inputs['xyz'][i + 1]  # (B,N/4,3)
            # index i + 1 because the first entry of f_encoder_list is the encoder input
            features_current = f_encoder_list[i + 1]  # (B,N/4,1,32)

            # if training, shape (n,1,3); otherwise (B,N,3) (mainly to avoid GPU OOM at inference)
            xyz_query = tf.cond(
                is_training,
                lambda: tf.reshape(weak_points, (tf.shape(weak_points)[0], 1, 3
                                                 )),  # (n,1,3)
                lambda: self.points)
            xyz_support = tf.cond(
                is_training,
                lambda: tf.gather(
                    xyz_current, batch_inds, axis=0
                ),  # (B,m,3)->(n,m,3) as each weak pt might be from diff. batch
                lambda: xyz_current)
            features_support = tf.cond(
                is_training,
                lambda: tf.gather(tf.squeeze(features_current, axis=2),
                                  batch_inds,
                                  axis=0),  # (B,m,C)->(n,m,C)
                lambda: tf.squeeze(features_current, axis=2))

            # if training, (n,1,C); otherwise (B,N,C), where n is determined by (B,N) and the weak_label_masks
            f_query_feature_i = self.three_nearest_interpolation(
                xyz_query, xyz_support, features_support)  # (B,N,C)
            f_query_feature_list.append(f_query_feature_i)

        # concat all queried features along the channel dim; the trick here is that n acts as the
        # batch dim, 1 as the num_point dim, and the summed channels (928) as the channel dim
        features_combined = tf.concat(f_query_feature_list, axis=-1)  # (n,1,928)

        # obtain classification scores using FCs: (n,1,928) -> ... -> (n,1,num_classes) for training,
        # or (B,N,928) -> ... -> (B,N,num_classes) for validation
        FC_LIST = [256, 128, 64, self.config.num_classes]
        f_layer_fc1 = helper_tf_util.conv1d(features_combined, FC_LIST[0], 1,
                                            'fc1', 1, 'VALID', True,
                                            is_training)
        f_layer_fc2 = helper_tf_util.conv1d(f_layer_fc1, FC_LIST[1], 1, 'fc2',
                                            1, 'VALID', True, is_training)
        f_layer_fc3 = helper_tf_util.conv1d(f_layer_fc2, FC_LIST[2], 1, 'fc3',
                                            1, 'VALID', True, is_training)
        f_layer_drop = helper_tf_util.dropout(f_layer_fc3,
                                              keep_prob=0.5,
                                              is_training=is_training,
                                              scope='dp1')
        logits = helper_tf_util.conv1d(f_layer_drop,
                                       FC_LIST[-1],
                                       1,
                                       'fc4',
                                       1,
                                       'VALID',
                                       False,
                                       is_training,
                                       activation_fn=None)
        # ###########################Query Network############################

        # if training, logits has shape (n,1,C); if validating, (B,N,C)
        logits = tf.cond(
            is_training,
            lambda: tf.squeeze(logits, [1]),  # (n, num_classes)
            lambda: tf.reshape(logits, [-1, tf.shape(logits)[-1]])
        )  # (B*N, num_classes)

        return logits, weak_points_labels  # (n,num_classes), (n,)
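
Note: the query network hinges on self.three_nearest_interpolation, which is not shown on this page. A sketch under the usual SQN assumptions (3 nearest support points per query, inverse-distance weighting); the brute-force pairwise distance is fine for a sketch but memory-hungry for large point counts:

    import tensorflow as tf  # TF 1.x

    def three_nearest_interpolation(xyz_query, xyz_support, features_support):
        # assumed shapes: xyz_query (B, Nq, 3), xyz_support (B, Ns, 3), features_support (B, Ns, C) -> (B, Nq, C)
        diff = tf.expand_dims(xyz_query, 2) - tf.expand_dims(xyz_support, 1)  # (B, Nq, Ns, 3)
        dist2 = tf.reduce_sum(tf.square(diff), axis=-1)  # (B, Nq, Ns) squared distances
        neg_dist2, idx = tf.nn.top_k(-dist2, k=3)  # 3 nearest supports per query
        dist2 = tf.maximum(-neg_dist2, 1e-10)  # (B, Nq, 3), avoid division by zero
        inv = 1.0 / dist2
        weights = inv / tf.reduce_sum(inv, axis=-1, keepdims=True)  # inverse-distance weights
        batch_size = tf.shape(idx)[0]
        flat_idx = tf.reshape(idx, [batch_size, -1])  # (B, Nq*3)
        neigh = tf.batch_gather(features_support, flat_idx)  # (B, Nq*3, C)
        neigh = tf.reshape(neigh, [batch_size, tf.shape(xyz_query)[1], 3, -1])  # (B, Nq, 3, C)
        return tf.reduce_sum(neigh * tf.expand_dims(weights, -1), axis=2)  # weighted sum -> (B, Nq, C)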