def get_transform(point_cloud, is_training, bn_decay=None, K=3):
    """Build the transform-regression sub-network for a point cloud.

    Three convolutions produce per-point features, which are max-pooled over
    the point axis and passed through two fully connected layers. A final
    linear layer, with zero-initialised weights and a bias offset by the
    flattened 3x3 identity, regresses the transform.

    :param point_cloud: input tensor, expected to be BxNx3 with static B and N
    :param is_training: training flag forwarded to every layer
    :param bn_decay: batch-norm decay forwarded to every layer
    :param K: number of columns of the transform; asserted to be 3
    :return: tensor reshaped to (B, 3, K)
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    net = tf.expand_dims(point_cloud, -1)

    # Scope names are kept exactly as they were (there is no 'tconv2'),
    # because they determine the variable names.
    conv_stack = (
        ('tconv1', 64, [1, 3]),
        ('tconv3', 128, [1, 1]),
        ('tconv4', 1024, [1, 1]),
    )
    for scope_name, channels, kernel in conv_stack:
        net = tf_util.conv2d(net, channels, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Collapse the point axis, then flatten to one vector per batch element.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    for scope_name in ('tfc1', 'tfc2'):
        net = tf_util.fully_connected(net, 128, bn=True,
                                      is_training=is_training,
                                      scope=scope_name, bn_decay=bn_decay)

    with tf.variable_scope('transform_XYZ'):
        assert (K == 3)
        weights = tf.get_variable('weights', [128, 3 * K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        bias_var = tf.get_variable('biases', [3 * K],
                                   initializer=tf.constant_initializer(0.0),
                                   dtype=tf.float32)
        # Zero weights plus this offset make the initial output the identity.
        identity_flat = tf.constant([1, 0, 0, 0, 1, 0, 0, 0, 1],
                                    dtype=tf.float32)
        biases = bias_var + identity_flat
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [batch_size, 3, K])
def get_model(point_cloud, is_training, bn_decay=None):
    """Build the basic point-cloud classification network.

    :param point_cloud: input tensor, expected to be BxNx3 with static B and N
    :param is_training: training flag forwarded to the layers
    :param bn_decay: batch-norm decay forwarded to the layers
    :return: tuple ``(logits, end_points)``; logits come from a 40-unit
        linear layer and ``end_points`` is an empty dict
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value
    end_points = {}

    net = tf.expand_dims(point_cloud, -1)

    # Per-point MLP implemented as convolutions; the first kernel spans the
    # three coordinates of each point.
    point_layers = (
        ('conv1', 64, [1, 3]),
        ('conv2', 64, [1, 1]),
        ('conv3', 64, [1, 1]),
        ('conv4', 128, [1, 1]),
        ('conv5', 1024, [1, 1]),
    )
    for scope_name, channels, kernel in point_layers:
        net = tf_util.conv2d(net, channels, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Symmetric function: max pooling across all points.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')

    # MLP on the global feature vector.
    net = tf.reshape(net, [batch_size, -1])
    for scope_name, width in (('fc1', 512), ('fc2', 256)):
        net = tf_util.fully_connected(net, width, bn=True,
                                      is_training=is_training,
                                      scope=scope_name, bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7,
                          is_training=is_training, scope='dp1')
    logits = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return logits, end_points
def create_encoder(self, inputs):
    """Encode points into per-point features joined with a global feature.

    Uses the module-level ``NUM_POINT`` and ``BATCH_SIZE`` constants and the
    instance's ``is_training`` / ``bn_decay`` attributes.

    :param inputs: input tensor; a trailing channel axis is appended and the
        first convolution uses a [1, 9] kernel
    :return: concatenation along axis 3 of the 1024-channel per-point
        features and the tiled 128-channel global feature
    """
    feat = tf.expand_dims(inputs, -1)

    # Convolution stack.
    conv_stack = (
        ('conv1', 64, [1, 9]),
        ('conv2', 64, [1, 1]),
        ('conv3', 64, [1, 1]),
        ('conv4', 128, [1, 1]),
        ('conv5', 1024, [1, 1]),
    )
    for scope_name, channels, kernel in conv_stack:
        feat = conv2d(feat, channels, kernel,
                      padding='VALID', stride=[1, 1],
                      bn=True, is_training=self.is_training,
                      scope=scope_name, bn_decay=self.bn_decay)
    points_feat1 = feat

    # Max pooling over the point axis.
    pooled = max_pool2d(points_feat1, [NUM_POINT, 1],
                        padding='VALID', scope='maxpool')

    # Fully connected layers on the pooled feature.
    global_feat = tf.reshape(pooled, [BATCH_SIZE, -1])
    for scope_name, width in (('fc1', 256), ('fc2', 128)):
        global_feat = fully_connected(global_feat, width, bn=True,
                                      is_training=self.is_training,
                                      scope=scope_name,
                                      bn_decay=self.bn_decay)

    # Repeat the global feature for every point and join it to the
    # per-point features.
    global_tiled = tf.tile(tf.reshape(global_feat, [BATCH_SIZE, 1, 1, -1]),
                           [1, NUM_POINT, 1, 1])
    return tf.concat(axis=3, values=[points_feat1, global_tiled])
def get_transform_K(inputs, is_training, bn_decay=None, K=3):
    """Build the feature-transform sub-network producing a KxK matrix.

    :param inputs: input tensor, expected to be BxNx1xK with static B and N
    :param is_training: training flag forwarded to every layer
    :param bn_decay: batch-norm decay forwarded to every layer
    :param K: side length of the predicted square transform
    :return: tensor reshaped to (B, K, K)
    """
    static_shape = inputs.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    net = inputs
    for scope_name, channels in (('tconv1', 256), ('tconv2', 1024)):
        net = tf_util.conv2d(net, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Pool over the point axis and flatten to one vector per batch element.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    for scope_name, width in (('tfc1', 512), ('tfc2', 256)):
        net = tf_util.fully_connected(net, width, bn=True,
                                      is_training=is_training,
                                      scope=scope_name, bn_decay=bn_decay)

    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K * K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        bias_var = tf.get_variable('biases', [K * K],
                                   initializer=tf.constant_initializer(0.0),
                                   dtype=tf.float32)
        # Zero weights plus the identity offset make the initial output
        # the KxK identity.
        identity_flat = tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        biases = bias_var + identity_flat
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [batch_size, K, K])
def VLAD_part(input_tensor, FLAGS, is_training, bn_decay, layer_name=None):
    """Append VLAD-style residual features to ``input_tensor``.

    Descriptors are softly assigned to ``FLAGS.centers`` learnable centers.
    The residuals to the ``FLAGS.topk`` best-scoring centers are gathered,
    passed through two 1x1 convolutions, max-pooled and concatenated to the
    input along the last axis.

    :param input_tensor: input features; the first two static dimensions are
        used as batch size and point count, the last as the descriptor size
    :param FLAGS: object providing ``centers`` and ``topk``
    :param is_training: training flag forwarded to the conv layers
    :param bn_decay: batch-norm decay forwarded to the conv layers
    :param layer_name: prefix for the centers variable name; when ``None``
        the variable is simply named ``'centers'``
    :return: tuple ``(concat, indices)`` where ``indices`` is the index
        output of ``tf.nn.top_k`` on the soft assignment
    """
    # The default layer_name=None used to raise TypeError on
    # ``None + '_centers'``. Names for explicit layer_name values are
    # unchanged.
    if layer_name is None:
        centers_name = 'centers'
    else:
        centers_name = layer_name + '_centers'

    with tf.variable_scope('VLAD_layer'):
        batch_size = input_tensor.get_shape()[0].value
        num_point = input_tensor.get_shape()[1].value
        D = input_tensor.get_shape()[-1].value

        reshape = tf.reshape(input_tensor, shape=[batch_size, num_point, D])
        conv_norm = tf.nn.l2_normalize(reshape, dim=2)
        descriptor = tf.expand_dims(conv_norm, axis=-1, name='expanddim')

        # Soft assignment of every descriptor to every center.
        conv = conv2d(descriptor, FLAGS.centers, [1, D],
                      padding='VALID', stride=[1, 1],
                      bn=True, is_training=is_training,
                      scope='assignment', bn_decay=bn_decay,
                      activation_fn=None)
        a = tf.nn.softmax(conv)

        # tf.nn.top_k returns (values, indices); Top_K turns that into
        # gather indices (helper defined elsewhere).
        top_k = FLAGS.topk
        one_hot = tf.nn.top_k(a, top_k)
        fuse = Top_K(one_hot, top_k, batch_size, num_point)

        Center = _variable_on_cpu(
            name=centers_name,
            shape=[D, FLAGS.centers],
            initializer=tf.contrib.layers.xavier_initializer())

        # Residual to each center, weighted by the soft assignment, then
        # restricted to the selected centers.
        diff = tf.expand_dims(reshape, axis=-1) - Center
        diff = diff * a
        diff = tf.transpose(diff, perm=[0, 1, 3, 2])
        diff = tf.gather_nd(diff, fuse)

        net = conv2d(diff, 128, [1, 1],
                     padding='VALID', stride=[1, 1],
                     bn=True, is_training=is_training,
                     scope='vlad_conv1', bn_decay=bn_decay)
        net = conv2d(net, 256, [1, 1],
                     padding='VALID', stride=[1, 1],
                     bn=True, is_training=is_training,
                     scope='vlad_conv2', bn_decay=bn_decay)

        pool1 = tf.nn.max_pool(net, ksize=[1, 1, top_k, 1],
                               strides=[1, 1, 2, 1], padding='VALID')
        concat = tf.concat([input_tensor, pool1], axis=-1)
        return concat, one_hot[1]
def create_decoder(self):
    """Decode ``self.cat_feats`` into 13-channel per-point outputs.

    Two batch-normalised 1x1 convolutions (512 and 256 channels) are followed
    by dropout and a final 13-channel convolution without activation; axis 2
    is then squeezed away.

    :return: the squeezed output of the final convolution
    """
    hidden = self.cat_feats
    for scope_name, channels in (('conv6', 512), ('conv7', 256)):
        hidden = conv2d(hidden, channels, [1, 1],
                        padding='VALID', stride=[1, 1],
                        bn=True, is_training=self.is_training,
                        scope=scope_name)

    hidden = dropout(hidden, keep_prob=0.7,
                     is_training=self.is_training, scope='dp1')
    logits = conv2d(hidden, 13, [1, 1],
                    padding='VALID', stride=[1, 1],
                    activation_fn=None, scope='conv8')
    return tf.squeeze(logits, [2])
def get_model(point_cloud, is_training, bn_decay=None):
    """Build the classification network with input and feature transforms.

    :param point_cloud: input tensor, expected to be BxNx3 with static B and N
    :param is_training: training flag forwarded to the layers
    :param bn_decay: batch-norm decay forwarded to the layers
    :return: tuple ``(logits, end_points)``; logits come from a 40-unit
        linear layer and ``end_points['transform']`` holds the feature
        transform
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value
    end_points = {}

    # Align the raw coordinates with a learned 3x3 transform.
    with tf.variable_scope('transform_net1'):
        input_transform = input_transform_net(point_cloud, is_training,
                                              bn_decay, K=3)
    aligned_points = tf.matmul(point_cloud, input_transform)
    net = tf.expand_dims(aligned_points, -1)

    for scope_name, kernel in (('conv1', [1, 3]), ('conv2', [1, 1])):
        net = tf_util.conv2d(net, 64, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Align the 64-channel features with a learned 64x64 transform.
    with tf.variable_scope('transform_net2'):
        feat_transform = feature_transform_net(net, is_training,
                                               bn_decay, K=64)
    end_points['transform'] = feat_transform
    aligned_feat = tf.matmul(tf.squeeze(net, axis=[2]), feat_transform)
    net = tf.expand_dims(aligned_feat, [2])

    for scope_name, channels in (('conv3', 64), ('conv4', 128),
                                 ('conv5', 1024)):
        net = tf_util.conv2d(net, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Symmetric function: max pooling over all points.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')
    net = tf.reshape(net, [batch_size, -1])

    # Two FC + dropout stages, then the linear classifier.
    for fc_scope, width, dp_scope in (('fc1', 512, 'dp1'),
                                      ('fc2', 256, 'dp2')):
        net = tf_util.fully_connected(net, width, bn=True,
                                      is_training=is_training,
                                      scope=fc_scope, bn_decay=bn_decay)
        net = tf_util.dropout(net, keep_prob=0.7,
                              is_training=is_training, scope=dp_scope)
    logits = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return logits, end_points
def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """Build the input (XYZ) transform sub-network.

    :param point_cloud: input tensor, expected to be BxNx3 with static B and N
    :param is_training: training flag forwarded to every layer
    :param bn_decay: batch-norm decay forwarded to every layer
    :param K: number of columns of the transform; asserted to be 3
    :return: tensor reshaped to (B, 3, K)
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    # Append a channel axis so the cloud can be fed to 2-D convolutions.
    net = tf.expand_dims(point_cloud, -1)

    conv_stack = (
        ('tconv1', 64, [1, 3]),
        ('tconv2', 128, [1, 1]),
        ('tconv3', 1024, [1, 1]),
    )
    for scope_name, channels, kernel in conv_stack:
        net = tf_util.conv2d(net, channels, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Pool over the point axis and flatten to one vector per batch element.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    for scope_name, width in (('tfc1', 512), ('tfc2', 256)):
        net = tf_util.fully_connected(net, width, bn=True,
                                      is_training=is_training,
                                      scope=scope_name, bn_decay=bn_decay)

    with tf.variable_scope('transform_XYZ'):
        assert(K == 3)
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        bias_var = tf.get_variable('biases', [3*K],
                                   initializer=tf.constant_initializer(0.0),
                                   dtype=tf.float32)
        # Zero weights plus this offset make the initial output the identity.
        biases = bias_var + tf.constant([1, 0, 0, 0, 1, 0, 0, 0, 1],
                                        dtype=tf.float32)
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [batch_size, 3, K])
def ae_encoder(batch_size, num_point, point_dim, input_image, is_training,
               bn_decay=None, embedding_dim=128):
    """Encode an input into a single embedding vector per batch element.

    :param batch_size: batch size used when flattening the pooled feature
    :param num_point: number of points; height of the max-pool window
    :param point_dim: width of the first convolution kernel
    :param input_image: input tensor fed to the first convolution
    :param is_training: training flag forwarded to every layer
    :param bn_decay: batch-norm decay forwarded to every layer
    :param embedding_dim: channel count of the last convolution
    :return: tuple ``(embedding, end_points)`` where
        ``end_points['embedding']`` is the same tensor as ``embedding``
    """
    conv_stack = (
        ('conv1', 64, [1, point_dim]),
        ('conv2', 64, [1, 1]),
        ('conv3', 64, [1, 1]),
        ('conv4', 128, [1, 1]),
        ('conv5', embedding_dim, [1, 1]),
    )
    feat = input_image
    for scope_name, channels, kernel in conv_stack:
        feat = tf_util.conv2d(feat, channels, kernel,
                              padding='VALID', stride=[1, 1],
                              bn=True, is_training=is_training,
                              scope=scope_name, bn_decay=bn_decay)

    # Pool over the point axis, then flatten.
    global_feat = tf_util.max_pool2d(feat, [num_point, 1],
                                     padding='VALID', scope='maxpool')
    embedding = tf.reshape(global_feat, [batch_size, -1])

    return embedding, {'embedding': embedding}
def input_transform_net_dgcnn(edge_feature, is_training, bn_decay=None, K=3):
    """Build the transform sub-network that operates on edge features.

    :param edge_feature: 4-D edge-feature tensor with static batch and point
        dimensions; axis -2 is reduced by a max after the second convolution
    :param is_training: training flag forwarded to every layer
    :param bn_decay: batch-norm decay forwarded to every layer
    :param K: side length of the predicted square transform
    :return: tensor reshaped to (B, K, K)
    """
    static_shape = edge_feature.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    net = edge_feature
    for scope_name, channels in (('tconv1', 64), ('tconv2', 128)):
        net = tf_util.conv2d(net, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name, bn_decay=bn_decay)

    # Max over the second-to-last axis before the widest convolution.
    net = tf.reduce_max(net, axis=-2, keep_dims=True)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay)

    # Pool over the point axis and flatten to one vector per batch element.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    for scope_name, width in (('tfc1', 512), ('tfc2', 256)):
        net = tf_util.fully_connected(net, width, bn=True,
                                      is_training=is_training,
                                      scope=scope_name, bn_decay=bn_decay)

    with tf.variable_scope('transform_XYZ'):
        # The regression variables are pinned to the CPU.
        with tf.device('/cpu:0'):
            weights = tf.get_variable('weights', [256, K * K],
                                      initializer=tf.constant_initializer(0.0),
                                      dtype=tf.float32)
            bias_var = tf.get_variable('biases', [K * K],
                                       initializer=tf.constant_initializer(0.0),
                                       dtype=tf.float32)
        # Zero weights plus the identity offset make the initial output
        # the KxK identity.
        biases = bias_var + tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    return tf.reshape(transform, [batch_size, K, K])
def pointnet_fp_module(xyz1, xyz2, points1, points2, mlp, is_training,
                       bn_decay, scope, bn=True):
    '''Feature Propagation (FP) module.

    Features of the sparser set are interpolated onto ``xyz1`` with
    inverse-distance weights over the three neighbours returned by
    ``three_nn``. They are optionally concatenated with ``points1`` and
    passed through a per-point MLP.

    Input:
        xyz1: (batch_size, ndataset1, 3) TF tensor
        xyz2: (batch_size, ndataset2, 3) TF tensor, sparser than xyz1
        points1: (batch_size, ndataset1, nchannel1) TF tensor, or None
        points2: (batch_size, ndataset2, nchannel2) TF tensor
        mlp: list of int32 -- output size for MLP on each point
    Return:
        new_points: (batch_size, ndataset1, mlp[-1]) TF tensor
    '''
    with tf.variable_scope(scope):
        dist, idx = three_nn(xyz1, xyz2)
        # Clamp so coincident points do not cause a division by zero.
        dist = tf.maximum(dist, 1e-10)

        # Normalised inverse-distance weights over the three neighbours.
        inv_dist_sum = tf.reduce_sum((1.0 / dist), axis=2, keepdims=True)
        inv_dist_sum = tf.tile(inv_dist_sum, [1, 1, 3])
        weight = (1.0 / dist) / inv_dist_sum
        interpolated_points = three_interpolate(points2, idx, weight)

        if points1 is None:
            features = interpolated_points
        else:
            # B, ndataset1, nchannel1 + nchannel2
            features = tf.concat(axis=2,
                                 values=[interpolated_points, points1])

        # Add a singleton axis so the MLP can run as 1x1 convolutions.
        features = tf.expand_dims(features, 2)
        for layer_idx, width in enumerate(mlp):
            features = tf_util.conv2d(features, width, [1, 1],
                                      padding='VALID', stride=[1, 1],
                                      bn=bn, is_training=is_training,
                                      scope='conv_%d' % (layer_idx),
                                      bn_decay=bn_decay)

        # B, ndataset1, mlp[-1]
        return tf.squeeze(features, [2])
def pointnet_fp_module(xyz1, xyz2, points1, points2, mlp, is_training,
                       bn_decay, scope, bn=True):
    """Propagate features from a sparse point set to a denser one.

    Features of ``points2`` are interpolated onto ``xyz1`` using normalised
    inverse-distance weights over the three neighbours found by
    ``three_nn``. They are optionally concatenated with ``points1`` and
    refined by 1x1 convolutions with the widths listed in ``mlp``.

    :param xyz1: coordinates of the target set
    :param xyz2: coordinates of the source set
    :param points1: features attached to ``xyz1``, or ``None``
    :param points2: features attached to ``xyz2``
    :param mlp: output widths of the per-point MLP layers
    :param is_training: training flag forwarded to the conv layers
    :param bn_decay: batch-norm decay forwarded to the conv layers
    :param scope: variable scope wrapping the whole module
    :param bn: whether the conv layers use batch normalisation
    :return: propagated features with axis 2 squeezed away
    """
    with tf.variable_scope(scope):
        dist, idx = three_nn(xyz1, xyz2)
        # Lower-bound the distances to keep the reciprocals finite.
        dist = tf.maximum(dist, 1e-10)

        normaliser = tf.reduce_sum((1.0 / dist), axis=2, keep_dims=True)
        normaliser = tf.tile(normaliser, [1, 1, 3])
        weight = (1.0 / dist) / normaliser
        interpolated_points = three_interpolate(points2, idx, weight)

        if points1 is None:
            stacked = interpolated_points
        else:
            # B, ndataset1, nchannel1 + nchannel2
            stacked = tf.concat(axis=2,
                                values=[interpolated_points, points1])

        stacked = tf.expand_dims(stacked, 2)
        for layer_idx, width in enumerate(mlp):
            stacked = tf_util.conv2d(stacked, width, [1, 1],
                                     padding='VALID', stride=[1, 1],
                                     bn=bn, is_training=is_training,
                                     scope='conv%d' % (layer_idx),
                                     bn_decay=bn_decay)

        # B, ndataset1, mlp[-1]
        return tf.squeeze(stacked, [2])
def get_model(point_cloud, is_training, bn_decay=None):
    """Build the per-point prediction network (50 outputs per point).

    Per-point features (after the feature transform) are concatenated with
    the tiled global max-pooled feature and decoded by 1x1 convolutions into
    50 outputs for every point.

    :param point_cloud: input tensor, expected to be BxNx3 with static B and N
    :param is_training: training flag forwarded to the layers
    :param bn_decay: batch-norm decay forwarded to the layers
    :return: tuple ``(net, end_points)``; ``net`` is expected to be BxNx50
        and ``end_points['transform']`` holds the feature transform
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    # Align the raw coordinates with a learned 3x3 transform.
    with tf.variable_scope('transform_net1') as sc:
        transform = input_transform_net(point_cloud, is_training, bn_decay,
                                        K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    net = tf_util.conv2d(input_image, 64, [1, 3],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)

    # Align the 64-channel features with a learned 64x64 transform.
    with tf.variable_scope('transform_net2') as sc:
        transform = feature_transform_net(net, is_training, bn_decay, K=64)
    end_points['transform'] = transform
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    point_feat = tf.expand_dims(net_transformed, [2])
    print(point_feat)  # graph-construction debug output

    net = tf_util.conv2d(point_feat, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)
    global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                     padding='VALID', scope='maxpool')
    print(global_feat)  # graph-construction debug output

    # Give every point a copy of the global feature.
    global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
    # Keyword form: the positional (axis, values) order is the pre-1.0
    # signature and is rejected by later TensorFlow releases.
    concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])
    print(concat_feat)  # graph-construction debug output

    net = tf_util.conv2d(concat_feat, 512, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv6', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 256, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv7', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv8', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv9', bn_decay=bn_decay)

    net = tf_util.conv2d(net, 50, [1, 1],
                         padding='VALID', stride=[1, 1],
                         activation_fn=None, scope='conv10')
    net = tf.squeeze(net, [2])  # BxNxC

    return net, end_points
def align_channel_network(info, mlp_list, bn, is_training, bn_decay, scope):
    """Pass ``info`` through a chain of 1x1 convolutions.

    :param info: input tensor fed to the first convolution
    :param mlp_list: output channel count of each convolution, in order
    :param bn: whether the convolutions use batch normalisation
    :param is_training: training flag forwarded to the convolutions
    :param bn_decay: batch-norm decay forwarded to the convolutions
    :param scope: variable scope wrapping all convolutions
    :return: output of the last convolution (``info`` itself if
        ``mlp_list`` is empty)
    """
    with tf.variable_scope(scope):
        out = info
        for layer_idx, channels in enumerate(mlp_list):
            out = tf_util.conv2d(out, channels, [1, 1],
                                 padding='VALID', bn=bn,
                                 is_training=is_training,
                                 scope='conv%d'%layer_idx,
                                 bn_decay=bn_decay)
    return out
def local_op(input, out_dim, scope, bn_decay, is_training):
    """Apply two bias-free 1x1 convolutions, then max over axis 2.

    :param input: tensor expected as (batch_size, num_point, num_sample,
        num_dim)
    :param out_dim: output channel count of both convolutions
    :param scope: prefix for the convolution scopes (``scope + 'conv0'`` and
        ``scope + 'conv1'``)
    :param bn_decay: batch-norm decay forwarded to the convolutions
    :param is_training: training flag forwarded to the convolutions
    :return: the convolved features reduced with a max over axis 2
    """
    feat = input
    for suffix in ('conv0', 'conv1'):
        feat = tf_util.conv2d(feat, out_dim, [1, 1],
                              padding='VALID', stride=[1, 1],
                              bn=True, is_biases=False,
                              is_training=is_training,
                              scope=scope + suffix, bn_decay=bn_decay)

    # A plain reduction is used here in place of tf_util.max_pool2d().
    return tf.reduce_max(feat, axis=2, keepdims=False)
def get_model(point_cloud, is_training, bn_decay=None):
    """Build the convolutional baseline producing 13 outputs per point.

    :param point_cloud: input tensor with static batch and point dimensions;
        the first convolution uses a [1, 9] kernel
    :param is_training: training flag forwarded to the layers
    :param bn_decay: batch-norm decay forwarded to the encoder layers
    :return: output of the final 13-channel convolution with axis 2
        squeezed away
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    # Encoder convolutions.
    feat = tf.expand_dims(point_cloud, -1)
    encoder_stack = (
        ('conv1', 64, [1, 9]),
        ('conv2', 64, [1, 1]),
        ('conv3', 64, [1, 1]),
        ('conv4', 128, [1, 1]),
        ('conv5', 1024, [1, 1]),
    )
    for scope_name, channels, kernel in encoder_stack:
        feat = tf_util.conv2d(feat, channels, kernel,
                              padding='VALID', stride=[1, 1],
                              bn=True, is_training=is_training,
                              scope=scope_name, bn_decay=bn_decay)
    points_feat1 = feat

    # Global feature: max over points followed by two FC layers.
    pc_feat1 = tf_util.max_pool2d(points_feat1, [num_point, 1],
                                  padding='VALID', scope='maxpool1')
    pc_feat1 = tf.reshape(pc_feat1, [batch_size, -1])
    for scope_name, width in (('fc1', 256), ('fc2', 128)):
        pc_feat1 = tf_util.fully_connected(pc_feat1, width, bn=True,
                                           is_training=is_training,
                                           scope=scope_name,
                                           bn_decay=bn_decay)
    print(pc_feat1)

    # Tile the global feature per point and concatenate it with the
    # per-point features.
    pc_feat1_expand = tf.tile(tf.reshape(pc_feat1, [batch_size, 1, 1, -1]),
                              [1, num_point, 1, 1])
    print(points_feat1)
    print(pc_feat1_expand)
    points_feat1_concat = tf.concat(axis=3,
                                    values=[points_feat1, pc_feat1_expand])
    print(points_feat1_concat)

    # Decoder convolutions (these do not receive bn_decay).
    net = points_feat1_concat
    for scope_name, channels in (('conv6', 512), ('conv7', 256)):
        net = tf_util.conv2d(net, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope_name)
    net = tf_util.dropout(net, keep_prob=0.7,
                          is_training=is_training, scope='dp1')
    net = tf_util.conv2d(net, 13, [1, 1],
                         padding='VALID', stride=[1, 1],
                         activation_fn=None, scope='conv8')

    return tf.squeeze(net, [2])
def pointnet_sa_module_msg(xyz, points, npoint, radius_list, nsample_list,
                           mlp_list, is_training, bn_decay, scope, bn=True,
                           use_xyz=True, use_nchw=False):
    '''PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG).

    Input:
        xyz: (batch_size, ndataset, 3) TF tensor
        points: (batch_size, ndataset, channel) TF tensor, or None
        npoint: int32 -- #points sampled in farthest point sampling
        radius_list: list of float32 -- search radius in local region,
            one per scale
        nsample_list: list of int32 -- how many points in each local region,
            one per scale
        mlp_list: list of list of int32 -- output sizes of the per-point MLP,
            one list per scale
        use_xyz: bool, if True concat XYZ with local point features,
            otherwise just use point features
        use_nchw: bool, if True, use NCHW data format for conv2d
    Return:
        new_xyz: (batch_size, npoint, 3) TF tensor
        new_points: (batch_size, npoint, sum over k of mlp_list[k][-1])
            TF tensor
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope) as sc:
        # Centroids shared by every scale.
        new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz))
        new_points_list = []
        for i, radius in enumerate(radius_list):
            nsample = nsample_list[i]
            idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)

            # Neighbour coordinates relative to their centroid.
            grouped_xyz = group_point(xyz, idx)
            grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2),
                                   [1, 1, nsample, 1])

            if points is not None:
                grouped_points = group_point(points, idx)
                if use_xyz:
                    grouped_points = tf.concat([grouped_points, grouped_xyz],
                                               axis=-1)
            else:
                grouped_points = grouped_xyz

            if use_nchw:
                grouped_points = tf.transpose(grouped_points, [0, 3, 1, 2])
            for j, num_out_channel in enumerate(mlp_list[i]):
                # data_format must reach conv2d; otherwise the NCHW
                # transpose above is convolved as if it were NHWC.
                grouped_points = tf_util.conv2d(grouped_points,
                                                num_out_channel, [1, 1],
                                                padding='VALID',
                                                stride=[1, 1],
                                                bn=bn,
                                                is_training=is_training,
                                                scope='conv%d_%d' % (i, j),
                                                bn_decay=bn_decay,
                                                data_format=data_format)
            if use_nchw:
                grouped_points = tf.transpose(grouped_points, [0, 2, 3, 1])

            # Max over the samples of each local region.
            new_points = tf.reduce_max(grouped_points, axis=[2])
            new_points_list.append(new_points)

        # Stack the features of all scales along the channel axis.
        new_points_concat = tf.concat(new_points_list, axis=-1)
        return new_xyz, new_points_concat
def create_encoder(self, point_cloud, npts):
    """Encode a point cloud with four edge-convolution stages.

    The cloud is first aligned by a learned 3x3 transform. Each stage then
    builds a k-nearest-neighbour graph (``self.knn``) on its input, applies
    a 1x1 convolution to the edge features and max-reduces axis -2. The four
    stage outputs are concatenated, convolved to 1024 channels and
    max-reduced over axis 1.

    :param point_cloud: input reshaped to (BATCH_SIZE, NUM_POINT, 3)
    :param npts: not used by this method
    :return: features reshaped to (BATCH_SIZE, -1)
    """
    point_cloud = tf.reshape(point_cloud, (BATCH_SIZE, NUM_POINT, 3))

    # Edge features of the raw cloud drive the spatial transform.
    raw_adj = tf_util.pairwise_distance(point_cloud)
    raw_nn_idx = tf_util.knn(raw_adj, k=self.knn)
    raw_edges = tf_util.get_edge_feature(point_cloud, nn_idx=raw_nn_idx,
                                         k=self.knn)
    with tf.variable_scope('transform_net1'):
        transform = input_transform_net_dgcnn(raw_edges, self.is_training,
                                              self.bn_decay, K=3)
    stage_input = tf.matmul(point_cloud, transform)

    stage_outputs = []
    for scope_name, channels in (('dgcnn1', 64), ('dgcnn2', 64),
                                 ('dgcnn3', 64), ('dgcnn4', 128)):
        # The neighbourhood graph is rebuilt on each stage's input.
        adj_matrix = tf_util.pairwise_distance(stage_input)
        nn_idx = tf_util.knn(adj_matrix, k=self.knn)
        edge_feature = tf_util.get_edge_feature(stage_input, nn_idx=nn_idx,
                                                k=self.knn)
        conv_out = tf_util.conv2d(edge_feature, channels, [1, 1],
                                  padding='VALID', stride=[1, 1],
                                  bn=True, is_training=self.is_training,
                                  scope=scope_name, bn_decay=self.bn_decay)
        stage_input = tf.reduce_max(conv_out, axis=-2, keep_dims=True)
        stage_outputs.append(stage_input)

    # Aggregate all stages and pool over axis 1.
    aggregated = tf_util.conv2d(tf.concat(stage_outputs, axis=-1), 1024,
                                [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=self.is_training,
                                scope='agg', bn_decay=self.bn_decay)
    pooled = tf.reduce_max(aggregated, axis=1, keep_dims=True)
    return tf.reshape(pooled, [BATCH_SIZE, -1])
def pointnet_sa_module(xyz, points, npoint, radius, nsample, mlp, mlp2,
                       group_all, is_training, bn_decay, scope, bn=True,
                       pooling='max', knn=False, use_xyz=True,
                       use_nchw=False):
    '''PointNet Set Abstraction (SA) module.

    Input:
        xyz: (batch_size, ndataset, 3) TF tensor
        points: (batch_size, ndataset, channel) TF tensor
        npoint: int32 -- #points sampled in farthest point sampling
        radius: float32 -- search radius in local region
        nsample: int32 -- how many points in each local region
        mlp: list of int32 -- output size for MLP on each point
        mlp2: list of int32 -- output size for MLP on each region, or None
        group_all: bool -- group all points into one region if True,
            overriding npoint, radius and nsample
        pooling: one of 'max', 'avg', 'weighted_avg', 'max_and_avg'; any
            other value leaves the features unpooled
        use_xyz: bool, if True concat XYZ with local point features,
            otherwise just use point features
        use_nchw: bool, if True, use NCHW data format for conv2d
    Return:
        new_xyz: (batch_size, npoint, 3) TF tensor
        new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
        idx: (batch_size, npoint, nsample) int32 -- indices for local regions
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope):
        # --- Sampling and grouping ---
        if group_all:
            nsample = xyz.get_shape()[1].value
            grouped = sample_and_group_all(xyz, points, use_xyz)
        else:
            grouped = sample_and_group(npoint, radius, nsample, xyz, points,
                                       knn, use_xyz)
        new_xyz, new_points, idx, grouped_xyz = grouped

        # --- Per-point feature embedding ---
        if use_nchw:
            new_points = tf.transpose(new_points, [0, 3, 1, 2])
        for layer_idx, width in enumerate(mlp):
            new_points = tf_util.conv2d(new_points, width, [1, 1],
                                        padding='VALID', stride=[1, 1],
                                        bn=bn, is_training=is_training,
                                        scope='conv%d' % (layer_idx),
                                        bn_decay=bn_decay,
                                        data_format=data_format)
        if use_nchw:
            new_points = tf.transpose(new_points, [0, 2, 3, 1])

        # --- Pooling within each local region (axis 2) ---
        if pooling == 'max':
            new_points = tf.reduce_max(new_points, axis=[2], keepdims=True,
                                       name='maxpool')
        elif pooling == 'avg':
            new_points = tf.reduce_mean(new_points, axis=[2], keepdims=True,
                                        name='avgpool')
        elif pooling == 'weighted_avg':
            with tf.variable_scope('weighted_avg'):
                # Weights decay exponentially with distance from the
                # centroid and are normalised per region.
                dists = tf.norm(grouped_xyz, axis=-1, ord=2, keepdims=True)
                exp_dists = tf.exp(-dists * 5)
                # (batch_size, npoint, nsample, 1)
                weights = exp_dists / tf.reduce_sum(exp_dists, axis=2,
                                                    keepdims=True)
                # (batch_size, npoint, nsample, mlp[-1])
                new_points *= weights
                new_points = tf.reduce_sum(new_points, axis=2, keepdims=True)
        elif pooling == 'max_and_avg':
            max_points = tf.reduce_max(new_points, axis=[2], keepdims=True,
                                       name='maxpool')
            avg_points = tf.reduce_mean(new_points, axis=[2], keepdims=True,
                                        name='avgpool')
            new_points = tf.concat([avg_points, max_points], axis=-1)

        # --- Optional per-region MLP ---
        if mlp2 is not None:
            if use_nchw:
                new_points = tf.transpose(new_points, [0, 3, 1, 2])
            for layer_idx, width in enumerate(mlp2):
                new_points = tf_util.conv2d(new_points, width, [1, 1],
                                            padding='VALID', stride=[1, 1],
                                            bn=bn, is_training=is_training,
                                            scope='conv_post_%d' % (layer_idx),
                                            bn_decay=bn_decay,
                                            data_format=data_format)
            if use_nchw:
                new_points = tf.transpose(new_points, [0, 2, 3, 1])

        # (batch_size, npoints, mlp2[-1])
        new_points = tf.squeeze(new_points, [2])
        return new_xyz, new_points, idx
def create_encoder(self, point_cloud):
    """Encode a point cloud with four graph-convolution stages.

    Each stage computes graph features of its input via
    ``self.get_graph_feature`` with ``self.knn`` neighbours, applies a
    bias-free leaky-ReLU 1x1 convolution and max-reduces axis -2. The four
    stage outputs are concatenated and convolved to 1024 channels. The
    result is both max- and mean-reduced over axis 1, and the two are
    concatenated.

    :param point_cloud: input reshaped to (BATCH_SIZE, NUM_POINT, 3)
    :return: features reshaped to (BATCH_SIZE, -1)
    """
    point_cloud = tf.reshape(point_cloud, (BATCH_SIZE, NUM_POINT, 3))

    stage_input = point_cloud
    stage_outputs = []
    for scope_name, channels in (('conv1', 64), ('conv2', 64),
                                 ('conv3', 128), ('conv4', 256)):
        graph_feat = self.get_graph_feature(stage_input, self.knn)
        graph_feat = tf_util.conv2d(graph_feat, channels, [1, 1],
                                    padding='VALID', stride=[1, 1],
                                    bn=True, bias=False,
                                    is_training=self.is_training,
                                    activation_fn=tf.nn.leaky_relu,
                                    scope=scope_name,
                                    bn_decay=self.bn_decay)
        stage_input = tf.reduce_max(graph_feat, axis=-2, keep_dims=True)
        stage_outputs.append(stage_input)

    # Aggregate all stages into a single 1024-channel feature map.
    aggregated = tf_util.conv2d(tf.concat(stage_outputs, axis=-1), 1024,
                                [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, bias=False,
                                is_training=self.is_training,
                                activation_fn=tf.nn.leaky_relu,
                                scope='agg', bn_decay=self.bn_decay)

    # Global descriptor: max and mean over axis 1, joined on the last axis.
    max_feat = tf.reduce_max(aggregated, axis=1, keep_dims=True)
    mean_feat = tf.reduce_mean(aggregated, axis=1, keep_dims=True)
    return tf.reshape(tf.concat([max_feat, mean_feat], axis=-1),
                      [BATCH_SIZE, -1])
def global_spatial_transformer(point_cloud, is_training, K=3, bn=True, bn_decay=None, is_dist=True):
    """Input (XYZ) Transform Net: predict one KxK transformation matrix per batch element.

    Two 1x1 convolutions (64, 128 channels) are followed by a max-reduction
    over axis -2, a 1024-channel 1x1 convolution, a max-pool with a
    [num_point, 1] window and two fully connected layers (512, 256). The final
    linear layer has zero-initialised weights and biases, and the flattened
    KxK identity is added to the biases, so the initial output is the identity.

    :param point_cloud: input tensor; its static shape must expose the batch
        size on axis 0 and the number of points on axis 1
        (NOTE(review): it is passed straight to conv2d, so it is presumably
        4-D rather than BxNx3 -- confirm against the caller)
    :param is_training: training flag forwarded to the tf_util layers
    :param K: side length of the predicted square matrix
    :param bn: whether the tf_util layers apply batch normalisation
    :param bn_decay: batch-norm decay forwarded to the tf_util layers
    :param is_dist: forwarded unchanged to the tf_util layers
    :return: tensor of shape (batch_size, K, K)
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value

    def conv(tensor, channels, scope):
        return tf_util.conv2d(tensor, channels, [1, 1],
                              padding='VALID', stride=[1, 1],
                              bn=bn, is_training=is_training,
                              scope=scope, bn_decay=bn_decay, is_dist=is_dist)

    def dense(tensor, units, scope):
        return tf_util.fully_connected(tensor, units, bn=bn, is_training=is_training,
                                       scope=scope, bn_decay=bn_decay, is_dist=is_dist)

    net = conv(point_cloud, 64, 'tconv1')
    net = conv(net, 128, 'tconv2')
    net = tf.reduce_max(net, axis=-2, keep_dims=True)
    net = conv(net, 1024, 'tconv3')
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')

    net = tf.reshape(net, [batch_size, -1])
    net = dense(net, 512, 'tfc1')
    net = dense(net, 256, 'tfc2')

    with tf.variable_scope('transform_XYZ'):
        weights = tf.get_variable('weights', [256, K * K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        raw_biases = tf.get_variable('biases', [K * K],
                                     initializer=tf.constant_initializer(0.0),
                                     dtype=tf.float32)
        # Shift the zero-initialised biases by the flattened identity matrix.
        biases = raw_biases + tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        projected = tf.matmul(net, weights)
        projected = tf.nn.bias_add(projected, biases)

    return tf.reshape(projected, [batch_size, K, K])
def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """Feature Transform Net: predict one KxK transformation matrix per batch element.

    Three 1x1 convolutions (64, 128, 1024 channels) are followed by a max-pool
    with a [num_point, 1] window and two fully connected layers (512, 256).
    The final linear layer uses zero-initialised weights and biases, with the
    flattened KxK identity added to the biases, so the network outputs the
    identity transform at initialisation.

    :param inputs: input tensor, BxNx1xK according to the original docstring;
        the batch size and number of points are read from its static shape
    :param is_training: training flag forwarded to the tf_util layers
    :param bn_decay: batch-norm decay forwarded to the tf_util layers
    :param K: side length of the predicted square matrix
    :return: tensor of shape (batch_size, K, K)
    """
    # The static dimensions are used directly (no `.value`), as in the original.
    batch_size = inputs.get_shape()[0]
    num_point = inputs.get_shape()[1]

    net = tf_util.conv2d(inputs, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay)
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')

    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    with tf.compat.v1.variable_scope('transform_feat'):
        weights = tf.compat.v1.get_variable(
            'weights', [256, K * K],
            initializer=tf.compat.v1.constant_initializer(0.0),
            dtype=tf.float32)
        biases = tf.compat.v1.get_variable(
            'biases', [K * K],
            initializer=tf.compat.v1.constant_initializer(0.0),
            dtype=tf.float32)
        # Add the identity as part of the forward graph. The previous
        # `biases.assign_add(...)` only created an assign op whose result was
        # discarded, so in graph mode it never ran and the transform started
        # as the zero matrix instead of the identity.
        biases = biases + tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    transform = tf.reshape(transform, [batch_size, K, K])
    return transform
def pointnet_sa_module_msg(xyz, points, radius_list, nsample_list, mlp_list, is_training, bn_decay, bn,
                           fps_sample_range_list, fps_method_list, npoint_list, former_fps_idx,
                           use_attention, scope, dilated_group, vote_ctr=None, aggregation_channel=None,
                           debugging=False, epsilon=1e-5, img_features=None):
    ''' PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG)

        The function (1) selects center indices per sampling range, (2) gathers
        the center coordinates, (3) queries a neighborhood per radius,
        (4) runs a per-scale MLP (1x1 conv2d) followed by a max over the sample
        axis, and (5) concatenates the per-scale features.

        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            radius_list: list of float32 -- search radius in local region
            nsample_list: list of int32 -- how many points in each local region
            mlp_list: list of list of int32 -- output size for MLP on each point
            is_training, bn_decay, bn: forwarded to the tf_util layers
            fps_sample_range_list: per range, the number of consecutive points
                (along axis 1 of xyz/points) the sampler looks at
            fps_method_list: per range, one of 'F-FPS', 'D-FPS', 'FS'
            npoint_list: per range, the number of points to sample; 0 skips the range
            former_fps_idx: optional index tensor concatenated after the newly
                sampled indices
            use_attention: if true, a feature-distance ranking is computed and
                passed to query_ball_point_withidx (unless dilated_group is set)
            scope: variable scope name
            dilated_group: if true, use query_ball_point_dilated with the
                previous radius as the inner radius
            vote_ctr: optional tensor; when given, centers are gathered from it
                and the indices are simply 0..npoint-1
            aggregation_channel: output channels of the optional 'ensemble' conv1d
            debugging, epsilon: accepted but not used in this function
            img_features: optional tensor concatenated to the multi-scale
                features on the last axis
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, C) TF tensor; before the optional
                img_features concat / 'ensemble' conv, C is the sum over k of
                mlp_list[k][-1]
            fps_idx: the index tensor used to gather new_xyz
    '''
    bs = xyz.get_shape().as_list()[0]
    with tf.variable_scope(scope) as sc:
        # ---- 1. choose center indices, one sampling range at a time ----
        cur_fps_idx_list = []
        last_fps_end_index = 0
        for fps_sample_range, fps_method, npoint in zip(fps_sample_range_list, fps_method_list, npoint_list):
            # Restrict sampling to the current window along axis 1.
            tmp_xyz = tf.slice(xyz, [0, last_fps_end_index, 0], [-1, fps_sample_range, -1])
            tmp_points = tf.slice(points, [0, last_fps_end_index, 0], [-1, fps_sample_range, -1])
            if npoint == 0:
                # Nothing to sample from this window; just advance past it.
                last_fps_end_index += fps_sample_range
                continue
            if vote_ctr is not None:
                # Centers come from vote_ctr, so every one of them is kept in order.
                npoint = vote_ctr.get_shape().as_list()[1]
                fps_idx = tf.tile(tf.reshape(tf.range(npoint), [1, npoint]), [bs, 1])
            elif fps_method == 'FS':
                # 'FS' concatenates the feature-distance sampling and the xyz sampling.
                features_for_fps = tf.concat([tmp_xyz, tmp_points], axis=-1)
                features_for_fps_distance = model_util.calc_square_dist(features_for_fps, features_for_fps, norm=False)
                fps_idx_1 = farthest_point_sample_with_distance(npoint, features_for_fps_distance)
                fps_idx_2 = farthest_point_sample(npoint, tmp_xyz)
                fps_idx = tf.concat([fps_idx_1, fps_idx_2], axis=-1)  # [bs, npoint * 2]
            elif npoint == tmp_xyz.get_shape().as_list()[1]:
                # Asking for as many points as the window holds: take them all.
                fps_idx = tf.tile(tf.reshape(tf.range(npoint), [1, npoint]), [bs, 1])
            elif fps_method == 'F-FPS':
                features_for_fps = tf.concat([tmp_xyz, tmp_points], axis=-1)
                features_for_fps_distance = model_util.calc_square_dist(features_for_fps, features_for_fps, norm=False)
                fps_idx = farthest_point_sample_with_distance(npoint, features_for_fps_distance)
            else:  # D-FPS
                fps_idx = farthest_point_sample(npoint, tmp_xyz)

            # Shift by the window start; presumably the samplers return
            # window-local indices, so this maps them back into xyz -- verify.
            fps_idx = fps_idx + last_fps_end_index
            cur_fps_idx_list.append(fps_idx)
            last_fps_end_index += fps_sample_range
        fps_idx = tf.concat(cur_fps_idx_list, axis=-1)

        if former_fps_idx is not None:
            fps_idx = tf.concat([fps_idx, former_fps_idx], axis=-1)

        # ---- 2. gather center coordinates ----
        if vote_ctr is not None:
            new_xyz = gather_point(vote_ctr, fps_idx)
        else:
            new_xyz = gather_point(xyz, fps_idx)

        # if deformed_xyz is not None, then no attention model
        if use_attention:
            # first gather the points out
            new_points = gather_point(points, fps_idx)  # [bs, npoint, c]

            # choose farthest feature to center points
            # [bs, npoint, ndataset]
            relation = model_util.calc_square_dist(new_points, points)
            # choose these points with largest distance to center_points
            # (top_k over the full last axis yields a complete descending ranking)
            _, relation_idx = tf.nn.top_k(relation, k=relation.shape.as_list()[-1])

        # ---- 3. neighborhood query per radius ----
        idx_list, pts_cnt_list = [], []
        cur_radius_list = []  # unused
        for i in range(len(radius_list)):
            radius = radius_list[i]
            nsample = nsample_list[i]
            if dilated_group:
                # cfg.POINTNET.DILATED_GROUPING
                # The inner radius is the previous scale's radius (0 for the first scale).
                if i == 0:
                    min_radius = 0.
                else:
                    min_radius = radius_list[i - 1]
                idx, pts_cnt = query_ball_point_dilated(min_radius, radius, nsample, xyz, new_xyz)
            elif use_attention:
                idx, pts_cnt = query_ball_point_withidx(radius, nsample, xyz, new_xyz, relation_idx)
            else:
                idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
            idx_list.append(idx)
            pts_cnt_list.append(pts_cnt)

        # ---- 4. per-scale MLP + max over the sample axis ----
        # debugging
        debugging_list = []  # unused
        new_points_list = []
        for i in range(len(radius_list)):
            nsample = nsample_list[i]
            idx, pts_cnt = idx_list[i], pts_cnt_list[i]
            radius = radius_list[i]

            # Centers whose query found no point get index 0 everywhere and,
            # further below, an all-zero feature vector.
            pts_cnt_mask = tf.cast(tf.greater(pts_cnt, 0), tf.int32)  # [bs, npoint]
            pts_cnt_fmask = tf.cast(pts_cnt_mask, tf.float32)
            idx = idx * tf.expand_dims(pts_cnt_mask, axis=2)  # [bs, npoint, nsample]
            grouped_xyz = group_point(xyz, idx)
            original_xyz = grouped_xyz  # unused
            # Express neighbor coordinates relative to their center.
            grouped_xyz -= tf.expand_dims(new_xyz, 2)
            grouped_points = group_point(points, idx)

            grouped_points = tf.concat([grouped_points, grouped_xyz], axis=-1)

            for j, num_out_channel in enumerate(mlp_list[i]):
                grouped_points = tf_util.conv2d(grouped_points, num_out_channel, [1, 1],
                                                padding='VALID', stride=[1, 1], bn=bn, is_training=is_training,
                                                scope='conv%d_%d' % (i, j), bn_decay=bn_decay)

            new_points = tf.reduce_max(grouped_points, axis=[2])

            new_points *= tf.expand_dims(pts_cnt_fmask, axis=-1)
            new_points_list.append(new_points)

        # ---- 5. merge the scales ----
        if len(new_points_list) > 0:
            new_points_concat = tf.concat(new_points_list, axis=-1)
            if img_features is not None:
                new_points_concat = tf.concat([new_points_concat, img_features], axis=-1)
            if cfg.MODEL.NETWORK.AGGREGATION_SA_FEATURE:
                new_points_concat = tf_util.conv1d(new_points_concat, aggregation_channel, 1,
                                                   padding='VALID', bn=bn, is_training=is_training,
                                                   scope='ensemble', bn_decay=bn_decay)
        else:
            # No radius configured: pass the input features of the centers through.
            new_points_concat = gather_point(points, fps_idx)

    return new_xyz, new_points_concat, fps_idx
def create_encoder(self, inputs, npts):
    """PointNet encoder: map a batch of point clouds to one global feature row each.

    The input is reshaped to (BATCH_SIZE, NUM_POINT, 3), multiplied by the 3x3
    matrix from ``input_transform_net``, passed through two convolutions
    (64 channels each), multiplied by the 64x64 matrix from
    ``feature_transform_net``, passed through three 1x1 convolutions
    (64, 128, 1024 channels) and max-pooled with a [NUM_POINT, 1] window.

    :param inputs: tensor that can be reshaped to (BATCH_SIZE, NUM_POINT, 3)
    :param npts: accepted but not used by this method
    :return: tensor of shape (BATCH_SIZE, -1)
    """
    def point_conv(tensor, channels, kernel, scope):
        # Every convolution of the encoder shares these settings.
        return conv2d(tensor, channels, kernel, scope=scope,
                      padding='VALID', stride=[1, 1], bn=True,
                      is_training=self.is_training, bn_decay=self.bn_decay)

    points = tf.reshape(inputs, (BATCH_SIZE, NUM_POINT, 3))

    with tf.variable_scope('transform_net1'):
        input_transform = input_transform_net(points, self.is_training, self.bn_decay, K=3)
    aligned_points = tf.matmul(points, input_transform)

    net = point_conv(tf.expand_dims(aligned_points, -1), 64, [1, 3], 'conv1')
    net = point_conv(net, 64, [1, 1], 'conv2')

    with tf.variable_scope('transform_net2'):
        feature_transform = feature_transform_net(net, self.is_training, self.bn_decay, K=64)
    aligned_features = tf.matmul(tf.squeeze(net, axis=[2]), feature_transform)
    aligned_features = tf.expand_dims(aligned_features, [2])

    # 1x1 convolutions with unit stride act as a shared per-point MLP.
    net = point_conv(aligned_features, 64, [1, 1], 'conv3')
    net = point_conv(net, 128, [1, 1], 'conv4')
    net = point_conv(net, 1024, [1, 1], 'conv5')

    net = max_pool2d(net, [NUM_POINT, 1], padding='VALID', scope='maxpool')
    return tf.reshape(net, [BATCH_SIZE, -1])
def get_model(point_cloud, is_training, part_num, batch_size,
              num_point, weight_decay, bn_decay=None):
    """Build the part-segmentation network and return per-point part logits.

    The point cloud is multiplied by the matrix from ``get_transform``, run
    through three convolutions (64, 128, 128), multiplied by the 128x128
    matrix from ``get_transform_K``, and run through two more convolutions
    (512, 2048). The max-pooled 2048-channel feature is tiled over all points,
    concatenated with the five per-point feature maps, and decoded by the
    ``seg/*`` convolutions.

    :param point_cloud: input tensor, BxNx3 according to the original docstring
    :param is_training: training flag forwarded to the tf_util layers
    :param part_num: number of output channels of the last convolution
    :param batch_size: batch size used for the reshapes
    :param num_point: number of points used for pooling, tiling and reshapes
    :param weight_decay: forwarded to the ``seg/*`` convolutions
    :param bn_decay: batch-norm decay forwarded to the tf_util layers
    :return: ``(net, end_points)`` where ``net`` has shape
        (batch_size, num_point, part_num) and ``end_points['transform']``
        holds the feature transform
    """
    def backbone_conv(tensor, channels, kernel, scope):
        return tf_util.conv2d(tensor, channels, kernel,
                              padding='VALID', stride=[1, 1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    def seg_conv(tensor, channels, scope):
        return tf_util.conv2d(tensor, channels, [1, 1],
                              padding='VALID', stride=[1, 1],
                              bn_decay=bn_decay, bn=True, is_training=is_training,
                              scope=scope, weight_decay=weight_decay)

    def seg_dropout(tensor, scope):
        return tf_util.dropout(tensor, keep_prob=0.8, is_training=is_training, scope=scope)

    end_points = {}

    with tf.variable_scope('transform_net1'):
        input_transform = get_transform(point_cloud, is_training, bn_decay, K=3)
    aligned_points = tf.matmul(point_cloud, input_transform)

    out1 = backbone_conv(tf.expand_dims(aligned_points, -1), 64, [1, 3], 'conv1')
    out2 = backbone_conv(out1, 128, [1, 1], 'conv2')
    out3 = backbone_conv(out2, 128, [1, 1], 'conv3')

    with tf.variable_scope('transform_net2'):
        feature_transform = get_transform_K(out3, is_training, bn_decay, 128)
    end_points['transform'] = feature_transform

    flat_out3 = tf.reshape(out3, [batch_size, num_point, 128])
    aligned_features = tf.matmul(flat_out3, feature_transform)
    aligned_features = tf.expand_dims(aligned_features, [2])

    out4 = backbone_conv(aligned_features, 512, [1, 1], 'conv4')
    out5 = backbone_conv(out4, 2048, [1, 1], 'conv5')
    global_feature = tf_util.max_pool2d(out5, [num_point, 1],
                                        padding='VALID', scope='maxpool')

    # Give every point the global feature next to its own per-layer features.
    tiled_global = tf.tile(global_feature, [1, num_point, 1, 1])
    fused = tf.concat(axis=3, values=[tiled_global, out1, out2, out3, out4, out5])

    net = seg_conv(fused, 256, 'seg/conv1')
    net = seg_dropout(net, 'seg/dp1')
    net = seg_conv(net, 256, 'seg/conv2')
    net = seg_dropout(net, 'seg/dp2')
    net = seg_conv(net, 128, 'seg/conv3')
    # Final layer: no activation, no batch norm, and no training flag is passed.
    net = tf_util.conv2d(net, part_num, [1, 1],
                         padding='VALID', stride=[1, 1],
                         activation_fn=None, bn=False,
                         scope='seg/conv4', weight_decay=weight_decay)

    net = tf.reshape(net, [batch_size, num_point, part_num])
    return net, end_points
def get_model(inputs, is_training, bn_decay=None, num_class=40, FLAGS=None):
    """Build the classification network with local grouping and a VLAD layer.

    Steps: split coordinates (and, when ``FLAGS.normal`` is set, the extra
    channels) from ``inputs``; optionally align the coordinates with
    ``input_transform_net`` when ``FLAGS.STN`` is set; sample and group
    neighbors with ``sample_and_group``; embed the sampled points and their
    neighbors with 2-D/3-D convolutions; pool over the neighbor axis; run the
    ``VLAD`` layer; classify with three fully connected layers.

    :param inputs: input tensor; channels 0:3 are used as coordinates and
        channels 3: as extra point features when ``FLAGS.normal`` is set
    :param is_training: training flag forwarded to the layers
    :param bn_decay: batch-norm decay forwarded to the layers
    :param num_class: number of units of the last fully connected layer
    :param FLAGS: configuration object; ``normal``, ``STN``, ``KNN``,
        ``node_num`` and ``local_pool`` are read here, and it is passed on to
        ``VLAD``. Although the default is None, a real object is required.
    :return: ``(net, index)`` -- the class logits and the second value
        returned by ``VLAD``
    """
    def conv3d_block(tensor, channels, kernel, scope):
        return tf_util.conv3d(tensor, channels, kernel,
                              padding='VALID', stride=[1, 1, 1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    def dense_block(tensor, units, scope):
        return tf_util.fully_connected(tensor, units, bn=True, is_training=is_training,
                                       scope=scope, bn_decay=bn_decay)

    def drop(tensor, scope):
        return tf_util.dropout(tensor, keep_prob=0.7, is_training=is_training, scope=scope)

    point_cloud = inputs[:, :, 0:3]
    # D is the per-point channel count consumed by the first 3-D convolution.
    D, points = (6, inputs[:, :, 3:]) if FLAGS.normal else (3, None)

    # --------------------------------------- STN -------------------------------------
    if FLAGS.STN:
        with tf.variable_scope('transform_net'):
            transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
        point_cloud = tf.matmul(point_cloud, transform)

    # ---------------------------------- Node Sampling --------------------------------
    with tf.variable_scope('group_sampling'):
        KNN = FLAGS.KNN
        point_cloud_sampled, nn_points, _, _ = sample_and_group(
            npoint=FLAGS.node_num, radius=0.2, nsample=KNN,
            xyz=point_cloud, points=points, knn=True, use_xyz=True)

    # Embed each sampled point and repeat it once per neighbor.
    center_feat = tf_util.conv2d(tf.expand_dims(point_cloud_sampled, axis=-1), 64, [1, 3],
                                 padding='VALID', stride=[1, 1],
                                 bn=True, is_training=is_training,
                                 scope='conv1_1', bn_decay=bn_decay)
    center_feat = tf.tile(center_feat, multiples=[1, 1, KNN, 1])
    center_feat = tf.expand_dims(center_feat, axis=-2)

    # Embed the grouped neighbors and fuse them with the center embedding.
    neighbor_feat = conv3d_block(tf.expand_dims(nn_points, axis=-1), 64, [1, 1, D], 'conv1_2')
    fused = tf.concat(values=[neighbor_feat, center_feat], axis=-1)

    net = conv3d_block(fused, 128, [1, 1, 1], 'conv2')
    net = conv3d_block(net, 128, [1, 1, 1], 'conv3')

    # ---------------------- local pooling: merge local feature -------------------------
    pool3d = tf_util.avg_pool3d if FLAGS.local_pool == 'average' else tf_util.max_pool3d
    pool_k = pool3d(net, kernel_size=[1, KNN, 1], stride=[1, 2, 2],
                    padding='VALID', scope='pool_k')
    net = tf.squeeze(pool_k, axis=2)

    # ---------------------------------- VLAD layer --------------------------------------
    net, index = VLAD(net, FLAGS, is_training, bn_decay, layer_name='VLAD')

    # -------------------------------- classification ------------------------------------
    with tf.name_scope('fc_layer'):
        net = dense_block(net, 512, 'fc1')
        net = drop(net, 'dp1')
        net = dense_block(net, 256, 'fc2')
        net = drop(net, 'dp2')
        net = tf_util.fully_connected(net, num_class, activation_fn=None, scope='fc3')

    return net, index
def create_encoder(self, inputs, npts):
    """Encode a batch of point clouds into one global feature row per cloud (PointNet).

    Pipeline: reshape to (BATCH_SIZE, NUM_POINT, 3); apply the 3x3 input
    transform; two convolutions (64, 64); apply the 64x64 feature transform;
    three 1x1 convolutions (64, 128, 1024); max-pool with a [NUM_POINT, 1]
    window; flatten per batch element.

    :param inputs: tensor that can be reshaped to (BATCH_SIZE, NUM_POINT, 3)
    :param npts: accepted but not used by this method
    :return: tensor of shape (BATCH_SIZE, -1)
    """
    training_flag = self.is_training
    decay = self.bn_decay

    cloud = tf.reshape(inputs, (BATCH_SIZE, NUM_POINT, 3))

    with tf.variable_scope('transform_net1'):
        xyz_transform = input_transform_net(cloud, training_flag, decay, K=3)
    net = tf.expand_dims(tf.matmul(cloud, xyz_transform), -1)

    for width, kernel, scope in ((64, [1, 3], 'conv1'), (64, [1, 1], 'conv2')):
        net = conv2d(inputs=net, num_output_channels=width, kernel_size=kernel,
                     scope=scope, padding='VALID', stride=[1, 1],
                     bn=True, is_training=training_flag, bn_decay=decay)

    with tf.variable_scope('transform_net2'):
        feat_transform = feature_transform_net(net, training_flag, decay, K=64)
    net = tf.matmul(tf.squeeze(net, axis=[2]), feat_transform)
    net = tf.expand_dims(net, [2])

    # A [1, 1] kernel with unit stride is equivalent to a per-point MLP layer.
    for width, scope in ((64, 'conv3'), (128, 'conv4'), (1024, 'conv5')):
        net = conv2d(net, width, [1, 1], scope=scope,
                     padding='VALID', stride=[1, 1],
                     bn=True, is_training=training_flag, bn_decay=decay)

    net = max_pool2d(net, [NUM_POINT, 1], padding='VALID', scope='maxpool')
    return tf.reshape(net, [BATCH_SIZE, -1])
def build_layer(self, xyz_list, feature_list, fps_idx_list, bn_decay, output_dict, p2, img_input=None,
                img_seg_point_cloud=None, point_seg_net=None, pooling_size=None):
    """Build this layer and append its outputs to the running lists.

    Inputs are picked from ``xyz_list`` / ``feature_list`` using
    ``self.xyz_index`` / ``self.feature_index``. Depending on
    ``self.layer_type`` ('SA_Layer', 'SA_Layer_SSG_Last', 'FP_Layer',
    'Vote_Layer') the matching module is built and one entry is appended to
    each of the three lists. Any other layer type leaves the lists untouched.

    :param xyz_list: list of coordinate tensors produced so far (appended to)
    :param feature_list: list of feature tensors produced so far (appended to)
    :param fps_idx_list: list of sampling-index tensors produced so far (appended to)
    :param bn_decay: batch-norm decay forwarded to the modules
    :param output_dict: dict whose PRED_VOTE_BASE / PRED_VOTE_OFFSET lists are
        extended by a 'Vote_Layer'
    :param p2: forwarded to ``projection.crop_rois`` together with ``img_input``
    :param img_input: image tensor used for ROI cropping when a vote center is set
    :param img_seg_point_cloud: coordinates concatenated to the input of a 'Vote_Layer'
    :param point_seg_net: features concatenated to the input of a 'Vote_Layer'
    :param pooling_size: forwarded to ``projection.crop_rois``
    :return: ``(xyz_list, feature_list, fps_idx_list)`` -- the same list objects
    """
    xyz_input = [xyz_list[index] for index in self.xyz_index]
    feature_input = [feature_list[index] for index in self.feature_index]

    # An index of -1 means "not configured".
    former_fps_idx = fps_idx_list[self.former_fps_idx] if self.former_fps_idx != -1 else None
    vote_ctr = xyz_list[self.vote_ctr_index] if self.vote_ctr_index != -1 else None

    # Image feature extraction: only when vote centers are available.
    img_features = None
    if vote_ctr is not None:
        ctr_shape = vote_ctr.get_shape().as_list()
        batch_size, num_point = ctr_shape[0], ctr_shape[1]
        img_rois = projection.crop_rois(vote_ctr, p2, img_input, pooling_size=pooling_size)
        img_features = tf_util.conv2d(img_rois, 256, [3, 3],
                                      padding='VALID', stride=[1, 1],
                                      bn=self.bn, is_training=self.is_training,
                                      scope=self.scope, bn_decay=bn_decay)
        img_features = tf.reshape(img_features, [batch_size, num_point, 256])

    produced = None  # (xyz, features, fps_idx) to append, if any
    layer_type = self.layer_type

    if layer_type == 'SA_Layer':
        # Only 'layer4' receives the image features; other scopes use the default.
        extra_kwargs = {'img_features': img_features} if self.scope == 'layer4' else {}
        produced = pointnet_sa_module_msg(
            xyz_input[0], feature_input[0],
            self.radius_list, self.nsample_list,
            self.mlp_list, self.is_training, bn_decay, self.bn,
            self.fps_sample_range_list, self.fps_method_list, self.npoint_list,
            former_fps_idx, self.use_attention, self.scope,
            self.dilated_group, vote_ctr, self.aggregation_channel,
            **extra_kwargs)
        new_xyz, new_points, new_fps_idx = produced
        produced = (new_xyz, new_points, new_fps_idx)

    elif layer_type == 'SA_Layer_SSG_Last':
        new_points = pointnet_sa_module(
            xyz_input[0], feature_input[0],
            self.mlp_list, self.is_training, bn_decay,
            self.bn, self.scope,
        )
        produced = (None, new_points, None)

    elif layer_type == 'FP_Layer':
        new_points = pointnet_fp_module(xyz_input[0], xyz_input[1], feature_input[0], feature_input[1],
                                        self.mlp_list, self.is_training, bn_decay, self.scope, self.bn)
        produced = (xyz_input[0], new_points, None)

    elif layer_type == 'Vote_Layer':
        # The segmented image points are merged into the input, re-sampled with
        # F-FPS, and only then voted on (an earlier variant voted first).
        xyz_update = tf.concat([xyz_input[0], img_seg_point_cloud], axis=1)
        feature_update = tf.concat([feature_input[0], point_seg_net], axis=1)
        xyz_update, feature_update, fps_idx_update = pointnet_fps_method(
            xyz_update, feature_update, [-1], ['F-FPS'], [256], self.scope, vote_ctr=None)
        new_xyz, new_points, ctr_offsets = vote_layer(
            xyz_update, feature_update, self.mlp_list, self.is_training, bn_decay, self.bn, self.scope)
        output_dict[maps_dict.PRED_VOTE_BASE].append(xyz_update)
        output_dict[maps_dict.PRED_VOTE_OFFSET].append(ctr_offsets)
        produced = (new_xyz, new_points, None)

    if produced is not None:
        out_xyz, out_features, out_fps_idx = produced
        xyz_list.append(out_xyz)
        feature_list.append(out_features)
        fps_idx_list.append(out_fps_idx)

    return xyz_list, feature_list, fps_idx_list
def get_model_other(point_cloud, is_training, bn_decay=None):
    """Build a four-stage EdgeConv classifier with dense (concatenated) stage inputs.

    B: batch size; N: number of points; C: channels; k: number of nearest neighbors.

    Each stage builds a k=20 nearest-neighbor graph from a pairwise-distance
    matrix, extracts edge features, applies a 1x1 convolution and max-reduces
    over axis -2. Stage 1 works on the first ``para.dim`` input channels; each
    later stage computes distances on the previous stage output and extracts
    edge features from the concatenation of the input with all earlier stage
    outputs. For every stage, the distances from all points to the point with
    the smallest value in channel 0 are stored in ``end_points['knn<i>']``.

    :param point_cloud: B*N*C input tensor
    :param is_training: training flag forwarded to the tf_util layers
    :param bn_decay: batch-norm decay forwarded to the tf_util layers
    :return: ``(net, end_points)`` -- logits with ``para.outputClassN`` units and
        a dict with keys 'knn1'..'knn4', 'global_feature', 'fc1', 'fc2', 'fc3'
    """
    neighbors = 20

    def edge_conv(graph_input, nn_idx, channels, scope):
        # Edge features -> 1x1 conv -> max over axis -2.
        edge_feature = tf_util.get_edge_feature(graph_input, nn_idx=nn_idx, k=neighbors)
        out = tf_util.conv2d(edge_feature, channels, [1, 1],
                             padding='VALID', stride=[1, 1],
                             bn=True, is_training=is_training,
                             scope=scope, bn_decay=bn_decay)
        return tf.reduce_max(out, axis=-2, keepdims=True)

    def knn_graph(features, stage):
        # Returns neighbor indices and records the distances to the reference point.
        adj_matrix = tf_util.pairwise_distance(features)
        nn_idx = tf_util.knn(adj_matrix, k=neighbors)
        end_points['knn%d' % stage] = tf.gather(adj_matrix, indices=min_idx, axis=2, batch_dims=1)
        return nn_idx

    end_points = {}
    # Per batch element, index of the point with the smallest value in channel 0.
    min_idx = tf.reshape(tf.math.argmin(point_cloud[:, :, 0], axis=1), (-1, 1))
    batch_size = point_cloud.get_shape()[0]

    # 1. graph for the first EdgeConv, built on the raw input channels.
    nn_idx = knn_graph(point_cloud[:, :, :para.dim], 1)
    point_cloud = tf.expand_dims(point_cloud[:, :, :para.dim], axis=-2)
    stage_outputs = [edge_conv(point_cloud, nn_idx, 64, 'dgcnn1')]

    # 2.-4. neighbors come from the previous stage output; the edge features
    # come from the input concatenated with every earlier stage output.
    for stage, channels in ((2, 64), (3, 64), (4, 128)):
        nn_idx = knn_graph(stage_outputs[-1], stage)
        dense_input = tf.concat([point_cloud] + stage_outputs, axis=-1)
        stage_outputs.append(edge_conv(dense_input, nn_idx, channels, 'dgcnn%d' % stage))

    # Aggregate all stages into a 1024-channel per-point feature.
    net = tf_util.conv2d(tf.concat([point_cloud] + stage_outputs, axis=-1), 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='agg', bn_decay=bn_decay)

    # Global feature: max over the point axis, then flatten per batch element.
    net = tf.reduce_max(net, axis=1, keepdims=True)
    net = tf.squeeze(net)
    net = tf.reshape(net, [batch_size, -1])
    print(net.get_shape())
    end_points['global_feature'] = net

    # Fully connected classifier head.
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)
    end_points['fc1'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='fc2', bn_decay=bn_decay)
    end_points['fc2'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp2')
    net = tf_util.fully_connected(net, para.outputClassN, activation_fn=None, scope='fc3')
    end_points['fc3'] = net

    return net, end_points
def pointnet(self, point_cloud, is_training, bn=True, bn_decay=None):
    """Classification PointNet: BxNx3 input, Bxn logits where n is ``self.n_classes``.

    Five point-wise convolutions (64, 64, 64, 128, 1024 channels) are followed
    by a max-pool with a [num_point, 1] window, two fully connected layers
    (512, 256), dropout, and the final linear layer.

    :param point_cloud: input tensor of shape BxNx3; N is read from its static shape
    :param is_training: training flag forwarded to the tf_util layers
    :param bn: whether the tf_util layers apply batch normalisation
    :param bn_decay: batch-norm decay forwarded to the tf_util layers
    :return: logits tensor with ``self.n_classes`` units
    """
    num_point = point_cloud.get_shape()[1].value

    # Point functions (MLP implemented as conv2d): (channels, kernel) per layer.
    conv_specs = (
        (64, [1, 3]),
        (64, [1, 1]),
        (64, [1, 1]),
        (128, [1, 1]),
        (1024, [1, 1]),
    )
    net = tf.expand_dims(point_cloud, -1)
    for layer_no, (channels, kernel) in enumerate(conv_specs, start=1):
        net = tf_util.conv2d(net, channels, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=bn, is_training=is_training,
                             scope='conv%d' % layer_no, bn_decay=bn_decay)

    # Symmetric function: max pooling
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')

    # MLP on global point cloud vector
    net = tf.layers.flatten(net)
    for units, scope in ((512, 'fc1'), (256, 'fc2')):
        net = tf_util.fully_connected(net, units, bn=bn, is_training=is_training,
                                      scope=scope, bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    return tf_util.fully_connected(net, self.n_classes, activation_fn=None, scope='fc3')