def pointnet_sa_module(xyz, points, npoint, radius, nsample, mlp, mlp2, group_all, is_training, bn_decay, scope, bn=True, pooling='max', tnet_spec=None, knn=False, use_xyz=True):
    ''' PointNet Set Abstraction (SA) Module
        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            npoint: int32 -- #points sampled in farthest point sampling
            radius: float32 -- search radius in local region
            nsample: int32 -- how many points in each local region
            mlp: list of int32 -- output size for MLP on each point
            mlp2: list of int32 -- output size for MLP on each region
                (None is treated as an empty list)
            group_all: bool -- group all points into one PC if set true, OVERRIDE
                npoint, radius and nsample settings
            is_training: forwarded to every tf_util.conv2d layer
            bn_decay: forwarded to every tf_util.conv2d layer
            scope: name of the variable scope wrapping the whole module
            bn: bool -- forwarded to every tf_util.conv2d layer
            pooling: str -- one of 'avg', 'weighted_avg', 'max', 'min',
                'max_and_avg'
            tnet_spec, knn: forwarded unchanged to sample_and_group
                (not used when group_all is set)
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
                ('max_and_avg' without mlp2 concatenates two pooled tensors
                along the channel axis)
            idx: (batch_size, npoint, nsample) int32 -- indices for local regions
        Raises:
            ValueError: if pooling is not one of the supported modes
    '''
    # Validate before touching the graph: an unknown mode used to skip pooling
    # silently and only surfaced later as an obscure squeeze error.
    supported_pooling = ('avg', 'weighted_avg', 'max', 'min', 'max_and_avg')
    if pooling not in supported_pooling:
        raise ValueError(
            'Unsupported pooling %r; expected one of %s'
            % (pooling, ', '.join(supported_pooling)))

    with tf.variable_scope(scope):
        # Sampling and grouping
        if group_all:
            nsample = xyz.get_shape()[1].value
            new_xyz, new_points, idx, grouped_xyz = sample_and_group_all(xyz, points, use_xyz)
        else:
            new_xyz, new_points, idx, grouped_xyz = sample_and_group(
                npoint, radius, nsample, xyz, points, tnet_spec, knn, use_xyz)

        # Per-point MLP, implemented as 1x1 convolutions
        for i, num_out_channel in enumerate(mlp):
            new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1],
                                        padding='VALID', stride=[1, 1],
                                        bn=bn, is_training=is_training,
                                        scope='conv%d' % (i), bn_decay=bn_decay)

        # Pooling over the nsample axis (axis 2) of each local region
        if pooling == 'avg':
            new_points = tf_util.avg_pool2d(new_points, [1, nsample], stride=[1, 1],
                                            padding='VALID', scope='avgpool1')
        elif pooling == 'weighted_avg':
            with tf.variable_scope('weighted_avg1'):
                dists = tf.norm(grouped_xyz, axis=-1, ord=2, keep_dims=True)
                exp_dists = tf.exp(-dists * 5)
                weights = exp_dists / tf.reduce_sum(exp_dists, axis=2, keep_dims=True)  # (batch_size, npoint, nsample, 1)
                new_points *= weights  # (batch_size, npoint, nsample, mlp[-1])
                new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True)
        elif pooling == 'max':
            new_points = tf.reduce_max(new_points, axis=[2], keep_dims=True)
        elif pooling == 'min':
            # min(x) == -max(-x). The outer negation was previously missing, so
            # this branch returned the negated minimum.
            # NOTE: checkpoints trained with the old behaviour see flipped signs here.
            new_points = -tf_util.max_pool2d(-1 * new_points, [1, nsample], stride=[1, 1],
                                             padding='VALID', scope='minpool1')
        else:  # 'max_and_avg'
            max_points = tf_util.max_pool2d(new_points, [1, nsample], stride=[1, 1],
                                            padding='VALID', scope='maxpool1')
            avg_points = tf_util.avg_pool2d(new_points, [1, nsample], stride=[1, 1],
                                            padding='VALID', scope='avgpool1')
            # Channel order (max first, then avg) is kept as it always was.
            new_points = tf.concat([max_points, avg_points], axis=-1)

        # Optional per-region MLP applied after pooling
        if mlp2 is None:
            mlp2 = []
        for i, num_out_channel in enumerate(mlp2):
            new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1],
                                        padding='VALID', stride=[1, 1],
                                        bn=bn, is_training=is_training,
                                        scope='conv_post_%d' % (i), bn_decay=bn_decay)

        new_points = tf.squeeze(new_points, [2])  # (batch_size, npoints, mlp2[-1])
        return new_xyz, new_points, idx
def pointnet(self, point_cloud, is_training, bn=True, bn_decay=None):
    """ Classification PointNet, input is BxNx3, output Bxn where n is
        self.n_classes (final layer has no activation). """
    num_point = point_cloud.get_shape()[1].value

    # Point functions (MLP implemented as conv2d): (output channels, kernel).
    # Only the first layer spans the 3 coordinates; the rest are 1x1.
    conv_specs = [
        (64, [1, 3]),
        (64, [1, 1]),
        (64, [1, 1]),
        (128, [1, 1]),
        (1024, [1, 1]),
    ]
    net = tf.expand_dims(point_cloud, -1)
    for layer_no, (channels, kernel) in enumerate(conv_specs, 1):
        net = tf_util.conv2d(net, channels, kernel,
                             padding='VALID', stride=[1, 1],
                             bn=bn, is_training=is_training,
                             scope='conv%d' % layer_no, bn_decay=bn_decay)

    # Symmetric function: max pooling over all points
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')

    # MLP on global point cloud vector
    net = tf.layers.flatten(net)
    for layer_no, width in enumerate((512, 256), 1):
        net = tf_util.fully_connected(net, width, bn=bn, is_training=is_training,
                                      scope='fc%d' % layer_no, bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    return tf_util.fully_connected(net, self.n_classes, activation_fn=None,
                                   scope='fc3')
def encoder(self, point_cloud, latent_dim, is_training, bn=True, bn_decay=None):
    """ PointNet-style encoder, input is BxNx3.
        Return:
            mu: first latent_dim columns of the final fully connected layer
            sigma: softplus of the remaining latent_dim columns
        All variables live in the 'encoder' scope with AUTO_REUSE.
        NOTE(review): the two fully connected layers always use bn=True and
        do not receive bn_decay, regardless of the `bn` argument -- confirm
        this is intended.
    """
    num_point = point_cloud.get_shape()[1].value
    net = tf.expand_dims(point_cloud, -1)

    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
        # Point functions (MLP implemented as conv2d): (output channels, kernel)
        conv_specs = (
            (64, [1, 3]),
            (64, [1, 1]),
            (64, [1, 1]),
            (128, [1, 1]),
            (1024, [1, 1]),
        )
        for layer_no, (channels, kernel) in enumerate(conv_specs, 1):
            net = tf_util.conv2d(net, channels, kernel,
                                 padding='VALID', stride=[1, 1],
                                 bn=bn, is_training=is_training,
                                 scope='conv%d' % layer_no, bn_decay=bn_decay)

        # Symmetric function: max pooling
        global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                         padding='VALID', scope='maxpool')

        hidden = tf_util.fully_connected(tf.layers.flatten(global_feat), 512,
                                         bn=True, is_training=is_training,
                                         scope='fc1')
        gaussians = tf_util.fully_connected(hidden, latent_dim * 2,
                                            bn=True, is_training=is_training,
                                            activation_fn=None, scope='fc2')

        # Split the 2 * latent_dim outputs into the two halves.
        mu = gaussians[:, :latent_dim]
        raw_sigma = gaussians[:, latent_dim:]
        sigma = tf.nn.softplus(raw_sigma)
        return mu, sigma
def encoder(self, point_cloud, is_training, bn=True, bn_decay=None):
    """ PointNet-style encoder, input is BxNx3, output is a Bx64 latent code.
        Input:
            point_cloud: BxNx3 TF tensor; N must be statically known because it
                is used as the max-pool kernel height. B may be unknown.
            is_training: forwarded to every tf_util layer
            bn: bool -- batch norm switch for the conv layers
            bn_decay: forwarded to every tf_util layer
        Return:
            latent: output of the 64-unit 'latentfc2' layer
        NOTE(review): the two latent fully connected layers always use
        bn=True regardless of the `bn` argument -- confirm this is intended.
    """
    num_point = point_cloud.get_shape()[1].value
    input_image = tf.expand_dims(point_cloud, -1)

    # Point functions (MLP implemented as conv2d)
    net = tf_util.conv2d(input_image, 64, [1, 3],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)

    # Symmetric function: max pooling
    global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                     padding='VALID', scope='maxpool')

    # Flatten with a wildcard batch axis. Reshaping to [batch_size, -1] broke
    # whenever the static batch size was unknown (None); the feature width is
    # always static here, so pin that instead.
    flat_dim = global_feat.get_shape()[1:].num_elements()
    net = tf.reshape(global_feat, [-1, flat_dim])

    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='latentfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 64, bn=True, is_training=is_training,
                                  scope='latentfc2', bn_decay=bn_decay)
    return net
def get_model(point_cloud, is_training, classes, dtype=tf.float32, bn=True, bn_decay=None):
    """ Per-point classification PointNet, input is BxNxF, output BxNxC
        B = number of images per batch
        N = number of points
        F = number of features
        C = number of classes
        Return:
            net: BxNxC tensor from the final 'conv10' layer (no activation)
            end_points: dict whose 'transform' entry is the 64x64 feature
                transform produced by feature_transform_net
        Side effect: stores `classes` in the module-level `num_classes`.
    """
    global num_classes
    num_classes = classes

    def _shared_conv(inputs, channels, kernel, scope):
        # Every hidden layer shares the same padding/stride/bn configuration.
        return tf_util.conv2d(inputs, channels, kernel,
                              padding='VALID', stride=[1, 1],
                              bn=bn, is_training=is_training,
                              scope=scope, bn_decay=bn_decay, dtype=dtype)

    static_shape = point_cloud.get_shape()
    num_point = static_shape[1].value
    features = static_shape[2].value
    end_points = {}

    # Align the raw input with a learned FxF transform.
    with tf.compat.v1.variable_scope('transform_net1'):
        transform = input_transform_net(point_cloud, is_training, bn_decay,
                                        K=features, dtype=dtype)
    input_image = tf.expand_dims(tf.matmul(point_cloud, transform), -1)

    net = _shared_conv(input_image, 64, [1, features], 'conv1')
    net = _shared_conv(net, 64, [1, 1], 'conv2')

    # Align the 64-channel point features with a learned 64x64 transform.
    with tf.compat.v1.variable_scope('transform_net2'):
        transform = feature_transform_net(net, is_training, bn_decay,
                                          K=64, dtype=dtype)
    end_points['transform'] = transform
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    point_feat = tf.expand_dims(net_transformed, [2])
    logger.info('point_feat = %s', point_feat)

    net = point_feat
    for channels, scope in ((64, 'conv3'), (128, 'conv4'), (1024, 'conv5')):
        net = _shared_conv(net, channels, [1, 1], scope)

    # Global descriptor: max over all points, then broadcast back per point.
    global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                     padding='VALID', scope='maxpool')
    logger.info('global_feat = %s', global_feat)
    global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
    concat_feat = tf.concat([point_feat, global_feat_expand], 3)
    logger.info('concat_feat = %s', concat_feat)

    net = concat_feat
    for channels, scope in ((512, 'conv6'), (256, 'conv7'),
                            (128, 'conv8'), (128, 'conv9')):
        net = _shared_conv(net, channels, [1, 1], scope)

    # Final layer: no batch norm arguments and no activation.
    net = tf_util.conv2d(net, classes, [1, 1],
                         padding='VALID', stride=[1, 1], activation_fn=None,
                         scope='conv10', dtype=dtype)
    net = tf.squeeze(net, [2])  # BxNxC
    logger.info('net = %s', net)

    return net, end_points
def feature_transform_net(inputs, is_training, bn_decay=None, K=64, dtype=tf.float32, bn=False):
    """ Feature Transform Net, input is BxNx1xK
        Input:
            inputs: BxNx1xK TF tensor; N must be statically known because it is
                used as the max-pool kernel height. B may be unknown.
            is_training, bn_decay, bn, dtype: forwarded to the tf_util layers
            K: side length of the predicted transform
        Return:
            Transformation matrix of size KxK for every batch element (BxKxK)
    """
    num_point = inputs.get_shape()[1].value

    net = tf_util.conv2d(inputs, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')

    # Flatten with a wildcard batch axis: reshaping to [batch_size, -1] failed
    # when the static batch size was unknown (None).
    net = tf.reshape(net, [-1, net.get_shape()[1:].num_elements()])
    net = tf_util.fully_connected(net, 512, bn=bn, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.fully_connected(net, 256, bn=bn, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay, dtype=dtype)

    with tf.compat.v1.variable_scope('transform_feat'):
        # Zero weights plus an identity offset on the bias: the predicted
        # transform starts out as the KxK identity matrix.
        weights = tf.compat.v1.get_variable(
            'weights', [256, K * K],
            initializer=tf.constant_initializer(0.0), dtype=dtype)
        biases = tf.compat.v1.get_variable(
            'biases', [K * K],
            initializer=tf.constant_initializer(0.0), dtype=dtype)
        biases = biases + tf.constant(np.eye(K).flatten(), dtype=dtype)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    transform = tf.reshape(transform, [-1, K, K])
    return transform
def input_transform_net(point_cloud, is_training, bn_decay=None, K=3, dtype=tf.float32, bn=False):
    """ Input (XYZ) Transform Net, input is BxNxK gray image
        Input:
            point_cloud: BxNxK TF tensor; N and K must be statically known.
                B may be unknown.
            is_training, bn_decay, bn, dtype: forwarded to the tf_util layers
            K: number of input features; must equal the last static dimension
                of point_cloud
        Return:
            Transformation matrix of size KxK for every batch element (BxKxK)
    """
    static_shape = point_cloud.get_shape()
    num_point = static_shape[1].value
    num_feature = static_shape[2].value
    # Checked up front so a mismatch fails before any variable is created.
    assert K == num_feature, f'K={K} num_feature={num_feature}'

    input_image = tf.expand_dims(point_cloud, -1)
    net = tf_util.conv2d(input_image, 64, [1, K],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=bn, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')

    # Flatten with a wildcard batch axis: reshaping to [batch_size, -1] failed
    # when the static batch size was unknown (None).
    net = tf.reshape(net, [-1, net.get_shape()[1:].num_elements()])
    net = tf_util.fully_connected(net, 512, bn=bn, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay, dtype=dtype)
    net = tf_util.fully_connected(net, 256, bn=bn, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay, dtype=dtype)

    with tf.compat.v1.variable_scope('transform_XYZ'):
        # Zero weights plus an identity offset on the bias: the predicted
        # transform starts out as the KxK identity matrix.
        weights = tf.compat.v1.get_variable(
            'weights', [256, K * K],
            initializer=tf.constant_initializer(0.0), dtype=dtype)
        biases = tf.compat.v1.get_variable(
            'biases', [K * K],
            initializer=tf.constant_initializer(0.0), dtype=dtype)
        biases = biases + tf.constant(np.eye(K).flatten(), dtype=dtype)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    transform = tf.reshape(transform, [-1, K, K])
    return transform