# Imports assumed by these models (module paths follow the PointConv repo layout).
import tensorflow as tf

import tf_util
from PointConv import feature_encoding_layer, feature_decoding_layer


def get_model(point_cloud, is_training, num_class, sigma,
              bn_decay=None, weight_decay=None):
    """ Semantic segmentation PointNet: input is BxNx3, output is BxNxnum_class. """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = point_cloud

    # Feature encoding layers: subsample 1024 -> 256 -> 64 -> 36 points while
    # widening the features and the density-estimation bandwidth sigma.
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=1024, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer1')
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz, l1_points, npoint=256, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer2')
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz, l2_points, npoint=64, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer3')
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=32,
        mlp=[256, 256, 512], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer4')

    # Feature decoding layers: propagate features back up to the full cloud.
    l3_points = feature_decoding_layer(l3_xyz, l4_xyz, l3_points, l4_points,
                                       0.8, 8 * sigma, 16, [512, 512],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer1')
    l2_points = feature_decoding_layer(l2_xyz, l3_xyz, l2_points, l3_points,
                                       0.4, 4 * sigma, 16, [256, 256],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer2')
    l1_points = feature_decoding_layer(l1_xyz, l2_xyz, l1_points, l2_points,
                                       0.2, 2 * sigma, 16, [256, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer3')
    l0_points = feature_decoding_layer(l0_xyz, l1_xyz, l0_points, l1_points,
                                       0.1, sigma, 16, [128, 128, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer4')

    # FC layers: per-point classification head.
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID',
                         activation_fn=None, weight_decay=weight_decay,
                         scope='fc2')
    return net, end_points
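# Hedged usage sketch (added for illustration; not part of the original file).
def _example_get_model():
    """Builds the segmentation graph with TF1-style placeholders and runs one
    forward pass. Batch size, point count, num_class, and sigma here are
    illustrative assumptions, not values taken from this repo."""
    import numpy as np
    with tf.Graph().as_default():
        pc_pl = tf.compat.v1.placeholder(tf.float32, shape=(8, 2048, 3))
        is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())
        logits, end_points = get_model(pc_pl, is_training_pl,
                                       num_class=13, sigma=0.05)
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            pred = sess.run(logits, feed_dict={
                pc_pl: np.random.rand(8, 2048, 3).astype(np.float32),
                is_training_pl: False})
            # pred has shape (8, 2048, 13): per-point class logits.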
def get_embedding_model(points, is_training, bn_decay, batch_size=1):
    """ Encode a point cloud into a single 256-dimensional embedding vector. """
    # Get inputs from our features map.
    l0_xyz = tf.reshape(points, shape=(batch_size, -1, 3))
    l0_points = None

    with tf.compat.v1.variable_scope('encoder'):
        # Encode with PointConv layers.
        l1_xyz, l1_points = feature_encoding_layer(
            l0_xyz, l0_xyz, npoint=512, radius=0.1, sigma=0.05, K=32,
            mlp=[32, 32, 64], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer1')
        l2_xyz, l2_points = feature_encoding_layer(
            l1_xyz, l1_points, npoint=256, radius=0.2, sigma=0.1, K=32,
            mlp=[64, 64, 64], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer2')
        l3_xyz, l3_points = feature_encoding_layer(
            l2_xyz, l2_points, npoint=64, radius=0.4, sigma=0.2, K=32,
            mlp=[128, 128, 256], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer3')
        l4_xyz, l4_points = feature_encoding_layer(
            l3_xyz, l3_points, npoint=36, radius=0.8, sigma=0.4, K=32,
            mlp=[256, 256, 512], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer4')

        # Fully connected layers: flatten the deepest features, then project
        # to a 256-dimensional embedding vector.
        embedding = tf.reshape(l4_points, [batch_size, -1])
        cloud_embedding = tf.compat.v1.layers.Dense(256)(embedding)
        cloud_embedding = tf.compat.v1.layers.batch_normalization(
            cloud_embedding, training=is_training)
        cloud_embedding = tf.nn.relu(cloud_embedding)
    return cloud_embedding
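# Hedged usage sketch (illustration only). Assuming feature_encoding_layer
# returns (batch, npoint, mlp[-1]) features, layer4 yields l4_points of shape
# (batch_size, 36, 512), so the flattened vector fed to Dense(256) is
# 36 * 512 = 18432 wide; the npoint cascade fixes that width statically.
# The 2048-point placeholder below is an illustrative assumption.
def _example_get_embedding_model():
    with tf.Graph().as_default():
        points_pl = tf.compat.v1.placeholder(tf.float32, shape=(1, 2048, 3))
        is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())
        embedding = get_embedding_model(points_pl, is_training_pl,
                                        bn_decay=None, batch_size=1)
        return embedding  # static shape (1, 256)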
def get_pointconv_model(points, xyz, sdf_label, is_training, bn_decay,
                        batch_size=32, loss_feature='loss'):
    ''' Given features and labels, return prediction, loss, and debug ops. '''
    # Get inputs from our features map.
    l0_xyz = tf.reshape(points, shape=(batch_size, -1, 3))
    l0_points = None
    xyz_in = tf.reshape(xyz, shape=(batch_size, -1, 3))
    # Reshape labels to (B, N, 1) so they align with the (B, N, 1) prediction
    # in the loss below.
    sdf_label = tf.reshape(sdf_label, shape=(batch_size, -1, 1))

    with tf.compat.v1.variable_scope('points_embedding'):
        # Embed each query point into a 256-dimensional vector.
        l1_pts = tf.compat.v1.layers.Dense(512, activation=tf.nn.relu,
                                           use_bias=True)(xyz_in)
        l1_pts = tf.compat.v1.layers.dropout(l1_pts, rate=0.2,
                                             training=is_training)
        pts_embedding = tf.compat.v1.layers.Dense(256, activation=tf.nn.relu,
                                                  use_bias=True)(l1_pts)
        pts_embedding = tf.compat.v1.layers.dropout(pts_embedding, rate=0.2,
                                                    training=is_training)

    with tf.compat.v1.variable_scope('encoder'):
        # Encode with PointConv layers.
        l1_xyz, l1_points = feature_encoding_layer(
            l0_xyz, l0_xyz, npoint=512, radius=0.1, sigma=0.05, K=32,
            mlp=[32, 32, 64], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer1')
        l2_xyz, l2_points = feature_encoding_layer(
            l1_xyz, l1_points, npoint=256, radius=0.2, sigma=0.1, K=32,
            mlp=[64, 64, 64], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer2')
        l3_xyz, l3_points = feature_encoding_layer(
            l2_xyz, l2_points, npoint=64, radius=0.4, sigma=0.2, K=32,
            mlp=[128, 128, 256], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer3')
        l4_xyz, l4_points = feature_encoding_layer(
            l3_xyz, l3_points, npoint=36, radius=0.8, sigma=0.4, K=32,
            mlp=[256, 256, 512], is_training=is_training, bn_decay=bn_decay,
            weight_decay=None, scope='layer4')

        # Fully connected layers: flatten, then encode to a 256-dimensional
        # cloud embedding vector.
        embedding = tf.reshape(l4_points, [batch_size, -1])
        cloud_embedding = tf.compat.v1.layers.Dense(256)(embedding)
        cloud_embedding = tf.compat.v1.layers.batch_normalization(
            cloud_embedding, training=is_training)
        cloud_embedding = tf.nn.relu(cloud_embedding)

    with tf.compat.v1.variable_scope('sdf'):
        # Combine embeddings: tile the cloud embedding so it can be
        # concatenated with every per-point embedding.
        cloud_embedding = tf.tile(tf.expand_dims(cloud_embedding, 1),
                                  [1, tf.shape(pts_embedding)[1], 1])
        embedded_inputs = tf.concat([pts_embedding, cloud_embedding], axis=2)

        # Eight Dense layers predict the SDF: batch norm + ReLU after the
        # first seven, tanh on the last.
        l1_sdf = tf.compat.v1.layers.Dense(512, name='sdf_1')(embedded_inputs)
        l1_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l1_sdf, training=is_training)
        l1_sdf_2 = tf.nn.relu(l1_sdf_1)

        l2_sdf = tf.compat.v1.layers.Dense(512, name='sdf_2')(l1_sdf_2)
        l2_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l2_sdf, training=is_training)
        l2_sdf_2 = tf.nn.relu(l2_sdf_1)

        l3_sdf = tf.compat.v1.layers.Dense(256, name='sdf_3')(l2_sdf_2)
        l3_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l3_sdf, training=is_training)
        l3_sdf_2 = tf.nn.relu(l3_sdf_1)

        # Feed the input embeddings back in here (skip connection).
        l3_sdf_aug = tf.concat([l3_sdf_2, embedded_inputs], axis=2)

        l4_sdf = tf.compat.v1.layers.Dense(512, name='sdf_4')(l3_sdf_aug)
        l4_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l4_sdf, training=is_training)
        l4_sdf_2 = tf.nn.relu(l4_sdf_1)

        l5_sdf = tf.compat.v1.layers.Dense(512, name='sdf_5')(l4_sdf_2)
        l5_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l5_sdf, training=is_training)
        l5_sdf_2 = tf.nn.relu(l5_sdf_1)

        l6_sdf = tf.compat.v1.layers.Dense(512, name='sdf_6')(l5_sdf_2)
        l6_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l6_sdf, training=is_training)
        l6_sdf_2 = tf.nn.relu(l6_sdf_1)

        l7_sdf = tf.compat.v1.layers.Dense(512, name='sdf_7')(l6_sdf_2)
        l7_sdf_1 = tf.compat.v1.layers.batch_normalization(
            l7_sdf, training=is_training)
        l7_sdf_2 = tf.nn.relu(l7_sdf_1)

        # Final layer uses tanh to bound the predicted SDF.
        sdf_prediction = tf.compat.v1.layers.Dense(
            1, activation=tf.nn.tanh, use_bias=True, name='sdf_8')(l7_sdf_2)

    # Loss: mean squared error on the SDF values. A clipped surface loss is
    # kept below as a commented-out alternative.
    # loss = tf.losses.absolute_difference(
    #     tf.clip_by_value(sdf_label, -0.1, 0.1),
    #     tf.clip_by_value(sdf_prediction, -0.1, 0.1))
    loss = tf.compat.v1.losses.mean_squared_error(sdf_label, sdf_prediction)
    tf.compat.v1.summary.scalar(loss_feature, loss)

    # Collect debug print statements as needed.
    debug = tf.no_op()
    return sdf_prediction, loss, debug
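# Hedged training sketch (added for illustration; optimizer choice, learning
# rate, and placeholder shapes are assumptions). Because the model uses
# tf.compat.v1.layers.batch_normalization, the moving mean/variance update
# ops live in GraphKeys.UPDATE_OPS and must be run together with the train op.
def _example_train_pointconv_sdf():
    with tf.Graph().as_default():
        points_pl = tf.compat.v1.placeholder(tf.float32, shape=(32, 1024, 3))
        xyz_pl = tf.compat.v1.placeholder(tf.float32, shape=(32, 256, 3))
        sdf_pl = tf.compat.v1.placeholder(tf.float32, shape=(32, 256, 1))
        is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())
        sdf_pred, loss, _ = get_pointconv_model(points_pl, xyz_pl, sdf_pl,
                                                is_training_pl, bn_decay=None,
                                                batch_size=32)
        update_ops = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(loss)
        return train_op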
def get_scene_model(point_cloud, cls_label, is_training, bn_decay=None,
                    num_classes=50):
    """ Semantic segmentation PointNet with a scene-level head: input is
    BxNxC (xyz in the first three channels), outputs are BxNxnum_class
    per-point logits and a Bx1xnum_class scene-level prediction. """
    # cls_label is accepted for interface compatibility but unused here.
    point_cloud_with_norm = point_cloud
    point_cloud = point_cloud[:, :, 0:3]
    sigma = 0.05
    weight_decay = None
    num_class = num_classes
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = point_cloud_with_norm  # keep any extra channels as features

    # Feature encoding layers
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=512, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer1')
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz, l1_points, npoint=128, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer2')
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz, l2_points, npoint=36, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer3')
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=16, radius=0.8, sigma=8 * sigma, K=8,
        mlp=[256, 256, 512], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer4')

    # Scene-level head: one more encoding layer, mean-pool to a single
    # feature, then classify.
    external_l5_xyz, external_l5_points = feature_encoding_layer(
        l4_xyz, l4_points, npoint=8, radius=1.6, sigma=8 * sigma, K=8,
        mlp=[512, 512, 512], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='external_layer5')
    external_l6_scene_feature = tf.reduce_mean(external_l5_points, axis=1,
                                               keepdims=True)
    external_scene_feature = tf_util.dropout(
        external_l6_scene_feature, keep_prob=0.5, is_training=is_training,
        scope='external_dp')
    external_scene_feature = tf_util.conv1d(
        external_scene_feature, num_class, 1, padding='VALID',
        activation_fn=None, weight_decay=weight_decay, scope='external_fc')

    # Feature decoding layers
    l3_points = feature_decoding_layer(l3_xyz, l4_xyz, l3_points, l4_points,
                                       0.8, 8 * sigma, 16, [512, 512],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer1')
    l2_points = feature_decoding_layer(l2_xyz, l3_xyz, l2_points, l3_points,
                                       0.4, 4 * sigma, 16, [256, 256],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer2')
    l1_points = feature_decoding_layer(l1_xyz, l2_xyz, l1_points, l2_points,
                                       0.2, 2 * sigma, 16, [256, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer3')
    l0_points = feature_decoding_layer(l0_xyz, l1_xyz, l0_points, l1_points,
                                       0.1, sigma, 16, [128, 128, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID',
                         activation_fn=None, weight_decay=weight_decay,
                         scope='fc2')
    return net, end_points, external_scene_feature
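# Hedged sketch of combining get_scene_model's two outputs (illustration
# only): the per-point logits and the scene-level head. The label
# placeholders and the 0.1 weighting are assumptions, not values from this
# repo.
def _example_scene_losses():
    with tf.Graph().as_default():
        pc_pl = tf.compat.v1.placeholder(tf.float32, shape=(8, 2048, 3))
        is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())
        point_labels_pl = tf.compat.v1.placeholder(tf.int32, shape=(8, 2048))
        scene_labels_pl = tf.compat.v1.placeholder(tf.int32, shape=(8,))
        net, end_points, scene_logits = get_scene_model(
            pc_pl, cls_label=None, is_training=is_training_pl)
        per_point_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=point_labels_pl, logits=net))
        # scene_logits is (B, 1, num_class); squeeze to (B, num_class).
        scene_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=scene_labels_pl,
                logits=tf.squeeze(scene_logits, axis=1)))
        total_loss = per_point_loss + 0.1 * scene_loss
        return total_loss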
# Deeper variant of get_scene_model with extra intermediate resolutions;
# named distinctly here so it does not shadow the definition above.
def get_scene_model_deep(point_cloud, is_training, num_class, sigma,
                         bn_decay=None, weight_decay=None):
    """ Semantic segmentation PointNet: input is BxNx3, outputs are
    BxNxnum_class per-point logits and a Bx1xnum_class scene-level
    prediction. """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = point_cloud

    # Feature encoding layers: 2048 -> 1024 -> 512 -> 256 -> 128 -> 64 -> 36 points.
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=2048, radius=0.1, sigma=sigma, K=8,
        mlp=[32, 32, 32], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer1')
    l1_xyz_1024, l1_points_1024 = feature_encoding_layer(
        l1_xyz, l1_points, npoint=1024, radius=0.1, sigma=sigma, K=8,
        mlp=[32, 32, 64], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer1_1024')
    l1_xyz_512, l1_points_512 = feature_encoding_layer(
        l1_xyz_1024, l1_points_1024, npoint=512, radius=0.1, sigma=sigma,
        K=8, mlp=[64, 64, 64], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer1_512')
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz_512, l1_points_512, npoint=256, radius=0.2, sigma=2 * sigma,
        K=8, mlp=[64, 64, 128], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer2')
    l2_xyz_128, l2_points_128 = feature_encoding_layer(
        l2_xyz, l2_points, npoint=128, radius=0.2, sigma=2 * sigma, K=8,
        mlp=[128, 128, 128], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer2_128')
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz_128, l2_points_128, npoint=64, radius=0.4, sigma=4 * sigma,
        K=8, mlp=[128, 128, 256], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer3')
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=8,
        mlp=[256, 256, 512], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer4')

    # Scene-level head, as in get_scene_model above.
    external_l5_xyz, external_l5_points = feature_encoding_layer(
        l4_xyz, l4_points, npoint=8, radius=1.6, sigma=8 * sigma, K=8,
        mlp=[512, 512, 512], is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='external_layer5')
    external_l6_scene_feature = tf.reduce_mean(external_l5_points, axis=1,
                                               keepdims=True)
    external_scene_feature = tf_util.dropout(
        external_l6_scene_feature, keep_prob=0.5, is_training=is_training,
        scope='external_dp')
    external_scene_feature = tf_util.conv1d(
        external_scene_feature, num_class, 1, padding='VALID',
        activation_fn=None, weight_decay=weight_decay, scope='external_fc')

    # Feature decoding layers: mirror every encoding stage so each skip
    # connection pairs tensors with matching point counts.
    l3_points = feature_decoding_layer(l3_xyz, l4_xyz, l3_points, l4_points,
                                       0.8, 8 * sigma, 8, [512, 512],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer1')
    l2_points_128 = feature_decoding_layer(l2_xyz_128, l3_xyz, l2_points_128,
                                           l3_points, 0.4, 4 * sigma, 8,
                                           [256, 256], is_training, bn_decay,
                                           weight_decay,
                                           scope='fa_layer2_128')
    l2_points = feature_decoding_layer(l2_xyz, l2_xyz_128, l2_points,
                                       l2_points_128, 0.4, 4 * sigma, 8,
                                       [256, 256], is_training, bn_decay,
                                       weight_decay, scope='fa_layer2')
    l1_points_512 = feature_decoding_layer(l1_xyz_512, l2_xyz, l1_points_512,
                                           l2_points, 0.2, 2 * sigma, 8,
                                           [256, 256], is_training, bn_decay,
                                           weight_decay,
                                           scope='fa_layer3_512')
    l1_points_1024 = feature_decoding_layer(l1_xyz_1024, l1_xyz_512,
                                            l1_points_1024, l1_points_512,
                                            0.2, 2 * sigma, 8, [256, 256],
                                            is_training, bn_decay,
                                            weight_decay,
                                            scope='fa_layer3_1024')
    l1_points = feature_decoding_layer(l1_xyz, l1_xyz_1024, l1_points,
                                       l1_points_1024, 0.2, 2 * sigma, 8,
                                       [256, 128], is_training, bn_decay,
                                       weight_decay, scope='fa_layer3')
    l0_points = feature_decoding_layer(l0_xyz, l1_xyz, l0_points, l1_points,
                                       0.1, sigma, 8, [128, 128, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID',
                         activation_fn=None, weight_decay=weight_decay,
                         scope='fc2')
    return net, end_points, external_scene_feature
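# Shape trace for get_scene_model_deep above, assuming an input cloud of
# N >= 2048 points and that feature_encoding_layer returns
# (batch, npoint, mlp[-1]) features (hedged; this follows the usual PointConv
# convention and is not asserted by this file):
#   l0:     (B, N,    3)    l1:      (B, 2048, 32)   l1_1024: (B, 1024, 64)
#   l1_512: (B, 512,  64)   l2:      (B, 256, 128)   l2_128:  (B, 128, 128)
#   l3:     (B, 64,  256)   l4:      (B, 36,  512)   external_l5: (B, 8, 512)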
# Boundary-aware variant of get_model; named distinctly here so it does not
# shadow the definition above.
def get_model_with_boundary(boundary_label, point_cloud, is_training,
                            num_class, sigma, bn_decay=None,
                            weight_decay=None):
    """ Semantic segmentation PointNet: input is BxNx3 plus per-point
    boundary logits, output is BxNxnum_class. """
    # Use boundary probabilities as fixed inputs: squash the logits with a
    # sigmoid and block gradients from flowing back into their producer.
    boundary_label = tf.sigmoid(boundary_label)
    boundary_label = tf.stop_gradient(boundary_label)
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud[:, :, :3]
    l0_points = point_cloud[:, :, :3]

    # Feature encoding layers; the first two also subsample the boundary
    # labels so later stages can reuse them.
    l1_xyz, l1_points, sub_boundary1 = feature_encoding_layer(
        l0_xyz, l0_points, npoint=1024, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], local_num_out_channel=3, is_training=is_training,
        bn_decay=bn_decay, weight_decay=weight_decay, scope='layer1',
        boundary_label=boundary_label)
    l2_xyz, l2_points, sub_boundary2 = feature_encoding_layer(
        l1_xyz, l1_points, npoint=256, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], local_num_out_channel=32, is_training=is_training,
        bn_decay=bn_decay, weight_decay=weight_decay, scope='layer2',
        boundary_label=sub_boundary1)
    l3_xyz, l3_points, _ = feature_encoding_layer(
        l2_xyz, l2_points, npoint=64, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], local_num_out_channel=64,
        is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer3')
    l4_xyz, l4_points, _ = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=32,
        mlp=[256, 256, 512], local_num_out_channel=128,
        is_training=is_training, bn_decay=bn_decay,
        weight_decay=weight_decay, scope='layer4')

    # Feature decoding layers; the last two reuse the boundary labels at the
    # matching resolutions.
    l3_points = feature_decoding_layer(l3_xyz, l4_xyz, l3_points, l4_points,
                                       0.8, 8 * sigma, 16, [512, 512],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer1')
    l2_points = feature_decoding_layer(l2_xyz, l3_xyz, l2_points, l3_points,
                                       0.4, 4 * sigma, 16, [256, 256],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer2')
    l1_points = feature_decoding_layer(l1_xyz, l2_xyz, l1_points, l2_points,
                                       0.2, 2 * sigma, 16, [256, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer3',
                                       boundary_label=sub_boundary1)
    l0_points = feature_decoding_layer(l0_xyz, l1_xyz, l0_points, l1_points,
                                       0.1, sigma, 16, [128, 128, 128],
                                       is_training, bn_decay, weight_decay,
                                       scope='fa_layer4',
                                       boundary_label=boundary_label)

    # FC layers; the raw input coordinates are concatenated back onto the
    # per-point features before classification.
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    net = tf.concat([net, point_cloud], axis=2)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                          scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID',
                         activation_fn=None, weight_decay=weight_decay,
                         scope='fc2')
    return net, end_points
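# Hedged wiring sketch for the boundary-aware model (illustration only).
# `boundary_logits` would come from a separate boundary predictor that is not
# part of this file; its shape here, and num_class/sigma, are assumptions.
# Because get_model_with_boundary applies tf.sigmoid and tf.stop_gradient
# internally, raw logits can be fed in and no gradient reaches their producer.
def _example_get_model_with_boundary():
    with tf.Graph().as_default():
        pc_pl = tf.compat.v1.placeholder(tf.float32, shape=(8, 2048, 3))
        boundary_logits = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(8, 2048))
        is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())
        seg_logits, end_points = get_model_with_boundary(
            boundary_logits, pc_pl, is_training_pl, num_class=13, sigma=0.05)
        return seg_logits  # (8, 2048, 13) per-point class logits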