# NOTE(review): `get_model` is defined again further down this file with a
# different signature (leading `boundary_label`). If both really live in the
# same module the later definition wins -- confirm which one is intended.
def get_model(point_cloud, is_training, num_class, sigma, bn_decay=None, weight_decay=None):
    """Build the semantic-segmentation network graph.

    The graph consists of four feature-encoding layers (npoint = 1024, 256,
    64, 36), four feature-decoding layers applied in reverse order back to
    the input level, and a two-layer kernel-size-1 ``conv1d`` head with
    dropout in between.

    Args:
        point_cloud: Input tensor, used both as the level-0 coordinates and
            as the level-0 features. Presumably BxNx3 (per the original
            docstring) -- confirm with the caller.
        is_training: Forwarded to every layer, batch norm and dropout.
        num_class: Number of output channels of the final ``conv1d``.
        sigma: Base sigma for the first level; deeper levels use 2x, 4x
            and 8x this value.
        bn_decay: Forwarded to the layers that use batch norm.
        weight_decay: Forwarded to every layer that creates weights.

    Returns:
        A tuple ``(net, end_points)``. ``net`` is the output of the ``fc2``
        conv1d (no activation, ``num_class`` channels); presumably
        BxNxnum_class since the head runs over the per-point features --
        confirm against ``tf_util.conv1d``. ``end_points['feats']`` holds the
        output of the ``fc1`` conv1d (128 channels).
    """
    end_points = {}
    l0_xyz = point_cloud
    l0_points = point_cloud

    # Keyword arguments shared by every encoding layer.
    common = dict(is_training=is_training, bn_decay=bn_decay, weight_decay=weight_decay)

    # Feature encoding layers
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=1024, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], scope='layer1', **common)
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz, l1_points, npoint=256, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], scope='layer2', **common)
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz, l2_points, npoint=64, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], scope='layer3', **common)
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=32,
        mlp=[256, 256, 512], scope='layer4', **common)

    # Feature decoding layers.
    # The positional arguments after the four tensors presumably mirror the
    # encoder's (radius, sigma, K, mlp) -- confirm against
    # feature_decoding_layer's signature.
    l3_points = feature_decoding_layer(
        l3_xyz, l4_xyz, l3_points, l4_points, 0.8, 8 * sigma, 16, [512, 512],
        is_training, bn_decay, weight_decay, scope='fa_layer1')
    l2_points = feature_decoding_layer(
        l2_xyz, l3_xyz, l2_points, l3_points, 0.4, 4 * sigma, 16, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer2')
    l1_points = feature_decoding_layer(
        l1_xyz, l2_xyz, l1_points, l2_points, 0.2, 2 * sigma, 16, [256, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer3')
    l0_points = feature_decoding_layer(
        l0_xyz, l1_xyz, l0_points, l1_points, 0.1, sigma, 16, [128, 128, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID', activation_fn=None,
                         weight_decay=weight_decay, scope='fc2')

    return net, end_points
# NOTE(review): `get_scene_model` is defined again further down this file with
# a different signature (`point_cloud, cls_label, ...`). If both really live in
# the same module the later definition wins -- confirm which one is intended.
def get_scene_model(point_cloud, is_training, num_class, sigma, bn_decay=None, weight_decay=None):
    """Build the segmentation network graph with an extra scene-level branch.

    The encoder has seven levels (npoint = 2048, 1024, 512, 256, 128, 64,
    36). An additional ``external_layer5`` encoding layer (npoint = 8) is
    averaged over axis 1 and fed through dropout and a kernel-size-1
    ``conv1d`` to produce a scene-level output. Seven decoding layers then
    run in reverse order back to the input level, followed by a two-layer
    kernel-size-1 ``conv1d`` head.

    Args:
        point_cloud: Input tensor, used both as the level-0 coordinates and
            as the level-0 features. Presumably BxNx3 (per the original
            docstring) -- confirm with the caller.
        is_training: Forwarded to every layer, batch norm and dropout.
        num_class: Number of output channels of both the per-point head
            (``fc2``) and the scene branch (``external_fc``).
        sigma: Base sigma; deeper levels use 2x, 4x and 8x this value.
        bn_decay: Forwarded to the layers that use batch norm.
        weight_decay: Forwarded to every layer that creates weights.

    Returns:
        A tuple ``(net, end_points, external_scene_feature)``. ``net`` is the
        output of the ``fc2`` conv1d (no activation, ``num_class`` channels).
        ``end_points['feats']`` holds the output of the ``fc1`` conv1d.
        ``external_scene_feature`` is the output of the ``external_fc``
        conv1d applied to the axis-1 mean (``keepdims=True``) of the
        ``external_layer5`` features.
    """
    end_points = {}
    l0_xyz = point_cloud
    l0_points = point_cloud

    # Keyword arguments shared by every encoding layer.
    common = dict(is_training=is_training, bn_decay=bn_decay, weight_decay=weight_decay)

    # Feature encoding layers
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=2048, radius=0.1, sigma=sigma, K=8,
        mlp=[32, 32, 32], scope='layer1', **common)
    l1_xyz_1024, l1_points_1024 = feature_encoding_layer(
        l1_xyz, l1_points, npoint=1024, radius=0.1, sigma=sigma, K=8,
        mlp=[32, 32, 64], scope='layer1_1024', **common)
    l1_xyz_512, l1_points_512 = feature_encoding_layer(
        l1_xyz_1024, l1_points_1024, npoint=512, radius=0.1, sigma=sigma, K=8,
        mlp=[64, 64, 64], scope='layer1_512', **common)
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz_512, l1_points_512, npoint=256, radius=0.2, sigma=2 * sigma, K=8,
        mlp=[64, 64, 128], scope='layer2', **common)
    l2_xyz_128, l2_points_128 = feature_encoding_layer(
        l2_xyz, l2_points, npoint=128, radius=0.2, sigma=2 * sigma, K=8,
        mlp=[128, 128, 128], scope='layer2_128', **common)
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz_128, l2_points_128, npoint=64, radius=0.4, sigma=4 * sigma, K=8,
        mlp=[128, 128, 256], scope='layer3', **common)
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=8,
        mlp=[256, 256, 512], scope='layer4', **common)

    # Scene-level branch: only the features of the extra layer are used,
    # its coordinates are discarded.
    _, external_l5_points = feature_encoding_layer(
        l4_xyz, l4_points, npoint=8, radius=1.6, sigma=8 * sigma, K=8,
        mlp=[512, 512, 512], scope='external_layer5', **common)
    external_l6_scene_feature = tf.reduce_mean(external_l5_points, axis=1, keepdims=True)
    external_scene_feature = tf_util.dropout(
        external_l6_scene_feature, keep_prob=0.5, is_training=is_training,
        scope='external_dp')
    external_scene_feature = tf_util.conv1d(
        external_scene_feature, num_class, 1, padding='VALID', activation_fn=None,
        weight_decay=weight_decay, scope='external_fc')

    # Feature decoding layers.
    # The positional arguments after the four tensors presumably mirror the
    # encoder's (radius, sigma, K, mlp) -- confirm against
    # feature_decoding_layer's signature.
    l3_points = feature_decoding_layer(
        l3_xyz, l4_xyz, l3_points, l4_points, 0.8, 8 * sigma, 8, [512, 512],
        is_training, bn_decay, weight_decay, scope='fa_layer1')
    l2_points_128 = feature_decoding_layer(
        l2_xyz_128, l3_xyz, l2_points_128, l3_points, 0.4, 4 * sigma, 8, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer2_128')
    l2_points = feature_decoding_layer(
        l2_xyz, l2_xyz_128, l2_points, l2_points_128, 0.4, 4 * sigma, 8, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer2')
    l1_points_512 = feature_decoding_layer(
        l1_xyz_512, l2_xyz, l1_points_512, l2_points, 0.2, 2 * sigma, 8, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer3_512')
    l1_points_1024 = feature_decoding_layer(
        l1_xyz_1024, l1_xyz_512, l1_points_1024, l1_points_512, 0.2, 2 * sigma, 8,
        [256, 256], is_training, bn_decay, weight_decay, scope='fa_layer3_1024')
    l1_points = feature_decoding_layer(
        l1_xyz, l1_xyz_1024, l1_points, l1_points_1024, 0.2, 2 * sigma, 8, [256, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer3')
    l0_points = feature_decoding_layer(
        l0_xyz, l1_xyz, l0_points, l1_points, 0.1, sigma, 8, [128, 128, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID', activation_fn=None,
                         weight_decay=weight_decay, scope='fc2')

    return net, end_points, external_scene_feature
def get_scene_model(point_cloud, cls_label, is_training, bn_decay=None, num_classes=50,
                    sigma=0.05, weight_decay=None):
    """Build the segmentation network graph with an extra scene-level branch.

    The first three channels of ``point_cloud`` are used as coordinates and
    the full tensor as the level-0 features. The encoder has four levels
    (npoint = 512, 128, 36, 16). An additional ``external_layer5`` encoding
    layer (npoint = 8) is averaged over axis 1 and fed through dropout and a
    kernel-size-1 ``conv1d`` to produce a scene-level output. Four decoding
    layers then run in reverse order back to the input level, followed by a
    two-layer kernel-size-1 ``conv1d`` head.

    Args:
        point_cloud: Input tensor indexed as ``[:, :, 0:3]`` for coordinates.
            Presumably BxNxC with xyz first and extra per-point channels
            (e.g. normals) after -- confirm with the caller.
        cls_label: Not used by this function; kept so existing callers keep
            working.
        is_training: Forwarded to every layer, batch norm and dropout.
        bn_decay: Forwarded to the layers that use batch norm.
        num_classes: Number of output channels of both the per-point head
            (``fc2``) and the scene branch (``external_fc``).
        sigma: Base sigma; deeper levels use 2x, 4x and 8x this value. The
            default matches the value that used to be hard-coded here.
        weight_decay: Forwarded to every layer that creates weights. The
            default (``None``) matches the value that used to be hard-coded.

    Returns:
        A tuple ``(net, end_points, external_scene_feature)``. ``net`` is the
        output of the ``fc2`` conv1d (no activation, ``num_classes``
        channels). ``end_points['feats']`` holds the output of the ``fc1``
        conv1d. ``external_scene_feature`` is the output of the
        ``external_fc`` conv1d applied to the axis-1 mean (``keepdims=True``)
        of the ``external_layer5`` features.
    """
    end_points = {}
    l0_xyz = point_cloud[:, :, 0:3]
    l0_points = point_cloud  # full input, including any channels beyond xyz

    # Keyword arguments shared by every encoding layer.
    common = dict(is_training=is_training, bn_decay=bn_decay, weight_decay=weight_decay)

    # Feature encoding layers
    l1_xyz, l1_points = feature_encoding_layer(
        l0_xyz, l0_points, npoint=512, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], scope='layer1', **common)
    l2_xyz, l2_points = feature_encoding_layer(
        l1_xyz, l1_points, npoint=128, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], scope='layer2', **common)
    l3_xyz, l3_points = feature_encoding_layer(
        l2_xyz, l2_points, npoint=36, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], scope='layer3', **common)
    l4_xyz, l4_points = feature_encoding_layer(
        l3_xyz, l3_points, npoint=16, radius=0.8, sigma=8 * sigma, K=8,
        mlp=[256, 256, 512], scope='layer4', **common)

    # Scene-level branch: only the features of the extra layer are used,
    # its coordinates are discarded.
    _, external_l5_points = feature_encoding_layer(
        l4_xyz, l4_points, npoint=8, radius=1.6, sigma=8 * sigma, K=8,
        mlp=[512, 512, 512], scope='external_layer5', **common)
    external_l6_scene_feature = tf.reduce_mean(external_l5_points, axis=1, keepdims=True)
    external_scene_feature = tf_util.dropout(
        external_l6_scene_feature, keep_prob=0.5, is_training=is_training,
        scope='external_dp')
    external_scene_feature = tf_util.conv1d(
        external_scene_feature, num_classes, 1, padding='VALID', activation_fn=None,
        weight_decay=weight_decay, scope='external_fc')

    # Feature decoding layers.
    # The positional arguments after the four tensors presumably mirror the
    # encoder's (radius, sigma, K, mlp) -- confirm against
    # feature_decoding_layer's signature.
    l3_points = feature_decoding_layer(
        l3_xyz, l4_xyz, l3_points, l4_points, 0.8, 8 * sigma, 16, [512, 512],
        is_training, bn_decay, weight_decay, scope='fa_layer1')
    l2_points = feature_decoding_layer(
        l2_xyz, l3_xyz, l2_points, l3_points, 0.4, 4 * sigma, 16, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer2')
    l1_points = feature_decoding_layer(
        l1_xyz, l2_xyz, l1_points, l2_points, 0.2, 2 * sigma, 16, [256, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer3')
    l0_points = feature_decoding_layer(
        l0_xyz, l1_xyz, l0_points, l1_points, 0.1, sigma, 16, [128, 128, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_classes, 1, padding='VALID', activation_fn=None,
                         weight_decay=weight_decay, scope='fc2')

    return net, end_points, external_scene_feature
def get_model(boundary_label, point_cloud, is_training, num_class, sigma, bn_decay=None,
              weight_decay=None):
    """Build the boundary-aware semantic-segmentation network graph.

    ``boundary_label`` is passed through a sigmoid and ``tf.stop_gradient``
    and then handed to the first encoding layer and the last decoding layer.
    The sub-sampled boundary tensor returned by the first encoding layer is
    handed to the second encoding layer and to the ``fa_layer3`` decoding
    layer. The remaining layers are called without a boundary tensor.

    The first three channels of ``point_cloud`` serve as both coordinates and
    level-0 features. After the ``fc1`` conv1d the full ``point_cloud`` is
    concatenated back onto the features along axis 2.

    Args:
        boundary_label: Tensor of boundary scores before the sigmoid.
            Presumably per-point logits aligned with ``point_cloud`` --
            confirm with the caller.
        point_cloud: Input tensor indexed as ``[:, :, :3]`` for coordinates.
            Presumably BxNxC with xyz first -- confirm with the caller.
        is_training: Forwarded to every layer, batch norm and dropout.
        num_class: Number of output channels of the final ``conv1d``.
        sigma: Base sigma; deeper levels use 2x, 4x and 8x this value.
        bn_decay: Forwarded to the layers that use batch norm.
        weight_decay: Forwarded to every layer that creates weights.

    Returns:
        A tuple ``(net, end_points)``. ``net`` is the output of the ``fc2``
        conv1d (no activation, ``num_class`` channels).
        ``end_points['feats']`` holds the ``fc1`` output concatenated with
        ``point_cloud`` along axis 2.
    """
    # Squash to (0, 1) and block gradients from flowing back into whatever
    # produced the boundary scores.
    boundary_label = tf.sigmoid(boundary_label)
    boundary_label = tf.stop_gradient(boundary_label)

    end_points = {}
    l0_xyz = point_cloud[:, :, :3]
    l0_points = point_cloud[:, :, :3]

    # Keyword arguments shared by every encoding layer.
    common = dict(is_training=is_training, bn_decay=bn_decay, weight_decay=weight_decay)

    # Feature encoding layers
    l1_xyz, l1_points, sub_boundary1 = feature_encoding_layer(
        l0_xyz, l0_points, npoint=1024, radius=0.1, sigma=sigma, K=32,
        mlp=[32, 32, 64], local_num_out_channel=3, scope='layer1',
        boundary_label=boundary_label, **common)
    # The boundary tensor returned by layer2 is not used downstream.
    l2_xyz, l2_points, _ = feature_encoding_layer(
        l1_xyz, l1_points, npoint=256, radius=0.2, sigma=2 * sigma, K=32,
        mlp=[64, 64, 128], local_num_out_channel=32, scope='layer2',
        boundary_label=sub_boundary1, **common)
    l3_xyz, l3_points, _ = feature_encoding_layer(
        l2_xyz, l2_points, npoint=64, radius=0.4, sigma=4 * sigma, K=32,
        mlp=[128, 128, 256], local_num_out_channel=64, scope='layer3', **common)
    l4_xyz, l4_points, _ = feature_encoding_layer(
        l3_xyz, l3_points, npoint=36, radius=0.8, sigma=8 * sigma, K=32,
        mlp=[256, 256, 512], local_num_out_channel=128, scope='layer4', **common)

    # Feature decoding layers.
    # The positional arguments after the four tensors presumably mirror the
    # encoder's (radius, sigma, K, mlp) -- confirm against
    # feature_decoding_layer's signature.
    l3_points = feature_decoding_layer(
        l3_xyz, l4_xyz, l3_points, l4_points, 0.8, 8 * sigma, 16, [512, 512],
        is_training, bn_decay, weight_decay, scope='fa_layer1')
    l2_points = feature_decoding_layer(
        l2_xyz, l3_xyz, l2_points, l3_points, 0.4, 4 * sigma, 16, [256, 256],
        is_training, bn_decay, weight_decay, scope='fa_layer2')
    l1_points = feature_decoding_layer(
        l1_xyz, l2_xyz, l1_points, l2_points, 0.2, 2 * sigma, 16, [256, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer3',
        boundary_label=sub_boundary1)
    l0_points = feature_decoding_layer(
        l0_xyz, l1_xyz, l0_points, l1_points, 0.1, sigma, 16, [128, 128, 128],
        is_training, bn_decay, weight_decay, scope='fa_layer4',
        boundary_label=boundary_label)

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1',
                         bn_decay=bn_decay, weight_decay=weight_decay)
    # Re-attach the raw input channels before the classifier.
    net = tf.concat([net, point_cloud], axis=2)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID', activation_fn=None,
                         weight_decay=weight_decay, scope='fc2')

    return net, end_points