def build_down_block(inputs, name, down_outputs, first=False, TPS=False, is_training=True, bn_decay=None):
    """Build one down-sampling stage: two batch-normalised 3D convs, then a pool.

    Args:
        inputs: tensor fed to the first convolution.
        name: prefix used for the '/conv1', '/conv2' and '/pool' scopes.
        down_outputs: list that receives the second convolution's output
            (appended in place), so a caller can reuse it later.
        first: when true the stage emits ``voxel_start_channel_num`` channels;
            otherwise twice the channel count of ``inputs``.
        TPS: accepted for interface compatibility; not used by this function.
        is_training: forwarded to the convolutions.
        bn_decay: forwarded to the convolutions.

    Returns:
        The pooled output of the second convolution.
    """
    if first:
        channels = voxel_start_channel_num
    else:
        # Double the channel count found on the configured channel axis.
        channels = 2 * inputs.shape[voxel_channel_axis].value

    features = inputs
    for suffix in ('/conv1', '/conv2'):
        features = tf_util.conv3d(features, channels, voxel_conv_size, name + suffix,
                                  bn=True, is_training=is_training, bn_decay=bn_decay)

    # Record the pre-pool activation before reducing resolution.
    down_outputs.append(features)
    return ops.pool(features, voxel_pool_size, name + '/pool')
def inception_module(input, n_filters=64, kernel_sizes=[3, 5], is_training=None, bn_decay=None, scope='inception'):
    """Assemble a 3D inception-style block made of four parallel branches.

    Every convolution uses stride 1, 'SAME' padding and batch normalisation:
      * a 1x1x1 convolution with ``n_filters`` outputs;
      * a cubic ``kernel_sizes[0]`` convolution applied to the 1x1x1 branch,
        with ``int(n_filters / 2)`` outputs;
      * a cubic ``kernel_sizes[1]`` convolution applied to the 1x1x1 branch,
        with ``int(n_filters / 2)`` outputs;
      * an average pool (cubic ``kernel_sizes[0]`` window) of the raw input,
        followed by a 1x1x1 convolution with ``n_filters`` outputs.

    Returns:
        The four branch outputs concatenated along axis 4.
    """
    def _conv(tensor, channels, kernel, suffix):
        # All convolutions in the block share these settings.
        return tf_util.conv3d(tensor, channels, kernel, scope=scope + suffix,
                              stride=[1, 1, 1], padding='SAME', bn=True,
                              bn_decay=bn_decay, is_training=is_training)

    branch_1x1 = _conv(input, n_filters, [1, 1, 1], '_conv1')
    branch_small = _conv(branch_1x1, int(n_filters / 2), [kernel_sizes[0]] * 3, '_conv2')
    branch_large = _conv(branch_1x1, int(n_filters / 2), [kernel_sizes[1]] * 3, '_conv3')

    pooled = tf_util.avg_pool3d(input, [kernel_sizes[0]] * 3, scope=scope + '_avg_pool',
                                stride=[1, 1, 1], padding='SAME')
    branch_pool = _conv(pooled, n_filters, [1, 1, 1], '_conv4')

    return tf.concat([branch_1x1, branch_small, branch_large, branch_pool], axis=4)
def get_model(sequence, is_training, num_classes=10, bn_decay=0.999, weight_decay=0.0001, sn=4, pool_t=False, pool_first=False, freeze_bn=False):
    """Build a residual classifier over a sequence tensor using [1, 3, 3] 3D convs.

    The network is a stem convolution and max pool, four residual groups of
    two blocks each, a mean over axes 1-3, dropout and a final fully
    connected layer with ``num_classes`` outputs. Groups 1 and 2 additionally
    add the output of ``net_utils.senot_module`` (with ``k=sn``) to each block
    before the block's ReLU.

    Args:
        sequence: input tensor; presumably B x T x H x W x C (per the original
            author's note) -- TODO confirm against the caller.
        is_training: forwarded to the batch-norm / dropout layers.
        num_classes: width of the final fully connected layer.
        bn_decay, weight_decay, freeze_bn: forwarded to the layers.
        sn: ``k`` passed to ``senot_module`` in groups 1 and 2.
        pool_t: accepted but not used by this function.
        pool_first: when true the stem convolution uses spatial stride 2.

    Returns:
        (logits, end_points) where ``end_points`` maps names to intermediate
        tensors collected while building the graph.
    """
    bsize = sequence.get_shape()[0].value  # read here but not used below
    end_points = {}

    # Per-group settings: (output channels, spatial stride of the first block).
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]
    blocks_per_group = [2, 2, 2, 2]
    group_topk = [None, sn, sn, None]
    group_shrink = [None, 2, None, None]

    # Keyword arguments shared by every convolution in the network.
    conv_kwargs = dict(bn=True, bn_decay=bn_decay, is_training=is_training,
                       weight_decay=weight_decay, freeze_bn=freeze_bn)

    stem_stride = 2 if pool_first else 1
    net = tf_util.conv3d(sequence, 64, [1, 3, 3], stride=[1, stem_stride, stem_stride],
                         scope='conv0', **conv_kwargs)
    net = tf_util.max_pool3d(net, [1, 3, 3], stride=[1, 2, 2], scope='pool0', padding='SAME')

    for gp, (n_channels, stride) in enumerate(group_specs):
        with tf.variable_scope('group{}'.format(gp)):
            for i in range(blocks_per_group[gp]):
                with tf.variable_scope('block{}'.format(i)):
                    end_points['res{}_{}_in'.format(gp, i)] = net

                    # Only the first block of a group applies the group stride.
                    block_stride = stride if i == 0 else 1
                    branch = tf_util.conv3d(net, n_channels, [1, 3, 3],
                                            stride=[1, block_stride, block_stride],
                                            scope='conv1', **conv_kwargs)
                    branch = tf_util.conv3d(branch, n_channels, [1, 3, 3], stride=[1, 1, 1],
                                            activation_fn=None, scope='conv2', **conv_kwargs)

                    # Project the shortcut when the channel count changes.
                    if net.get_shape()[-1].value != n_channels:
                        net = tf_util.conv3d(net, n_channels, [1, 1, 1],
                                             stride=[1, stride, stride],
                                             activation_fn=None, scope='convshortcut',
                                             **conv_kwargs)

                    net = net + branch
                    end_points['res{}_{}_mid'.format(gp, i)] = net

                    if group_topk[gp] is not None:
                        c = net.get_shape()[-1].value
                        net_pointnet, end_point = net_utils.senot_module(
                            net, k=group_topk[gp], mlp=[c // 4, c // 2], scope='pointnet',
                            is_training=is_training, bn_decay=bn_decay,
                            weight_decay=weight_decay, distance='l2', activation_fn=None,
                            freeze_bn=freeze_bn, shrink_ratio=group_shrink[gp])
                        net = net + net_pointnet
                        end_points['pointnet{}_{}'.format(gp, i)] = end_point
                        end_points['after_pointnet{}_{}'.format(gp, i)] = net

                    net = tf.nn.relu(net)
                    end_points['res{}_{}_out'.format(gp, i)] = net

    # Average over axes 1-3, then classify.
    net = tf.reduce_mean(net, [1, 2, 3])
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None,
                                  weight_decay=weight_decay, scope='fc')
    return net, end_points
def inception_module(input, n_filters=64, kernel_sizes=[3, 5], is_training=None, bn_decay=None, scope='inception'):
    """Begin a 3D inception module by building its 1x1x1 convolution branch.

    The visible body creates only the first branch (a stride-1, 'SAME'-padded,
    batch-normalised 1x1x1 convolution with ``n_filters`` outputs under scope
    ``scope + '_conv1'``) and has no return statement.

    NOTE(review): this definition looks truncated -- ``kernel_sizes`` is never
    read and nothing is returned. Confirm against the full source before use.
    """
    one_by_one = tf_util.conv3d(input, n_filters, [1, 1, 1], scope= scope + '_conv1', stride=[1, 1, 1], padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training)
def get_sdf_3dcnn_binary(grid_idx, globalfeats, is_training, batch_size, num_point, bn, bn_decay, wd=None, FLAGS=None):
    """Decode a global feature vector into a dense grid of two-way predictions.

    The global feature is reshaped to a 1x1x1 volume and up-sampled by six
    stride-2 transposed 3D convolutions; a final 1x1x1 convolution emits two
    channels per grid cell. The grid is then flattened per batch item.

    Args:
        grid_idx: accepted but not used by this function.
        globalfeats: tensor reshaped to ``[batch_size, 1, 1, 1, -1]``.
        is_training: forwarded to every layer.
        batch_size: leading dimension used in the reshapes.
        num_point: accepted but not used by this function.
        bn: whether the layers use batch normalisation.
        bn_decay: forwarded to every layer.
        wd: weight decay forwarded to every layer.
        FLAGS: object providing ``sdf_res``; the grid edge is ``sdf_res + 1``.

    Returns:
        Tensor of shape ``[batch_size, (sdf_res + 1) ** 3, 2]``.
    """
    globalfeats_expand = tf.reshape(globalfeats, [batch_size, 1, 1, 1, -1])
    print('globalfeats_expand', globalfeats_expand.get_shape())

    # (output channels, kernel) for the first five up-sampling layers. The
    # original author annotated their spatial sizes as 2, 4, 8, 16 and 32.
    upsample_specs = [
        (128, [2, 2, 2]),
        (128, [3, 3, 3]),
        (128, [3, 3, 3]),
        (64, [3, 3, 3]),
        (64, [3, 3, 3]),
    ]
    net2 = globalfeats_expand
    for idx, (channels, kernel) in enumerate(upsample_specs, start=1):
        net2 = tf_util.conv3d_transpose(net2, channels, kernel, stride=[2, 2, 2],
                                        bn_decay=bn_decay, bn=bn, is_training=is_training,
                                        weight_decay=wd, scope='3deconv%d' % idx)

    # The last up-sampling layer uses 'VALID' padding (annotated as size 65).
    net2 = tf_util.conv3d_transpose(net2, 32, [3, 3, 3], stride=[2, 2, 2],
                                    bn_decay=bn_decay, bn=bn, is_training=is_training,
                                    weight_decay=wd, padding='VALID', scope='3deconv6')
    net2 = tf_util.conv3d(net2, 2, [1, 1, 1], stride=[1, 1, 1], bn_decay=bn_decay, bn=bn,
                          activation_fn=None, is_training=is_training, weight_decay=wd,
                          padding='VALID', scope='3conv7_binary')

    res_plus = FLAGS.sdf_res + 1
    # Bug fix: the convolution above emits 2 channels per cell, so the grid
    # shape must keep that trailing dimension. The previous 4-D target shape
    # held only half the elements and could never match.
    full_inter = tf.reshape(net2, (batch_size, res_plus, res_plus, res_plus, 2))
    print("3d cnn net2 shape:", full_inter.get_shape())

    pred = tf.reshape(full_inter, [batch_size, -1, 2])
    return pred
def build_bottom_block(inputs, name, is_training=True, bn_decay=None):
    """Build the bottleneck stage: widen to twice the input channels, then narrow back.

    Args:
        inputs: tensor whose size on ``voxel_channel_axis`` sets the channel count.
        name: prefix for the '/conv1' and '/conv2' scopes.
        is_training: forwarded to both convolutions.
        bn_decay: forwarded to both convolutions.

    Returns:
        Output of the second convolution, with the same channel count as ``inputs``.
    """
    base_channels = inputs.shape[voxel_channel_axis].value
    shared = dict(bn=True, is_training=is_training, bn_decay=bn_decay)

    widened = tf_util.conv3d(inputs, 2 * base_channels, voxel_conv_size,
                             name + '/conv1', **shared)
    return tf_util.conv3d(widened, base_channels, voxel_conv_size,
                          name + '/conv2', **shared)
def get_model(point_cloud, is_training, bn_decay=None):
    """Build a small 3D-CNN classifier with a 10-way output.

    A trailing channel axis is appended to ``point_cloud`` before two
    stride-2 3D convolutions, a max pool, and a 128 -> 10 fully connected
    head with dropout.

    Args:
        point_cloud: input tensor with a static batch dimension; presumably a
            B x V x V x V occupancy grid, since a channel axis is added and 3D
            convolutions are applied -- TODO confirm against the caller.
        is_training: forwarded to the batch-norm / dropout layers.
        bn_decay: forwarded to the batch-normalised layers.

    Returns:
        (logits, end_points); ``end_points`` is returned empty.
    """
    batch_size = point_cloud.get_shape()[0].value
    end_points = {}

    bn_kwargs = dict(bn=True, is_training=is_training, bn_decay=bn_decay)

    # Add the channel axis expected by conv3d.
    voxels = tf.expand_dims(point_cloud, -1)
    net = tf_util.conv3d(voxels, 32, [5, 5, 5], scope='conv1', stride=[2, 2, 2],
                         padding='SAME', **bn_kwargs)
    net = tf_util.conv3d(net, 32, [3, 3, 3], scope='conv2', stride=[2, 2, 2],
                         padding='SAME', **bn_kwargs)
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')

    # Flatten per batch item and classify.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 128, scope='fc1', **bn_kwargs)
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.fully_connected(net, 10, activation_fn=None, scope='fc2')
    return net, end_points
def get_model(points, w, mu, sigma, is_training, bn_decay=None, weigth_decay=0.005, add_noise=False, num_classes=40):
    """Classify from a grid of ``tf_util.get_fv_minmax`` features with a small 3D CNN.

    The features returned by ``tf_util.get_fv_minmax`` are arranged on a cubic
    grid whose edge is the rounded cube root of the number of Gaussians
    (``w.shape[0]``), passed through two 3D convolutions and a max pool, then
    through a 128 -> ``num_classes`` fully connected head.

    Args:
        points: input tensor with static batch and point dimensions.
        w, mu, sigma: Gaussian parameters forwarded to ``get_fv_minmax``.
        is_training: boolean tensor (it is used as a ``tf.cond`` predicate).
        bn_decay: forwarded to the batch-normalised layers.
        weigth_decay: forwarded (under that spelling) to the dense layers.
        add_noise: when true, Gaussian noise (stddev 0.01) is added to the
            features during training and zeros otherwise.
        num_classes: width of the output layer.

    Returns:
        (logits, fv) where ``fv`` is the (possibly noised) feature tensor.
    """
    batch_size = points.get_shape()[0].value
    n_points = points.get_shape()[1].value  # read here but not used below
    n_gaussians = w.shape[0].value
    res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))

    fv = tf_util.get_fv_minmax(points, w, mu, sigma, flatten=False)
    if add_noise:
        def _train_noise():
            return tf.random_normal(shape=tf.shape(fv), mean=0.0, stddev=0.01, dtype=tf.float32)

        def _no_noise():
            return tf.zeros(shape=tf.shape(fv))

        fv = fv + tf.cond(is_training, _train_noise, _no_noise)

    # Lay the features out on a res^3 grid and move them to the last axis.
    grid_fisher = tf.transpose(tf.reshape(fv, [batch_size, -1, res, res, res]),
                               [0, 2, 3, 4, 1])

    # 3D CNN; the running counter only feeds the scope names.
    layer = 1
    net = tf_util.conv3d(grid_fisher, 32, [5, 5, 5], scope='conv%d' % layer, stride=[2, 2, 2],
                         padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training)
    layer += 1
    net = tf_util.conv3d(net, 32, [3, 3, 3], scope='conv%d' % layer, stride=[1, 1, 1],
                         padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training)
    layer += 1
    net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool%d' % layer, stride=[2, 2, 2],
                             padding='SAME')
    net = tf.reshape(net, [batch_size, -1])

    # Classifier head.
    net = tf_util.fully_connected(net, 128, bn=True, is_training=is_training, scope='fc1',
                                  bn_decay=bn_decay, weigth_decay=weigth_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp1')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc4',
                                  is_training=is_training, weigth_decay=weigth_decay)
    return net, fv
def inception_module(input, n_filters=64, kernel_sizes=[3,5], is_training=None, bn_decay=None, scope='inception'):
    '''
    Build a four-branch 3D inception block.

    All convolutions use stride 1, 'SAME' padding and batch normalisation,
    and every branch produces ``n_filters`` channels:
      - a 1x1x1 convolution of the input;
      - a cubic ``kernel_sizes[0]`` convolution of the 1x1x1 branch;
      - a cubic ``kernel_sizes[1]`` convolution of the 1x1x1 branch;
      - an average pool (cubic ``kernel_sizes[0]`` window) of the input
        followed by a 1x1x1 convolution.

    :param input: 5-D tensor; the original author's note gives [B,K,K,K,FV]
    :param n_filters: output channels of each branch
    :return: the four branches concatenated along axis 4
    '''
    def _bn_conv(tensor, kernel, suffix):
        # Shared settings for every convolution in the block.
        return tf_util.conv3d(tensor, int(n_filters) if suffix in ('_conv2', '_conv3') else n_filters,
                              kernel, scope=scope + suffix, stride=[1, 1, 1], padding='SAME',
                              bn=True, bn_decay=bn_decay, is_training=is_training)

    pointwise = _bn_conv(input, [1, 1, 1], '_conv1')
    small_kernel = _bn_conv(pointwise, [kernel_sizes[0]] * 3, '_conv2')
    large_kernel = _bn_conv(pointwise, [kernel_sizes[1]] * 3, '_conv3')

    pooled = tf_util.avg_pool3d(input, [kernel_sizes[0]] * 3, scope=scope + '_avg_pool',
                                stride=[1, 1, 1], padding='SAME')
    pooled = _bn_conv(pooled, [1, 1, 1], '_conv4')

    return tf.concat([pointwise, small_kernel, large_kernel, pooled], axis=4)
def build_up_block(inputs, down_inputs, name, final=False, Decoder=False, is_training=True, bn_decay=None):
    """Build one up-sampling stage with a skip connection.

    The input is up-sampled by the layer returned from ``deconv_func()``,
    concatenated with ``down_inputs`` on ``voxel_channel_axis``, and refined
    by two 3D convolutions.

    Args:
        inputs: tensor to up-sample; its channel count sets the stage width.
        down_inputs: tensor concatenated after up-sampling.
        name: prefix for the '/conv1', '/concat', '/conv2' and '/conv3' scopes.
        final: when true the last convolution emits ``voxel_class_num``
            channels without batch normalisation; otherwise it emits half the
            stage width with batch normalisation.
        Decoder: accepted for interface compatibility; not used here.
        is_training: forwarded to the layers.
        bn_decay: forwarded to the layers.

    Returns:
        Output of the last convolution.
    """
    out_num = inputs.shape[voxel_channel_axis].value
    conv1 = deconv_func()(inputs, out_num, voxel_conv_size, name + '/conv1',
                          action=voxel_action, is_training=is_training, bn_decay=bn_decay)
    conv1 = tf.concat([conv1, down_inputs], voxel_channel_axis, name=name + '/concat')
    conv2 = tf_util.conv3d(conv1, out_num, voxel_conv_size, name + '/conv2',
                           bn=True, is_training=is_training, bn_decay=bn_decay)

    # Floor division keeps the channel count an int. Under Python 3 a plain
    # '/' yields a float, which is not a valid number of output channels.
    out_num = voxel_class_num if final else out_num // 2
    conv3 = tf_util.conv3d(conv2, out_num, voxel_conv_size, name + '/conv3',
                           bn=(not final), is_training=is_training, bn_decay=bn_decay)
    return conv3
def get_model(source_point_cloud, template_point_cloud, is_training, bn_decay=None):
    """Extract flattened 3D-CNN features for a source and a template input.

    Both inputs are stacked along the batch axis and sent through the same two
    convolutions and max pool, so they share weights. The flattened result is
    split back into halves, keeping the first 6912 features of each row.

    Args:
        source_point_cloud: first input tensor (static batch dimension).
        template_point_cloud: second input tensor, stacked after the first.
        is_training: forwarded to the convolutions.
        bn_decay: forwarded to the convolutions.

    Returns:
        (source_global_feature, template_global_feature), each with
        ``int(batch_size / 2)`` rows and 6912 columns, where ``batch_size`` is
        the size of the stacked batch.
    """
    stacked = tf.concat([source_point_cloud, template_point_cloud], 0)
    batch_size = stacked.get_shape()[0].value
    num_point = stacked.get_shape()[1].value  # read here but not used below
    end_points = {}

    conv_kwargs = dict(padding='VALID', bn=False, is_training=is_training, bn_decay=bn_decay)

    # Add the channel axis expected by conv3d.
    voxels = tf.expand_dims(stacked, -1)
    net = tf_util.conv3d(voxels, 32, [5, 5, 5], stride=[2, 2, 2], scope='conv1', **conv_kwargs)
    net = tf_util.conv3d(net, 32, [3, 3, 3], stride=[1, 1, 1], scope='conv2', **conv_kwargs)
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')

    net = tf.reshape(net, [batch_size, -1])
    print(net)

    # First half of the stacked batch is the source, second half the template.
    half = int(batch_size / 2)
    source_global_feature = tf.slice(net, [0, 0], [half, 6912])
    template_global_feature = tf.slice(net, [half, 0], [half, 6912])
    return source_global_feature, template_global_feature
def get_model(pointgrid, is_training):
    """Build a 3D encoder/decoder with a classification and a segmentation head.

    Per the original author's notes (not verifiable from this function alone):
        pointgrid: of size B x N x N x N x NUM_FEATURES
        is_training: boolean tensor
        returns pred_cat of size B x NUM_CATEGORY and
                pred_seg of size B x N x N x N x (K+1) x NUM_PART_SEG

    The encoder is nine 3D convolutions, four of them with stride 2. The
    classification head flattens the encoder output and applies three fully
    connected layers. The segmentation head tiles the classification features
    over the coarsest grid, then alternates transposed convolutions with skip
    concatenations from ``conv7``, ``conv5``, ``conv3`` and ``conv1``.

    ``N``, ``K``, ``NUM_CATEGORY``, ``NUM_SEG_PART`` and ``leak_relu`` are
    module-level names defined outside this function.
    """
    # Encoder (trailing notes are the original author's grid-size annotations).
    batch_size = pointgrid.get_shape()[0].value
    conv1 = tf_util.conv3d(pointgrid, 64, [5, 5, 5], scope='conv1', activation_fn=leak_relu, bn=True, is_training=is_training)  # N
    conv2 = tf_util.conv3d(conv1, 64, [5, 5, 5], scope='conv2', activation_fn=leak_relu, stride=[2, 2, 2], bn=True, is_training=is_training)  # N/2
    conv3 = tf_util.conv3d(conv2, 64, [5, 5, 5], scope='conv3', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/2
    conv4 = tf_util.conv3d(conv3, 128, [3, 3, 3], scope='conv4', activation_fn=leak_relu, stride=[2, 2, 2], bn=True, is_training=is_training)  # N/4
    conv5 = tf_util.conv3d(conv4, 128, [3, 3, 3], scope='conv5', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/4
    conv6 = tf_util.conv3d(conv5, 256, [3, 3, 3], scope='conv6', activation_fn=leak_relu, stride=[2, 2, 2], bn=True, is_training=is_training)  # N/8
    conv7 = tf_util.conv3d(conv6, 256, [3, 3, 3], scope='conv7', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/8
    conv8 = tf_util.conv3d(conv7, 512, [3, 3, 3], scope='conv8', activation_fn=leak_relu, stride=[2, 2, 2], bn=True, is_training=is_training)  # N/16
    conv9 = tf_util.conv3d(conv8, 512, [1, 1, 1], scope='conv9', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/16

    # Classification network
    conv9_flat = tf.reshape(conv9, [batch_size, -1])
    fc1 = tf_util.fully_connected(conv9_flat, 512, activation_fn=leak_relu, bn=True, is_training=is_training, scope='fc1')
    do1 = tf_util.dropout(fc1, keep_prob=0.7, is_training=is_training, scope='do1')
    fc2 = tf_util.fully_connected(do1, 256, activation_fn=leak_relu, bn=True, is_training=is_training, scope='fc2')
    do2 = tf_util.dropout(fc2, keep_prob=0.7, is_training=is_training, scope='do2')
    pred_cat = tf_util.fully_connected(do2, NUM_CATEGORY, activation_fn=None, bn=False, scope='pred_cat')

    # Segmentation network
    # Floor division keeps the tile multiples integral. Under Python 3 a plain
    # '/' yields floats, which tf.tile does not accept as multiples.
    coarse = N // 16
    cat_features = tf.tile(
        tf.reshape(tf.concat([fc2, pred_cat], axis=1), [batch_size, 1, 1, 1, -1]),
        [1, coarse, coarse, coarse, 1])
    conv9_cat = tf.concat([conv9, cat_features], axis=4)
    deconv1 = tf_util.conv3d_transpose(conv9_cat, 256, [3, 3, 3], scope='deconv1', activation_fn=leak_relu, bn=True, is_training=is_training, stride=[2, 2, 2], padding='SAME')  # N/8
    conv7_deconv1 = tf.concat(axis=4, values=[conv7, deconv1])
    deconv2 = tf_util.conv3d(conv7_deconv1, 256, [3, 3, 3], scope='deconv2', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/8
    deconv3 = tf_util.conv3d_transpose(deconv2, 128, [3, 3, 3], scope='deconv3', activation_fn=leak_relu, bn=True, is_training=is_training, stride=[2, 2, 2], padding='SAME')  # N/4
    conv5_deconv3 = tf.concat(axis=4, values=[conv5, deconv3])
    deconv4 = tf_util.conv3d(conv5_deconv3, 128, [3, 3, 3], scope='deconv4', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/4
    deconv5 = tf_util.conv3d_transpose(deconv4, 64, [3, 3, 3], scope='deconv5', activation_fn=leak_relu, bn=True, is_training=is_training, stride=[2, 2, 2], padding='SAME')  # N/2
    conv3_deconv5 = tf.concat(axis=4, values=[conv3, deconv5])
    deconv6 = tf_util.conv3d(conv3_deconv5, 64, [5, 5, 5], scope='deconv6', activation_fn=leak_relu, bn=True, is_training=is_training)  # N/2
    deconv7 = tf_util.conv3d_transpose(deconv6, 64, [5, 5, 5], scope='deconv7', activation_fn=leak_relu, bn=True, is_training=is_training, stride=[2, 2, 2], padding='SAME')  # N
    conv1_deconv7 = tf.concat(axis=4, values=[conv1, deconv7])
    deconv8 = tf_util.conv3d(conv1_deconv7, 64, [5, 5, 5], scope='deconv8', activation_fn=leak_relu, bn=True, is_training=is_training)  # N

    # Per-cell logits, then split the channel axis into (K + 1, NUM_SEG_PART).
    pred_seg = tf_util.conv3d(deconv8, (K + 1) * NUM_SEG_PART, [5, 5, 5], scope='pred_seg', activation_fn=None, bn=False, is_training=is_training)
    pred_seg = tf.reshape(pred_seg, [batch_size, N, N, N, K + 1, NUM_SEG_PART])
    return pred_cat, pred_seg
def get_model(point_cloud, input_label, is_training, cat_num, part_num, \
              batch_size, num_point, weight_decay, bn_decay=None):
    """Build a joint classification / part-segmentation network with KNN and VLAD features.

    Pipeline: a learned 3x3 input transform, a K-nearest-neighbour grouping
    (12 neighbours), per-point and per-neighbour convolutions that are
    concatenated and max-pooled over neighbours, a VLAD layer, a global max
    pool, then a fully connected classification head and a per-point
    convolutional segmentation head that also sees ``input_label``.

    Args:
        point_cloud: input points; the original note says B x N x 3.
        input_label: reshaped to ``[batch_size, 1, 1, cat_num]``; presumably a
            one-hot category label -- TODO confirm against the caller.
        is_training: forwarded to the batch-norm / dropout layers.
        cat_num: number of classification outputs.
        part_num: number of segmentation outputs per point.
        batch_size, num_point: static sizes used in reshapes, tiling and pooling.
        weight_decay: forwarded to the segmentation convolutions.
        bn_decay: forwarded to the batch-normalised layers.

    Returns:
        (class logits, per-point part logits reshaped to
        ``[batch_size, num_point, part_num]``, end_points); ``end_points`` is
        returned empty.
    """
    end_points = {}
    KNN = 12

    # Keyword arguments shared by the batch-normalised layers.
    bn_kwargs = dict(bn=True, is_training=is_training, bn_decay=bn_decay)
    seg_kwargs = dict(padding='VALID', stride=[1, 1], bn_decay=bn_decay, bn=True,
                      is_training=is_training, weight_decay=weight_decay)

    with tf.variable_scope('transform_net1') as sc:
        K = 3
        transform = get_transform(point_cloud, is_training, bn_decay, K=K)
    aligned = tf.matmul(point_cloud, transform)

    # KNN search; a trailing axis is added so conv3d can be applied.
    neighbours = KNN_search(aligned, KNN=KNN, name_scope='KNN_search')
    neighbours = tf.expand_dims(neighbours, axis=-1)

    # Per-point feature, repeated for every neighbour.
    aligned = tf.expand_dims(aligned, axis=-1)
    out0 = tf_util.conv2d(aligned, 64, [1, 3], padding='VALID', stride=[1, 1],
                          scope='conv', **bn_kwargs)
    point_feat_tiled = tf.expand_dims(tf.tile(out0, multiples=[1, 1, KNN, 1]), axis=-2)

    # Per-neighbour feature, fused with the tiled per-point feature.
    neighbour_feat = tf_util.conv3d(neighbours, 64, [1, 1, 3], padding='VALID',
                                    stride=[1, 1, 1], scope='conv1', **bn_kwargs)
    fused = tf.concat(values=[neighbour_feat, point_feat_tiled], axis=-1)
    fused = tf_util.conv3d(fused, 128, [1, 1, 1], padding='VALID', stride=[1, 1, 1],
                           scope='conv2', **bn_kwargs)
    fused = tf_util.conv3d(fused, 128, [1, 1, 1], padding='VALID', stride=[1, 1, 1],
                           scope='conv3', **bn_kwargs)

    # Max over the neighbour axis, then drop that axis.
    pool_k = tf_util.max_pool3d(fused, kernel_size=[1, KNN, 1], stride=[1, 2, 2],
                                padding='VALID', scope='pool_k')
    pool_k = tf.squeeze(pool_k, axis=2)

    # VLAD layer
    vlad_out, index = VLAD(pool_k, 16, is_training, bn_decay, layer_name='VLAD')
    out4 = tf_util.conv2d(vlad_out, 512, [1, 1], padding='VALID', stride=[1, 1],
                          scope='vlad_conv3', **bn_kwargs)
    out5 = tf_util.conv2d(out4, 2048, [1, 1], padding='VALID', stride=[1, 1],
                          scope='vlad_conv4', **bn_kwargs)

    # Global max over the point axis.
    out_max = tf.nn.max_pool(out5, ksize=[1, num_point, 1, 1], strides=[1, 2, 2, 1],
                             padding='VALID')

    # classification network
    net = tf.reshape(out_max, [batch_size, -1])
    for fc_scope in ('cla/fc1', 'cla/fc2'):
        net = tf_util.fully_connected(net, 256, scope=fc_scope, **bn_kwargs)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='cla/dp1')
    net = tf_util.fully_connected(net, cat_num, activation_fn=None, scope='cla/fc3')

    # segmentation network: global feature + label, broadcast to every point.
    label_feat = tf.reshape(input_label, [batch_size, 1, 1, cat_num])
    global_feat = tf.concat(axis=3, values=[out_max, label_feat])
    global_tiled = tf.tile(global_feat, [1, num_point, 1, 1])
    seg_input = tf.concat(axis=3, values=[global_tiled, out0, pool_k, vlad_out, out4, out5])

    net2 = tf_util.conv2d(seg_input, 256, [1, 1], scope='seg/conv1', **seg_kwargs)
    net2 = tf_util.dropout(net2, keep_prob=0.8, is_training=is_training, scope='seg/dp1')
    net2 = tf_util.conv2d(net2, 256, [1, 1], scope='seg/conv2', **seg_kwargs)
    net2 = tf_util.dropout(net2, keep_prob=0.8, is_training=is_training, scope='seg/dp2')
    net2 = tf_util.conv2d(net2, 128, [1, 1], scope='seg/conv3', **seg_kwargs)
    net2 = tf_util.conv2d(net2, part_num, [1, 1], padding='VALID', stride=[1, 1],
                          activation_fn=None, bn=False, scope='seg/conv4',
                          weight_decay=weight_decay)
    net2 = tf.reshape(net2, [batch_size, num_point, part_num])

    return net, net2, end_points
def pointnet_sa_module(cascade_id, xyz, points, bidmap, mlp_configs, block_bottom_center_mm, configs, sgf_config_pls, is_training, bn_decay, scope, bn=True, tnet_spec=None, use_xyz=True, IsShowModel=False):
    '''
    Build one set-abstraction cascade: group points by ``bidmap``, encode each
    point, pool each group (max or 3D CNN), then encode each pooled block.

    Shape notes from the original author (not verified by this function):
        Input cascade_id==0:
            xyz is grouped_points: (batch_size,nsubblock0,npoint_subblock0,6)
            points: None
            bidmap: None
        Input cascade_id==1:
            xyz: (batch_size,nsubblock0,3)
            points: (batch_size,nsubblock0,channel)
            bidmap: (batch_size,nsubblock1,npoint_subblock1)
        Medium cascade_id==1:
            grouped_xyz: (batch_size,nsubblock1,npoint_subblock1,3)
            new_xyz: (batch_size,nsubblock1,3)
            group_points: (batch_size,nsubblock1,npoint_subblock1,channel)
        output cascade_id==1:
            new_xyz: (batch_size,nsubblock1,3)
            new_points: (batch_size,nsubblock1,channel)

    Keys read from ``configs``: 'flatten_bm_extract_idx', 'sub_block_step_candis',
    'mean_grouping_position', 'xyz_elements', 'Cnn_keep_prob',
    'only_last_layer_ineach_cascade'.
    Keys read from ``mlp_configs``: 'block_learning', 'point_encoder',
    'block_encoder', and for 3D-CNN pooling 'voxel_channels', 'voxel_kernels',
    'voxel_strides', 'voxel_paddings'.
    ``sgf_config_pls`` and ``tnet_spec`` are accepted but not used here.
    ``InDropMethod`` is a name defined outside this function.

    Returns:
        (new_xyz, new_points, root_point_features); ``root_point_features`` is
        the point-encoder output when ``cascade_id == 0`` and None otherwise.

    NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
    source; branch placement marked with NOTE(review) should be confirmed
    against the original file.
    '''
    block_bottom_center_mm = tf.cast(
        block_bottom_center_mm, tf.float32, name='block_bottom_center_mm'
    )  # gpu_0/sa_layer3/block_bottom_center_mm:0
    batch_size = xyz.get_shape()[0].value
    with tf.variable_scope(scope) as sc:
        cascade_num = configs['flatten_bm_extract_idx'].shape[
            0] - 1  # include global here (Note: cascade_num does not include global in block_pre_util )
        assert configs['sub_block_step_candis'].size == cascade_num - 1
        if cascade_id == 0:
            # The input-dropout mask is looked up by tensor name in the default graph.
            indrop_keep_mask = tf.get_default_graph().get_tensor_by_name(
                'indrop_keep_mask:0')  # indrop_keep_mask:0

        assert len(xyz.shape) == 3

        if bidmap == None:
            # No grouping map: treat the whole input as a single group.
            grouped_xyz = tf.expand_dims(xyz, 1)
            grouped_points = tf.expand_dims(points, 1)
            new_xyz = None
            valid_mask = None
        else:
            # Prefix every bidmap index with its batch index so gather_nd can
            # address (batch, point) pairs.
            batch_idx = tf.reshape(tf.range(batch_size), [batch_size, 1, 1, 1])
            nsubblock = bidmap.get_shape()[1].value
            npoint_subblock = bidmap.get_shape()[2].value
            batch_idx_ = tf.tile(batch_idx, [1, nsubblock, npoint_subblock, 1])
            bidmap = tf.expand_dims(bidmap, axis=-1, name='bidmap')
            bidmap_concat = tf.concat(
                [batch_idx_, bidmap], axis=-1,
                name='bidmap_concat')  # gpu_0/sa_layer0/bidmap_concat:0
            # The value for invalid item in bidmap is -17.
            # On GPU, the responding grouped_xyz and grouped_points is 0.
            # NOT WORK on CPU !!!

            # invalid indices comes from merge_blocks_while_fix_bmap
            # set point_indices_f for invalid points as
            # NETCONFIG['redundant_points_in_block'] ( should be set < -500)
            valid_mask = tf.greater(bidmap, tf.constant(
                -500, tf.int32), 'valid_mask')  # gpu_0/sa_layer0/valid_mask:0

            grouped_xyz = tf.gather_nd(
                xyz, bidmap_concat,
                name='grouped_xyz')  # gpu_0/sa_layer0/grouped_xyz:0
            grouped_points = tf.gather_nd(points, bidmap_concat,
                                          name='group_points')
            if cascade_id == 0 and len(indrop_keep_mask.get_shape()) != 0:
                grouped_indrop_keep_mask = tf.gather_nd(
                    indrop_keep_mask, bidmap_concat,
                    name='grouped_indrop_keep_mask'
                )  # gpu_0/sa_layer0/grouped_indrop_keep_mask:0

        # new_xyz is the "voxel center" or "mean position of points in the voxel"
        if configs['mean_grouping_position'] and (
                not mlp_configs['block_learning'] == '3DCNN'):
            new_xyz = tf.reduce_mean(grouped_xyz, -2)
        else:
            # Columns 3:6 of block_bottom_center_mm scaled by 0.001
            # (presumably millimetres to metres, going by the '_mm' suffix).
            new_xyz = block_bottom_center_mm[:, :, 3:6] * tf.constant(
                0.001, tf.float32)
        # the mid can be mean or block center, decided by configs['mean_grouping_position']
        sub_block_mid = tf.expand_dims(
            new_xyz, -2,
            name='sub_block_mid')  # gpu_1/sa_layer0/sub_block_mid
        global_block_mid = tf.reduce_mean(sub_block_mid, 1, keepdims=True,
                                          name='global_block_mid')
        grouped_xyz_submid = grouped_xyz - sub_block_mid
        grouped_xyz_glomid = grouped_xyz - global_block_mid

        # Select which coordinate variants are fed to the network.
        grouped_xyz_feed = []
        if 'raw' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz)
        if 'sub_mid' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz_submid)
        if 'global_mid' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz_glomid)
        grouped_xyz_feed = tf.concat(grouped_xyz_feed, -1)

        if cascade_id == 0:
            # xyz must be at the first in feed_data_elements !!!!
            # The first three channels of grouped_points are replaced by the
            # selected coordinate variants.
            grouped_points = tf.concat(
                [grouped_xyz_feed, grouped_points[..., 3:]], -1)

            if len(indrop_keep_mask.get_shape()) != 0:
                if InDropMethod == 'set1st':
                    # set all the dropped item as the first item
                    tmp1 = tf.multiply(grouped_points, grouped_indrop_keep_mask)
                    points_1st = grouped_points[:, :, 0:1, :]
                    points_1st = tf.tile(points_1st,
                                         [1, 1, grouped_points.shape[2], 1])
                    indrop_mask_inverse = 1 - grouped_indrop_keep_mask
                    tmp2 = indrop_mask_inverse * points_1st
                    grouped_points = tf.add(
                        tmp1, tmp2, name='grouped_points_droped'
                    )  # gpu_0/sa_layer0/grouped_points_droped
                    #tf.add_to_collection( 'check', grouped_points )
                elif InDropMethod == 'set0':
                    valid_mask = tf.logical_and(
                        valid_mask, tf.equal(grouped_indrop_keep_mask, 0),
                        name='valid_mask_droped'
                    )  # gpu_1/sa_layer0/valid_mask_droped

        # NOTE(review): this 'elif' is assumed to pair with 'if cascade_id == 0';
        # the collapsed source does not show which 'if' it belongs to.
        elif use_xyz:
            grouped_points = tf.concat([grouped_xyz_feed, grouped_points],
                                       axis=-1)

        tf.add_to_collection('grouped_xyz', grouped_xyz)
        tf.add_to_collection('grouped_xyz_submid', grouped_xyz_submid)
        tf.add_to_collection('grouped_xyz_glomid', grouped_xyz_glomid)

        if cascade_id > 0 and use_xyz and (not cascade_id == cascade_num - 1):
            grouped_points = tf.concat([grouped_xyz_feed, grouped_points],
                                       axis=-1)

        nsample = grouped_points.get_shape()[2].value  # the conv kernel size

        if IsShowModel:
            print(
                '\n\npointnet_sa_module cascade_id:%d\n xyz:%s\n grouped_xyz:%s\n new_xyz:%s\n grouped_points:%s\n nsample:%d'
                % (cascade_id, shape_str([xyz]), shape_str([grouped_xyz]),
                   shape_str([new_xyz]), shape_str([grouped_points]), nsample))

        new_points = grouped_points
        if valid_mask != None:
            # Zero out entries flagged invalid before encoding.
            new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1],
                                              tf.float32)

        # ---- per-point encoder ----
        if 'growth_rate' in mlp_configs['point_encoder'][cascade_id]:
            new_points = tf_util.dense_net(
                new_points, mlp_configs['point_encoder'][cascade_id], bn,
                is_training, bn_decay,\
                scope = 'dense_cascade_%d_point_encoder'%(cascade_id) , is_show_model = IsShowModel )
        else:
            for i, num_out_channel in enumerate(
                    mlp_configs['point_encoder'][cascade_id]):
                new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1],
                                            padding='VALID', stride=[1, 1],
                                            bn=bn, is_training=is_training,
                                            scope='conv%d' % (i),
                                            bn_decay=bn_decay)
                if configs['Cnn_keep_prob'] < 1:
                    # Dropout after every layer, or only after the last one
                    # when 'only_last_layer_ineach_cascade' is set.
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['point_encoder'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points, keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training, scope='dropout',
                            name='cnn_dp%d' % (i))
                if IsShowModel:
                    print('point encoder1 %d, new_points:%s' %
                          (i, shape_str([new_points])))

        if cascade_id == 0:
            root_point_features = new_points
            # (A 'set0' variant that multiplied new_points by
            # grouped_indrop_keep_mask existed here only as commented-out code.)
        else:
            root_point_features = None

        # ---- block pooling ----
        pooling = mlp_configs['block_learning']
        if pooling == '3DCNN' and (cascade_id == 0):
            # The first cascade always uses max pooling, even in 3DCNN mode.
            pooling = 'max'

        # (Other pooling modes -- avg, weighted_avg, min, max_and_avg -- were
        # present in the original only as commented-out code.)
        if pooling == 'max':
            # Even the grouped_points and grouped_xyz are 0 for invalid points, the
            # value after mlp will not be. It has to be set as 0 forcibly before
            # pooling.
            if valid_mask != None:
                new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1],
                                                  tf.float32)
            new_points = tf.identity(
                new_points,
                'points_before_max')  # gpu_0/sa_layer0/points_before_max
            new_points = tf.reduce_max(new_points, axis=[2], keepdims=True,
                                       name='points_after_max')
        elif pooling == '3DCNN':
            new_points = grouped_points_to_voxel_points(
                cascade_id, new_points, valid_mask, block_bottom_center_mm,
                configs, grouped_xyz, IsShowVoxelModel=IsShowModel)
            if IsShowModel:
                print('voxel points:%s' % (shape_str([new_points])))
            for i, num_out_channel in enumerate(
                    mlp_configs['voxel_channels'][cascade_id]):
                kernel_i = [mlp_configs['voxel_kernels'][cascade_id][i]] * 3
                stride_i = [mlp_configs['voxel_strides'][cascade_id][i]] * 3
                # Pad only the leading side when axis 1 has even size,
                # both sides otherwise; scaled by the configured padding.
                if new_points.shape[1] % 2 == 0:
                    padding_i = np.array([[0, 0], [1, 0], [1, 0], [1, 0], [
                        0, 0
                    ]]) * mlp_configs['voxel_paddings'][cascade_id][i]
                else:
                    padding_i = np.array([[0, 0], [1, 1], [1, 1], [1, 1], [
                        0, 0
                    ]]) * mlp_configs['voxel_paddings'][cascade_id][i]
                new_points = tf.pad(new_points, padding_i, "CONSTANT")

                # An int entry is a conv layer; 'max' / 'avg' are pooling layers.
                if type(num_out_channel) == int:
                    new_points = tf_util.conv3d(new_points, num_out_channel,
                                                kernel_i,
                                                scope='3dconv_%d' % (i),
                                                stride=stride_i,
                                                padding='VALID', bn=bn,
                                                is_training=is_training,
                                                bn_decay=bn_decay,
                                                name='points_3dcnn_%d' % (i))
                    if IsShowModel:
                        print('block learning by 3dcnn %d, new_points:%s' %
                              (i, shape_str([new_points])))
                elif num_out_channel == 'max':
                    new_points = tf_util.max_pool3d(new_points, kernel_i,
                                                    scope='3dmax_%d' % (i),
                                                    stride=stride_i,
                                                    padding='VALID')
                    if IsShowModel:
                        print('block learning max pooling %d, new_points:%s' %
                              (i, shape_str([new_points])))
                elif num_out_channel == 'avg':
                    new_points = tf_util.avg_pool3d(new_points, kernel_i,
                                                    scope='3dmax_%d' % (i),
                                                    stride=stride_i,
                                                    padding='VALID')
                    if IsShowModel:
                        print('block learning avg pooling %d, new_points:%s' %
                              (i, shape_str([new_points])))
                # gpu_0/sa_layer1/3dconv_0/points_3dcnn_0:0
                if configs['Cnn_keep_prob'] < 1:
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['voxel_channels'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points, keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training, scope='dropout',
                            name='3dcnn_dp%d' % (i))
                # gpu_0/sa_layer4/3dconv_0/points_3dcnn_0:0
            # The three voxel axes must have size 1 by now; drop them and
            # restore a (batch, blocks, 1, channel) layout.
            new_points = tf.squeeze(new_points, [1, 2, 3])
            new_points = tf.reshape(
                new_points, [batch_size, -1, 1, new_points.shape[-1].value])

        if IsShowModel:
            print('after %s, new_points:%s' %
                  (pooling, shape_str([new_points])))

        # ---- per-block encoder ----
        if 'growth_rate' in mlp_configs['block_encoder'][cascade_id]:
            new_points = tf_util.dense_net(
                new_points, mlp_configs['block_encoder'][cascade_id], bn,
                is_training, bn_decay,
                scope='dense_cascade_%d_block_encoder' % (cascade_id),
                is_show_model=IsShowModel)
        else:
            for i, num_out_channel in enumerate(
                    mlp_configs['block_encoder'][cascade_id]):
                new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1],
                                            padding='VALID', stride=[1, 1],
                                            bn=bn, is_training=is_training,
                                            scope='conv_post_%d' % (i),
                                            bn_decay=bn_decay)
                if configs['Cnn_keep_prob'] < 1:
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['block_encoder'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points, keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training, scope='dropout',
                            name='cnn_dp%d' % (i))
                if IsShowModel:
                    print('block encoder %d, new_points:%s' %
                          (i, shape_str([new_points])))
        # (2, 512, 1, 64)
        new_points = tf.squeeze(new_points,
                                [2])  # (batch_size, npoints, mlps_1[-1])

        if IsShowModel:
            print(
                'pointnet_sa_module return\n new_xyz: %s\n new_points:%s\n\n' %
                (shape_str([new_xyz]), shape_str([new_points])))
            #import pdb;pdb.set_trace()
        # (2, 512, 64)
        return new_xyz, new_points, root_point_features
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    """Build the 3D-convolutional policy/value graph.

    The observation placeholder is cast to float32, passed through four
    conv3d + max_pool3d stages (all 'VALID' padding), flattened, and fed
    to two separate 8-unit ReLU dense layers: one yields the policy
    latent, the other the value latent.  A 1-unit dense layer named "vf"
    on top of the value latent produces the value estimate.

    The positional arguments and ``reuse`` are forwarded unchanged to the
    parent constructor, together with ``scale=True``.  ``**kwargs`` is
    accepted but not used here.  The static shape of every intermediate
    tensor is printed while the graph is built.
    """
    super(Policy, self).__init__(sess, ob_space, ac_space, n_env, n_steps,
                                 n_batch, reuse=reuse, scale=True)

    print(self.obs_ph.get_shape())
    hidden = tf.cast(self.obs_ph, tf.float32)
    print(hidden.get_shape())

    # One row per stage:
    # (conv scope, conv channels, conv kernel, pool scope, pool stride).
    # Every conv uses stride 1 and every pool uses a 3x3x3 window.
    stages = (
        ("conv1", 16, [6, 6, 6], "pool1", [2, 2, 2]),
        ("conv2", 32, [5, 5, 5], "pool2", [2, 2, 2]),
        ("conv3", 64, [3, 3, 3], "pool3", [2, 2, 2]),
        ("conv4", 64, [2, 2, 2], "pool4", [1, 1, 1]),
    )

    # NOTE(review): the nesting below is reconstructed from a copy of the
    # file that had lost its indentation -- confirm that both heads and the
    # distribution are meant to live inside the "model" variable scope.
    with tf.variable_scope("model", reuse=reuse):
        for conv_scope, channels, conv_kernel, pool_scope, pool_stride in stages:
            hidden = tu.conv3d(inputs=hidden,
                               num_output_channels=channels,
                               kernel_size=conv_kernel,
                               scope=conv_scope,
                               stride=[1, 1, 1],
                               padding="VALID")
            print(hidden.get_shape())
            hidden = tu.max_pool3d(inputs=hidden,
                                   kernel_size=[3, 3, 3],
                                   scope=pool_scope,
                                   stride=pool_stride,
                                   padding="VALID")
            print(hidden.get_shape())

        hidden = tf.layers.flatten(inputs=hidden)
        print(hidden.get_shape())

        # Policy head: small-stddev truncated-normal init on the kernel.
        with tf.name_scope("pi_h_fc1"):
            pi_latent = tf.layers.dense(
                hidden, 8,
                activation=tf.nn.relu,
                kernel_initializer=tf.truncated_normal_initializer(stddev=1e-3))
        print(pi_latent.get_shape())

        # Value head: same layout as the policy head, separate weights.
        with tf.name_scope("vf_h_fc1"):
            vf_latent = tf.layers.dense(
                hidden, 8,
                activation=tf.nn.relu,
                kernel_initializer=tf.truncated_normal_initializer(stddev=1e-3))
        print(vf_latent.get_shape())
        value_fn = tf.layers.dense(vf_latent, 1, name="vf")

        (self._proba_distribution,
         self._policy,
         self.q_value) = self.pdtype.proba_distribution_from_latent(
             pi_latent, vf_latent, init_scale=0.01)

    self._value_fn = value_fn
    self._setup_init()
net = tf_util.fully_connected(net, 3, activation_fn=None, scope='fc4' + scope_str, is_training=is_training, weigth_decay=weight_decay) net = tf.squeeze(net) return net def inception_module(input, n_filters=64, kernel_sizes=[3, 5], is_training=None, bn_decay=None, scope='inception'): """ 3D inception_module """ one_by_one = tf_util.conv3d(input, n_filters, [1, 1, 1], scope= scope + '_conv1', stride=[1, 1, 1], padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training) three_by_three = tf_util.conv3d(one_by_one, int(n_filters/2), [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope= scope + '_conv2', stride=[1, 1, 1], padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training) five_by_five = tf_util.conv3d(one_by_one, int(n_filters/2), [kernel_sizes[1], kernel_sizes[1], kernel_sizes[1]], scope=scope + '_conv3', stride=[1, 1, 1], padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training) average_pooling = tf_util.avg_pool3d(input, [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope=scope+'_avg_pool', stride=[1, 1, 1], padding='SAME') average_pooling = tf_util.conv3d(average_pooling, n_filters, [1,1,1], scope= scope + '_conv4', stride=[1, 1, 1], padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training) output = tf.concat([ one_by_one, three_by_three, five_by_five, average_pooling], axis=4) #output = output + tf.tile(input) ??? #resnet return output
def senot_module(sequence, k, mlp, scope, mlp0=None, is_training=None, bn_decay=None, weight_decay=None, data_format='NHWC', distance='l2', activation_fn=None, shrink_ratio=None, freeze_bn=False):
    """Aggregate features from each position's k nearest neighbours.

    Pipeline, as implemented below:
      1. optionally project ``sequence`` with 1x1x1 convolutions (``mlp0``);
      2. optionally shrink every frame bilinearly by ``shrink_ratio``;
      3. flatten to (batch_size, -1, channels) and query ``k`` neighbours
         per position with the selected ``knn`` module;
      4. for every (position, neighbour) pair concatenate the coordinate
         difference, the position's own feature and the neighbour feature;
      5. run the 1x1 conv stack ``mlp`` and max-reduce over the neighbour
         axis;
      6. project back to the input channel count with a 1x1x1 convolution
         and a batch norm whose gamma is initialised to zero;
      7. optionally resize back to the input height/width and apply
         ``activation_fn``.

    Args:
        sequence: (batch_size, T, H, W, C) TF tensor.  The static values of
            every dimension are read with ``get_shape()`` and passed to
            ``tf.reshape``, so they are presumably expected to be known at
            graph-construction time -- TODO confirm.
        k: int -- number of neighbours gathered per position.
        mlp: list of int -- output channels of the per-pair 1x1 convs.
        scope: variable scope name shared by all layers of this module.
        mlp0: optional list of int -- output channels of the bottleneck
            1x1x1 convs applied before the neighbour search.
        is_training: forwarded to the conv layers and the final batch norm.
        bn_decay: forwarded to the conv layers and the final batch norm.
        weight_decay: forwarded to the conv layers.
        data_format: forwarded to ``tf_util.conv2d`` and the final batch
            norm.
        distance: 'l2', 'dot' or 'cos'; selects which knn module is
            imported (``knn_l2``, ``knn_dot`` or ``knn_cosin``).
        activation_fn: optional callable applied to the final tensor.
        shrink_ratio: optional int; height and width are floor-divided by
            it before the neighbour search and restored afterwards.
        freeze_bn: forwarded to the conv layers; when true the final batch
            norm is built with ``is_training`` False and is not trainable.

    Returns:
        (net, end_points).  ``net`` is reshaped to
        (batch_size, T, H, W, C_in), where C_in is the channel count of the
        input ``sequence`` (assuming ``tf_util.conv3d`` emits the requested
        number of channels on the last axis).  ``end_points`` is a dict with
        the keys 'input_sequence', 'coord', 'before_max' and 'after_max'.

    Raises:
        ValueError: if ``distance`` is not one of 'l2', 'dot', 'cos'.
    """
    # Fail fast with a clear message.  Previously an unknown distance left
    # `knn` unbound and surfaced later as an UnboundLocalError, after part
    # of the graph had already been built.
    if distance not in ('l2', 'dot', 'cos'):
        raise ValueError(
            "Unsupported distance %r; expected one of 'l2', 'dot', 'cos'"
            % (distance,))

    static_shape = sequence.get_shape()
    batch_size = static_shape[0].value
    num_frames = static_shape[1].value
    height = static_shape[2].value
    width = static_shape[3].value
    num_channels = static_shape[-1].value

    end_points = {}

    if shrink_ratio is not None:
        new_height = height // shrink_ratio
        new_width = width // shrink_ratio
    else:
        new_height = height
        new_width = width

    end_points['input_sequence'] = sequence

    # Optional bottleneck: 1x1x1 convs with batch norm and no activation.
    if mlp0 is not None:
        with tf.variable_scope(scope):
            for i, num_out_channel in enumerate(mlp0):
                sequence = tf_util.conv3d(sequence, num_out_channel, [1, 1, 1],
                                          padding='VALID', stride=[1, 1, 1],
                                          bn=True, is_training=is_training,
                                          scope='conv_bottle%d' % (i),
                                          bn_decay=bn_decay,
                                          weight_decay=weight_decay,
                                          activation_fn=None,
                                          freeze_bn=freeze_bn)

    num_channels_bottleneck = sequence.get_shape()[-1].value

    # Fold time into the batch axis so frames can be resized as 2-D images.
    if shrink_ratio is not None:
        sequence = tf.reshape(sequence, [-1, height, width, num_channels_bottleneck])
        sequence = tf.image.resize_images(sequence, [new_height, new_width],
                                          method=tf.image.ResizeMethod.BILINEAR)

    # One row per (frame, y, x) position.
    net = tf.reshape(sequence, [batch_size, -1, num_channels_bottleneck])

    # Project-local knn modules, imported lazily so only the selected one
    # is loaded.
    if distance == 'l2':
        import knn_l2 as knn
    elif distance == 'dot':
        import knn_dot as knn
    else:  # 'cos' -- the only value left after the check above
        import knn_cosin as knn

    # NOTE(review): the third argument is the number of positions per
    # frame; its exact meaning is defined by the knn op -- confirm there.
    nn_idx = knn.knn(net, k, new_height * new_width)

    net_expand = tf.tile(tf.expand_dims(net, axis=2), [1, 1, k, 1])
    net_grouped = tf_grouping.group_point(net, nn_idx)

    coord = get_coord(tf.reshape(
        sequence,
        [batch_size, -1, new_height, new_width, num_channels_bottleneck]))
    coord_expand = tf.tile(tf.expand_dims(coord, axis=2), [1, 1, k, 1])
    coord_grouped = tf_grouping.group_point(coord, nn_idx)
    coord_diff = coord_grouped - coord_expand

    end_points['coord'] = {'coord': coord,
                           'coord_grouped': coord_grouped,
                           'coord_diff': coord_diff}

    # Per-pair feature: [coordinate offset, own feature, neighbour feature].
    net = tf.concat([coord_diff, net_expand, net_grouped], axis=-1)

    with tf.variable_scope(scope):
        for i, num_out_channel in enumerate(mlp):
            net = tf_util.conv2d(net, num_out_channel, [1, 1],
                                 padding='VALID', stride=[1, 1],
                                 bn=True, is_training=is_training,
                                 scope='conv%d' % (i), bn_decay=bn_decay,
                                 weight_decay=weight_decay,
                                 data_format=data_format,
                                 freeze_bn=freeze_bn)

    # Max over the neighbour axis (axis 2).
    end_points['before_max'] = net
    net = tf.reduce_max(net, axis=[2], keepdims=True, name='maxpool')
    end_points['after_max'] = net

    net = tf.reshape(net, [batch_size, num_frames, new_height, new_width, mlp[-1]])

    with tf.variable_scope(scope):
        net = tf_util.conv3d(net, num_channels, [1, 1, 1], stride=[1, 1, 1],
                             bn=False, activation_fn=None,
                             weight_decay=weight_decay, scope='conv_final')
        # gamma starts at zero; presumably so the module contributes nothing
        # at initialisation when added to a residual path -- TODO confirm.
        net = tf.contrib.layers.batch_norm(
            net, center=True, scale=True,
            is_training=is_training if not freeze_bn else tf.constant(False, shape=(), dtype=tf.bool),
            decay=bn_decay, updates_collections=None,
            scope='bn_final',
            data_format=data_format,
            param_initializers={'gamma': tf.constant_initializer(0., dtype=tf.float32)},
            trainable=not freeze_bn)

    # Undo the spatial shrink so the output matches the input resolution.
    if shrink_ratio is not None:
        net = tf.reshape(net, [-1, new_height, new_width, num_channels])
        net = tf.image.resize_images(net, [height, width],
                                     method=tf.image.ResizeMethod.BILINEAR)
        net = tf.reshape(net, [batch_size, -1, height, width, num_channels])

    if activation_fn is not None:
        net = activation_fn(net)

    return net, end_points