Example #1
def build_down_block(inputs,
                     name,
                     down_outputs,
                     first=False,
                     TPS=False,
                     is_training=True,
                     bn_decay=None):
    out_num = (voxel_start_channel_num if first
               else 2 * inputs.shape[voxel_channel_axis].value)

    conv1 = tf_util.conv3d(inputs,
                           out_num,
                           voxel_conv_size,
                           name + '/conv1',
                           bn=True,
                           is_training=is_training,
                           bn_decay=bn_decay)
    #if TPS == True:
    #    conv1= self.transform.Encoder(conv1,conv1)
    conv2 = tf_util.conv3d(conv1,
                           out_num,
                           voxel_conv_size,
                           name + '/conv2',
                           bn=True,
                           is_training=is_training,
                           bn_decay=bn_decay)
    down_outputs.append(conv2)
    pool = ops.pool(conv2, voxel_pool_size, name + '/pool')
    return pool
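
build_down_block depends on module-level configuration (voxel_start_channel_num, voxel_channel_axis, voxel_conv_size, voxel_pool_size) defined elsewhere in its source file. A minimal sketch of how such a block is typically chained into a U-Net-style encoder; the voxel_* values and the build_encoder helper below are hypothetical stand-ins, not from the source:

voxel_start_channel_num = 16
voxel_channel_axis = 4          # channels-last for (B, D, H, W, C) tensors
voxel_conv_size = 3
voxel_pool_size = 2

def build_encoder(inputs, depth=3, is_training=True, bn_decay=None):
    """Chain `depth` down blocks, collecting skip connections for a decoder."""
    down_outputs = []
    net = inputs
    for d in range(depth):
        net = build_down_block(net, 'down%d' % d, down_outputs,
                               first=(d == 0), is_training=is_training,
                               bn_decay=bn_decay)
    return net, down_outputs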
Example #2
def inception_module(input,
                     n_filters=64,
                     kernel_sizes=[3, 5],
                     is_training=None,
                     bn_decay=None,
                     scope='inception'):
    one_by_one = tf_util.conv3d(input,
                                n_filters, [1, 1, 1],
                                scope=scope + '_conv1',
                                stride=[1, 1, 1],
                                padding='SAME',
                                bn=True,
                                bn_decay=bn_decay,
                                is_training=is_training)
    three_by_three = tf_util.conv3d(
        one_by_one,
        int(n_filters / 2),
        [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]],
        scope=scope + '_conv2',
        stride=[1, 1, 1],
        padding='SAME',
        bn=True,
        bn_decay=bn_decay,
        is_training=is_training)
    five_by_five = tf_util.conv3d(
        one_by_one,
        int(n_filters / 2),
        [kernel_sizes[1], kernel_sizes[1], kernel_sizes[1]],
        scope=scope + '_conv3',
        stride=[1, 1, 1],
        padding='SAME',
        bn=True,
        bn_decay=bn_decay,
        is_training=is_training)
    average_pooling = tf_util.avg_pool3d(
        input, [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]],
        scope=scope + '_avg_pool',
        stride=[1, 1, 1],
        padding='SAME')
    average_pooling = tf_util.conv3d(average_pooling,
                                     n_filters, [1, 1, 1],
                                     scope=scope + '_conv4',
                                     stride=[1, 1, 1],
                                     padding='SAME',
                                     bn=True,
                                     bn_decay=bn_decay,
                                     is_training=is_training)

    output = tf.concat(
        [one_by_one, three_by_three, five_by_five, average_pooling], axis=4)
    #output = output + tf.tile(input) ??? #resnet
    return output
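
Channel arithmetic at the concat: the four branches contribute n_filters + n_filters/2 + n_filters/2 + n_filters channels, so the module widens its input to 3 * n_filters channels. A usage sketch; the input shape is an assumption, not a value from the source:

import tensorflow as tf

grid = tf.placeholder(tf.float32, [8, 16, 16, 16, 32])   # (B, K, K, K, FV), assumed
is_training_pl = tf.placeholder(tf.bool, shape=())
out = inception_module(grid, n_filters=64, is_training=is_training_pl)
# out: (8, 16, 16, 16, 192), since 64 + 32 + 32 + 64 == 3 * 64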
Example #3
def get_model(sequence, is_training, num_classes=10, bn_decay=0.999, weight_decay=0.0001, sn=4, pool_t=False, pool_first=False, freeze_bn=False):
    """ sequence Net, input is BxTxHxWx3, output Bx400 """
    bsize = sequence.get_shape()[0].value
    end_points = {}

    channel_stride = [(64, 1), (128, 2), (256, 2), (512, 2)]
    # res block options
    num_blocks = [2, 2, 2, 2]
    topks = [None, sn, sn, None]
    shrink_ratios = [None, 2, None, None]

    first_stride = 2 if pool_first else 1
    net = tf_util.conv3d(sequence, 64, [1, 3, 3], stride=[1, first_stride, first_stride],
                         bn=True, bn_decay=bn_decay, is_training=is_training,
                         weight_decay=weight_decay, freeze_bn=freeze_bn, scope='conv0')
    net = tf_util.max_pool3d(net, [1, 3, 3], stride=[1, 2, 2], scope='pool0', padding='SAME')

    for gp, cs in enumerate(channel_stride):
        n_channels = cs[0]
        stride = cs[1]
        with tf.variable_scope('group{}'.format(gp)):
            for i in range(num_blocks[gp]):
                with tf.variable_scope('block{}'.format(i)):
                    end_points['res{}_{}_in'.format(gp, i)] = net
                    if i == 0:
                        net_bra = tf_util.conv3d(net, n_channels, [1, 3, 3], stride=[1, stride, stride], bn=True, bn_decay=bn_decay, \
                                is_training=is_training, weight_decay=weight_decay, freeze_bn=freeze_bn, scope='conv1')
                    else:
                        net_bra = tf_util.conv3d(net, n_channels, [1, 3, 3], stride=[1, 1, 1], bn=True, bn_decay=bn_decay, \
                                is_training=is_training, weight_decay=weight_decay, freeze_bn=freeze_bn, scope='conv1')
                    net_bra = tf_util.conv3d(net_bra, n_channels, [1, 3, 3], stride=[1, 1, 1], bn=True, bn_decay=bn_decay, \
                            is_training=is_training, activation_fn=None, weight_decay=weight_decay, freeze_bn=freeze_bn, scope='conv2')
                    if net.get_shape()[-1].value != n_channels:
                        net = tf_util.conv3d(net, n_channels, [1, 1, 1], stride=[1, stride, stride], bn=True, bn_decay=bn_decay, \
                                is_training=is_training, activation_fn=None, weight_decay=weight_decay, freeze_bn=freeze_bn, scope='convshortcut')
                    net = net + net_bra
                    end_points['res{}_{}_mid'.format(gp, i)] = net
                    if topks[gp] is not None:
                        c = net.get_shape()[-1].value
                        net_pointnet, end_point = net_utils.senot_module(net, k=topks[gp], mlp=[c//4,c//2], scope='pointnet', is_training=is_training, bn_decay=bn_decay, \
                                weight_decay=weight_decay, distance='l2', activation_fn=None, freeze_bn=freeze_bn, shrink_ratio=shrink_ratios[gp])
                        net += net_pointnet
                        end_points['pointnet{}_{}'.format(gp, i)] = end_point
                        end_points['after_pointnet{}_{}'.format(gp, i)] = net
                    net = tf.nn.relu(net)
                    end_points['res{}_{}_out'.format(gp, i)] = net

    net = tf.reduce_mean(net, [1,2,3])
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, weight_decay=weight_decay, scope='fc')

    return net, end_points
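
A minimal instantiation sketch; the batch size, clip length, and frame size here are assumptions rather than values from the source:

import tensorflow as tf

videos = tf.placeholder(tf.float32, [4, 16, 112, 112, 3])   # B x T x H x W x 3
is_training_pl = tf.placeholder(tf.bool, shape=())
logits, end_points = get_model(videos, is_training_pl, num_classes=10, sn=4)
# logits: (4, 10); end_points exposes the intermediate tensors of every block.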
Example #4
def get_sdf_3dcnn_binary(grid_idx, globalfeats, is_training, batch_size, num_point, bn, bn_decay, wd=None, FLAGS=None):
    globalfeats_expand = tf.reshape(globalfeats, [batch_size, 1, 1, 1, -1])
    print('globalfeats_expand', globalfeats_expand.get_shape())
    net2 = tf_util.conv3d_transpose(globalfeats_expand, 128, [2, 2, 2], stride=[2, 2, 2],
                                    bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, scope='3deconv1') # 2

    net2 = tf_util.conv3d_transpose(net2, 128, [3, 3, 3], stride=[2, 2, 2],bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, scope='3deconv2') # 4

    net2 = tf_util.conv3d_transpose(net2, 128, [3, 3, 3], stride=[2, 2, 2], bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, scope='3deconv3')  # 8

    net2 = tf_util.conv3d_transpose(net2, 64, [3, 3, 3], stride=[2, 2, 2], bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, scope='3deconv4')  # 16

    net2 = tf_util.conv3d_transpose(net2, 64, [3, 3, 3], stride=[2, 2, 2], bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, scope='3deconv5')  # 32

    net2 = tf_util.conv3d_transpose(net2, 32, [3, 3, 3], stride=[2, 2, 2], bn_decay=bn_decay, bn=bn,
                                    is_training=is_training, weight_decay=wd, padding='VALID', scope='3deconv6') # 65

    net2 = tf_util.conv3d(net2, 2, [1, 1, 1], stride=[1, 1, 1], bn_decay=bn_decay, bn=bn, activation_fn=None,
                                is_training=is_training, weight_decay=wd, padding='VALID', scope='3conv7_binary')
    res_plus = FLAGS.sdf_res+1
    full_inter = tf.reshape(net2, (batch_size, res_plus, res_plus, res_plus))

    print("3d cnn net2 shape:", full_inter.get_shape())

    pred = tf.reshape(full_inter, [batch_size, -1, 2])
    return pred
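
The trailing comments track the output grid size. Under the usual TF1 rules a transposed convolution grows the size to in * stride with 'SAME' padding and to (in - 1) * stride + kernel with 'VALID', which reproduces the 1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 65 progression (65 = FLAGS.sdf_res + 1 for sdf_res = 64). A quick check, assuming tf_util's conv3d_transpose defaults to 'SAME' padding:

def deconv_out_size(size, kernel, stride, padding='SAME'):
    # TF1 conv3d_transpose output-size rule along one dimension.
    return size * stride if padding == 'SAME' else (size - 1) * stride + kernel

size = 1
for kernel, padding in [(2, 'SAME'), (3, 'SAME'), (3, 'SAME'),
                        (3, 'SAME'), (3, 'SAME'), (3, 'VALID')]:
    size = deconv_out_size(size, kernel, stride=2, padding=padding)
print(size)  # 65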
Example #5
def build_bottom_block(inputs, name, is_training=True, bn_decay=None):
    out_num = inputs.shape[voxel_channel_axis].value
    conv1 = tf_util.conv3d(inputs,
                           2 * out_num,
                           voxel_conv_size,
                           name + '/conv1',
                           bn=True,
                           is_training=is_training,
                           bn_decay=bn_decay)
    conv2 = tf_util.conv3d(conv1,
                           out_num,
                           voxel_conv_size,
                           name + '/conv2',
                           bn=True,
                           is_training=is_training,
                           bn_decay=bn_decay)
    return conv2
Example #6
def get_model(point_cloud, is_training, bn_decay=None):
    """ Classification PointNet, input is BxNx3, output Bx40 """
    batch_size = point_cloud.get_shape()[0].value
    #    vx = point_cloud.get_shape()[1].value
    #    vy = point_cloud.get_shape()[2].value
    #    vz = point_cloud.get_shape()[3].value
    end_points = {}
    input_image = tf.expand_dims(point_cloud, -1)

    net = tf_util.conv3d(input_image,
                         32, [5, 5, 5],
                         scope='conv1',
                         stride=[2, 2, 2],
                         bn=True,
                         is_training=is_training,
                         padding='SAME',
                         bn_decay=bn_decay)

    net = tf_util.conv3d(net,
                         32, [3, 3, 3],
                         scope='conv2',
                         stride=[2, 2, 2],
                         bn=True,
                         is_training=is_training,
                         padding='SAME',
                         bn_decay=bn_decay)

    # Spatial max pooling
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')

    # MLP on global point cloud vector
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net,
                                  128,
                                  bn=True,
                                  is_training=is_training,
                                  scope='fc1',
                                  bn_decay=bn_decay)
    net = tf_util.dropout(net,
                          keep_prob=0.5,
                          is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, 10, activation_fn=None, scope='fc2')

    return net, end_points
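
Both convolutions use 'SAME' padding with stride 2 and the max pool is 'VALID', so the flattened feature count entering fc1 depends only on the input resolution. A quick check for an assumed 32^3 grid (and assuming max_pool3d defaults to stride [2, 2, 2]):

import math

v = 32                       # assumed voxel resolution
v = int(math.ceil(v / 2.0))  # conv1, 'SAME' stride 2 -> 16
v = int(math.ceil(v / 2.0))  # conv2, 'SAME' stride 2 -> 8
v = (v - 2) // 2 + 1         # maxpool, 'VALID' 2x2x2 stride 2 -> 4
print(v ** 3 * 32)           # 2048 features into fc1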
Example #7
def get_model(points, w, mu, sigma, is_training, bn_decay=None, weigth_decay=0.005, add_noise=False, num_classes=40):
    """ Classification PointNet, input is BxNx3, output Bx40 """
    batch_size = points.get_shape()[0].value
    n_points = points.get_shape()[1].value
    n_gaussians = w.shape[0].value
    res = int(np.round(np.power(n_gaussians,1.0/3.0)))


    fv = tf_util.get_fv_minmax(points, w, mu, sigma, flatten=False)

    if add_noise:
        noise = tf.cond(is_training,
                        lambda: tf.random_normal(shape=tf.shape(fv), mean=0.0, stddev=0.01, dtype=tf.float32),
                        lambda:  tf.zeros(shape=tf.shape(fv)))
        #noise = tf.random_normal(shape=tf.shape(fv), mean=0.0, stddev=0.01, dtype=tf.float32)
        fv = fv + noise

    grid_fisher = tf.reshape(fv, [batch_size, -1, res, res, res])
    grid_fisher = tf.transpose(grid_fisher, [0, 2, 3, 4, 1])

    # 3D VoxNet-style CNN on the Fisher-vector (pFV) grid
    layer = 1
    net = tf_util.conv3d(grid_fisher, 32, [5, 5, 5], scope='conv'+str(layer),
                         stride=[2, 2, 2], padding='SAME', bn=True,
                         bn_decay=bn_decay, is_training=is_training)
    layer = layer + 1
    net = tf_util.conv3d(net, 32, [3, 3, 3], scope='conv'+str(layer),
                         stride=[1, 1, 1], padding='SAME', bn=True,
                         bn_decay=bn_decay, is_training=is_training)
    layer = layer + 1
    net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool'+str(layer), stride=[2, 2, 2], padding='SAME')

    net = tf.reshape(net,[batch_size, -1])

    #Classifier
    net = tf_util.fully_connected(net, 128, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay, weigth_decay=weigth_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc4', is_training=is_training, weigth_decay=weigth_decay)

    return net, fv
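
Because the Fisher-vector grid is reshaped to res x res x res, n_gaussians is expected to be a perfect cube. A small sketch of that relationship, with a hypothetical value:

import numpy as np

n_gaussians = 512                                      # 8 ** 3, hypothetical
res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))  # -> 8
assert res ** 3 == n_gaussians, 'n_gaussians must be a perfect cube'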
Example #8
def inception_module(input, n_filters=64, kernel_sizes=[3,5], is_training=None, bn_decay=None, scope='inception'):
    '''
    :param input: [B,K,K,K,FV]
    :param n_filters: output width of each of the four branches
    :return: output: [B,K,K,K,n_filters*4]
    '''
    one_by_one =  tf_util.conv3d(input, n_filters, [1,1,1], scope= scope + '_conv1',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)
    three_by_three = tf_util.conv3d(one_by_one, int(n_filters), [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope= scope + '_conv2',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)
    five_by_five = tf_util.conv3d(one_by_one, int(n_filters), [kernel_sizes[1], kernel_sizes[1], kernel_sizes[1]], scope=scope + '_conv3',
                          stride=[1, 1, 1], padding='SAME', bn=True,
                          bn_decay=bn_decay, is_training=is_training)
    average_pooling = tf_util.avg_pool3d(input, [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope=scope+'_avg_pool', stride=[1, 1, 1], padding='SAME')
    average_pooling = tf_util.conv3d(average_pooling, n_filters, [1,1,1], scope= scope + '_conv4',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)

    output = tf.concat([ one_by_one, three_by_three, five_by_five, average_pooling], axis=4)
    return output
Example #9
def build_up_block(inputs,
                   down_inputs,
                   name,
                   final=False,
                   Decoder=False,
                   is_training=True,
                   bn_decay=None):
    out_num = inputs.shape[voxel_channel_axis].value
    conv1 = deconv_func()(inputs,
                          out_num,
                          voxel_conv_size,
                          name + '/conv1',
                          action=voxel_action,
                          is_training=is_training,
                          bn_decay=bn_decay)
    conv1 = tf.concat([conv1, down_inputs],
                      voxel_channel_axis,
                      name=name + '/concat')
    conv2 = tf_util.conv3d(conv1,
                           out_num,
                           voxel_conv_size,
                           name + '/conv2',
                           bn=True,
                           is_training=is_training,
                           bn_decay=bn_decay)
    #if Decoder == True:
    #    conv2 = self.transform.Decoder(conv2,conv2)
    out_num = voxel_class_num if final else out_num // 2
    conv3 = tf_util.conv3d(conv2,
                           out_num,
                           voxel_conv_size,
                           name + '/conv3',
                           bn=(not final),
                           is_training=is_training,
                           bn_decay=bn_decay)
    return conv3
Example #10
def get_model(source_point_cloud,
              template_point_cloud,
              is_training,
              bn_decay=None):
    point_cloud = tf.concat([source_point_cloud, template_point_cloud], 0)
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    input_image = tf.expand_dims(point_cloud, -1)

    net = tf_util.conv3d(input_image,
                         32, [5, 5, 5],
                         padding='VALID',
                         stride=[2, 2, 2],
                         bn=False,
                         is_training=is_training,
                         scope='conv1',
                         bn_decay=bn_decay)
    net = tf_util.conv3d(net,
                         32, [3, 3, 3],
                         padding='VALID',
                         stride=[1, 1, 1],
                         bn=False,
                         is_training=is_training,
                         scope='conv2',
                         bn_decay=bn_decay)

    # Spatial max pooling
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')
    net = tf.reshape(net, [batch_size, -1])
    print(net)
    source_global_feature = tf.slice(net, [0, 0], [int(batch_size / 2), 6912])
    template_global_feature = tf.slice(net, [int(batch_size / 2), 0],
                                       [int(batch_size / 2), 6912])
    return source_global_feature, template_global_feature
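
The hard-coded 6912 in the tf.slice calls pins this network to one input resolution. With 'VALID' padding, a 32^3 voxel grid yields 14^3 after conv1, 12^3 after conv2, and 6^3 after the pool, and 6^3 * 32 channels = 6912 features, so a 32^3 input appears to be the intended size (assuming max_pool3d defaults to stride [2, 2, 2]). A sanity check of that arithmetic:

size = 32                   # assumed input grid side
size = (size - 5) // 2 + 1  # conv1, 'VALID' 5x5x5 stride 2 -> 14
size = (size - 3) // 1 + 1  # conv2, 'VALID' 3x3x3 stride 1 -> 12
size = (size - 2) // 2 + 1  # maxpool, 'VALID' 2x2x2 stride 2 -> 6
print(size ** 3 * 32)       # 6912 features per half-batch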
Example #11
def get_model(pointgrid, is_training):
    # Args:
    #     pointgrid: of size B x N x N x N x NUM_FEATURES
    #     is_training: boolean tensor
    # Returns:
    #     pred_cat: of size B x NUM_CATEGORY
    #     pred_seg: of size B x N x N x N x (K+1) x NUM_PART_SEG

    # Encoder
    batch_size = pointgrid.get_shape()[0].value
    conv1 = tf_util.conv3d(pointgrid,
                           64, [5, 5, 5],
                           scope='conv1',
                           activation_fn=leak_relu,
                           bn=True,
                           is_training=is_training)  # N
    conv2 = tf_util.conv3d(conv1,
                           64, [5, 5, 5],
                           scope='conv2',
                           activation_fn=leak_relu,
                           stride=[2, 2, 2],
                           bn=True,
                           is_training=is_training)  # N/2
    conv3 = tf_util.conv3d(conv2,
                           64, [5, 5, 5],
                           scope='conv3',
                           activation_fn=leak_relu,
                           bn=True,
                           is_training=is_training)  # N/2
    conv4 = tf_util.conv3d(conv3,
                           128, [3, 3, 3],
                           scope='conv4',
                           activation_fn=leak_relu,
                           stride=[2, 2, 2],
                           bn=True,
                           is_training=is_training)  # N/4
    conv5 = tf_util.conv3d(conv4,
                           128, [3, 3, 3],
                           scope='conv5',
                           activation_fn=leak_relu,
                           bn=True,
                           is_training=is_training)  # N/4
    conv6 = tf_util.conv3d(conv5,
                           256, [3, 3, 3],
                           scope='conv6',
                           activation_fn=leak_relu,
                           stride=[2, 2, 2],
                           bn=True,
                           is_training=is_training)  # N/8
    conv7 = tf_util.conv3d(conv6,
                           256, [3, 3, 3],
                           scope='conv7',
                           activation_fn=leak_relu,
                           bn=True,
                           is_training=is_training)  # N/8
    conv8 = tf_util.conv3d(conv7,
                           512, [3, 3, 3],
                           scope='conv8',
                           activation_fn=leak_relu,
                           stride=[2, 2, 2],
                           bn=True,
                           is_training=is_training)  # N/16
    conv9 = tf_util.conv3d(conv8,
                           512, [1, 1, 1],
                           scope='conv9',
                           activation_fn=leak_relu,
                           bn=True,
                           is_training=is_training)  # N/16

    # Classification Network
    conv9_flat = tf.reshape(conv9, [batch_size, -1])
    fc1 = tf_util.fully_connected(conv9_flat,
                                  512,
                                  activation_fn=leak_relu,
                                  bn=True,
                                  is_training=is_training,
                                  scope='fc1')
    do1 = tf_util.dropout(fc1,
                          keep_prob=0.7,
                          is_training=is_training,
                          scope='do1')
    fc2 = tf_util.fully_connected(do1,
                                  256,
                                  activation_fn=leak_relu,
                                  bn=True,
                                  is_training=is_training,
                                  scope='fc2')
    do2 = tf_util.dropout(fc2,
                          keep_prob=0.7,
                          is_training=is_training,
                          scope='do2')
    pred_cat = tf_util.fully_connected(do2,
                                       NUM_CATEGORY,
                                       activation_fn=None,
                                       bn=False,
                                       scope='pred_cat')

    # Segmentation Network
    cat_features = tf.tile(
        tf.reshape(tf.concat([fc2, pred_cat], axis=1),
                   [batch_size, 1, 1, 1, -1]), [1, N // 16, N // 16, N // 16, 1])
    conv9_cat = tf.concat([conv9, cat_features], axis=4)
    deconv1 = tf_util.conv3d_transpose(conv9_cat,
                                       256, [3, 3, 3],
                                       scope='deconv1',
                                       activation_fn=leak_relu,
                                       bn=True,
                                       is_training=is_training,
                                       stride=[2, 2, 2],
                                       padding='SAME')  # N/8
    conv7_deconv1 = tf.concat(axis=4, values=[conv7, deconv1])
    deconv2 = tf_util.conv3d(conv7_deconv1,
                             256, [3, 3, 3],
                             scope='deconv2',
                             activation_fn=leak_relu,
                             bn=True,
                             is_training=is_training)  # N/8
    deconv3 = tf_util.conv3d_transpose(deconv2,
                                       128, [3, 3, 3],
                                       scope='deconv3',
                                       activation_fn=leak_relu,
                                       bn=True,
                                       is_training=is_training,
                                       stride=[2, 2, 2],
                                       padding='SAME')  # N/4
    conv5_deconv3 = tf.concat(axis=4, values=[conv5, deconv3])
    deconv4 = tf_util.conv3d(conv5_deconv3,
                             128, [3, 3, 3],
                             scope='deconv4',
                             activation_fn=leak_relu,
                             bn=True,
                             is_training=is_training)  # N/4
    deconv5 = tf_util.conv3d_transpose(deconv4,
                                       64, [3, 3, 3],
                                       scope='deconv5',
                                       activation_fn=leak_relu,
                                       bn=True,
                                       is_training=is_training,
                                       stride=[2, 2, 2],
                                       padding='SAME')  # N/2
    conv3_deconv5 = tf.concat(axis=4, values=[conv3, deconv5])
    deconv6 = tf_util.conv3d(conv3_deconv5,
                             64, [5, 5, 5],
                             scope='deconv6',
                             activation_fn=leak_relu,
                             bn=True,
                             is_training=is_training)  # N/2
    deconv7 = tf_util.conv3d_transpose(deconv6,
                                       64, [5, 5, 5],
                                       scope='deconv7',
                                       activation_fn=leak_relu,
                                       bn=True,
                                       is_training=is_training,
                                       stride=[2, 2, 2],
                                       padding='SAME')  # N
    conv1_deconv7 = tf.concat(axis=4, values=[conv1, deconv7])
    deconv8 = tf_util.conv3d(conv1_deconv7,
                             64, [5, 5, 5],
                             scope='deconv8',
                             activation_fn=leak_relu,
                             bn=True,
                             is_training=is_training)  # N

    pred_seg = tf_util.conv3d(deconv8, (K + 1) * NUM_SEG_PART, [5, 5, 5],
                              scope='pred_seg',
                              activation_fn=None,
                              bn=False,
                              is_training=is_training)
    pred_seg = tf.reshape(pred_seg, [batch_size, N, N, N, K + 1, NUM_SEG_PART])

    return pred_cat, pred_seg
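
The four stride-2 encoder stages and the tile at the classification-to-segmentation bridge assume N is divisible by 16. A minimal instantiation sketch; the module-level constants below are hypothetical stand-ins, and leak_relu plus tf_util from the snippet's module are assumed in scope:

import tensorflow as tf

N, K, NUM_FEATURES = 16, 3, 13         # hypothetical module-level constants
NUM_CATEGORY, NUM_SEG_PART = 16, 50    # hypothetical
pointgrid = tf.placeholder(tf.float32, [2, N, N, N, NUM_FEATURES])
is_training_pl = tf.placeholder(tf.bool, shape=())
pred_cat, pred_seg = get_model(pointgrid, is_training_pl)
# pred_cat: (2, NUM_CATEGORY); pred_seg: (2, N, N, N, K + 1, NUM_SEG_PART)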
Example #12
def get_model(point_cloud, input_label, is_training, cat_num, part_num,
              batch_size, num_point, weight_decay, bn_decay=None):
    """ ConvNet baseline, input is BxNx3 gray image """
    end_points = {}
    KNN = 12

    with tf.variable_scope('transform_net1') as sc:
        K = 3
        transform = get_transform(point_cloud, is_training, bn_decay, K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)

    # KNN search
    knn_point = KNN_search(point_cloud_transformed,
                           KNN=KNN,
                           name_scope='KNN_search')  # 32 x 1024 x KNN x 3
    knn_point = tf.expand_dims(knn_point, axis=-1)  # 32 x 1024 x KNN x 3 x 1

    point_cloud_transformed = tf.expand_dims(point_cloud_transformed, axis=-1)
    out0 = tf_util.conv2d(point_cloud_transformed,
                          64, [1, 3],
                          padding='VALID',
                          stride=[1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='conv',
                          bn_decay=bn_decay)  # 32 x 1024 x 1 x 64
    out0_tile = tf.tile(out0, multiples=[1, 1, KNN, 1])  # 32 x 1024 x KNN x 64
    out0_tile = tf.expand_dims(out0_tile, axis=-2)

    out1 = tf_util.conv3d(knn_point,
                          64, [1, 1, 3],
                          padding='VALID',
                          stride=[1, 1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='conv1',
                          bn_decay=bn_decay)
    concat = tf.concat(values=[out1, out0_tile], axis=-1)

    out2 = tf_util.conv3d(concat,
                          128, [1, 1, 1],
                          padding='VALID',
                          stride=[1, 1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='conv2',
                          bn_decay=bn_decay)
    out3 = tf_util.conv3d(out2,
                          128, [1, 1, 1],
                          padding='VALID',
                          stride=[1, 1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='conv3',
                          bn_decay=bn_decay)

    pool_k = tf_util.max_pool3d(out3,
                                kernel_size=[1, KNN, 1],
                                stride=[1, 2, 2],
                                padding='VALID',
                                scope='pool_k')  # 32 x 1024 x 1 x 1 x 128
    pool_k = tf.squeeze(pool_k, axis=2)

    # VLAD layer
    vlad_out, index = VLAD(pool_k,
                           16,
                           is_training,
                           bn_decay,
                           layer_name='VLAD')

    out4 = tf_util.conv2d(vlad_out,
                          512, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='vlad_conv3',
                          bn_decay=bn_decay)
    out5 = tf_util.conv2d(out4,
                          2048, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          bn=True,
                          is_training=is_training,
                          scope='vlad_conv4',
                          bn_decay=bn_decay)

    out_max = tf.nn.max_pool(out5,
                             ksize=[1, num_point, 1, 1],
                             strides=[1, 2, 2, 1],
                             padding='VALID')  # 32 x 1 x 1 x 2048

    # classification network
    net = tf.reshape(out_max, [batch_size, -1])  # 32 x 2048
    net = tf_util.fully_connected(net,
                                  256,
                                  bn=True,
                                  is_training=is_training,
                                  scope='cla/fc1',
                                  bn_decay=bn_decay)
    net = tf_util.fully_connected(net,
                                  256,
                                  bn=True,
                                  is_training=is_training,
                                  scope='cla/fc2',
                                  bn_decay=bn_decay)
    net = tf_util.dropout(net,
                          keep_prob=0.7,
                          is_training=is_training,
                          scope='cla/dp1')
    net = tf_util.fully_connected(net,
                                  cat_num,
                                  activation_fn=None,
                                  scope='cla/fc3')

    # segmentation network
    one_hot_label_expand = tf.reshape(input_label, [batch_size, 1, 1, cat_num])
    out_max = tf.concat(axis=3, values=[out_max, one_hot_label_expand])

    expand = tf.tile(out_max, [1, num_point, 1, 1])
    concat = tf.concat(axis=3,
                       values=[expand, out0, pool_k, vlad_out, out4, out5])

    net2 = tf_util.conv2d(concat,
                          256, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          bn_decay=bn_decay,
                          bn=True,
                          is_training=is_training,
                          scope='seg/conv1',
                          weight_decay=weight_decay)
    net2 = tf_util.dropout(net2,
                           keep_prob=0.8,
                           is_training=is_training,
                           scope='seg/dp1')
    net2 = tf_util.conv2d(net2,
                          256, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          bn_decay=bn_decay,
                          bn=True,
                          is_training=is_training,
                          scope='seg/conv2',
                          weight_decay=weight_decay)
    net2 = tf_util.dropout(net2,
                           keep_prob=0.8,
                           is_training=is_training,
                           scope='seg/dp2')
    net2 = tf_util.conv2d(net2,
                          128, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          bn_decay=bn_decay,
                          bn=True,
                          is_training=is_training,
                          scope='seg/conv3',
                          weight_decay=weight_decay)
    net2 = tf_util.conv2d(net2,
                          part_num, [1, 1],
                          padding='VALID',
                          stride=[1, 1],
                          activation_fn=None,
                          bn=False,
                          scope='seg/conv4',
                          weight_decay=weight_decay)

    net2 = tf.reshape(net2, [batch_size, num_point, part_num])

    return net, net2, end_points
Example #13
def pointnet_sa_module(cascade_id,
                       xyz,
                       points,
                       bidmap,
                       mlp_configs,
                       block_bottom_center_mm,
                       configs,
                       sgf_config_pls,
                       is_training,
                       bn_decay,
                       scope,
                       bn=True,
                       tnet_spec=None,
                       use_xyz=True,
                       IsShowModel=False):
    '''
    Input cascade_id==0:
        xyz is grouped_points: (batch_size,nsubblock0,npoint_subblock0,6)
        points: None
        bidmap: None
    Input cascade_id==1:
        xyz: (batch_size,nsubblock0,3)
        points: (batch_size,nsubblock0,channel)
        bidmap: (batch_size,nsubblock1,npoint_subblock1)
    Intermediate cascade_id==1:
        grouped_xyz: (batch_size,nsubblock1,npoint_subblock1,3)
        new_xyz: (batch_size,nsubblock1,3)
        group_points: (batch_size,nsubblock1,npoint_subblock1,channel)

    Output cascade_id==1:
        new_xyz: (batch_size,nsubblock1,3)
        new_points: (batch_size,nsubblock1,channel)
    '''
    block_bottom_center_mm = tf.cast(
        block_bottom_center_mm, tf.float32, name='block_bottom_center_mm'
    )  # gpu_0/sa_layer3/block_bottom_center_mm:0
    batch_size = xyz.get_shape()[0].value
    with tf.variable_scope(scope) as sc:
        # cascade_num includes the global cascade here.
        # (Note: cascade_num does not include global in block_pre_util.)
        cascade_num = configs['flatten_bm_extract_idx'].shape[0] - 1
        assert configs['sub_block_step_candis'].size == cascade_num - 1
        if cascade_id == 0:
            indrop_keep_mask = tf.get_default_graph().get_tensor_by_name(
                'indrop_keep_mask:0')  # indrop_keep_mask:0

        assert len(xyz.shape) == 3

        if bidmap is None:
            grouped_xyz = tf.expand_dims(xyz, 1)
            grouped_points = tf.expand_dims(points, 1)
            new_xyz = None
            valid_mask = None
        else:
            batch_idx = tf.reshape(tf.range(batch_size), [batch_size, 1, 1, 1])
            nsubblock = bidmap.get_shape()[1].value
            npoint_subblock = bidmap.get_shape()[2].value
            batch_idx_ = tf.tile(batch_idx, [1, nsubblock, npoint_subblock, 1])
            bidmap = tf.expand_dims(bidmap, axis=-1, name='bidmap')
            bidmap_concat = tf.concat(
                [batch_idx_, bidmap], axis=-1,
                name='bidmap_concat')  # gpu_0/sa_layer0/bidmap_concat:0
            # The value for an invalid item in bidmap is -17.
            # On GPU the corresponding grouped_xyz and grouped_points are 0;
            # this does NOT work on CPU!

            # Invalid indices come from merge_blocks_while_fix_bmap, which sets
            # point_indices_f for invalid points to
            # NETCONFIG['redundant_points_in_block'] (should be set < -500).
            valid_mask = tf.greater(bidmap, tf.constant(
                -500, tf.int32), 'valid_mask')  # gpu_0/sa_layer0/valid_mask:0

            grouped_xyz = tf.gather_nd(
                xyz, bidmap_concat,
                name='grouped_xyz')  # gpu_0/sa_layer0/grouped_xyz:0
            grouped_points = tf.gather_nd(points,
                                          bidmap_concat,
                                          name='group_points')
            if cascade_id == 0 and len(indrop_keep_mask.get_shape()) != 0:
                grouped_indrop_keep_mask = tf.gather_nd(
                    indrop_keep_mask,
                    bidmap_concat,
                    name='grouped_indrop_keep_mask'
                )  # gpu_0/sa_layer0/grouped_indrop_keep_mask:0

        # new_xyz is the "voxel center" or "mean position of points in the voxel"
        if configs['mean_grouping_position'] and (
                not mlp_configs['block_learning'] == '3DCNN'):
            new_xyz = tf.reduce_mean(grouped_xyz, -2)
        else:
            new_xyz = block_bottom_center_mm[:, :, 3:6] * tf.constant(
                0.001, tf.float32)
        # The mid can be the mean or the block center, decided by configs['mean_grouping_position'].
        sub_block_mid = tf.expand_dims(
            new_xyz, -2, name='sub_block_mid')  # gpu_1/sa_layer0/sub_block_mid
        global_block_mid = tf.reduce_mean(sub_block_mid,
                                          1,
                                          keepdims=True,
                                          name='global_block_mid')
        grouped_xyz_submid = grouped_xyz - sub_block_mid
        grouped_xyz_glomid = grouped_xyz - global_block_mid

        grouped_xyz_feed = []
        if 'raw' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz)
        if 'sub_mid' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz_submid)
        if 'global_mid' in configs['xyz_elements']:
            grouped_xyz_feed.append(grouped_xyz_glomid)
        grouped_xyz_feed = tf.concat(grouped_xyz_feed, -1)

        if cascade_id == 0:
            # xyz must come first in feed_data_elements!
            grouped_points = tf.concat(
                [grouped_xyz_feed, grouped_points[..., 3:]], -1)

            if len(indrop_keep_mask.get_shape()) != 0:
                if InDropMethod == 'set1st':
                    # set all dropped items to the first item
                    tmp1 = tf.multiply(grouped_points,
                                       grouped_indrop_keep_mask)
                    points_1st = grouped_points[:, :, 0:1, :]
                    points_1st = tf.tile(points_1st,
                                         [1, 1, grouped_points.shape[2], 1])
                    indrop_mask_inverse = 1 - grouped_indrop_keep_mask
                    tmp2 = indrop_mask_inverse * points_1st
                    grouped_points = tf.add(
                        tmp1, tmp2, name='grouped_points_droped'
                    )  # gpu_0/sa_layer0/grouped_points_droped
                    #tf.add_to_collection( 'check', grouped_points )
                elif InDropMethod == 'set0':
                    valid_mask = tf.logical_and(
                        valid_mask,
                        tf.equal(grouped_indrop_keep_mask, 0),
                        name='valid_mask_droped'
                    )  # gpu_1/sa_layer0/valid_mask_droped

        elif use_xyz:
            grouped_points = tf.concat([grouped_xyz_feed, grouped_points],
                                       axis=-1)

        tf.add_to_collection('grouped_xyz', grouped_xyz)
        tf.add_to_collection('grouped_xyz_submid', grouped_xyz_submid)
        tf.add_to_collection('grouped_xyz_glomid', grouped_xyz_glomid)

        if cascade_id > 0 and use_xyz and (not cascade_id == cascade_num - 1):
            grouped_points = tf.concat([grouped_xyz_feed, grouped_points],
                                       axis=-1)

        nsample = grouped_points.get_shape()[2].value  # the conv kernel size

        if IsShowModel:
            print(
                '\n\npointnet_sa_module cascade_id:%d\n xyz:%s\n grouped_xyz:%s\n new_xyz:%s\n grouped_points:%s\n nsample:%d'
                % (cascade_id, shape_str([xyz]), shape_str([grouped_xyz]),
                   shape_str([new_xyz]), shape_str([grouped_points]), nsample))

        new_points = grouped_points
        if valid_mask is not None:
            new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1],
                                              tf.float32)

        if 'growth_rate' in mlp_configs['point_encoder'][cascade_id]:
            new_points = tf_util.dense_net( new_points, mlp_configs['point_encoder'][cascade_id], bn, is_training, bn_decay,\
                                           scope = 'dense_cascade_%d_point_encoder'%(cascade_id) , is_show_model = IsShowModel )
        else:
            for i, num_out_channel in enumerate(
                    mlp_configs['point_encoder'][cascade_id]):
                new_points = tf_util.conv2d(new_points,
                                            num_out_channel, [1, 1],
                                            padding='VALID',
                                            stride=[1, 1],
                                            bn=bn,
                                            is_training=is_training,
                                            scope='conv%d' % (i),
                                            bn_decay=bn_decay)
                if configs['Cnn_keep_prob'] < 1:
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['point_encoder'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points,
                            keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training,
                            scope='dropout',
                            name='cnn_dp%d' % (i))
                if IsShowModel:
                    print('point encoder1 %d, new_points:%s' %
                          (i, shape_str([new_points])))

        if cascade_id == 0:
            root_point_features = new_points
            #if InDropMethod == 'set0':
            #    if len(indrop_keep_mask.get_shape()) != 0:
            #            new_points = tf.identity(new_points,'points_before_droped') # gpu_0/sa_layer0/points_before_droped:0
            #            new_points = tf.multiply( new_points, grouped_indrop_keep_mask, name='droped_points' )   # gpu_0/sa_layer0/droped_points:0
        else:
            root_point_features = None

        pooling = mlp_configs['block_learning']
        if pooling == '3DCNN' and (cascade_id == 0):
            pooling = 'max'

        #if pooling=='avg':
        #    new_points = tf_util.avg_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='avgpool1')
        #elif pooling=='weighted_avg':
        #    with tf.variable_scope('weighted_avg1'):
        #        dists = tf.norm(grouped_xyz,axis=-1,ord=2,keep_dims=True)
        #        exp_dists = tf.exp(-dists * 5)
        #        weights = exp_dists/tf.reduce_sum(exp_dists,axis=2,keep_dims=True) # (batch_size, npoint, nsample, 1)
        #        new_points *= weights # (batch_size, npoint, nsample, mlps_0[-1])
        #        new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True)
        if pooling == 'max':
            # Even though grouped_points and grouped_xyz are 0 for invalid points,
            # the value after the MLP will not be; it has to be forced to 0 before
            # pooling.
            if valid_mask is not None:
                new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1],
                                                  tf.float32)
            new_points = tf.identity(
                new_points,
                'points_before_max')  # gpu_0/sa_layer0/points_before_max
            new_points = tf.reduce_max(new_points,
                                       axis=[2],
                                       keepdims=True,
                                       name='points_after_max')
        #elif pooling=='min':
        #    new_points = tf_util.max_pool2d(-1*new_points, [1,nsample], stride=[1,1], padding='VALID', scope='minpool1')
        #elif pooling=='max_and_avg':
        #    avg_points = tf_util.max_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='maxpool1')
        #    max_points = tf_util.avg_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='avgpool1')
        #    new_points = tf.concat([avg_points, max_points], axis=-1)
        elif pooling == '3DCNN':
            new_points = grouped_points_to_voxel_points(
                cascade_id,
                new_points,
                valid_mask,
                block_bottom_center_mm,
                configs,
                grouped_xyz,
                IsShowVoxelModel=IsShowModel)
            if IsShowModel:
                print('voxel points:%s' % (shape_str([new_points])))
            for i, num_out_channel in enumerate(
                    mlp_configs['voxel_channels'][cascade_id]):
                kernel_i = [mlp_configs['voxel_kernels'][cascade_id][i]] * 3
                stride_i = [mlp_configs['voxel_strides'][cascade_id][i]] * 3
                if new_points.shape[1] % 2 == 0:
                    padding_i = (np.array([[0, 0], [1, 0], [1, 0], [1, 0], [0, 0]])
                                 * mlp_configs['voxel_paddings'][cascade_id][i])
                else:
                    padding_i = (np.array([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]])
                                 * mlp_configs['voxel_paddings'][cascade_id][i])
                new_points = tf.pad(new_points, padding_i, "CONSTANT")

                if type(num_out_channel) == int:
                    new_points = tf_util.conv3d(new_points,
                                                num_out_channel,
                                                kernel_i,
                                                scope='3dconv_%d' % (i),
                                                stride=stride_i,
                                                padding='VALID',
                                                bn=bn,
                                                is_training=is_training,
                                                bn_decay=bn_decay,
                                                name='points_3dcnn_%d' % (i))
                    if IsShowModel:
                        print('block learning by 3dcnn %d, new_points:%s' %
                              (i, shape_str([new_points])))
                elif num_out_channel == 'max':
                    new_points = tf_util.max_pool3d(new_points,
                                                    kernel_i,
                                                    scope='3dmax_%d' % (i),
                                                    stride=stride_i,
                                                    padding='VALID')
                    if IsShowModel:
                        print('block learning max pooling %d, new_points:%s' %
                              (i, shape_str([new_points])))
                elif num_out_channel == 'avg':
                    new_points = tf_util.avg_pool3d(new_points,
                                                    kernel_i,
                                                    scope='3davg_%d' % (i),
                                                    stride=stride_i,
                                                    padding='VALID')
                    if IsShowModel:
                        print('block learning avg pooling %d, new_points:%s' %
                              (i, shape_str([new_points])))
                # gpu_0/sa_layer1/3dconv_0/points_3dcnn_0:0
                if configs['Cnn_keep_prob'] < 1:
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['voxel_channels'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points,
                            keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training,
                            scope='dropout',
                            name='3dcnn_dp%d' % (i))
                # gpu_0/sa_layer4/3dconv_0/points_3dcnn_0:0
            new_points = tf.squeeze(new_points, [1, 2, 3])
            new_points = tf.reshape(
                new_points, [batch_size, -1, 1, new_points.shape[-1].value])

        if IsShowModel:
            print('after %s, new_points:%s' %
                  (pooling, shape_str([new_points])))

        if 'growth_rate' in mlp_configs['block_encoder'][cascade_id]:
            new_points = tf_util.dense_net(
                new_points,
                mlp_configs['block_encoder'][cascade_id],
                bn,
                is_training,
                bn_decay,
                scope='dense_cascade_%d_block_encoder' % (cascade_id),
                is_show_model=IsShowModel)
        else:
            for i, num_out_channel in enumerate(
                    mlp_configs['block_encoder'][cascade_id]):
                new_points = tf_util.conv2d(new_points,
                                            num_out_channel, [1, 1],
                                            padding='VALID',
                                            stride=[1, 1],
                                            bn=bn,
                                            is_training=is_training,
                                            scope='conv_post_%d' % (i),
                                            bn_decay=bn_decay)
                if configs['Cnn_keep_prob'] < 1:
                    if (not configs['only_last_layer_ineach_cascade']
                        ) or i == len(
                            mlp_configs['block_encoder'][cascade_id]) - 1:
                        new_points = tf_util.dropout(
                            new_points,
                            keep_prob=configs['Cnn_keep_prob'],
                            is_training=is_training,
                            scope='dropout',
                            name='cnn_dp%d' % (i))
                if IsShowModel:
                    print('block encoder %d, new_points:%s' %
                          (i, shape_str([new_points])))
        # (2, 512, 1, 64)
        new_points = tf.squeeze(new_points,
                                [2])  # (batch_size, npoints, mlps_1[-1])

        if IsShowModel:
            print(
                'pointnet_sa_module return\n new_xyz: %s\n new_points:%s\n\n' %
                (shape_str([new_xyz]), shape_str([new_points])))
            #import pdb;pdb.set_trace()
        # (2, 512, 64)
        return new_xyz, new_points, root_point_features
Example #14
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 **kwargs):
        super(Policy, self).__init__(sess,
                                     ob_space,
                                     ac_space,
                                     n_env,
                                     n_steps,
                                     n_batch,
                                     reuse=reuse,
                                     scale=True)

        print(self.obs_ph.get_shape())
        net = tf.cast(self.obs_ph, tf.float32)
        #net = tf.Print(net, [net], "input: ", summarize=1000)
        print(net.get_shape())
        with tf.variable_scope("model", reuse=reuse):
            net = tu.conv3d(inputs=net,
                            num_output_channels=16,
                            kernel_size=[6, 6, 6],
                            scope="conv1",
                            stride=[1, 1, 1],
                            padding="VALID")
            print(net.get_shape())
            net = tu.max_pool3d(inputs=net,
                                kernel_size=[3, 3, 3],
                                scope="pool1",
                                stride=[2, 2, 2],
                                padding="VALID")
            print(net.get_shape())

            net = tu.conv3d(inputs=net,
                            num_output_channels=32,
                            kernel_size=[5, 5, 5],
                            scope="conv2",
                            stride=[1, 1, 1],
                            padding="VALID")
            print(net.get_shape())
            net = tu.max_pool3d(inputs=net,
                                kernel_size=[3, 3, 3],
                                scope="pool2",
                                stride=[2, 2, 2],
                                padding="VALID")
            print(net.get_shape())

            net = tu.conv3d(inputs=net,
                            num_output_channels=64,
                            kernel_size=[3, 3, 3],
                            scope="conv3",
                            stride=[1, 1, 1],
                            padding="VALID")
            print(net.get_shape())
            net = tu.max_pool3d(inputs=net,
                                kernel_size=[3, 3, 3],
                                scope="pool3",
                                stride=[2, 2, 2],
                                padding="VALID")
            print(net.get_shape())

            net = tu.conv3d(inputs=net,
                            num_output_channels=64,
                            kernel_size=[2, 2, 2],
                            scope="conv4",
                            stride=[1, 1, 1],
                            padding="VALID")
            print(net.get_shape())
            net = tu.max_pool3d(inputs=net,
                                kernel_size=[3, 3, 3],
                                scope="pool4",
                                stride=[1, 1, 1],
                                padding="VALID")
            print(net.get_shape())

            net = tf.layers.flatten(inputs=net)
            print(net.get_shape())

            with tf.name_scope("pi_h_fc1"):
                pi_h = tf.layers.dense(
                    net,
                    8,
                    activation=tf.nn.relu,
                    kernel_initializer=tf.truncated_normal_initializer(
                        stddev=1e-3))
                print(pi_h.get_shape())
            pi_latent = pi_h

            with tf.name_scope("vf_h_fc1"):
                vf_h = tf.layers.dense(
                    net,
                    8,
                    activation=tf.nn.relu,
                    kernel_initializer=tf.truncated_normal_initializer(
                        stddev=1e-3))
                print(vf_h.get_shape())
            value_fn = tf.layers.dense(vf_h, 1, name="vf")
            vf_latent = vf_h

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._value_fn = value_fn
        self._setup_init()
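
A hedged sketch of how such a policy class is typically handed to a stable-baselines algorithm; make_voxel_env is hypothetical and stands in for any Gym environment whose observations are 3-D volumes large enough to survive the four 'VALID' conv/pool stages:

from stable_baselines import PPO2

env = make_voxel_env()   # hypothetical: returns a Gym env with volumetric observations
model = PPO2(Policy, env, verbose=1)
model.learn(total_timesteps=100000)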
Example #15
def inception_module(input, n_filters=64, kernel_sizes=[3, 5], is_training=None, bn_decay=None, scope='inception'):
    """
     3D inception_module
     """
    one_by_one =  tf_util.conv3d(input, n_filters, [1, 1, 1], scope= scope + '_conv1',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)
    three_by_three = tf_util.conv3d(one_by_one, int(n_filters/2), [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope= scope + '_conv2',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)
    five_by_five = tf_util.conv3d(one_by_one, int(n_filters/2), [kernel_sizes[1], kernel_sizes[1], kernel_sizes[1]], scope=scope + '_conv3',
                          stride=[1, 1, 1], padding='SAME', bn=True,
                          bn_decay=bn_decay, is_training=is_training)
    average_pooling = tf_util.avg_pool3d(input, [kernel_sizes[0], kernel_sizes[0], kernel_sizes[0]], scope=scope+'_avg_pool', stride=[1, 1, 1], padding='SAME')
    average_pooling = tf_util.conv3d(average_pooling, n_filters, [1,1,1], scope= scope + '_conv4',
           stride=[1, 1, 1], padding='SAME', bn=True,
           bn_decay=bn_decay, is_training=is_training)

    output = tf.concat([ one_by_one, three_by_three, five_by_five, average_pooling], axis=4)
    #output = output + tf.tile(input) ??? #resnet
    return output

Example #16
def senot_module(sequence, k, mlp, scope, mlp0=None, is_training=None, bn_decay=None, weight_decay=None, data_format='NHWC', distance='l2', activation_fn=None, shrink_ratio=None, freeze_bn=False):
    """
    Args:
        sequence: (batch_size, T, H, W, C) TF tensor
        k: int -- Top k closest points
        mlp: list of int32 -- output size for MLP on each point
    Returns:
        new features: (batch_size, num_points, mlp[-1]) TF tensor
    """
    batch_size = sequence.get_shape()[0].value
    num_frames = sequence.get_shape()[1].value
    height = sequence.get_shape()[2].value
    width = sequence.get_shape()[3].value
    num_channels = sequence.get_shape()[-1].value
    end_points = {}

    if shrink_ratio is not None:
        new_height = height // shrink_ratio
        new_width = width // shrink_ratio
    else:
        new_height = height
        new_width = width

    end_points['input_sequence'] = sequence

    if mlp0 is not None:
        with tf.variable_scope(scope) as sc:
            for i, num_out_channel in enumerate(mlp0):
                sequence = tf_util.conv3d(sequence, num_out_channel, [1,1,1], padding='VALID',
                         stride=[1,1,1], bn=True, is_training=is_training,
                         scope='conv_bottle%d'%(i), bn_decay=bn_decay, weight_decay=weight_decay, activation_fn=None, freeze_bn=freeze_bn)

    num_channels_bottleneck = sequence.get_shape()[-1].value
    if shrink_ratio is not None:
        sequence = tf.reshape(sequence, [-1, height, width, num_channels_bottleneck])
        sequence = tf.image.resize_images(sequence, [new_height, new_width], method=tf.image.ResizeMethod.BILINEAR)

    net = tf.reshape(sequence, [batch_size, -1, num_channels_bottleneck])

    if distance == 'l2':
        import knn_l2 as knn
    elif distance == 'dot':
        import knn_dot as knn
    elif distance == 'cos':
        import knn_cosin as knn
    else:
        raise ValueError('unsupported distance metric: %s' % distance)
    nn_idx = knn.knn(net, k, new_height * new_width)

    net_expand = tf.tile(tf.expand_dims(net, axis=2), [1,1,k,1])
    net_grouped = tf_grouping.group_point(net, nn_idx)

    coord = get_coord(tf.reshape(sequence, [batch_size, -1, new_height, new_width, num_channels_bottleneck]))
    coord_expand = tf.tile(tf.expand_dims(coord, axis=2), [1,1,k,1])
    coord_grouped = tf_grouping.group_point(coord, nn_idx)
    coord_diff = coord_grouped - coord_expand

    end_points['coord'] = {'coord': coord, 'coord_grouped': coord_grouped, 'coord_diff': coord_diff}

    net = tf.concat([coord_diff, net_expand, net_grouped], axis=-1)

    with tf.variable_scope(scope) as sc:
        for i, num_out_channel in enumerate(mlp):
            net = tf_util.conv2d(net, num_out_channel, [1,1], padding='VALID',
                    stride=[1,1], bn=True, is_training=is_training,
                    scope='conv%d'%(i), bn_decay=bn_decay, weight_decay=weight_decay,
                    data_format=data_format, freeze_bn=freeze_bn)
    end_points['before_max'] = net
    net = tf.reduce_max(net, axis=[2], keepdims=True, name='maxpool')
    end_points['after_max'] = net
    # end_points['before_avg'] = net
    # net = tf.reduce_mean(net, axis=[2], keepdims=True, name='avgpool')
    # end_points['after_avg'] = net
    net = tf.reshape(net, [batch_size, num_frames, new_height, new_width, mlp[-1]])

    with tf.variable_scope(scope) as sc:
        net = tf_util.conv3d(net, num_channels, [1, 1, 1], stride=[1, 1, 1], bn=False, activation_fn=None, weight_decay=weight_decay, scope='conv_final')
        net = tf.contrib.layers.batch_norm(net, center=True, scale=True,
                is_training=is_training if not freeze_bn else tf.constant(False, shape=(), dtype=tf.bool), decay=bn_decay, updates_collections=None,
                scope='bn_final', data_format=data_format, param_initializers={'gamma': tf.constant_initializer(0., dtype=tf.float32)}, trainable=not freeze_bn)

    if shrink_ratio is not None:
        net = tf.reshape(net, [-1, new_height, new_width, num_channels])
        net = tf.image.resize_images(net, [height, width], method=tf.image.ResizeMethod.BILINEAR)
        net = tf.reshape(net, [batch_size, -1, height, width, num_channels])

    if activation_fn is not None:
        net = activation_fn(net)

    return net, end_points
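
Because conv_final maps back to num_channels and bn_final's gamma is initialized to zero, the module returns a tensor with the same shape as its input and starts out as an identity residual, so it can be added straight back into the feature stream, as in Example #3:

# Sketch of the residual usage from Example #3 (shapes assumed).
c = net.get_shape()[-1].value
net_pointnet, end_point = senot_module(net, k=4, mlp=[c // 4, c // 2],
                                       scope='pointnet', is_training=is_training,
                                       bn_decay=bn_decay, weight_decay=weight_decay,
                                       distance='l2', activation_fn=None,
                                       shrink_ratio=2)
net = tf.nn.relu(net + net_pointnet)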