Example #1
def conv3d(batch_input,
           depth,
           height,
           width,
           output_channel,
           stride,
           use_bias=False,
           scope='conv3d'):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if use_bias:
            return slim.conv3d(
                batch_input,
                output_channel, [depth, height, width],
                stride,
                'SAME',
                data_format="NDHWC",
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer())
        else:
            return slim.conv3d(
                batch_input,
                output_channel, [depth, height, width],
                stride,
                'SAME',
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                data_format="NDHWC",
                biases_initializer=None)
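
Every snippet on this page is TF1-era code built on tf.contrib.slim. A minimal usage sketch for the wrapper above, assuming the usual imports and a hypothetical NDHWC video batch:

import tensorflow as tf
import tensorflow.contrib.slim as slim

# Hypothetical input: batch of 4 clips, 8 frames of 32x32 RGB (NDHWC).
clips = tf.placeholder(tf.float32, [4, 8, 32, 32, 3])
out = conv3d(clips, depth=3, height=3, width=3, output_channel=16, stride=1)
# With 'SAME' padding and stride 1 the spatio-temporal dims are preserved:
# out has shape (4, 8, 32, 32, 16).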
Example #2
def resnet_k(
        net, kernel_size=3, num_out=None,
        scale=1.0, activation_fn=tf.nn.relu,
        scope=None, reuse=None):
    """ general residual model """
    num = int(net.shape[-1].value)
    if num_out is None:
        num_out = num
    num2 = (num_out >> 1)
    num4 = (num2 >> 1)
    sc_current = 'residual_{}_{}'.format(kernel_size, num2)
    with tf.variable_scope(scope, sc_current, [net], reuse=reuse):
        with tf.variable_scope('branch0'):
            tower0 = slim.conv3d(net, num2, 1, stride=1)
        with tf.variable_scope('branch1'):  # equivalent to 3x3
            tower1 = slim.conv3d(net, num4, 1, stride=1)
            tower1 = slim.conv3d(
                tower1, num2, kernel_size, stride=1)
        mixed = tf.concat(axis=-1, values=[tower0, tower1])
        mixup = slim.conv3d(
            mixed, num_out, 1, stride=1,
            normalizer_fn=None, activation_fn=None,
            scope='mixup')
        if num != num_out:
            net = slim.conv3d(net, num_out, 1, stride=1)
        net += mixup * scale
    if activation_fn is not None:
        net = activation_fn(net)
    return net
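
A hedged usage sketch for resnet_k (shapes are illustrative, and the block presumably runs under a slim.arg_scope that sets conv3d defaults such as a normalizer):

# Hypothetical 32-channel feature volume.
feat = tf.placeholder(tf.float32, [2, 8, 8, 8, 32])
feat = resnet_k(feat, kernel_size=3)           # 1x1 tower + 1x1->3x3x3 tower, then 1x1 mixup
feat = resnet_k(feat, num_out=64, scale=0.5)   # channel change: skip path gets a 1x1 projection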
Example #3
def block3d(input, num_filters, stride=1, use_final_relu=True):
    num_filters_in = input.get_shape().as_list()[-1]  # channels-last (NDHWC)

    # residual
    residual = slim.conv3d(input,
                           num_filters,
                           kernel_size=(3, 3, 3),
                           stride=(stride, stride, stride),
                           scope='conv1')
    residual = slim.batch_norm(residual, scope='bn_1')
    residual = tf.nn.relu(residual)

    residual = slim.conv3d(residual,
                           num_filters,
                           kernel_size=(3, 3, 3),
                           stride=1,
                           scope='conv2')
    residual = slim.batch_norm(residual, scope='bn_2')

    # identity
    shortcut = input
    if stride != 1 or num_filters_in != num_filters:
        shortcut = slim.conv3d(input,
                               num_filters,
                               kernel_size=(1, 1, 1),
                               stride=(stride, stride, stride),
                               scope='shortcut')
        shortcut = slim.batch_norm(shortcut, scope='bn_3')

    out = shortcut + residual
    if use_final_relu:
        out = tf.nn.relu(out)

    return out
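
For reference, a sketch of how block3d is typically chained; the shortcut switches from identity to a strided 1x1x1 projection whenever the stride or channel count changes (shapes below are illustrative):

x = tf.placeholder(tf.float32, [2, 8, 56, 56, 64])
with tf.variable_scope('res0'):
    x = block3d(x, 64)              # identity shortcut: same channels, stride 1
with tf.variable_scope('res1'):
    x = block3d(x, 128, stride=2)   # projected shortcut: 1x1x1 conv with stride 2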
Example #4
    def _region_proposal(self, net_conv, is_training, initializer):
        rpn = slim.conv3d(net_conv,
                          cfg.RPN_CHANNELS, [3, 3, 3],
                          trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv3d(rpn,
                                    self._num_anchors * 2, [1, 1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID',
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2,
                                                    'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                   "rpn_cls_prob_reshape")
        rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]),
                                 axis=1,
                                 name="rpn_cls_pred")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2,
                                           "rpn_cls_prob")
        rpn_bbox_pred = slim.conv3d(rpn,
                                    self._num_anchors * 6, [1, 1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID',
                                    activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob,
                                                    rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores,
                                                      "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                               "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
            else:
                raise NotImplementedError

        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_cls_pred"] = rpn_cls_pred
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["rois"] = rois

        return rois
Example #5
    def _module_fn():
        """
        Function building the module
        """

        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          5,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        #net = wide_resnet(feature_layer, 8, activation_fn=tf.nn.leaky_relu, is_training=is_training)
        net = wide_resnet(net,
                          16,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.tanh)

        # Define the probabilistic layer
        #out_rate = slim.conv3d(net, 1, 1, activation_fn=tf.nn.relu)
        #out_rate = tf.math.add(out_rate, 1e-6, name='rate')
        net = slim.conv3d(net, n_mixture * n_y, 1, activation_fn=tf.nn.relu)
        cube_size = tf.shape(obs_layer)[1]
        out_rate = tf.reshape(net, [-1, cube_size, cube_size, cube_size, n_y])
        out_rate = tf.math.add(out_rate, 1e-6, name='rate')
        pdf = tfd.Poisson(rate=out_rate)

        # Define a function for sampling, and a function for estimating the log likelihood
        sample = tf.squeeze(pdf.sample())
        loglik = pdf.log_prob(obs_layer)
        hub.add_signature(
            inputs={'features': feature_layer, 'labels': obs_layer},
            outputs={'sample': sample, 'loglikelihood': loglik})
Example #6
def C3D(input, num_classes, keep_pro=0.5):
    with tf.variable_scope('C3D'):
        with slim.arg_scope([slim.conv3d],
                            padding='SAME',
                            weights_regularizer=slim.l2_regularizer(0.0005),
                            activation_fn=tf.nn.relu,
                            kernel_size=[3, 3, 3],
                            stride=[1, 1, 1]):
            net = slim.conv3d(input, 64, scope='conv1')
            net = slim.max_pool3d(net,
                                  kernel_size=[1, 2, 2],
                                  stride=[1, 2, 2],
                                  padding='SAME',
                                  scope='max_pool1')
            net = slim.conv3d(net, 128, scope='conv2')
            net = slim.max_pool3d(net,
                                  kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2],
                                  padding='SAME',
                                  scope='max_pool2')
            net = slim.repeat(net, 2, slim.conv3d, 256, scope='conv3')
            net = slim.max_pool3d(net,
                                  kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2],
                                  padding='SAME',
                                  scope='max_pool3')
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv4')
            net = slim.max_pool3d(net,
                                  kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2],
                                  padding='SAME',
                                  scope='max_pool4')
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv5')
            net = slim.max_pool3d(net,
                                  kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2],
                                  padding='SAME',
                                  scope='max_pool5')

            net = tf.reshape(net, [-1, 512 * 4 * 4])
            net = slim.fully_connected(
                net,
                4096,
                weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc6')
            net = slim.dropout(net, keep_pro, scope='dropout1')
            net = slim.fully_connected(
                net,
                4096,
                weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc7')
            net = slim.dropout(net, keep_pro, scope='dropout2')
            out = slim.fully_connected(
                net,
                num_classes,
                weights_regularizer=slim.l2_regularizer(0.0005),
                activation_fn=None,
                scope='out')

            return out
Example #7
    def _cnn_embedding(self, inputs):
        net = slim.conv3d(inputs, 16, [1, 3, 3])
        net = slim.conv3d(net, 16, [1, 3, 3])
        net = slim.max_pool3d(net, [1, 3, 3], [1, 2, 2], 'SAME')
        net = slim.conv3d(net, 32, [1, 3, 3])
        net = slim.conv3d(net, 32, [1, 3, 3])
        net = slim.max_pool3d(net, [1, 3, 3], [1, 2, 2], 'SAME')
        net = tf.reshape(net, [-1, tf.shape(inputs)[1], tf.size(net[0][0])])
        outputs = slim.fully_connected(net, 100, tf.nn.tanh)
        return outputs
Example #8
    def _inference3D(self, x, is_training):
        """ Inference part of the network.

            Per view we get a 46x46x128 encoding (and maybe a 46x46x3 eye map).
            We unproject into a hand centered volume of dimension 64, so input dim is:
                64x64x64x 8*128 = 64x64x64x 1024
        """

        with tf.variable_scope('PoseNet3D') as scope:
            num_chan = self.config.num_kp

            skips = list()
            scorevolumes = list()
            skips.append(None)  # this is needed for the final upsampling step

            # 3D encoder
            # chan_list = [64, 128, 128, 256]
            chan_list = [32, 64, 64, 64]
            for chan in chan_list:
                x = self._enc3D_step(x, chan,
                                     dim_red=True, is_training=is_training)  # voxel sizes: 32, 16, 8, 4
                skips.append(x)
            skips.pop()  # the last one is of no use

            # bottleneck in the middle
            x = slim.conv3d(x, 64, kernel_size=[1, 1, 1], trainable=is_training, activation_fn=tf.nn.relu)

            # make initial guess of the scorevolume
            scorevol = slim.conv3d_transpose(x, num_chan, kernel_size=[32, 32, 32], trainable=is_training, stride=16, activation_fn=None)
            scorevolumes.append(scorevol)

            # 3D decoder
            kernels = [16, 8, 4]
            # chan_list = [64, 64, 64]
            chan_list = [32, 32, 32]
            for chan, kernel in zip(chan_list, kernels):
                x, scorevol = self._dec3D_stop(x, skips.pop(), scorevol, chan, num_chan, kernel, is_training)
                scorevolumes.append(scorevol)

            # final decoder step
            x = slim.conv3d_transpose(x, 64, kernel_size=[4, 4, 4], trainable=is_training, stride=2, activation_fn=tf.nn.relu)
            scorevol_delta = slim.conv3d(x, num_chan, kernel_size=[1, 1, 1], trainable=is_training, activation_fn=None)
            scorevol = scorevol_delta
            scorevolumes.append(scorevol)

            variables = tf.contrib.framework.get_variables(scope)

            if self.net_config.use_softargmax:
                xyz_vox_list = [softargmax3D(svol, output_vox_space=True) for svol in scorevolumes]
                score_list = [tf.reduce_mean(svol, [1, 2, 3]) for svol in scorevolumes]
            else:
                xyz_vox_list = [argmax_3d(svol) for svol in scorevolumes]
                score_list = [tf.reduce_max(svol, [1, 2, 3]) for svol in scorevolumes]

            return scorevolumes, xyz_vox_list, score_list, variables
Example #9
def module_fn():
    '''Define network here'''
    x = tf.placeholder(
        tf.float32,
        shape=[None, cube_sizeft, cube_sizeft, cube_sizeft, nchannels],
        name='input')
    y = tf.placeholder(tf.float32,
                       shape=[None, cube_size, cube_size, cube_size, 1],
                       name='labels')
    keepprob = tf.placeholder(tf.float32, name='keepprob')
    print('Shape of training and testing data is : ',
          x.shape,
          y.shape,
          file=fname)

    #
    wregwt, bregwt = 0.001, 0.001
    if wregwt: wreg = slim.regularizers.l2_regularizer(wregwt)
    else: wreg = None
    if bregwt: breg = slim.regularizers.l2_regularizer(bregwt)
    else: breg = None
    print('Regularizing weights are : ', wregwt, bregwt, file=fname)
    #
    net = slim.conv3d(x,
                      16,
                      5,
                      activation_fn=tf.nn.leaky_relu,
                      padding='valid',
                      weights_regularizer=wreg,
                      biases_regularizer=breg)
    net = wide_resnet(net,
                      32,
                      keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net,
                      64,
                      keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net,
                      32,
                      keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net,
                      16,
                      keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = slim.conv3d(net, 1, 3, activation_fn=None)
    net = tf.identity(net, name='logits')
    pred = tf.nn.sigmoid(net, name='prediction')
    #
    inputs = dict(input=x, label=y, keepprob=keepprob)
    outputs = dict(default=net, prediction=pred)
    hub.add_signature(inputs=inputs, outputs=outputs)
Example #10
def cnn3d_example(inputs, pkeep_conv, pkeep_hidden):
    """
    """
    print(inputs.shape)
    net = slim.conv3d(inputs=inputs,
                      num_outputs=2,
                      kernel_size=3,
                      padding='VALID',
                      activation_fn=tf.nn.relu,
                      weights_initializer=tfinit.truncated_normal(mean=0,
                                                                  stddev=0.05),
                      biases_initializer=tfinit.zeros(),
                      scope='conv1')
    net = slim.dropout(net, pkeep_conv)
    print(net.shape)
    net = slim.conv3d(inputs=net,
                      num_outputs=8,
                      kernel_size=3,
                      padding='VALID',
                      activation_fn=tf.nn.relu,
                      weights_initializer=tfinit.truncated_normal(mean=0,
                                                                  stddev=0.05),
                      biases_initializer=tfinit.zeros(),
                      scope='conv2')
    # net = tf.squeeze(net, squeeze_dims=[2,3])
    net = slim.flatten(net)
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net,
                               num_outputs=200,
                               scope='fc3',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net,
                               num_outputs=84,
                               scope='fc4',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net,
                               num_outputs=16,
                               activation_fn=tf.identity,
                               scope='output',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    return net
Example #11
def c3d_small(net, reuse=None, is_training=True, scope='c3d', use_fc=True):
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(
                c3d_argscope(activation=tf.nn.relu,
                             kernel_size=3,
                             padding='SAME',
                             training=is_training)):
            end_points = {}

            net = slim.conv3d(net, 64, scope='conv_1')
            print('conv_1 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_1'] = net

            net = slim.max_pool3d(net, kernel_size=(1, 2, 2), stride=(1, 2, 2))
            net = slim.conv3d(net, 128, scope='conv_2')
            print('conv_2 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_2'] = net

            net = slim.max_pool3d(net, kernel_size=2, stride=2)
            net = slim.conv3d(net, 256, scope='conv_3')
            print('conv_3 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_3'] = net

            net = slim.max_pool3d(net, kernel_size=2, stride=2)
            net = slim.conv3d(net, 256, scope='conv_4')
            print('conv_4 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_4'] = net

            net = slim.max_pool3d(net, kernel_size=2, stride=2)
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]])
            net = slim.conv3d(net, 256, scope='conv_5', padding='VALID')
            print('conv_5 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_5'] = net

            net = slim.max_pool3d(net, kernel_size=2, stride=2)
            end_points['maxpool_5'] = net

            net = tf.reshape(net, [net.get_shape().as_list()[0], -1])
            print('flattened feats: {}'.format(net.get_shape().as_list()))

            if use_fc:
                net = slim.fully_connected(net, 2048, scope='fc_1')
                print('fc_1 feats: {}'.format(net.get_shape().as_list()))
                end_points['fc_1'] = net

                net = slim.fully_connected(net, 2048, scope='fc_2')
                print('fc_2 feats: {}'.format(net.get_shape().as_list()))
                end_points['fc_2'] = net

        return net, end_points
Example #12
def pullout8(net, out_dim, is_training,
             scope=None, reuse=None):
    """ supposed to work best with 8x8 input """
    with tf.variable_scope(scope, 'pullout8', [net], reuse=reuse):
        net = inresnet3d.conv_maxpool(net, scope='conv_pool_8')
        print(net.shape)
        net = inresnet3d.conv_maxpool(net, scope='conv_pool_4')
        print(net.shape)
        shape2 = net.get_shape()
        fc_num = shape2[4] * 2
        net = slim.conv3d(
            net, fc_num, shape2[1:4],
            padding='VALID', scope='fullconn4')
        # net = slim.avg_pool3d(
        #     net, 5, stride=3, padding='VALID',
        #     scope='avgpool8_5x5_3')
        print(net.shape)
        # net = slim.conv3d(net, 64, 1, scope='reduce8')
        # print(net.shape)
        # net = slim.conv3d(
        #     net, 256, net.get_shape()[1:4],
        #     padding='VALID', scope='fullconn8')
        # print(net.shape)
        net = slim.flatten(net)
        net = slim.dropout(
            net, 0.5, scope='dropout8')
        print(net.shape)
        net = slim.fully_connected(
            net, out_dim,
            activation_fn=None, normalizer_fn=None,
            scope='output8')
    return net
Example #13
def conv3d(inputs,
           filters,
           kernel_size,
           strides=1,
           scope=None,
           dilation=1,
           data_format=''):
    """Returns Conv3D wrapped with default values."""

    del data_format
    del dilation
    init = initializers.variance_scaling_initializer

    return slim.conv3d(inputs,
                       filters,
                       kernel_size=kernel_size,
                       stride=strides,
                       padding='SAME',
                       activation_fn=None,
                       biases_initializer=None,
                       normalizer_fn=None,
                       scope=scope,
                       weights_initializer=init(factor=2.0,
                                                mode='FAN_IN',
                                                uniform=False))
Example #14
def em_branch(input, prefix='em_branch_'):
    # input should be of shape [batch_size, frame_count, height, width, 16]
    conv = slim.conv3d(input, 8, [3, 3, 3], rate=1, activation_fn=lrelu, scope=prefix + 'g_conv1', padding='SAME')

    padding_method = 'VALID'
    conv1 = slim.conv3d(conv, 16, [5, 5, 5], rate=1, activation_fn=lrelu, scope=prefix + 's_conv1', padding=padding_method)
    conv2 = slim.conv3d(conv1, 16, [5, 5, 5], rate=1, activation_fn=lrelu, scope=prefix + 's_conv2', padding=padding_method)
    conv3 = slim.conv3d(conv2, 16, [5, 5, 5], rate=1, activation_fn=lrelu, scope=prefix + 's_conv3', padding=padding_method)
    #
    # shape_image = tf.placeholder(tf.float32, [BATCH_SIZE, CROP_FRAME - 8, CROP_HEIGHT - 8, CROP_WIDTH - 8, 16])
    #
    # pool_size = 1
    # deconv_filter1 = tf.Variable(tf.truncated_normal([pool_size, pool_size, pool_size, 16, 16], stddev=0.02))
    # deconv1 = tf.nn.conv3d_transpose(conv3, deconv_filter1, tf.shape(shape_image), strides=[1, pool_size, pool_size, pool_size, 1])
    # deconv1 = lrelu(deconv1)
    #
    # # print deconv1.shape
    # # print 'conv1.shape[:-1] + (8,):', tuple(conv1.shape[:-1]) + (8,)
    #
    # shape_image = tf.placeholder(tf.float32, [BATCH_SIZE, CROP_FRAME - 4, CROP_HEIGHT - 4, CROP_WIDTH - 4, 8])
    # pool_size = 1
    # deconv_filter2 = tf.Variable(tf.truncated_normal([pool_size, pool_size, pool_size, 8, 16], stddev=0.02))
    # deconv2 = tf.nn.conv3d_transpose(deconv1, deconv_filter2, tf.shape(shape_image), strides=[1, pool_size, pool_size, pool_size, 1])
    # deconv2 = lrelu(deconv2)
    #
    # # print deconv2.shape
    # shape_image = tf.placeholder(tf.float32, [BATCH_SIZE, CROP_FRAME, CROP_HEIGHT, CROP_WIDTH, 3])
    # pool_size = 1
    # deconv_filter3 = tf.Variable(tf.truncated_normal([pool_size, pool_size, pool_size, 3, 8], stddev=0.02))
    # deconv3 = tf.nn.conv3d_transpose(deconv2, deconv_filter3, tf.shape(shape_image), strides=[1, pool_size, pool_size, pool_size, 1])
    # deconv3 = lrelu(deconv3)

    # print deconv3.shape
    deconv1 = slim.conv3d_transpose(conv3, 16, [5, 5, 5], activation_fn=lrelu, scope=prefix + 's_deconv1', padding=padding_method)
    deconv2 = slim.conv3d_transpose(deconv1, 8, [5, 5, 5], activation_fn=lrelu, scope=prefix + 's_deconv2', padding=padding_method)
    deconv3 = slim.conv3d_transpose(deconv2, 3, [5, 5, 5], activation_fn=lrelu, scope=prefix + 's_deconv3', padding=padding_method)


    if DEBUG == 1:
        print('conv.shape:', conv.shape)
        print('conv1.shape:', conv1.shape)
        print('conv2.shape:', conv2.shape)
        print('conv3.shape:', conv3.shape)
        print('deconv1.shape:', deconv1.shape)
        print('deconv2.shape:', deconv2.shape)
        print('deconv3.shape:', deconv3.shape)
    return deconv3
Example #15
def NonLocalBlock(input_x,
                  out_channels,
                  sub_sample=True,
                  is_bn=True,
                  scope='NonLocalBlock'):
    batchsize, clips, height, width, in_channels = input_x.get_shape().as_list()
    with tf.variable_scope(scope) as sc:
        with tf.variable_scope('g') as scope:
            g = slim.conv3d(input_x,
                            out_channels,
                            kernel_size=1,
                            stride=1,
                            scope='g')
            if sub_sample:
                g = slim.max_pool3d(g, [1, 2, 2],
                                    stride=[1, 2, 2],
                                    scope='g_max_pool')

        with tf.variable_scope('phi') as scope:
            # 1x1x1 embedding for phi (standard non-local block formulation,
            # Wang et al. 2018; the elided lines are reconstructed accordingly)
            phi = slim.conv3d(input_x,
                              out_channels,
                              kernel_size=1,
                              stride=1,
                              scope='phi')
            if sub_sample:
                phi = slim.max_pool3d(phi, [1, 2, 2],
                                      stride=[1, 2, 2],
                                      scope='phi_max_pool')

        with tf.variable_scope('theta') as scope:
            # 1x1x1 embedding for theta (no sub-sampling on the query path)
            theta = slim.conv3d(input_x,
                                out_channels,
                                kernel_size=1,
                                stride=1,
                                scope='theta')

        # flatten the spatio-temporal dims so the attention is a plain matmul
        g_x = tf.reshape(g, [batchsize, -1, out_channels])
        theta_x = tf.reshape(theta, [batchsize, -1, out_channels])

        phi_x = tf.reshape(phi, [batchsize, -1, out_channels])
        transposed_phi_x = tf.transpose(phi_x, [0, 2, 1])

        f = tf.matmul(theta_x, transposed_phi_x)  # (theta, phi) matrix multiplication
        f_softmax = tf.nn.softmax(f, axis=-1)     # softmax over phi positions
        y = tf.matmul(f_softmax, g_x)             # (f_softmax, g)
        y = tf.reshape(y, [batchsize, clips, height, width, out_channels])  # reshape

        with tf.variable_scope('w') as scope:
            # Z operation: 1x1x1 conv projects y back to in_channels
            w_y = slim.conv3d(y,
                              in_channels,
                              kernel_size=1,
                              stride=1,
                              scope='w')
            if is_bn:
                w_y = slim.batch_norm(w_y)

        z = input_x + w_y  # add y to x (residual connection)
    return z
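
A usage sketch, assuming the reconstruction above: the block is shape-preserving (w projects back to in_channels), so it can be dropped between pooling stages exactly as Example #21 does:

video = tf.placeholder(tf.float32, [4, 8, 28, 28, 128])
video = NonLocalBlock(video, out_channels=64, scope='nonlocal_block_2')
# video still has shape [4, 8, 28, 28, 128]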
Example #16
def conv_maxpool(net, scope=None, reuse=None):
    """ simple conv + max_pool """
    num = int(net.shape[-1].value)
    sc_current = 'conv_maxpool_{}'.format(num)
    with tf.variable_scope(scope, sc_current, [net], reuse=reuse):
        net = slim.conv3d(net, 2 * num, 3, stride=1)
        net = slim.max_pool3d(net, 3, stride=2)
    return net
Example #17
def convT(_X, out_channels, kernel_size=[3, 1, 1], stride=1, padding='VALID'):
    return slim.conv3d(_X,
                       out_channels,
                       kernel_size=kernel_size,
                       stride=stride,
                       padding=padding,
                       weights_initializer=weights_initializer,
                       biases_initializer=None)
Example #18
def conv3d(x, o_dim, data_format='NDHWC', name=None, k=4, s=2, act=None):
    return slim.conv3d(x,
                       o_dim,
                       k,
                       stride=s,
                       activation_fn=act,
                       scope=name,
                       data_format=data_format)
Example #19
def resnet3d_18(net,
                num_out,
                reuse=tf.AUTO_REUSE,
                training=True,
                scope='resnet',
                blocks=('2d', '2d', '3d', '3d'),
                module_sizes=(2, 2, 2, 2),
                filter_sizes=(64, 128, 256, 512),
                *args,
                **kwargs):
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(resnet_arg_scope(training=training)):
            feats = {}

            net = slim.conv3d(net,
                              64,
                              kernel_size=(1, 7, 7),
                              stride=(1, 2, 2),
                              scope='conv0')
            net = slim.batch_norm(net, scope='bn_0')
            net = tf.nn.relu(net)
            net = slim.max_pool3d(net, kernel_size=(1, 3, 3), stride=(1, 2, 2))

            print('Shape conv_1: {}'.format(net.get_shape().as_list()))
            feats['conv_1'] = net

            block_id = 0
            for i, blocks_in_module in enumerate(module_sizes):
                for j in range(blocks_in_module):
                    block_id += 1
                    stride = 2 if j == 0 and i > 0 else 1
                    with tf.variable_scope("res%d.%d" % (i, j)):
                        print('Block {}'.format(block_id))
                        if blocks[i] == '2d':
                            print('2D block')
                            net = block2d(net, filter_sizes[i], stride)
                        elif blocks[i] == '3d':
                            print('3D block')
                            net = block3d(net, filter_sizes[i], stride)
                        else:
                            net = None
                        print('Shape {} {}: {}'.format(
                            i, j,
                            net.get_shape().as_list()))
                        feats['block_{}'.format(block_id)] = net
                feats['conv_{}'.format(i + 2)] = net
                print('Shape conv_{}: {}'.format(i + 2,
                                                 net.get_shape().as_list()))

            net = tf.reduce_mean(net, [1, 2, 3])
            feats['pre_logit'] = net
            print('Shape pre_logit: {}'.format(net.get_shape().as_list()))
            logits = slim.fully_connected(
                net,
                num_out,
                activation_fn=None,
                weights_initializer=tf.random_normal_initializer(stddev=1e-3))
            return logits, feats
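
A hedged instantiation sketch (resnet_arg_scope, block2d, and the clip shape are assumptions taken from context):

clip = tf.placeholder(tf.float32, [8, 16, 112, 112, 3])
logits, feats = resnet3d_18(clip, num_out=101, training=True)
# logits: [8, 101]; feats exposes 'conv_1'..'conv_5', the per-block
# activations, and 'pre_logit' for auxiliary heads.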
Example #20
    def _create_dqn_two_stream(self, rgb, vox, trainable=True, if_bn=False, reuse=False, scope_name='dqn_two_stream'):
        with tf.variable_scope(scope_name) as scope:
            if reuse:
                scope.reuse_variables()
            
            if if_bn:
                batch_normalizer_gen = slim.batch_norm
                batch_norm_params_gen = {'is_training': self.is_training, 'decay': self.FLAGS.bn_decay}
            else:
                #self._print_arch('=== NOT Using BN for GENERATOR!')
                batch_normalizer_gen = None
                batch_norm_params_gen = None

            if self.FLAGS.if_l2Reg:
                weights_regularizer = slim.l2_regularizer(1e-5)
            else:
                weights_regularizer = None
            
            with slim.arg_scope([slim.fully_connected],
                    activation_fn=self.activation_fn,
                    trainable=trainable,
                    normalizer_fn=batch_normalizer_gen,
                    normalizer_params=batch_norm_params_gen,
                    weights_regularizer=weights_regularizer):
                
                net_rgb = slim.conv2d(rgb, 64, kernel_size=[3,3], stride=[2,2], padding='SAME', scope='rgb_conv1')
                net_rgb = slim.conv2d(net_rgb, 128, kernel_size=[3,3], stride=[2,2], padding='SAME', scope='rgb_conv2')
                net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3,3], stride=[2,2], padding='SAME', scope='rgb_conv3')
                net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3,3], stride=[2,2], padding='SAME', scope='rgb_conv4')
                net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3,3], stride=[2,2], padding='SAME', scope='rgb_conv5')
                net_rgb = slim.flatten(net_rgb, scope='rgb_flatten')

                net_vox = slim.conv3d(vox, 64, kernel_size=[3,3,3], stride=[1,1,1], padding='SAME', scope='vox_conv1')
                net_vox = slim.conv3d(net_vox, 128, kernel_size=[3,3,3], stride=[2,2,2], padding='SAME', scope='vox_conv2')
                net_vox = slim.conv3d(net_vox, 256, kernel_size=[3,3,3], stride=[1,1,1], padding='SAME', scope='vox_conv3')
                net_vox = slim.conv3d(net_vox, 256, kernel_size=[3,3,3], stride=[2,2,2], padding='SAME', scope='vox_conv4')
                net_vox = slim.conv3d(net_vox, 512, kernel_size=[3,3,3], stride=[2,2,2], padding='SAME', scope='vox_conv5')
                net_vox = slim.flatten(net_vox, scope='vox_flatten')
                
                net_feat = tf.concat([net_rgb, net_vox], axis=1)
                net_feat = slim.fully_connected(net_feat, 4096, scope='fc6')
                net_feat = slim.fully_connected(net_feat, 4096, scope='fc7')
                logits = slim.fully_connected(net_feat, self.FLAGS.action_num, activation_fn=None, scope='fc8')

                return tf.nn.softmax(logits), logits
Example #21
def C3D(input_data, num_classes, keep_pro=0.5, non_local=False):
    with tf.variable_scope('C3D'):
        with slim.arg_scope([slim.conv3d],
                            padding='SAME',
                            weights_regularizer=slim.l2_regularizer(0.0005),
                            activation_fn=tf.nn.relu,
                            kernel_size=[3, 3, 3],
                            stride=[1, 1, 1]
                            ):
            # Batch * 16 * 112 * 112 * 3
            net = slim.conv3d(input_data, 64, scope='conv1')
            net = slim.max_pool3d(net, kernel_size=[1, 2, 2], stride=[1, 2, 2], padding='SAME', scope='max_pool1')
            # net = NonLocalBlock(net, 64, scope='nonlocal_block_1')

            # Batch * 16 * 56 * 56 * 64
            net = slim.conv3d(net, 128, scope='conv2')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2], stride=[2, 2, 2], padding='SAME', scope='max_pool2')
            if non_local:
                net = NonLocalBlock(net, 128, scope='nonlocal_block_2')

            # Batch * 8 * 28 * 28 * 128 
            net = slim.repeat(net, 2, slim.conv3d, 256, scope='conv3')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2], stride=[2, 2, 2], padding='SAME', scope='max_pool3')
            if non_local:
                net = NonLocalBlock(net, 256, scope='nonlocal_block_3')

            # Batch * 4 * 14 * 14 * 256
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv4')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2], stride=[2, 2, 2], padding='SAME', scope='max_pool4')
            if non_local:
                net = NonLocalBlock(net, 512, scope='nonlocal_block_4')
            # Batch * 2 * 7 * 7 * 512
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv5')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2], stride=[2, 2, 2], padding='SAME', scope='max_pool5')

            # Batch * 1 * 4 * 4 * 512
            net = tf.reshape(net, [-1, 512 * 4 * 4])
            net = slim.fully_connected(net, 4096, weights_regularizer=slim.l2_regularizer(0.0005), scope='fc6')
            net = slim.dropout(net, keep_pro, scope='dropout1')
            net = slim.fully_connected(net, 4096, weights_regularizer=slim.l2_regularizer(0.0005), scope='fc7')
            net = slim.dropout(net, keep_pro, scope='dropout2')
            out = slim.fully_connected(net, num_classes, weights_regularizer=slim.l2_regularizer(0.0005),
                                       activation_fn=None, scope='out')

            return out
Example #22
def conv21d(inputs,
            filters,
            kernel_size,
            strides=1,
            is_training=False,
            scope=None,
            dilation=1,
            data_format=''):
    """Returns conv(2+1)D with default values."""

    del data_format
    del dilation
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size, kernel_size]
    if isinstance(strides, int):
        strides = [strides, strides, strides]

    init = initializers.variance_scaling_initializer
    inputs = slim.conv3d(inputs,
                         filters // 2,
                         kernel_size=[kernel_size[0], 1, 1],
                         stride=[strides[0], 1, 1],
                         padding='SAME',
                         activation_fn=None,
                         biases_initializer=None,
                         normalizer_fn=None,
                         scope=scope,
                         weights_initializer=init(factor=2.0,
                                                  mode='FAN_IN',
                                                  uniform=False))
    inputs = batch_norm_relu(inputs, is_training, relu=True)

    inputs = slim.conv3d(inputs,
                         filters,
                         kernel_size=[1, kernel_size[1], kernel_size[2]],
                         stride=[1, strides[1], strides[2]],
                         padding='SAME',
                         activation_fn=None,
                         biases_initializer=None,
                         normalizer_fn=None,
                         scope=scope + 's',
                         weights_initializer=init(factor=2.0,
                                                  mode='FAN_IN',
                                                  uniform=False))
    return inputs
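
The point of the (2+1)D factorization is parameter and compute savings: a k x k x k kernel becomes a temporal k x 1 x 1 conv into filters // 2 channels followed by a spatial 1 x k x k conv. A quick sanity check of the arithmetic and shapes, with hypothetical sizes:

# Full 3-D conv, kernel 3, 32 -> 64 channels: 3*3*3*32*64 = 55,296 weights.
# The (2+1)-D pair above (32 intermediate channels):
#   temporal [3,1,1]: 3*32*32 = 3,072; spatial [1,3,3]: 9*32*64 = 18,432 -> 21,504 total.
x = tf.placeholder(tf.float32, [2, 8, 56, 56, 32])
y = conv21d(x, filters=64, kernel_size=3, strides=2, scope='c21d_a')
# y: [2, 4, 28, 28, 64]; the second conv reuses scope + 's' to avoid a name clash.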
Example #23
def process_input(inputs, num_channels_intermediate, num_channels_out):
    """Processes input tensors to model with some convs."""
    net = slim.conv3d(inputs,
                      num_outputs=num_channels_intermediate,
                      kernel_size=[3, 3, 3],
                      stride=[1, 1, 1],
                      activation_fn=tf.nn.relu)
    net = model_block(net, num_channels_intermediate, num_channels_out, 1)
    return net
Example #24
    def _mapping_network(self, feature3d, trainable, reuse):
        """ Maps warped features + eye vector into some common representation. """
        with tf.variable_scope('Mapping', reuse=reuse) as scope:
            x = feature3d
            # x = slim.conv3d(x, 64, kernel_size=[1, 1, 1], trainable=trainable, activation_fn=tf.nn.relu)
            x = slim.conv3d(x, 128, kernel_size=[1, 1, 1], trainable=trainable, activation_fn=tf.nn.relu)

            variables = tf.contrib.framework.get_variables(scope)

            return x, variables
Example #25
def shortcut(inputs, num_input, num_output, stride):
    """Creates a shortcut (either a skip connection or a 1x1x1 convolution)."""
    if num_input == num_output:
        return inputs
    else:
        return slim.conv3d(inputs,
                           num_outputs=num_output,
                           kernel_size=[1, 1, 1],
                           stride=[stride, stride, stride],
                           activation_fn=None)
Example #26
    def cost_volume(self, left_feature, right_feature):
        cost_aggre = cost_volume_aggre(left_feature, right_feature, 4, 192)
        with tf.name_scope('CostVolume'):
            cost_volume1 = conv3d_bolck(cost_aggre, 32, [3, 3, 3], self.is_training)
            cost_volume2 = conv3d_bolck(cost_volume1, 32, [3, 3, 3], self.is_training)
            cost_volume3 = conv3d_bolck(cost_volume2, 32, [3, 3, 3], self.is_training)
            cost_volume4 = conv3d_bolck(cost_volume3, 32, [3, 3, 3], self.is_training)
        output = slim.conv3d(cost_volume4, 1, [3, 3, 3], padding='SAME', activation_fn=None)

        return tf.squeeze(output, 4)
Example #27
def Transition(_X, in_channels):
    _X = tf.layers.batch_normalization(_X, training=IS_TRAIN)
    _X = tf.nn.relu(_X)
    _X = slim.conv3d(_X,
                     in_channels,
                     kernel_size=[1, 1, 1],
                     stride=1,
                     biases_initializer=None)
    _X = slim.avg_pool3d(_X, [2, 2, 2], stride=2, padding='SAME')
    return _X
Example #28
    def _build_network_slim(self,
                            inputs,
                            spatial_squeeze=False,
                            scope='DualCamNet'):
        """
        Builds a DualCamNet network for classification using a 3D temporal convolutional layer with 7x1x1 filters.
        """

        with tf.variable_scope(scope, 'DualCamNet', [inputs]) as sc:
            end_points_collection = sc.original_name_scope + '_end_points'

            # Collect outputs for convolution2d and max_pool2d
            with slim.arg_scope([slim.layers.conv2d, slim.layers.max_pool2d],
                                outputs_collections=[end_points_collection]):
                # ----------- 1st layer group ---------------
                net = tf.reshape(inputs,
                                 shape=(-1, self.num_frames, self.height,
                                        self.width, self.channels))
                net = slim.conv3d(net,
                                  self.channels, [7, 1, 1],
                                  scope='conv1',
                                  padding='SAME')
                net = tf.reshape(net,
                                 shape=(-1, self.height, self.width,
                                        self.channels))
                # ----------- 2nd layer group ---------------
                net = slim.conv2d(net,
                                  32, [5, 5],
                                  scope='conv2',
                                  padding='SAME')
                net = slim.max_pool2d(net, [2, 2], scope='pool2')
                # ----------- 3rd layer group ---------------
                net = slim.conv2d(net,
                                  64, [5, 5],
                                  scope='conv3',
                                  padding='SAME')
                net = slim.max_pool2d(net, [2, 2], scope='pool3')
                # ----------- 4th layer group ---------------
                # Use convolution2d instead of fully_connected layers
                net = slim.conv2d(net,
                                  1024,
                                  kernel_size=9,
                                  stride=12,
                                  scope='fc1',
                                  padding='VALID')

                # Convert end_points_collection into a end_point dictionary
                end_points = slim.layers.utils.convert_collection_to_dict(
                    end_points_collection)

                if spatial_squeeze:
                    net = tf.squeeze(net, [1, 2], name='fc1/squeezed')
                    end_points[sc.name + '/fc1'] = net

                return net, end_points
Example #29
def unpool(inputs):

    global unpool_idx
    shape = inputs.get_shape().as_list()

    res = resize3D(inputs, 2.0, 2.0, 2.0)
    res = slim.conv3d(res, num_outputs=shape[-1], kernel_size=[3, 3, 3], stride=1, scope='unpool_' + str(unpool_idx),
                      activation_fn=tf.nn.relu)
    res = slim.batch_norm(res, activation_fn=tf.nn.relu)
    unpool_idx += 1
    return res
Example #30
def pooling_aggregator(unproj_grids,
                       channels,
                       FLAGS,
                       trainable=True,
                       reuse=False,
                       is_training=True,
                       scope_name='aggr_64'):

    unproj_grids = collapse_dims(unproj_grids)

    with tf.variable_scope(scope_name, reuse=reuse) as scope:

        #a simple 1x1 convolution -- no BN
        feats = slim.conv3d(unproj_grids,
                            channels,
                            activation_fn=None,
                            kernel_size=1,
                            stride=1,
                            trainable=trainable)

    l = FLAGS.max_episode_length
    # integer division: the collapsed leading dim is (batch * episode_length)
    uncollapse = lambda x: uncollapse_dims(x,
                                           x.get_shape().as_list()[0] // l, l)

    feats = uncollapse(feats)
    unproj_grids = uncollapse(unproj_grids)

    #B x E x V x V X V x C

    def fn_pool(feats, pool_fn, id_, givei=False):
        outputs = []
        base = id_
        for i in range(FLAGS.max_episode_length):
            if givei:
                base = pool_fn(feats[:, i], base, i)
            else:
                base = pool_fn(feats[:, i], base)
            outputs.append(base)
        return tf.stack(outputs, axis=1)

    # return tf.concat([
    #     fn_pool(unproj_grids, tf.maximum, unproj_grids[:,0]),
    #     fn_pool(unproj_grids, tf.minimum, unproj_grids[:,0]),
    #     fn_pool(feats, tf.maximum, feats[:,0])
    # ], axis = -1)

    #max may be a bad idea
    #return fn_pool(feats, tf.maximum, feats[:,0])

    return fn_pool(feats,
                   lambda x, prev, i: i / (i + 1.0) * prev + 1 / (i + 1.0) * x,
                   feats[:, 0],
                   givei=True)
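
The pooling lambda passed to fn_pool is an incremental mean: after step i, the accumulator holds the average of the first i + 1 feature grids. A scalar check of the same recurrence:

# avg_i = i/(i+1) * avg_{i-1} + 1/(i+1) * x_i
xs = [2.0, 4.0, 9.0]
avg = xs[0]
for i, x in enumerate(xs[1:], start=1):
    avg = i / (i + 1.0) * avg + 1 / (i + 1.0) * x
print(avg)  # 5.0 == (2 + 4 + 9) / 3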