def conv3d(batch_input, depth, height, width, output_channel, stride,
           use_bias=False, scope='conv3d'):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # the two original branches differed only in biases_initializer;
        # slim's default zeros initializer is restored when use_bias is set
        return slim.conv3d(
            batch_input, output_channel, [depth, height, width], stride,
            'SAME', data_format="NDHWC", activation_fn=None,
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.zeros_initializer() if use_bias else None)
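# Hedged usage sketch for the wrapper above; the clip shape and the
# `frames` placeholder are illustrative assumptions, not from the repo.
def _example_conv3d_wrapper_usage():
    frames = tf.placeholder(tf.float32, [None, 16, 112, 112, 3])
    # a 3x3x3 convolution producing 64 channels in NDHWC layout
    return conv3d(frames, depth=3, height=3, width=3, output_channel=64,
                  stride=1, use_bias=True)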
def resnet_k(net, kernel_size=3, num_out=None, scale=1.0,
             activation_fn=tf.nn.relu, scope=None, reuse=None):
    """ general residual model """
    num = int(net.shape[-1].value)
    if num_out is None:
        num_out = num
    num2 = num_out >> 1
    num4 = num2 >> 1
    sc_current = 'residual_{}_{}'.format(kernel_size, num2)
    with tf.variable_scope(scope, sc_current, [net], reuse=reuse):
        with tf.variable_scope('branch0'):
            tower0 = slim.conv3d(net, num2, 1, stride=1)
        with tf.variable_scope('branch1'):
            # equivalent to 3x3
            tower1 = slim.conv3d(net, num4, 1, stride=1)
            tower1 = slim.conv3d(tower1, num2, kernel_size, stride=1)
        mixed = tf.concat(axis=-1, values=[tower0, tower1])
        mixup = slim.conv3d(
            mixed, num_out, 1, stride=1,
            normalizer_fn=None, activation_fn=None, scope='mixup')
        if num != num_out:
            net = slim.conv3d(net, num_out, 1, stride=1)
        net += mixup * scale
        if activation_fn is not None:
            net = activation_fn(net)
    return net
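# Hedged sketch of stacking resnet_k blocks (shapes are assumptions):
# the first call widens 32 -> 64 channels, which triggers the 1x1x1
# input projection before the residual add.
def _example_resnet_k_usage():
    vol = tf.placeholder(tf.float32, [None, 16, 16, 16, 32])
    net = resnet_k(vol, kernel_size=3, num_out=64, scope='res_a')
    net = resnet_k(net, kernel_size=3, scope='res_b')  # keeps 64 channels
    return net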
def block3d(input, num_filters, stride=1, use_final_relu=True):
    # channels-last (NDHWC), so the channel count is the last dimension;
    # the original indexed [1], which is the depth dimension in this layout
    num_filters_in = input.get_shape().as_list()[-1]

    # residual
    residual = slim.conv3d(input, num_filters, kernel_size=(3, 3, 3),
                           stride=(stride, stride, stride), scope='conv1')
    residual = slim.batch_norm(residual, scope='bn_1')
    residual = tf.nn.relu(residual)
    residual = slim.conv3d(residual, num_filters, kernel_size=(3, 3, 3),
                           stride=1, scope='conv2')
    residual = slim.batch_norm(residual, scope='bn_2')

    # identity
    shortcut = input
    if stride != 1 or num_filters_in != num_filters:
        shortcut = slim.conv3d(input, num_filters, kernel_size=(1, 1, 1),
                               stride=(stride, stride, stride),
                               scope='shortcut')
        shortcut = slim.batch_norm(shortcut, scope='bn_3')

    out = shortcut + residual
    if use_final_relu:
        out = tf.nn.relu(out)
    return out
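# Hedged sketch (assumed shapes): a strided block3d halves every spatial
# dimension and projects the shortcut because the channel count changes.
def _example_block3d_usage():
    feats = tf.placeholder(tf.float32, [None, 8, 28, 28, 64])
    with tf.variable_scope('example_block'):
        out = block3d(feats, num_filters=128, stride=2)
    return out  # -> [None, 4, 14, 14, 128]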
def _region_proposal(self, net_conv, is_training, initializer):
    # conv3d expects three spatial dims, so the kernels are [3, 3, 3] and
    # [1, 1, 1] (the original 2-D lists would be rejected by slim.conv3d)
    rpn = slim.conv3d(net_conv, cfg.RPN_CHANNELS, [3, 3, 3],
                      trainable=is_training,
                      weights_initializer=initializer,
                      scope="rpn_conv/3x3")
    self._act_summaries.append(rpn)
    rpn_cls_score = slim.conv3d(rpn, self._num_anchors * 2, [1, 1, 1],
                                trainable=is_training,
                                weights_initializer=initializer,
                                padding='VALID', activation_fn=None,
                                scope='rpn_cls_score')
    # change it so that the score has 2 as its channel size
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2,
                                                'rpn_cls_score_reshape')
    rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                               "rpn_cls_prob_reshape")
    rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]),
                             axis=1, name="rpn_cls_pred")
    rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                       self._num_anchors * 2, "rpn_cls_prob")
    # 6 regression targets per anchor for 3-D boxes
    rpn_bbox_pred = slim.conv3d(rpn, self._num_anchors * 6, [1, 1, 1],
                                trainable=is_training,
                                weights_initializer=initializer,
                                padding='VALID', activation_fn=None,
                                scope='rpn_bbox_pred')
    if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph,
        # for reproducibility
        with tf.control_dependencies([rpn_labels]):
            rois, _ = self._proposal_target_layer(rois, roi_scores,
                                                  "rpn_rois")
    else:
        if cfg.TEST.MODE == 'nms':
            rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                           "rois")
        elif cfg.TEST.MODE == 'top':
            rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred,
                                               "rois")
        else:
            raise NotImplementedError

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_cls_pred"] = rpn_cls_pred
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["rois"] = rois

    return rois
def _module_fn():
    """ Function building the module """
    feature_layer = tf.placeholder(
        tf.float32,
        shape=[None, None, None, None, nchannels],
        name='input')
    obs_layer = tf.placeholder(
        tf.float32,
        shape=[None, None, None, None, n_y],
        name='observations')

    # Builds the neural network
    net = slim.conv3d(feature_layer, 16, 5,
                      activation_fn=tf.nn.leaky_relu, padding='valid')
    # net = wide_resnet(feature_layer, 8, activation_fn=tf.nn.leaky_relu,
    #                   is_training=is_training)
    net = wide_resnet(net, 16, activation_fn=tf.nn.leaky_relu,
                      keep_prob=dropout, is_training=is_training)
    net = wide_resnet(net, 32, activation_fn=tf.nn.leaky_relu,
                      keep_prob=dropout, is_training=is_training)
    net = wide_resnet(net, 32, activation_fn=tf.nn.leaky_relu,
                      keep_prob=dropout, is_training=is_training)
    net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.tanh)

    # Define the probabilistic layer
    # out_rate = slim.conv3d(net, 1, 1, activation_fn=tf.nn.relu)
    # out_rate = tf.math.add(out_rate, 1e-6, name='rate')
    net = slim.conv3d(net, n_mixture * n_y, 1, activation_fn=tf.nn.relu)
    cube_size = tf.shape(obs_layer)[1]
    out_rate = tf.reshape(net, [-1, cube_size, cube_size, cube_size, n_y])
    out_rate = tf.math.add(out_rate, 1e-6, name='rate')
    pdf = tfd.Poisson(rate=out_rate)

    # Define a function for sampling, and a function for estimating
    # the log likelihood
    sample = tf.squeeze(pdf.sample())
    loglik = pdf.log_prob(obs_layer)

    hub.add_signature(inputs={'features': feature_layer,
                              'labels': obs_layer},
                      outputs={'sample': sample,
                               'loglikelihood': loglik})
def C3D(input, num_classes, keep_pro=0.5):
    with tf.variable_scope('C3D'):
        with slim.arg_scope([slim.conv3d],
                            padding='SAME',
                            weights_regularizer=slim.l2_regularizer(0.0005),
                            activation_fn=tf.nn.relu,
                            kernel_size=[3, 3, 3],
                            stride=[1, 1, 1]):
            net = slim.conv3d(input, 64, scope='conv1')
            net = slim.max_pool3d(net, kernel_size=[1, 2, 2],
                                  stride=[1, 2, 2], padding='SAME',
                                  scope='max_pool1')
            net = slim.conv3d(net, 128, scope='conv2')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool2')
            net = slim.repeat(net, 2, slim.conv3d, 256, scope='conv3')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool3')
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv4')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool4')
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv5')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool5')
            net = tf.reshape(net, [-1, 512 * 4 * 4])
            net = slim.fully_connected(
                net, 4096, weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc6')
            net = slim.dropout(net, keep_pro, scope='dropout1')
            net = slim.fully_connected(
                net, 4096, weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc7')
            net = slim.dropout(net, keep_pro, scope='dropout2')
            out = slim.fully_connected(
                net, num_classes,
                weights_regularizer=slim.l2_regularizer(0.0005),
                activation_fn=None, scope='out')
            return out
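# Hedged usage sketch: the reshape to 512 * 4 * 4 inside C3D hard-codes
# clips that pool down to 1x4x4x512, i.e. 16 frames of 112x112 RGB.
# The 101-class output below is an assumption (UCF101-sized), not fixed
# by the code.
def _example_c3d_usage():
    clips = tf.placeholder(tf.float32, [None, 16, 112, 112, 3])
    logits = C3D(clips, num_classes=101, keep_pro=0.5)
    return tf.nn.softmax(logits)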
def _cnn_embedding(self, inputs):
    net = slim.conv3d(inputs, 16, [1, 3, 3])
    net = slim.conv3d(net, 16, [1, 3, 3])
    net = slim.max_pool3d(net, [1, 3, 3], [1, 2, 2], 'SAME')
    net = slim.conv3d(net, 32, [1, 3, 3])
    net = slim.conv3d(net, 32, [1, 3, 3])
    net = slim.max_pool3d(net, [1, 3, 3], [1, 2, 2], 'SAME')
    net = tf.reshape(net, [-1, tf.shape(inputs)[1], tf.size(net[0][0])])
    outputs = slim.fully_connected(net, 100, tf.nn.tanh)
    return outputs
def _inference3D(self, x, is_training):
    """ Inference part of the network.
        Per view we get a 46x46x128 encoding (and maybe a 46x46x3 eye map).
        We unproject into a hand centered volume of dimension 64,
        so the input dim is: 64x64x64 x 8*128 = 64x64x64 x 1024.
    """
    with tf.variable_scope('PoseNet3D') as scope:
        num_chan = self.config.num_kp
        skips = list()
        scorevolumes = list()
        skips.append(None)  # this is needed for the final upsampling step

        # 3D encoder
        # chan_list = [64, 128, 128, 256]
        chan_list = [32, 64, 64, 64]
        for chan in chan_list:
            # voxel sizes: 32, 16, 8, 4
            x = self._enc3D_step(x, chan, dim_red=True,
                                 is_training=is_training)
            skips.append(x)
        skips.pop()  # the last one is of no use

        # bottleneck in the middle
        x = slim.conv3d(x, 64, kernel_size=[1, 1, 1],
                        trainable=is_training, activation_fn=tf.nn.relu)

        # make initial guess of the scorevolume
        scorevol = slim.conv3d_transpose(x, num_chan,
                                         kernel_size=[32, 32, 32],
                                         trainable=is_training, stride=16,
                                         activation_fn=None)
        scorevolumes.append(scorevol)

        # 3D decoder
        kernels = [16, 8, 4]
        # chan_list = [64, 64, 64]
        chan_list = [32, 32, 32]
        for chan, kernel in zip(chan_list, kernels):
            x, scorevol = self._dec3D_stop(x, skips.pop(), scorevol,
                                           chan, num_chan, kernel,
                                           is_training)
            scorevolumes.append(scorevol)

        # final decoder step
        x = slim.conv3d_transpose(x, 64, kernel_size=[4, 4, 4],
                                  trainable=is_training, stride=2,
                                  activation_fn=tf.nn.relu)
        scorevol_delta = slim.conv3d(x, num_chan, kernel_size=[1, 1, 1],
                                     trainable=is_training,
                                     activation_fn=None)
        scorevol = scorevol_delta
        scorevolumes.append(scorevol)

        variables = tf.contrib.framework.get_variables(scope)

        if self.net_config.use_softargmax:
            xyz_vox_list = [softargmax3D(svol, output_vox_space=True)
                            for svol in scorevolumes]
            score_list = [tf.reduce_mean(svol, [1, 2, 3])
                          for svol in scorevolumes]
        else:
            xyz_vox_list = [argmax_3d(svol) for svol in scorevolumes]
            score_list = [tf.reduce_max(svol, [1, 2, 3])
                          for svol in scorevolumes]

        return scorevolumes, xyz_vox_list, score_list, variables
def module_fn():
    '''Define network here'''
    x = tf.placeholder(
        tf.float32,
        shape=[None, cube_sizeft, cube_sizeft, cube_sizeft, nchannels],
        name='input')
    y = tf.placeholder(tf.float32,
                       shape=[None, cube_size, cube_size, cube_size, 1],
                       name='labels')
    keepprob = tf.placeholder(tf.float32, name='keepprob')
    print('Shape of training and testing data is : ', x.shape, y.shape,
          file=fname)

    # wregwt, bregwt = 0.001, 0.001
    if wregwt:
        wreg = slim.regularizers.l2_regularizer(wregwt)
    else:
        wreg = None
    if bregwt:
        breg = slim.regularizers.l2_regularizer(bregwt)
    else:
        breg = None
    print('Regularizing weights are : ', wregwt, bregwt, file=fname)

    net = slim.conv3d(x, 16, 5, activation_fn=tf.nn.leaky_relu,
                      padding='valid', weights_regularizer=wreg,
                      biases_regularizer=breg)
    net = wide_resnet(net, 32, keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net, 64, keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net, 32, keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = wide_resnet(net, 16, keep_prob=keepprob,
                      activation_fn=tf.nn.leaky_relu)
    net = slim.conv3d(net, 1, 3, activation_fn=None)
    net = tf.identity(net, name='logits')
    pred = tf.nn.sigmoid(net, name='prediction')

    inputs = dict(input=x, label=y, keepprob=keepprob)
    outputs = dict(default=net, prediction=pred)
    hub.add_signature(inputs=inputs, outputs=outputs)
def cnn3d_example(inputs, pkeep_conv, pkeep_hidden):
    """Small LeNet-style 3D CNN: two VALID convs with dropout, then three
    fully connected layers down to 16 outputs."""
    print(inputs.shape)
    net = slim.conv3d(inputs=inputs, num_outputs=2, kernel_size=3,
                      padding='VALID', activation_fn=tf.nn.relu,
                      weights_initializer=tfinit.truncated_normal(
                          mean=0, stddev=0.05),
                      biases_initializer=tfinit.zeros(), scope='conv1')
    net = slim.dropout(net, pkeep_conv)
    print(net.shape)
    net = slim.conv3d(inputs=net, num_outputs=8, kernel_size=3,
                      padding='VALID', activation_fn=tf.nn.relu,
                      weights_initializer=tfinit.truncated_normal(
                          mean=0, stddev=0.05),
                      biases_initializer=tfinit.zeros(), scope='conv2')
    net = slim.flatten(net)
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net, num_outputs=200, scope='fc3',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net, num_outputs=84, scope='fc4',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    net = slim.dropout(net, pkeep_hidden)
    print(net.shape)
    net = slim.fully_connected(inputs=net, num_outputs=16,
                               activation_fn=tf.identity, scope='output',
                               weights_initializer=tfinit.truncated_normal(
                                   mean=0, stddev=0.05),
                               biases_initializer=tfinit.zeros())
    return net
def c3d_small(net, reuse=None, is_training=True, scope='c3d', use_fc=True):
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(c3d_argscope(activation=tf.nn.relu,
                                         kernel_size=3, padding='SAME',
                                         training=is_training)):
            end_points = {}
            net = slim.conv3d(net, 64, scope='conv_1')
            print('conv_1 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_1'] = net
            net = slim.max_pool3d(net, kernel_size=(1, 2, 2),
                                  stride=(1, 2, 2))

            net = slim.conv3d(net, 128, scope='conv_2')
            print('conv_2 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_2'] = net
            net = slim.max_pool3d(net, kernel_size=2, stride=2)

            net = slim.conv3d(net, 256, scope='conv_3')
            print('conv_3 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_3'] = net
            net = slim.max_pool3d(net, kernel_size=2, stride=2)

            net = slim.conv3d(net, 256, scope='conv_4')
            print('conv_4 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_4'] = net
            net = slim.max_pool3d(net, kernel_size=2, stride=2)

            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]])
            net = slim.conv3d(net, 256, scope='conv_5', padding='VALID')
            print('conv_5 feats: {}'.format(net.get_shape().as_list()))
            end_points['conv_5'] = net
            net = slim.max_pool3d(net, kernel_size=2, stride=2)
            end_points['maxpool_5'] = net

            net = tf.reshape(net, [net.get_shape().as_list()[0], -1])
            print('flattened feats: {}'.format(net.get_shape().as_list()))
            if use_fc:
                net = slim.fully_connected(net, 2048, scope='fc_1')
                print('fc_1 feats: {}'.format(net.get_shape().as_list()))
                end_points['fc_1'] = net
                net = slim.fully_connected(net, 2048, scope='fc_2')
                print('fc_2 feats: {}'.format(net.get_shape().as_list()))
                end_points['fc_2'] = net
    return net, end_points
def pullout8(net, out_dim, is_training, scope=None, reuse=None):
    """ supposed to work best with 8x8 input """
    with tf.variable_scope(scope, 'pullout8', [net], reuse=reuse):
        net = inresnet3d.conv_maxpool(net, scope='conv_pool_8')
        print(net.shape)
        net = inresnet3d.conv_maxpool(net, scope='conv_pool_4')
        print(net.shape)
        # fully connected layer implemented as a VALID conv over the
        # remaining volume (commented-out avg_pool3d / reduce variants
        # from the original were dropped)
        shape2 = net.get_shape().as_list()
        fc_num = shape2[4] * 2
        net = slim.conv3d(net, fc_num, shape2[1:4], padding='VALID',
                          scope='fullconn4')
        print(net.shape)
        net = slim.flatten(net)
        net = slim.dropout(net, 0.5, scope='dropout8')
        print(net.shape)
        net = slim.fully_connected(net, out_dim, activation_fn=None,
                                   normalizer_fn=None, scope='output8')
    return net
def conv3d(inputs, filters, kernel_size, strides=1, scope=None, dilation=1,
           data_format=''):
    """Returns Conv3D wrapped with default values."""
    del data_format
    del dilation
    init = initializers.variance_scaling_initializer
    return slim.conv3d(inputs,
                       filters,
                       kernel_size=kernel_size,
                       stride=strides,
                       padding='SAME',
                       activation_fn=None,
                       biases_initializer=None,
                       normalizer_fn=None,
                       scope=scope,
                       weights_initializer=init(factor=2.0,
                                                mode='FAN_IN',
                                                uniform=False))
def em_branch(input, prefix='em_branch_'):
    # input should be of shape [batch_size, frame_count, height, width, 16]
    conv = slim.conv3d(input, 8, [3, 3, 3], rate=1, activation_fn=lrelu,
                       scope=prefix + 'g_conv1', padding='SAME')
    padding_method = 'VALID'
    conv1 = slim.conv3d(conv, 16, [5, 5, 5], rate=1, activation_fn=lrelu,
                        scope=prefix + 's_conv1', padding=padding_method)
    conv2 = slim.conv3d(conv1, 16, [5, 5, 5], rate=1, activation_fn=lrelu,
                        scope=prefix + 's_conv2', padding=padding_method)
    conv3 = slim.conv3d(conv2, 16, [5, 5, 5], rate=1, activation_fn=lrelu,
                        scope=prefix + 's_conv3', padding=padding_method)

    # The decoder mirrors the three VALID 5x5x5 convs with transposed
    # convs, restoring the original spatial extent. (An earlier hand-rolled
    # tf.nn.conv3d_transpose version with explicit filter variables and
    # shape placeholders was replaced by the slim.conv3d_transpose calls
    # below.)
    deconv1 = slim.conv3d_transpose(conv3, 16, [5, 5, 5],
                                    activation_fn=lrelu,
                                    scope=prefix + 's_deconv1',
                                    padding=padding_method)
    deconv2 = slim.conv3d_transpose(deconv1, 8, [5, 5, 5],
                                    activation_fn=lrelu,
                                    scope=prefix + 's_deconv2',
                                    padding=padding_method)
    deconv3 = slim.conv3d_transpose(deconv2, 3, [5, 5, 5],
                                    activation_fn=lrelu,
                                    scope=prefix + 's_deconv3',
                                    padding=padding_method)

    if DEBUG == 1:
        print('conv.shape:', conv.shape)
        print('conv1.shape:', conv1.shape)
        print('conv2.shape:', conv2.shape)
        print('conv3.shape:', conv3.shape)
        print('deconv1.shape:', deconv1.shape)
        print('deconv2.shape:', deconv2.shape)
        print('deconv3.shape:', deconv3.shape)

    return deconv3
def NonLocalBlock(input_x, out_channels, sub_sample=True, is_bn=True,
                  scope='NonLocalBlock'):
    # Standard embedded-Gaussian non-local block (Wang et al., 2018),
    # completed here from the placeholder comments in the original.
    batchsize, clips, height, width, in_channels = \
        input_x.get_shape().as_list()
    with tf.variable_scope(scope) as sc:
        with tf.variable_scope('g') as scope:
            g = slim.conv3d(input_x, out_channels, kernel_size=1, stride=1,
                            scope='g')
            if sub_sample:
                g = slim.max_pool3d(g, [1, 2, 2], stride=[1, 2, 2],
                                    scope='g_max_pool')

        with tf.variable_scope('phi') as scope:
            phi = slim.conv3d(input_x, out_channels, kernel_size=1,
                              stride=1, scope='phi')
            if sub_sample:
                phi = slim.max_pool3d(phi, [1, 2, 2], stride=[1, 2, 2],
                                      scope='phi_max_pool')

        with tf.variable_scope('theta') as scope:
            theta = slim.conv3d(input_x, out_channels, kernel_size=1,
                                stride=1, scope='theta')

        # flatten space-time to [batch, positions, channels]
        g_x = tf.reshape(g, [batchsize, -1, out_channels])
        theta_x = tf.reshape(theta, [batchsize, -1, out_channels])
        phi_x = tf.reshape(phi, [batchsize, -1, out_channels])
        transposed_phi_x = tf.transpose(phi_x, [0, 2, 1])

        f = tf.matmul(theta_x, transposed_phi_x)  # (theta, phi) matmul
        f_softmax = tf.nn.softmax(f, axis=-1)     # softmax over positions
        y = tf.matmul(f_softmax, g_x)             # (f_softmax, g)
        y = tf.reshape(y, [batchsize, clips, height, width,
                           out_channels])         # reshape

        with tf.variable_scope('w') as scope:
            w_y = slim.conv3d(y, in_channels, kernel_size=1, stride=1,
                              scope='w')          # Z operation
        if is_bn:
            w_y = slim.batch_norm(w_y)

        z = input_x + w_y                         # add y to x
        return z
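# Hedged sketch: dropping the non-local block into a feature map whose
# static shape is fully known (the reshapes above need a static batch
# size). The block is residual, so the output shape matches the input.
def _example_nonlocal_usage():
    feats = tf.placeholder(tf.float32, [4, 8, 28, 28, 128])
    return NonLocalBlock(feats, out_channels=64, sub_sample=True,
                         scope='nonlocal_example')  # -> [4, 8, 28, 28, 128]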
def conv_maxpool(net, scope=None, reuse=None):
    """ simple conv + max_pool """
    num = int(net.shape[-1].value)
    sc_current = 'conv_maxpool_{}'.format(num)
    with tf.variable_scope(scope, sc_current, [net], reuse=reuse):
        net = slim.conv3d(net, 2 * num, 3, stride=1)
        net = slim.max_pool3d(net, 3, stride=2)
    return net
def convT(_X, out_channels, kernel_size=[3, 1, 1], stride=1,
          padding='VALID'):
    return slim.conv3d(_X, out_channels, kernel_size=kernel_size,
                       stride=stride, padding=padding,
                       weights_initializer=weights_initializer,
                       biases_initializer=None)
def conv3d(x, o_dim, data_format='NDHWC', name=None, k=4, s=2, act=None):
    return slim.conv3d(x, o_dim, k, stride=s, activation_fn=act,
                       scope=name, data_format=data_format)
def resnet3d_18(net, num_out, reuse=tf.AUTO_REUSE, training=True,
                scope='resnet', blocks=('2d', '2d', '3d', '3d'),
                module_sizes=(2, 2, 2, 2),
                filter_sizes=(64, 128, 256, 512), *args, **kwargs):
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(resnet_arg_scope(training=training)):
            feats = {}
            net = slim.conv3d(net, 64, kernel_size=(1, 7, 7),
                              stride=(1, 2, 2), scope='conv0')
            net = slim.batch_norm(net, scope='bn_0')
            net = tf.nn.relu(net)
            net = slim.max_pool3d(net, kernel_size=(1, 3, 3),
                                  stride=(1, 2, 2))
            print('Shape conv_1: {}'.format(net.get_shape().as_list()))
            feats['conv_1'] = net

            block_id = 0
            for i, blocks_in_module in enumerate(module_sizes):
                for j in range(blocks_in_module):
                    block_id += 1
                    stride = 2 if j == 0 and i > 0 else 1
                    with tf.variable_scope("res%d.%d" % (i, j)):
                        print('Block {}'.format(block_id))
                        if blocks[i] == '2d':
                            print('2D block')
                            net = block2d(net, filter_sizes[i], stride)
                        elif blocks[i] == '3d':
                            print('3D block')
                            net = block3d(net, filter_sizes[i], stride)
                        else:
                            # fail loudly instead of silently setting
                            # net = None as the original did
                            raise ValueError(
                                'Unknown block type: {}'.format(blocks[i]))
                    print('Shape {} {}: {}'.format(
                        i, j, net.get_shape().as_list()))
                    feats['block_{}'.format(block_id)] = net
                feats['conv_{}'.format(i + 2)] = net
                print('Shape conv_{}: {}'.format(
                    i + 2, net.get_shape().as_list()))

            net = tf.reduce_mean(net, [1, 2, 3])
            feats['pre_logit'] = net
            print('Shape pre_logit: {}'.format(net.get_shape().as_list()))
            logits = slim.fully_connected(
                net, num_out, activation_fn=None,
                weights_initializer=tf.random_normal_initializer(
                    stddev=1e-3))
    return logits, feats
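# Hedged sketch (assumed clip size and class count): a mixed 2D/3D
# ResNet-18 where `blocks` selects per-module between block2d (1x3x3)
# and block3d residual units; resnet_arg_scope and block2d are helpers
# defined elsewhere in this repo.
def _example_resnet3d_18_usage():
    clips = tf.placeholder(tf.float32, [2, 8, 112, 112, 3])
    logits, feats = resnet3d_18(clips, num_out=400, training=False)
    return logits, feats['pre_logit']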
def _create_dqn_two_stream(self, rgb, vox, trainable=True, if_bn=False,
                           reuse=False, scope_name='dqn_two_stream'):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        if if_bn:
            batch_normalizer_gen = slim.batch_norm
            batch_norm_params_gen = {'is_training': self.is_training,
                                     'decay': self.FLAGS.bn_decay}
        else:
            # self._print_arch('=== NOT Using BN for GENERATOR!')
            batch_normalizer_gen = None
            batch_norm_params_gen = None

        if self.FLAGS.if_l2Reg:
            weights_regularizer = slim.l2_regularizer(1e-5)
        else:
            weights_regularizer = None

        with slim.arg_scope([slim.fully_connected],
                            activation_fn=self.activation_fn,
                            trainable=trainable,
                            normalizer_fn=batch_normalizer_gen,
                            normalizer_params=batch_norm_params_gen,
                            weights_regularizer=weights_regularizer):
            net_rgb = slim.conv2d(rgb, 64, kernel_size=[3, 3],
                                  stride=[2, 2], padding='SAME',
                                  scope='rgb_conv1')
            net_rgb = slim.conv2d(net_rgb, 128, kernel_size=[3, 3],
                                  stride=[2, 2], padding='SAME',
                                  scope='rgb_conv2')
            net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3, 3],
                                  stride=[2, 2], padding='SAME',
                                  scope='rgb_conv3')
            net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3, 3],
                                  stride=[2, 2], padding='SAME',
                                  scope='rgb_conv4')
            net_rgb = slim.conv2d(net_rgb, 256, kernel_size=[3, 3],
                                  stride=[2, 2], padding='SAME',
                                  scope='rgb_conv5')
            net_rgb = slim.flatten(net_rgb, scope='rgb_flatten')

            # conv3d takes three spatial dims; the original 2-D
            # kernel/stride lists would be rejected by slim.conv3d
            net_vox = slim.conv3d(vox, 64, kernel_size=[3, 3, 3],
                                  stride=[1, 1, 1], padding='SAME',
                                  scope='vox_conv1')
            net_vox = slim.conv3d(net_vox, 128, kernel_size=[3, 3, 3],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='vox_conv2')
            net_vox = slim.conv3d(net_vox, 256, kernel_size=[3, 3, 3],
                                  stride=[1, 1, 1], padding='SAME',
                                  scope='vox_conv3')
            net_vox = slim.conv3d(net_vox, 256, kernel_size=[3, 3, 3],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='vox_conv4')
            net_vox = slim.conv3d(net_vox, 512, kernel_size=[3, 3, 3],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='vox_conv5')
            net_vox = slim.flatten(net_vox, scope='vox_flatten')

            net_feat = tf.concat([net_rgb, net_vox], axis=1)
            net_feat = slim.fully_connected(net_feat, 4096, scope='fc6')
            net_feat = slim.fully_connected(net_feat, 4096, scope='fc7')
            logits = slim.fully_connected(net_feat, self.FLAGS.action_num,
                                          activation_fn=None, scope='fc8')

            return tf.nn.softmax(logits), logits
def C3D(input_data, num_classes, keep_pro=0.5, non_local=False):
    with tf.variable_scope('C3D'):
        with slim.arg_scope([slim.conv3d],
                            padding='SAME',
                            weights_regularizer=slim.l2_regularizer(0.0005),
                            activation_fn=tf.nn.relu,
                            kernel_size=[3, 3, 3],
                            stride=[1, 1, 1]):
            # Batch * 16 * 112 * 112 * 3
            net = slim.conv3d(input_data, 64, scope='conv1')
            net = slim.max_pool3d(net, kernel_size=[1, 2, 2],
                                  stride=[1, 2, 2], padding='SAME',
                                  scope='max_pool1')
            # net = NonLocalBlock(net, 64, scope='nonlocal_block_1')

            # Batch * 16 * 56 * 56 * 64
            net = slim.conv3d(net, 128, scope='conv2')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool2')
            if non_local:
                net = NonLocalBlock(net, 128, scope='nonlocal_block_2')

            # Batch * 8 * 28 * 28 * 128
            net = slim.repeat(net, 2, slim.conv3d, 256, scope='conv3')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool3')
            if non_local:
                net = NonLocalBlock(net, 256, scope='nonlocal_block_3')

            # Batch * 4 * 14 * 14 * 256
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv4')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool4')
            if non_local:
                net = NonLocalBlock(net, 512, scope='nonlocal_block_4')

            # Batch * 2 * 7 * 7 * 512
            net = slim.repeat(net, 2, slim.conv3d, 512, scope='conv5')
            net = slim.max_pool3d(net, kernel_size=[2, 2, 2],
                                  stride=[2, 2, 2], padding='SAME',
                                  scope='max_pool5')

            # Batch * 1 * 4 * 4 * 512
            net = tf.reshape(net, [-1, 512 * 4 * 4])
            net = slim.fully_connected(
                net, 4096, weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc6')
            net = slim.dropout(net, keep_pro, scope='dropout1')
            net = slim.fully_connected(
                net, 4096, weights_regularizer=slim.l2_regularizer(0.0005),
                scope='fc7')
            net = slim.dropout(net, keep_pro, scope='dropout2')
            out = slim.fully_connected(
                net, num_classes,
                weights_regularizer=slim.l2_regularizer(0.0005),
                activation_fn=None, scope='out')
            return out
def conv21d(inputs, filters, kernel_size, strides=1, is_training=False,
            scope=None, dilation=1, data_format=''):
    """Returns conv(2+1)D with default values."""
    del data_format
    del dilation
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size, kernel_size]
    if isinstance(strides, int):
        strides = [strides, strides, strides]
    init = initializers.variance_scaling_initializer
    # temporal convolution: k x 1 x 1
    inputs = slim.conv3d(inputs,
                         filters // 2,
                         kernel_size=[kernel_size[0], 1, 1],
                         stride=[strides[0], 1, 1],
                         padding='SAME',
                         activation_fn=None,
                         biases_initializer=None,
                         normalizer_fn=None,
                         scope=scope,
                         weights_initializer=init(factor=2.0,
                                                  mode='FAN_IN',
                                                  uniform=False))
    inputs = batch_norm_relu(inputs, is_training, relu=True)
    # spatial convolution: 1 x k x k
    inputs = slim.conv3d(inputs,
                         filters,
                         kernel_size=[1, kernel_size[1], kernel_size[2]],
                         stride=[1, strides[1], strides[2]],
                         padding='SAME',
                         activation_fn=None,
                         biases_initializer=None,
                         normalizer_fn=None,
                         scope=scope + 's',
                         weights_initializer=init(factor=2.0,
                                                  mode='FAN_IN',
                                                  uniform=False))
    return inputs
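# Hedged note on the factorization above: a full k x k x k conv costs
# k^3 * C_in * C_out weights, while the temporal (k x 1 x 1) plus
# spatial (1 x k x k) pair costs roughly k * C_in * C_out / 2
# + k^2 * (C_out / 2) * C_out, with an extra BN + ReLU in between.
# Shapes below are assumptions for illustration.
def _example_conv21d_usage():
    clips = tf.placeholder(tf.float32, [None, 8, 56, 56, 64])
    return conv21d(clips, filters=128, kernel_size=3, strides=2,
                   is_training=False, scope='conv21d_example')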
def process_input(inputs, num_channels_intermediate, num_channels_out):
    """Processes input tensors to model with some convs."""
    net = slim.conv3d(inputs, num_outputs=num_channels_intermediate,
                      kernel_size=[3, 3, 3], stride=[1, 1, 1],
                      activation_fn=tf.nn.relu)
    net = model_block(net, num_channels_intermediate, num_channels_out, 1)
    return net
def _mapping_network(self, feature3d, trainable, reuse):
    """ Maps warped features + eye vector into some common
        representation. """
    with tf.variable_scope('Mapping', reuse=reuse) as scope:
        x = feature3d
        # x = slim.conv3d(x, 64, kernel_size=[1, 1, 1],
        #                 trainable=trainable, activation_fn=tf.nn.relu)
        x = slim.conv3d(x, 128, kernel_size=[1, 1, 1],
                        trainable=trainable, activation_fn=tf.nn.relu)
        variables = tf.contrib.framework.get_variables(scope)
        return x, variables
def shortcut(inputs, num_input, num_output, stride):
    """Creates a shortcut (either a skip connection or a 1x1x1
    convolution)."""
    if num_input == num_output:
        return inputs
    else:
        return slim.conv3d(inputs,
                           num_outputs=num_output,
                           kernel_size=[1, 1, 1],
                           stride=[stride, stride, stride],
                           activation_fn=None)
def cost_volume(self, left_feature, right_feature):
    cost_aggre = cost_volume_aggre(left_feature, right_feature, 4, 192)
    with tf.name_scope('CostVolume'):
        cost_volume1 = conv3d_bolck(cost_aggre, 32, [3, 3, 3],
                                    self.is_training)
        cost_volume2 = conv3d_bolck(cost_volume1, 32, [3, 3, 3],
                                    self.is_training)
        cost_volume3 = conv3d_bolck(cost_volume2, 32, [3, 3, 3],
                                    self.is_training)
        cost_volume4 = conv3d_bolck(cost_volume3, 32, [3, 3, 3],
                                    self.is_training)
        output = slim.conv3d(cost_volume4, 1, [3, 3, 3], padding='SAME',
                             activation_fn=None)
        return tf.squeeze(output, 4)
def Transition(_X, in_channels):
    _X = tf.layers.batch_normalization(_X, training=IS_TRAIN)
    _X = tf.nn.relu(_X)
    _X = slim.conv3d(_X, in_channels, kernel_size=[1, 1, 1], stride=1,
                     biases_initializer=None)
    _X = slim.avg_pool3d(_X, [2, 2, 2], stride=2, padding='SAME')
    return _X
def _build_network_slim(self, inputs, spatial_squeeze=False,
                        scope='DualCamNet'):
    """
    Builds a DualCamNet network for classification using a 3D temporal
    convolutional layer with 7x1x1 filters.
    """
    with tf.variable_scope(scope, 'DualCamNet', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for convolution2d and max_pool2d
        with slim.arg_scope([slim.layers.conv2d, slim.layers.max_pool2d],
                            outputs_collections=[end_points_collection]):
            # ----------- 1st layer group ---------------
            net = tf.reshape(inputs,
                             shape=(-1, self.num_frames, self.height,
                                    self.width, self.channels))
            net = slim.conv3d(net, self.channels, [7, 1, 1], scope='conv1',
                              padding='SAME')
            net = tf.reshape(net, shape=(-1, self.height, self.width,
                                         self.channels))
            # ----------- 2nd layer group ---------------
            net = slim.conv2d(net, 32, [5, 5], scope='conv2',
                              padding='SAME')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            # ----------- 3rd layer group ---------------
            net = slim.conv2d(net, 64, [5, 5], scope='conv3',
                              padding='SAME')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            # ----------- 4th layer group ---------------
            # Use convolution2d instead of fully_connected layers
            # (kernel_size=9, stride=12 preserved from the original
            # positional arguments)
            net = slim.conv2d(net, 1024, kernel_size=9, stride=12,
                              scope='fc1', padding='VALID')
            # Convert end_points_collection into an end_point dictionary
            end_points = slim.layers.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc1/squeezed')
                end_points[sc.name + '/fc1'] = net
            return net, end_points
def unpool(inputs):
    global unpool_idx
    shape = inputs.get_shape().as_list()
    res = resize3D(inputs, 2.0, 2.0, 2.0)
    res = slim.conv3d(res, num_outputs=shape[-1], kernel_size=[3, 3, 3],
                      stride=1, scope='unpool_' + str(unpool_idx),
                      activation_fn=tf.nn.relu)
    res = slim.batch_norm(res, activation_fn=tf.nn.relu)
    unpool_idx += 1
    return res
def pooling_aggregator(unproj_grids, channels, FLAGS, trainable=True,
                       reuse=False, is_training=True,
                       scope_name='aggr_64'):
    unproj_grids = collapse_dims(unproj_grids)
    with tf.variable_scope(scope_name, reuse=reuse) as scope:
        # a simple 1x1 convolution -- no BN
        feats = slim.conv3d(unproj_grids, channels, activation_fn=None,
                            kernel_size=1, stride=1, trainable=trainable)

        l = FLAGS.max_episode_length
        # integer division (//): the collapsed batch is
        # batch * episode_length
        uncollapse = lambda x: uncollapse_dims(
            x, x.get_shape().as_list()[0] // l, l)
        feats = uncollapse(feats)
        unproj_grids = uncollapse(unproj_grids)  # B x E x V x V x V x C

        def fn_pool(feats, pool_fn, id_, givei=False):
            outputs = []
            base = id_
            for i in range(FLAGS.max_episode_length):
                if givei:
                    base = pool_fn(feats[:, i], base, i)
                else:
                    base = pool_fn(feats[:, i], base)
                outputs.append(base)
            return tf.stack(outputs, axis=1)

        # max/min pooling over the episode was tried here as well,
        # but max may be a bad idea; use a running mean instead
        return fn_pool(feats,
                       lambda x, prev, i: i / (i + 1.0) * prev
                       + 1 / (i + 1.0) * x,
                       feats[:, 0], givei=True)
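# Worked check of the running-mean recursion above: with base = feats[:, 0],
# step i computes i/(i+1) * prev + 1/(i+1) * x, so after step i the output
# is the mean of views 0..i. E.g. i=1 gives 0.5*v0 + 0.5*v1, and
# i=2 gives (2/3)*mean(v0, v1) + (1/3)*v2 = (v0 + v1 + v2) / 3.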