def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  When stride > 1, then we do explicit zero-padding, followed by conv2d with
  'VALID' padding.

  Note that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=1,
                                    padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=stride,
                                    padding='SAME')

  is different when the input's height or width is even, which is why we add
  the current function. For more details, see
  ResnetUtilsTest.testConv2DSameEven().

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An int with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    return layers_lib.conv2d(
        inputs, num_outputs, kernel_size, stride=1, rate=rate,
        padding='SAME', scope=scope)
  else:
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    inputs = array_ops.pad(
        inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return layers_lib.conv2d(
        inputs, num_outputs, kernel_size, stride=stride, rate=rate,
        padding='VALID', scope=scope)
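# For reference, a minimal sketch of the `subsample` helper that the docstring
# above refers to. This is assumed to match tf.contrib.slim's
# resnet_utils.subsample: when factor > 1 it keeps every factor-th activation
# via a 1x1 max-pool.
def subsample(inputs, factor, scope=None):
  if factor == 1:
    return inputs
  return layers_lib.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)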
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6],
                       biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2,
                       biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2,
                       biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    # to deactivate dropout on the dense layer, set keep_prob=1
    Y5d = layers.dropout(Y5, keep_prob=0.75, noise_shape=None,
                         is_training=mode == learn.ModeKeys.TRAIN)
    Ylogits = layers.linear(Y5d, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)

    loss = conv_model_loss(Ylogits, Y_, mode)
    train_op = conv_model_train_op(loss, mode)
    eval_metrics = conv_model_eval_metrics(classes, Y_, mode)

    return learn.ModelFnOps(
        mode=mode,
        # You can name the fields of your predictions dictionary as you like.
        predictions={"predictions": predict, "classes": classes},
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics)
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
  To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of
      the outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net, num_classes, [1, 1],
          activation_fn=None, normalizer_fn=None, scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
def discriminator_stego_nn(self, img, reuse=False):
    with tf.variable_scope('S_network'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        net = self.image_processing_layer(img)
        net = self.batch_norm(net, scope='d_s_bn0')
        net = conv2d(net, self.df_dim, kernel_size=[5, 5], stride=[2, 2],
                     activation_fn=self.leaky_relu, scope='d_s_h0_conv')
        net = self.batch_norm(net, scope='d_s_bn1')
        net = conv2d(net, self.df_dim * 2, kernel_size=[5, 5], stride=[2, 2],
                     activation_fn=self.leaky_relu, scope='d_s_h1_conv')
        net = self.batch_norm(net, scope='d_s_bn2')
        net = conv2d(net, self.df_dim * 4, kernel_size=[5, 5], stride=[2, 2],
                     activation_fn=self.leaky_relu, scope='d_s_h2_conv')
        net = self.batch_norm(net, scope='d_s_bn3')
        net = conv2d(net, self.df_dim * 8, kernel_size=[5, 5], stride=[2, 2],
                     activation_fn=self.leaky_relu, scope='d_s_h3_conv')
        net = self.batch_norm(net, scope='d_s_bn4')

        net = tf.reshape(net, [self.conf.batch_size, -1])
        net = linear(net, 1, activation_fn=tf.nn.sigmoid, scope='d_s_h4_lin',
                     weights_initializer=tf.random_normal_initializer(stddev=0.02))
        return net
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Bottleneck residual unit variant with BN before convolutions.

  This is the full preactivation residual unit variant proposed in [2]. See
  Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
  variant which has an extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with variable_scope.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
    depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    preact = layers.batch_norm(inputs, activation_fn=nn_ops.relu, scope='preact')
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = layers_lib.conv2d(
          preact, depth, [1, 1], stride=stride,
          normalizer_fn=None, activation_fn=None, scope='shortcut')

    residual = layers_lib.conv2d(
        preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    residual = resnet_utils.conv2d_same(
        residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
    residual = layers_lib.conv2d(
        residual, depth, [1, 1], stride=1,
        normalizer_fn=None, activation_fn=None, scope='conv3')

    output = shortcut + residual

    return utils.collect_named_outputs(outputs_collections, sc.name, output)
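# A hedged usage sketch for the preactivation bottleneck above. The input
# shape is illustrative, and `tf` is assumed to be imported alongside the
# slim-style modules the unit itself uses.
x = tf.placeholder(tf.float32, [None, 56, 56, 256])
y = bottleneck(x, depth=256, depth_bottleneck=64, stride=1, scope='unit_1')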
def build_atari(minimap, screen, info, msize, ssize, num_action):
  # Extract features
  mconv1 = layers.conv2d(tf.transpose(minimap, [0, 2, 3, 1]),
                         num_outputs=16, kernel_size=8, stride=4,
                         scope='mconv1')
  mconv2 = layers.conv2d(mconv1, num_outputs=32, kernel_size=4, stride=2,
                         scope='mconv2')
  sconv1 = layers.conv2d(tf.transpose(screen, [0, 2, 3, 1]),
                         num_outputs=16, kernel_size=8, stride=4,
                         scope='sconv1')
  sconv2 = layers.conv2d(sconv1, num_outputs=32, kernel_size=4, stride=2,
                         scope='sconv2')
  info_fc = layers.fully_connected(layers.flatten(info), num_outputs=256,
                                   activation_fn=tf.tanh, scope='info_fc')

  # Compute spatial actions, non spatial actions and value
  feat_fc = tf.concat([layers.flatten(mconv2), layers.flatten(sconv2), info_fc],
                      axis=1)
  feat_fc = layers.fully_connected(feat_fc, num_outputs=256,
                                   activation_fn=tf.nn.relu, scope='feat_fc')
  spatial_action_x = layers.fully_connected(feat_fc, num_outputs=ssize,
                                            activation_fn=tf.nn.softmax,
                                            scope='spatial_action_x')
  spatial_action_y = layers.fully_connected(feat_fc, num_outputs=ssize,
                                            activation_fn=tf.nn.softmax,
                                            scope='spatial_action_y')
  spatial_action_x = tf.reshape(spatial_action_x, [-1, 1, ssize])
  spatial_action_x = tf.tile(spatial_action_x, [1, ssize, 1])
  spatial_action_y = tf.reshape(spatial_action_y, [-1, ssize, 1])
  spatial_action_y = tf.tile(spatial_action_y, [1, 1, ssize])
  spatial_action = layers.flatten(spatial_action_x * spatial_action_y)

  non_spatial_action = layers.fully_connected(feat_fc, num_outputs=num_action,
                                              activation_fn=tf.nn.softmax,
                                              scope='non_spatial_action')
  value = tf.reshape(layers.fully_connected(feat_fc, num_outputs=1,
                                            activation_fn=None, scope='value'),
                     [-1])

  return spatial_action, non_spatial_action, value
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6],
                       biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2,
                       biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2,
                       biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # keyword arguments are required for softmax_cross_entropy_with_logits
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=Ylogits, labels=tf.one_hot(Y_, 10))) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    0.001, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
def conv_model(X, Y_):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6])
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # reduce the per-example losses to a scalar: optimize_loss expects a
    # scalar loss tensor
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=Ylogits, labels=tf.one_hot(Y_, 10)))
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    0.003, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
def resBlock(x, num_outputs, kernel_size=4, stride=1,
             activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm,
             scope=None):
    # num_outputs must be divisible by channel_factor (2 here)
    assert num_outputs % 2 == 0
    with tf.variable_scope(scope, 'resBlock'):
        shortcut = x
        if stride != 1 or x.get_shape()[3] != num_outputs:
            shortcut = tcl.conv2d(shortcut, num_outputs, kernel_size=1,
                                  stride=stride, activation_fn=None,
                                  normalizer_fn=None, scope='shortcut')
        # integer division keeps the channel count an int under Python 3
        x = tcl.conv2d(x, num_outputs // 2, kernel_size=1, stride=1,
                       padding='SAME')
        x = tcl.conv2d(x, num_outputs // 2, kernel_size=kernel_size,
                       stride=stride, padding='SAME')
        x = tcl.conv2d(x, num_outputs, kernel_size=1, stride=1,
                       activation_fn=None, padding='SAME', normalizer_fn=None)
        x += shortcut
        x = normalizer_fn(x)
        x = activation_fn(x)
    return x
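# A hedged usage sketch for resBlock (the input shape is an assumption): one
# unit that doubles the channels while halving the spatial resolution.
inputs = tf.placeholder(tf.float32, [None, 64, 64, 32])
out = resBlock(inputs, num_outputs=64, kernel_size=4, stride=2, scope='res1')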
def __call__(self, x, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        size = 64
        shared = tcl.conv2d(x, num_outputs=size, kernel_size=5,  # bzx28x28x1 -> bzx14x14x64
                            stride=2, activation_fn=tf.nn.relu)
        shared = tcl.conv2d(shared, num_outputs=size * 2, kernel_size=5,  # 7x7x128
                            stride=2, activation_fn=lrelu,
                            normalizer_fn=tcl.batch_norm)
        shared = tcl.fully_connected(tcl.flatten(shared),  # flatten, then FC
                                     1024, activation_fn=tf.nn.relu)
        # c = tcl.fully_connected(shared, 128, activation_fn=tf.nn.relu,
        #                         normalizer_fn=tcl.batch_norm)
        c = tcl.fully_connected(shared, 10, activation_fn=None)  # 10 classes
        return c
def __call__(self, x, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        size = 64
        shared = tcl.conv2d(x, num_outputs=size, kernel_size=4,  # bzx28x28x1 -> bzx14x14x64
                            stride=2, activation_fn=lrelu)
        shared = tcl.conv2d(shared, num_outputs=size * 2, kernel_size=4,  # 7x7x128
                            stride=2, activation_fn=lrelu,
                            normalizer_fn=tcl.batch_norm)
        shared = tcl.flatten(shared)

        d = tcl.fully_connected(shared, 1, activation_fn=None,
                                weights_initializer=tf.random_normal_initializer(0, 0.02))
        q = tcl.fully_connected(shared, 128, activation_fn=lrelu,
                                normalizer_fn=tcl.batch_norm)
        q = tcl.fully_connected(q, 2, activation_fn=None)  # 2-dimensional latent code
        return d, q
def get_shortcut(self, stride, scope='shortcut'):
    """Reshape and repeat to get the shortcut of the input.

    Reference
    =========
    [1] TensorFlow 实战 (TensorFlow in Practice)
    """
    def subsample(inputs, factor, scope):
        if factor == 1:
            return inputs
        else:
            # average pooling for the autoencoder
            return avg_pool2d(inputs, [1, 1], stride=factor,
                              padding='SAME', scope=scope)

    if self.depth_in == self.depth_out:
        self.shortcut = subsample(self.inputs, stride, scope)
    else:
        self.shortcut = conv2d(
            inputs=self.inputs,
            num_outputs=self.depth_out,
            kernel_size=[1, 1],
            stride=stride,
            padding='SAME',
            normalizer_fn=None,
            activation_fn=None,
            scope=scope)
def conv2d(input, filters=16, kernel_size=3, padding="same", stride=1,
           activation_fn=relu, reuse=None, name=None, data_format='NHWC',
           weights_initializer=xavier_initializer(),
           biases_initializer=tf.zeros_initializer(),
           weights_regularizer=None, biases_regularizer=None,
           normalizer_fn=None, normalizer_params=None):
    return contrib_layers.conv2d(input,
                                 num_outputs=filters,
                                 kernel_size=kernel_size,
                                 padding=padding,
                                 stride=stride,
                                 scope=name,
                                 data_format=data_format,
                                 activation_fn=activation_fn,
                                 reuse=reuse,
                                 weights_initializer=weights_initializer,
                                 biases_initializer=biases_initializer,
                                 weights_regularizer=weights_regularizer,
                                 biases_regularizer=biases_regularizer,
                                 normalizer_fn=normalizer_fn,
                                 normalizer_params=normalizer_params)
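# A hedged usage sketch for the Keras-flavoured wrapper above (the input shape
# is an assumption): it forwards `filters`/`name` to contrib's
# `num_outputs`/`scope`.
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
h = conv2d(x, filters=16, kernel_size=3, stride=2, name='conv1')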
def __call__(self, x, reuse=True):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        size = 64
        d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                       stride=2, activation_fn=lrelu)
        d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                       stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                       stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                       stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        d = tcl.fully_connected(tcl.flatten(d), 1, activation_fn=None)
        return d
def get_bottlenet(self):
    """Form the network."""
    # get collections
    bottlelayer = namedtuple("bottlelayer",
                             ['kernel_shape', 'stride', 'bn_flag', 'padding',
                              'act_fn'])
    with tf.name_scope(self.scope):
        input_now = self.inputs
        for i, kernel in enumerate(self.bottle_params):
            # use the loop index (str(i), not the literal string 'i') so each
            # sub-layer gets its own name scope
            with tf.name_scope('bottle_sub' + str(i)):
                kernel = bottlelayer._make(kernel)
                with tf.name_scope('conv2d'):
                    residual = conv2d(inputs=input_now,
                                      num_outputs=kernel.kernel_shape[-1],
                                      kernel_size=kernel.kernel_shape[0:2],
                                      padding=kernel.padding,
                                      stride=kernel.stride)
                if kernel.bn_flag:
                    residual = utils.get_batch_norm(residual, self.is_training,
                                                    scope='batch_norm')
                if kernel.act_fn is not None:
                    with tf.name_scope('activate'):
                        residual = kernel.act_fn(residual)
                input_now = residual

        # add shortcut
        self.get_shortcut(self.stride, scope=self.scope + '_shortcut')
        residual = residual + self.shortcut
        if self.summary_flag:
            tf.summary.histogram('bottle_residual', residual)
    return residual
def autoencoder(inputs):
    # encoder
    # 32 x 32 x 1  -> 16 x 16 x 32
    # 16 x 16 x 32 ->  8 x  8 x 16
    #  8 x  8 x 16 ->  2 x  2 x 8
    net = lays.conv2d(inputs, 32, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d(net, 16, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d(net, 8, [5, 5], stride=4, padding='SAME')
    # decoder
    #  2 x  2 x 8  ->  8 x  8 x 16
    #  8 x  8 x 16 -> 16 x 16 x 32
    # 16 x 16 x 32 -> 32 x 32 x 1
    net = lays.conv2d_transpose(net, 16, [5, 5], stride=4, padding='SAME')
    net = lays.conv2d_transpose(net, 32, [5, 5], stride=2, padding='SAME')
    net = lays.conv2d_transpose(net, 1, [5, 5], stride=2, padding='SAME',
                                activation_fn=tf.nn.tanh)
    return net
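# A hedged end-to-end sketch around `autoencoder`; the placeholder shape, MSE
# loss and Adam optimizer are assumptions, not part of the original snippet.
ae_inputs = tf.placeholder(tf.float32, (None, 32, 32, 1))
ae_outputs = autoencoder(ae_inputs)
ae_loss = tf.reduce_mean(tf.square(ae_outputs - ae_inputs))  # pixel-wise MSE
ae_train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(ae_loss)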
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
  """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
      shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation.
  """
  # Final pooling and prediction
  with variable_scope.variable_scope(
      scope, 'InceptionV1', [inputs, num_classes], reuse=reuse) as scope:
    with arg_scope(
        [layers_lib.batch_norm, layers_lib.dropout], is_training=is_training):
      net, end_points = inception_v1_base(inputs, scope=scope)
      with variable_scope.variable_scope('Logits'):
        net = layers_lib.avg_pool2d(
            net, [7, 7], stride=1, scope='MaxPool_0a_7x7')
        net = layers_lib.dropout(net, dropout_keep_prob, scope='Dropout_0b')
        logits = layers.conv2d(
            net, num_classes, [1, 1],
            activation_fn=None, normalizer_fn=None, scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
def conv_model(features, target):
    target = tf.one_hot(target, 10, 1.0, 0.0)
    features = tf.expand_dims(features, 3)
    features = tf.reduce_max(layers.conv2d(features, 12, [3, 3]), [1, 2])
    features = tf.reshape(features, [-1, 12])
    prediction, loss = learn.models.logistic_regression(features, target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer="SGD",
                                    learning_rate=0.01)
    return tf.argmax(prediction, axis=1), loss, train_op
def testConv2DSameOdd(self):
    n, n2 = 5, 3

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = array_ops.reshape(w, [3, 3, 1, 1])

    variable_scope.get_variable('Conv/weights', initializer=w)
    variable_scope.get_variable('Conv/biases', initializer=array_ops.zeros([1]))
    variable_scope.get_variable_scope().reuse_variables()

    y1 = layers.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = math_ops.cast([[14, 28, 43, 58, 34],
                                 [28, 48, 66, 84, 46],
                                 [43, 66, 84, 102, 55],
                                 [58, 84, 102, 120, 64],
                                 [34, 46, 55, 64, 30]], dtypes.float32)
    y1_expected = array_ops.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = math_ops.cast([[14, 43, 34],
                                 [43, 84, 55],
                                 [34, 55, 30]], dtypes.float32)
    y2_expected = array_ops.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = layers.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = y2_expected

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())
def __call__(self, x, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        size = 64
        shared = tcl.conv2d(x, num_outputs=size, kernel_size=4,  # bzx64x64x3 -> bzx32x32x64
                            stride=2, activation_fn=lrelu)
        shared = tcl.conv2d(shared, num_outputs=size * 2, kernel_size=4,  # 16x16x128
                            stride=2, activation_fn=lrelu,
                            normalizer_fn=tcl.batch_norm)
        shared = tcl.conv2d(shared, num_outputs=size * 4, kernel_size=4,  # 8x8x256
                            stride=2, activation_fn=lrelu,
                            normalizer_fn=tcl.batch_norm)
        # d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
        #                stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        shared = tcl.fully_connected(tcl.flatten(shared), 1024,
                                     activation_fn=lrelu,
                                     normalizer_fn=tcl.batch_norm)

        q = tcl.fully_connected(tcl.flatten(shared), 128, activation_fn=lrelu,
                                normalizer_fn=tcl.batch_norm)
        q = tcl.fully_connected(q, 10, activation_fn=None)  # 10 classes
        return q
def network(self):
    net = self.images
    net = self.image_processing_layer(net)

    def get_init():
        return tf.truncated_normal_initializer(stddev=0.02)

    net = conv2d(net, 10, [7, 7], activation_fn=tf.nn.relu, name='conv1',
                 weights_initializer=get_init())
    net = conv2d(net, 20, [5, 5], activation_fn=tf.nn.relu, name='conv2',
                 weights_initializer=get_init())
    net = tf.nn.max_pool(net, [1, 4, 4, 1], [1, 1, 1, 1], padding='SAME')
    net = conv2d(net, 30, [3, 3], activation_fn=tf.nn.relu, name='conv3',
                 weights_initializer=get_init())
    net = conv2d(net, 40, [3, 3], activation_fn=tf.nn.relu, name='conv4',
                 weights_initializer=get_init())
    net = tf.nn.max_pool(net, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME')
    net = tf.reshape(net, [self.conf.batch_size, -1])
    net = linear(net, 100, activation_fn=tf.nn.tanh, name='FC1')
    out = linear(net, 2, activation_fn=tf.nn.softmax, name='out')
    return out
def conv_layer(net, filters=32, hyperparameter=False, activation=tf.nn.relu,
               stride=1, max_pool=True,
               var_coll=far.HYPERPARAMETERS_COLLECTIONS,
               conv_initialization=tf.contrib.layers.xavier_initializer_conv2d(
                   tf.float32)):
    max_pool_stride = [1, 2, 2, 1]
    bn = lambda _inp: tcl.batch_norm(_inp, variables_collections=var_coll)

    # `net + layer` appends the layer to the far-ho network container
    net + tcl.conv2d(net.out, num_outputs=filters, stride=stride,
                     kernel_size=3, normalizer_fn=bn, activation_fn=None,
                     trainable=not hyperparameter,
                     variables_collections=var_coll,
                     weights_initializer=conv_initialization)
    net + activation(net.out)
    if max_pool:
        net + tf.nn.max_pool(net.out, max_pool_stride, max_pool_stride, 'VALID')
def testConv2DSameEven(self):
    n, n2 = 4, 2

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = array_ops.reshape(w, [3, 3, 1, 1])

    variable_scope.get_variable('Conv/weights', initializer=w)
    variable_scope.get_variable('Conv/biases', initializer=array_ops.zeros([1]))
    variable_scope.get_variable_scope().reuse_variables()

    y1 = layers.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = math_ops.to_float([[14, 28, 43, 26],
                                     [28, 48, 66, 37],
                                     [43, 66, 84, 46],
                                     [26, 37, 46, 22]])
    y1_expected = array_ops.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = math_ops.to_float([[14, 43],
                                     [43, 84]])
    y2_expected = array_ops.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = layers.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = math_ops.to_float([[48, 37],
                                     [37, 22]])
    y4_expected = array_ops.reshape(y4_expected, [1, n2, n2, 1])

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())
def __call__(self, x, is_training=True):
    with tf.variable_scope(self.name) as scope:
        with arg_scope([tcl.batch_norm], is_training=is_training, scale=True):
            with arg_scope([tcl.conv2d, tcl.conv2d_transpose],
                           activation_fn=tf.nn.relu,
                           normalizer_fn=tcl.batch_norm,
                           biases_initializer=None,
                           padding='SAME',
                           weights_regularizer=tcl.l2_regularizer(0.0002)):
                size = 16
                # x: s x s x 3
                se = tcl.conv2d(x, num_outputs=size, kernel_size=4, stride=1)      # 256 x 256 x 16
                se = resBlock(se, num_outputs=size * 2, kernel_size=4, stride=2)   # 128 x 128 x 32
                se = resBlock(se, num_outputs=size * 2, kernel_size=4, stride=1)   # 128 x 128 x 32
                se = resBlock(se, num_outputs=size * 4, kernel_size=4, stride=2)   # 64 x 64 x 64
                se = resBlock(se, num_outputs=size * 4, kernel_size=4, stride=1)   # 64 x 64 x 64
                se = resBlock(se, num_outputs=size * 8, kernel_size=4, stride=2)   # 32 x 32 x 128
                se = resBlock(se, num_outputs=size * 8, kernel_size=4, stride=1)   # 32 x 32 x 128
                se = resBlock(se, num_outputs=size * 16, kernel_size=4, stride=2)  # 16 x 16 x 256
                se = resBlock(se, num_outputs=size * 16, kernel_size=4, stride=1)  # 16 x 16 x 256
                se = resBlock(se, num_outputs=size * 32, kernel_size=4, stride=2)  # 8 x 8 x 512
                se = resBlock(se, num_outputs=size * 32, kernel_size=4, stride=1)  # 8 x 8 x 512

                pd = tcl.conv2d_transpose(se, size * 32, 4, stride=1)  # 8 x 8 x 512
                pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=2)  # 16 x 16 x 256
                pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=1)  # 16 x 16 x 256
                pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=1)  # 16 x 16 x 256
                pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=2)   # 32 x 32 x 128
                pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=1)   # 32 x 32 x 128
                pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=1)   # 32 x 32 x 128
                pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=2)   # 64 x 64 x 64
                pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=1)   # 64 x 64 x 64
                pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=1)   # 64 x 64 x 64
                pd = tcl.conv2d_transpose(pd, size * 2, 4, stride=2)   # 128 x 128 x 32
                pd = tcl.conv2d_transpose(pd, size * 2, 4, stride=1)   # 128 x 128 x 32
                pd = tcl.conv2d_transpose(pd, size, 4, stride=2)       # 256 x 256 x 16
                pd = tcl.conv2d_transpose(pd, size, 4, stride=1)       # 256 x 256 x 16

                pd = tcl.conv2d_transpose(pd, 3, 4, stride=1)  # 256 x 256 x 3
                pd = tcl.conv2d_transpose(pd, 3, 4, stride=1)  # 256 x 256 x 3
                pos = tcl.conv2d_transpose(pd, 3, 4, stride=1,
                                           activation_fn=tf.nn.sigmoid)
                # , padding='SAME',
                # weights_initializer=tf.random_normal_initializer(0, 0.02))
                return pos
def conv2d(inputs, activation_fn=lrelu, normalizer_fn=instance_norm,
           scope='conv2d', **kwargs):
    """conv2d wrapper that normalizes the raw convolution before activating.

    Parameters
    ----------
    inputs : tf.Tensor
        4-D input tensor.
    activation_fn : callable, optional
        Activation applied after the (optional) normalization.
    normalizer_fn : callable, optional
        Normalization applied to the raw convolution output.
    scope : str, optional
        Variable scope for the layer.
    **kwargs
        Forwarded to tf.contrib.layers.conv2d (e.g. num_outputs, kernel_size).

    Returns
    -------
    tf.Tensor
        The normalized, activated convolution output.
    """
    with tf.variable_scope(scope or 'conv2d'):
        h = tfl.conv2d(
            inputs=inputs,
            activation_fn=None,
            normalizer_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
            biases_initializer=None,
            **kwargs)
        if normalizer_fn:
            h = normalizer_fn(h)
        if activation_fn:
            h = activation_fn(h)
        return h
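# A hedged usage sketch for the wrapper above; `lrelu` and `instance_norm` are
# assumed to be defined elsewhere in the same module, and the input shape is
# illustrative.
x = tf.placeholder(tf.float32, [None, 256, 256, 3])
h = conv2d(x, num_outputs=64, kernel_size=4, stride=2, scope='g_conv1')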
def inference_vgg16(images: tf.Tensor, params: ModelParams, num_classes: int,
                    use_batch_norm=False, weight_decay=0.0,
                    is_training=False) -> tf.Tensor:
    with tf.name_scope('vgg_augmented'):

        if use_batch_norm:
            if params.batch_renorm:
                renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 10}
                renorm_momentum = 0.98
            else:
                renorm_clipping = None
                renorm_momentum = 0.99
            batch_norm_fn = lambda x: tf.layers.batch_normalization(
                x, axis=-1, training=is_training, name='batch_norm',
                renorm=params.batch_renorm, renorm_clipping=renorm_clipping,
                renorm_momentum=renorm_momentum)
        else:
            batch_norm_fn = None

        def upsample_conv(pooled_layer, previous_layer, layer_params, number):
            with tf.name_scope('deconv{}'.format(number)):
                if previous_layer.get_shape()[1].value and \
                        previous_layer.get_shape()[2].value:
                    target_shape = previous_layer.get_shape()[1:3]
                else:
                    target_shape = tf.shape(previous_layer)[1:3]
                upsampled_layer = tf.image.resize_images(
                    pooled_layer, target_shape,
                    method=tf.image.ResizeMethod.BILINEAR)
                input_tensor = tf.concat([upsampled_layer, previous_layer], 3)

                for i, (nb_filters, filter_size) in enumerate(layer_params):
                    input_tensor = layers.conv2d(
                        inputs=input_tensor,
                        num_outputs=nb_filters,
                        kernel_size=[filter_size, filter_size],
                        normalizer_fn=batch_norm_fn,
                        scope="conv{}_{}".format(number, i + 1))
            return input_tensor

        # Original VGG:
        vgg_net, intermediate_levels = vgg_16_fn(images, blocks=5,
                                                 weight_decay=weight_decay)
        out_tensor = vgg_net

        # Intermediate convolution
        if params.intermediate_conv is not None:
            with tf.name_scope('intermediate_convs'):
                for layer_params in params.intermediate_conv:
                    for k, (nb_filters, filter_size) in enumerate(layer_params):
                        out_tensor = layers.conv2d(
                            inputs=out_tensor,
                            num_outputs=nb_filters,
                            kernel_size=[filter_size, filter_size],
                            normalizer_fn=batch_norm_fn,
                            scope='conv_{}'.format(k + 1))

        # Upsampling:
        with tf.name_scope('upsampling'):
            selected_upscale_params = [
                l for i, l in enumerate(params.upscale_params)
                if params.selected_levels_upscaling[i]
            ]

            assert len(params.selected_levels_upscaling) == len(intermediate_levels), \
                'Upscaling : {} is different from {}'.format(
                    len(params.selected_levels_upscaling),
                    len(intermediate_levels))

            selected_intermediate_levels = [
                l for i, l in enumerate(intermediate_levels)
                if params.selected_levels_upscaling[i]
            ]

            # Upsampling loop
            n_layer = 1
            for i in reversed(range(len(selected_intermediate_levels))):
                out_tensor = upsample_conv(out_tensor,
                                           selected_intermediate_levels[i],
                                           selected_upscale_params[i],
                                           n_layer)
                n_layer += 1

            logits = layers.conv2d(inputs=out_tensor,
                                   num_outputs=num_classes,
                                   activation_fn=None,
                                   kernel_size=[1, 1],
                                   scope="conv{}-logits".format(n_layer))

        return logits  # [B, h, w, Classes]
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=None,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
  """Generator for v2 (preactivation) ResNet models.

  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] and
  corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2]
  all have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode. If None, the
      value inherited from the resnet_arg_scope is used. Specifying the value
      None is deprecated.
    global_pool: If True, we perform global average pooling before computing
      the logits. Set to True for image classification, False for dense
      prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it. If excluded, `inputs` should be the
      results of an activation-less convolution.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and
      width_in, else both height_out and width_out equal one. If num_classes is
      None, then net is the output of the last ResNet block, potentially after
      global average pooling. If num_classes is not None, net contains the
      pre-softmax activations.
    end_points: A dictionary from components of the network to the
      corresponding activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with variable_scope.variable_scope(
      scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with arg_scope(
        [layers_lib.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
        outputs_collections=end_points_collection):
      if is_training is not None:
        bn_scope = arg_scope([layers.batch_norm], is_training=is_training)
      else:
        bn_scope = arg_scope([])
      with bn_scope:
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride //= 4
          # We do not include batch normalization or activation functions in
          # conv1 because the first ResNet unit will perform these. Cf.
          # Appendix of [2].
          with arg_scope(
              [layers_lib.conv2d], activation_fn=None, normalizer_fn=None):
            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = layers.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        # This is needed because the pre-activation variant does not have batch
        # normalization or activation functions in the residual unit output.
        # See Appendix of [2].
        net = layers.batch_norm(net, activation_fn=nn_ops.relu, scope='postnorm')
        if global_pool:
          # Global average pooling.
          net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if num_classes is not None:
          net = layers_lib.conv2d(
              net, num_classes, [1, 1],
              activation_fn=None, normalizer_fn=None, scope='logits')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = utils.convert_collection_to_dict(end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = layers.softmax(net, scope='predictions')
        return net, end_points
def inference_u_net(images: tf.Tensor, params: ModelParams, num_classes: int,
                    use_batch_norm=False, weight_decay=0.0,
                    is_training=False) -> tf.Tensor:
    enc_layers = OrderedDict()
    dec_layers = OrderedDict()

    with tf.variable_scope('U-Net'):
        with tf.variable_scope('Encoder'):
            conv_layer = layers.conv2d(images, num_outputs=64,
                                       kernel_size=(3, 3), padding='SAME',
                                       activation_fn=tf.identity)
            enc_layers['conv_layer_enc_64'] = conv_bn_layer(
                conv_layer, kernel_size=(3, 3), output_channels=64,
                bn=True, is_training=is_training, relu=True)
            conv_layer = layers.max_pool2d(
                inputs=enc_layers['conv_layer_enc_64'],
                kernel_size=(2, 2), stride=2)

            for n_feat in [128, 256, 512]:
                enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer(
                    conv_layer, kernel_size=(3, 3), output_channels=n_feat,
                    bn=True, is_training=is_training, relu=True)
                enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer(
                    enc_layers['conv_layer_enc_' + str(n_feat)],
                    kernel_size=(3, 3), output_channels=n_feat,
                    bn=True, is_training=is_training, relu=True)
                conv_layer = layers.max_pool2d(
                    inputs=enc_layers['conv_layer_enc_' + str(n_feat)],
                    kernel_size=(2, 2), stride=2)

            conv_layer_enc_1024 = conv_bn_layer(
                conv_layer, kernel_size=(3, 3), output_channels=1024,
                bn=True, is_training=is_training, relu=True)

        with tf.variable_scope('Decoder'):
            dec_layers['conv_layer_dec_512'] = conv_bn_layer(
                conv_layer_enc_1024, kernel_size=(3, 3), output_channels=512,
                bn=True, is_training=is_training, relu=True)
            reduced_patchsize = _get_image_shape_tensor(
                enc_layers['conv_layer_enc_512'])
            dec_layers['conv_layer_dec_512'] = tf.image.resize_images(
                dec_layers['conv_layer_dec_512'], size=reduced_patchsize,
                method=tf.image.ResizeMethod.BILINEAR)

            for n_feat in [512, 256, 128, 64]:
                dec_layers['conv_layer_dec_' + str(n_feat * 2)] = tf.concat(
                    [dec_layers['conv_layer_dec_' + str(n_feat)],
                     enc_layers['conv_layer_enc_' + str(n_feat)]], axis=3)
                dec_layers['conv_layer_dec_' + str(n_feat)] = conv_bn_layer(
                    dec_layers['conv_layer_dec_' + str(n_feat * 2)],
                    kernel_size=(3, 3), output_channels=n_feat,
                    bn=True, is_training=is_training, relu=True)

                if n_feat > 64:
                    # integer division keeps the channel count (and the dict
                    # keys built from it) integral under Python 3
                    dec_layers['conv_layer_dec_' + str(n_feat // 2)] = conv_bn_layer(
                        dec_layers['conv_layer_dec_' + str(n_feat)],
                        kernel_size=(3, 3), output_channels=n_feat // 2,
                        bn=True, is_training=is_training, relu=True)
                    reduced_patchsize = _get_image_shape_tensor(
                        enc_layers['conv_layer_enc_' + str(n_feat // 2)])
                    dec_layers['conv_layer_dec_' + str(n_feat // 2)] = \
                        tf.image.resize_images(
                            dec_layers['conv_layer_dec_' + str(n_feat // 2)],
                            size=reduced_patchsize,
                            method=tf.image.ResizeMethod.BILINEAR)

    return layers.conv2d(dec_layers['conv_layer_dec_64'],
                         num_outputs=num_classes, kernel_size=(3, 3),
                         padding='SAME', activation_fn=tf.identity)
def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters or
      computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
      losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or depth_multiplier <= 0
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with variable_scope.variable_scope(scope, 'InceptionV3', [inputs]):
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
        stride=1,
        padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = layers.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = layers.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = layers.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = layers.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = layers.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 35 x 35 x 192.

    # Inception blocks
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
        stride=1,
        padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(32), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = layers.conv2d(branch_1, depth(64), [5, 5], scope='Conv_1_0c_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_3: 17 x 17 x 768.
      end_point = 'Mixed_6a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(384), [3, 3], stride=2, padding='VALID',
              scope='Conv2d_1a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], stride=2, padding='VALID',
              scope='Conv2d_1a_1x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed4: 17 x 17 x 768.
      end_point = 'Mixed_6b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(128), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(branch_2, depth(128), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_5: 17 x 17 x 768.
      end_point = 'Mixed_6c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_8: 8 x 8 x 1280.
      end_point = 'Mixed_7a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = layers.conv2d(
              branch_0, depth(320), [3, 3], stride=2, padding='VALID',
              scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [3, 3], stride=2, padding='VALID',
              scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = array_ops.concat(
              [
                  layers.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
                  layers.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')
              ], 3)
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = array_ops.concat(
              [
                  layers.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
                  layers.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
              ], 3)
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = array_ops.concat(
              [
                  layers.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
                  layers.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')
              ], 3)
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = array_ops.concat(
              [
                  layers.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
                  layers.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
              ], 3)
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
def quantizable_separable_conv2d(inputs,
                                 num_outputs,
                                 kernel_size,
                                 is_quantized=True,
                                 depth_multiplier=1,
                                 stride=1,
                                 activation_fn=tf.nn.relu6,
                                 normalizer_fn=None,
                                 scope=None):
  """Quantization friendly backward compatible separable conv2d.

  This op has the same API as separable_conv2d. The main difference is that an
  additional BiasAdd is manually inserted after the depthwise conv, such that
  the depthwise bias will not have a name conflict with the pointwise bias.
  The motivation of this op is that the quantization script needs a BiasAdd in
  order to recognize the op, which a native call to separable_conv2d does not
  create for the depthwise conv.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    num_outputs: The number of pointwise convolution output filters. If it is
      None, then we skip the pointwise convolution stage.
    kernel_size: A list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    is_quantized: flag to enable/disable quantization.
    depth_multiplier: The number of depthwise convolution output channels for
      each input channel. The total number of depthwise convolution output
      channels will be equal to num_filters_in * depth_multiplier.
    stride: A list of length 2: [stride_height, stride_width], specifying the
      depthwise convolution stride. Can be an int if both strides are the same.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of biases.
    scope: Optional scope for variable_scope.

  Returns:
    The separable convolution output tensor.
  """
  if is_quantized:
    outputs = contrib_layers.separable_conv2d(
        inputs,
        None,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=1,
        activation_fn=None,
        normalizer_fn=None,
        biases_initializer=None,
        scope=scope)
    outputs = contrib_layers.bias_add(
        outputs, trainable=True, scope='%s_bias' % scope)
    outputs = contrib_layers.conv2d(
        outputs,
        num_outputs, [1, 1],
        activation_fn=activation_fn,
        stride=stride,
        normalizer_fn=normalizer_fn,
        scope=scope)
  else:
    outputs = contrib_layers.separable_conv2d(
        inputs,
        num_outputs,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=stride,
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        scope=scope)
  return outputs
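# A hedged usage sketch for quantizable_separable_conv2d (the input shape and
# filter count are illustrative assumptions).
x = tf.placeholder(tf.float32, [None, 112, 112, 32])
y = quantizable_separable_conv2d(x, num_outputs=64, kernel_size=3,
                                 is_quantized=True, stride=1,
                                 scope='sep_conv1')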
def get_conv_model(features, labels, mode, params):
  parent_scope = "cnn"
  # TODO Need to have two: one for expand, one for conquer
  # features = _get_feature_dict(features)
  head = params.get("head")
  feature_columns = params.get("feature_columns")
  activation_fn = params.get("activation_fn")
  dropout = params.get("dropout")
  learning_rate = params.get("learning_rate")
  optimizer = params.get("optimizer")

  # with variable_scope.variable_scope(
  #     parent_scope + "/input_from_feature_columns",
  #     values=features.values()) as scope:
  #   net = layers.input_from_feature_columns(
  #       columns_to_tensors=features,
  #       feature_columns=feature_columns,
  #       weight_collections=[parent_scope],
  #       scope=scope)
  with variable_scope.variable_scope(
      parent_scope + "/convlayer_1", values=[features]) as scope:
    net = layers.conv2d(features,
                        num_outputs=32,
                        kernel_size=3,
                        variables_collections=[parent_scope],
                        scope=scope)
    net = layers.max_pool2d(net, 2, stride=1, padding='SAME')

  with variable_scope.variable_scope(
      parent_scope + "/convlayer_2", values=[net]) as scope:
    # Chain the second conv on the output of the first; feeding `features`
    # here again (as the original did) would silently discard convlayer_1.
    net = layers.conv2d(net,
                        num_outputs=64,
                        kernel_size=5,
                        padding='VALID',
                        variables_collections=[parent_scope],
                        scope=scope)
  # net = layers.max_pool2d(net, 1, stride=1, padding='SAME')
  #
  # with variable_scope.variable_scope(
  #     parent_scope + "/max_pool_1", values=[net]) as scope:
  shape = net.get_shape()
  net = tf.reshape(net, [-1, shape[3].value], name="reshape_1")
  hidden_units = [256, 128]
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, values=[net]) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  with variable_scope.variable_scope(
      parent_scope + "/logits", values=[net]) as scope:
    logits = layers.fully_connected(
        net,
        head.logits_dimension,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    return optimizers.optimize_loss(
        loss=loss,
        global_step=contrib_variables.get_global_step(),
        learning_rate=learning_rate,
        optimizer=optimizer,
        name=parent_scope,
        # Empty summaries to prevent optimizers from logging training_loss.
        summaries=[])

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
batch_size = 32 # create placeholders for data X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X') y = tf.placeholder(tf.int32, shape=(None, 10), name='y') # create weight initializations init_fn = layers.xavier_initializer() # create model with tf.variable_scope('layers'): # CONV 1 with framework.arg_scope([layers.conv2d, layers.fully_connected], weights_initializer=init_fn): conv1 = layers.conv2d(X, 32, (3, 3), 2, padding='VALID', scope='conv1') pool1 = layers.max_pool2d(conv1, (2, 2), scope='pool1') # CONV 2 conv2 = layers.conv2d(pool1, 32, (3, 3), 2, padding='SAME', scope='conv2') pool2 = layers.max_pool2d(conv2, (2, 2), scope='pool2') # FLATTEN flat_dim = np.prod(pool2.get_shape().as_list()[1:]) flatten = tf.reshape(pool2, (-1, flat_dim)) # FC 3 fc3 = layers.fully_connected(flatten, 128, scope='fc3') # FC 4 fc4 = layers.fully_connected(fc3, 10, scope='fc4')
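# The graph above stops at the logits layer `fc4`; nothing computes a loss or
# updates weights. A minimal sketch of the missing wiring (the names `loss`
# and `train_op` are ours, not the original author's):
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.cast(y, tf.float32),  # `y` was declared as int32 one-hot
        logits=fc4))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
# Per batch: sess.run(train_op, feed_dict={X: batch_images, y: batch_labels})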
def atrous_spatial_pyramid_pooling(self, inputs, output_stride,
                                   batch_norm_decay, is_training, depth=256):
  """Atrous Spatial Pyramid Pooling.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    output_stride: The ResNet unit's stride. Determines the rates for atrous
      convolution. The rates are (6, 12, 18) when the stride is 16, and
      doubled when it is 8.
    batch_norm_decay: The moving average decay when estimating layer
      activation statistics in batch normalization.
    is_training: A boolean denoting whether the input is for training.
    depth: The depth of the ResNet unit output.

  Returns:
    The atrous spatial pyramid pooling output.
  """
  with tf.variable_scope("aspp"):
    if output_stride not in [8, 16]:
      raise ValueError('output_stride must be either 8 or 16.')
    atrous_rates = [6, 12, 18]
    if output_stride == 8:
      atrous_rates = [2 * rate for rate in atrous_rates]
    with tf.contrib.slim.arg_scope(
        resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
      with arg_scope([layers.batch_norm], is_training=is_training):
        inputs_size = tf.shape(inputs)[1:3]
        # (a) One 1x1 convolution and three 3x3 convolutions with
        # rates = (6, 12, 18) when output stride = 16; the rates are doubled
        # when output stride = 8.
        conv_1x1 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1,
                                     scope="conv_1x1")
        conv_3x3_1 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                       rate=atrous_rates[0],
                                       scope='conv_3x3_1')
        conv_3x3_2 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                       rate=atrous_rates[1],
                                       scope='conv_3x3_2')
        conv_3x3_3 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                       rate=atrous_rates[2],
                                       scope='conv_3x3_3')
        # (b) The image-level features.
        with tf.variable_scope("image_level_features"):
          # Global average pooling.
          image_level_features = tf.reduce_mean(
              inputs, [1, 2], name='global_average_pooling', keepdims=True)
          # 1x1 convolution with 256 filters (and batch normalization).
          image_level_features = layers_lib.conv2d(
              image_level_features, depth, [1, 1], stride=1, scope='conv_1x1')
          # Bilinearly upsample the features.
          image_level_features = tf.image.resize_bilinear(
              image_level_features, inputs_size, name='upsample')
        net = tf.concat(
            [conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
             image_level_features],
            axis=3, name='concat')
        net = layers_lib.conv2d(net, depth, [1, 1], stride=1,
                                scope='conv_1x1_concat')
        return net
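# In DeepLab v3 the ASPP output above typically feeds a small classification
# head: a 1x1 conv to per-class logits, bilinearly upsampled back to the input
# resolution. A hedged sketch of that step (function and argument names are
# ours, not part of the method above):
def aspp_logits_head(aspp_out, images, num_classes):
    # No activation or normalization on the final logits layer.
    logits = layers_lib.conv2d(aspp_out, num_classes, [1, 1],
                               activation_fn=None, normalizer_fn=None,
                               scope='logits')
    # Recover the original spatial resolution of `images`.
    return tf.image.resize_bilinear(logits, tf.shape(images)[1:3],
                                    name='upsample_logits')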
def build_net(x): weight_decay = FLAGS.weight_decay h1 = layers.conv2d( inputs=x, num_outputs=32, kernel_size=[5, 5], weights_initializer=tf.truncated_normal_initializer(stddev=0.01), weights_regularizer=layers.l2_regularizer(weight_decay), biases_regularizer=layers.l2_regularizer(weight_decay), scope='conv1', normalizer_fn=layers.batch_norm) h1 = layers.avg_pool2d(inputs=h1, kernel_size=[3, 3], padding='SAME', scope='pool1') h2 = layers.conv2d( inputs=h1, num_outputs=32, kernel_size=[5, 5], weights_initializer=tf.truncated_normal_initializer(stddev=0.05), weights_regularizer=layers.l2_regularizer(weight_decay), biases_regularizer=layers.l2_regularizer(weight_decay), scope='conv2', normalizer_fn=layers.batch_norm) h2 = layers.avg_pool2d(inputs=h2, kernel_size=[3, 3], padding='SAME', scope='pool2') h3 = layers.conv2d( inputs=h2, num_outputs=64, kernel_size=[5, 5], weights_initializer=tf.truncated_normal_initializer(stddev=0.05), weights_regularizer=layers.l2_regularizer(weight_decay), biases_regularizer=layers.l2_regularizer(weight_decay), scope='conv3', normalizer_fn=layers.batch_norm) h3 = layers.avg_pool2d(inputs=h3, kernel_size=[3, 3], padding='SAME', scope='pool3') h4 = layers.conv2d( inputs=h3, num_outputs=64, kernel_size=[4, 4], weights_initializer=tf.truncated_normal_initializer(stddev=0.05), weights_regularizer=layers.l2_regularizer(weight_decay), biases_regularizer=layers.l2_regularizer(weight_decay), padding='VALID', scope='conv4', normalizer_fn=layers.batch_norm) keep_prob = tf.placeholder(tf.float32, name="keep_prob") h4 = layers.dropout(inputs=h4, keep_prob=keep_prob, scope='dropout') h5 = layers.fully_connected( inputs=h4, num_outputs=10, activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.05), weights_regularizer=layers.l2_regularizer(weight_decay), biases_regularizer=layers.l2_regularizer(weight_decay), scope='fc1') h5 = tf.reshape(h5, [-1, 10]) return h5, keep_prob
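# `keep_prob` is returned from build_net as a raw placeholder, so dropout must
# be driven through feed_dict: a value below 1.0 while training, exactly 1.0
# at evaluation. A hedged sketch (the 32x32x3 input shape and the loss wiring
# are our assumptions, not the original author's):
images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
labels = tf.placeholder(tf.int64, [None], name='labels')
logits, keep_prob = build_net(images)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Training step: sess.run(train_op, {images: bx, labels: by, keep_prob: 0.5})
# Evaluation:    sess.run(loss,     {images: vx, labels: vy, keep_prob: 1.0})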
def build_model(self): self.placeholders = _get_placeholders(self.spatial_dim) with tf.variable_scope("theta"): units_embedded = layers.embed_sequence( self.placeholders.screen_unit_type, vocab_size=SCREEN_FEATURES.unit_type.scale, embed_dim=self.unit_type_emb_dim, scope="unit_type_emb", trainable=self.trainable ) # Let's not one-hot zero which is background player_relative_screen_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_screen, num_classes=SCREEN_FEATURES.player_relative.scale )[:, :, :, 1:] player_relative_minimap_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_minimap, num_classes=MINIMAP_FEATURES.player_relative.scale )[:, :, :, 1:] channel_axis = 3 screen_numeric_all = tf.concat( [self.placeholders.screen_numeric, units_embedded, player_relative_screen_one_hot], axis=channel_axis ) minimap_numeric_all = tf.concat( [self.placeholders.minimap_numeric, player_relative_minimap_one_hot], axis=channel_axis ) # BUILD CONVNNs screen_output = self._build_convs(screen_numeric_all, "screen_network") minimap_output = self._build_convs(minimap_numeric_all, "minimap_network") # State representation (last layer before separation as described in the paper) self.map_output = tf.concat([screen_output, minimap_output], axis=channel_axis) # BUILD CONVLSTM self.rnn_in = tf.reshape(self.map_output, [1, -1, 32, 32, 64]) self.cell = tf.contrib.rnn.Conv2DLSTMCell(input_shape=[32, 32, 1], # input dims kernel_shape=[3, 3], # for a 3 by 3 conv output_channels=64) # number of feature maps c_init = np.zeros((1, 32, 32, 64), np.float32) h_init = np.zeros((1, 32, 32, 64), np.float32) self.state_init = [c_init, h_init] step_size = tf.shape(self.map_output)[:1] # Get step_size from input dimensions c_in = tf.placeholder(tf.float32, [None, 32, 32, 64]) h_in = tf.placeholder(tf.float32, [None, 32, 32, 64]) self.state_in = (c_in, h_in) state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in) self.step_size = tf.placeholder(tf.float32, [1]) (self.outputs, self.state) = tf.nn.dynamic_rnn(self.cell, self.rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False, dtype=tf.float32) lstm_c, lstm_h = self.state self.state_out = (lstm_c[:1, :], lstm_h[:1, :]) rnn_out = tf.reshape(self.outputs, [-1, 32, 32, 64]) # 1x1 conv layer to generate our spatial policy self.spatial_action_logits = layers.conv2d( rnn_out, data_format="NHWC", num_outputs=1, kernel_size=1, stride=1, activation_fn=None, scope='spatial_action', trainable=self.trainable ) spatial_action_probs = tf.nn.softmax(layers.flatten(self.spatial_action_logits)) map_output_flat = tf.reshape(self.outputs, [-1, 65536]) # (32*32*64) # fully connected layer for Value predictions and action_id self.fc1 = layers.fully_connected( map_output_flat, num_outputs=256, activation_fn=tf.nn.relu, scope="fc1", trainable=self.trainable ) # fc/action_id action_id_probs = layers.fully_connected( self.fc1, num_outputs=len(actions.FUNCTIONS), activation_fn=tf.nn.softmax, scope="action_id", trainable=self.trainable ) # fc/value self.value_estimate = tf.squeeze(layers.fully_connected( self.fc1, num_outputs=1, activation_fn=None, scope='value', trainable=self.trainable ), axis=1) # disregard non-allowed actions by setting zero prob and re-normalizing to 1 ((MINE) THE MASK) action_id_probs *= self.placeholders.available_action_ids action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True) def logclip(x): return tf.log(tf.clip_by_value(x, 1e-12, 1.0)) spatial_action_log_probs = ( logclip(spatial_action_probs) * 
tf.expand_dims(self.placeholders.is_spatial_action_available, axis=1)
        )
        # Non-available actions get the clipped value log(1e-12) here, but
        # that's ok because it is never used.
        action_id_log_probs = logclip(action_id_probs)

        self.action_id_probs = action_id_probs
        self.spatial_action_probs = spatial_action_probs
        self.action_id_log_probs = action_id_log_probs
        self.spatial_action_log_probs = spatial_action_log_probs

        selected_spatial_action_flat = ravel_index_pairs(
            self.placeholders.selected_spatial_action, self.spatial_dim
        )
        selected_log_probs = self._get_select_action_probs(
            selected_spatial_action_flat)

        # The maximum is taken to avoid 0 / 0, because this sum is used to
        # calculate some means.
        sum_spatial_action_available = tf.maximum(
            1e-10, tf.reduce_sum(self.placeholders.is_spatial_action_available)
        )

        neg_entropy_spatial = tf.reduce_sum(
            self.spatial_action_probs * self.spatial_action_log_probs
        ) / sum_spatial_action_available
        neg_entropy_action_id = tf.reduce_mean(tf.reduce_sum(
            self.action_id_probs * self.action_id_log_probs, axis=1
        ))

        # Sample actions from the corresponding distributions defined by the
        # policy network theta.
        self.sampled_action_id = weighted_random_sample(self.action_id_probs)
        self.sampled_spatial_action = weighted_random_sample(
            self.spatial_action_probs)

        policy_loss = -tf.reduce_mean(
            selected_log_probs.total * self.placeholders.advantage)
        value_loss = tf.losses.mean_squared_error(
            self.placeholders.value_target, self.value_estimate)

        loss = (
            policy_loss
            + value_loss * self.loss_value_weight
            + neg_entropy_spatial * self.entropy_weight_spatial
            + neg_entropy_action_id * self.entropy_weight_action_id
        )

        self.train_op = layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer=self.optimiser,
            clip_gradients=self.max_gradient_norm,
            summaries=OPTIMIZER_SUMMARIES,
            learning_rate=None,
            name="train_op"
        )

        self._scalar_summary("value/estimate",
                             tf.reduce_mean(self.value_estimate))
        self._scalar_summary("value/target",
                             tf.reduce_mean(self.placeholders.value_target))
        self._scalar_summary(
            "action/is_spatial_action_available",
            tf.reduce_mean(self.placeholders.is_spatial_action_available))
        self._scalar_summary("action/selected_id_log_prob",
                             tf.reduce_mean(selected_log_probs.action_id))
        self._scalar_summary("loss/policy", policy_loss)
        self._scalar_summary("loss/value", value_loss)
        self._scalar_summary("loss/neg_entropy_spatial", neg_entropy_spatial)
        self._scalar_summary("loss/neg_entropy_action_id",
                             neg_entropy_action_id)
        self._scalar_summary("loss/total", loss)
        self._scalar_summary("value/advantage",
                             tf.reduce_mean(self.placeholders.advantage))
        self._scalar_summary("action/selected_total_log_prob",
                             tf.reduce_mean(selected_log_probs.total))
        self._scalar_summary("action/selected_spatial_log_prob",
                             tf.reduce_sum(selected_log_probs.spatial)
                             / sum_spatial_action_available)

        self.init_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver(max_to_keep=2)
        self.all_summary_op = tf.summary.merge_all(tf.GraphKeys.SUMMARIES)
        self.scalar_summary_op = tf.summary.merge(
            tf.get_collection(self._scalar_summary_key))
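# Two helpers used in build_model are not defined in this snippet. Hedged
# minimal sketches of what they plausibly do (our assumptions, not the
# author's code):
def weighted_random_sample(probs):
    # Draw one index per row according to the given (possibly unnormalized)
    # probabilities; tf.multinomial expects logits, so take log(probs).
    return tf.squeeze(
        tf.multinomial(tf.log(probs + 1e-12), num_samples=1), axis=1)

def ravel_index_pairs(pairs, spatial_dim):
    # Convert [batch, 2] (row, col) coordinates into flat row-major indices.
    return pairs[:, 0] * spatial_dim + pairs[:, 1]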
def build_conv_layers_for_input(self, inputs, name, previous_tensors=None): """build_conv_layers_for_input Creates 2 convolutional layers based on an input. Changeable parts here are: Number of outputs for both layers Size of the kernel used The stride used The activation function :param inputs: The inputs to run the convolutional layers against. :param name: The name of the input, to scope the layers. """ conv_layer1 = layers.conv2d( inputs=inputs, data_format="NHWC", num_outputs=16, kernel_size=5, stride=1, padding="SAME", activation_fn=tf.nn.relu, scope=f"{name}/conv_layer1/model_{self.curriculum_number}", trainable=self.trainable, ) conv_layer2 = layers.conv2d( inputs=conv_layer1, data_format="NHWC", num_outputs=32, kernel_size=3, stride=1, padding="SAME", activation_fn=None, scope=f"{name}/conv_layer2/model_{self.curriculum_number}", trainable=self.trainable, ) if self.trainable: layers.summarize_activation(conv_layer1) layers.summarize_activation(conv_layer2) tf.summary.image( f"{name}/new_conv_layer1", tf.reshape(conv_layer1, [-1, 32, 32, 1]), 3 ) tf.summary.image( f"{name}/new_conv_layer2", tf.reshape(conv_layer2, [-1, 32, 32, 1]), 3 ) # If we aren't doing transfer learning, return now. if previous_tensors is None: return conv_layer2 # Sort the previous models previous_conv_layer2 = [] for model_number, prev_out in enumerate(previous_tensors): conv_layer2_previous = layers.conv2d( inputs=prev_out, data_format="NHWC", num_outputs=32, kernel_size=3, stride=1, padding="SAME", activation_fn=None, scope=f"{name}/conv_layer2/model_{model_number}", trainable=self.trainable, ) previous_conv_layer2.append(conv_layer2_previous) previous_conv_layer2_added = self.add_all_previous( previous_conv_layer2, f"{name}/conv_layer2" ) combined_conv_layer2 = tf.add( conv_layer2, previous_conv_layer2_added, "%s_conv_add" % name ) relu_conv_layer2 = tf.nn.relu( combined_conv_layer2, name="combined_%s_conv_layer2_relu" % name ) if self.trainable: layers.summarize_activation(relu_conv_layer2) tf.summary.image( f"{name}/combined_conv_layer2", tf.reshape(relu_conv_layer2, [-1, 32, 32, 1]), 3, ) return relu_conv_layer2
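# `self.add_all_previous` is referenced above but defined elsewhere. A hedged
# minimal sketch consistent with its usage here (an element-wise sum of the
# previous models' tensors for a given layer):
def add_all_previous(self, previous_tensors, name):
    # tf.add_n sums a list of same-shaped tensors in a single op.
    if len(previous_tensors) == 1:
        return previous_tensors[0]
    return tf.add_n(previous_tensors, name="%s_previous_sum" % name)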
def build_transfer(self, previous_model): """build_transfer Build the actual network, using the values passed over the from agent object, which themselves are derived from the Obs object. This model is built using a previous model. """ # Maps a series of symbols to embeddings, # where an embedding is a mapping from discrete objects, # such as words, to vectors of real numbers. # In this case it is from the unit types. units_embedded = layers.embed_sequence( self.placeholders.screen_unit_type, vocab_size=SCREEN_FEATURES.unit_type.scale, embed_dim=self.unittype_emb_dim, scope="unit_type_emb", trainable=self.trainable, ) # "One hot" encoding performs "binarization" on the input # meaning we end up with features we can suitably learn # from. # Basically, learning from categories isn't possible, # but learning from ints (i.e. 0/1/2 for 3 categories) # ends up with further issues, like the ML algorithm # picking up some pattern in the categories, when none exists. # Instead we want it in a binary form instead, to prevent this. # This is not needed for the background, since it is # not used, which is why we ignore channel 0 in the # last sub-array. player_relative_screen_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_screen, num_classes=SCREEN_FEATURES.player_relative.scale, )[:, :, :, 1:] player_relative_minimap_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_minimap, num_classes=MINIMAP_FEATURES.player_relative.scale, )[:, :, :, 1:] channel_axis = 3 # Group together all the inputs, such that a conv # layer can be built upon them. screen_numeric_all = tf.concat( [ self.placeholders.screen_numeric, units_embedded, player_relative_screen_one_hot, ], axis=channel_axis, ) minimap_numeric_all = tf.concat( [self.placeholders.minimap_numeric, player_relative_minimap_one_hot], axis=channel_axis, ) # Build the 2 convolutional layers based on the screen # and the mini-map. screen_conv_layer_output = self.build_conv_layers_for_input( screen_numeric_all, "screen_network", previous_model.screen_conv_1 ) # And now the minimap minimap_conv_layer_output = self.build_conv_layers_for_input( minimap_numeric_all, "minimap_network", previous_model.minimap_conv_1 ) # Group these two convolutional layers now, and # build a further convolutional layer on top of it. visual_inputs = tf.concat( [screen_conv_layer_output, minimap_conv_layer_output], axis=channel_axis ) spatial_actions_normal = layers.conv2d( visual_inputs, data_format="NHWC", num_outputs=1, kernel_size=1, stride=1, activation_fn=None, scope=f"spatial_actions/model_{self.curriculum_number}", trainable=self.trainable, ) # Sort the previous models spatial action layers. 
previous_spatial_actions = [] for model_number, prev_out in enumerate(previous_model.concat_2): spatial_actions_previous = layers.conv2d( prev_out, data_format="NHWC", num_outputs=1, kernel_size=1, stride=1, activation_fn=None, scope=f"spatial_actions/model_{model_number}", trainable=self.trainable, ) previous_spatial_actions.append(spatial_actions_previous) previous_spatial_actions_added = self.add_all_previous( previous_spatial_actions, "spatial_actions" ) joint_spatial_actions = tf.add( spatial_actions_normal, previous_spatial_actions_added, "spatial_actions_add", ) if self.trainable: tf.summary.image( f"spatial_action_normal", tf.reshape(spatial_actions_normal, [-1, 32, 32, 1]), 3, ) tf.summary.image( f"spatial_action_previous", tf.reshape(spatial_actions_previous, [-1, 32, 32, 1]), 3, ) tf.summary.image( f"joint_connected_layers", tf.reshape(joint_spatial_actions, [-1, 32, 32, 1]), 3, ) # Take the softmax of this final convolutional layer. spatial_action_probs = tf.nn.softmax(layers.flatten(joint_spatial_actions)) # Build a full connected layer of this final convolutional layer. # Could possibly pass in additional variables here, alongside the # convolutional layer. map_output_flat = layers.flatten(visual_inputs) fully_connected_layer_normal = layers.fully_connected( map_output_flat, num_outputs=256, activation_fn=None, scope=f"fully_connected_layer1/model_{self.curriculum_number}", trainable=self.trainable, ) previous_fully_con_1 = [] for model_number, prev_out in enumerate(previous_model.flatten_1): fully_connected_previous = layers.fully_connected( prev_out, num_outputs=256, activation_fn=None, scope=f"fully_connected_layer1/model_{model_number}", trainable=self.trainable, ) previous_fully_con_1.append(fully_connected_previous) previous_fully_con_1_added = self.add_all_previous( previous_fully_con_1, "fully_connected_layer1" ) # Combine the new and old models values, and then apply RELU to the result. joint_connected_layers = tf.add( fully_connected_layer_normal, previous_fully_con_1_added, "fully_connected_layer_add", ) relu_connected_layer = tf.nn.relu( joint_connected_layers, name="fully_connected_layer1_normal_relu" ) # Generate the probability of a given action from the # fully connected layer. Finally, produce a value # estimate for the given actions. action_id_probs_new = layers.fully_connected( relu_connected_layer, num_outputs=len(actions.FUNCTIONS), activation_fn=None, scope=f"action_id/model_{self.curriculum_number}", trainable=self.trainable, ) previous_action_ids = [] for model_number, prev_out in enumerate(previous_model.fully_connected_layer1): previous_action_id_probs = layers.fully_connected( prev_out, num_outputs=len(actions.FUNCTIONS), activation_fn=None, scope=f"action_id/model_{model_number}", trainable=self.trainable, ) previous_action_ids.append(previous_action_id_probs) previous_action_ids_added = self.add_all_previous( previous_action_ids, "action_id" ) joint_action_ids = tf.add( action_id_probs_new, previous_action_ids_added, "id_probs_add" ) # Combine the new and old models values, and then apply softmax to the result. action_id_probs = tf.nn.softmax(joint_action_ids) # Sort value estimate. 
value_estimate_new = layers.fully_connected( relu_connected_layer, num_outputs=1, activation_fn=None, scope=f"value/model_{self.curriculum_number}", trainable=self.trainable, ) previous_value_estimates = [] for model_number, prev_out in enumerate(previous_model.fully_connected_layer1): value_estimate_previous = layers.fully_connected( prev_out, num_outputs=1, activation_fn=None, scope=f"value/model_{model_number}", trainable=self.trainable, ) previous_value_estimates.append(value_estimate_previous) previous_value_estimates_added = self.add_all_previous( previous_value_estimates, "value" ) # Combine the new and old models values, and then squeeze the result. joint_value_estimate = tf.add( value_estimate_new, previous_value_estimates_added, "value_estimate_add" ) value_estimate = tf.squeeze(joint_value_estimate, axis=1) # Disregard all the non-allowed actions by giving them a # probability of zero, before re-normalizing to 1. action_id_probs *= self.placeholders.available_action_ids action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True) spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims( self.placeholders.is_spatial_action_available, axis=1 ) action_id_log_probs = self.logclip(action_id_probs) self.value_estimate = value_estimate self.action_id_probs = action_id_probs self.spatial_action_probs = spatial_action_probs self.action_id_log_probs = action_id_log_probs self.spatial_action_log_probs = spatial_action_log_probs return self
def __call__(self, x, is_training=True): with tf.variable_scope(self.name) as scope: with arg_scope([tcl.batch_norm], is_training=is_training, scale=True): with arg_scope([tcl.conv2d, tcl.conv2d_transpose], activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, biases_initializer=None, padding='SAME', weights_regularizer=tcl.l2_regularizer(0.0002)): size = 16 # x: s x s x 3 se = tcl.conv2d(x, num_outputs=size, kernel_size=4, stride=1) # 256 x 256 x 16 se = resBlock(se, num_outputs=size * 2, kernel_size=4, stride=2) # 128 x 128 x 32 se = resBlock(se, num_outputs=size * 2, kernel_size=4, stride=1) # 128 x 128 x 32 se = resBlock(se, num_outputs=size * 4, kernel_size=4, stride=2) # 64 x 64 x 64 se = resBlock(se, num_outputs=size * 4, kernel_size=4, stride=1) # 64 x 64 x 64 se = resBlock(se, num_outputs=size * 8, kernel_size=4, stride=2) # 32 x 32 x 128 se = resBlock(se, num_outputs=size * 8, kernel_size=4, stride=1) # 32 x 32 x 128 se = resBlock(se, num_outputs=size * 16, kernel_size=4, stride=2) # 16 x 16 x 256 se = resBlock(se, num_outputs=size * 16, kernel_size=4, stride=1) # 16 x 16 x 256 se = resBlock(se, num_outputs=size * 32, kernel_size=4, stride=2) # 8 x 8 x 512 se = resBlock(se, num_outputs=size * 32, kernel_size=4, stride=1) # 8 x 8 x 512 pd = tcl.conv2d_transpose(se, size * 32, 4, stride=1) # 8 x 8 x 512 pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=2) # 16 x 16 x 256 pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=1) # 16 x 16 x 256 pd = tcl.conv2d_transpose(pd, size * 16, 4, stride=1) # 16 x 16 x 256 pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=2) # 32 x 32 x 128 pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=1) # 32 x 32 x 128 pd = tcl.conv2d_transpose(pd, size * 8, 4, stride=1) # 32 x 32 x 128 pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=2) # 64 x 64 x 64 pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=1) # 64 x 64 x 64 pd = tcl.conv2d_transpose(pd, size * 4, 4, stride=1) # 64 x 64 x 64 pd = tcl.conv2d_transpose(pd, size * 2, 4, stride=2) # 128 x 128 x 32 pd = tcl.conv2d_transpose(pd, size * 2, 4, stride=1) # 128 x 128 x 32 pd = tcl.conv2d_transpose(pd, size, 4, stride=2) # 256 x 256 x 16 pd = tcl.conv2d_transpose(pd, size, 4, stride=1) # 256 x 256 x 16 pd = tcl.conv2d_transpose(pd, 3, 4, stride=1) # 256 x 256 x 3 pd = tcl.conv2d_transpose(pd, 3, 4, stride=1) # 256 x 256 x 3 pos = tcl.conv2d_transpose( pd, 3, 4, stride=1, activation_fn=tf.nn.sigmoid ) #, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02)) return pos
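# `resBlock` is used above but defined elsewhere. A hedged sketch of a typical
# bottleneck residual block in this tcl style (an assumption, not the author's
# exact code): two half-width convs plus a projected shortcut when the shape
# changes. Under the surrounding arg_scope it inherits ReLU and batch norm.
def resBlock(x, num_outputs, kernel_size=4, stride=1, scope=None):
    with tf.variable_scope(scope, 'resBlock', [x]):
        shortcut = x
        if stride != 1 or x.get_shape()[3] != num_outputs:
            # Project the shortcut when spatial size or channels change.
            shortcut = tcl.conv2d(x, num_outputs, kernel_size=1,
                                  stride=stride, activation_fn=None)
        y = tcl.conv2d(x, num_outputs // 2, kernel_size=1, stride=1)
        y = tcl.conv2d(y, num_outputs // 2, kernel_size=kernel_size,
                       stride=stride)
        y = tcl.conv2d(y, num_outputs, kernel_size=1, stride=1,
                       activation_fn=None)
        return tf.nn.relu(y + shortcut)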
def inception_v2(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 replace_separable_convolution=False,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV2'):
  """Inception v2 model for classification.

  Constructs an Inception v2 network for classification as described in
  http://arxiv.org/abs/1502.03167.

  The recommended image size used to train this network is 224x224. For image
  sizes that differ substantially, it is recommended to use
  inception_v2_base() and connect custom final layers to the output.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether the network is being trained.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters
      or computation cost of the model.
    replace_separable_convolution: Replace the separable convolution in the
      layer Conv2d_1a_7x7 with a normal convolution.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
      Note that input image sizes other than 224x224 might lead to different
      spatial dimensions, and hence cannot be squeezed. In this event, it is
      best to set spatial_squeeze as False, and perform a reduce_mean over the
      resulting spatial dimensions with sizes exceeding 1.
    reuse: whether or not the network and its variables should be reused. To
      be able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation.

  Raises:
    ValueError: if depth_multiplier <= 0.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  with variable_scope.variable_scope(
      scope, 'InceptionV2', [inputs, num_classes], reuse=reuse) as scope:
    with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                   is_training=is_training):
      net, end_points = inception_v2_base(
          inputs,
          scope=scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier,
          replace_separable_convolution=replace_separable_convolution)
      # Final pooling and prediction
      with variable_scope.variable_scope('Logits'):
        kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
        net = layers_lib.avg_pool2d(
            net,
            kernel_size,
            padding='VALID',
            scope='AvgPool_1a_{}x{}'.format(*kernel_size))
        # 1 x 1 x 1024
        net = layers_lib.dropout(
            net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        logits = layers.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
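# `_reduced_kernel_size_for_small_input` shrinks the global pooling kernel
# when the incoming feature map is smaller than the default (e.g. for inputs
# below 224x224). The slim helper referenced here is essentially:
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    # Dynamic spatial dims: fall back to the requested kernel.
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out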
def build_graph(self, x, reuse, is_training=True, final_sigmoid=True): with tf.variable_scope(self.scope_name, reuse=reuse) as scope: with arg_scope([tcl.batch_norm], is_training=is_training, scale=True): with arg_scope([tcl.conv2d, tcl.conv2d_transpose], activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, biases_initializer=None, padding='SAME', weights_regularizer=tcl.l2_regularizer(0.0002)): bilinear_interpolation = True kernel_size = 3 size = 16 # x: s x s x 3 se = tcl.conv2d(x, num_outputs=size, kernel_size=kernel_size, stride=1) # 256 x 256 x 16 se = resBlock(se, num_outputs=size * 2, kernel_size=kernel_size, stride=2) # 128 x 128 x 32 se = resBlock(se, num_outputs=size * 2, kernel_size=kernel_size, stride=1) # 128 x 128 x 32 se = resBlock(se, num_outputs=size * 4, kernel_size=kernel_size, stride=2) # 64 x 64 x 64 se = resBlock(se, num_outputs=size * 4, kernel_size=kernel_size, stride=1) # 64 x 64 x 64 se = resBlock(se, num_outputs=size * 8, kernel_size=kernel_size, stride=2) # 32 x 32 x 128 se = resBlock(se, num_outputs=size * 8, kernel_size=kernel_size, stride=1) # 32 x 32 x 128 se = resBlock(se, num_outputs=size * 16, kernel_size=kernel_size, stride=2) # 16 x 16 x 256 se = resBlock(se, num_outputs=size * 16, kernel_size=kernel_size, stride=1) # 16 x 16 x 256 se = resBlock(se, num_outputs=size * 32, kernel_size=kernel_size, stride=2) # 8 x 8 x 512 se = resBlock(se, num_outputs=size * 32, kernel_size=kernel_size, stride=1) # 8 x 8 x 512 pd = tcl.conv2d(se, size * 32, kernel_size, stride=1) # 8 x 8 x 512 if bilinear_interpolation is True: pd = tf.image.resize_bilinear(pd, (16, 16)) #pd = tf.image.resize_nearest_neighbor(pd, (16,16) ) pd = tcl.conv2d(pd, size * 16, kernel_size, stride=1) # 16 x 16 x 256 pd = tcl.conv2d(pd, size * 16, kernel_size, stride=1) # 16 x 16 x 256 pd = tcl.conv2d(pd, size * 16, kernel_size, stride=1) # 16 x 16 x 256 else: pd = tcl.conv2d_transpose(pd, size * 16, kernel_size, stride=2) # 16 x 16 x 256 pd = tcl.conv2d_transpose(pd, size * 16, kernel_size, stride=1) # 16 x 16 x 256 pd = tcl.conv2d_transpose(pd, size * 16, kernel_size, stride=1) # 16 x 16 x 256 if bilinear_interpolation is True: pd = tf.image.resize_bilinear(pd, (32, 32)) #pd = tf.image.resize_nearest_neighbor(pd, (32,32) ) pd = tcl.conv2d(pd, size * 8, kernel_size, stride=1) pd = tcl.conv2d(pd, size * 8, kernel_size, stride=1) # 32 x 32 x 128 pd = tcl.conv2d(pd, size * 8, kernel_size, stride=1) # 32 x 32 x 128 else: pd = tcl.conv2d_transpose(pd, size * 8, kernel_size, stride=2) # 32 x 32 x 128 pd = tcl.conv2d_transpose(pd, size * 8, kernel_size, stride=1) # 32 x 32 x 128 pd = tcl.conv2d_transpose(pd, size * 8, kernel_size, stride=1) # 32 x 32 x 128 if bilinear_interpolation is True: pd = tf.image.resize_bilinear(pd, (64, 64)) #pd = tf.image.resize_nearest_neighbor(pd, (64,64) ) pd = tcl.conv2d(pd, size * 4, kernel_size, stride=1) pd = tcl.conv2d(pd, size * 4, kernel_size, stride=1) # 64 x 64 x 64 pd = tcl.conv2d(pd, size * 4, kernel_size, stride=1) # 64 x 64 x 64 else: pd = tcl.conv2d_transpose(pd, size * 4, kernel_size, stride=2) # 64 x 64 x 64 pd = tcl.conv2d_transpose(pd, size * 4, kernel_size, stride=1) # 64 x 64 x 64 pd = tcl.conv2d_transpose(pd, size * 4, kernel_size, stride=1) # 64 x 64 x 64 if bilinear_interpolation is True: pd = tf.image.resize_bilinear(pd, (128, 128)) #pd = tf.image.resize_nearest_neighbor(pd, (128,128) ) pd = tcl.conv2d(pd, size * 2, kernel_size, stride=1) pd = tcl.conv2d(pd, size * 2, kernel_size, stride=1) # 128 x 128 x 32 else: pd = tcl.conv2d_transpose(pd, size 
* 2, kernel_size, stride=2)  # 128 x 128 x 32
                    pd = tcl.conv2d_transpose(pd, size * 2, kernel_size,
                                              stride=1)  # 128 x 128 x 32
                    if bilinear_interpolation is True:
                        pd = tf.image.resize_bilinear(pd, (256, 256))
                        # pd = tf.image.resize_nearest_neighbor(pd, (256, 256))
                        pd = tcl.conv2d(pd, size, kernel_size, stride=1)
                        pd = tcl.conv2d(pd, size, kernel_size,
                                        stride=1)  # 256 x 256 x 16
                        pd = tcl.conv2d(pd, self.num_output_channel,
                                        kernel_size, stride=1)  # 256 x 256 x 3
                        pd = tcl.conv2d(pd, self.num_output_channel,
                                        kernel_size, stride=1)  # 256 x 256 x 3
                        if final_sigmoid:
                            pos = tcl.conv2d(pd, self.num_output_channel,
                                             kernel_size, stride=1,
                                             activation_fn=tf.nn.sigmoid)
                        else:
                            pos = tcl.conv2d(pd, self.num_output_channel,
                                             kernel_size, stride=1,
                                             activation_fn=None)
                    else:
                        pd = tcl.conv2d_transpose(pd, size, kernel_size,
                                                  stride=2)  # 256 x 256 x 16
                        pd = tcl.conv2d_transpose(pd, size, kernel_size,
                                                  stride=1)  # 256 x 256 x 16
                        pd = tcl.conv2d_transpose(pd, self.num_output_channel,
                                                  kernel_size,
                                                  stride=1)  # 256 x 256 x 3
                        pd = tcl.conv2d_transpose(pd, self.num_output_channel,
                                                  kernel_size,
                                                  stride=1)  # 256 x 256 x 3
                        pos = tcl.conv2d_transpose(pd, self.num_output_channel,
                                                   kernel_size, stride=1,
                                                   activation_fn=tf.nn.sigmoid)
                    return 1.1 * (pos - 0.5) + 0.5
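# Note on the return value: `1.1 * (pos - 0.5) + 0.5` stretches the sigmoid
# output from [0, 1] to [-0.05, 1.05], presumably so that targets at exactly
# 0 or 1 are reachable without saturating the sigmoid.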
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
  """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"
  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether the network is being trained.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters
      or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To
      be able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with variable_scope.variable_scope(
      scope, 'InceptionV3', [inputs, num_classes], reuse=reuse) as scope:
    with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                   is_training=is_training):
      net, end_points = inception_v3_base(
          inputs,
          scope=scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier)
      # Auxiliary Head logits
      with arg_scope(
          [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
          stride=1,
          padding='SAME'):
        aux_logits = end_points['Mixed_6e']
        with variable_scope.variable_scope('AuxLogits'):
          aux_logits = layers_lib.avg_pool2d(
              aux_logits, [5, 5], stride=3, padding='VALID',
              scope='AvgPool_1a_5x5')
          aux_logits = layers.conv2d(
              aux_logits, depth(128), [1, 1], scope='Conv2d_1b_1x1')
          # Shape of feature map before the final layer.
kernel_size = _reduced_kernel_size_for_small_input( aux_logits, [5, 5]) aux_logits = layers.conv2d( aux_logits, depth(768), kernel_size, weights_initializer=trunc_normal(0.01), padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size)) aux_logits = layers.conv2d( aux_logits, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=trunc_normal(0.001), scope='Conv2d_2b_1x1') if spatial_squeeze: aux_logits = array_ops.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') end_points['AuxLogits'] = aux_logits # Final pooling and prediction with variable_scope.variable_scope('Logits'): kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8]) net = layers_lib.avg_pool2d( net, kernel_size, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size)) # 1 x 1 x 2048 net = layers_lib.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') end_points['PreLogits'] = net # 2048 logits = layers.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze') # 1000 end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
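# A minimal usage sketch for the classifier above. The companion
# `inception_v3_arg_scope()` shipped with the slim release of this model
# supplies the batch-norm and weight-decay defaults; the variable names below
# are ours:
images = tf.placeholder(tf.float32, [None, 299, 299, 3])
with arg_scope(inception_v3_arg_scope()):
  logits, end_points = inception_v3(images, num_classes=1000,
                                    is_training=True)
probs = end_points['Predictions']
# The auxiliary head is used only as an extra training loss, not at inference.
aux_logits = end_points['AuxLogits']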
def model_fn(features, labels, mode, params): """ Based on https://github.com/tensorflow/tpu/blob/master/models/experimental/inception/inception_v2_tpu_model.py :param features: :param labels: :param mode: :param params: :return: """ tf.summary.image('0_input', features, max_outputs=4) training = mode == tf.estimator.ModeKeys.TRAIN # 224 x 224 x 3 end_point = 'Conv2d_1a_7x7' net = layers.conv2d(features, 64, [7, 7], stride=2, weights_initializer=trunc_normal(1.0), activation_fn=None, scope=end_point) net = tf.layers.batch_normalization(net, training=training, name='{}_bn'.format(end_point)) net = tf.nn.relu(net, name='{}_act'.format(end_point)) tf.summary.image('1_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4) # 112 x 112 x 64 end_point = 'MaxPool_2a_3x3' net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2, padding='SAME') tf.summary.image('2_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4) # 56 x 56 x 64 end_point = 'Conv2d_2b_1x1' net = layers.conv2d(net, 64, [1, 1], activation_fn=None, scope=end_point, weights_initializer=trunc_normal(0.1)) net = tf.layers.batch_normalization(net, training=training, name='{}_bn'.format(end_point)) net = tf.nn.relu(net, name='{}_act'.format(end_point)) tf.summary.image('3_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4) # 56 x 56 x 64 end_point = 'Conv2d_2c_3x3' net = layers.conv2d(net, 192, [3, 3], activation_fn=None, scope=end_point) net = tf.layers.batch_normalization(net, training=training, name='{}_bn'.format(end_point)) net = tf.nn.relu(net, name='{}_act'.format(end_point)) tf.summary.image('4_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4) # 56 x 56 x 192 end_point = 'MaxPool_3a_3x3' net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2, padding='SAME') tf.summary.image('5_{}'.format(end_point), net[:, :, :, 0:3], max_outputs=4) # 28 x 28 x 192 # Inception module. 
end_point = 'Mixed_3b' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 64, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 64, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d(net, 64, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 96, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 96, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 32, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 28 x 28 x 256 end_point = 'Mixed_3c' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 64, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 64, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 96, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 64, [1, 1], 
weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 96, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 96, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 64, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 28 x 28 x 320 end_point = 'Mixed_4a' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d( net, 128, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) branch_0 = layers.conv2d(branch_0, 160, [3, 3], stride=2, activation_fn=None, scope='Conv2d_1a_3x3') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_1a_3x3')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_1a_3x3')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 64, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 96, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) branch_1 = layers.conv2d(branch_1, 96, [3, 3], stride=2, activation_fn=None, scope='Conv2d_1a_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_1a_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_1a_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers_lib.max_pool2d(net, [3, 3], stride=2, padding='SAME', scope='MaxPool_1a_3x3') net = array_ops.concat([branch_0, branch_1, branch_2], 3) # 14 x 14 x 576 end_point = 'Mixed_4b' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 224, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) 
with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 64, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 96, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 96, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 128, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 128, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 14 x 14 x 576 end_point = 'Mixed_4c' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 192, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 96, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 128, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 96, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 128, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( 
branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 128, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 14 x 14 x 576 end_point = 'Mixed_4d' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 160, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 128, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 160, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 128, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 160, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 160, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 96, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 14 x 14 x 576 end_point = 'Mixed_4e' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 96, [1, 1], 
activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 128, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 192, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 160, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 192, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 192, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 96, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 14 x 14 x 576 end_point = 'Mixed_5a' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d( net, 128, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) branch_0 = layers.conv2d(branch_0, 192, [3, 3], stride=2, activation_fn=None, scope='Conv2d_1a_3x3') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_1a_3x3')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_1a_3x3')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 192, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 256, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = 
tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) branch_1 = layers.conv2d(branch_1, 256, [3, 3], stride=2, activation_fn=None, scope='Conv2d_1a_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_1a_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_1a_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers_lib.max_pool2d(net, [3, 3], stride=2, padding='SAME', scope='MaxPool_1a_3x3') net = array_ops.concat([branch_0, branch_1, branch_2], 3) # 7 x 7 x 1024 end_point = 'Mixed_5b' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 352, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 192, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 320, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 160, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 224, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 224, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.avg_pool2d(net, [3, 3], padding='SAME', stride=1, scope='AvgPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) # 7 x 7 x 1024 end_point = 'Mixed_5c' with variable_scope.variable_scope(end_point): with variable_scope.variable_scope('Branch_0'): branch_0 = layers.conv2d(net, 352, [1, 1], activation_fn=None, scope='Conv2d_0a_1x1') branch_0 = tf.layers.batch_normalization( branch_0, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_0 = tf.nn.relu(branch_0, name='{}_act'.format('Conv2d_0a_1x1')) with variable_scope.variable_scope('Branch_1'): branch_1 = layers.conv2d( net, 192, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, 
scope='Conv2d_0a_1x1') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0a_1x1')) branch_1 = layers.conv2d(branch_1, 320, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_1 = tf.layers.batch_normalization( branch_1, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_1 = tf.nn.relu(branch_1, name='{}_act'.format('Conv2d_0b_3x3')) with variable_scope.variable_scope('Branch_2'): branch_2 = layers.conv2d( net, 192, [1, 1], weights_initializer=trunc_normal(0.09), activation_fn=None, scope='Conv2d_0a_1x1') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0a_1x1')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0a_1x1')) branch_2 = layers.conv2d(branch_2, 224, [3, 3], activation_fn=None, scope='Conv2d_0b_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0b_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0b_3x3')) branch_2 = layers.conv2d(branch_2, 224, [3, 3], activation_fn=None, scope='Conv2d_0c_3x3') branch_2 = tf.layers.batch_normalization( branch_2, training=training, name='{}_bn'.format('Conv2d_0c_3x3')) branch_2 = tf.nn.relu(branch_2, name='{}_act'.format('Conv2d_0c_3x3')) with variable_scope.variable_scope('Branch_3'): branch_3 = layers_lib.max_pool2d(net, [3, 3], padding='SAME', stride=1, scope='MaxPool_0a_3x3') branch_3 = layers.conv2d(branch_3, 128, [1, 1], weights_initializer=trunc_normal(0.1), activation_fn=None, scope='Conv2d_0b_1x1') branch_3 = tf.layers.batch_normalization( branch_3, training=training, name='{}_bn'.format('Conv2d_0b_1x1')) branch_3 = tf.nn.relu(branch_3, name='{}_act'.format('Conv2d_0b_1x1')) net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3) with variable_scope.variable_scope('Logits'): kernel_size = util._reduced_kernel_size_for_small_input(net, [7, 7]) net = layers_lib.avg_pool2d( net, kernel_size, stride=1, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size)) # 1 x 1 x 1024 net = layers_lib.dropout(net, keep_prob=params['dropout_keep_prob'], scope='Dropout_1b') logits = layers.conv2d(net, params['num_classes'], [1, 1], normalizer_fn=None, activation_fn=None, scope='Conv2d_1c_1x1') if params['spatial_squeeze']: logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze') predictions = { 'raw': logits, 'predictions': tf.sigmoid(logits, name='Predictions'), } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits) tf.summary.scalar('loss', loss) eval_metric_ops = { 'auc_val': tf.metrics.auc(labels=labels, predictions=predictions['predictions']), 'accuracy_val': tf.metrics.accuracy(labels=labels, predictions=predictions['predictions']), } if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate']) extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(extra_update_ops): train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step()) tf.summary.scalar('auc_train', eval_metric_ops['auc_val'][1]) tf.summary.scalar('accuracy_train', eval_metric_ops['accuracy_val'][1]) tf.summary.histogram('labels', labels) 
    tf.summary.histogram('predictions', predictions['predictions'])
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
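# Usage sketch (an assumption, not part of the original): a model_fn with the
# (features, labels, mode, params) signature that ends in the EstimatorSpec
# above plugs into tf.estimator.Estimator. The params keys mirror the ones the
# function reads; model_fn's name and the model_dir are placeholders.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,  # hypothetical name for the function sketched above
    model_dir='/tmp/inception_multilabel',
    params={'dropout_keep_prob': 0.8,
            'num_classes': 5,
            'spatial_squeeze': True,
            'learning_rate': 1e-4})
# estimator.train(input_fn=train_input_fn, steps=1000)  # train_input_fn assumed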
    return data, info


def resort(x):
    """Sorts each (h, w, c) feature map descending with top_k, first along
    the first axis and then along the channel axis."""
    batch_size = x.get_shape().as_list()[0]
    x = tf.transpose(x, perm=[2, 1, 0])
    x = tf.nn.top_k(x, k=batch_size).values
    x = tf.transpose(x, perm=[2, 1, 0])
    batch_size = x.get_shape().as_list()[2]
    x = tf.nn.top_k(x, k=batch_size).values
    return x


# net argument *****************************************
x = tf.placeholder(tf.float64, [None, 16, 16, 3], 'x')
y = tf.placeholder(tf.float64, [None, 1], 'y')
b = tf.get_variable('b', [2], dtype=tf.float64)  # declared but never used below
l1 = layers.conv2d(x, 16, [4, 4], (1, 1), padding='VALID')  # unused branch
l1_0 = layers.conv2d(x, 256, [4, 4], (3, 3), padding='VALID')
l2_0 = tf.map_fn(resort, l1_0)
l2 = layers.conv2d(l2_0, 16, [5, 5], (1, 1), padding='VALID')
l3 = layers.fully_connected(l2, 256, activation_fn=tf.nn.sigmoid)
l3_0 = layers.fully_connected(l3, 64)
l3_1 = layers.fully_connected(l3_0, 1, activation_fn=tf.nn.sigmoid) * 2 - 1
l3_2 = tf.reduce_sum(l3_1, 1)
l4 = tf.reduce_sum(l3_2, 1, name="yhat")
# sqrt(square(.)) is simply abs(.), i.e. an L1-style loss
loss = tf.reduce_sum(tf.sqrt(tf.square(l4 - y)))
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
init = tf.global_variables_initializer()
s = [100]
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)  # the original snippet breaks off here; only init is shown
def setUp(self): super(OpHandlerUtilTest, self).setUp() tf.reset_default_graph() # This tests a Conv2D -> BatchNorm -> ReLU chain of ops. with framework.arg_scope(self._batch_norm_scope()): inputs = tf.zeros([2, 4, 4, 3]) layers.conv2d(inputs, num_outputs=5, kernel_size=3, scope='conv1') # This tests 3 Conv2D ops being concatenated before a batch normalization. c2 = layers.conv2d(inputs, num_outputs=5, kernel_size=3, scope='conv2') c3 = layers.conv2d(inputs, num_outputs=6, kernel_size=3, scope='conv3') c4 = layers.conv2d(inputs, num_outputs=7, kernel_size=3, scope='conv4') net = tf.concat([c2, c3, c4], axis=3) layers.batch_norm(net) g = tf.get_default_graph() # Declare OpSlice and OpGroup for ops in the first test network. self.batch_norm_op = g.get_operation_by_name( 'conv1/BatchNorm/FusedBatchNormV3') self.batch_norm_op_slice = orm.OpSlice(self.batch_norm_op, None) self.batch_norm_op_group = orm.OpGroup(self.batch_norm_op_slice) self.conv_op = g.get_operation_by_name('conv1/Conv2D') self.conv_op_slice = orm.OpSlice(self.conv_op, None) self.conv_op_group = orm.OpGroup( self.conv_op_slice, omit_source_op_slices=[self.conv_op_slice]) self.gamma_op = g.get_operation_by_name('conv1/BatchNorm/gamma/read') self.beta_op = g.get_operation_by_name('conv1/BatchNorm/beta/read') self.decay_op = g.get_operation_by_name('conv1/BatchNorm/Const') self.mean_op = g.get_operation_by_name( 'conv1/BatchNorm/AssignMovingAvg/sub_1') self.std_op = g.get_operation_by_name( 'conv1/BatchNorm/AssignMovingAvg_1/sub_1') self.relu_op = g.get_operation_by_name('conv1/Relu') self.relu_op_slice = orm.OpSlice(self.relu_op, None) self.relu_op_group = orm.OpGroup( self.relu_op_slice, omit_source_op_slices=[self.relu_op_slice]) # Declare OpSlice and OpGroup for ops in the second test network. self.relu2_op = g.get_operation_by_name('conv2/Relu') self.relu2_op_slice = orm.OpSlice(self.relu2_op, orm.Slice(0, 5)) self.relu2_op_group = orm.OpGroup( self.relu2_op_slice, omit_source_op_slices=[self.relu2_op_slice]) self.relu3_op = g.get_operation_by_name('conv3/Relu') self.relu3_op_slice = orm.OpSlice(self.relu3_op, orm.Slice(0, 6)) self.relu3_op_group = orm.OpGroup( self.relu3_op_slice, omit_source_op_slices=[self.relu3_op_slice]) self.relu4_op = g.get_operation_by_name('conv4/Relu') self.relu4_op_slice = orm.OpSlice(self.relu4_op, orm.Slice(0, 7)) self.relu4_op_group = orm.OpGroup( self.relu4_op_slice, omit_source_op_slices=[self.relu4_op_slice]) self.unfused_batch_norm_op = g.get_operation_by_name( 'BatchNorm/FusedBatchNormV3') self.unfused_batch_norm_op_slice = orm.OpSlice( self.unfused_batch_norm_op, orm.Slice(0, 18)) self.concat_op = g.get_operation_by_name('concat') self.concat_op_slice = orm.OpSlice(self.concat_op, orm.Slice(0, 18)) self.concat_op_group = orm.OpGroup( self.concat_op_slice, omit_source_op_slices=[self.concat_op_slice]) # Create mock OpRegularizerManager with custom mapping of OpSlice and # OpGroup. 
    self.mock_op_reg_manager = mock.create_autospec(orm.OpRegularizerManager)

    def get_op_slices(op):
        return self.op_slice_dict.get(op, [])

    def get_op_group(op_slice):
        return self.op_group_dict.get(op_slice)

    def is_passthrough(op):
        return op in self._passthrough_ops

    self.mock_op_reg_manager.get_op_slices.side_effect = get_op_slices
    self.mock_op_reg_manager.get_op_group.side_effect = get_op_group
    self.mock_op_reg_manager.is_passthrough.side_effect = is_passthrough
    self.mock_op_reg_manager.ops = [
        self.batch_norm_op, self.gamma_op, self.beta_op, self.decay_op,
        self.mean_op, self.std_op, self.conv_op, self.relu_op, self.relu2_op,
        self.relu3_op, self.relu4_op, self.unfused_batch_norm_op,
        self.concat_op
    ]
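# The side-effect helpers above read self.op_slice_dict and self.op_group_dict,
# which this excerpt never populates; individual tests presumably fill them per
# case. A minimal assumed example of a test method inside the same class:
def testGetOpSlices(self):
    self.op_slice_dict = {self.conv_op: [self.conv_op_slice]}
    self.op_group_dict = {self.conv_op_slice: self.conv_op_group}
    self.assertEqual([self.conv_op_slice],
                     self.mock_op_reg_manager.get_op_slices(self.conv_op))
    self.assertIs(self.conv_op_group,
                  self.mock_op_reg_manager.get_op_group(self.conv_op_slice))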
def attend(pixels, word_indices, pattern_indices, char_indices, memory_mask, parses): """ :param pixels: (bs, h, w) :param word_indices: (bs, h, w) :param pattern_indices: (bs, h, w) :param char_indices: (bs, h, w) :param memory_mask: (bs, h, w, m, l, d) :param parses: (bs, h, w, 4, 2) """ bs = tf.shape(pixels)[0] X, Y = tf.meshgrid(tf.linspace(0.0, 1.0, RealData.im_size[0]), tf.linspace(0.0, 1.0, RealData.im_size[0])) X = tf.tile(X[None, ..., None], (bs, 1, 1, 1)) Y = tf.tile(Y[None, ..., None], (bs, 1, 1, 1)) word_embeddings = tf.reshape( layers.embed_sequence(tf.reshape(word_indices, (bs, -1)), vocab_size=RealData.word_hash_size, embed_dim=self.n_hid, unique=False, scope="word-embeddings"), (bs, h, w, self.n_hid)) pattern_embeddings = tf.reshape( layers.embed_sequence(tf.reshape(pattern_indices, (bs, -1)), vocab_size=RealData.pattern_hash_size, embed_dim=self.n_hid, unique=False, scope="pattern-embeddings"), (bs, h, w, self.n_hid)) char_embeddings = tf.reshape( layers.embed_sequence(tf.reshape(char_indices, (bs, -1)), vocab_size=RealData.n_output, embed_dim=self.n_hid, unique=False, scope="char-embeddings"), (bs, h, w, self.n_hid)) pixels = tf.reshape(pixels, (bs, h, w, 3)) parses = tf.reshape(parses, (bs, h, w, 8)) memory_mask = tf.reshape(memory_mask, (bs, h, w, 1)) x = tf.concat([ pixels, word_embeddings, pattern_embeddings, char_embeddings, parses, X, Y, memory_mask ], axis=3) with tf.variable_scope('attend'): for i in range(4): x = tf.nn.relu(dilated_block(x)) x = layers.dropout(x, self.keep_prob, is_training=self.is_training_ph) pre_att_logits = x att_logits = layers.conv2d(x, RealData.n_memories, 3, activation_fn=None, weights_regularizer=self.regularizer ) # (bs, h, w, n_memories) att_logits = memory_mask * att_logits - ( 1.0 - memory_mask ) * 1000 # TODO only sum the memory_mask idx, in the softmax logits = tf.reshape(att_logits, (bs, -1)) # (bs, h * w * n_memories) logits -= tf.reduce_max(logits, axis=1, keepdims=True) lp = tf.nn.log_softmax(logits, axis=1) # (bs, h * w * n_memories) p = tf.nn.softmax(logits, axis=1) # (bs, h * w * n_memories) spatial_attention = tf.reshape( p, (bs, h * w * RealData.n_memories, 1, 1)) # (bs, h * w * n_memories, 1, 1) p_uniform = memory_mask / tf.reduce_sum( memory_mask, axis=(1, 2, 3), keepdims=True) cross_entropy_uniform = -tf.reduce_sum( p_uniform * tf.reshape(lp, (bs, h, w, RealData.n_memories)), axis=(1, 2, 3)) # (bs, 1) cp = tf.reduce_sum(tf.reshape(p, (bs, h, w, RealData.n_memories)), axis=3, keepdims=True) context = tf.reduce_sum(cp * pre_att_logits, axis=(1, 2)) # (bs, 4*n_hidden) return spatial_attention, cross_entropy_uniform, context
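# The "-1000 on masked positions" trick above is a soft mask. An equivalent
# formulation often used for masked softmax (a sketch, not the author's code;
# masked_log_softmax and neg_inf are illustrative names):
def masked_log_softmax(logits, mask, axis=-1, neg_inf=-1e9):
    """mask: 1.0 where attention is allowed, 0.0 where it must be zeroed."""
    logits = tf.where(mask > 0, logits, neg_inf * tf.ones_like(logits))
    return tf.nn.log_softmax(logits, axis=axis)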
def inference_pose(image, center_map): # corresponds to pose_deploy_centerMap.prototxt with tf.variable_scope('PoseNet'): pool_center_lower = layers.avg_pool2d(center_map, 9, 8, padding='VALID') conv1_stage1 = layers.conv2d(image, 128, 9, 1, activation_fn=None, scope='conv1_stage1') conv1_stage1 = tf.nn.relu(conv1_stage1) pool1_stage1 = layers.max_pool2d(conv1_stage1, 3, 2) conv2_stage1 = layers.conv2d(pool1_stage1, 128, 9, 1, activation_fn=None, scope='conv2_stage1') conv2_stage1 = tf.nn.relu(conv2_stage1) pool2_stage1 = layers.max_pool2d(conv2_stage1, 3, 2) conv3_stage1 = layers.conv2d(pool2_stage1, 128, 9, 1, activation_fn=None, scope='conv3_stage1') conv3_stage1 = tf.nn.relu(conv3_stage1) pool3_stage1 = layers.max_pool2d(conv3_stage1, 3, 2) conv4_stage1 = layers.conv2d(pool3_stage1, 32, 5, 1, activation_fn=None, scope='conv4_stage1') conv4_stage1 = tf.nn.relu(conv4_stage1) conv5_stage1 = layers.conv2d(conv4_stage1, 512, 9, 1, activation_fn=None, scope='conv5_stage1') conv5_stage1 = tf.nn.relu(conv5_stage1) conv6_stage1 = layers.conv2d(conv5_stage1, 512, 1, 1, activation_fn=None, scope='conv6_stage1') conv6_stage1 = tf.nn.relu(conv6_stage1) conv7_stage1 = layers.conv2d(conv6_stage1, 15, 1, 1, activation_fn=None, scope='conv7_stage1') conv1_stage2 = layers.conv2d(image, 128, 9, 1, activation_fn=None, scope='conv1_stage2') conv1_stage2 = tf.nn.relu(conv1_stage2) pool1_stage2 = layers.max_pool2d(conv1_stage2, 3, 2) conv2_stage2 = layers.conv2d(pool1_stage2, 128, 9, 1, activation_fn=None, scope='conv2_stage2') conv2_stage2 = tf.nn.relu(conv2_stage2) pool2_stage2 = layers.max_pool2d(conv2_stage2, 3, 2) conv3_stage2 = layers.conv2d(pool2_stage2, 128, 9, 1, activation_fn=None, scope='conv3_stage2') conv3_stage2 = tf.nn.relu(conv3_stage2) pool3_stage2 = layers.max_pool2d(conv3_stage2, 3, 2) conv4_stage2 = layers.conv2d(pool3_stage2, 32, 5, 1, activation_fn=None, scope='conv4_stage2') conv4_stage2 = tf.nn.relu(conv4_stage2) concat_stage2 = tf.concat( axis=3, values=[conv4_stage2, conv7_stage1, pool_center_lower]) Mconv1_stage2 = layers.conv2d(concat_stage2, 128, 11, 1, activation_fn=None, scope='Mconv1_stage2') Mconv1_stage2 = tf.nn.relu(Mconv1_stage2) Mconv2_stage2 = layers.conv2d(Mconv1_stage2, 128, 11, 1, activation_fn=None, scope='Mconv2_stage2') Mconv2_stage2 = tf.nn.relu(Mconv2_stage2) Mconv3_stage2 = layers.conv2d(Mconv2_stage2, 128, 11, 1, activation_fn=None, scope='Mconv3_stage2') Mconv3_stage2 = tf.nn.relu(Mconv3_stage2) Mconv4_stage2 = layers.conv2d(Mconv3_stage2, 128, 1, 1, activation_fn=None, scope='Mconv4_stage2') Mconv4_stage2 = tf.nn.relu(Mconv4_stage2) Mconv5_stage2 = layers.conv2d(Mconv4_stage2, 15, 1, 1, activation_fn=None, scope='Mconv5_stage2') conv1_stage3 = layers.conv2d(pool3_stage2, 32, 5, 1, activation_fn=None, scope='conv1_stage3') conv1_stage3 = tf.nn.relu(conv1_stage3) concat_stage3 = tf.concat( axis=3, values=[conv1_stage3, Mconv5_stage2, pool_center_lower]) Mconv1_stage3 = layers.conv2d(concat_stage3, 128, 11, 1, activation_fn=None, scope='Mconv1_stage3') Mconv1_stage3 = tf.nn.relu(Mconv1_stage3) Mconv2_stage3 = layers.conv2d(Mconv1_stage3, 128, 11, 1, activation_fn=None, scope='Mconv2_stage3') Mconv2_stage3 = tf.nn.relu(Mconv2_stage3) Mconv3_stage3 = layers.conv2d(Mconv2_stage3, 128, 11, 1, activation_fn=None, scope='Mconv3_stage3') Mconv3_stage3 = tf.nn.relu(Mconv3_stage3) Mconv4_stage3 = layers.conv2d(Mconv3_stage3, 128, 1, 1, activation_fn=None, scope='Mconv4_stage3') Mconv4_stage3 = tf.nn.relu(Mconv4_stage3) Mconv5_stage3 = layers.conv2d(Mconv4_stage3, 15, 1, 
1, activation_fn=None, scope='Mconv5_stage3') conv1_stage4 = layers.conv2d(pool3_stage2, 32, 5, 1, activation_fn=None, scope='conv1_stage4') conv1_stage4 = tf.nn.relu(conv1_stage4) concat_stage4 = tf.concat( axis=3, values=[conv1_stage4, Mconv5_stage3, pool_center_lower]) Mconv1_stage4 = layers.conv2d(concat_stage4, 128, 11, 1, activation_fn=None, scope='Mconv1_stage4') Mconv1_stage4 = tf.nn.relu(Mconv1_stage4) Mconv2_stage4 = layers.conv2d(Mconv1_stage4, 128, 11, 1, activation_fn=None, scope='Mconv2_stage4') Mconv2_stage4 = tf.nn.relu(Mconv2_stage4) Mconv3_stage4 = layers.conv2d(Mconv2_stage4, 128, 11, 1, activation_fn=None, scope='Mconv3_stage4') Mconv3_stage4 = tf.nn.relu(Mconv3_stage4) Mconv4_stage4 = layers.conv2d(Mconv3_stage4, 128, 1, 1, activation_fn=None, scope='Mconv4_stage4') Mconv4_stage4 = tf.nn.relu(Mconv4_stage4) Mconv5_stage4 = layers.conv2d(Mconv4_stage4, 15, 1, 1, activation_fn=None, scope='Mconv5_stage4') conv1_stage5 = layers.conv2d(pool3_stage2, 32, 5, 1, activation_fn=None, scope='conv1_stage5') conv1_stage5 = tf.nn.relu(conv1_stage5) concat_stage5 = tf.concat( axis=3, values=[conv1_stage5, Mconv5_stage4, pool_center_lower]) Mconv1_stage5 = layers.conv2d(concat_stage5, 128, 11, 1, activation_fn=None, scope='Mconv1_stage5') Mconv1_stage5 = tf.nn.relu(Mconv1_stage5) Mconv2_stage5 = layers.conv2d(Mconv1_stage5, 128, 11, 1, activation_fn=None, scope='Mconv2_stage5') Mconv2_stage5 = tf.nn.relu(Mconv2_stage5) Mconv3_stage5 = layers.conv2d(Mconv2_stage5, 128, 11, 1, activation_fn=None, scope='Mconv3_stage5') Mconv3_stage5 = tf.nn.relu(Mconv3_stage5) Mconv4_stage5 = layers.conv2d(Mconv3_stage5, 128, 1, 1, activation_fn=None, scope='Mconv4_stage5') Mconv4_stage5 = tf.nn.relu(Mconv4_stage5) Mconv5_stage5 = layers.conv2d(Mconv4_stage5, 15, 1, 1, activation_fn=None, scope='Mconv5_stage5') conv1_stage6 = layers.conv2d(pool3_stage2, 32, 5, 1, activation_fn=None, scope='conv1_stage6') conv1_stage6 = tf.nn.relu(conv1_stage6) concat_stage6 = tf.concat( axis=3, values=[conv1_stage6, Mconv5_stage5, pool_center_lower]) Mconv1_stage6 = layers.conv2d(concat_stage6, 128, 11, 1, activation_fn=None, scope='Mconv1_stage6') Mconv1_stage6 = tf.nn.relu(Mconv1_stage6) Mconv2_stage6 = layers.conv2d(Mconv1_stage6, 128, 11, 1, activation_fn=None, scope='Mconv2_stage6') Mconv2_stage6 = tf.nn.relu(Mconv2_stage6) Mconv3_stage6 = layers.conv2d(Mconv2_stage6, 128, 11, 1, activation_fn=None, scope='Mconv3_stage6') Mconv3_stage6 = tf.nn.relu(Mconv3_stage6) Mconv4_stage6 = layers.conv2d(Mconv3_stage6, 128, 1, 1, activation_fn=None, scope='Mconv4_stage6') Mconv4_stage6 = tf.nn.relu(Mconv4_stage6) Mconv5_stage6 = layers.conv2d(Mconv4_stage6, 15, 1, 1, activation_fn=None, scope='Mconv5_stage6') return Mconv5_stage6
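# Stages 3 through 6 above repeat one pattern exactly; a loop-based refactor
# could look like the sketch below (same scope names and layer sizes as the
# original; refinement_stage is a hypothetical helper, not the author's code):
def refinement_stage(prev_belief, shared_pool3, pool_center_lower, stage):
    conv1 = tf.nn.relu(layers.conv2d(shared_pool3, 32, 5, 1, activation_fn=None,
                                     scope='conv1_stage%d' % stage))
    net = tf.concat(axis=3, values=[conv1, prev_belief, pool_center_lower])
    for i in (1, 2, 3):
        net = tf.nn.relu(layers.conv2d(net, 128, 11, 1, activation_fn=None,
                                       scope='Mconv%d_stage%d' % (i, stage)))
    net = tf.nn.relu(layers.conv2d(net, 128, 1, 1, activation_fn=None,
                                   scope='Mconv4_stage%d' % stage))
    return layers.conv2d(net, 15, 1, 1, activation_fn=None,
                         scope='Mconv5_stage%d' % stage)

# belief = Mconv5_stage2
# for stage in range(3, 7):
#     belief = refinement_stage(belief, pool3_stage2, pool_center_lower, stage)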
def alexnet_v2(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.5, spatial_squeeze=True, scope='alexnet_v2'): """AlexNet version 2. Described in: http://arxiv.org/pdf/1404.5997v2.pdf Parameters from: github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ layers-imagenet-1gpu.cfg Note: All the fully_connected layers have been transformed to conv2d layers. To use in classification mode, resize input to 224x224. To use in fully convolutional mode, set spatial_squeeze to false. The LRN layers have been removed and change the initializers from random_normal_initializer to xavier_initializer. Args: inputs: a tensor of size [batch_size, height, width, channels]. num_classes: number of predicted classes. is_training: whether or not the model is being trained. dropout_keep_prob: the probability that activations are kept in the dropout layers during training. spatial_squeeze: whether or not should squeeze the spatial dimensions of the outputs. Useful to remove unnecessary dimensions for classification. scope: Optional scope for the variables. Returns: the last op containing the log predictions and end_points dict. """ with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: end_points_collection = sc.original_name_scope + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. with arg_scope( [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], outputs_collections=[end_points_collection]): net = layers.conv2d( inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1') net = layers.conv2d(net, 192, [5, 5], scope='conv2') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2') net = layers.conv2d(net, 384, [3, 3], scope='conv3') net = layers.conv2d(net, 384, [3, 3], scope='conv4') net = layers.conv2d(net, 256, [3, 3], scope='conv5') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5') # Use conv2d instead of fully_connected layers. with arg_scope( [layers.conv2d], weights_initializer=trunc_normal(0.005), biases_initializer=init_ops.constant_initializer(0.1)): net = layers.conv2d(net, 4096, [5, 5], padding='VALID', scope='fc6') net = layers_lib.dropout( net, dropout_keep_prob, is_training=is_training, scope='dropout6') net = layers.conv2d(net, 4096, [1, 1], scope='fc7') net = layers_lib.dropout( net, dropout_keep_prob, is_training=is_training, scope='dropout7') net = layers.conv2d( net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, biases_initializer=init_ops.zeros_initializer(), scope='fc8') # Convert end_points_collection into a end_point dict. end_points = utils.convert_collection_to_dict(end_points_collection) if spatial_squeeze: net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') end_points[sc.name + '/fc8'] = net return net, end_points
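# Per the docstring, fully convolutional use means spatial_squeeze=False; a
# quick sketch (the 320x320 input size is an arbitrary assumption):
images = tf.placeholder(tf.float32, [1, 320, 320, 3])
net, end_points = alexnet_v2(images, num_classes=1000,
                             is_training=False, spatial_squeeze=False)
# net keeps its spatial dimensions: a [1, h, w, 1000] class-score map.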
def inference_person(image): with tf.variable_scope('PersonNet'): conv1_1 = layers.conv2d(image, 64, 3, 1, activation_fn=None, scope='conv1_1') conv1_1 = tf.nn.relu(conv1_1) conv1_2 = layers.conv2d(conv1_1, 64, 3, 1, activation_fn=None, scope='conv1_2') conv1_2 = tf.nn.relu(conv1_2) pool1_stage1 = layers.max_pool2d(conv1_2, 2, 2) conv2_1 = layers.conv2d(pool1_stage1, 128, 3, 1, activation_fn=None, scope='conv2_1') conv2_1 = tf.nn.relu(conv2_1) conv2_2 = layers.conv2d(conv2_1, 128, 3, 1, activation_fn=None, scope='conv2_2') conv2_2 = tf.nn.relu(conv2_2) pool2_stage1 = layers.max_pool2d(conv2_2, 2, 2) conv3_1 = layers.conv2d(pool2_stage1, 256, 3, 1, activation_fn=None, scope='conv3_1') conv3_1 = tf.nn.relu(conv3_1) conv3_2 = layers.conv2d(conv3_1, 256, 3, 1, activation_fn=None, scope='conv3_2') conv3_2 = tf.nn.relu(conv3_2) conv3_3 = layers.conv2d(conv3_2, 256, 3, 1, activation_fn=None, scope='conv3_3') conv3_3 = tf.nn.relu(conv3_3) conv3_4 = layers.conv2d(conv3_3, 256, 3, 1, activation_fn=None, scope='conv3_4') conv3_4 = tf.nn.relu(conv3_4) pool3_stage1 = layers.max_pool2d(conv3_4, 2, 2) conv4_1 = layers.conv2d(pool3_stage1, 512, 3, 1, activation_fn=None, scope='conv4_1') conv4_1 = tf.nn.relu(conv4_1) conv4_2 = layers.conv2d(conv4_1, 512, 3, 1, activation_fn=None, scope='conv4_2') conv4_2 = tf.nn.relu(conv4_2) conv4_3 = layers.conv2d(conv4_2, 512, 3, 1, activation_fn=None, scope='conv4_3') conv4_3 = tf.nn.relu(conv4_3) conv4_4 = layers.conv2d(conv4_3, 512, 3, 1, activation_fn=None, scope='conv4_4') conv4_4 = tf.nn.relu(conv4_4) conv5_1 = layers.conv2d(conv4_4, 512, 3, 1, activation_fn=None, scope='conv5_1') conv5_1 = tf.nn.relu(conv5_1) conv5_2_CPM = layers.conv2d(conv5_1, 128, 3, 1, activation_fn=None, scope='conv5_2_CPM') conv5_2_CPM = tf.nn.relu(conv5_2_CPM) conv6_1_CPM = layers.conv2d(conv5_2_CPM, 512, 1, 1, activation_fn=None, scope='conv6_1_CPM') conv6_1_CPM = tf.nn.relu(conv6_1_CPM) conv6_2_CPM = layers.conv2d(conv6_1_CPM, 1, 1, 1, activation_fn=None, scope='conv6_2_CPM') concat_stage2 = tf.concat(axis=3, values=[conv6_2_CPM, conv5_2_CPM]) Mconv1_stage2 = layers.conv2d(concat_stage2, 128, 7, 1, activation_fn=None, scope='Mconv1_stage2') Mconv1_stage2 = tf.nn.relu(Mconv1_stage2) Mconv2_stage2 = layers.conv2d(Mconv1_stage2, 128, 7, 1, activation_fn=None, scope='Mconv2_stage2') Mconv2_stage2 = tf.nn.relu(Mconv2_stage2) Mconv3_stage2 = layers.conv2d(Mconv2_stage2, 128, 7, 1, activation_fn=None, scope='Mconv3_stage2') Mconv3_stage2 = tf.nn.relu(Mconv3_stage2) Mconv4_stage2 = layers.conv2d(Mconv3_stage2, 128, 7, 1, activation_fn=None, scope='Mconv4_stage2') Mconv4_stage2 = tf.nn.relu(Mconv4_stage2) Mconv5_stage2 = layers.conv2d(Mconv4_stage2, 128, 7, 1, activation_fn=None, scope='Mconv5_stage2') Mconv5_stage2 = tf.nn.relu(Mconv5_stage2) Mconv6_stage2 = layers.conv2d(Mconv5_stage2, 128, 1, 1, activation_fn=None, scope='Mconv6_stage2') Mconv6_stage2 = tf.nn.relu(Mconv6_stage2) Mconv7_stage2 = layers.conv2d(Mconv6_stage2, 1, 1, 1, activation_fn=None, scope='Mconv7_stage2') concat_stage3 = tf.concat(axis=3, values=[Mconv7_stage2, conv5_2_CPM]) Mconv1_stage3 = layers.conv2d(concat_stage3, 128, 7, 1, activation_fn=None, scope='Mconv1_stage3') Mconv1_stage3 = tf.nn.relu(Mconv1_stage3) Mconv2_stage3 = layers.conv2d(Mconv1_stage3, 128, 7, 1, activation_fn=None, scope='Mconv2_stage3') Mconv2_stage3 = tf.nn.relu(Mconv2_stage3) Mconv3_stage3 = layers.conv2d(Mconv2_stage3, 128, 7, 1, activation_fn=None, scope='Mconv3_stage3') Mconv3_stage3 = tf.nn.relu(Mconv3_stage3) Mconv4_stage3 = 
layers.conv2d(Mconv3_stage3, 128, 7, 1, activation_fn=None, scope='Mconv4_stage3') Mconv4_stage3 = tf.nn.relu(Mconv4_stage3) Mconv5_stage3 = layers.conv2d(Mconv4_stage3, 128, 7, 1, activation_fn=None, scope='Mconv5_stage3') Mconv5_stage3 = tf.nn.relu(Mconv5_stage3) Mconv6_stage3 = layers.conv2d(Mconv5_stage3, 128, 1, 1, activation_fn=None, scope='Mconv6_stage3') Mconv6_stage3 = tf.nn.relu(Mconv6_stage3) Mconv7_stage3 = layers.conv2d(Mconv6_stage3, 1, 1, 1, activation_fn=None, scope='Mconv7_stage3') concat_stage4 = tf.concat(axis=3, values=[Mconv7_stage3, conv5_2_CPM]) Mconv1_stage4 = layers.conv2d(concat_stage4, 128, 7, 1, activation_fn=None, scope='Mconv1_stage4') Mconv1_stage4 = tf.nn.relu(Mconv1_stage4) Mconv2_stage4 = layers.conv2d(Mconv1_stage4, 128, 7, 1, activation_fn=None, scope='Mconv2_stage4') Mconv2_stage4 = tf.nn.relu(Mconv2_stage4) Mconv3_stage4 = layers.conv2d(Mconv2_stage4, 128, 7, 1, activation_fn=None, scope='Mconv3_stage4') Mconv3_stage4 = tf.nn.relu(Mconv3_stage4) Mconv4_stage4 = layers.conv2d(Mconv3_stage4, 128, 7, 1, activation_fn=None, scope='Mconv4_stage4') Mconv4_stage4 = tf.nn.relu(Mconv4_stage4) Mconv5_stage4 = layers.conv2d(Mconv4_stage4, 128, 7, 1, activation_fn=None, scope='Mconv5_stage4') Mconv5_stage4 = tf.nn.relu(Mconv5_stage4) Mconv6_stage4 = layers.conv2d(Mconv5_stage4, 128, 1, 1, activation_fn=None, scope='Mconv6_stage4') Mconv6_stage4 = tf.nn.relu(Mconv6_stage4) Mconv7_stage4 = layers.conv2d(Mconv6_stage4, 1, 1, 1, activation_fn=None, scope='Mconv7_stage4') return Mconv7_stage4
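# Quick shape check for the person detector above (a sketch; the 368x368 input
# size is a common CPM choice, assumed here rather than taken from this code):
image = tf.placeholder(tf.float32, [1, 368, 368, 3])
heatmap = inference_person(image)
# Three stride-2 poolings reduce 368 -> 46, giving a single-channel map:
print(heatmap.get_shape())  # (1, 46, 46, 1)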
def inference_resnet_v1_50(images, params, num_classes, use_batch_norm=False, weight_decay=0.0, is_training=False, mean=None) -> tf.Tensor: if use_batch_norm: if params.batch_renorm: renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 1} renorm_momentum = 0.98 else: renorm_clipping = None renorm_momentum = 0.99 batch_norm_fn = lambda x: tf.layers.batch_normalization( x, axis=-1, training=is_training, name='batch_norm', renorm=params.batch_renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_momentum) else: batch_norm_fn = None def upsample_conv(input_tensor, previous_intermediate_layer, layer_params, number) -> tf.Tensor: """ Deconvolution (upscaling) layers :param input_tensor: :param previous_intermediate_layer: :param layer_params: :param number: :return: """ with tf.variable_scope('deconv_{}'.format(number)): if previous_intermediate_layer.get_shape()[1].value and \ previous_intermediate_layer.get_shape()[2].value: target_shape = previous_intermediate_layer.get_shape()[1:3] else: target_shape = tf.shape(previous_intermediate_layer)[1:3] upsampled_layer = tf.image.resize_images( input_tensor, target_shape, method=tf.image.ResizeMethod.BILINEAR) net = tf.concat([upsampled_layer, previous_intermediate_layer], 3) filter_size, nb_bottlenecks = layer_params if nb_bottlenecks > 0: for i in range(nb_bottlenecks): net = resnet_v1.bottleneck(inputs=net, depth=filter_size, depth_bottleneck=filter_size // 4, stride=1) else: net = layers.conv2d(inputs=net, num_outputs=filter_size, kernel_size=[3, 3], scope="conv{}".format(number)) return net # Original ResNet blocks_needed = max([ i for i, is_needed in enumerate(params.selected_levels_upscaling) if is_needed ]) resnet_net, intermediate_layers = resnet_v1_50_fn( images, is_training=False, blocks=blocks_needed, weight_decay=weight_decay, renorm=False, corrected_version=params.correct_resnet_version, mean=mean) # Upsampling with tf.variable_scope('upsampling'): with arg_scope( [layers.conv2d], normalizer_fn=batch_norm_fn, weights_regularizer=layers.l2_regularizer(weight_decay)): selected_upscale_params = [ l for i, l in enumerate(params.upscale_params) if params.selected_levels_upscaling[i] ] assert len(selected_upscale_params) == len(intermediate_layers), \ 'Upscaling : {} is different from {}'.format(len(selected_upscale_params), len(intermediate_layers)) selected_intermediate_levels = [ l for i, l in enumerate(intermediate_layers) if params.selected_levels_upscaling[i] ] # Rescaled image values to [0,1] selected_intermediate_levels.insert(0, images / 255.0) # Force layers to not be too big to reduce memory usage for i, l in enumerate(selected_intermediate_levels): if l.get_shape()[-1] > params.max_depth: selected_intermediate_levels[i] = layers.conv2d( inputs=l, num_outputs=params.max_depth, kernel_size=[1, 1], scope="dimreduc_{}".format(i), # normalizer_fn=batch_norm_fn, activation_fn=None) # Deconvolving loop out_tensor = selected_intermediate_levels[-1] n_layer = 1 for i in reversed(range(len(selected_intermediate_levels) - 1)): out_tensor = upsample_conv(out_tensor, selected_intermediate_levels[i], selected_upscale_params[i], n_layer) n_layer += 1 if images.get_shape()[1].value and images.get_shape()[2].value: target_shape = images.get_shape()[1:3] else: target_shape = tf.shape(images)[1:3] out_tensor = tf.image.resize_images( out_tensor, target_shape, method=tf.image.ResizeMethod.BILINEAR) logits = layers.conv2d(inputs=out_tensor, num_outputs=num_classes, activation_fn=None, kernel_size=[1, 1], 
scope="conv{}-logits".format(n_layer)) return logits
def inference_pose_v2(image, center_map): # corresponds to pose_deploy_resize.prototxt with tf.variable_scope('PoseNet'): pool_center_lower = layers.avg_pool2d(center_map, 9, 8, padding='SAME') conv1_1 = layers.conv2d(image, 64, 3, 1, activation_fn=None, scope='conv1_1') conv1_1 = tf.nn.relu(conv1_1) conv1_2 = layers.conv2d(conv1_1, 64, 3, 1, activation_fn=None, scope='conv1_2') conv1_2 = tf.nn.relu(conv1_2) pool1_stage1 = layers.max_pool2d(conv1_2, 2, 2) conv2_1 = layers.conv2d(pool1_stage1, 128, 3, 1, activation_fn=None, scope='conv2_1') conv2_1 = tf.nn.relu(conv2_1) conv2_2 = layers.conv2d(conv2_1, 128, 3, 1, activation_fn=None, scope='conv2_2') conv2_2 = tf.nn.relu(conv2_2) pool2_stage1 = layers.max_pool2d(conv2_2, 2, 2) conv3_1 = layers.conv2d(pool2_stage1, 256, 3, 1, activation_fn=None, scope='conv3_1') conv3_1 = tf.nn.relu(conv3_1) conv3_2 = layers.conv2d(conv3_1, 256, 3, 1, activation_fn=None, scope='conv3_2') conv3_2 = tf.nn.relu(conv3_2) conv3_3 = layers.conv2d(conv3_2, 256, 3, 1, activation_fn=None, scope='conv3_3') conv3_3 = tf.nn.relu(conv3_3) conv3_4 = layers.conv2d(conv3_3, 256, 3, 1, activation_fn=None, scope='conv3_4') conv3_4 = tf.nn.relu(conv3_4) pool3_stage1 = layers.max_pool2d(conv3_4, 2, 2) conv4_1 = layers.conv2d(pool3_stage1, 512, 3, 1, activation_fn=None, scope='conv4_1') conv4_1 = tf.nn.relu(conv4_1) conv4_2 = layers.conv2d(conv4_1, 512, 3, 1, activation_fn=None, scope='conv4_2') conv4_2 = tf.nn.relu(conv4_2) conv4_3_CPM = layers.conv2d(conv4_2, 256, 3, 1, activation_fn=None, scope='conv4_3_CPM') conv4_3_CPM = tf.nn.relu(conv4_3_CPM) conv4_4_CPM = layers.conv2d(conv4_3_CPM, 256, 3, 1, activation_fn=None, scope='conv4_4_CPM') conv4_4_CPM = tf.nn.relu(conv4_4_CPM) conv4_5_CPM = layers.conv2d(conv4_4_CPM, 256, 3, 1, activation_fn=None, scope='conv4_5_CPM') conv4_5_CPM = tf.nn.relu(conv4_5_CPM) conv4_6_CPM = layers.conv2d(conv4_5_CPM, 256, 3, 1, activation_fn=None, scope='conv4_6_CPM') conv4_6_CPM = tf.nn.relu(conv4_6_CPM) conv4_7_CPM = layers.conv2d(conv4_6_CPM, 128, 3, 1, activation_fn=None, scope='conv4_7_CPM') conv4_7_CPM = tf.nn.relu(conv4_7_CPM) conv5_1_CPM = layers.conv2d(conv4_7_CPM, 512, 1, 1, activation_fn=None, scope='conv5_1_CPM') conv5_1_CPM = tf.nn.relu(conv5_1_CPM) conv5_2_CPM = layers.conv2d(conv5_1_CPM, 15, 1, 1, activation_fn=None, scope='conv5_2_CPM') concat_stage2 = tf.concat( axis=3, values=[conv5_2_CPM, conv4_7_CPM, pool_center_lower]) Mconv1_stage2 = layers.conv2d(concat_stage2, 128, 7, 1, activation_fn=None, scope='Mconv1_stage2') Mconv1_stage2 = tf.nn.relu(Mconv1_stage2) Mconv2_stage2 = layers.conv2d(Mconv1_stage2, 128, 7, 1, activation_fn=None, scope='Mconv2_stage2') Mconv2_stage2 = tf.nn.relu(Mconv2_stage2) Mconv3_stage2 = layers.conv2d(Mconv2_stage2, 128, 7, 1, activation_fn=None, scope='Mconv3_stage2') Mconv3_stage2 = tf.nn.relu(Mconv3_stage2) Mconv4_stage2 = layers.conv2d(Mconv3_stage2, 128, 7, 1, activation_fn=None, scope='Mconv4_stage2') Mconv4_stage2 = tf.nn.relu(Mconv4_stage2) Mconv5_stage2 = layers.conv2d(Mconv4_stage2, 128, 7, 1, activation_fn=None, scope='Mconv5_stage2') Mconv5_stage2 = tf.nn.relu(Mconv5_stage2) Mconv6_stage2 = layers.conv2d(Mconv5_stage2, 128, 1, 1, activation_fn=None, scope='Mconv6_stage2') Mconv6_stage2 = tf.nn.relu(Mconv6_stage2) Mconv7_stage2 = layers.conv2d(Mconv6_stage2, 15, 1, 1, activation_fn=None, scope='Mconv7_stage2') concat_stage3 = tf.concat( axis=3, values=[Mconv7_stage2, conv4_7_CPM, pool_center_lower]) Mconv1_stage3 = layers.conv2d(concat_stage3, 128, 7, 1, activation_fn=None, 
scope='Mconv1_stage3') Mconv1_stage3 = tf.nn.relu(Mconv1_stage3) Mconv2_stage3 = layers.conv2d(Mconv1_stage3, 128, 7, 1, activation_fn=None, scope='Mconv2_stage3') Mconv2_stage3 = tf.nn.relu(Mconv2_stage3) Mconv3_stage3 = layers.conv2d(Mconv2_stage3, 128, 7, 1, activation_fn=None, scope='Mconv3_stage3') Mconv3_stage3 = tf.nn.relu(Mconv3_stage3) Mconv4_stage3 = layers.conv2d(Mconv3_stage3, 128, 7, 1, activation_fn=None, scope='Mconv4_stage3') Mconv4_stage3 = tf.nn.relu(Mconv4_stage3) Mconv5_stage3 = layers.conv2d(Mconv4_stage3, 128, 7, 1, activation_fn=None, scope='Mconv5_stage3') Mconv5_stage3 = tf.nn.relu(Mconv5_stage3) Mconv6_stage3 = layers.conv2d(Mconv5_stage3, 128, 1, 1, activation_fn=None, scope='Mconv6_stage3') Mconv6_stage3 = tf.nn.relu(Mconv6_stage3) Mconv7_stage3 = layers.conv2d(Mconv6_stage3, 15, 1, 1, activation_fn=None, scope='Mconv7_stage3') concat_stage4 = tf.concat( axis=3, values=[Mconv7_stage3, conv4_7_CPM, pool_center_lower]) Mconv1_stage4 = layers.conv2d(concat_stage4, 128, 7, 1, activation_fn=None, scope='Mconv1_stage4') Mconv1_stage4 = tf.nn.relu(Mconv1_stage4) Mconv2_stage4 = layers.conv2d(Mconv1_stage4, 128, 7, 1, activation_fn=None, scope='Mconv2_stage4') Mconv2_stage4 = tf.nn.relu(Mconv2_stage4) Mconv3_stage4 = layers.conv2d(Mconv2_stage4, 128, 7, 1, activation_fn=None, scope='Mconv3_stage4') Mconv3_stage4 = tf.nn.relu(Mconv3_stage4) Mconv4_stage4 = layers.conv2d(Mconv3_stage4, 128, 7, 1, activation_fn=None, scope='Mconv4_stage4') Mconv4_stage4 = tf.nn.relu(Mconv4_stage4) Mconv5_stage4 = layers.conv2d(Mconv4_stage4, 128, 7, 1, activation_fn=None, scope='Mconv5_stage4') Mconv5_stage4 = tf.nn.relu(Mconv5_stage4) Mconv6_stage4 = layers.conv2d(Mconv5_stage4, 128, 1, 1, activation_fn=None, scope='Mconv6_stage4') Mconv6_stage4 = tf.nn.relu(Mconv6_stage4) Mconv7_stage4 = layers.conv2d(Mconv6_stage4, 15, 1, 1, activation_fn=None, scope='Mconv7_stage4') concat_stage5 = tf.concat( axis=3, values=[Mconv7_stage4, conv4_7_CPM, pool_center_lower]) Mconv1_stage5 = layers.conv2d(concat_stage5, 128, 7, 1, activation_fn=None, scope='Mconv1_stage5') Mconv1_stage5 = tf.nn.relu(Mconv1_stage5) Mconv2_stage5 = layers.conv2d(Mconv1_stage5, 128, 7, 1, activation_fn=None, scope='Mconv2_stage5') Mconv2_stage5 = tf.nn.relu(Mconv2_stage5) Mconv3_stage5 = layers.conv2d(Mconv2_stage5, 128, 7, 1, activation_fn=None, scope='Mconv3_stage5') Mconv3_stage5 = tf.nn.relu(Mconv3_stage5) Mconv4_stage5 = layers.conv2d(Mconv3_stage5, 128, 7, 1, activation_fn=None, scope='Mconv4_stage5') Mconv4_stage5 = tf.nn.relu(Mconv4_stage5) Mconv5_stage5 = layers.conv2d(Mconv4_stage5, 128, 7, 1, activation_fn=None, scope='Mconv5_stage5') Mconv5_stage5 = tf.nn.relu(Mconv5_stage5) Mconv6_stage5 = layers.conv2d(Mconv5_stage5, 128, 1, 1, activation_fn=None, scope='Mconv6_stage5') Mconv6_stage5 = tf.nn.relu(Mconv6_stage5) Mconv7_stage5 = layers.conv2d(Mconv6_stage5, 15, 1, 1, activation_fn=None, scope='Mconv7_stage5') concat_stage6 = tf.concat( axis=3, values=[Mconv7_stage5, conv4_7_CPM, pool_center_lower]) Mconv1_stage6 = layers.conv2d(concat_stage6, 128, 7, 1, activation_fn=None, scope='Mconv1_stage6') Mconv1_stage6 = tf.nn.relu(Mconv1_stage6) Mconv2_stage6 = layers.conv2d(Mconv1_stage6, 128, 7, 1, activation_fn=None, scope='Mconv2_stage6') Mconv2_stage6 = tf.nn.relu(Mconv2_stage6) Mconv3_stage6 = layers.conv2d(Mconv2_stage6, 128, 7, 1, activation_fn=None, scope='Mconv3_stage6') Mconv3_stage6 = tf.nn.relu(Mconv3_stage6) Mconv4_stage6 = layers.conv2d(Mconv3_stage6, 128, 7, 1, activation_fn=None, scope='Mconv4_stage6') 
        Mconv4_stage6 = tf.nn.relu(Mconv4_stage6)
        Mconv5_stage6 = layers.conv2d(Mconv4_stage6, 128, 7, 1,
                                      activation_fn=None, scope='Mconv5_stage6')
        Mconv5_stage6 = tf.nn.relu(Mconv5_stage6)
        Mconv6_stage6 = layers.conv2d(Mconv5_stage6, 128, 1, 1,
                                      activation_fn=None, scope='Mconv6_stage6')
        Mconv6_stage6 = tf.nn.relu(Mconv6_stage6)
        Mconv7_stage6 = layers.conv2d(Mconv6_stage6, 15, 1, 1,
                                      activation_fn=None, scope='Mconv7_stage6')
        return Mconv7_stage6
def inception_v3(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.8, min_depth=16, depth_multiplier=1.0, prediction_fn=layers_lib.softmax, spatial_squeeze=True, reuse=None, scope='InceptionV3'): """Inception model from http://arxiv.org/abs/1512.00567. "Rethinking the Inception Architecture for Computer Vision" Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna. With the default arguments this method constructs the exact model defined in the paper. However, one can experiment with variations of the inception_v3 network by changing arguments dropout_keep_prob, min_depth and depth_multiplier. The default image size used to train this network is 299x299. Args: inputs: a tensor of size [batch_size, height, width, channels]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: the percentage of activation values that are retained. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape is [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. To use this parameter, the input images must be smaller than 300x300 pixels, in which case the output logit layer does not contain spatial information and can be removed. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the pre-softmax activations, a tensor of size [batch_size, num_classes] end_points: a dictionary from components of the network to the corresponding activation. Raises: ValueError: if 'depth_multiplier' is less than or equal to zero. """ if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) with variable_scope.variable_scope( scope, 'InceptionV3', [inputs, num_classes], reuse=reuse) as scope: with arg_scope( [layers_lib.batch_norm, layers_lib.dropout], is_training=is_training): net, end_points = inception_v3_base( inputs, scope=scope, min_depth=min_depth, depth_multiplier=depth_multiplier) # Auxiliary Head logits with arg_scope( [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d], stride=1, padding='SAME'): aux_logits = end_points['Mixed_6e'] with variable_scope.variable_scope('AuxLogits'): aux_logits = layers_lib.avg_pool2d( aux_logits, [5, 5], stride=3, padding='VALID', scope='AvgPool_1a_5x5') aux_logits = layers.conv2d( aux_logits, depth(128), [1, 1], scope='Conv2d_1b_1x1') # Shape of feature map before the final layer. 
kernel_size = _reduced_kernel_size_for_small_input(aux_logits, [5, 5]) aux_logits = layers.conv2d( aux_logits, depth(768), kernel_size, weights_initializer=trunc_normal(0.01), padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size)) aux_logits = layers.conv2d( aux_logits, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=trunc_normal(0.001), scope='Conv2d_2b_1x1') if spatial_squeeze: aux_logits = array_ops.squeeze( aux_logits, [1, 2], name='SpatialSqueeze') end_points['AuxLogits'] = aux_logits # Final pooling and prediction with variable_scope.variable_scope('Logits'): kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8]) net = layers_lib.avg_pool2d( net, kernel_size, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size)) # 1 x 1 x 2048 net = layers_lib.dropout( net, keep_prob=dropout_keep_prob, scope='Dropout_1b') end_points['PreLogits'] = net # 2048 logits = layers.conv2d( net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze') # 1000 end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
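# _reduced_kernel_size_for_small_input is called above but not defined in this
# excerpt. The standard slim helper clamps the pooling kernel to the statically
# known feature-map size so inputs smaller than 299x299 still work; reproduced
# here for reference:
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
    shape = input_tensor.get_shape().as_list()
    if shape[1] is None or shape[2] is None:
        kernel_size_out = kernel_size
    else:
        kernel_size_out = [min(shape[1], kernel_size[0]),
                           min(shape[2], kernel_size[1])]
    return kernel_size_out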
def resnet_v2(inputs, blocks, num_classes=None, is_training=True, global_pool=True, output_stride=None, include_root_block=True, reuse=None, scope=None): """Generator for v2 (preactivation) ResNet models. This function generates a family of ResNet v2 models. See the resnet_v2_*() methods for specific model instantiations, obtained by selecting different block instantiations that produce ResNets of various depths. Training for image classification on Imagenet is usually done with [224, 224] inputs, resulting in [7, 7] feature maps at the output of the last ResNet block for the ResNets defined in [1] that have nominal stride equal to 32. However, for dense prediction tasks we advise that one uses inputs with spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In this case the feature maps at the ResNet output will have spatial shape [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] and corners exactly aligned with the input image corners, which greatly facilitates alignment of the features to the image. Using as input [225, 225] images results in [8, 8] feature maps at the output of the last ResNet block. For dense prediction tasks, the ResNet needs to run in fully-convolutional (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all have nominal stride equal to 32 and a good choice in FCN mode is to use output_stride=16 in order to increase the density of the computed features at small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. Args: inputs: A tensor of size [batch, height_in, width_in, channels]. blocks: A list of length equal to the number of ResNet blocks. Each element is a resnet_utils.Block object describing the units in the block. num_classes: Number of predicted classes for classification tasks. If None we return the features before the logit layer. is_training: whether batch_norm layers are in training mode. global_pool: If True, we perform global average pooling before computing the logits. Set to True for image classification, False for dense prediction. output_stride: If None, then the output will be computed at the nominal network stride. If output_stride is not None, it specifies the requested ratio of input to output spatial resolution. include_root_block: If True, include the initial convolution followed by max-pooling, if False excludes it. If excluded, `inputs` should be the results of an activation-less convolution. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. If global_pool is False, then height_out and width_out are reduced by a factor of output_stride compared to the respective height_in and width_in, else both height_out and width_out equal one. If num_classes is None, then net is the output of the last ResNet block, potentially after global average pooling. If num_classes is not None, net contains the pre-softmax activations. end_points: A dictionary from components of the network to the corresponding activation. Raises: ValueError: If the target output_stride is not valid. 
""" with variable_scope.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: end_points_collection = sc.original_name_scope + '_end_points' with arg_scope( [layers_lib.conv2d, bottleneck, resnet_utils.stack_blocks_dense], outputs_collections=end_points_collection): with arg_scope([layers.batch_norm], is_training=is_training): net = inputs if include_root_block: if output_stride is not None: if output_stride % 4 != 0: raise ValueError( 'The output_stride needs to be a multiple of 4.' ) output_stride /= 4 # We do not include batch normalization or activation functions in # conv1 because the first ResNet unit will perform these. Cf. # Appendix of [2]. with arg_scope([layers_lib.conv2d], activation_fn=None, normalizer_fn=None): net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') net = layers.max_pool2d(net, [3, 3], stride=2, scope='pool1') net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) # This is needed because the pre-activation variant does not have batch # normalization or activation functions in the residual unit output. See # Appendix of [2]. net = layers.batch_norm(net, activation_fn=nn_ops.relu, scope='postnorm') if global_pool: # Global average pooling. net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) if num_classes is not None: net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') # Convert end_points_collection into a dictionary of end_points. end_points = utils.convert_collection_to_dict( end_points_collection) if num_classes is not None: end_points['predictions'] = layers.softmax( net, scope='predictions') return net, end_points
def _build_model(self, input_batch): inputs_image, inputs_Beta = tf.split(input_batch, num_or_size_splits=2, axis=3) if self.data_format == 'NCHW': reduction_axis = [2,3] _inputs_image = tf.cast(tf.transpose(inputs_image, [0, 3, 1, 2]), tf.float32) _inputs_Beta = tf.cast(tf.transpose(inputs_Beta, [0, 3, 1, 2]), tf.float32) else: reduction_axis = [1,2] _inputs_image = tf.cast(inputs_image, tf.float32) _inputs_Beta = tf.cast(inputs_Beta, tf.float32) with arg_scope([layers.conv2d], num_outputs=16, kernel_size=3, stride=1, padding='SAME', data_format=self.data_format, activation_fn=None, weights_initializer=layers.variance_scaling_initializer(), weights_regularizer=layers.l2_regularizer(2e-4), biases_initializer=tf.constant_initializer(0.2), biases_regularizer=None),\ arg_scope([layers.batch_norm], decay=0.9, center=True, scale=True, updates_collections=None, is_training=self.is_training, fused=True, data_format=self.data_format),\ arg_scope([layers.avg_pool2d], kernel_size=[3,3], stride=[2,2], padding='SAME', data_format=self.data_format): with tf.variable_scope('Layer1'): # 256*256 W = tf.get_variable('W', shape=[3,3,1,64],\ initializer=layers.variance_scaling_initializer(), \ dtype=tf.float32, \ regularizer=layers.l2_regularizer(5e-4)) b = tf.get_variable('b', shape=[64], dtype=tf.float32, \ initializer=tf.constant_initializer(0.2)) conv = tf.nn.bias_add( \ tf.nn.conv2d(tf.cast(_inputs_image, tf.float32), \ W, [1,1,1,1], 'SAME', \ data_format=self.data_format), b, \ data_format=self.data_format, name='Layer1') actv=tf.nn.relu(conv) prob_map = tf.sqrt(tf.nn.conv2d(tf.cast(_inputs_Beta, tf.float32), \ tf.abs(W), [1,1,1,1], 'SAME', \ data_format=self.data_format)) out_L1=tf.add_n([actv,prob_map]) with tf.variable_scope('Layer2'): # 256*256 conv=layers.conv2d(out_L1) actv=tf.nn.relu(layers.batch_norm(conv)) with tf.variable_scope('Layer3'): # 256*256 conv1=layers.conv2d(actv) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn2=layers.batch_norm(conv2) res= tf.add(actv, bn2) with tf.variable_scope('Layer4'): # 256*256 conv1=layers.conv2d(res) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn2=layers.batch_norm(conv2) res= tf.add(res, bn2) with tf.variable_scope('Layer5'): # 256*256 conv1=layers.conv2d(res) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn=layers.batch_norm(conv2) res= tf.add(res, bn) with tf.variable_scope('Layer6'): # 256*256 conv1=layers.conv2d(res) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn=layers.batch_norm(conv2) res= tf.add(res, bn) with tf.variable_scope('Layer7'): # 256*256 conv1=layers.conv2d(res) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn=layers.batch_norm(conv2) res= tf.add(res, bn) with tf.variable_scope('Layer8'): # 256*256 convs = layers.conv2d(res, kernel_size=1, stride=2) convs = layers.batch_norm(convs) conv1=layers.conv2d(res) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1) bn=layers.batch_norm(conv2) pool = layers.avg_pool2d(bn) res= tf.add(convs, pool) with tf.variable_scope('Layer9'): # 128*128 convs = layers.conv2d(res, num_outputs=64, kernel_size=1, stride=2) convs = layers.batch_norm(convs) conv1=layers.conv2d(res, num_outputs=64) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1, num_outputs=64) bn=layers.batch_norm(conv2) pool = layers.avg_pool2d(bn) res= tf.add(convs, pool) with tf.variable_scope('Layer10'): # 64*64 convs = layers.conv2d(res, num_outputs=128, kernel_size=1, stride=2) 
convs = layers.batch_norm(convs) conv1=layers.conv2d(res, num_outputs=128) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1, num_outputs=128) bn=layers.batch_norm(conv2) pool = layers.avg_pool2d(bn) res= tf.add(convs, pool) with tf.variable_scope('Layer11'): # 32*32 convs = layers.conv2d(res, num_outputs=256, kernel_size=1, stride=2) convs = layers.batch_norm(convs) conv1=layers.conv2d(res, num_outputs=256) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1, num_outputs=256) bn=layers.batch_norm(conv2) pool = layers.avg_pool2d(bn) res= tf.add(convs, pool) with tf.variable_scope('Layer12'): # 16*16 conv1=layers.conv2d(res, num_outputs=512) actv1=tf.nn.relu(layers.batch_norm(conv1)) conv2=layers.conv2d(actv1, num_outputs=512) bn=layers.batch_norm(conv2) avgp = tf.reduce_mean(bn, reduction_axis, keep_dims=True ) ip=layers.fully_connected(layers.flatten(avgp), num_outputs=2, activation_fn=None, normalizer_fn=None, weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01), biases_initializer=tf.constant_initializer(0.), scope='ip') self.outputs = ip return self.outputs
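# A typical head for the two-way 'ip' layer above (a sketch; the label
# placeholder and the softmax loss are assumptions, not the author's code):
labels = tf.placeholder(tf.int64, [None])
logits = model._build_model(input_batch)  # model: instance of the class above
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))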
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, outputs_collections=None, scope=None): """Bottleneck residual unit variant with BN before convolutions. This is the full preactivation residual unit variant proposed in [2]. See Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck variant which has an extra bottleneck layer. When putting together two consecutive ResNet blocks that use this unit, one should use stride = 2 in the last unit of the first block. Args: inputs: A tensor of size [batch, height, width, channels]. depth: The depth of the ResNet unit output. depth_bottleneck: The depth of the bottleneck layers. stride: The ResNet unit's stride. Determines the amount of downsampling of the units output compared to its input. rate: An integer, rate for atrous convolution. outputs_collections: Collection to add the ResNet unit output. scope: Optional variable_scope. Returns: The ResNet unit's output. """ with variable_scope.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4) preact = layers.batch_norm(inputs, activation_fn=nn_ops.relu, scope='preact') if depth == depth_in: shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') else: shortcut = layers_lib.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') residual = layers_lib.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1') residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') residual = layers_lib.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') output = shortcut + residual return utils.collect_named_outputs(outputs_collections, sc.name, output)
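# Stacking two preactivation units (sketch): per the docstring, when two
# consecutive blocks use this unit, the stride goes in the last unit of the
# first block.
net = bottleneck(inputs, depth=256, depth_bottleneck=64, stride=1, scope='unit_1')
net = bottleneck(net, depth=256, depth_bottleneck=64, stride=2, scope='unit_2')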
def backbone(self, inputs, is_training=False, reuse=None):
    end_points = OrderedDict()
    with tf.variable_scope(self.name, reuse=reuse):
        with arg_scope([layers.batch_norm], scale=True, fused=True,
                       data_format=self.data_format, is_training=is_training):
            with arg_scope([layers.conv2d],
                           activation_fn=tf.nn.relu,
                           normalizer_fn=layers.batch_norm,
                           biases_initializer=None,
                           weights_regularizer=layers.l2_regularizer(self.weight_decay),
                           data_format=self.data_format):
                with arg_scope([layers.separable_conv2d],
                               depth_multiplier=1,
                               activation_fn=None,
                               normalizer_fn=layers.batch_norm,
                               biases_initializer=None,
                               weights_regularizer=layers.l2_regularizer(self.weight_decay),
                               data_format=self.data_format):
                    if self.data_format == 'NCHW':
                        inputs = tf.transpose(inputs, [0, 3, 1, 2])
                    with tf.variable_scope('conv1'):
                        # Chain each conv from the previous output.
                        net = layers.conv2d(inputs, num_outputs=64, kernel_size=3,
                                            stride=2, scope='conv1_3x3')
                        end_points['conv1/conv1_3x3'] = net
                        net = layers.conv2d(net, num_outputs=64, kernel_size=3,
                                            scope='conv2_3x3')
                        end_points['conv1/conv2_3x3'] = net
                        net = layers.conv2d(net, num_outputs=128, kernel_size=3,
                                            scope='conv3_3x3')
                        end_points['conv1/conv3_3x3'] = net
                        net = layers.max_pool2d(net, kernel_size=3, stride=2,
                                                padding='SAME',
                                                data_format=self.data_format,
                                                scope='maxpool_3x3_2')
                        end_points['conv1/maxpool_3x3_2'] = net
                    with tf.variable_scope('conv2'):
                        for idx in xrange(10):
                            net = self.separable_resBlock(
                                net, num_outputs=self.num_outputs[0],
                                stride=2 if not idx else 1,
                                scope='resBlock_%d' % idx)
                            end_points['conv2/resBlock_%d' % idx] = net
                    with tf.variable_scope('conv3'):
                        for idx in xrange(10):
                            net = self.separable_resBlock(
                                net, num_outputs=self.num_outputs[1],
                                stride=2 if not idx else 1,
                                scope='resBlock_%d' % idx)
                            end_points['conv3/resBlock_%d' % idx] = net
                    with tf.variable_scope('conv4'):
                        for idx in xrange(23):
                            net = self.separable_resBlock(
                                net, num_outputs=self.num_outputs[2],
                                stride=2 if not idx else 1,
                                scope='resBlock_%d' % idx)
                            end_points['conv4/resBlock_%d' % idx] = net
                    with tf.variable_scope('conv5'):
                        for idx in xrange(10):
                            net = self.separable_resBlock(
                                net, num_outputs=self.num_outputs[3],
                                stride=2 if not idx else 1,
                                scope='resBlock_%d' % idx)
                            end_points['conv5/resBlock_%d' % idx] = net
                    with tf.variable_scope('conv6'):
                        net = layers.conv2d(net, num_outputs=self.num_outputs[4],
                                            kernel_size=1, stride=1,
                                            scope='conv_1x1')
                        end_points['conv6/conv_1x1'] = net
                    net = tf.reduce_mean(net, self.spatial_axis)
    return net, end_points
def c2(inflow, filters, name):
  """Two stacked 3x3 convolutions with the same filter count."""
  with tf.variable_scope(name):
    l1 = layers.conv2d(inflow, filters, 3)
    l2 = layers.conv2d(l1, filters, 3)
    return l2
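# A short usage sketch (assuming `images` is an NHWC float tensor and `layers`
# is tf.contrib.layers, as elsewhere in this file): chaining c2 blocks with
# pooling gives a small VGG-style encoder.
net = c2(images, 64, 'enc1')       # [batch, H, W, 64]
net = layers.max_pool2d(net, 2)    # [batch, H/2, W/2, 64]
net = c2(net, 128, 'enc2')         # [batch, H/2, W/2, 128]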
def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      replace_separable_convolution=False,
                      scope=None):
  """Inception v2 (6a2).

  Constructs an Inception v2 network from inputs to the given final endpoint.
  This method can construct the network up to the layer inception(5b) as
  described in http://arxiv.org/abs/1502.03167.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
      'Mixed_5c'].
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters
      or computation cost of the model.
    replace_separable_convolution: Replace the separable convolution in the
      layer Conv2d_1a_7x7 with a normal convolution.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries
      or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or depth_multiplier <= 0.
  """
  # end_points will collect relevant activations for external use, for
  # example summaries or losses.
  end_points = {}

  # Used to find thinned depths for each layer.
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with variable_scope.variable_scope(scope, 'InceptionV2', [inputs]):
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d,
         layers.separable_conv2d],
        stride=1,
        padding='SAME'):
      # Note that sizes in the comments below assume an input spatial size of
      # 224x224; however, the inputs can be of any size greater than 32x32.

      # 224 x 224 x 3
      end_point = 'Conv2d_1a_7x7'
      if replace_separable_convolution:
        # Use a normal convolution instead of a separable convolution, as it
        # provides better performance on some devices.
        net = layers.conv2d(
            inputs,
            depth(64), [7, 7],
            stride=2,
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      else:
        # depthwise_multiplier here is different from depth_multiplier.
        # depthwise_multiplier determines the output channels of the initial
        # depthwise conv (see docs for tf.nn.separable_conv2d), while
        # depth_multiplier controls the # channels of the subsequent 1x1
        # convolution. Must have
        #   in_channels * depthwise_multiplier <= out_channels
        # so that the separable convolution is not overparameterized.
        depthwise_multiplier = min(int(depth(64) / 3), 8)
        net = layers.separable_conv2d(
            inputs,
            depth(64), [7, 7],
            depth_multiplier=depthwise_multiplier,
            stride=2,
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 112 x 112 x 64
      end_point = 'MaxPool_2a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2b_1x1'
      net = layers.conv2d(
          net,
          depth(64), [1, 1],
          scope=end_point,
          weights_initializer=trunc_normal(0.1))
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2c_3x3'
      net = layers.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 56 x 56 x 192
      end_point = 'MaxPool_3a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 28 x 28 x 192
      # Inception module.
      end_point = 'Mixed_3b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(64), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 28 x 28 x 256
      end_point = 'Mixed_3c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 28 x 28 x 320
      end_point = 'Mixed_4a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = layers.conv2d(
              branch_0, depth(160), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(128), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4d'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(160), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_5a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = layers.conv2d(
              branch_0, depth(192), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(256), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = layers.conv2d(
              branch_1, depth(256), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(
              net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net,
              depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net,
              depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.max_pool2d(
              net, [3, 3], scope='MaxPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3,
              depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
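# A brief usage sketch (not from the source): building the backbone up to an
# intermediate endpoint and inspecting the collected activations.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = inception_v2_base(images, final_endpoint='Mixed_4e')
# `net` is the Mixed_4e output (14 x 14 x 576 for a 224x224 input with
# depth_multiplier=1); every earlier endpoint is also available by name,
# e.g. end_points['Mixed_3b'].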
def atrous_spatial_pyramid_pooling(inputs, output_stride, batch_norm_decay,
                                   is_training, depth=256):
  """Atrous Spatial Pyramid Pooling.

  Extracts high-level (semantic) features; this corresponds to the encoder
  part of the DeepLab v3 figure.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    output_stride: The ResNet unit's stride. Determines the rates for atrous
      convolution: the rates are (6, 12, 18) when the stride is 16, and
      doubled when it is 8.
    batch_norm_decay: The moving average decay when estimating layer
      activation statistics in batch normalization.
    is_training: A boolean denoting whether the input is for training.
    depth: The depth of the ResNet unit output.

  Returns:
    The atrous spatial pyramid pooling output.
  """
  with tf.variable_scope("aspp"):
    if output_stride not in [8, 16]:
      raise ValueError('output_stride must be either 8 or 16.')
    # Dilating the 3x3 convolutions at several rates captures spatial context
    # at multiple scales.
    atrous_rates = [6, 12, 18]
    if output_stride == 8:
      atrous_rates = [2 * rate for rate in atrous_rates]  # (12, 24, 36)
    with tf.contrib.slim.arg_scope(
        resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
      with arg_scope([layers.batch_norm], is_training=is_training):
        inputs_size = tf.shape(inputs)[1:3]
        # (a) One 1x1 convolution and three 3x3 convolutions with
        # rates = (6, 12, 18) when output stride = 16; the rates are doubled
        # when output stride = 8.
        conv_1x1 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1,
                                     scope="conv_1x1")
        conv_3x3_1 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1,
                                              rate=atrous_rates[0],
                                              scope='conv_3x3_1')
        conv_3x3_2 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1,
                                              rate=atrous_rates[1],
                                              scope='conv_3x3_2')
        conv_3x3_3 = resnet_utils.conv2d_same(inputs, depth, 3, stride=1,
                                              rate=atrous_rates[2],
                                              scope='conv_3x3_3')
        # (b) The image-level features.
        with tf.variable_scope("image_level_features"):
          # Global average pooling: average over the spatial dimensions
          # [1, 2] while keeping the tensor rank (keepdims=True).
          image_level_features = tf.reduce_mean(
              inputs, [1, 2], name='global_average_pooling', keepdims=True)
          # 1x1 convolution with 256 filters (and batch normalization).
          image_level_features = layers_lib.conv2d(
              image_level_features, depth, [1, 1], stride=1, scope='conv_1x1')
          # Bilinearly upsample the pooled features back to the input
          # spatial size.
          image_level_features = tf.image.resize_bilinear(
              image_level_features, inputs_size, name='upsample')
        net = tf.concat(
            [conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
             image_level_features],
            axis=3, name='concat')
        net = layers_lib.conv2d(net, depth, [1, 1], stride=1,
                                scope='conv_1x1_concat')
        return net
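# A minimal usage sketch (assumption: `features` is the stride-16 output of a
# ResNet backbone, e.g. 33x33 for a 513x513 input): ASPP preserves the spatial
# size and returns `depth` channels.
features = tf.placeholder(tf.float32, [None, 33, 33, 2048])
aspp_out = atrous_spatial_pyramid_pooling(
    features, output_stride=16, batch_norm_decay=0.9997, is_training=True)
# aspp_out: [None, 33, 33, 256] -- five parallel branches concatenated along
# the channel axis, then fused by the final 1x1 convolution.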
def build(self):
    """build

    Build the actual network, using the values passed over from the agent
    object, which themselves are derived from the Obs object.

    This has no concept of transfer learning.
    """
    # Maps a series of symbols to embeddings, where an embedding is a mapping
    # from discrete objects, such as words, to vectors of real numbers. In
    # this case it is from the unit types.
    units_embedded = layers.embed_sequence(
        self.placeholders.screen_unit_type,
        vocab_size=SCREEN_FEATURES.unit_type.scale,
        embed_dim=self.unittype_emb_dim,
        scope="unit_type_emb",
        trainable=self.trainable,
    )

    # "One hot" encoding performs "binarization" on the input, giving us
    # features we can suitably learn from. Learning directly from integer
    # category codes (i.e. 0/1/2 for 3 categories) invites the model to pick
    # up a spurious ordering between categories where none exists, so we use
    # a binary form instead. The background channel is not used, which is why
    # we drop channel 0 in the last axis.
    player_relative_screen_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_screen,
        num_classes=SCREEN_FEATURES.player_relative.scale,
    )[:, :, :, 1:]
    player_relative_minimap_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_minimap,
        num_classes=MINIMAP_FEATURES.player_relative.scale,
    )[:, :, :, 1:]

    channel_axis = 3
    # Group together all the inputs, such that a conv layer can be built
    # upon them.
    screen_numeric_all = tf.concat(
        [
            self.placeholders.screen_numeric,
            units_embedded,
            player_relative_screen_one_hot,
        ],
        axis=channel_axis,
    )
    minimap_numeric_all = tf.concat(
        [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
        axis=channel_axis,
    )
    non_spatial_features = tf.cast(
        self.placeholders.non_spatial_features, tf.float32
    )
    log_non_spatial_features = tf.log(non_spatial_features + 1.0)

    four_d_non_spatial = reference_tiling_method(self, log_non_spatial_features)

    if DEBUG:
        # Print the values of the tensor.
        four_d_non_spatial = tf.Print(
            four_d_non_spatial,
            [four_d_non_spatial],
            "4D non spatial tensor values: ",
            summarize=1024,  # the number of values TF will print from the Tensor
        )

    # Build the two convolutional towers, one for the screen and one for
    # the minimap.
    screen_conv_layer_output = self.build_conv_layers_for_input(
        screen_numeric_all, "screen_network"
    )
    minimap_conv_layer_output = self.build_conv_layers_for_input(
        minimap_numeric_all, "minimap_network"
    )

    # Concatenate these two towers and the non-spatial features, and build a
    # further convolutional layer on top.
    visual_inputs = tf.concat(
        [screen_conv_layer_output, minimap_conv_layer_output, four_d_non_spatial],
        axis=channel_axis,
    )
    spatial_actions = layers.conv2d(
        visual_inputs,
        data_format="NHWC",
        num_outputs=1,
        kernel_size=1,
        stride=1,
        activation_fn=None,
        scope="spatial_action",
        trainable=self.trainable,
    )
    if self.trainable:
        tf.summary.image(
            "spatial_action", tf.reshape(spatial_actions, [-1, 32, 32, 1]), 3
        )

    # Take the softmax over the flattened final convolutional layer.
    spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_actions))

    # Build a fully connected layer on top of the flattened visual inputs.
    # Additional variables could be passed in here, alongside the
    # convolutional features.
    map_output_flat = layers.flatten(visual_inputs)
    fully_connected_layer1 = layers.fully_connected(
        map_output_flat,
        num_outputs=256,
        activation_fn=tf.nn.relu,
        scope="fully_connected_layer1",
        trainable=self.trainable,
    )

    # Generate the probability of a given action from the fully connected
    # layer. Finally, produce a value estimate for the given actions.
    action_id_probs = layers.fully_connected(
        fully_connected_layer1,
        num_outputs=len(actions.FUNCTIONS),
        activation_fn=tf.nn.softmax,
        scope="action_id",
        trainable=self.trainable,
    )
    value_estimate = tf.squeeze(
        layers.fully_connected(
            fully_connected_layer1,
            num_outputs=1,
            activation_fn=None,
            scope="value",
            trainable=self.trainable,
        ),
        axis=1,
    )

    # Disregard all the non-allowed actions by giving them a probability of
    # zero, before re-normalizing to 1.
    action_id_probs *= self.placeholders.available_action_ids
    action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

    spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims(
        self.placeholders.is_spatial_action_available, axis=1
    )
    action_id_log_probs = self.logclip(action_id_probs)

    self.value_estimate = value_estimate
    self.action_id_probs = action_id_probs
    self.spatial_action_probs = spatial_action_probs
    self.action_id_log_probs = action_id_log_probs
    self.spatial_action_log_probs = spatial_action_log_probs
    return self
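# `logclip` is used above but not defined in this excerpt. A plausible sketch
# (an assumption, not the author's code): a log that clips its argument away
# from zero, so the log-probabilities of masked-out actions stay finite after
# the available-action masking above.
def logclip(self, probs, eps=1e-12):
    return tf.log(tf.clip_by_value(probs, eps, 1.0))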
def build_DeepLab_Arch(self, inputs, is_training=False, name="DeepLab_Arch",
                       output_stride=8, base_architecture='resnet_v2_50',
                       pre_trained_model=None, batch_norm_decay=0.9997,
                       data_format='channels_last'):
    """Generator for DeepLab v3 plus models.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      is_training: Whether the network is being built for training.
      name: Name prefix for the prediction op.
      output_stride: The ResNet unit's stride. Determines the rates for
        atrous convolution: the rates are (6, 12, 18) when the stride is 16,
        and doubled when it is 8.
      base_architecture: The architecture of the base ResNet building block.
      pre_trained_model: The path to the directory that contains pre-trained
        models.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      data_format: The input format ('channels_last', 'channels_first', or
        None). If set to None, the format is dependent on whether a GPU is
        available. Only 'channels_last' is supported currently.

    Returns:
      The logits (at input resolution), the softmax prediction, and the
      encoder (ASPP) output.
    """
    print('---------------------------------')
    print('Initializing DeepLab Architecture')
    print('---------------------------------')
    print('Input data shape:', inputs.shape)

    if data_format == 'channels_first':
        # Convert the inputs from channels_last (NHWC) to channels_first
        # (NCHW). This provides a large performance boost on GPU. See
        # https://www.tensorflow.org/performance/performance_guide#data_formats
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    if base_architecture not in ['resnet_v2_50', 'resnet_v2_101']:
        raise ValueError(
            "'base_architecture' must be either 'resnet_v2_50' or "
            "'resnet_v2_101'.")

    if base_architecture == 'resnet_v2_50':
        base_model = resnet_v2.resnet_v2_50
    elif base_architecture == 'resnet_v2_101':
        base_model = resnet_v2.resnet_v2_101

    print('Initializing ResNet as Encoder...')
    # ResNet as encoder.
    with tf.contrib.slim.arg_scope(
            resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        logits, end_points = base_model(inputs,
                                        num_classes=None,
                                        is_training=is_training,
                                        global_pool=False,
                                        output_stride=output_stride)

    print('Loading Atrous Convolution layer...')
    inputs_size = tf.shape(inputs)[1:3]
    net = end_points[base_architecture + '/block4']
    encoder_output = self.atrous_spatial_pyramid_pooling(
        net, output_stride, batch_norm_decay, is_training)

    print('Initializing Decoder...')
    # Building the DeepLab decoder.
    with tf.variable_scope("decoder"):
        with tf.contrib.slim.arg_scope(
                resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
            with arg_scope([layers.batch_norm], is_training=is_training):
                with tf.variable_scope("low_level_features"):
                    low_level_features = end_points[
                        base_architecture + '/block1/unit_3/bottleneck_v2/conv1']
                    low_level_features = layers_lib.conv2d(
                        low_level_features, 48, [1, 1], stride=1,
                        scope='conv_1x1')
                    low_level_features_size = tf.shape(low_level_features)[1:3]
                with tf.variable_scope("upsampling_logits"):
                    net = tf.image.resize_bilinear(encoder_output,
                                                   low_level_features_size,
                                                   name='upsample_1')
                    net = tf.concat([net, low_level_features], axis=3,
                                    name='concat')
                    net = layers_lib.conv2d(net, 256, [3, 3], stride=1,
                                            scope='conv_3x3_1')
                    net = layers_lib.conv2d(net, 256, [3, 3], stride=1,
                                            scope='conv_3x3_2')
                    net = layers_lib.conv2d(net, self.args.num_classes, [1, 1],
                                            activation_fn=None,
                                            normalizer_fn=None,
                                            scope='conv_1x1')
                    logits = tf.image.resize_bilinear(net, inputs_size,
                                                      name='upsample_2')

    prediction = tf.nn.softmax(logits, name=name + '_softmax')
    print('DeeplabV3+ successfully loaded!')
    print('-----------------------------------')
    return logits, prediction, encoder_output
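# A short usage sketch (names hypothetical; `model` stands for an instance of
# the surrounding class, with self.args.num_classes set): wiring the DeepLab
# graph and a pixel-wise cross-entropy loss on the upsampled logits.
images = tf.placeholder(tf.float32, [None, 513, 513, 3])
labels = tf.placeholder(tf.int32, [None, 513, 513])
logits, prediction, _ = model.build_DeepLab_Arch(images, is_training=True,
                                                 output_stride=16)
# `logits` is already resized to the input resolution, so sparse labels can
# be applied directly.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits))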