def create_model(self,
                   model_input,
                   vocab_size,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

     The model consists of a per-class softmax distribution over a
     configurable number of logistic classifiers. One of the classifiers in the
     mixture is not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return {"predictions": final_probabilities}
    def encoder(self, images, is_training):
        activation_fn = leaky_relu  # tf.nn.relu
        weight_decay = 0.0
        with tf.variable_scope('encoder'):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training):
                with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                    weights_regularizer=slim.l2_regularizer(weight_decay),
                                    normalizer_fn=slim.batch_norm,
                                    normalizer_params=self.batch_norm_params):
                    net = images
                    
                    net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a')
                    net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_1b')
                    
                    net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a')
                    net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_2b')

                    net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a')
                    net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_3b')

                    net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a')
                    net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_4b')
                    
                    net = slim.flatten(net)
                    fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1')
                    fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2')
        return fc1, fc2
 def decoder(self, latent_var, is_training):
     activation_fn = leaky_relu  # tf.nn.relu
     weight_decay = 0.0 
     with tf.variable_scope('decoder'):
         with slim.arg_scope([slim.batch_norm],
                             is_training=is_training):
             with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                 weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                 weights_regularizer=slim.l2_regularizer(weight_decay),
                                 normalizer_fn=slim.batch_norm,
                                 normalizer_params=self.batch_norm_params):
                 net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1')
                 net = tf.reshape(net, [-1,4,4,256], name='Reshape')
                 
                 net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1')
                 net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a')
                 net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1b')
         
                 net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2')
                 net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a')
                 net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2b')
         
                 net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3')
                 net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a')
                 net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3b')
         
                 net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4')
                 net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a')
                 net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4b')
                 net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4c')
             
     return net
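The encoder and decoder above reference two helpers that this snippet does not define: leaky_relu and a conv2d_block usable with slim.repeat. One plausible sketch of both (an assumption, not the original definitions):

def leaky_relu(x):
    # Assumed leaky ReLU with a fixed 0.1 negative slope.
    return tf.maximum(0.1 * x, x)

def conv2d_block(inp, scale, *args, **kwargs):
    # Residual-style block for slim.repeat: slim.repeat passes the running
    # tensor first, followed by (scale, num_outputs, kernel_size, stride, ...).
    # The scaled conv output is added back onto the input, so num_outputs
    # must match the input channel count (as it does in every call above).
    return inp + scale * slim.conv2d(inp, *args, **kwargs)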
Example #4
  def __init__(self, net, labels_one_hot, model_params, method_params):
    """Stores argument in member variable for further use.

    Args:
      net: A tensor with shape [batch_size, num_features, feature_size] which
        contains some extracted image features.
      labels_one_hot: An optional (can be None) ground truth labels for the
        input features. Is a tensor with shape
        [batch_size, seq_length, num_char_classes]
      model_params: A namedtuple with model parameters (model.ModelParams).
      method_params: A SequenceLayerParams instance.
    """
    self._params = model_params
    self._mparams = method_params
    self._net = net
    self._labels_one_hot = labels_one_hot
    self._batch_size = net.get_shape().dims[0].value

    # Initialize parameters for char logits which will be computed on the fly
    # inside an LSTM decoder.
    self._char_logits = {}
    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
    self._softmax_w = slim.model_variable(
        'softmax_w',
        [self._mparams.num_lstm_units, self._params.num_char_classes],
        initializer=orthogonal_initializer,
        regularizer=regularizer)
    self._softmax_b = slim.model_variable(
        'softmax_b', [self._params.num_char_classes],
        initializer=tf.zeros_initializer(),
        regularizer=regularizer)
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):

  with slim.arg_scope(
      [slim.conv2d, slim.conv2d_transpose],
      padding='SAME',
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
      activation_fn=activation_fn,
      normalizer_fn=normalizer_fn,) as arg_sc:
    with slim.arg_scope(
        [slim.fully_connected],
        weights_regularizer=slim.l2_regularizer(weight_decay),
        weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn) as arg_sc:
      return arg_sc
    def build_feature_pyramid(self):

        '''
        reference: https://github.com/CharlesShang/FastMaskRCNN
        build P2, P3, P4, P5, P6
        :return: multi-scale feature map
        '''

        feature_pyramid = {}
        with tf.variable_scope('feature_pyramid'):
            with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
                feature_pyramid['P5'] = slim.conv2d(self.feature_maps_dict['C5'],
                                                    num_outputs=256,
                                                    kernel_size=[1, 1],
                                                    stride=1,
                                                    scope='build_P5')

                feature_pyramid['P6'] = slim.max_pool2d(feature_pyramid['P5'],
                                                        kernel_size=[2, 2], stride=2, scope='build_P6')
                # P6 is a stride-2 downsample of P5

                for layer in range(4, 1, -1):
                    p, c = feature_pyramid['P' + str(layer + 1)], self.feature_maps_dict['C' + str(layer)]
                    up_sample_shape = tf.shape(c)
                    up_sample = tf.image.resize_nearest_neighbor(p, [up_sample_shape[1], up_sample_shape[2]],
                                                                 name='build_P%d/up_sample_nearest_neighbor' % layer)

                    c = slim.conv2d(c, num_outputs=256, kernel_size=[1, 1], stride=1,
                                    scope='build_P%d/reduce_dimension' % layer)
                    p = up_sample + c
                    p = slim.conv2d(p, 256, kernel_size=[3, 3], stride=1,
                                    padding='SAME', scope='build_P%d/avoid_aliasing' % layer)
                    feature_pyramid['P' + str(layer)] = p

        return feature_pyramid
Example #7
 def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'):
     with tf.variable_scope(scope_name):
         embeddings = tf.get_variable(
             name=var_name,
             shape=[n_class, h_dim],
             regularizer=slim.l2_regularizer(1e-6))
     return embeddings
  def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]

    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input,
                               axis=[1]) / denominators

    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
Example #9
    def inference(self):
        _x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        # tf.image_summary(_x.op.name, _x, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])

        # Split out the color channels
        _, model_g, model_b = tf.split(_x, 3, 3, name='split_channels')
        # tf.image_summary(model_g.op.name, model_g, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])
        # tf.image_summary(model_b.op.name, model_b, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])

        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.0005)):
            with tf.variable_scope("siamese") as scope:
                def make_tower(net):
                    net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
                    net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
                    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
                    net = slim.flatten(net)
                    net = slim.fully_connected(net, 500, scope='fc1')
                    net = slim.fully_connected(net, 2, activation_fn=None, scope='fc2')
                    return net

                model_g = make_tower(model_g)
                model_g = tf.reshape(model_g, shape=[-1, 2])
                scope.reuse_variables()
                model_b = make_tower(model_b)
                model_b = tf.reshape(model_b, shape=[-1, 2])

                return [model_g, model_b]
Example #10
    def build_graph(self, image, label):
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1

        is_training = get_current_tower_context().is_training
        with slim.arg_scope([slim.layers.fully_connected],
                            weights_regularizer=slim.l2_regularizer(1e-5)):
            l = slim.layers.conv2d(image, 32, [3, 3], scope='conv0')
            l = slim.layers.max_pool2d(l, [2, 2], scope='pool0')
            l = slim.layers.conv2d(l, 32, [3, 3], padding='SAME', scope='conv1')
            l = slim.layers.conv2d(l, 32, [3, 3], scope='conv2')
            l = slim.layers.max_pool2d(l, [2, 2], scope='pool1')
            l = slim.layers.conv2d(l, 32, [3, 3], scope='conv3')
            l = slim.layers.flatten(l, scope='flatten')
            l = slim.layers.fully_connected(l, 512, scope='fc0')
            l = slim.layers.dropout(l, is_training=is_training)
            logits = slim.layers.fully_connected(l, 10, activation_fn=None, scope='fc1')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        acc = tf.to_float(tf.nn.in_top_k(logits, label, 1))

        acc = tf.reduce_mean(acc, name='accuracy')
        summary.add_moving_summary(acc)

        summary.add_moving_summary(cost)
        summary.add_param_summary(('.*/weights', ['histogram', 'rms']))  # slim uses different variable names
        return cost + regularize_cost_from_collection()
def inference(image_batch, keep_probability, 
              phase_train=True, bottleneck_layer_size=512, 
              weight_decay=0.0):
    with tf.variable_scope('LResnetE_IR'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                             weights_initializer=tf.contrib.layers.xavier_initializer(), 
                             weights_regularizer=slim.l2_regularizer(weight_decay), 
                             biases_initializer=None, #default no biases
                             activation_fn=None,
                             normalizer_fn=None
                             ):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                with slim.arg_scope([slim.batch_norm],
                                    decay=0.995,
                                    epsilon=1e-5,
                                    scale=True,
                                    is_training=phase_train,
                                    activation_fn=prelu,
                                    updates_collections=None,
                                    variables_collections=[ tf.GraphKeys.TRAINABLE_VARIABLES ]
                                   ):
                    return LResnet50E_IR(images=image_batch, 
                                    keep_probability=keep_probability, 
                                    phase_train=phase_train, 
                                    bottleneck_layer_size=bottleneck_layer_size, 
                                    reuse=None)
def inference(image_batch, keep_probability, 
              phase_train=True, bottleneck_layer_size=512, 
              weight_decay=0.0):
    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'scale':True,
        'is_training': phase_train,
        'updates_collections': None,
        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ]
    }    
    with tf.variable_scope('Resface'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                             weights_initializer=tf.contrib.layers.xavier_initializer(),
                             weights_regularizer=slim.l2_regularizer(weight_decay), 
                             activation_fn=prelu,
                             normalizer_fn=slim.batch_norm,
                             #normalizer_fn=None,
                             normalizer_params=batch_norm_params):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                return resface20(images=image_batch, 
                                keep_probability=keep_probability, 
                                phase_train=phase_train, 
                                bottleneck_layer_size=bottleneck_layer_size, 
                                reuse=None)
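Both inference variants above pass a prelu activation that is not defined in this snippet. A common sketch with a per-channel learned slope (an assumption; the original may differ):

def prelu(x):
    # Parametric ReLU: learned per-channel negative slope.
    with tf.variable_scope(None, default_name='prelu'):
        alpha = tf.get_variable('alpha', x.get_shape()[-1],
                                initializer=tf.constant_initializer(0.25),
                                dtype=tf.float32)
        return tf.nn.relu(x) + alpha * tf.minimum(0.0, x)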
Example #13
	def __init__(self,is_training):
		
		self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,64,64,3],name='input_image')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')

		#logits, end_points = resnet_v2.resnet_v2_50(self.input_image, num_classes=100, is_training=True)

		# flatten_hist = tf.reshape(self.input_image,[-1,96])

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)


		flatten_hist = tf.reshape(self.input_image,[-1,3*64*64])
		flatten_hist = tf.concat([flatten_hist,self.input_nlcd],1)
		x = slim.fully_connected(flatten_hist, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
		flatten_hist = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer, scope='decoder/fc_3')

		all_logits = []
		all_output = []

		for i in range(100):
			if i == 0:
				current_input_x = flatten_hist
			else:
				current_output = tf.concat(all_output,1)
				current_input_x = tf.concat([flatten_hist,current_output],1)

			x = slim.fully_connected(current_input_x, 256,weights_regularizer=weights_regularizer)
			x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer)
			#x = slim.fully_connected(x, 17,weights_regularizer=weights_regularizer)

			x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
			all_logits.append(slim.fully_connected(x, 1, activation_fn=None, weights_regularizer=weights_regularizer))
			all_output.append(tf.sigmoid(all_logits[i]))

		final_logits = tf.concat(all_logits,1)
		final_output = tf.sigmoid(final_logits)

		self.output = final_output
		self.ce_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label,logits=final_logits),1))

		slim.losses.add_loss(self.ce_loss)
		tf.summary.scalar('ce_loss',self.ce_loss)
		
		# l2 loss
		self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
		tf.summary.scalar('l2_loss',self.l2_loss)

		#total loss
		self.total_loss = slim.losses.get_total_loss()
		tf.summary.scalar('total_loss',self.total_loss)

		#self.output = tf.sigmoid(x)
Example #14
def build_resnet50(inputs, get_pred, is_training, var_scope):
    batch_norm_params = {'is_training': is_training}
    with tf.variable_scope(var_scope) as sc:
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(0.0001),
                            activation_fn=tf.nn.relu):
            conv1 = conv(inputs, 64, 7, 2) # H/2  -   64D
            pool1 = maxpool(conv1,           3) # H/4  -   64D
            conv2 = resblock(pool1,      64, 3) # H/8  -  256D
            conv3 = resblock(conv2,     128, 4) # H/16 -  512D
            conv4 = resblock(conv3,     256, 6) # H/32 - 1024D
            conv5 = resblock(conv4,     512, 3) # H/64 - 2048D

            skip1 = conv1
            skip2 = pool1
            skip3 = conv2
            skip4 = conv3
            skip5 = conv4
            
            # DECODING
            upconv6 = upconv(conv5,   512, 3, 2) #H/32
            upconv6 = resize_like(upconv6, skip5)
            concat6 = tf.concat([upconv6, skip5], 3)
            iconv6  = conv(concat6,   512, 3, 1)

            upconv5 = upconv(iconv6, 256, 3, 2) #H/16
            upconv5 = resize_like(upconv5, skip4)
            concat5 = tf.concat([upconv5, skip4], 3)
            iconv5  = conv(concat5,   256, 3, 1)

            upconv4 = upconv(iconv5,  128, 3, 2) #H/8
            upconv4 = resize_like(upconv4, skip3)
            concat4 = tf.concat([upconv4, skip3], 3)
            iconv4  = conv(concat4,   128, 3, 1)
            pred4 = get_pred(iconv4)
            upred4  = upsample_nn(pred4, 2)

            upconv3 = upconv(iconv4,   64, 3, 2) #H/4
            concat3 = tf.concat([upconv3, skip2, upred4], 3)
            iconv3  = conv(concat3,    64, 3, 1)
            pred3 = get_pred(iconv3)
            upred3  = upsample_nn(pred3, 2)

            upconv2 = upconv(iconv3,   32, 3, 2) #H/2
            concat2 = tf.concat([upconv2, skip1, upred3], 3)
            iconv2  = conv(concat2,    32, 3, 1)
            pred2 = get_pred(iconv2)
            upred2  = upsample_nn(pred2, 2)

            upconv1 = upconv(iconv2,  16, 3, 2) #H
            concat1 = tf.concat([upconv1, upred2], 3)
            iconv1  = conv(concat1,   16, 3, 1)
            pred1 = get_pred(iconv1)

            return [pred1, pred2, pred3, pred4]
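build_resnet50 relies on thin wrappers (conv, maxpool, resblock, upconv, upsample_nn, resize_like) defined elsewhere. Plausible sketches of the simpler ones follow (assumptions; resblock, the usual stack of bottleneck residual units, is omitted):

def conv(x, num_out_layers, kernel_size, stride):
    # Thin wrapper so the arg_scope above supplies batch norm, L2, and ReLU.
    return slim.conv2d(x, num_out_layers, kernel_size, stride)

def maxpool(x, kernel_size):
    return slim.max_pool2d(x, kernel_size, padding='SAME')

def upsample_nn(x, ratio):
    h, w = tf.shape(x)[1], tf.shape(x)[2]
    return tf.image.resize_nearest_neighbor(x, [h * ratio, w * ratio])

def upconv(x, num_out_layers, kernel_size, scale):
    # Nearest-neighbor upsample followed by a stride-1 conv.
    return conv(upsample_nn(x, scale), num_out_layers, kernel_size, 1)

def resize_like(inputs, ref):
    # Resize `inputs` to the spatial size of `ref`.
    return tf.image.resize_nearest_neighbor(inputs, tf.shape(ref)[1:3])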
def prediction_layer(cfg, input, name, num_outputs):
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], padding='SAME',
                        activation_fn=None, normalizer_fn=None,
                        weights_regularizer=slim.l2_regularizer(cfg.weight_decay)):
        with tf.variable_scope(name):
            pred = slim.conv2d_transpose(input, num_outputs,
                                         kernel_size=[3, 3], stride=2,
                                         scope='block4')
            return pred
Example #16
 def _merge(self, var_list, fan_out, l2_reg=1e-6):
     x = 0.
     with slim.arg_scope(
         [slim.fully_connected],
         num_outputs=fan_out,
         weights_regularizer=slim.l2_regularizer(l2_reg),
         normalizer_fn=None,
         activation_fn=None):
         for var in var_list:
             x = x + slim.fully_connected(var)
     return slim.bias_add(x)
    def inference(self):
        _x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.05)):

            # 1*H*W -> 32*H*W
            model = slim.conv2d(_x, 32, [3, 3], padding='SAME', scope='conv1')
            # 32*H*W -> 1024*H/16*W/16
            model = slim.conv2d(model, 1024, [16, 16], padding='VALID', scope='conv2', stride=16)
            model = slim.conv2d_transpose(model, self.input_shape[2], [16, 16],
                                          stride=16, padding='VALID', activation_fn=None, scope='deconv_1')
            return model
Example #18
 def ds_cnn_arg_scope(weight_decay=0):
   """Defines the default ds_cnn argument scope.
   Args:
     weight_decay: The weight decay to use for regularizing the model.
   Returns:
     An `arg_scope` to use for the DS-CNN model.
   """
   with slim.arg_scope(
       [slim.convolution2d, slim.separable_convolution2d],
       weights_initializer=slim.initializers.xavier_initializer(),
       biases_initializer=slim.init_ops.zeros_initializer(),
       weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
     return sc
Example #19
 def construct_net(self,is_trained = True):
     with slim.arg_scope([slim.conv2d], padding='VALID',
                         weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                         weights_regularizer=slim.l2_regularizer(0.0005)):
         net = slim.conv2d(self.input_images,6,[5,5],1,padding='SAME',scope='conv1')
         net = slim.max_pool2d(net, [2, 2], scope='pool2')
         net = slim.conv2d(net,16,[5,5],1,scope='conv3')
         net = slim.max_pool2d(net, [2, 2], scope='pool4')
         net = slim.conv2d(net,120,[5,5],1,scope='conv5')
         net = slim.flatten(net, scope='flat6')
         net = slim.fully_connected(net, 84, scope='fc7')
         net = slim.dropout(net, self.dropout,is_training=is_trained, scope='dropout8')
         digits = slim.fully_connected(net, 10, scope='fc9')
     return digits
Example #20
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model; we use slim's implementation of ResNet.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
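The fusion branch above depends on unpool (and mean_image_subtraction) from the surrounding EAST code. unpool only needs to double the spatial resolution; a sketch:

def unpool(inputs):
    # 2x bilinear upsample, so g[i] matches the next, finer feature map.
    return tf.image.resize_bilinear(
        inputs, size=[tf.shape(inputs)[1] * 2, tf.shape(inputs)[2] * 2])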
Example #21
def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0,
               batch_norm_param=None, dropout_ratio=0.0, is_training=None): 
  if dropout_ratio > 0:
    assert(is_training is not None), \
      'is_training needs to be defined when training with dropout.'
  
  repr = []
  for i, neuron in enumerate(neurons):
    init_var = np.sqrt(2.0/neuron)
    if batch_norm_param is not None:
      x = slim.fully_connected(x, neuron, activation_fn=None,
                               weights_initializer=tf.random_normal_initializer(stddev=init_var),
                               weights_regularizer=slim.l2_regularizer(wt_decay),
                               normalizer_fn=slim.batch_norm,
                               normalizer_params=batch_norm_param,
                               biases_initializer=tf.zeros_initializer(),
                               scope='{:s}_{:d}'.format(name, offset+i))
    else:
      x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu,
                               weights_initializer=tf.random_normal_initializer(stddev=init_var),
                               weights_regularizer=slim.l2_regularizer(wt_decay),
                               biases_initializer=tf.zeros_initializer(),
                               scope='{:s}_{:d}'.format(name, offset+i))
    if dropout_ratio > 0:
       x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training,
                        scope='{:s}_{:d}'.format('dropout_'+name, offset+i))
    repr.append(x)
  
  if num_pred is not None:
    init_var = np.sqrt(2.0/num_pred)
    x = slim.fully_connected(x, num_pred,
                             weights_regularizer=slim.l2_regularizer(wt_decay),
                             weights_initializer=tf.random_normal_initializer(stddev=init_var),
                             biases_initializer=tf.zeros_initializer(),
                             activation_fn=None,
                             scope='{:s}_pred'.format(name))
  return x, repr
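A short usage sketch for fc_network (the shapes and hyperparameters below are illustrative assumptions): two ReLU hidden layers with dropout, followed by a linear 10-way prediction head.

x = tf.placeholder(tf.float32, [None, 128])
logits, reprs = fc_network(x, neurons=[256, 128], wt_decay=1e-4,
                           name='policy', num_pred=10,
                           dropout_ratio=0.5, is_training=True)
# reprs collects the (post-dropout) activations of each hidden layer.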
def network(inputs):
    '''Define the network'''
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                      weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, FLAGS.im_size, FLAGS.im_size, 3])
        net = slim.conv2d(net, 32, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [4, 4], scope='pool1')
        net = slim.conv2d(net, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [4, 4], scope='pool2')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 64, scope='fc')
        net = slim.fully_connected(net, n_classes, activation_fn=None, scope='output')
    return net
  def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
    """Creates a logistic model.

    Args:
      model_input: 'batch' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes."""
    output = slim.fully_connected(
        model_input, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(l2_penalty))
    return {"predictions": output}
Example #24
 def inference(self):
     x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
     # scale (divide by MNIST std)
     x = x * 0.0125
     with slim.arg_scope([slim.conv2d, slim.fully_connected],
                         weights_initializer=tf.contrib.layers.xavier_initializer(),
                         weights_regularizer=slim.l2_regularizer(0.0005)):
         model = slim.conv2d(x, 20, [5, 5], padding='VALID', scope='conv1')
         model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool1')
         model = slim.conv2d(model, 50, [5, 5], padding='VALID', scope='conv2')
         model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool2')
         model = slim.flatten(model)
         model = slim.fully_connected(model, 500, scope='fc1')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
         model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc2')
         return model
def inference(images, keep_probability, phase_train=True, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
    }
    with slim.arg_scope([slim.conv2d],
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        return inception_resnet_v1(images, is_training=phase_train,
              dropout_keep_prob=keep_probability, reuse=reuse)
    def rpn_net(self):

        rpn_encode_boxes_list = []
        rpn_scores_list = []
        with tf.variable_scope('rpn_net'):
            with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
                for level in self.level:

                    if self.share_head:
                        reuse_flag = None if level == 'P2' else True
                        scope_list = ['conv2d_3x3', 'rpn_classifier', 'rpn_regressor']
                        # at the beginning (i.e. P2) we create the variables, then share them across P3, P4, P5
                    else:
                        reuse_flag = None
                        scope_list = ['conv2d_3x3_'+level, 'rpn_classifier_'+level, 'rpn_regressor_'+level]

                    rpn_conv2d_3x3 = slim.conv2d(inputs=self.feature_pyramid[level],
                                                 num_outputs=256,
                                                 kernel_size=[self.kernel_size, self.kernel_size],
                                                 stride=1,
                                                 scope=scope_list[0],
                                                 reuse=reuse_flag)
                    rpn_box_scores = slim.conv2d(rpn_conv2d_3x3,
                                                 num_outputs=2 * self.num_of_anchors_per_location,
                                                 kernel_size=[1, 1],
                                                 stride=1,
                                                 scope=scope_list[1],
                                                 activation_fn=None,
                                                 reuse=reuse_flag)
                    rpn_encode_boxes = slim.conv2d(rpn_conv2d_3x3,
                                                   num_outputs=5 * self.num_of_anchors_per_location,
                                                   kernel_size=[1, 1],
                                                   stride=1,
                                                   scope=scope_list[2],
                                                   activation_fn=None,
                                                   reuse=reuse_flag)

                    rpn_box_scores = tf.reshape(rpn_box_scores, [-1, 2])
                    rpn_encode_boxes = tf.reshape(rpn_encode_boxes, [-1, 5])

                    rpn_scores_list.append(rpn_box_scores)
                    rpn_encode_boxes_list.append(rpn_encode_boxes)

                rpn_all_encode_boxes = tf.concat(rpn_encode_boxes_list, axis=0)
                rpn_all_boxes_scores = tf.concat(rpn_scores_list, axis=0)

            return rpn_all_encode_boxes, rpn_all_boxes_scores
Example #27
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
  """Defines the default ResNet arg scope.

  TODO(gpapan): The batch-normalization related default values above are
    appropriate for use in conjunction with the reference ResNet models
    released at https://github.com/KaimingHe/deep-residual-networks. When
    training ResNets from scratch, they might need to be tuned.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.

  Returns:
    An `arg_scope` to use for the resnet models.
  """
  batch_norm_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'scale': batch_norm_scale,
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
  }

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=tf.nn.relu,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params):
    with slim.arg_scope([slim.batch_norm], **batch_norm_params):
      # The following implies padding='SAME' for pool1, which makes feature
      # alignment easier for dense prediction tasks. This is also used in
      # https://github.com/facebook/fb.resnet.torch. However the accompanying
      # code of 'Deep Residual Learning for Image Recognition' uses
      # padding='VALID' for pool1. You can switch to that choice by setting
      # slim.arg_scope([slim.max_pool2d], padding='VALID').
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
        return arg_sc
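A typical way to consume the returned scope (a sketch; resnet_v1 and images are assumed, as in the earlier example that calls resnet_v1.resnet_v1_50):

# Every slim.conv2d inside the scope picks up the L2 regularizer,
# variance-scaling initializer, and batch-norm settings defined above.
with slim.arg_scope(resnet_arg_scope(weight_decay=1e-4)):
    net, end_points = resnet_v1.resnet_v1_50(images, num_classes=1000,
                                             is_training=True)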
Example #28
 def build_backbones(self):
     inputs = self.inputs
     with slim.arg_scope([slim.conv2d, slim.fully_connected],
                         padding='SAME', weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                         weights_regularizer=slim.l2_regularizer(0.0005),
                         activation_fn=tf.nn.relu):
         net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
         net = slim.max_pool2d(net, [2, 2], scope='pool1')
         net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
         net = slim.max_pool2d(net, [2, 2], scope='pool2')
         net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
         net = slim.max_pool2d(net, [2, 2], scope='pool3')
         net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
         net = slim.max_pool2d(net, [2, 2], scope='pool4')
         net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
         net = slim.max_pool2d(net, [2, 2], scope='pool5')
         self.vgg_head = net
	def __init__(self,is_training):

		z_dim = FLAGS.z_dim
		batch_size = FLAGS.batch_size

		self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,64,64,3],name='input_image')
		
		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)

		flatten_hist = tf.reshape(self.input_image,[-1,3*64*64])

		# flatten_hist = slim.fully_connected(flatten_hist, 1024,weights_regularizer=weights_regularizer,scope='fig/fc_1')
		# flatten_hist = slim.fully_connected(flatten_hist, 256,weights_regularizer=weights_regularizer, scope='fig/fc_2')
		# flatten_hist = slim.fully_connected(flatten_hist, 25,weights_regularizer=weights_regularizer, scope='fig/fc_3')

		self.image_feature_encoder = flatten_hist
		self.image_feature_decoder = flatten_hist
		
		############## Q(z|X) ###############


		############## Sample_z ###############

		eps = tf.random_normal(shape=[batch_size,z_dim])
		# self.sample_z = z_miu + tf.exp(z_logvar / 2) * eps
		self.sample_z = eps

		############## P(X|z) ###############

		x = tf.concat([self.input_nlcd,self.image_feature_decoder,self.sample_z],1)

		x = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
		x = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer, scope='decoder/fc_3')

		x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
		
		self.logits = slim.fully_connected(x, 100, activation_fn=None, weights_regularizer=weights_regularizer,scope='decoder/logits')

		self.output = tf.sigmoid(self.logits,name='decoder/output')
def inference(images, keep_probability, phase_train=True, 
              bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages ends up in the trainable variables collection
        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.initializers.xavier_initializer(), 
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        return inception_resnet_v2(images, is_training=phase_train,
              dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
Example #31
def main(args):
    project_dir = os.path.dirname(os.getcwd())
    network = importlib.import_module(args.model_def)

    with open(join(project_dir, 'config.yaml'), 'r') as f:
        cfg = yaml.safe_load(f)

    if cfg['specs']['set_gpu']:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg['base_conf']['gpu_num'])

    subdir = '%s_center_loss_factor_%1.2f' % (args.data_dir, args.center_loss_factor)

    # test = os.path.expanduser(args.logs_base_dir)
    log_dir = os.path.join(project_dir, 'fine_tuning_process', 'logs', subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(project_dir, 'fine_tuning_process', 'models', subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    data_dir = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'train')

    train_set = facenet.get_dataset(data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename),
                                   args.filter_percentile, args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.join(project_dir, 'fine_tuning_process', 'models', args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    lfw_dir = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'test')
    print('LFW directory: %s' % lfw_dir)
    # Read the file containing the pairs used for testing
    lfw_pairs = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'pairs.txt')
    pairs = lfw.read_pairs(lfw_pairs)
    # Get the paths for the corresponding images
    lfw_paths, actual_issame = lfw.get_paths_personal(lfw_dir, pairs)

    data_paths = tools.get_format_file(data_dir, 2, r'.+\.jpeg$')

    meta_data_path = os.path.join(model_dir, 'metadata.tsv')
    with open(meta_data_path, 'w') as f:
        f.write("Index\tLabel\n")
        for d in data_paths:
            tmp = os.path.split(d)
            t = os.path.split(tmp[0])
            f.write("%s\t%s\n" % (tmp[1], t[1]))

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # get soft labels
        with open(join(data_dir, 'soft_label.pk'), 'rb') as f:
            confidence_score = pickle.load(f)
        image_list, soft_labels_list = facenet.get_image_paths_and_soft_labels(train_set, confidence_score)
        soft_labels_array = np.array(soft_labels_list)
        soft_labels = ops.convert_to_tensor(soft_labels_array, dtype=tf.float32)

        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        range_size = array_ops.shape(soft_labels)[0]
        index_queue = tf.train.range_input_producer(range_size, num_epochs=None,
                                                    shuffle=True, seed=None, capacity=32)

        index_dequeue_op = index_queue.dequeue_many(args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths')

        soft_labels_placeholder = tf.placeholder(tf.float32, shape=(None, nrof_classes), name='soft_labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.float32],
                                              shapes=[(1,), (nrof_classes,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, soft_labels_placeholder],
                                              name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_softlabels = []
        for _ in range(nrof_preprocess_threads):
            filenames, soft_labels = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))

            images_and_softlabels.append([images, soft_labels])

        image_batch, soft_label_batch = tf.train.batch_join(
            images_and_softlabels, batch_size=batch_size_placeholder)
        image_batch = tf.squeeze(image_batch, 1)

        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        soft_label_batch = tf.identity(soft_label_batch, 'soft_label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)
        # fine_tuning = slim.fully_connected(prelogits, args.embedding_size, activation_fn=None,
        #                            scope='FineTuning', reuse=False, trainable=True)

        logits = slim.fully_connected(prelogits, nrof_classes, activation_fn=None,
                                      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                      weights_regularizer=slim.l2_regularizer(args.weight_decay),
                                      scope='Logits', reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.fuzzy_center_loss(prelogits, soft_label_batch,
                                                                 args.center_loss_alfa, args.fuzzier, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor)
            tf.summary.scalar('prelogits_center_loss', prelogits_center_loss)


        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
                                                   args.learning_rate_decay_epochs * args.epoch_size,
                                                   args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=soft_label_batch, logits=logits, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms)

        # Create a saver
        all_vars = tf.trainable_variables()
        var_to_restore = [v for v in all_vars if not v.name.startswith('Logits')]
        saver = tf.train.Saver(var_to_restore, max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)
                # result = sess.graph.get_tensor_by_name("InceptionResnetV1/Bottleneck/weights:0")
                # pre = sess.graph.get_tensor_by_name("InceptionResnetV1/Block8/Branch_1/Conv2d_0c_3x1/weights:0")
                # tf.stop_gradient(persisted_result)
                # print(result.eval())
                # print("======")
                # print(pre.eval())

            # Training and validation loop
            print('Running training')
            epoch = 0
            pre_acc = -1
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, soft_labels_array, index_dequeue_op, enqueue_op,
                      image_paths_placeholder, soft_labels_placeholder,
                      learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step,
                      total_loss, train_op, summary_op, summary_writer, regularization_losses,
                      args.learning_rate_schedule_file, logits)
                # print(result.eval())
                # print("======")
                # print(pre.eval())

                # Save variables and the metagraph if it doesn't exist already
                # Evaluate on LFW
                if args.lfw_dir:
                    acc = evaluate(sess, enqueue_op, image_paths_placeholder, soft_labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder,
                             embeddings, soft_label_batch, lfw_paths, actual_issame, args.lfw_batch_size,
                             args.lfw_nrof_folds, log_dir, step, summary_writer, nrof_classes, prelogits_center_loss)
                    if acc > pre_acc:
                        save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)
                        pre_acc = acc
    return model_dir
Example #32
    def model_fn(features, labels, mode, params):
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        # Inputs
        tokens = features['features']  # (N, L)
        token_lengths = features['feature_length']  # (N,)
        sequence_mask = tf.sequence_mask(maxlen=tf.shape(tokens)[1], lengths=token_lengths)  # (N,L)
        n = tf.shape(tokens)[0]
        L = tf.shape(tokens)[1]

        with tf.control_dependencies([
            tf.assert_greater_equal(params.flat_length, token_lengths, message="Tokens longer than tree size"),
            tf.assert_greater(vocab_size, tokens, message="Tokens larger than vocab"),
            tf.assert_greater_equal(tokens, 0, message="Tokens less than 0")
        ]):
            tokens = tf.identity(tokens)

        if params.l2 > 0:
            weights_regularizer = slim.l2_regularizer(params.l2)
        else:
            weights_regularizer = None

        # Encoder
        mu_t, logsigma_t = vae_flat_encoder_simple(
            tokens=tokens,
            token_lengths=token_lengths,
            vocab_size=vocab_size,
            params=params,
            n=n,
            weights_regularizer=weights_regularizer
        )  # (L,N,D)
        mu = tf.transpose(mu_t, (1, 0, 2))  # (N,L,D)
        logsigma = tf.transpose(logsigma_t, (1, 0, 2))  # (N,L,D)

        # Sampling
        idx = tf.where(sequence_mask)
        with tf.name_scope("kl"):
            selected_mu = tf.gather_nd(params=mu, indices=idx)
            selected_logsigma = tf.gather_nd(params=logsigma, indices=idx)
            latent_sample_values, latent_prior_sample_values = kl(
                mu=selected_mu,
                logsigma=selected_logsigma,
                params=params,
                n=n)
            latent_sample = tf.scatter_nd(
                updates=latent_sample_values,
                indices=idx,
                shape=(n, L, latent_sample_values.shape[-1].value)
            )  # (N,L,D)
            latent_prior_sample = tf.scatter_nd(
                updates=latent_prior_sample_values,
                indices=idx,
                shape=(n, L, latent_prior_sample_values.shape[-1].value)
            )  # (N,L,D)

        # Decoder
        with tf.variable_scope('vae_decoder') as decoder_scope:
            logits, penalty = vae_decoder_dag(
                latent=latent_sample,
                vocab_size=vocab_size,
                sequence_lengths=token_lengths,
                params=params,
                weights_regularizer=weights_regularizer,
                n=n,
                is_training=is_training
            )
        with tf.name_scope("dag_penalty"):
            penalty_scale = get_penalty_scale_logistic(params)
            dag_penalty_raw = tf.reduce_mean(tf.square(penalty))
            weighted_dag_penalty = penalty_scale * dag_penalty_raw
            tf.losses.add_loss(loss=weighted_dag_penalty, loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.summary.scalar('dag_penalty_scale', penalty_scale)
            tf.summary.scalar('dag_penalty_raw', dag_penalty_raw)
            tf.summary.scalar('dag_penalty_weighted', weighted_dag_penalty)

        # Loss calculation
        logits_values = tf.gather_nd(params=logits, indices=idx)
        labels_values = tf.gather_nd(params=tokens, indices=idx)
        onehot_labels_values = tf.one_hot(indices=labels_values, depth=vocab_size)
        loss_values = tf.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels_values,
            logits=logits_values,
            reduction=tf.losses.Reduction.NONE,
            loss_collection=None
        )
        loss_arr = tf.scatter_nd(updates=loss_values, indices=idx, shape=(n, L))
        loss_n = tf.reduce_sum(loss_arr, axis=-1)
        loss = tf.reduce_mean(loss_n)
        tf.losses.add_loss(loss)
        tf.summary.scalar("softmax_cross_entropy", loss)

        total_loss = tf.losses.get_total_loss()

        # Generated data
        with tf.variable_scope(decoder_scope, reuse=True):
            glogits, _ = vae_decoder_dag(
                latent=latent_prior_sample,
                vocab_size=vocab_size,
                sequence_lengths=token_lengths,
                params=params,
                weights_regularizer=weights_regularizer,
                n=n,
                is_training=is_training
            )

        # Hooks
        autoencode_hook = DAGHook(
            logits=logits,
            true=tokens,
            vocab=vocab,
            path=os.path.join(run_config.model_dir, "autoencoded", "autoencoded-{:08d}.csv"),
            name="Autoencoded",
            idx=idx
        )
        generate_hook = DAGHook(
            logits=glogits,
            true=tokens,
            vocab=vocab,
            path=os.path.join(run_config.model_dir, "generated", "generated-{:08d}.csv"),
            name="Generated",
            idx=idx
        )
        evaluation_hooks = [autoencode_hook, generate_hook]

        #tf.summary.scalar('model_total_loss', total_loss)

        # Train
        optimizer = tf.train.AdamOptimizer(params.lr)
        train_op = slim.learning.create_train_op(
            total_loss,
            optimizer,
            clip_gradient_norm=params.clip_gradient_norm)
        eval_metric_ops = {
            'cross_entropy_eval': tf.metrics.mean(loss_n),
            'token_lengths_eval': tf.metrics.mean(token_lengths)
        }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metric_ops,
            evaluation_hooks=evaluation_hooks,
            train_op=train_op)
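
    # The model_fn above pulls run_config, params, and vocab from an enclosing
    # scope. The wiring sketch below is an assumption about how it would be
    # handed to an Estimator, not code from the source: train_input_fn is a
    # hypothetical input_fn yielding the 'features' and 'feature_length'
    # tensors the model expects, and params is the attribute-style
    # hyperparameter object already referenced as params.lr and params.l2.
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,   # the model_fn defined above
        config=run_config,   # the run config whose model_dir the hooks use
        params=params)       # attribute-style hyperparameters (lr, l2, ...)
    estimator.train(input_fn=train_input_fn, max_steps=100000)  # assumptions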
Example #33
    def __feature_sequence_extraction(self, input_tensor):
        is_training = self.__phase == 'train'
        with slim.arg_scope(
            [slim.conv2d],
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                biases_initializer=None):
            net = slim.repeat(
                input_tensor,
                2,
                slim.conv2d,
                64,
                kernel_size=3,
                stride=1,
                scope='conv1'
            )  # input_tensor  shape(32,64,?,3)  to_shape(32,1,?,x)
            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool1')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              128,
                              kernel_size=3,
                              stride=1,
                              scope='conv2')
            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool2')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              256,
                              kernel_size=3,
                              stride=1,
                              scope='conv3')
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 1],
                                  stride=[2, 1],
                                  scope='pool3')
            net = slim.conv2d(net, 512, kernel_size=3, stride=1, scope='conv4')
            # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn4')
            net = bn_layer(x=net,
                           scope='bn4',
                           is_training=is_training,
                           decay=_BATCH_DECAY)
            net = slim.conv2d(net, 512, kernel_size=3, stride=1, scope='conv5')
            # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn5')
            net = bn_layer(x=net,
                           scope='bn5',
                           is_training=is_training,
                           decay=_BATCH_DECAY)
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 1],
                                  stride=[2, 1],
                                  scope='pool5')
            net = slim.conv2d(net,
                              512,
                              padding="VALID",
                              kernel_size=[2, 1],
                              stride=1,
                              scope='conv6')

            # net = slim.repeat(input_tensor, 2, slim.conv2d, 64, kernel_size=4, stride=1,
            #                   scope='conv1')  # input_tensor  shape(32,64,?,3)  to_shape(32,1,?,x)
            # net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool1')
            # net = slim.repeat(net, 2, slim.conv2d, 128, kernel_size=4, stride=1, scope='conv2')
            # net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool2')
            # net = slim.repeat(net, 2, slim.conv2d, 256, kernel_size=4, stride=1, scope='conv3')
            # net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool3')
            # net = slim.conv2d(net, 512, kernel_size=4, stride=1, scope='conv4')
            # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn4')
            # net = slim.conv2d(net, 512, kernel_size=4, stride=1, scope='conv5')
            # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn5')
            # net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool5')
            # net = slim.conv2d(net, 512, padding="VALID", kernel_size=[2, 1], stride=1, scope='conv6')
        return net
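
After 'conv6' the feature map is intended to have height 1 (the comment above notes shape (32, 1, ?, x)), so each width position can serve as one time step for a recurrent/CTC head. Below is a minimal squeeze step under that assumption; map_to_sequence is a hypothetical helper, not part of the source.

    # Hypothetical helper: collapse the unit height dimension so the conv
    # features become a (batch, width, channels) sequence for an RNN/CTC stage.
    def map_to_sequence(net):
        shape = net.get_shape().as_list()
        assert shape[1] == 1, 'feature map height must be 1 before squeezing'
        return tf.squeeze(net, axis=1)  # (N, W, C): one feature vector per step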

    x, y = get_input()

    prelogits, _ = inception_resnet_v1.inference(x,
                                                 keep_probability=0.8,
                                                 phase_train=True,
                                                 bottleneck_layer_size=512,
                                                 weight_decay=5e-5)
    logits = slim.fully_connected(
        prelogits,
        8,
        activation_fn=None,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        weights_regularizer=slim.l2_regularizer(5e-5),
        scope='Logits',
        reuse=False)

    embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(coord=coord, sess=sess)

    with sess.as_default():
Example #35
    def encoder(self, images, is_training):
        activation_fn = leaky_relu  # tf.nn.relu
        weight_decay = 0.0
        with tf.variable_scope('encoder'):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.truncated_normal_initializer(
                            stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=self.batch_norm_params):
                    net = images

                    net = slim.conv2d(net,
                                      32, [4, 4],
                                      2,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_1a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      32, [4, 4],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_1b')

                    net = slim.conv2d(net,
                                      64, [4, 4],
                                      2,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_2a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      64, [4, 4],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_2b')

                    net = slim.conv2d(net,
                                      128, [4, 4],
                                      2,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_3a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      128, [4, 4],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_3b')

                    net = slim.conv2d(net,
                                      256, [4, 4],
                                      2,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_4a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      256, [4, 4],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_4b')

                    net = slim.flatten(net)
                    fc1 = slim.fully_connected(net,
                                               self.latent_variable_dim,
                                               activation_fn=None,
                                               normalizer_fn=None,
                                               scope='Fc_1')
                    fc2 = slim.fully_connected(net,
                                               self.latent_variable_dim,
                                               activation_fn=None,
                                               normalizer_fn=None,
                                               scope='Fc_2')
        return fc1, fc2
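
The two fully connected heads suggest a VAE-style encoder. Below is a minimal reparameterization sketch under the assumption that Fc_1 is the mean and Fc_2 the log-variance of the latent Gaussian; the source does not name them, so this interpretation is an assumption.

    # Hypothetical sampling step, assuming fc1 = mu and fc2 = log(sigma^2).
    def sample_latent(mu, log_var):
        eps = tf.random_normal(tf.shape(mu))     # standard normal noise
        return mu + tf.exp(0.5 * log_var) * eps  # z = mu + sigma * eps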
def inference_2(inputs, bottleneck_layer_size=128, weight_decay=0.0, reuse=None):

    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # Epsilon to prevent zeros in the variance.
        'epsilon': 0.001,
        # Force in-place updates of the mean and variance estimates.
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection.
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.initializers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        # return inception_resnet_v2(images, phase_train,
        #       dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
        end_points = {}

        scope = 'InceptionResnetV2'
        with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout]):

                with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                    stride=1, padding='SAME'):
                    # 149 x 149 x 32
                    net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                      scope='Conv2d_1a_3x3')
                    end_points['Conv2d_1a_3x3'] = net
                    # 147 x 147 x 32
                    net = slim.conv2d(net, 32, 3, padding='VALID',
                                      scope='Conv2d_2a_3x3')
                    end_points['Conv2d_2a_3x3'] = net
                    # 147 x 147 x 64
                    net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                    end_points['Conv2d_2b_3x3'] = net
                    # 73 x 73 x 64
                    net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                          scope='MaxPool_3a_3x3')
                    end_points['MaxPool_3a_3x3'] = net
                    # 73 x 73 x 80
                    net = slim.conv2d(net, 80, 1, padding='VALID',
                                      scope='Conv2d_3b_1x1')
                    end_points['Conv2d_3b_1x1'] = net
                    # 71 x 71 x 192
                    net = slim.conv2d(net, 192, 3, padding='VALID',
                                      scope='Conv2d_4a_3x3')
                    end_points['Conv2d_4a_3x3'] = net
                    # 35 x 35 x 192
                    net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                          scope='MaxPool_5a_3x3')
                    end_points['MaxPool_5a_3x3'] = net

                    # 35 x 35 x 320
                    with tf.variable_scope('Mixed_5b'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                        scope='Conv2d_0b_5x5')
                        with tf.variable_scope('Branch_2'):
                            tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                            tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                        scope='Conv2d_0b_3x3')
                            tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                        scope='Conv2d_0c_3x3')
                        with tf.variable_scope('Branch_3'):
                            tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                                         scope='AvgPool_0a_3x3')
                            tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                       scope='Conv2d_0b_1x1')
                        net = tf.concat([tower_conv, tower_conv1_1,
                                         tower_conv2_2, tower_pool_1], 3)

                    end_points['Mixed_5b'] = net
                    net = slim.repeat(net, 10, block35, scale=0.17)

                    # 17 x 17 x 1024
                    with tf.variable_scope('Mixed_6a'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                                     scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                        scope='Conv2d_0b_3x3')
                            tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                        stride=2, padding='VALID',
                                                        scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                         scope='MaxPool_1a_3x3')
                        net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

                    end_points['Mixed_6a'] = net
                    net = slim.repeat(net, 20, block17, scale=0.10)

                    with tf.variable_scope('Mixed_7a'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                       padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                                        padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                        scope='Conv2d_0b_3x3')
                            tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                                        padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_3'):
                            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                         scope='MaxPool_1a_3x3')
                        net = tf.concat([tower_conv_1, tower_conv1_1,
                                         tower_conv2_2, tower_pool], 3)

                    end_points['Mixed_7a'] = net

                    net = slim.repeat(net, 9, block8, scale=0.20)
                    net = block8(net, activation_fn=None)

                    net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                    end_points['Conv2d_7b_1x1'] = net

                    with tf.variable_scope('Logits'):
                        end_points['PrePool'] = net
                        # pylint: disable=no-member
                        net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                              scope='AvgPool_1a_8x8')
                        net = slim.flatten(net)

                        # net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                        #                    scope='Dropout')

                        end_points['PreLogitsFlatten'] = net

                    net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                               scope='Bottleneck', reuse=False)

        return net
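
A hedged usage sketch for inference_2 follows; the 299x299 input size is an assumption inferred from the '149 x 149' comment after the first stride-2 VALID convolution, and the weight decay value is illustrative.

    # Hypothetical usage; input size and weight decay are assumptions.
    images = tf.placeholder(tf.float32, shape=(None, 299, 299, 3), name='input')
    prelogits = inference_2(images, bottleneck_layer_size=128, weight_decay=5e-5)
    embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')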
Example #37
def pfld_inference_for_mobileNetV3_small(input, weight_decay, batch_norm_params):
    layers = [
        [16, 16, 3, 2, "RE", True, 16],
        [16, 24, 3, 2, "RE", False, 72],
        [24, 24, 3, 1, "RE", False, 88],
        [24, 40, 5, 2, "HS", True, 96],
        [40, 40, 5, 1, "HS", True, 240],
        [40, 40, 5, 1, "HS", True, 240],
        [40, 48, 5, 1, "HS", True, 120],
        [48, 48, 5, 1, "HS", True, 144],
        [48, 96, 5, 2, "HS", True, 288],
        [96, 96, 5, 1, "HS", True, 576],
        [96, 96, 5, 1, "HS", True, 576],
    ]
    reduction_ratio = 4
    multiplier = 1
    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))

            # 112*112*3
            out = slim.convolution2d(input, 16 * multiplier, [3, 3], stride=1, activation_fn=hard_swish, scope='conv_1')
            print(out.name, out.get_shape())

            with tf.variable_scope("MobilenetV3_large"):
                for index in range(3):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out = mobileNetV3_block(out, "bneck{}".format(index), expand_dims, out_channels, kernel_size,
                                            stride, ratio=reduction_ratio, activation_fn=activation, se=se,
                                            short_cut=(in_channels == out_channels))
                    print(out.name, out.get_shape())

                # 28*28
                features['auxiliary_input'] = out

                # 14*14
                index = 3
                in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                out_channels *= multiplier
                out1 = mobileNetV3_block(out, "bneck{}".format(index), expand_dims, out_channels, kernel_size,
                                         stride, ratio=reduction_ratio, activation_fn=activation, se=se,
                                         short_cut=(in_channels == out_channels))
                print(out1.name, out1.get_shape())
                for index in range(4, 8):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out1 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims, out_channels, kernel_size,
                                             stride, ratio=reduction_ratio, activation_fn=activation, se=se,
                                             short_cut=(in_channels == out_channels))
                    print(out1.name, out1.get_shape())

                # 7*7
                index = 8
                in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                out_channels *= multiplier
                out2 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride,
                                         ratio=reduction_ratio, activation_fn=activation, se=se,
                                         short_cut=(in_channels == out_channels))
                print(out2.name, out2.get_shape())
                for index in range(9, len(layers)):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out2 = mobileNetV3_block(out2, "bneck{}".format(index), expand_dims, out_channels, kernel_size,
                                             stride, ratio=reduction_ratio, activation_fn=activation, se=se,
                                             short_cut=(in_channels == out_channels))
                    print(out2.name, out2.get_shape())

                out3 = slim.convolution2d(out2, 576, [1, 1], stride=1, activation_fn=hard_swish, scope='conv_2')
                print(out3.name, out3.get_shape())

                out3 = slim.avg_pool2d(out3, [out3.get_shape()[1], out3.get_shape()[2]], stride=1, scope='group_pool')
                print(out3.name, out3.get_shape())

                out3 = slim.convolution2d(out3, 1280, [1, 1], stride=1, normalizer_fn=None, activation_fn=hard_swish,
                                          scope='conv_3')
                print(out3.name, out3.get_shape())

            s1 = slim.flatten(out1)
            s2 = slim.flatten(out2)
            s3 = slim.flatten(out3)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=136, activation_fn=None, scope='fc')
            print(landmarks.name, landmarks.get_shape())
        return features, landmarks
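
A hedged usage sketch follows; the 112x112 input matches the '112*112*3' comment, the batch-norm settings mirror the dictionaries used elsewhere in this file, and interpreting the 136 outputs as 68 (x, y) landmark pairs is an assumption.

    # Hypothetical usage; bn_params and the landmark reshape are assumptions.
    image = tf.placeholder(tf.float32, shape=(None, 112, 112, 3), name='image')
    bn_params = {'decay': 0.995, 'epsilon': 0.001, 'updates_collections': None}
    features, landmarks = pfld_inference_for_mobileNetV3_small(
        image, weight_decay=1e-5, batch_norm_params=bn_params)
    landmarks_xy = tf.reshape(landmarks, (-1, 68, 2))  # one (x, y) per landmark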
Example #38
def main(args):

    network = importlib.import_module(args.model_def)
    image_size = (args.image_size, args.image_size)

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    stat_file_name = os.path.join(log_dir, 'stat.h5')

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    dataset = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        dataset = filter_dataset(dataset,
                                 os.path.expanduser(args.filter_filename),
                                 args.filter_percentile,
                                 args.filter_min_nrof_images_per_class)

    if args.validation_set_split_ratio > 0.0:
        train_set, val_set = facenet.split_dataset(
            dataset, args.validation_set_split_ratio,
            args.min_nrof_val_images_per_class, 'SPLIT_IMAGES')
    else:
        train_set, val_set = dataset, []

    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The training set should not be empty'

        val_image_list, val_label_list = facenet.get_image_paths_and_labels(
            val_set)

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(range_size,
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=32)

        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name='control')

        nrof_preprocess_threads = 4
        input_queue = data_flow_ops.FIFOQueue(
            capacity=2000000,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name='enqueue_op')
        image_batch, label_batch = facenet.create_input_pipeline(
            input_queue, image_size, nrof_preprocess_threads,
            batch_size_placeholder)

        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Number of classes in training set: %d' % nrof_classes)
        print('Number of examples in training set: %d' % len(image_list))

        print('Number of classes in validation set: %d' % len(val_set))
        print('Number of examples in validation set: %d' % len(val_image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(
            image_batch,
            args.keep_probability,
            phase_train=phase_train_placeholder,
            bottleneck_layer_size=args.embedding_size,
            weight_decay=args.weight_decay)
        logits = slim.fully_connected(
            prelogits,
            len(train_set),
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits',
            reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Norm for the prelogits
        eps = 1e-4
        prelogits_norm = tf.reduce_mean(
            tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p,
                    axis=1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_norm * args.prelogits_norm_loss_factor)

        # Add center loss
        prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch,
                                                       args.center_loss_alfa,
                                                       nrof_classes)
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        correct_prediction = tf.cast(
            tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)),
            tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            nrof_steps = args.max_nrof_epochs * args.epoch_size
            nrof_val_samples = int(
                math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)
            )  # Validate every validate_every_n_epochs as well as in the last epoch
            stat = {
                'loss': np.zeros((nrof_steps, ), np.float32),
                'center_loss': np.zeros((nrof_steps, ), np.float32),
                'reg_loss': np.zeros((nrof_steps, ), np.float32),
                'xent_loss': np.zeros((nrof_steps, ), np.float32),
                'prelogits_norm': np.zeros((nrof_steps, ), np.float32),
                'accuracy': np.zeros((nrof_steps, ), np.float32),
                'val_loss': np.zeros((nrof_val_samples, ), np.float32),
                'val_xent_loss': np.zeros((nrof_val_samples, ), np.float32),
                'val_accuracy': np.zeros((nrof_val_samples, ), np.float32),
                'lfw_accuracy': np.zeros((args.max_nrof_epochs, ), np.float32),
                'lfw_valrate': np.zeros((args.max_nrof_epochs, ), np.float32),
                'learning_rate': np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_train': np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_validate': np.zeros((args.max_nrof_epochs, ), np.float32),
                'time_evaluate': np.zeros((args.max_nrof_epochs, ), np.float32),
                'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32),
            }
            for epoch in range(1, args.max_nrof_epochs + 1):
                step = sess.run(global_step, feed_dict=None)
                # Train for one epoch
                t = time.time()
                cont = train(
                    args, sess, epoch, image_list, label_list,
                    index_dequeue_op, enqueue_op, image_paths_placeholder,
                    labels_placeholder, learning_rate_placeholder,
                    phase_train_placeholder, batch_size_placeholder,
                    control_placeholder, global_step, total_loss, train_op,
                    summary_op, summary_writer, regularization_losses,
                    args.learning_rate_schedule_file, stat, cross_entropy_mean,
                    accuracy, learning_rate, prelogits, prelogits_center_loss,
                    args.random_rotate, args.random_crop, args.random_flip,
                    prelogits_norm, args.prelogits_hist_max,
                    args.use_fixed_image_standardization)
                stat['time_train'][epoch - 1] = time.time() - t

                if not cont:
                    break

                t = time.time()
                if len(val_image_list) > 0 and (
                    (epoch - 1) % args.validate_every_n_epochs
                        == args.validate_every_n_epochs - 1
                        or epoch == args.max_nrof_epochs):
                    validate(args, sess, epoch, val_image_list, val_label_list,
                             enqueue_op, image_paths_placeholder,
                             labels_placeholder, control_placeholder,
                             phase_train_placeholder, batch_size_placeholder,
                             stat, total_loss, regularization_losses,
                             cross_entropy_mean, accuracy,
                             args.validate_every_n_epochs,
                             args.use_fixed_image_standardization)
                stat['time_validate'][epoch - 1] = time.time() - t

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, epoch)

                # Evaluate on LFW
                t = time.time()
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, control_placeholder,
                             embeddings, label_batch, lfw_paths, actual_issame,
                             args.lfw_batch_size, args.lfw_nrof_folds, log_dir,
                             step, summary_writer, stat, epoch,
                             args.lfw_distance_metric, args.lfw_subtract_mean,
                             args.lfw_use_flipped_images,
                             args.use_fixed_image_standardization)
                stat['time_evaluate'][epoch - 1] = time.time() - t

                print('Saving statistics')
                with h5py.File(stat_file_name, 'w') as f:
                    for key, value in stat.items():
                        f.create_dataset(key, data=value)

    return model_dir
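
The statistics saved above can be read back directly with h5py; a minimal sketch, assuming the stat.h5 path from the log directory.

    # Hypothetical read-back of the statistics written at the end of training.
    import h5py

    with h5py.File('stat.h5', 'r') as f:  # path assumed: <log_dir>/stat.h5
        stat = {key: f[key][...] for key in f.keys()}
    print(stat['loss'].shape, stat['lfw_accuracy'].shape)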
def UNet_pp(inputs, reg, deep_supervision=True):  # UNet++ (nested U-Net)
    '''
     1-1 ---> 1-2 ---> 1-3 ---> 1-4 ---> 1-5
        \    /   \    /   \    /   \    /
         2-1 ---> 2-2 ---> 2-3 ---> 2-4
            \    /   \    /   \    /
             3-1 ---> 3-2 ---> 3-3
                \    /   \    /
                 4-1 ---> 4-2
                    \    /
                     5-1
    '''

    nb_filter = [32, 64, 128, 256, 512]

    conv1_1 = standard_unit(inputs, stage='stage_11', nb_filter=nb_filter[0])
    pool1 = slim.max_pool2d(conv1_1, [2, 2], padding='SAME')

    conv2_1 = standard_unit(pool1, stage='stage_21', nb_filter=nb_filter[1])
    pool2 = slim.max_pool2d(conv2_1, [2, 2], padding='SAME')

    conv3_1 = standard_unit(pool2, stage='stage_31', nb_filter=nb_filter[2])
    pool3 = slim.max_pool2d(conv3_1, [2, 2], padding='SAME')

    conv4_1 = standard_unit(pool3, stage='stage_41', nb_filter=nb_filter[3])
    pool4 = slim.max_pool2d(conv4_1, [2, 2], padding='SAME')

    conv5_1 = standard_unit(pool4, stage='stage_51', nb_filter=nb_filter[4])

    up1_2 = upsample(conv2_1, num_outputs=nb_filter[0])
    #up1_2 = slim.conv2d_transpose(conv2_1,num_outputs=nb_filter[0],kernel_size=2,stride=2)
    conv1_2 = tf.concat([conv1_1, up1_2], 3)
    #conv1_2 = crop_and_concat(conv1_1,up1_2)
    #conv1_2 = np.concatenate((conv1_1,up1_2),3)
    conv1_2 = standard_unit(conv1_2, stage='stage_12', nb_filter=nb_filter[0])

    up2_2 = upsample(conv3_1, num_outputs=nb_filter[1])
    #up2_2 = slim.conv2d_transpose(conv3_1,num_outputs=nb_filter[1],kernel_size=2,stride=2)
    conv2_2 = tf.concat([conv2_1, up2_2], 3)
    conv2_2 = standard_unit(conv2_2, stage='stage_22', nb_filter=nb_filter[1])

    up3_2 = upsample(conv4_1, num_outputs=nb_filter[2])
    #up3_2 = slim.conv2d_transpose(conv4_1,num_outputs=nb_filter[2],kernel_size=2,stride=2)
    conv3_2 = tf.concat([conv3_1, up3_2], 3)
    conv3_2 = standard_unit(conv3_2, stage='stage_32', nb_filter=nb_filter[2])

    up4_2 = upsample(conv5_1, num_outputs=nb_filter[3])
    #up4_2 = slim.conv2d_transpose(conv5_1,num_outputs=nb_filter[3],kernel_size=2,stride=2)
    conv4_2 = tf.concat([conv4_1, up4_2], 3)
    conv4_2 = standard_unit(conv4_2, stage='stage_42', nb_filter=nb_filter[3])

    up1_3 = upsample(conv2_2, num_outputs=nb_filter[0])
    #up1_3 = slim.conv2d_transpose(conv2_2,num_outputs=nb_filter[0],kernel_size=2,stride=2)
    conv1_3 = tf.concat([conv1_1, conv1_2, up1_3], 3)
    conv1_3 = standard_unit(conv1_3, stage='stage_13', nb_filter=nb_filter[0])

    up2_3 = upsample(conv3_2, num_outputs=nb_filter[1])
    #up2_3 = slim.conv2d_transpose(conv3_2,num_outputs=nb_filter[1],kernel_size=2,stride=2)
    conv2_3 = tf.concat([conv2_1, conv2_2, up2_3], 3)
    conv2_3 = standard_unit(conv2_3, stage='stage_23', nb_filter=nb_filter[1])

    up3_3 = upsample(conv4_2, num_outputs=nb_filter[2])
    #up3_3 = slim.conv2d_transpose(conv4_2,num_outputs=nb_filter[2],kernel_size=2,stride=2)
    conv3_3 = tf.concat([conv3_1, conv3_2, up3_3], 3)
    conv3_3 = standard_unit(conv3_3, stage='stage_33', nb_filter=nb_filter[2])

    up1_4 = upsample(conv2_3, num_outputs=nb_filter[0])
    #up1_4 = slim.conv2d_transpose(conv2_3,num_outputs=nb_filter[0],kernel_size=2,stride=2)
    conv1_4 = tf.concat([conv1_1, conv1_2, conv1_3, up1_4], 3)
    conv1_4 = standard_unit(conv1_4, stage='stage_14', nb_filter=nb_filter[0])

    up2_4 = upsample(conv3_3, num_outputs=nb_filter[1])
    #up2_4 = slim.conv2d_transpose(conv3_3,num_outputs=nb_filter[1],kernel_size=2,stride=2)
    conv2_4 = tf.concat([conv2_1, conv2_2, conv2_3, up2_4], 3)
    conv2_4 = standard_unit(conv2_4, stage='stage_24', nb_filter=nb_filter[1])

    up1_5 = upsample(conv2_4, num_outputs=nb_filter[0])
    #up1_5 = slim.conv2d_transpose(conv2_4,num_outputs=nb_filter[0],kernel_size=2,stride=2)
    conv1_5 = tf.concat([conv1_1, conv1_2, conv1_3, conv1_4, up1_5], 3)
    conv1_5 = standard_unit(conv1_5, stage='stage_15', nb_filter=nb_filter[0])

    nestnet_output_1 = slim.conv2d(
        conv1_2,
        1, [1, 1],
        rate=1,
        activation_fn=tf.nn.sigmoid,
        scope='output_1',
        weights_regularizer=slim.l2_regularizer(scale=0.0001))
    nestnet_output_2 = slim.conv2d(
        conv1_3,
        1, [1, 1],
        rate=1,
        activation_fn=tf.nn.sigmoid,
        scope='output_2',
        weights_regularizer=slim.l2_regularizer(scale=0.0001))
    nestnet_output_3 = slim.conv2d(
        conv1_4,
        1, [1, 1],
        rate=1,
        activation_fn=tf.nn.sigmoid,
        scope='output_3',
        weights_regularizer=slim.l2_regularizer(scale=0.0001))
    nestnet_output_4 = slim.conv2d(
        conv1_5,
        1, [1, 1],
        rate=1,
        activation_fn=tf.nn.sigmoid,
        scope='output_4',
        weights_regularizer=slim.l2_regularizer(scale=0.0001))
    if deep_supervision:
        h_deconv_concat = tf.concat([
            nestnet_output_1, nestnet_output_2, nestnet_output_3,
            nestnet_output_4
        ], 3)
        h_deconv_concat = conv2d(inputs=h_deconv_concat,
                                 num_outputs=3,
                                 kernel_size=3,
                                 activation_fn=None)
        h_deconv_concat = tf.tanh(h_deconv_concat)
        return h_deconv_concat
    else:
        return nestnet_output_4
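
Each side output above already passes through a sigmoid, so deep supervision is usually trained with a per-output binary cross-entropy averaged over the heads. Below is a minimal sketch under the assumption that the four nestnet_output tensors are exposed to the loss; this is not the loss used by the source.

    # Hypothetical deep-supervision loss over the four sigmoid side outputs.
    def deep_supervision_loss(labels, outputs, eps=1e-7):
        losses = []
        for probs in outputs:  # each output is already a sigmoid probability map
            probs = tf.clip_by_value(probs, eps, 1.0 - eps)
            bce = -(labels * tf.log(probs) + (1.0 - labels) * tf.log(1.0 - probs))
            losses.append(tf.reduce_mean(bce))
        return tf.add_n(losses) / float(len(losses))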
Example #40
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shapes are [M, 6] (rotated) and [M, 5] (horizontal)
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        C2_, C4 = self.build_base_network(input_img_batch)

        C2 = slim.conv2d(C2_,
                         num_outputs=1024,
                         kernel_size=[1, 1],
                         stride=1,
                         scope='build_C2_to_1024')

        self.feature_pyramid = {'C2': C2, 'C4': C4}

        # 2. build rpn

        rpn_all_encode_boxes = {}
        rpn_all_boxes_scores = {}
        rpn_all_cls_score = {}
        anchors = {}

        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):
            i = 0
            for level in self.level:
                rpn_conv3x3 = slim.conv2d(
                    self.feature_pyramid[level],
                    512, [3, 3],
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=tf.nn.relu,
                    scope='rpn_conv/3x3_{}'.format(level))
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    scope='rpn_cls_score_{}'.format(level))
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    scope='rpn_bbox_pred_{}'.format(level))
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
                rpn_cls_prob = slim.softmax(
                    rpn_cls_score,
                    scope='rpn_cls_prob_{}'.format(level))  # do the softmax

                rpn_all_cls_score[level] = rpn_cls_score
                rpn_all_boxes_scores[level] = rpn_cls_prob  # softmax probabilities
                rpn_all_encode_boxes[level] = rpn_box_pred
                i += 1

        # 3. generate_anchors
        i = 0
        for level, base_anchor_size, stride in zip(self.level,
                                                   self.base_anchor_size_list,
                                                   self.stride):
            featuremap_height, featuremap_width = tf.shape(
                self.feature_pyramid[level])[1], tf.shape(
                    self.feature_pyramid[level])[2]

            featuremap_height = tf.cast(featuremap_height, tf.float32)
            featuremap_width = tf.cast(featuremap_width, tf.float32)

            # anchor_scale = tf.constant(self.anchor_scales[i], dtype=tf.float32)
            # anchor_ratio = tf.constant(self.anchor_ratios[i], dtype=tf.float32)
            anchor_scale = self.anchor_scales[i]
            anchor_ratio = self.anchor_ratios[i]

            tmp_anchors = anchor_utils.make_anchors(
                base_anchor_size=base_anchor_size,
                anchor_scales=anchor_scale,
                anchor_ratios=anchor_ratio,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=stride,
                name="make_anchors_forRPN_{}".format(level))
            tmp_anchors = tf.reshape(tmp_anchors, [-1, 4])
            anchors[level] = tmp_anchors
            i += 1

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        rois = {}
        roi_scores = {}
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            for level in self.level:
                rois_rpn, roi_scores_rpn = postprocess_rpn_proposals(
                    rpn_bbox_pred=rpn_all_encode_boxes[level],
                    rpn_cls_prob=rpn_all_boxes_scores[level],
                    img_shape=img_shape,
                    anchors=anchors[level],
                    is_training=self.is_training)
                # rois[level] = rois
                # roi_scores[level] = roi_scores
                # rois shape [-1, 4]
                # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++
                rois[level] = rois_rpn
                roi_scores[level] = roi_scores_rpn

                if self.is_training:
                    rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=rois_rpn,
                        scores=roi_scores_rpn)
                    tf.summary.image('all_rpn_rois_{}'.format(level),
                                     rois_in_img)

                    score_gre_05 = tf.reshape(
                        tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1])
                    score_gre_05_rois = tf.gather(rois_rpn, score_gre_05)
                    score_gre_05_score = tf.gather(roi_scores_rpn,
                                                   score_gre_05)
                    score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=score_gre_05_rois,
                        scores=score_gre_05_score)
                    tf.summary.image('score_greater_05_rois_{}'.format(level),
                                     score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        rpn_labels = {}
        rpn_bbox_targets = {}
        labels_all = []
        labels = {}
        bbox_targets_h = {}
        bbox_targets_r = {}
        bbox_targets_all_h = []
        bbox_targets_all_r = []

        if self.is_training:
            for level in self.level:
                with tf.variable_scope(
                        'sample_anchors_minibatch_{}'.format(level)):
                    rpn_labels_one, rpn_bbox_targets_one = \
                        tf.py_func(
                            anchor_target_layer,
                            [gtboxes_h_batch, img_shape, anchors[level]],
                            [tf.float32, tf.float32])
                    rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one,
                                                      [-1, 4])
                    rpn_labels_one = tf.to_int32(
                        rpn_labels_one, name="to_int32_{}".format(level))
                    rpn_labels_one = tf.reshape(rpn_labels_one, [-1])
                    self.add_anchor_img_smry(input_img_batch, anchors[level],
                                             rpn_labels_one)

                    # -----------------------------add to the dict-------------------------------------------------------------
                    rpn_labels[level] = rpn_labels_one
                    rpn_bbox_targets[level] = rpn_bbox_targets_one
                # --------------------------------------add smry-----------------------------------------------------------

                rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level],
                                             axis=1)
                kept_rpn = tf.reshape(
                    tf.where(tf.not_equal(rpn_labels_one, -1)), [-1])
                rpn_cls_category = tf.gather(rpn_cls_category,
                                             kept_rpn)  # predictions
                acc = tf.reduce_mean(
                    tf.to_float(
                        tf.equal(
                            rpn_cls_category,
                            tf.to_int64(tf.gather(rpn_labels_one,
                                                  kept_rpn)))))
                tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

                with tf.control_dependencies([rpn_labels[level]]):
                    with tf.variable_scope(
                            'sample_RCNN_minibatch_{}'.format(level)):
                        rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \
                        tf.py_func(proposal_target_layer,
                                   [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                                   [tf.float32, tf.float32, tf.float32, tf.float32])

                        rois_fast = tf.reshape(rois_, [-1, 4])
                        labels_fast = tf.to_int32(labels_)
                        labels_fast = tf.reshape(labels_fast, [-1])
                        bbox_targets_h_fast = tf.reshape(
                            bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                        bbox_targets_r_fast = tf.reshape(
                            bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                        self.add_roi_batch_img_smry(input_img_batch, rois_fast,
                                                    labels_fast)
                        #----------------------new_add----------------------
                        rois[level] = rois_fast
                        labels[level] = labels_fast
                        bbox_targets_h[level] = bbox_targets_h_fast
                        bbox_targets_r[level] = bbox_targets_r_fast
                        labels_all.append(labels_fast)
                        bbox_targets_all_h.append(bbox_targets_h_fast)
                        bbox_targets_all_r.append(bbox_targets_r_fast)

            fast_labels = tf.concat(labels_all, axis=0)
            fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
            fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)
        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=self.feature_pyramid,
            rois_all=rois,
            img_shape=img_shape)

        # feature_to_cropped here refers to the feature maps
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(
            cls_score_h,
            'cls_prob_h')  # the horizontal and rotated branches are handled separately
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:

            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:

            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_all_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=fast_bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=fast_bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=fast_labels)
            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
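
# A minimal, self-contained sketch (TF 1.x; numpy assumed) of the tf.py_func
# pattern used above: wrapping a numpy function into the graph drops static
# shape information, so the outputs must be reshaped by hand afterwards,
# exactly as is done after anchor_target_layer and proposal_target_layer.
import numpy as np
import tensorflow as tf

def _np_targets(boxes):
    # stand-in for anchor_target_layer; returns float32 regression targets
    return (boxes * 2.0).astype(np.float32)

boxes_t = tf.placeholder(tf.float32, shape=[None, 4])
targets = tf.py_func(_np_targets, [boxes_t], tf.float32)
print(targets.get_shape())               # <unknown>: py_func loses the shape
targets = tf.reshape(targets, [-1, 4])   # restore it explicitly
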
def model(images, text_scale=512, weight_decay=1e-5, is_training=True):
    """
    define the model; we use slim's implementation of resnet
    """
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range; the same
            # is done for the angle map
            F_score = slim.conv2d(g[3],
                                  1,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is within [-45, 45] degrees
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
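
# A quick numpy sanity check (a sketch, not part of the original model) of
# the angle-map range used above: sigmoid output lies in (0, 1), so
# (sigmoid(x) - 0.5) * pi / 2 lies in (-pi/4, pi/4), i.e. within +/- 45 degrees.
import numpy as np

x = np.linspace(-10.0, 10.0, 5)
s = 1.0 / (1.0 + np.exp(-x))        # sigmoid
angle = (s - 0.5) * np.pi / 2
assert np.all(np.abs(angle) < np.pi / 4)
print(np.degrees(angle))            # approximately -45 ... 45
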
    def build_fastrcnn(self, feature_to_cropped, rois_all, img_shape):

        with tf.variable_scope('Fast-RCNN'):

            # 5. ROI Pooling
            with tf.variable_scope('rois_pooling'):
                pooled_features = self.roi_pooling(
                    feature_maps=feature_to_cropped,
                    rois=rois_all,
                    img_shape=img_shape)

            # 6. inference on rois in Fast-RCNN to obtain fc_flatten features
            if self.base_network_name.startswith('resnet'):

                fc_flatten = resnet.restnet_head(
                    input=pooled_features,
                    is_training=self.is_training,
                    scope_name=self.base_network_name
                )  # self.base_network_name
                #fc_flatten = pooled_features

            elif self.base_network_name.startswith('MobilenetV2'):
                fc_flatten = mobilenet_v2.mobilenetv2_head(
                    inputs=pooled_features, is_training=self.is_training)
            else:
                raise NotImplementedError('only resnet and mobilenet are supported')

            # 7. cls and reg in Fast-RCNN
            with tf.variable_scope('horizen_branch'):
                with slim.arg_scope([slim.fully_connected],
                                    weights_regularizer=slim.l2_regularizer(
                                        cfgs.WEIGHT_DECAY)):

                    print('*' * 20, fc_flatten.shape)
                    fc6 = slim.fully_connected(fc_flatten, 2048, scope='fc_1')

                    if self.usedropout:
                        fc6 = slim.dropout(fc6,
                                           keep_prob=0.5,
                                           is_training=self.is_training,
                                           scope='dropout_1')

                    fc7 = slim.fully_connected(fc6, 2048, scope='fc_2')
                    if self.usedropout:
                        fc7 = slim.dropout(fc7,
                                           keep_prob=0.5,
                                           is_training=self.is_training,
                                           scope='dropout_2')

                    cls_score_h = slim.fully_connected(
                        fc7,
                        num_outputs=cfgs.CLASS_NUM + 1,
                        weights_initializer=cfgs.INITIALIZER,
                        activation_fn=None,
                        trainable=self.is_training,
                        scope='cls_fc_h')

                    bbox_pred_h = slim.fully_connected(
                        fc7,
                        num_outputs=(cfgs.CLASS_NUM + 1) * 4,
                        weights_initializer=cfgs.BBOX_INITIALIZER,
                        activation_fn=None,
                        trainable=self.is_training,
                        scope='reg_fc_h')
                    # for convenience, it also produces (cls_num + 1) bboxes

                    cls_score_h = tf.reshape(cls_score_h,
                                             [-1, cfgs.CLASS_NUM + 1])
                    bbox_pred_h = tf.reshape(bbox_pred_h,
                                             [-1, 4 * (cfgs.CLASS_NUM + 1)])

            with tf.variable_scope('rotation_branch'):
                with slim.arg_scope([slim.fully_connected],
                                    weights_regularizer=slim.l2_regularizer(
                                        cfgs.WEIGHT_DECAY)):
                    cls_score_r = slim.fully_connected(
                        fc_flatten,
                        num_outputs=cfgs.CLASS_NUM + 1,
                        weights_initializer=cfgs.INITIALIZER,
                        activation_fn=None,
                        trainable=self.is_training,
                        scope='cls_fc_r')

                    bbox_pred_r = slim.fully_connected(
                        fc_flatten,
                        num_outputs=(cfgs.CLASS_NUM + 1) * 5,
                        weights_initializer=cfgs.BBOX_INITIALIZER,
                        activation_fn=None,
                        trainable=self.is_training,
                        scope='reg_fc_r')
                    # for convenience, it also produces (cls_num + 1) bboxes
                    cls_score_r = tf.reshape(cls_score_r,
                                             [-1, cfgs.CLASS_NUM + 1])
                    bbox_pred_r = tf.reshape(bbox_pred_r,
                                             [-1, 5 * (cfgs.CLASS_NUM + 1)])

            return bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r
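
# A hedged numpy sketch of how a caller could pick, per RoI, the 4-vector
# belonging to its highest-scoring class from the flat [-1, 4 * (CLASS_NUM + 1)]
# layout returned above. The helper name is illustrative, not from this repo.
import numpy as np

def select_class_boxes(bbox_pred, cls_prob):
    # bbox_pred: [N, 4 * (C + 1)], cls_prob: [N, C + 1]
    n, num_cls = cls_prob.shape
    boxes = bbox_pred.reshape(n, num_cls, 4)
    best = cls_prob.argmax(axis=1)
    return boxes[np.arange(n), best]  # [N, 4]
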
def mobilenet_v2(input, weight_decay, batch_norm_params):
    features = {}
    with tf.variable_scope('Mobilenet'):
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d], \
                            activation_fn=tf.nn.relu6,\
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('Mobilenet input shape({}): {}'.format(input.name, input.get_shape()))

            # 96*96*3   112*112*3
            conv_1 = slim.convolution2d(input, 32, [3, 3], stride=2, scope='conv_1')
            print(conv_1.name, conv_1.get_shape())

            # 48*48*32  56*56*32
            conv2_1 = slim.separable_convolution2d(conv_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv2_1/dwise')
            print(conv2_1.name, conv2_1.get_shape())
            conv2_1 = slim.convolution2d(conv2_1, 16, [1, 1], stride=1, activation_fn=None,
                                         scope='conv2_1/linear')
            print(conv2_1.name, conv2_1.get_shape())
            features['feature2'] = conv2_1
            # 48*48*16  56*56*16
            conv3_1 = slim.convolution2d(conv2_1, 96, [1, 1], stride=1, scope='conv3_1/expand')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=2, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_1/dwise')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.convolution2d(conv3_1, 24, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_1/linear')
            print(conv3_1.name, conv3_1.get_shape())

            conv3_2 = slim.convolution2d(conv3_1, 144, [1, 1], stride=1, scope='conv3_2/expand')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_2/dwise')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.convolution2d(conv3_2, 24, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_2/linear')
            print(conv3_2.name, conv3_2.get_shape())
            block_3_2 = conv3_1 + conv3_2
            print(block_3_2.name, block_3_2.get_shape())

            features['feature3'] = block_3_2
            features['pfld'] = block_3_2
            # 24*24*24   28*28*24
            conv4_1 = slim.convolution2d(block_3_2, 144, [1, 1], stride=1, scope='conv4_1/expand')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=2, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv4_1/dwise')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.convolution2d(conv4_1, 32, [1, 1], stride=1, activation_fn=None,
                                         scope='conv4_1/linear')
            print(conv4_1.name, conv4_1.get_shape())

            conv4_2 = slim.convolution2d(conv4_1, 192, [1, 1], stride=1, scope='conv4_2/expand')
            print(conv4_2.name, conv4_2.get_shape())
            conv4_2 = slim.separable_convolution2d(conv4_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv4_2/dwise')
            print(conv4_2.name, conv4_2.get_shape())
            conv4_2 = slim.convolution2d(conv4_2, 32, [1, 1], stride=1, activation_fn=None,
                                         scope='conv4_2/linear')
            print(conv4_2.name, conv4_2.get_shape())
            block_4_2 = conv4_1 + conv4_2
            print(block_4_2.name, block_4_2.get_shape())

            conv4_3 = slim.convolution2d(block_4_2, 192, [1, 1], stride=1, scope='conv4_3/expand')
            print(conv4_3.name, conv4_3.get_shape())
            conv4_3 = slim.separable_convolution2d(conv4_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv4_3/dwise')
            print(conv4_3.name, conv4_3.get_shape())
            conv4_3 = slim.convolution2d(conv4_3, 32, [1, 1], stride=1, activation_fn=None,
                                         scope='conv4_3/linear')
            print(conv4_3.name, conv4_3.get_shape())
            block_4_3 = block_4_2 + conv4_3
            print(block_4_3.name, block_4_3.get_shape())

            # 12*12*32   14*14*32
            features['feature4'] = block_4_3
            conv5_1 = slim.convolution2d(block_4_3, 192, [1, 1], stride=1, scope='conv5_1/expand')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=2, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_1/dwise')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.convolution2d(conv5_1, 64, [1, 1], stride=1,activation_fn=None,
                                         scope='conv5_1/linear')
            print(conv5_1.name, conv5_1.get_shape())

            conv5_2 = slim.convolution2d(conv5_1, 384, [1, 1], stride=1, scope='conv5_2/expand')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_2/dwise')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.convolution2d(conv5_2, 64, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_2/linear')
            print(conv5_2.name, conv5_2.get_shape())
            block_5_2 = conv5_1 + conv5_2
            print(block_5_2.name, block_5_2.get_shape())

            conv5_3 = slim.convolution2d(block_5_2, 384, [1, 1], stride=1, scope='conv5_3/expand')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_3/dwise')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.convolution2d(conv5_3, 64, [1, 1], stride=1,  activation_fn=None,
                                         scope='conv5_3/linear')
            print(conv5_3.name, conv5_3.get_shape())
            block_5_3 = block_5_2 + conv5_3
            print(block_5_3.name, block_5_3.get_shape())

            conv5_4 = slim.convolution2d(block_5_3, 384, [1, 1], stride=1, scope='conv5_4/expand')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_4/dwise')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.convolution2d(conv5_4, 64, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_4/linear')
            print(conv5_4.name, conv5_4.get_shape())
            block_5_4 = block_5_3 + conv5_4
            print(block_5_4.name, block_5_4.get_shape())

            # 6*6*64    7*7*64
            conv6_1 = slim.convolution2d(block_5_4, 384, [1, 1], stride=1, scope='conv6_1/expand')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv6_1/dwise')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.convolution2d(conv6_1, 96, [1, 1], stride=1, activation_fn=None,
                                         scope='conv6_1/linear')
            print(conv6_1.name, conv6_1.get_shape())

            conv6_2 = slim.convolution2d(conv6_1, 576, [1, 1], stride=1, scope='conv6_2/expand')
            print(conv6_2.name, conv6_2.get_shape())
            conv6_2 = slim.separable_convolution2d(conv6_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv6_2/dwise')
            print(conv6_2.name, conv6_2.get_shape())
            conv6_2 = slim.convolution2d(conv6_2, 96, [1, 1], stride=1, activation_fn=None,
                                         scope='conv6_2/linear')
            print(conv6_2.name, conv6_2.get_shape())
            block_6_2 = conv6_1 + conv6_2
            print(block_6_2.name, block_6_2.get_shape())

            conv6_3 = slim.convolution2d(block_6_2, 576, [1, 1], stride=1, scope='conv6_3/expand')
            print(conv6_3.name, conv6_3.get_shape())
            conv6_3 = slim.separable_convolution2d(conv6_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv6_3/dwise')
            print(conv6_3.name, conv6_3.get_shape())
            conv6_3 = slim.convolution2d(conv6_3, 96, [1, 1], stride=1, activation_fn=None,
                                         scope='conv6_3/linear')
            print(conv6_3.name, conv6_3.get_shape())
            block_6_3 = block_6_2 + conv6_3
            print(block_6_3.name, block_6_3.get_shape())

            features['feature5'] = block_6_3
            # 6*6*96    7*7*96
            conv7_1 = slim.convolution2d(block_6_3, 576, [1, 1], stride=1, scope='conv7_1/expand')
            print(conv7_1.name, conv7_1.get_shape())
            conv7_1 = slim.separable_convolution2d(conv7_1, num_outputs=None, stride=2, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv7_1/dwise')
            print(conv7_1.name, conv7_1.get_shape())
            conv7_1 = slim.convolution2d(conv7_1, 160, [1, 1], stride=1, activation_fn=None,
                                         scope='conv7_1/linear')
            print(conv7_1.name, conv7_1.get_shape())

            conv7_2 = slim.convolution2d(conv7_1, 960, [1, 1], stride=1, scope='conv7_2/expand')
            print(conv7_2.name, conv7_2.get_shape())
            conv7_2 = slim.separable_convolution2d(conv7_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv7_2/dwise')
            print(conv7_2.name, conv7_2.get_shape())
            conv7_2 = slim.convolution2d(conv7_2, 160, [1, 1], stride=1, activation_fn=None,
                                         scope='conv7_2/linear')
            print(conv7_2.name, conv7_2.get_shape())
            block_7_2 = conv7_1 + conv7_2
            print(block_7_2.name, block_7_2.get_shape())


            conv7_3 = slim.convolution2d(block_7_2, 960, [1, 1], stride=1, scope='conv7_3/expand')
            print(conv7_3.name, conv7_3.get_shape())
            conv7_3 = slim.separable_convolution2d(conv7_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv7_3/dwise')
            print(conv7_3.name, conv7_3.get_shape())
            conv7_3 = slim.convolution2d(conv7_3, 160, [1, 1], stride=1, activation_fn=None,
                                         scope='conv7_3/linear')
            print(conv7_3.name, conv7_3.get_shape())
            block_7_3 = block_7_2 + conv7_3
            print(block_7_3.name, block_7_3.get_shape())

            conv7_4 = slim.convolution2d(block_7_3, 960, [1, 1], stride=1, scope='conv7_4/expand')
            print(conv7_4.name, conv7_4.get_shape())
            conv7_4 = slim.separable_convolution2d(conv7_4, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv7_4/dwise')
            print(conv7_4.name, conv7_4.get_shape())
            conv7_4 = slim.convolution2d(conv7_4, 320, [1, 1], stride=1, activation_fn=None,
                                         scope='conv7_4/linear')
            print(conv7_4.name, conv7_4.get_shape())
            features['feature6'] = conv7_4
    return features
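
# Every block above follows the same MobileNetV2 inverted-residual pattern:
# a 1x1 expansion, a 3x3 depthwise convolution, a linear 1x1 projection, and
# a skip connection when the stride is 1 and the channel counts match. A
# hedged refactoring sketch of that pattern (the helper name is illustrative):
import tensorflow.contrib.slim as slim

def inverted_residual(net, expand_ch, out_ch, stride, scope):
    x = slim.convolution2d(net, expand_ch, [1, 1], stride=1,
                           scope=scope + '/expand')
    x = slim.separable_convolution2d(x, num_outputs=None, stride=stride,
                                     depth_multiplier=1, kernel_size=[3, 3],
                                     scope=scope + '/dwise')
    x = slim.convolution2d(x, out_ch, [1, 1], stride=1, activation_fn=None,
                           scope=scope + '/linear')
    if stride == 1 and net.get_shape().as_list()[-1] == out_ch:
        x = net + x  # residual connection
    return x
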
def pfld_inference(input, weight_decay, batch_norm_params):

    coefficient = 1
    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            activation_fn=tf.nn.relu6,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))
            # 112*112*3
            conv1 = slim.convolution2d(input, 64*coefficient, [3, 3], stride=2, scope='conv_1')
            print(conv1.name, conv1.get_shape())

            # 56*56*64
            conv2 = slim.separable_convolution2d(conv1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv2/dwise')
            print(conv2.name, conv2.get_shape())

            # 56*56*64
            conv3_1 = slim.convolution2d(conv2, 128, [1, 1], stride=2, activation_fn=None, scope='conv3_1/expand')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_1/dwise')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.convolution2d(conv3_1, 64*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_1/linear')
            print(conv3_1.name, conv3_1.get_shape())

            conv3_2 = slim.convolution2d(conv3_1, 128, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_2/expand')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_2/dwise')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.convolution2d(conv3_2, 64*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_2/linear')
            print(conv3_2.name, conv3_2.get_shape())

            block3_2 = conv3_1 + conv3_2
            print(block3_2.name, block3_2.get_shape())

            conv3_3 = slim.convolution2d(block3_2, 128, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_3/expand')
            print(conv3_3.name, conv3_3.get_shape())
            conv3_3 = slim.separable_convolution2d(conv3_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_3/dwise')
            print(conv3_3.name, conv3_3.get_shape())
            conv3_3 = slim.convolution2d(conv3_3, 64*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_3/linear')
            print(conv3_3.name, conv3_3.get_shape())

            block3_3 = block3_2 + conv3_3
            print(block3_3.name, block3_3.get_shape())

            conv3_4 = slim.convolution2d(block3_3, 128, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_4/expand')
            print(conv3_4.name, conv3_4.get_shape())
            conv3_4 = slim.separable_convolution2d(conv3_4, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_4/dwise')
            print(conv3_4.name, conv3_4.get_shape())
            conv3_4 = slim.convolution2d(conv3_4, 64*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_4/linear')
            print(conv3_4.name, conv3_4.get_shape())

            block3_4 = block3_3 + conv3_4
            print(block3_4.name, block3_4.get_shape())

            conv3_5 = slim.convolution2d(block3_4, 128, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_5/expand')
            print(conv3_5.name, conv3_5.get_shape())
            conv3_5 = slim.separable_convolution2d(conv3_5, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv3_5/dwise')
            print(conv3_5.name, conv3_5.get_shape())
            conv3_5 = slim.convolution2d(conv3_5, 64*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv3_5/linear')
            print(conv3_5.name, conv3_5.get_shape())

            block3_5 = block3_4 + conv3_5
            print(block3_5.name, block3_5.get_shape())

            features['auxiliary_input'] = block3_5

            #28*28*64
            conv4_1 = slim.convolution2d(block3_5, 128, [1, 1], stride=2, activation_fn=None,
                                         scope='conv4_1/expand')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv4_1/dwise')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.convolution2d(conv4_1, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv4_1/linear')
            print(conv4_1.name, conv4_1.get_shape())

            #14*14*128
            conv5_1 = slim.convolution2d(conv4_1, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_1/expand')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_1/dwise')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.convolution2d(conv5_1, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_1/linear')
            print(conv5_1.name, conv5_1.get_shape())

            conv5_2 = slim.convolution2d(conv5_1, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_2/expand')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_2/dwise')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.convolution2d(conv5_2, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_2/linear')
            print(conv5_2.name, conv5_2.get_shape())

            block5_2 = conv5_1 + conv5_2
            print(block5_2.name, block5_2.get_shape())

            conv5_3 = slim.convolution2d(block5_2, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_3/expand')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_3/dwise')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.convolution2d(conv5_3, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_3/linear')
            print(conv5_3.name, conv5_3.get_shape())

            block5_3 = block5_2 + conv5_3
            print(block5_3.name, block5_3.get_shape())

            conv5_4 = slim.convolution2d(block5_3, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_4/expand')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_4/dwise')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.convolution2d(conv5_4, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_4/linear')
            print(conv5_4.name, conv5_4.get_shape())

            block5_4 = block5_3 + conv5_4
            print(block5_4.name, block5_4.get_shape())

            conv5_5 = slim.convolution2d(block5_4, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_5/expand')
            print(conv5_5.name, conv5_5.get_shape())
            conv5_5 = slim.separable_convolution2d(conv5_5, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_5/dwise')
            print(conv5_5.name, conv5_5.get_shape())
            conv5_5 = slim.convolution2d(conv5_5, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_5/linear')
            print(conv5_5.name, conv5_5.get_shape())

            block5_5 = block5_4 + conv5_5
            print(block5_5.name, block5_5.get_shape())

            conv5_6 = slim.convolution2d(block5_5, 512, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_6/expand')
            print(conv5_6.name, conv5_6.get_shape())
            conv5_6 = slim.separable_convolution2d(conv5_6, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv5_6/dwise')
            print(conv5_6.name, conv5_6.get_shape())
            conv5_6 = slim.convolution2d(conv5_6, 128*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv5_6/linear')
            print(conv5_6.name, conv5_6.get_shape())

            block5_6 = block5_5 + conv5_6
            print(block5_6.name, block5_6.get_shape())

            #14*14*128
            conv6_1 = slim.convolution2d(block5_6, 256, [1, 1], stride=1, activation_fn=None, scope='conv6_1/expand')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1,
                                                   kernel_size=[3, 3], scope='conv6_1/dwise')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.convolution2d(conv6_1, 16*coefficient, [1, 1], stride=1, activation_fn=None,
                                         scope='conv6_1/linear')
            print(conv6_1.name, conv6_1.get_shape())

            #14*14*16
            conv7 = slim.convolution2d(conv6_1, 32*coefficient, [3, 3], stride=2, activation_fn=None, scope='conv7')
            print(conv7.name, conv7.get_shape())

            #7*7*32
            conv8 = slim.convolution2d(conv7, 128*coefficient, [7, 7], stride=1, activation_fn=None, padding='VALID',
                                       scope='conv8')
            print(conv8.name, conv8.get_shape())

            # avg_pool1 = slim.avg_pool2d(conv6_1, [conv6_1.get_shape()[1], conv6_1.get_shape()[2]], stride=1)
            # print(avg_pool1.name, avg_pool1.get_shape())
            #
            # avg_pool2 = slim.avg_pool2d(conv7,[conv7.get_shape()[1],conv7.get_shape()[2]],stride=1)
            # print(avg_pool2.name,avg_pool2.get_shape())
            #
            # s1 = slim.flatten(avg_pool1)
            # s2 = slim.flatten(avg_pool2)
            s1 = slim.flatten(conv6_1)
            s2 = slim.flatten(conv7)
            #1*1*128
            s3 = slim.flatten(conv8)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=136, activation_fn=None, scope='fc')
            print(landmarks.name, landmarks.get_shape())
        return features, landmarks
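
# The 136-dimensional 'landmarks' output above presumably encodes 68 (x, y)
# facial-landmark pairs. A small numpy decoding sketch (illustrative only):
import numpy as np

pred = np.zeros(136, dtype=np.float32)  # one row of the final fc output
points = pred.reshape(68, 2)            # one (x, y) pair per landmark
print(points.shape)                     # (68, 2)
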
def main(args):
    network = importlib.import_module(args.model_def)
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model metagraph and checkpoint
            image_batch = tf.placeholder(tf.float32,
                                         shape=(None, args.image_size,
                                                args.image_size, 3),
                                         name='input')
            # Build the inference graph
            batch_norm_params = {
                # Decay for the moving averages
                'decay': 0.995,
                # epsilon to prevent 0s in variance
                'epsilon': 0.001,
                # force in-place updates of mean and variance estimates
                'updates_collections': None,
                # Moving averages ends up in the trainable variables collection
                'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
                # Only update statistics during training mode
                'is_training': False
            }
            prelogits, _ = network.inference(image_batch,
                                             args.keep_probability,
                                             phase_train=False,
                                             weight_decay=args.weight_decay)
            bottleneck = slim.fully_connected(
                prelogits,
                args.embedding_size,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.1),
                weights_regularizer=slim.l2_regularizer(args.weight_decay),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                scope='Bottleneck',
                reuse=False)
            embeddings = tf.nn.l2_normalize(bottleneck,
                                            1,
                                            1e-10,
                                            name='embeddings')
            print('Model directory: %s' % args.model_dir)
            # _, ckpt_file = get_model_filenames(os.path.expanduser(args.model_dir))
            meta_file, ckpt_file = facenet.get_model_filenames(
                os.path.expanduser(args.model_dir)
            )  #clpham:to fix "Key Bottleneck/BatchNorm/beta not found..."

            print('Checkpoint file: %s' % ckpt_file)

            model_dir_exp = os.path.expanduser(args.model_dir)
            # saver = tf.train.Saver()
            saver = tf.train.import_meta_graph(
                os.path.join(model_dir_exp, meta_file), clear_devices=True
            )  #clpham:to fix "Key Bottleneck/BatchNorm/beta not found"
            tf.get_default_session().run(tf.global_variables_initializer())
            tf.get_default_session().run(tf.local_variables_initializer())
            saver.restore(tf.get_default_session(),
                          os.path.join(model_dir_exp, ckpt_file))

            # Retrieve the protobuf graph definition and fix the batch norm nodes
            gd = sess.graph.as_graph_def()
            for node in gd.node:
                if node.op == 'RefSwitch':
                    node.op = 'Switch'
                    for index in range(
                            len(node.input
                                )):  #clpham: was=xrange, to support python3
                        if 'moving_' in node.input[index]:
                            node.input[index] = node.input[index] + '/read'
                elif node.op == 'AssignSub':
                    node.op = 'Sub'
                    if 'use_locking' in node.attr: del node.attr['use_locking']
                elif node.op == 'AssignAdd':
                    node.op = 'Add'
                    if 'use_locking' in node.attr: del node.attr['use_locking']

            # Get the list of important nodes
            output_node_names = 'embeddings'
            whitelist_names = []
            for node in gd.node:
                if node.name.startswith(
                        'InceptionResnetV1') or node.name.startswith(
                            'embeddings') or node.name.startswith(
                                'phase_train') or node.name.startswith(
                                    'Bottleneck'):
                    print(node.name)
                    whitelist_names.append(node.name)

            # Replace all the variables in the graph with constants of the same values
            output_graph_def = graph_util.convert_variables_to_constants(
                sess,
                gd,
                output_node_names.split(","),
                variable_names_whitelist=whitelist_names)

        # Serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(args.output_file, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))
def model():

    x = tf.placeholder(dtype=tf.float32,
                       shape=[batch_size, 32, 32, 3],
                       name='Input')
    y = tf.placeholder(dtype=tf.float32, shape=[batch_size], name='True_Y')
    y = tf.cast(y, tf.int64)
    keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='dropout')
    is_training = tf.placeholder(tf.bool, shape=())

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.crelu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={
                            'is_training': is_training,
                            'decay': 0.95
                        }):
        h = slim.conv2d(inputs=x,
                        num_outputs=24,
                        kernel_size=2,
                        weights_regularizer=slim.l2_regularizer(0.0016))
        h = slim.conv2d(inputs=h,
                        num_outputs=57,
                        kernel_size=3,
                        weights_regularizer=slim.l2_regularizer(0.0001))
        h = slim.conv2d(inputs=h,
                        num_outputs=63,
                        kernel_size=5,
                        weights_regularizer=slim.l2_regularizer(0.0096))
        h = slim.conv2d(inputs=h,
                        num_outputs=35,
                        kernel_size=5,
                        weights_regularizer=slim.l2_regularizer(0.0071))
        h = slim.conv2d(inputs=h,
                        num_outputs=76,
                        kernel_size=3,
                        weights_regularizer=slim.l2_regularizer(0.0015))
        h = slim.max_pool2d(h, kernel_size=2, stride=2)
        flatten = slim.flatten(h)
        full = slim.fully_connected(flatten, 512)
        drop_full = slim.dropout(full, keep_prob)
        with tf.name_scope('accuracy'):
            logits = slim.fully_connected(drop_full, 10, activation_fn=None)
            correct_prediction = tf.equal(tf.argmax(logits, 1), y)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        with tf.name_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=y, logits=logits)) + tf.add_n(
                        tf.losses.get_regularization_losses())
        with tf.name_scope('train'):
            optimizer = tf.train.AdamOptimizer()
            step = tf.get_variable("step", [],
                                   initializer=tf.constant_initializer(0.0),
                                   trainable=False)
            train_op = slim.learning.create_train_op(loss,
                                                     optimizer,
                                                     global_step=step)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if update_ops:
                updates = tf.group(*update_ops)
                loss = control_flow_ops.with_dependencies([updates], loss)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            train_data, train_label = get_data.get_train_data(True)
            validate_data, validate_label = get_data.get_test_data(True)
            epochs = total_epochs
            for current_epoch in range(epochs):
                train_loss_list = []
                train_accu_list = []
                total_length = train_data.shape[0]
                idx = np.arange(total_length)
                np.random.shuffle(idx)
                train_data = train_data[idx]
                train_label = train_label[idx]
                total_steps = total_length // batch_size
                for step in range(total_steps):
                    batch_train_data = train_data[step *
                                                  batch_size:(step + 1) *
                                                  batch_size]
                    batch_train_label = train_label[step *
                                                    batch_size:(step + 1) *
                                                    batch_size]
                    _, loss_v, accuracy_str = sess.run(
                        [train_op, loss, accuracy], {
                            x: batch_train_data,
                            y: batch_train_label,
                            keep_prob: 0.5,
                            is_training: True
                        })
                    train_loss_list.append(loss_v)
                    train_accu_list.append(accuracy_str)

                #test
                test_length = validate_data.shape[0]
                test_steps = test_length // batch_size
                test_loss_list = []
                test_accu_list = []
                for step in range(test_steps):
                    batch_test_data = validate_data[step *
                                                    batch_size:(step + 1) *
                                                    batch_size]
                    batch_test_label = validate_label[step *
                                                      batch_size:(step + 1) *
                                                      batch_size]
                    loss_v, accuracy_str = sess.run(
                        [loss, accuracy], {
                            x: batch_test_data,
                            y: batch_test_label,
                            keep_prob: 1.0,
                            is_training: False
                        })
                    test_loss_list.append(loss_v)
                    test_accu_list.append(accuracy_str)

                print(
                    '{}, epoch:{}/{}, step:{}/{}, loss:{:.6f}, accu:{:.4f}, test loss:{:.6f}, accu:{:.4f}'
                    .format(datetime.now(), current_epoch, total_epochs,
                            total_steps * current_epoch + step,
                            total_steps * epochs, np.mean(train_loss_list),
                            np.mean(train_accu_list), np.mean(test_loss_list),
                            np.mean(test_accu_list)))
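
# Note on the activation used above: tf.nn.crelu concatenates relu(x) and
# relu(-x) along the channel axis, so every conv/fc layer effectively doubles
# its output channel count. A tiny TF 1.x check:
import tensorflow as tf

t = tf.zeros([1, 8, 8, 24])
out = tf.nn.crelu(t)
print(out.get_shape())  # (1, 8, 8, 48): channels doubled
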
    def forward(self, inputs, is_training=False, reuse=False):
        # the input img_size, form: [height, width]
        self.img_size = tf.shape(inputs)[1:3]

        # set batch norm params
        batch_norm_params = {
            'decay': self.batch_norm_decay,
            'epsilon': 1e-05,
            'scale': True,
            'is_training': is_training,
            'fused': None,  # Use fused batch norm if possible.
        }

        with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
            with slim.arg_scope(
                [slim.conv2d],
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params,
                    biases_initializer=None,
                    activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):
                with tf.variable_scope('darknet53_body'):
                    route_1, route_2, route_3 = darknet53_body(inputs)

                with tf.variable_scope('yolov3_head'):
                    inter1, net = yolo_block(route_3, 512)
                    feature_map_1 = slim.conv2d(
                        net,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(0.001),
                        weights_initializer=tf.contrib.layers.
                        xavier_initializer(),
                        biases_initializer=tf.zeros_initializer())
                    feature_map_1 = tf.identity(feature_map_1,
                                                name='feature_map_1')

                    inter1 = conv2d(inter1, 256, 1)
                    inter1 = upsample_layer(inter1,
                                            route_2.get_shape().as_list())
                    concat1 = tf.concat([inter1, route_2], axis=3)

                    inter2, net = yolo_block(concat1, 256)
                    feature_map_2 = slim.conv2d(
                        net,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(0.001),
                        weights_initializer=tf.contrib.layers.
                        xavier_initializer(),
                        biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2,
                                                name='feature_map_2')

                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list())
                    concat2 = tf.concat([inter2, route_1], axis=3)

                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(
                        feature_map_3,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(0.001),
                        weights_initializer=tf.contrib.layers.
                        xavier_initializer(),
                        biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3,
                                                name='feature_map_3')

            return feature_map_1, feature_map_2, feature_map_3
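
# Each feature map above carries 3 * (5 + class_num) channels: 3 anchors per
# cell, each with 4 box offsets, 1 objectness logit, and class_num class
# logits. A hedged TF 1.x sketch of splitting one map into those parts
# (class_num and the 13x13 grid below are assumed values for illustration):
import tensorflow as tf

class_num = 80
fmap = tf.zeros([1, 13, 13, 3 * (5 + class_num)])
fmap = tf.reshape(fmap, [-1, 13, 13, 3, 5 + class_num])
box_offsets = fmap[..., 0:4]   # tx, ty, tw, th
objectness = fmap[..., 4:5]
class_logits = fmap[..., 5:]
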
with tf.Session(config=cuda_set(GPU)) as sess:
    N = tf.placeholder(tf.float32, [None] + tensor_shape['neck'], name='neck')
    Y = tf.placeholder(tf.float32, [None] + tensor_shape['mid'], name='mid')
    C = tf.placeholder(tf.int32, [None], name='C')
    is_train = tf.placeholder(tf.bool, name='is_training')
    label = tf.squeeze(tf.one_hot(C, c, 1.0, 0.0, 1, tf.float32), name='label')
    Z = PSAM_Fusion(N, Y)
    # Z = PSAM_TOP(Y)
    #Z = slim.dropout(Z, 0.8, is_training=is_train,scope='fc_drop')
    Z = tf.expand_dims(tf.expand_dims(Z, 1), 1)
    print(Z)
    with slim.arg_scope([slim.conv2d],
                        activation_fn=None,
                        weights_initializer=tf.truncated_normal_initializer(
                            0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.01)):
        logits_Z = slim.conv2d(Z, c, [1, 1], scope='fc_Z')
    logits_Z = tf.squeeze(logits_Z, [1, 2])
    score = tf.nn.softmax(logits_Z, name='softmax')
    #    pre_cross = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label,logits=logits_N))
    #    slim.losses.add_loss(pre_cross*0.1)
    loss_cross = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits_Z))
    slim.losses.add_loss(loss_cross)
    # slim.losses.add_loss(loss_tight*0.05)
    total_loss = slim.losses.get_total_loss()  #+ loss_tight * tight_a
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(total_loss)
    correct_prediction = tf.equal(tf.argmax(label, 1), tf.argmax(logits_Z, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    output = tf.squeeze(logits_Z, name='output')
    sess.run(tf.global_variables_initializer())
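
# The classifier above expands Z to [N, 1, 1, C] and applies a 1x1 conv,
# which is mathematically a fully connected layer. A minimal numpy check
# (illustrative, not from the original code):
import numpy as np

feat = np.random.randn(2, 1, 1, 16)   # [N, 1, 1, C_in]
w = np.random.randn(16, 10)           # a 1x1 conv kernel acts as fc weights
conv_out = np.einsum('nhwc,cd->nhwd', feat, w).squeeze((1, 2))
fc_out = feat.reshape(2, 16).dot(w)
assert np.allclose(conv_out, fc_out)
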
    def __ctpn_base(self):
        """
        特征提取层 feature extract layer
        :return: proposal_predicted : shape = [1, h, w, A*4
                 proposal_cls_score: shape = [1, h, w, A*cfg["CLASSES_NUM"]]
                 proposal_cls_prob: shape = [1, h, w, A*cfg["CLASSES_NUM"]]
        """
        stddev = 0.01
        weight_decay = cfg["TRAIN"]["WEIGHT_DECAY"]

        assert cfg["ANCHOR_WIDTH"] == 8 or cfg["ANCHOR_WIDTH"] == 16, \
            'Anchor must be 8 or 16!Not be {}.'.format(cfg["ANCHOR_WIDTH"])

        with tf.variable_scope("CTPN_Network"):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                    weights_initializer=tf.truncated_normal_initializer(
                        0.0, stddev=stddev),
                    weights_regularizer=slim.l2_regularizer(weight_decay)):

                if cfg["BACKBONE"] == "InceptionNet":
                    features, featuremap_scale = inception_base(self.img_input)
                elif cfg["BACKBONE"] == "VggNet":
                    features, featuremap_scale = vgg_base(self.img_input)
                else:
                    assert 0, "error: backbone {} is not support!".format(
                        cfg["BACKBONE"])

            print('featuremap_scale is {}, anchor width is {}'.format(
                featuremap_scale, cfg['ANCHOR_WIDTH']))
            assert featuremap_scale == cfg['ANCHOR_WIDTH']

            print("using {} backbone...".format(cfg["BACKBONE"]))

            features = slim.conv2d(features, 512, [3, 3], scope='rpn_conv_3x3')

            if cfg["USE_LSTM"]:
                features = self.__bilstm(features, 512, 128, 512)
            else:
                features = self.__semantic_info_extract_layer(features)
            print('Using LSTM?', cfg["USE_LSTM"])

            proposal_predicted = self._lstm_fc(features,
                                               512,
                                               10 * 4,
                                               scope_name="bbox_pred")
            proposal_cls_score = self._lstm_fc(features,
                                               512,
                                               10 * 2,
                                               scope_name="cls_pred")
            # # proposal_predicted shape = [1, h, w, A*4]
            # proposal_predicted = slim.conv2d(features, len(cfg["ANCHOR_HEIGHT"]) * 4, [1, 1], scope='proposal_conv_1x1', activation_fn=None)
            # # proposal_cls_score shape = [1, h, w, A*cfg["CLASSES_NUM"]]
            # proposal_cls_score = slim.conv2d(features, len(cfg["ANCHOR_HEIGHT"]) * cfg["CLASSES_NUM"], [1, 1], scope='cls_conv_1x1', activation_fn=None)

            proposal_cls_score_shape = tf.shape(proposal_cls_score)
            # reshape to [1, h, w*A, cfg["CLASSES_NUM"]], then flatten to
            # [h*w*A, cfg["CLASSES_NUM"]]
            proposal_cls_score_reshape = tf.reshape(proposal_cls_score, [
                proposal_cls_score_shape[0], proposal_cls_score_shape[1], -1,
                cfg["CLASSES_NUM"]
            ])
            proposal_cls_score_reshape_shape = tf.shape(
                proposal_cls_score_reshape)
            proposal_cls_score_reshape = tf.reshape(proposal_cls_score_reshape,
                                                    [-1, cfg["CLASSES_NUM"]])
            # proposal_cls_prob shape = [1, h, w*A, cfg["CLASSES_NUM"]]
            proposal_cls_prob = tf.reshape(
                tf.nn.softmax(proposal_cls_score_reshape), [
                    -1, proposal_cls_score_reshape_shape[1],
                    proposal_cls_score_reshape_shape[2],
                    proposal_cls_score_reshape_shape[3]
                ])

        return proposal_predicted, proposal_cls_score, proposal_cls_prob
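
# A toy sanity check (shapes assumed) of the per-anchor softmax above:
# scores of shape [1, h, w, A*C] are flattened to one row per anchor,
# softmaxed over the C classes, and reshaped back to [1, h, w*A, C].
import tensorflow as tf

scores = tf.random_normal([1, 4, 6, 10 * 2])                # h=4, w=6, A=10, C=2
flat = tf.reshape(scores, [-1, 2])                          # (h*w*A) x C
probs = tf.reshape(tf.nn.softmax(flat), [1, 4, 6 * 10, 2])  # [1, h, w*A, C]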
Example #50
def model():
    is_training = tf.placeholder(tf.bool, [])
    train_images, train_label = data.get_train_data(batch_size)
    test_images, test_label = data.get_test_data(batch_size)
    x = tf.cond(is_training, lambda: train_images, lambda: test_images)
    y_ = tf.cond(is_training, lambda: train_label, lambda: test_label)
    y_ = tf.cast(y_, tf.int64)
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.crelu,
                        normalizer_fn=slim.batch_norm,
                        weights_regularizer=slim.l2_regularizer(0.005),
                        normalizer_params={
                            'is_training': is_training,
                            'decay': 0.95
                        }):
        conv1 = slim.conv2d(
            x,
            48, [9, 9],
            weights_initializer=tf.truncated_normal_initializer(mean=-0.08,
                                                                stddev=0.63))
        pool1 = slim.max_pool2d(conv1, [4, 4], stride=4, padding='SAME')
        conv2 = slim.conv2d(
            pool1,
            43, [7, 7],
            weights_initializer=tf.truncated_normal_initializer(mean=-0.23,
                                                                stddev=0.22))
        pool2 = slim.max_pool2d(conv2, [4, 4], stride=4, padding='SAME')
        pool3 = slim.avg_pool2d(pool2, [3, 3], stride=3, padding='SAME')
        flatten = slim.flatten(pool3)
        logits = slim.fully_connected(
            flatten,
            2,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(
                mean=0.726, stddev=0.397992),
            biases_initializer=tf.constant_initializer(0.1, dtype=tf.float32))
        correct_prediction = tf.equal(tf.argmax(logits, 1), y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y_, logits=logits)) + regularization_loss
        step = tf.get_variable("step", [],
                               initializer=tf.constant_initializer(0.0),
                               trainable=False)

        #         lr = tf.train.exponential_decay(0.1,
        #                                   step,
        #                                   550*30,
        #                                   0.9,
        #                                   staircase=True)
        #
        #
        #         optimizer = tf.train.GradientDescentOptimizer(lr)
        optimizer = tf.train.AdamOptimizer(0.001)
        #         lr_summary = tf.summary.scalar('lr', lr)
        train_step = slim.learning.create_train_op(cross_entropy,
                                                   optimizer,
                                                   global_step=step)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            cross_entropy = control_flow_ops.with_dependencies([updates],
                                                               cross_entropy)

        loss_summary = tf.summary.scalar('loss', cross_entropy)
        accuracy_summary = tf.summary.scalar('accuracy', accuracy)
        merge_summary = tf.summary.merge([loss_summary, accuracy_summary])
        return is_training, train_step, step, accuracy, cross_entropy, merge_summary
Example #51
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
import os
from tensorflow.contrib.data import Dataset
from tensorflow.contrib.layers import conv2d

reg = slim.l2_regularizer(scale=0.001)


def crop_and_concat(x1, x2):
    with tf.name_scope("crop_and_concat"):
        x1_shape = tf.shape(x1)
        x2_shape = tf.shape(x2)
        # offsets for the top left corner of the crop
        offsets = [
            0, (x1_shape[1] - x2_shape[1]) // 2,
            (x1_shape[2] - x2_shape[2]) // 2, 0
        ]
        size = [-1, x2_shape[1], x2_shape[2], -1]
        x1_crop = tf.slice(x1, offsets, size)
        return tf.concat([x1_crop, x2], 3)
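
# A quick usage sketch (toy shapes assumed): the larger map x1 is
# center-cropped to x2's spatial size before the channel concat.
a = tf.zeros([1, 64, 64, 32])
b = tf.zeros([1, 56, 56, 64])
merged = crop_and_concat(a, b)  # runtime shape: [1, 56, 56, 32 + 64]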


def standard_unit(inputs, stage, nb_filter, kernel_size=3):
    x = slim.conv2d(inputs,
                    nb_filter, [3, 3],
                    rate=1,
                    activation_fn=None,
                    weights_regularizer=reg)
    x = slim.batch_norm(x)
    # The source snippet is truncated here; assumed completion: activation
    # after batch norm, then return the block output.
    x = tf.nn.relu(x)
    return x
Example #52
def O_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    print('O_Net')
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv1")
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[3, 3],
                              stride=2,
                              scope="pool1",
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv2")
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2")
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv3")
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope="pool3",
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[2, 2],
                          stride=1,
                          scope="conv4")
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope="fc1")
        print(fc1.get_shape())
        #batch*2
        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope="cls_fc",
                                        activation_fn=tf.nn.softmax)
        print('cls_fc', cls_prob.get_shape())
        #batch*4
        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope="bbox_fc",
                                         activation_fn=None)
        print('bbox_fc', bbox_pred.get_shape())
        #batch*(no_landmarks*2)
        landmark_pred = slim.fully_connected(fc1,
                                             num_outputs=(no_landmarks * 2),
                                             scope="landmark_fc",
                                             activation_fn=None)
        print('landmark_fc', landmark_pred.get_shape())
        #train
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, landmark_pred
        else:
            return cls_prob, bbox_pred, landmark_pred
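
# A minimal inference wiring sketch (assumed: the MTCNN convention of 48x48
# RGB crops for O-Net; tf, prelu and friends come from the surrounding module):
inputs = tf.placeholder(tf.float32, [None, 48, 48, 3], name='input_image')
cls_prob, bbox_pred, landmark_pred = O_Net(inputs, training=False)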
Example #53
def build_network(images, num_classes=default.num_classes, training=None):
    tf.logging.info("Loading CNN Model")

    if config.stn:
        tf.logging.info("Start to loading stn network")
        # locnet
        with slim.arg_scope(
            [slim.conv2d],
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                biases_initializer=None):
            with tf.variable_scope('Loc_Net'):
                n_fc = 6
                #B, H, W, C = images.shape
                # identity transform
                initial = np.array([[1., 0, 0], [0, 1., 0]])
                initial = initial.astype('float32').flatten()
                # Output Layer Transformation
                # localization network

                # 64 x 128
                avg_net = slim.avg_pool2d(images,
                                          kernel_size=2,
                                          stride=2,
                                          scope="pool1")
                # 32 x 64
                conv1_1_net = slim.conv2d(avg_net,
                                          32,
                                          kernel_size=3,
                                          stride=4,
                                          scope='conv1_1')
                # 8 x 16
                conv1_2_net = slim.conv2d(images,
                                          32,
                                          kernel_size=5,
                                          stride=8,
                                          scope='conv1_2')

                loc_concat_net = tf.concat([conv1_1_net, conv1_2_net],
                                           3,
                                           name='concat')
                #loc_net = slim.repeat(images, 2, slim.conv2d, 32, kernel_size=3, stride=1, scope='loc_conv1')
                #loc_net = slim.max_pool2d(loc_net, kernel_size=2, stride=2, scope='loc_pool1')
                # 8 x 16
                loc_net = slim.conv2d(conv1_2_net,
                                      128,
                                      kernel_size=3,
                                      stride=1,
                                      scope='conv3')
                loc_net = slim.batch_norm(loc_net,
                                          decay=_BATCH_DECAY,
                                          is_training=training,
                                          scope='bn1')
                loc_net = slim.conv2d(loc_net,
                                      32,
                                      kernel_size=3,
                                      stride=1,
                                      scope='conv4')
                loc_net = slim.batch_norm(loc_net,
                                          decay=_BATCH_DECAY,
                                          is_training=training,
                                          scope='bn2')
                loc_net = slim.max_pool2d(loc_net,
                                          kernel_size=5,
                                          stride=4,
                                          scope='pool3')
                # 2 x 4
                loc_net = slim.conv2d(loc_net,
                                      16,
                                      kernel_size=3,
                                      stride=1,
                                      scope='conv5')
                loc_net = tf.reduce_mean(input_tensor=loc_net,
                                         axis=[1, 2],
                                         keep_dims=False,
                                         name="se_pool1")
                loc_net = tf.reshape(loc_net, [loc_net.shape[0], -1])
                loc_B, loc_W = loc_net.shape
                W_fc1 = tf.Variable(tf.zeros([loc_W, n_fc]), name='W_fc1')
                b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
                loc_net = tf.matmul(loc_net, W_fc1) + b_fc1
                loc_output = spatial_transformer_network(images, loc_net)
                images = loc_output
                tf.logging.info("stn network loaded...")

        # 1 x 2

    if config.rgb:
        tf.logging.info("Start to loading Init rgb network")
        # rgbnet
        with slim.arg_scope(
            [slim.conv2d],
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                biases_initializer=None):
            with tf.variable_scope('RGB_Net'):
                # identity transform
                # 64 x 128
                avg_net = slim.avg_pool2d(images,
                                          kernel_size=2,
                                          stride=2,
                                          scope="pool1")
                # 32 x 64
                conv1_1_net = slim.conv2d(avg_net,
                                          32,
                                          kernel_size=3,
                                          stride=4,
                                          scope='conv1_1')
                # 8 x 16
                conv1_2_net = slim.conv2d(images,
                                          32,
                                          kernel_size=5,
                                          stride=8,
                                          scope='conv1_2')

                rgb_concat_net = tf.concat([conv1_1_net, conv1_2_net],
                                           3,
                                           name='concat')
                #loc_net = slim.repeat(images, 2, slim.conv2d, 32, kernel_size=3, stride=1, scope='loc_conv1')
                #loc_net = slim.max_pool2d(loc_net, kernel_size=2, stride=2, scope='loc_pool1')
                # 8 x 16
                rgb_output = channel_wise_attention(rgb_concat_net, images,
                                                    "RGB")
                images = rgb_output
                tf.logging.info("rgb network loaded...")

        # 1 x 2

    # first apply the cnn feature extraction stage
    with slim.arg_scope(
        [slim.conv2d],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
            biases_initializer=None):
        with tf.variable_scope('FEN'):
            tf.logging.info("Start to loading cnn feature extraction network")
            net = slim.repeat(images,
                              2,
                              slim.conv2d,
                              64,
                              kernel_size=3,
                              stride=1,
                              scope='conv1')

            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool1')
            # 32 x 64
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              128,
                              kernel_size=3,
                              stride=1,
                              scope='conv2')
            C1 = net

            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool2')
            # 16 x 32
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              256,
                              kernel_size=3,
                              stride=1,
                              scope='conv3')
            C2 = net

            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool3')
            # 8 x 16
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              512,
                              kernel_size=3,
                              stride=1,
                              scope='conv4')
            C3 = net

            net = slim.max_pool2d(net,
                                  kernel_size=[2, 1],
                                  stride=[2, 1],
                                  scope='pool4')
            # 4 x 16
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              512,
                              kernel_size=3,
                              stride=1,
                              scope='conv5')
            C4 = net
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 1],
                                  stride=[2, 1],
                                  scope='pool5')
            # 1 x 16
            C5 = net

            C1 = slim.conv2d(C1, 64, kernel_size=3, scope='C1_conv')
            C1 = slim.batch_norm(C1,
                                 decay=_BATCH_DECAY,
                                 is_training=training,
                                 scope='C1_BN')
            C2 = slim.conv2d(C2, 64, kernel_size=3, scope='C2_conv')
            C2 = slim.batch_norm(C2,
                                 decay=_BATCH_DECAY,
                                 is_training=training,
                                 scope='C2_BN')

            if config.with_CPFE:
                C1_cfe = CFE(C1, 32, 'C3_cfe', training)
                C2_cfe = CFE(C2, 32, 'C4_cfe', training)
                C3_cfe = CFE(C3, 32, 'C5_cfe', training)
                C1_cfe = BilinearDownsampling(C1_cfe,
                                              upsampling=(4, 4),
                                              name="C5_cfe_up4")
                C2_cfe = BilinearDownsampling(C2_cfe,
                                              upsampling=(2, 2),
                                              name="C4_cfe_up2")
                C123 = tf.concat([C1_cfe, C2_cfe, C3_cfe],
                                 axis=-1,
                                 name='C123_aspp_concat')

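            # NOTE (assumed): the code below requires config.with_CPFE to be
            # set, since C123 is defined only inside the block above.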
            C123 = slim.conv2d(C123, 64, kernel_size=1, scope='C123_conv')
            C123 = slim.batch_norm(C123,
                                   decay=_BATCH_DECAY,
                                   is_training=training,
                                   scope='C123_BN')
            #C123 = BilinearUpsampling(C345, upsampling=(4, 4), name="C123_up4")

            if config.with_SA:
                C5 = BilinearUpsampling(C5, upsampling=(2, 1), name="C2_up2")
                C45 = tf.concat([C4, C5], axis=-1, name='C12_concat')
                # C12 = tf.concat([C1, C2], axis=-1, name='C12_concat')
                C45 = slim.conv2d(C45, 64, kernel_size=3, scope='C12_conv')
                #C12 = Conv2D(64, (3, 3), padding='same', name='C12_conv')(C12)
                C45 = slim.batch_norm(C45,
                                      decay=_BATCH_DECAY,
                                      is_training=training,
                                      scope='C12')
                #C12 = BN(C12, 'C12')
                #C45 = tf.multiply(SA, C45, name="C12_atten_multiply")
                #C12 = Multiply(name='C12_atten_mutiply')([SA, C12])
                C45 = BilinearUpsampling(C45,
                                         upsampling=(2, 1),
                                         name="C45_up3")
                SA = SpatialAttention(C45, training, name="spatial_attention")
                if config.with_CA:
                    C45 = ChannelWiseAttention(
                        C45, name="C345_ChannelWiseAttention_withcpfe")
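            # NOTE (assumed): SA and C45 exist only when config.with_SA is
            # set, so the fusion below assumes that flag is enabled.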
            C123 = tf.multiply(SA, C123, name="C123_atten_multiply")

            net = tf.concat([C123, C45], axis=-1, name="fuse_concat")
            net = slim.conv2d(net,
                              256,
                              padding="VALID",
                              kernel_size=[2, 1],
                              stride=[2, 1],
                              scope='conv6')
            net.get_shape()
            # 2 x 32
            cnn_out = slim.conv2d(net,
                                  512,
                                  padding="VALID",
                                  kernel_size=[4, 1],
                                  stride=1,
                                  scope='conv7')
            # 1 x 32
            tf.logging.info("feature network loaded")
    # second apply the map to sequence stage
    shape = cnn_out.get_shape().as_list()
    assert shape[1] == 1
    sequence = tf.squeeze(cnn_out, axis=1)
    # third apply the sequence label stage
    shape = sequence.get_shape().as_list()
    B, W, C = shape
    with tf.variable_scope('Softmax_Layers'):
        # forward lstm cell
        # Doing the affine projection
        w = tf.Variable(tf.truncated_normal([C, num_classes], stddev=0.01),
                        name="w")
        b = tf.Variable(tf.truncated_normal([num_classes], stddev=0.01),
                        name="b")
        logits = tf.matmul(sequence, w) + b

        logits = tf.reshape(logits, [B, W, num_classes])
        # Swap batch and time axes (time-major output)
        net_out = tf.transpose(logits, (1, 0, 2), name='transpose_time_major')
    return net_out
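
# Hypothetical training wiring (assumed, not from the source): net_out is
# time-major [W, B, num_classes], the default layout for tf.nn.ctc_loss.
import tensorflow as tf

images = tf.placeholder(tf.float32, [32, 64, 128, 3])  # 64 x 128 inputs, as above
sparse_labels = tf.sparse_placeholder(tf.int32)
seq_lens = tf.placeholder(tf.int32, [32])
net_out = build_network(images, training=True)
ctc = tf.nn.ctc_loss(labels=sparse_labels, inputs=net_out,
                     sequence_length=seq_lens)
loss = tf.reduce_mean(ctc)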
Example #54
    def decoder(self, latent_var, is_training):
        activation_fn = leaky_relu  # tf.nn.relu
        weight_decay = 0.0
        with tf.variable_scope('decoder'):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.truncated_normal_initializer(
                            stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=self.batch_norm_params):
                    net = slim.fully_connected(latent_var,
                                               4096,
                                               activation_fn=None,
                                               normalizer_fn=None,
                                               scope='Fc_1')
                    net = tf.reshape(net, [-1, 4, 4, 256], name='Reshape')

                    net = tf.image.resize_nearest_neighbor(net,
                                                           size=(8, 8),
                                                           name='Upsample_1')
                    net = slim.conv2d(net,
                                      128, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_1a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      128, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_1b')

                    net = tf.image.resize_nearest_neighbor(net,
                                                           size=(16, 16),
                                                           name='Upsample_2')
                    net = slim.conv2d(net,
                                      64, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_2a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      64, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_2b')

                    net = tf.image.resize_nearest_neighbor(net,
                                                           size=(32, 32),
                                                           name='Upsample_3')
                    net = slim.conv2d(net,
                                      32, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_3a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      32, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_3b')

                    net = tf.image.resize_nearest_neighbor(net,
                                                           size=(64, 64),
                                                           name='Upsample_4')
                    net = slim.conv2d(net,
                                      3, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_4a')
                    net = slim.repeat(net,
                                      3,
                                      conv2d_block,
                                      0.1,
                                      3, [3, 3],
                                      1,
                                      activation_fn=activation_fn,
                                      scope='Conv2d_4b')
                    net = slim.conv2d(net,
                                      3, [3, 3],
                                      1,
                                      activation_fn=None,
                                      scope='Conv2d_4c')

        return net
Example #55
def get_map_from_images(imgs, mapper_arch, task_params, freeze_conv, wt_decay,
                        is_training, batch_norm_is_training_op, num_maps,
                        split_maps=True):
  # Hit image with a resnet.
  n_views = len(task_params.aux_delta_thetas) + 1
  out = utils.Foo()

  images_reshaped = tf.reshape(imgs, 
      shape=[-1, task_params.img_height,
             task_params.img_width,
             task_params.img_channels], name='re_image')

  x, out.vars_to_restore = get_repr_from_image(
      images_reshaped, task_params.modalities, task_params.data_augment,
      mapper_arch.encoder, freeze_conv, wt_decay, is_training)

  # Reshape into nice things so that these can be accumulated over time steps
  # for faster backprop.
  sh_before = x.get_shape().as_list()
  out.encoder_output = tf.reshape(x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
  x = tf.reshape(out.encoder_output, shape=[-1] + sh_before[1:])

  # Add a layer to reduce dimensions for a fc layer.
  if mapper_arch.dim_reduce_neurons > 0:
    ks = 1
    neurons = mapper_arch.dim_reduce_neurons
    init_var = np.sqrt(2.0/(ks**2)/neurons)
    batch_norm_param = mapper_arch.batch_norm_param
    batch_norm_param['is_training'] = batch_norm_is_training_op
    out.conv_feat = slim.conv2d(x, neurons, kernel_size=ks, stride=1,
                    normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param,
                    padding='SAME', scope='dim_reduce',
                    weights_regularizer=slim.l2_regularizer(wt_decay),
                    weights_initializer=tf.random_normal_initializer(stddev=init_var))
    reshape_conv_feat = slim.flatten(out.conv_feat)
    sh = reshape_conv_feat.get_shape().as_list()
    out.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views])

  with tf.variable_scope('fc'):
    # Fully connected layers to compute the representation in top-view space.
    fc_batch_norm_param = {'center': True, 'scale': True, 
                           'activation_fn':tf.nn.relu,
                           'is_training': batch_norm_is_training_op}
    f = out.reshape_conv_feat
    out_neurons = (mapper_arch.fc_out_size**2)*mapper_arch.fc_out_neurons
    neurons = mapper_arch.fc_neurons + [out_neurons]
    f, _ = tf_utils.fc_network(f, neurons=neurons, wt_decay=wt_decay,
                               name='fc', offset=0,
                               batch_norm_param=fc_batch_norm_param,
                               is_training=is_training,
                               dropout_ratio=mapper_arch.fc_dropout)
    f = tf.reshape(f, shape=[-1, mapper_arch.fc_out_size,
                             mapper_arch.fc_out_size,
                             mapper_arch.fc_out_neurons], name='re_fc')

  # Use pool5 to predict the free space map via deconv layers.
  with tf.variable_scope('deconv'):
    x, outs = deconv(f, batch_norm_is_training_op, wt_decay=wt_decay,
                     neurons=mapper_arch.deconv_neurons,
                     strides=mapper_arch.deconv_strides,
                     layers_per_block=mapper_arch.deconv_layers_per_block,
                     kernel_size=mapper_arch.deconv_kernel_size,
                     conv_fn=slim.conv2d_transpose, offset=0, name='deconv')

  # Reshape x the right way.
  sh = x.get_shape().as_list()
  x = tf.reshape(x, shape=[task_params.batch_size, -1] + sh[1:])
  out.deconv_output = x

  # Separate out the map and the confidence predictions, pass the confidence
  # through a sigmoid.
  if split_maps:
    with tf.name_scope('split'):
      out_all = tf.split(value=x, axis=4, num_or_size_splits=2*num_maps)
      out.fss_logits = out_all[:num_maps]
      out.confs_logits = out_all[num_maps:]
    with tf.name_scope('sigmoid'):
      out.confs_probs = [tf.nn.sigmoid(x) for x in out.confs_logits]
  return out
Example #56
# -*- coding: utf-8 -*-
'''
Basic usage of tensorflow slim.
'''
import tensorflow as tf
import tensorflow.contrib.slim as slim

# Create a weight variable named "weights", initialize it with a truncated
# normal distribution, regularize it with an l2 loss, and place it on the CPU.
weights_var = slim.variable(
    'weights',
    shape=[10, 10, 3, 3],
    initializer=tf.truncated_normal_initializer(stddev=0.1),
    regularizer=slim.l2_regularizer(0.05),
    device='/CPU:0')

# Use model_variable to define a variable that represents a model parameter.
# Non-model variables are those needed during training or evaluation but not
# for inference (e.g. the global step).
weights_model_var = slim.model_variable(
    'weights',
    shape=[10, 10, 3, 3],
    initializer=tf.truncated_normal_initializer(stddev=0.1),
    regularizer=slim.l2_regularizer(0.05),
    device='/CPU:0')
model_variables = slim.get_model_variables()

# Define and retrieve a regular variable.
my_var = slim.variable("my_var",
                       shape=[20, 1],
                       initializer=tf.zeros_initializer())
regular_variables_and_model_variables = slim.get_variables()

# slim.model_variable adds the variable to the tf.GraphKeys.MODEL_VARIABLES
# collection; custom layers or variables can also be added to the appropriate
# collection manually.
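
# A minimal sketch of manual registration (slim.add_model_variable comes from
# tf.contrib.framework and is re-exported by slim):
my_custom_var = tf.Variable(tf.zeros([5]), name='my_custom_var')
slim.add_model_variable(my_custom_var)
assert my_custom_var in slim.get_model_variables()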
Example #57
    def cnn_network(self,
                    incoming,
                    num_classes=1501,
                    reuse=None,
                    l2_normalize=True,
                    create_summaries=False,
                    weight_decay=1e-8):
        nonlinearity = tf.nn.elu
        conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
        conv_bias_init = tf.zeros_initializer()
        conv_regularizer = slim.l2_regularizer(weight_decay)
        fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
        fc_bias_init = tf.zeros_initializer()
        fc_regularizer = slim.l2_regularizer(weight_decay)

        def batch_norm_fn(x):
            return slim.batch_norm(x,
                                   scope=tf.get_variable_scope().name + "/bn")

        network = incoming
        network = slim.conv2d(network,
                              32, [3, 3],
                              stride=1,
                              activation_fn=nonlinearity,
                              padding="SAME",
                              normalizer_fn=batch_norm_fn,
                              scope="conv1_1",
                              weights_initializer=conv_weight_init,
                              biases_initializer=conv_bias_init,
                              weights_regularizer=conv_regularizer)
        if create_summaries:
            tf.summary.histogram(network.name + "/activations", network)
            tf.summary.image("conv1_1/weights",
                             tf.transpose(
                                 slim.get_variables("conv1_1/weights:0")[0],
                                 [3, 0, 1, 2]),
                             max_outputs=128)
        network = slim.conv2d(network,
                              32, [3, 3],
                              stride=1,
                              activation_fn=nonlinearity,
                              padding="SAME",
                              normalizer_fn=batch_norm_fn,
                              scope="conv1_2",
                              weights_initializer=conv_weight_init,
                              biases_initializer=conv_bias_init,
                              weights_regularizer=conv_regularizer)
        if create_summaries:
            tf.summary.histogram(network.name + "/activations", network)

        network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

        network = self.cnn_residual_block(
            network,
            "conv2_1",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=False,
            is_first=True,
            summarize_activations=create_summaries)
        network = self.cnn_residual_block(
            network,
            "conv2_3",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=False,
            summarize_activations=create_summaries)

        network = self.cnn_residual_block(
            network,
            "conv3_1",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=True,
            summarize_activations=create_summaries)
        network = self.cnn_residual_block(
            network,
            "conv3_3",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=False,
            summarize_activations=create_summaries)

        network = self.cnn_residual_block(
            network,
            "conv4_1",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=True,
            summarize_activations=create_summaries)
        network = self.cnn_residual_block(
            network,
            "conv4_3",
            nonlinearity,
            conv_weight_init,
            conv_bias_init,
            conv_regularizer,
            increase_dim=False,
            summarize_activations=create_summaries)

        feature_dim = network.get_shape().as_list()[-1]
        # print("feature dimensionality: ", feature_dim)
        network = slim.flatten(network)

        network = slim.dropout(network, keep_prob=0.6)
        network = slim.fully_connected(network,
                                       feature_dim,
                                       activation_fn=nonlinearity,
                                       normalizer_fn=batch_norm_fn,
                                       weights_regularizer=fc_regularizer,
                                       scope="fc1",
                                       weights_initializer=fc_weight_init,
                                       biases_initializer=fc_bias_init)

        features = network

        if l2_normalize:
            # Features in rows, normalize axis 1.
            features = slim.batch_norm(features, scope="ball", reuse=reuse)
            feature_norm = tf.sqrt(
                tf.constant(1e-8, tf.float32) +
                tf.reduce_sum(tf.square(features), [1], keep_dims=True))
            features = features / feature_norm

            with slim.variable_scope.variable_scope("ball", reuse=reuse):
                weights = slim.model_variable(
                    "mean_vectors", (feature_dim, num_classes),
                    initializer=tf.truncated_normal_initializer(stddev=1e-3),
                    regularizer=None)
                scale = slim.model_variable("scale", (num_classes, ),
                                            tf.float32,
                                            tf.constant_initializer(
                                                0., tf.float32),
                                            regularizer=None)
                if create_summaries:
                    tf.summary.histogram("scale", scale)
                # scale = slim.model_variable(
                #     "scale", (), tf.float32,
                #     initializer=tf.constant_initializer(0., tf.float32),
                #     regularizer=slim.l2_regularizer(1e-2))
                # if create_summaries:
                #     tf.scalar_summary("scale", scale)
                scale = tf.nn.softplus(scale)

            # Each mean vector in columns, normalize axis 0.
            weight_norm = tf.sqrt(
                tf.constant(1e-8, tf.float32) +
                tf.reduce_sum(tf.square(weights), [0], keep_dims=True))
            logits = scale * tf.matmul(features, weights / weight_norm)

        else:
            logits = slim.fully_connected(features,
                                          num_classes,
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          weights_regularizer=fc_regularizer,
                                          scope="softmax",
                                          weights_initializer=fc_weight_init,
                                          biases_initializer=fc_bias_init)

        return features, logits
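
# A toy illustration (shapes assumed) of the cosine-softmax logits built in the
# l2_normalize branch above: L2-normalized features times L2-normalized class
# mean vectors gives cosine similarities, sharpened by a softplus-positive scale.
import tensorflow as tf

f = tf.nn.l2_normalize(tf.random_normal([8, 128]), dim=1)   # features in rows
w = tf.nn.l2_normalize(tf.random_normal([128, 10]), dim=0)  # mean vectors in columns
scale = tf.nn.softplus(tf.Variable(0.0))                    # kept positive
logits = scale * tf.matmul(f, w)                            # scaled cosine similarity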
Example #58
    def fusion(self, vid_emb_state, cap_emb_state, iii, reuse=False):
        states = []
        for i in range(int(self.config.video_steps)):
            vid_sample = tf.tile(tf.expand_dims(vid_emb_state[:, i, :], 1),
                                 [1, self.config.caption_length, 1])
            sum_repr = tf.multiply(vid_sample, cap_emb_state)
            states.append(sum_repr)
        # V x B x C x 256
        cnn_repr = tf.stack(states)
        # B x V x C x 256
        cnn_repr = tf.transpose(cnn_repr, [1, 0, 2, 3])

        with slim.arg_scope(
            [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(0.0005),
                #activation_fn=tf.nn.tanh,
                normalizer_fn=self.bn_fn,
                normalizer_params=self.bn_params):
            h1 = slim.fully_connected(cnn_repr,
                                      512,
                                      scope='rel_h1',
                                      activation_fn=tf.nn.tanh,
                                      reuse=reuse)
            input_gate1 = slim.fully_connected(h1,
                                               1,
                                               scope='rel_halp',
                                               activation_fn=tf.nn.sigmoid,
                                               reuse=reuse)

            h2 = slim.fully_connected(cnn_repr,
                                      512,
                                      scope='rel_h2',
                                      activation_fn=tf.nn.tanh,
                                      reuse=reuse)
            h3 = slim.fully_connected(h2,
                                      512,
                                      scope='rel_h3',
                                      activation_fn=tf.nn.tanh,
                                      reuse=reuse)
        output1 = tf.multiply(h3, input_gate1)
        output1 = tf.multiply(
            tf.multiply(
                output1,
                tf.expand_dims(
                    tf.expand_dims(tf.expand_dims(self.video_mask[iii], 0),
                                   2), 3)),
            tf.expand_dims(tf.expand_dims(self.caption_mask, 1), 3))

        # Conv
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.tanh,
                weights_initializer=self.initializer,
                weights_regularizer=slim.l2_regularizer(0.0005),
                #normalizer_fn=self.bn_fn,
                #normalizer_params=self.bn_params,
                reuse=reuse):

            # Conv1
            conv1 = slim.conv2d(output1,
                                256, [3, 3],
                                padding="Valid",
                                scope='conv1',
                                activation_fn=tf.nn.leaky_relu)
            # added
            ratio = 4
            average_conv1 = slim.avg_pool2d(conv1,
                                            [conv1.shape[1], conv1.shape[2]])
            fc1_conv1 = slim.fully_connected(average_conv1,
                                             int(256 / ratio),
                                             activation_fn=tf.nn.leaky_relu,
                                             scope='conv1_fc1',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv1 = slim.fully_connected(fc1_conv1,
                                             256,
                                             activation_fn=tf.nn.sigmoid,
                                             scope='conv1_fc2',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv1 = tf.reshape(fc2_conv1, [-1, 1, 1, 256])
            conv1 = conv1 * fc2_conv1

            convalp1 = slim.conv2d(output1,
                                   1, [3, 3],
                                   padding="Valid",
                                   scope='conv1alp',
                                   activation_fn=tf.nn.sigmoid)
            input_gate2 = convalp1
            output2 = tf.multiply(conv1, input_gate2)
            output2 = tf.multiply(
                tf.multiply(
                    output2,
                    tf.expand_dims(
                        tf.expand_dims(
                            tf.expand_dims(self.video_mask_list[0][iii], 0),
                            2), 3)),
                tf.expand_dims(tf.expand_dims(self.caption_mask_list[0], 1),
                               3))

            #Conv2
            conv2 = slim.conv2d(output2,
                                256, [3, 3],
                                padding="Valid",
                                scope='conv2',
                                activation_fn=tf.nn.leaky_relu)
            # added
            ratio = 4
            average_conv2 = slim.avg_pool2d(conv2,
                                            [conv2.shape[1], conv2.shape[2]])
            fc1_conv2 = slim.fully_connected(average_conv2,
                                             int(256 / ratio),
                                             activation_fn=tf.nn.leaky_relu,
                                             scope='conv2_fc1',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv2 = slim.fully_connected(fc1_conv2,
                                             256,
                                             activation_fn=tf.nn.sigmoid,
                                             scope='conv2_fc2',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv2 = tf.reshape(fc2_conv2, [-1, 1, 1, 256])
            conv2 = conv2 * fc2_conv2

            convalp2 = slim.conv2d(output2,
                                   1, [3, 3],
                                   padding="Valid",
                                   scope='conv2alp',
                                   activation_fn=tf.nn.sigmoid)
            input_gate3 = convalp2
            output3 = tf.multiply(conv2, input_gate3)
            output3 = tf.multiply(
                tf.multiply(
                    output3,
                    tf.expand_dims(
                        tf.expand_dims(
                            tf.expand_dims(self.video_mask_list[1][iii], 0),
                            2), 3)),
                tf.expand_dims(tf.expand_dims(self.caption_mask_list[1], 1),
                               3))

            #Conv3
            conv3 = slim.conv2d(output3,
                                256, [3, 3], [2, 2],
                                padding="Valid",
                                scope='conv3',
                                activation_fn=tf.nn.leaky_relu)
            # added
            ratio = 4
            average_conv3 = slim.avg_pool2d(conv3,
                                            [conv3.shape[1], conv3.shape[2]])
            fc1_conv3 = slim.fully_connected(average_conv3,
                                             int(256 / ratio),
                                             activation_fn=tf.nn.leaky_relu,
                                             scope='conv3_fc1',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv3 = slim.fully_connected(fc1_conv3,
                                             256,
                                             activation_fn=tf.nn.sigmoid,
                                             scope='conv3_fc2',
                                             reuse=tf.AUTO_REUSE)
            fc2_conv3 = tf.reshape(fc2_conv3, [-1, 1, 1, 256])
            conv3 = conv3 * fc2_conv3

            convalp3 = slim.conv2d(output3,
                                   1, [3, 3], [2, 2],
                                   padding="Valid",
                                   scope='conv3alp',
                                   activation_fn=tf.nn.sigmoid)
            input_gate4 = convalp3
            output4 = tf.multiply(conv3, input_gate4)
            output4 = tf.multiply(
                tf.multiply(
                    output4,
                    tf.expand_dims(
                        tf.expand_dims(
                            tf.expand_dims(self.video_mask_list[2][iii], 0),
                            2), 3)),
                tf.expand_dims(tf.expand_dims(self.caption_mask_list[2], 1),
                               3))
        valid = tf.multiply(
            tf.reduce_sum(self.video_mask_list[2][iii], axis=0),
            tf.reduce_sum(self.caption_mask_list[2], axis=1))
        sum_state = tf.div(tf.reduce_sum(output4, [1, 2]),
                           tf.expand_dims(valid, axis=1))

        return sum_state
Example #59
def pfld_inference_for_shuffleNetV2(input, weight_decay, shuffle_group=2):
    # [(out_channel, repeat_times), (out_channel, repeat_times), ...]
    # # model_scale = 0.5
    # channel_sizes = [(48, 4), (96, 8), (192, 4), (1024, 1)]

    # model_scale = 1.0
    channel_sizes = [(116, 4), (232, 8), (464, 4), (1024, 1)]

    # # model_scale = 1.5
    # channel_sizes = [(176, 4), (352, 8), (704, 4), (1024, 1)]
    #
    # # model_scale = 2.0
    # channel_sizes = [(244, 4), (488, 8), (976, 4), (2048, 1)]

    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))
            # 112*112*3=>56*56*24
            with tf.variable_scope('conv_1'):
                conv1 = conv_bn_relu(input, 24, [3, 3], stride=2)
            print(conv1.name, conv1.get_shape())

            # Stage 2: 56*56*24 => 28*28*C
            with tf.variable_scope('shuffle_block_1'):
                out_channel, repeat_times = channel_sizes[0]
                shuffle_block_1 = shufflenet_v2_block(conv1, out_channel, stride=2, shuffle_group=shuffle_group)
                print(shuffle_block_1.name, shuffle_block_1.get_shape())
                for i in range(repeat_times - 1):
                    shuffle_block_1 = shufflenet_v2_block(shuffle_block_1, out_channel, stride=1,
                                                          shuffle_group=shuffle_group)
                    print(shuffle_block_1.name, shuffle_block_1.get_shape())

            features['auxiliary_input'] = shuffle_block_1

            # Stage 3: 28*28*C => 14*14*C
            with tf.variable_scope('shuffle_block_2'):
                out_channel, repeat_times = channel_sizes[1]
                shuffle_block_2 = shufflenet_v2_block(shuffle_block_1, out_channel, stride=2, shuffle_group=shuffle_group)
                print(shuffle_block_2.name, shuffle_block_2.get_shape())
                for i in range(repeat_times - 1):
                    shuffle_block_2 = shufflenet_v2_block(shuffle_block_2, out_channel, stride=1,
                                                          shuffle_group=shuffle_group)
                    print(shuffle_block_2.name, shuffle_block_2.get_shape())

            # Stage 4: 14*14*C => 7*7*C
            with tf.variable_scope('shuffle_block_3'):
                out_channel, repeat_times = channel_sizes[2]
                shuffle_block_3 = shufflenet_v2_block(shuffle_block_2, out_channel, stride=2,
                                                      shuffle_group=shuffle_group)
                print(shuffle_block_3.name, shuffle_block_3.get_shape())
                for i in range(repeat_times - 1):
                    shuffle_block_3 = shufflenet_v2_block(shuffle_block_3, out_channel, stride=1,
                                                          shuffle_group=shuffle_group)
                    print(shuffle_block_3.name, shuffle_block_3.get_shape())

            # 7*7*C=>1*1*C
            with tf.variable_scope('end_conv'):
                with slim.arg_scope([slim.convolution2d], padding='valid'):
                    out_channel = channel_sizes[-1][0]
                    end_conv = conv_bn_relu(shuffle_block_3, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())
                    end_conv = conv_bn_relu(end_conv, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())
                    end_conv = conv_bn_relu(end_conv, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())

            group_pool1 = slim.avg_pool2d(shuffle_block_2, [shuffle_block_2.get_shape()[1],
                                                            shuffle_block_2.get_shape()[2]], stride=1)
            print(group_pool1.name, group_pool1.get_shape())
            group_pool2 = slim.avg_pool2d(shuffle_block_3, [shuffle_block_3.get_shape()[1],
                                                            shuffle_block_3.get_shape()[2]], stride=1)
            print(group_pool2.name, group_pool2.get_shape())
            group_pool3 = slim.avg_pool2d(end_conv, [end_conv.get_shape()[1], end_conv.get_shape()[2]], stride=1)
            print(group_pool3.name, group_pool3.get_shape())

            s1 = slim.flatten(group_pool1)
            s2 = slim.flatten(group_pool2)
            s3 = slim.flatten(group_pool3)

            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=196, activation_fn=None, scope='fc')
            print(landmarks.name, landmarks.get_shape())

        return features, landmarks
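
# Hypothetical call-site sketch (assumed: 112x112 inputs per the comment above;
# the 196 outputs are 98 (x, y) landmark pairs in the WFLW convention):
import tensorflow as tf

input_image = tf.placeholder(tf.float32, [None, 112, 112, 3])
features, landmarks = pfld_inference_for_shuffleNetV2(input_image, weight_decay=5e-5)
landmark_xy = tf.reshape(landmarks, [-1, 98, 2])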
Example #60
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    #define common param
    print('P_Net')
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())

        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        _activation_summary(net)
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        _activation_summary(net)
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        _activation_summary(net)
        print(net.get_shape())
        #
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        _activation_summary(net)
        print(net.get_shape())
        #batch*H*W*2
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        _activation_summary(conv4_1)
        #conv4_1 = slim.conv2d(net,num_outputs=1,kernel_size=[1,1],stride=1,scope='conv4_1',activation_fn=tf.nn.sigmoid)

        print(conv4_1.get_shape())
        #batch*H*W*4
        bbox_pred = slim.conv2d(net,
                                num_outputs=4,
                                kernel_size=[1, 1],
                                stride=1,
                                scope='conv4_2',
                                activation_fn=None)
        _activation_summary(bbox_pred)
        print(bbox_pred.get_shape())
        #batch*H*W*10
        landmark_pred = slim.conv2d(net,
                                    num_outputs=(no_landmarks * 2),
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        _activation_summary(landmark_pred)
        print(landmark_pred.get_shape())
        # add projectors for visualization
        #cls_prob_original = conv4_1
        #bbox_pred_original = bbox_pred
        if training:
            #batch*2
            # calculate classification loss
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #batch
            # cal bounding box error, squared sum error
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #batch*10
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)

            accuracy = cal_accuracy(cls_prob, label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, landmark_pred
        #test
        else:
            # at test time, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            print('class prob shape', cls_pro_test.get_shape())
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            print('bbox shape', bbox_pred_test.get_shape())
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            print('landmark pred shape', landmark_pred_test.get_shape())
            return cls_pro_test, bbox_pred_test, landmark_pred_test
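
# A minimal inference wiring sketch (assumed: MTCNN runs P-Net fully
# convolutionally over whole images at test time with batch size 1):
inputs = tf.placeholder(tf.float32, [1, None, None, 3], name='input_image')
cls_prob_test, bbox_test, landmark_test = P_Net(inputs, training=False)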