Example no. 1
 def _build_model(self, inputs, is_training=True):
     with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
         logits, endpoints = mobilenet_v2.mobilenet(inputs, num_classes=self.config.num_outputs)
     ema = tf.train.ExponentialMovingAverage(0.999)
     self.mobile_net_vars = [var for var in tf.trainable_variables() if var.name.startswith("Mobilenet") and
                             "Logits" not in var.name]
     return logits, endpoints
def net(image, classes):

    
    #encoding - convolution/pooling
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
        logits, endpoints = mobilenet_v2.mobilenet(image, num_classes=None)

    logits = endpoints["layer_10/output"]
    print(logits.get_shape())
    #new_size = (16,32)
    #resize = tf.image.resize(logits, new_size, align_corners=True)
    #conv = util.conv(resize, [3,3,512,320], "up_1", pad="SAME")
    #new_size = (64,128)
    #resize = tf.image.resize(logits, new_size, align_corners=True)
    #conv = util.conv(resize, [3,3,256,512], "up_2", pad="SAME")
          
    new_size = (192,256)
    resize = tf.image.resize(logits, new_size, align_corners=True)
    conv = util.conv(resize, [3,3,128,256], "up_3", pad="SAME")
    
    conv6 = util.conv(conv, [1,1,128,classes], "c6", pad="SAME")

    softmax = tf.nn.softmax(conv6)

    return conv6, tf.argmax(softmax, axis=3), softmax
def load_mobilenet_v2(model_dir, sess):
    model_url = "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz"

    filename = model_url.split("/")[-1]
    filepath = os.path.join(model_dir, filename.split(".tgz")[0])

    try:
        utils.download_pretrained_model_weights(model_url,
                                                filepath,
                                                unzip=True)
    except Exception:
        print("Pre-trained weights download failed!")

    model_file_name = "mobilenet_v2_1.4_224.ckpt"
    model_path = os.path.join(filepath, model_file_name)

    resized_input_tensor = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3])
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        bottleneck_tensor, _ = mobilenet_v2.mobilenet(resized_input_tensor,
                                                      num_classes=None,
                                                      depth_multiplier=1.4)

    variable_restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
        model_path,
        tf.contrib.slim.get_trainable_variables(),
        ignore_missing_vars=True)
    variable_restore_op(sess)

    # bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
    bottleneck_tensor_size = 1792

    return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
Example no. 4
    def __init__(self, checkpoint='../mobilenet_v2_1.0_224.ckpt'):
        # save the checkpoint
        self.checkpoint = checkpoint

        tf.reset_default_graph()

        # placeholder for the image input, need to decode the file
        self.file_in = tf.placeholder(tf.string, ())
        image = tf.image.decode_jpeg(tf.read_file(self.file_in))

        # expand for batch then cast to between -1 and 1
        inputs = tf.expand_dims(image, 0)
        inputs = (tf.cast(inputs, tf.float32) / 128) - 1
        # ensure that it only has three dimensions and resize to 224x224
        inputs.set_shape((None, None, None, 3))
        inputs = tf.image.resize_images(inputs, (224, 224))

        # get the endpoints of the network
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            _, self.endpoints = mobilenet_v2.mobilenet(inputs)

        # Restore using exponential moving average since it produces (1.5-2%) higher
        # accuracy
        ema = tf.train.ExponentialMovingAverage(0.999)
        vars = ema.variables_to_restore()

        saver = tf.train.Saver(vars)

        # create the label map from imagenet, same thing
        self.label_map = imagenet.create_readable_names_for_imagenet_labels()

        # create session and restore the checkpoint downloaded
        self.sess = tf.Session()
        saver.restore(self.sess, self.checkpoint)
Example no. 5
    def create_inference_graph(self, input_image, base_graph):
      util.download(self.params.CHECKPOINT_TARBALL_URI, self.params.MODEL_BASEDIR)
      
      self.graph = base_graph
      with self.graph.as_default():        
        input_image = tf.cast(input_image, tf.float32) / 128. - 1
        input_image.set_shape(self.params.INPUT_TENSOR_SHAPE)

        from nets.mobilenet import mobilenet_v2
        with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
          # See also e.g. mobilenet_v2_035
          self.logits, self.endpoints = mobilenet_v2.mobilenet(
                                input_image,
                                is_training=False,
                                depth_multiplier=self.params.DEPTH_MULTIPLIER,
                                finegrain_classification_mode=self.params.FINE)

        # Per authors: Restore using exponential moving average since it produces
        # (1.5-2%) higher accuracy
        ema = tf.train.ExponentialMovingAverage(0.999)
        vs = ema.variables_to_restore()
        
      saver = tf.train.Saver(vs)
      checkpoint = os.path.join(
        self.params.MODEL_BASEDIR,
        self.params.CHECKPOINT + '.ckpt')
      nodes = list(self.output_names) + [input_image]
      self.graph = util.give_me_frozen_graph(
                              checkpoint,
                              nodes=self.output_names,
                              base_graph=self.graph,
                              saver=saver)
      return self.graph
  def __call__(self, inputs, castFromUint8=True):
    pr_shape = lambda var : print(var.shape)
    if castFromUint8:
      inputs = tf.cast(inputs, self.dtype)
    # print(inputs.dtype)

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(
        is_training=self.is_training)):
      # print(inputs.dtype)
      global_pool, endpoints = mobilenet_v2.mobilenet(inputs, num_classes=None)
    self.variables_to_restore = slim.get_variables() # len 260
    # Append two fully-connected layers
    dropout_keep_prob = 0.5
    weight_decay = 0.05
    with tf.variable_scope('additional', 'fc'):
      # flatten = tf.flatten(endpoints['global_pool'])
      flatten = slim.flatten(global_pool)
      with slim.arg_scope([slim.fully_connected],
          weights_regularizer=slim.l2_regularizer(weight_decay),
          weights_initializer = tc.layers.xavier_initializer(tf.float32),
          # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
          activation_fn=None) as sc:
        net = slim.fully_connected(flatten, 128, activation_fn=None, scope='fc1')
        net = slim.dropout(net, dropout_keep_prob, is_training=self.is_training, scope='dropout')
        logits = slim.fully_connected(net, self.n_classes, activation_fn=None, scope='fc2')
    # Also save the 4 extra variables added above (264 in total)
    self.variables_to_save = slim.get_variables()

    for var in self.variables_to_save:
      if var in self.variables_to_restore:
        continue
      self.variables_to_train.append(var)
    # pr_shape(out)
    return logits
Example no. 7
def training_scope(weight_decay, is_training, stddev, dropout_keep_prob,
                   bn_decay):
    return mobilenet_v2_builder.training_scope(
        weight_decay=weight_decay,
        is_training=is_training,
        stddev=stddev,
        dropout_keep_prob=dropout_keep_prob,
        bn_decay=bn_decay)
Example no. 8
def Encoder_mobilenet(x, is_training=True, weight_decay=0.001, reuse=False):
    # from nets.mobilenet import mobilenet_v2
    from nets.mobilenet import mobilenet_v2
    with slim.arg_scope(mobilenet_v2.training_scope()):
        net, endpoints = mobilenet_v2.mobilenet(x)

    variables = tf.contrib.framework.get_variables('mobilenet_v2')

    return net, variables
Example no. 9
def train_kfold(record_file, train_log_step, train_param, val_log_step,
                num_classes, data_shape, snapshot, snapshot_prefix):
    [base_lr, max_steps] = train_param
    [batch_size, resize_height, resize_width, depths] = data_shape
    # ============================================================================================================
    # Define the model: [core]
    with slim.arg_scope(
            mobilenet_v2.training_scope(dropout_keep_prob=R.dropout)):
        out, end_points = mobilenet_v2.mobilenet(
            input_tensor=input_images,
            num_classes=num_classes,
            depth_multiplier=R.depth_multiplier,
            is_training=is_training)

    # Specify the loss function: losses defined with tf.losses are added to the total
    # loss automatically, so there is no need to call slim.losses.add_loss(my_loss)
    tf.losses.softmax_cross_entropy(onehot_labels=input_labels,
                                    logits=out)  # add the cross-entropy loss (loss = 1.6)
    loss = tf.losses.get_total_loss(
        add_regularization_losses=True)  # add the regularization loss (loss = 2.2)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32))

    # Specify the optimization scheme:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=base_lr)
    '''
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.05, global_step, 150, 0.9) 
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    train_tensor = optimizer.minimize(loss, global_step)
    train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
    '''
    # When training, note that the `batch_norm` layers keep `average` and `variance`
    # statistics that must be updated. These updates are not part of the normal
    # training step, so they have to be applied manually as shown below.
    # Collect all `op`s that need updating via `tf.get_collection`:
    # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Use TensorFlow control flow to run the update ops first, then the training op:
    # with tf.control_dependencies(update_ops):
    # create_train_op that ensures that when we evaluate it to get the loss,
    # the update_ops are done and the gradient updates are computed.
    train_op = slim.learning.create_train_op(total_loss=loss,
                                             optimizer=optimizer)
    # ================================================================================================================
    # Read the images and labels from the TFRecord file
    all_nums = get_example_nums(record_file)
    all_images, all_labels = read_records(record_file,
                                          resize_height,
                                          resize_width,
                                          type='normalization',
                                          is_train=None)
    all_images_batch, all_labels_batch = get_batch_images(
        all_images,
        all_labels,
        batch_size=batch_size,
        labels_nums=num_classes,
        one_hot=True,
        shuffle=True)
Example no. 10
    def _image_to_head(self, is_training, reuse=None):
        with slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
            net, endpoints = mobilenet_v2.mobilenet_base(self._image, conv_defs=CTPN_DEF)

        self.variables_to_restore = slim.get_variables_to_restore()

        self._act_summaries.append(net)
        self._layers['head'] = net

        return net
Example no. 11
def inspect_module():
    features = tf.zeros([8, 224, 224, 3], name='input')
    with tf.variable_scope('TestSSD',
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            logits, endpoints = mobilenet_v2.mobilenet(features)
            for key in endpoints:
                print(key, endpoints[key])
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    # TODO(b/68150321): Enable fused batch norm once quantization
                    # supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):
                        _, image_features = mobilenet_v2.mobilenet_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='layer_19',
                            depth_multiplier=self._depth_multiplier,
                            use_explicit_padding=self._use_explicit_padding,
                            scope=scope)
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    # TODO(b/68150321): Enable fused batch norm once quantization
                    # supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):
                        feature_maps = feature_map_generators.multi_resolution_feature_maps(
                            feature_map_layout=feature_map_layout,
                            depth_multiplier=self._depth_multiplier,
                            min_depth=self._min_depth,
                            insert_1x1_conv=True,
                            image_features=image_features)

        return feature_maps.values()
Example no. 13
    def _image_to_head(self, is_training, reuse=None):
        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=is_training)):
            net, endpoints = mobilenet_v2.mobilenet_base(self._image,
                                                         conv_defs=CTPN_DEF)

        self.variables_to_restore = slim.get_variables_to_restore()

        self._act_summaries.append(net)
        self._layers['head'] = net

        return net
Example no. 14
def MobileNet(depth_multiplier, imgs_in, weight_decay, batch_norm_momentum,
              is_training):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        weight_decay=weight_decay,
                                        bn_decay=batch_norm_momentum)):
        features, _ = mobilenet_v2.mobilenet_base(
            imgs_in,
            depth_multiplier=depth_multiplier,
            finegrain_classification_mode=depth_multiplier < 1,
            output_stride=16)

    return features
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v2.mobilenet_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='layer_19',
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()
    def encode(self, input_tensor, name):
        """
        Encode the input tensor with the MobileNet backbone
        :param input_tensor:
        :param name:
        :param flags:
        :return: the MobileNet-encoded features
        """
        ret = OrderedDict()

        with tf.variable_scope(name):
            with tf.contrib.slim.arg_scope(
                    mobilenet_v2.training_scope(is_training=True)):
                net, end_points = mobilenet_v2.mobilenet(input_tensor,
                                                         base_only=True)

            # # Version B
            # ret['layer_5'] = dict()
            # ret['layer_5']['data'] = end_points['layer_5']
            # ret['layer_5']['shape'] = end_points['layer_5'].get_shape().as_list()
            #
            #
            # ret['layer_8'] = dict()
            # ret['layer_8']['data'] = end_points['layer_8']
            # ret['layer_8']['shape'] = end_points['layer_8'].get_shape().as_list()
            #
            #
            # ret['layer_18'] = dict()
            # ret['layer_18']['data'] = end_points['layer_18']
            # ret['layer_18']['shape'] = end_points['layer_18'].get_shape().as_list()

            # Version A
            ret['layer_7'] = dict()
            ret['layer_7']['data'] = end_points['layer_7']
            ret['layer_7']['shape'] = end_points['layer_7'].get_shape(
            ).as_list()

            ret['layer_14'] = dict()
            ret['layer_14']['data'] = end_points['layer_14']
            ret['layer_14']['shape'] = end_points['layer_14'].get_shape(
            ).as_list()

            ret['layer_19'] = dict()
            ret['layer_19']['data'] = end_points['layer_19']
            ret['layer_19']['shape'] = end_points['layer_19'].get_shape(
            ).as_list()

            # ret['end_points'] = end_points

        return ret
Example no. 17
def mobilenet_v2_140(inputs, is_training, opts):
    if is_training:
        with slim.arg_scope(mobilenet_v2.training_scope(
                weight_decay=opts.weight_decay,
                stddev=0.09,
                bn_decay=opts.batch_norm_decay)):
            return mobilenet_v2.mobilenet_v2_140(
                inputs,
                num_classes=opts.num_classes,
                reuse=None)
    else:
        return mobilenet_v2.mobilenet_v2_140(
            inputs,
            num_classes=opts.num_classes,
            reuse=None)
Example no. 18
def mobilenet(images,
              depth_multiplier=1.0,
              is_training=True,
              verbose=False,
              **kwargs):
    """ Base MobileNet architecture
    Based on https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet

    Args:
        images: input images in [0., 1.]
        depth_multiplier: MobileNet depth multiplier.
        is_training: training bool for batch norm
        verbose: verbosity level

    Kwargs:
        weight_decay: Regularization constant. Defaults to 0.
        normalizer_decay: Batch norm decay. Defaults to 0.9
    """
    del kwargs
    base_scope = tf.get_variable_scope().name

    # Input in [0., 1.] -> [-1, 1]
    with tf.control_dependencies([tf.assert_greater_equal(images, 0.)]):
        with tf.control_dependencies([tf.assert_less_equal(images, 1.)]):
            net = (images - 0.5) * 2.

    # Mobilenet
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        if depth_multiplier == 1.0:
            net, _ = mobilenet_v2.mobilenet(net, base_only=True)
        elif depth_multiplier == 0.5:
            net, _ = mobilenet_v2.mobilenet_v2_050(net, base_only=True)
        elif depth_multiplier == 0.35:
            net, _ = mobilenet_v2.mobilenet_v2_035(net, base_only=True)

    # Add a saver to restore Imagenet-pretrained weights
    saver_collection = '%s_mobilenet_%s_saver' % (base_scope, depth_multiplier)
    savers = tf.get_collection(saver_collection)
    if len(savers) == 0:
        var_list = {
            x.op.name.replace('%s/' % base_scope, ''): x
            for x in tf.global_variables(scope=base_scope)
        }
        saver = tf.train.Saver(var_list=var_list)
        tf.add_to_collection(saver_collection, saver)
    return net
def compare_layer_output(net, layer_name, checkpoint, tensor_name, image_file):
    ### Compare outputs from the same layer (tensor)
    ### from caffe net and tensorflow graph

    ### matching name examples:
    ##    tf: MobilenetV2/Conv/Conv2D:0, MobilenetV2/Conv/Relu6:0, MobilenetV2/Conv/BatchNorm/FusedBatchNorm:0
    ## caffe: conv1,                     conv1/relu,               conv1/scale

    def square_error(x, x_):
        return np.sum(np.square(x - x_))

    image = tf_preprocess(image_file)

    ## caffe inference
    net.blobs['data'].data[...] = image[...]
    net.forward()
    caffe_output = net.blobs[layer_name].data
    caffe_output = caffe_output.transpose(0, 2, 3, 1)  # channel first to last

    ## tf inference
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32,
                            shape=(None, image_scale, image_scale, 3))
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        logits, endpoints = mobilenet_v2.mobilenet(images, num_classes=1001)
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars = ema.variables_to_restore()
    saver = tf.train.Saver(vars)

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tensor = sess.graph.get_tensor_by_name(tensor_name)
        tf_output = sess.run(tensor, feed_dict={images: image})

    ### compare tf and caffe result of a specific layer
    ### need graphs and layer (tensor) name in caffe and tf
    print('...................................')
    error = 0
    for i in range(32):
        err = square_error(tf_output[0, :, :, i], caffe_output[0, :, :, i])
        print('channel', i, err)
        error += err
    print('total error:', error)
    print('...................................')

    return
Example no. 20
def getMobileNet(checkpoint):
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        file_input = tf.placeholder(tf.string, ())
        image = tf.image.decode_image(tf.read_file(file_input))
        images = tf.expand_dims(image, 0)
        images = tf.cast(images, tf.float32) / 128. - 1
        images.set_shape((None, None, None, 3))
        images = tf.image.resize_images(images, (224, 224))    
        with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
            logits, endpoints = mobilenet_v2.mobilenet(images)
        ema = tf.train.ExponentialMovingAverage(0.999)
        vars = ema.variables_to_restore()
        saver = tf.train.Saver(vars)
        saver.restore(sess, checkpoint)
    return sess, graph, endpoints, file_input
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        #    print('###faster_rcnn_mobilenet_v2_feature_extractor.py### - extract_proposal_features')

        preprocessed_inputs.get_shape().assert_has_rank(4)
        preprocessed_inputs = shape_utils.check_min_image_dim(
            min_dim=33, image_tensor=preprocessed_inputs)

        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=self._train_batch_norm,
                                            weight_decay=self._weight_decay)):
            with tf.variable_scope('MobilenetV2',
                                   reuse=self._reuse_weights) as scope:
                params = {}
                if self._skip_last_stride:
                    # Not called by default, will use conv_defs in slim.nets.mobilenet.mobilenet_v2
                    params[
                        'conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                            conv_depth_ratio_in_percentage=self.
                            _conv_depth_ratio_in_percentage)

                _, endpoints = mobilenet_v2.mobilenet_base(
                    preprocessed_inputs,
                    final_endpoint='layer_19',  # actually 'MobilenetV2/Conv_1'
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope,
                    **params)
        return endpoints['layer_19'], endpoints
Example no. 22
def load_mobilenet_v2(model_dir, sess):
  model_file_name = "mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt"
  model_path = os.path.join(model_dir, model_file_name)

  resized_input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3])
  with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
    bottleneck_tensor, _ = mobilenet_v2.mobilenet(
      resized_input_tensor, num_classes=None, depth_multiplier=1.4)

  variable_restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
    model_path,
    tf.contrib.slim.get_trainable_variables(),
    ignore_missing_vars=True)
  variable_restore_op(sess)

  #bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
  bottleneck_tensor_size = 1792

  return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
Example no. 23
def fcn_mobv2(images, num_classes, is_training=True):

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        _, end_points = mobilenet_v2.mobilenet(images, num_classes)

        for v, k in end_points.items():
            print('{v}:{k}'.format(v=v, k=k))

#        pool4=end_points['resnet_v1_101/pool4']
#
#        dconv1_out=pool4.get_shape().as_list()
#
#
#        deconv1=slim.conv2d_transpose(net,dconv1_out[3],[4,4], stride=2,scope='deconv1')
#
#        fu1=tf.add(deconv1,pool4)
#
#
#        pool3=end_points['resnet_v1_101/pool3']
#        dconv2_out=pool3.get_shape().as_list()
#        deconv2=slim.conv2d_transpose(fu1,dconv2_out[3],[4,4], stride=2,scope='deconv2')
#
#        fu2=tf.add(deconv2,pool3)
        net = end_points['layer_18']
        #        net_14=end_points['Conv2d_11_pointwise']
        #        net_28=end_points['Conv2d_5_pointwise']

        #        up1=slim.conv2d_transpose(net_7,2,[4,4], stride=2,scope='deconv32')
        #        fu1=tf.add(up1,net_14,name='fu1')
        #
        #        up2=slim.conv2d_transpose(fu1,2,[4,4], stride=2,scope='deconv16')
        #        fu2=tf.add(up2,net_28,name='fu2')

        logit = slim.conv2d_transpose(net,
                                      2, [64, 64],
                                      stride=32,
                                      scope='deconv8')

        prediction = tf.argmax(logit, dimension=3)  #, name="prediction")

        print('logit', logit)

        return logit, tf.expand_dims(prediction, axis=3)
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        net = proposal_feature_maps

        conv_depth = 1024
        if self._skip_last_stride:
            conv_depth_ratio = float(
                self._conv_depth_ratio_in_percentage) / 100.0
            conv_depth = int(float(conv_depth) * conv_depth_ratio)

        depth = lambda d: max(int(d * 1.0), 16)
        with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights):
            with slim.arg_scope(
                    mobilenet_v2.training_scope(
                        is_training=self._train_batch_norm,
                        weight_decay=self._weight_decay)):
                with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                    padding='SAME'):
                    net = slim.separable_conv2d(
                        net,
                        depth(conv_depth), [3, 3],
                        depth_multiplier=1,
                        stride=2,
                        scope='Conv_2')  # or 'layer_20'
                    return slim.separable_conv2d(
                        net,
                        depth(conv_depth), [3, 3],
                        depth_multiplier=1,
                        stride=1,
                        scope='Conv_3')  # or 'layer_21'
Example no. 25
def _get_endpoints(model_name, img_tensor):
    if model_name == "res50":
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_50(img_tensor,
                                                   1000,
                                                   is_training=False)
        return end_points["predictions"]

    elif model_name == "res152":
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_152(img_tensor,
                                                    1000,
                                                    is_training=False)
        return end_points["predictions"]

    elif model_name.startswith("mobilenet"):
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            _, endpoints = mobilenet_v2.mobilenet(img_tensor)
        return endpoints["Predictions"]
Example no. 26
  def extract_features(self, inputs):
    """Extracts features from inputs.

    This function adds 4 additional feature maps on top of 
    'layer_15/expansion_output' and 'layer_19' in the base Mobilenet v2 network.

    Args:
      inputs: a tensor of shape [batch_size, height, with, channels],
        holding the input images.

    Returns: 
      a list of 6 float tensors of shape [batch_size, height, width, channels],
        holding feature map tensors to be fed to box predictor.
    """
    feature_map_specs_dict = {
        'layer_name': ['layer_15/expansion_output', 'layer_19', 
            None, None, None, None],
        'layer_depth': [None, None, 512, 256, 256, 128]}

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)):
        _, end_points = mobilenet_v2.mobilenet_base(
            inputs, 
            final_endpoint='layer_19', 
            depth_multiplier=self._depth_multiplier, 
            scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.ssd_feature_maps(
            feature_map_tensor_dict=end_points,
            feature_map_specs_dict=feature_map_specs_dict,
            depth_multiplier=1,
            use_depthwise=self._use_depthwise,
            insert_1x1_conv=True)
        feature_map_list = list(feature_maps.values())
        return feature_map_list
Example no. 27
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                _, activations = mobilenet_v2.mobilenet_base(
                    preprocessed_inputs,
                    final_endpoint='layer_19',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)

        return activations['layer_19'], activations

#1.2 Build the graph structure first, then load the weights
#Temporarily add slim to the Python search path
import sys
sys.path.append('./models/research/slim')

#Import mobilenet_v2
from nets.mobilenet import mobilenet_v2
#Reset the default graph
tf.reset_default_graph()

#Import mobilenet and build the graph structure first
#After this, tf.get_default_graph() contains the MobileNet graph; compare tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) before and after reset_default_graph() to see what was added
images = tf.placeholder(tf.float32,(None, 224, 224, 3))
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training = False)):
    logits, endpoints = mobilenet_v2.mobilenet(images, depth_multiplier = 1.4)
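
# As the comment above suggests, a quick way to see what the builder added
# (a minimal check, not part of the original snippet):
print(len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)))  # 0 right after reset_default_graph(), >0 once mobilenet is built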

#Define a Saver to restore the graph weights
saver = tf.train.Saver()
with tf.Session() as sess:
    #latest_checkpoint looks through the checkpoint file and finds the most recent model
    #restore then reloads the graph weights
    saver.restore(sess, tf.train.latest_checkpoint('./model_ckpt/moilenet_v2'))
    #get_tensor_by_name fetches a tensor by its name
    print(sess.run(tf.get_default_graph().get_tensor_by_name('MobilenetV2/Conv/weights:0')).shape)


#1.3 Frozen inference
"""
A .pb file stores the variable values and the whole graph structure in a single file, produced via convert_variables_to_constants
tf.reset_default_graph()
# For simplicity we just decode jpeg inside tensorflow.
# But one can provide any input obviously.
file_input = tf.placeholder(tf.string, ())

image = tf.image.decode_jpeg(tf.read_file(file_input))

images = tf.expand_dims(image, 0)
images = tf.cast(images, tf.float32) / 128. - 1
images.set_shape((None, None, None, 3))
images = tf.image.resize_images(images, (224, 224))

# images = tf.placeholder(tf.float32, (None, 224, 224, 3))

# Note: arg_scope is optional for inference.
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    logits, endpoints = mobilenet_v2.mobilenet(images)

# Restore using exponential moving average since it produces (1.5-2%) higher
# accuracy
ema = tf.train.ExponentialMovingAverage(0.999)
vars = ema.variables_to_restore()

saver = tf.train.Saver(vars)

from datasets import imagenet
with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    x = endpoints['Predictions'].eval(feed_dict={file_input: 'imgs/dog.jpg'})

    # writer = tf.summary.FileWriter("TensorBoard/", graph=sess.graph)
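
# The comment at the top of this block mentions convert_variables_to_constants, but
# the excerpt never shows the actual freezing step. A minimal sketch of that step,
# reusing `saver`/`checkpoint` from above; the output node name and the .pb path are
# assumptions, not taken from the snippet:
with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    # Bake the variable values into constants so graph and weights live in one .pb file.
    frozen_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ['MobilenetV2/Predictions/Reshape_1'])
    with tf.gfile.GFile('mobilenet_v2_frozen.pb', 'wb') as f:
        f.write(frozen_graph_def.SerializeToString())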
Example no. 30
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(int(d * self._depth_multiplier), self.
                                     _min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps
Example no. 31
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.99)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with slim.arg_scope(
            training_scope(l2_weight_decay=4e-5,
                           is_training=self._is_training)):

          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_18',
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

    multiplier_func = functools.partial(
        _apply_multiplier,
        multiplier=self._depth_multiplier,
        min_depth=self._min_depth)
    with tf.variable_scope('MnasFPN', reuse=self._reuse_weights):
      with slim.arg_scope(
          training_scope(l2_weight_decay=1e-4, is_training=self._is_training)):
        # Create C6 by downsampling C5.
        c6 = slim.max_pool2d(
            _maybe_pad(image_features['layer_18'], self._use_explicit_padding),
            [3, 3],
            stride=[2, 2],
            padding='VALID' if self._use_explicit_padding else 'SAME',
            scope='C6_downsample')
        c6 = slim.conv2d(
            c6,
            multiplier_func(self._fpn_layer_depth),
            [1, 1],
            activation_fn=tf.identity,
            normalizer_fn=slim.batch_norm,
            weights_regularizer=None,  # this 1x1 has no kernel regularizer.
            padding='VALID',
            scope='C6_Conv1x1')
        image_features['C6'] = tf.identity(c6)  # Needed for quantization.
        for k in sorted(image_features.keys()):
          tf.logging.error('{}: {}'.format(k, image_features[k]))

        mnasfpn_inputs = [
            image_features['layer_7'],  # C3
            image_features['layer_14'],  # C4
            image_features['layer_18'],  # C5
            image_features['C6']  # C6
        ]
        self._verify_config(mnasfpn_inputs)
        feature_maps = mnasfpn(
            mnasfpn_inputs,
            head_def=self._head_def,
            output_channel=self._fpn_layer_depth,
            use_explicit_padding=self._use_explicit_padding,
            use_native_resize_op=self._use_native_resize_op,
            multiplier_func=multiplier_func)
    return feature_maps
  def extract_features(self, preprocessed_inputs, state_saver=None,
                       state_name='lstm_state', unroll_length=10, scope=None):
    """Extract features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <scope>/MobilenetV2_1/...
    <scope>/MobilenetV2_2/...
    <scope>/LSTM/...
    <scope>/FeatureMap/...

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: Python string, the name to use with the state_saver.
      unroll_length: number of steps to unroll the lstm.
      scope: Scope for the base network of the feature extractor.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    Raises:
      ValueError: if interleave_method not recognized or large and small base
        network output feature maps of different sizes.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    preprocessed_inputs = ops.pad_to_multiple(
        preprocessed_inputs, self._pad_to_multiple)
    batch_size = preprocessed_inputs.shape[0].value / unroll_length
    batch_axis = 0
    nets = []

    # Batch processing of mobilenet features.
    with slim.arg_scope(mobilenet_v2.training_scope(
        is_training=self._is_training,
        bn_decay=0.9997)), \
        slim.arg_scope([mobilenet.depth_multiplier],
                       min_depth=self._min_depth, divisible_by=8):
      # Big model.
      net, _ = self.extract_base_features_large(preprocessed_inputs)
      nets.append(net)
      large_base_feature_shape = net.shape

      # Small models
      net, _ = self.extract_base_features_small(preprocessed_inputs)
      nets.append(net)
      small_base_feature_shape = net.shape
      if not (large_base_feature_shape[1] == small_base_feature_shape[1] and
              large_base_feature_shape[2] == small_base_feature_shape[2]):
        raise ValueError('Large and Small base network feature map dimension '
                         'not equal!')

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('LSTM', reuse=self._reuse_weights):
        output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
        lstm_cell, init_state, step = self.create_lstm_cell(
            batch_size, output_size, state_saver, state_name)

        nets_seq = [
            tf.split(net, unroll_length, axis=batch_axis) for net in nets
        ]

        net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
            nets_seq,
            init_state,
            lstm_cell,
            step,
            selection_strategy=self._interleave_method,
            is_training=self._is_training,
            is_quantized=self._is_quantized,
            pre_bottleneck=self._pre_bottleneck,
            flatten_state=self._flatten_state,
            scope=None)
        self._states_out = states_out

      batcher_ops = None
      if state_saver is not None:
        self._step = state_saver.state(state_name + '_step')
        batcher_ops = [
            state_saver.save_state(state_name + '_c', states_out[-1][0]),
            state_saver.save_state(state_name + '_h', states_out[-1][1]),
            state_saver.save_state(state_name + '_step', self._step + 1)]
      image_features = {}
      with tf_ops.control_dependencies(batcher_ops):
        image_features['layer_19'] = tf.concat(net_seq, 0)

      # SSD layers.
      with tf.variable_scope('FeatureMap'):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=self._feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features,
            pool_residual=True)
    return feature_maps.values()
Example no. 33
def backbone_net(inputs, image_size, is_training=True, depth_multiplier=0.5):
    
    pad_to_multiple = 14 if image_size == 112 else (10 if image_size == 80 else 8)
    use_explicit_padding = False
    depth_multiplier = depth_multiplier

    print('construct backbone_net for image_size', image_size, 'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    specs = [
            op(slim.conv2d, stride=2, num_outputs=64, kernel_size=[3, 3]),
            # todo: Depthwise Conv3×3
            op(slim.separable_conv2d, stride=1, kernel_size=[3, 3], num_outputs=None, multiplier_func=dummy_depth_multiplier),    
            # 56^2 x 64 input, bottleneck: t=2, c=64, n=5, s=2
            op(ops.expanded_conv, stride=2, num_outputs=64),            
        ]
    for _ in range(0, 4):
        specs.append(op(ops.expanded_conv, stride=1, num_outputs=64))

    # 28^2 x 64 input, bottleneck: t=2, c=128, n=1, s=2
    specs.append(op(ops.expanded_conv, stride=2, num_outputs=128))

    # 14^2 x 128 input, bottleneck: t=4, c=128, n=6, s=1
    for _ in range(0, 6):            
        specs.append(op(ops.expanded_conv, 
            expansion_size=expand_input(4), 
            num_outputs=128,
            stride=1))

    kernel_size = [7, 7] if image_size == 112 else ([5,5] if image_size == 80 else [4,4])
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(op(slim.conv2d, stride=2, kernel_size=[3, 3], num_outputs=32, scope='S2'))
    specs.append(op(slim.conv2d, stride=1, kernel_size=kernel_size, 
        num_outputs=128, scope='S3', padding='VALID'))

    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture
            # that's why they are here and not in training_scope.
            (slim.batch_norm,): {'center': True, 'scale': True},
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv,): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME', 'weights_initializer': slim.xavier_initializer()}
        },

        spec=specs
    )

    print('input to backbone_net ' , inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training, bn_decay=0.9997)), \
            slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                if override_base_feature_extractor_hyperparams else
                context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                  od_ops.pad_to_multiple(inputs, pad_to_multiple),                  
                  depth_multiplier=depth_multiplier,
                  is_training=is_training,
                  use_explicit_padding=use_explicit_padding,
                  conv_defs=arch,
                  scope=scope)
                # do a fully connected layer here
                # TODO
                layer_15 = image_features['layer_15']
                layer_16 = image_features['layer_16']
                layer_17 = image_features['layer_17']
                # batch_size = tf.shape(S1)[0]                

                S1 = slim.flatten(layer_15, scope='S1flatten') # tf.reshape(S1, [batch_size, -1])
                S2 = slim.flatten(layer_16, scope='S2flatten') # [batch_size, -1])
                S3 = slim.flatten(layer_17, scope='S3flatten') # [batch_size, -1])
                before_dense = tf.concat([S1, S2, S3], 1)
                
                for i in range(1, 18):
                    print('layer_' + str(i), image_features['layer_' + str(i)])
                # print('layer_17', layer_17)
                print('S1', S1)
                print('S2', S2)
                print('S3', S3)

                # to_test = slim.conv2d(image_features['layer_19'])
                print('image image_features', image_features.keys())
                with slim.arg_scope([slim.batch_norm], is_training=is_training, center=True, scale=True):
                    return image_features, slim.fully_connected(before_dense, 
                            136, 
                            activation_fn=tf.nn.relu6,
                            normalizer_fn=slim.batch_norm,
                            weights_initializer=slim.xavier_initializer()), (image_features['layer_1'], inputs, image_features['layer_2'])
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              conv_defs=_CONV_DEFS if self._use_depthwise else None,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'layer_4', 'layer_7', 'layer_14', 'layer_19'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_depthwise=self._use_depthwise)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            if self._use_depthwise:
              conv_op = functools.partial(
                  slim.separable_conv2d, depth_multiplier=1)
            else:
              conv_op = slim.conv2d
            last_feature_map = conv_op(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
            feature_maps.append(last_feature_map)
    return feature_maps
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
  """Maps style images to the style embeddings using MobileNetV2.

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: string. Scope names of the activations of the transformer
        network which are used to apply style normalization.
    activation_depths: Shapes of the activations of the transformer network
        which are used to apply style normalization.
    mobilenet_end_point: string. Specifies the endpoint to construct the
        MobileNetV2 network up to. This network is part of the style prediction
        network.
    mobilenet_trainable: bool. Should the MobileNetV2 parameters be marked
        as trainable?
    style_params_trainable: bool. Should the mapping from bottleneck to
        beta and gamma parameters be marked as trainable?
    style_prediction_bottleneck: int. Specifies the bottleneck size in the
        number of parameters of the style embedding.
    reuse: bool. Whether to reuse model parameters. Defaults to False.

  Returns:
    Tensor for the output of the style prediction network, Tensor for the
        bottleneck of style parameters of the style prediction network.
  """
  with tf.name_scope('style_prediction_mobilenet') and tf.variable_scope(
      tf.get_variable_scope(), reuse=reuse):
    with slim.arg_scope(mobilenet_v2.training_scope(
        is_training=mobilenet_trainable)):
      _, end_points = mobilenet.mobilenet_base(
          style_input_,
          conv_defs=mobilenet_v2.V2_DEF,
          final_endpoint=mobilenet_end_point,
          scope='MobilenetV2'
      )

    feat_convlayer = end_points[mobilenet_end_point]
    with tf.name_scope('bottleneck'):
      # (batch_size, 1, 1, depth).
      bottleneck_feat = tf.reduce_mean(
          feat_convlayer, axis=[1, 2], keep_dims=True)

    if style_prediction_bottleneck > 0:
      with tf.variable_scope('mobilenet_conv'):
        with slim.arg_scope(
            [slim.conv2d],
            activation_fn=None,
            normalizer_fn=None,
            trainable=mobilenet_trainable):
          # (batch_size, 1, 1, style_prediction_bottleneck).
          bottleneck_feat = slim.conv2d(bottleneck_feat,
                                        style_prediction_bottleneck, [1, 1])

    style_params = {}
    with tf.variable_scope('style_params'):
      for i in range(len(activation_depths)):
        with tf.variable_scope(activation_names[i], reuse=reuse):
          with slim.arg_scope(
              [slim.conv2d],
              activation_fn=None,
              normalizer_fn=None,
              trainable=style_params_trainable):
            # Computing beta parameter of the style normalization for the
            # activation_names[i] layer of the style transformer network.
            # (batch_size, 1, 1, activation_depths[i])
            beta = slim.conv2d(bottleneck_feat, activation_depths[i], [1, 1])
            # (batch_size, activation_depths[i])
            beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
            style_params['{}/beta'.format(activation_names[i])] = beta

            # Computing gamma parameter of the style normalization for the
            # activation_names[i] layer of the style transformer network.
            # (batch_size, 1, 1, activation_depths[i])
            gamma = slim.conv2d(bottleneck_feat, activation_depths[i], [1, 1])
            # (batch_size, activation_depths[i])
            gamma = tf.squeeze(gamma, [1, 2], name='SpatialSqueeze')
            style_params['{}/gamma'.format(activation_names[i])] = gamma

  return style_params, bottleneck_feat