Code example #1
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        '''
    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }
    '''
        feature_map_layout = {
            'from_layer': [
                'FeatureExtractor/vgg_16/conv4/conv4_3',
                'FeatureExtractor/vgg_16/fc7', '', '', '', ''
            ],
            'layer_depth': [-1, -1, 256, 128, 128, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }
        with tf.variable_scope('vgg_16', reuse=self._reuse_weights) as scope:
            with slim.arg_scope(vgg.vgg_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    # TODO(skligys): Enable fused batch norm once quantization supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):

                        _, image_features = vgg.vgg_16(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            num_classes=None,
                            is_training=self._is_training,
                            scope=scope)
            print(image_features.keys())
            print(image_features.values())
            with slim.arg_scope(self._conv_hyperparams_fn()):
                # TODO(skligys): Enable fused batch norm once quantization supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
Code example #2
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
            with slim.arg_scope(
                    vgg.vgg_arg_scope(weight_decay=self._weight_decay)):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self._train_batch_norm):
                    blocks = [
                        resnet_utils.Block('block4', resnet_v1.bottleneck,
                                           [{
                                               'depth': 2048,
                                               'depth_bottleneck': 512,
                                               'stride': 1
                                           }] * 3)
                    ]
                    proposal_classifier_features = resnet_utils.stack_blocks_dense(
                        proposal_feature_maps, blocks)
        return proposal_classifier_features
Code example #3
    def __init__(self):
        from nets import vgg

        self.image_size = 224
        self.num_classes = 1000
        self.predictions_is_correct = False
        self.use_larger_step_size = False
        self.use_smoothed_grad = False

        # For dataprior attacks. gamma = A^2 * D / d in the paper
        self.gamma = 4.5

        batch_shape = [None, self.image_size, self.image_size, 3]
        self.x_input = tf.placeholder(tf.float32, shape=batch_shape)
        self.target_label = tf.placeholder(tf.int32, shape=[None])
        target_onehot = tf.one_hot(self.target_label, self.num_classes)

        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(self.x_input,
                                            num_classes=self.num_classes,
                                            is_training=False)

        self.predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1)
        #logits -= tf.reduce_min(logits)
        #real = tf.reduce_max(logits * target_onehot, 1)
        #other = tf.reduce_max(logits * (1 - target_onehot), 1)
        #self.loss = other - real
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=target_onehot, logits=logits)
        self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0]

        saver = tf.train.Saver(slim.get_model_variables(scope='vgg_16'))
        self.sess = tf.get_default_session()
        saver.restore(self.sess, 'vgg_16.ckpt')
Code example #4
File: eval_s_vgg16.py  Project: shigenius/tf-yolo_v3
def specific_object_recognition(image_size, num_classes_s):
    # Define placeholders
    with tf.name_scope('input'):
        with tf.name_scope('cropped_images'):
            cropped_images_placeholder = tf.placeholder(dtype="float32",
                                                        shape=(None,
                                                               image_size,
                                                               image_size, 3))
        with tf.name_scope('labels'):
            labels_placeholder = tf.placeholder(dtype="float32",
                                                shape=(None, num_classes_s))
        keep_prob = tf.placeholder(dtype="float32")
        is_training = tf.placeholder(dtype="bool")  # train flag

    # Build the graph
    with slim.arg_scope(vgg_arg_scope()):
        logits, _ = vgg_16(cropped_images_placeholder,
                           num_classes=num_classes_s,
                           is_training=True,
                           reuse=None)

    predictions = tf.nn.softmax(logits, name='Predictions')
    predict_labels = tf.argmax(predictions, 1)

    return predict_labels, [cropped_images_placeholder, keep_prob, is_training]
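A minimal usage sketch for the function above, assuming the module-level imports of eval_s_vgg16.py (slim, vgg_16, vgg_arg_scope) are in place; the checkpoint path, class count, and zero-filled batch are placeholders, not values from the original project.

import numpy as np
import tensorflow as tf

predict_op, (images_ph, keep_prob_ph, is_training_ph) = \
    specific_object_recognition(image_size=224, num_classes_s=5)

with tf.Session() as sess:
    # Hypothetical fine-tuned checkpoint; substitute the project's real path.
    tf.train.Saver().restore(sess, 'path/to/fine_tuned_vgg16.ckpt')
    batch = np.zeros((1, 224, 224, 3), dtype=np.float32)
    predicted = sess.run(predict_op,
                         feed_dict={images_ph: batch,
                                    keep_prob_ph: 1.0,
                                    is_training_ph: False})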
Code example #5
def test_vgg_19(img_dir):
    """
    Test VGG-19 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(vgg_arg_scope()):
        _, _ = vgg_19(inputs, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/vgg_19.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name('vgg_19/fc8/squeezed:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of VGG-19:', name, prob)
    return name, prob
Code example #6
def model(image):
    # Subtract the image mean
    image = mean_image_subtraction(image)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        conv5_3 = vgg.vgg_16(image)

    # Convolution operation
    rpn_conv = slim.conv2d(conv5_3, 512, 3)

    lstm_output = Bilstm(rpn_conv, 512, 128, 512, scope_name='BiLSTM')

    bbox_pred = lstm_fc(lstm_output, 512, 10 * 4, scope_name="bbox_pred")
    cls_pred = lstm_fc(lstm_output, 512, 10 * 2, scope_name="cls_pred")

    # transpose: (1, H, W, A x d) -> (1, H, WxA, d)
    cls_pred_shape = tf.shape(cls_pred)
    cls_pred_reshape = tf.reshape(
        cls_pred, [cls_pred_shape[0], cls_pred_shape[1], -1, 2])

    cls_pred_reshape_shape = tf.shape(cls_pred_reshape)
    cls_prob = tf.reshape(tf.nn.softmax(
        tf.reshape(cls_pred_reshape, [-1, cls_pred_reshape_shape[3]])), [
            -1, cls_pred_reshape_shape[1], cls_pred_reshape_shape[2],
            cls_pred_reshape_shape[3]
        ],
                          name="cls_prob")

    return bbox_pred, cls_pred, cls_prob
Code example #7
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        if len(preprocessed_inputs.get_shape().as_list()) != 4:
            raise ValueError(
                '`preprocessed_inputs` must be 4 dimensional, got a '
                'tensor of shape %s' % preprocessed_inputs.get_shape())
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must be at least 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(
                    vgg.vgg_arg_scope(weight_decay=self._weight_decay)):
                with tf.variable_scope(self._architecture,
                                       reuse=self._reuse_weights) as var_scope:
                    _, endpoints = self._vgg_model(
                        preprocessed_inputs,
                        final_endpoint='conv5',
                        trainable=self._is_training,
                        freeze_layer=self._freeze_layer,
                        scope=var_scope)

        handle = self._base_features
        return endpoints[handle]
Code example #8
 def _extract_box_classifier_features(self, proposal_feature_maps, scope):
     with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
         with slim.arg_scope(
                 vgg.vgg_arg_scope(weight_decay=self._weight_decay)):
             proposal_classifier_features = tf.identity(
                 proposal_feature_maps)
     return proposal_classifier_features
Code example #9
    def build(self, cost, model, train):
        if model == "MobilenetV1":
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                self.logits, self.end_points = mobilenet_v1.mobilenet_v1(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=train)
        elif model == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                self.logits, self.end_points = vgg.vgg_16(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=True)

        self.prob = tf.nn.softmax(self.logits, name="prob")
        self.loss = tf.reduce_mean(
            tf.reduce_sum(tf.pow(self.prob - self.target, 2), axis=1))
        tf.summary.scalar('loss', self.loss)
        if cost == "mse":
            self.cost = self.loss
        else:
            self.xtarget = self.target * (1 - 1e-11) + 1e-12
            assert self.xtarget.get_shape().as_list()[1] == self.numb_logits
            self.xprob = self.prob * (1 - 1e-11) + 1e-12
            assert self.xprob.get_shape().as_list()[1] == self.numb_logits
            self.cost = tf.reduce_mean(
                tf.reduce_sum(self.xtarget * tf.log(self.xtarget / self.prob),
                              axis=1))
            tf.summary.scalar('cost_kl', self.cost)
Code example #10
    def build_cnn(self):
        with tf.contrib.slim.arg_scope(vgg.vgg_arg_scope()):
            _, end_points = vgg.vgg_19(inputs=self.images)
            net = end_points['vgg_19/fc7']  # shape = [batch size, 1, 1, 4096]

        with tf.variable_scope('mlc'):
            net = tf.contrib.slim.dropout(net,
                                          self.dropout_keep_prob,
                                          is_training=self.is_training,
                                          scope='dropout7')
            net = tf.contrib.slim.conv2d(
                net,
                1024, [1, 1],
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                scope='fc8')  # shape = [batch size, 1, 1, 1024]
            net = tf.contrib.slim.dropout(net,
                                          self.dropout_keep_prob,
                                          is_training=self.is_training,
                                          scope='dropout8')
            net = tf.contrib.slim.conv2d(
                net,
                self.label_num, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='fc9')  # shape = [batch size, 1, 1, 15]
            logits = tf.squeeze(net, [1, 2])  # shape = [batch size, 15]

        self.logits = logits
        self.predictions = tf.nn.sigmoid(logits)
        self.conv5_3_feats = end_points['vgg_19/conv5/conv5_3']
        print('cnn built.')
Code example #11
def model(image):
    image = _p_shape(image, "initial input")
    image = mean_image_subtraction(image)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # The resulting feature map is (m/16 x n/16 x 512)
        vgg_fc2 = vgg.vgg16(image)
        vgg_fc2 = _p_shape(vgg_fc2, "VGG conv5_3 output")
        vgg_fc2 = tf.squeeze(vgg_fc2, [1, 2])  # squeeze [1,1,4096,4096] => [4096,4096]; axes [1,2] rather than [0,1,2] because axis 0 is the batch

    logger.debug("vgg_fc2:%r", vgg_fc2.get_shape())

    # Commented out for now
    init_weights = tf.contrib.layers.variance_scaling_initializer(factor=0.01, mode='FAN_AVG', uniform=False)
    init_biases = tf.constant_initializer(0.0)
    w_fc1 = tf.get_variable("w_fc1", [4096,256], initializer=init_weights)
    w_b1 = tf.get_variable("w_b1", [256], initializer=init_biases)
    w_fc2 = tf.get_variable("w_fc2", [256, 4], initializer=init_weights)
    w_b2 = tf.get_variable("w_b2", [4],initializer=init_biases)

    # Attach two fully connected layers
    fc1 = tf.add(tf.matmul(vgg_fc2,w_fc1),w_b1)
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob=0.75)
    fc2 = tf.add(tf.matmul(fc1,w_fc2),w_b2)
    fc2 = tf.nn.relu(fc2)
    fc2 = _p_shape(fc2,"fc2 shape:\t")

    classes = tf.argmax(tf.nn.softmax(fc2),axis=1)
    classes = _p_shape(classes, "classes shape:\t")

    return fc2,classes
Code example #12
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        net = proposal_feature_maps

        with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
            with slim.arg_scope(
                    vgg.vgg_arg_scope(weight_decay=self._weight_decay)):
                net = slim.conv2d(net,
                                  4096, [7, 7],
                                  padding='VALID',
                                  scope='fc6')
                net = slim.dropout(net, 1, is_training=True, scope='dropout6')
                net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                net = slim.dropout(net, 1, is_training=True, scope='dropout7')
                net = slim.conv2d(net,
                                  3, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                proposal_classifier_features = net

        return proposal_classifier_features
Code example #13
def build_train_op(image_tensor, label_tensor, is_training):
    vgg_argscope = vgg_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(name="global_step",
                                  shape=[],
                                  dtype=tf.int32,
                                  trainable=False)
    with slim.arg_scope(vgg_argscope):
        logits, end_points = vgg_16(image_tensor,
                                    is_training=is_training,
                                    num_classes=10)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=label_tensor))
    accuracy = tf.reduce_sum(
        tf.cast(
            tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor),
            tf.int32))
    end_points['loss'], end_points['accuracy'] = loss, accuracy
    if is_training:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op, end_points
    else:
        return None, end_points
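A hedged driver sketch for build_train_op, assuming the module-level imports and FLAGS used above are already defined; the placeholder tensors and the zero-filled batch are illustrative only.

import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.int32, [None])
train_op, end_points = build_train_op(images, labels, is_training=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One illustrative training step on a dummy batch.
    feed = {images: np.zeros((2, 224, 224, 3), np.float32),
            labels: np.zeros((2,), np.int32)}
    _, loss_value = sess.run([train_op, end_points['loss']], feed_dict=feed)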
Code example #14
 def __call__(self, x_input):
     if (self.build):
         tf.get_variable_scope().reuse_variables()
     else:
         self.build = True
     inception_imags = (x_input / 255.0 - 0.5) * 2
     resized_images_vgg = tf.image.resize_images(
         x_input, [224, 224]) - tf.constant([123.68, 116.78, 103.94])
     with slim.arg_scope(vgg.vgg_arg_scope()):
         logits_vgg16, _ = self.network_fn_vgg16(
             resized_images_vgg,
             num_classes=self.num_classes,
             is_training=False)
     resized_images_res = (
         tf.image.resize_images(x_input, [224, 224]) / 255.0 - 0.5) * 2
     with slim.arg_scope(resnet_v2.resnet_arg_scope()):
         logits_res, _ = self.network_fn_res(resized_images_res,
                                             num_classes=self.num_classes +
                                             1,
                                             is_training=False)
     logits_res = tf.reshape(logits_res, (-1, 1001))
     logits_res = tf.slice(logits_res, [0, 1],
                           [FLAGS.batch_size, self.num_classes])
     with slim.arg_scope(inception_utils.inception_arg_scope()):
         logits_incepv3, _ = self.network_fn_incepv3(
             inception_imags,
             num_classes=self.num_classes + 1,
             is_training=False)
     logits_incepv3 = tf.slice(logits_incepv3, [0, 1],
                               [FLAGS.batch_size, self.num_classes])
     with slim.arg_scope(inception_utils.inception_arg_scope()):
         logits_incepv4, _ = self.network_fn_incepv4(
             inception_imags,
             num_classes=self.num_classes + 1,
             is_training=False)
     logits_incepv4 = tf.slice(logits_incepv4, [0, 1],
                               [FLAGS.batch_size, self.num_classes])
     with slim.arg_scope(
             inception_resnet_v2.inception_resnet_v2_arg_scope()):
         logits_incep_res, _ = self.network_fn_incep_res(
             inception_imags,
             num_classes=self.num_classes + 1,
             is_training=False)
     logits_incep_res = tf.slice(logits_incep_res, [0, 1],
                                 [FLAGS.batch_size, self.num_classes])
     alex_images = tf.image.resize_images(x_input, [256, 256])
     alex_images = tf.reverse(alex_images, axis=[-1])
     alex_mean_npy = np.load('model/alex_mean.npy').swapaxes(0, 1).swapaxes(
         1, 2).astype(np.float32)
     alex_mean_images = tf.constant(alex_mean_npy)
     alex_images = alex_images[:, ] - alex_mean_images
     alex_images = tf.slice(alex_images, [0, 14, 14, 0],
                            [FLAGS.batch_size, 227, 227, 3])
     _, logits_alex = self.network_fn_alex(alex_images)
     logits = [
         logits_vgg16, logits_res, logits_incepv3, logits_incepv4,
         logits_incep_res, logits_alex
     ]
     ensemble_logits = tf.reduce_mean(tf.stack(logits), 0)
     return ensemble_logits
Code example #15
def graph(x, y, i, x_max, x_min, grad):
    eps = FLAGS.max_epsilon
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1000

    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(x,
                                        num_classes=num_classes,
                                        is_training=False)

    pred = tf.argmax(logits, 1)

    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    cross_entropy = tf.losses.softmax_cross_entropy(one_hot,
                                                    logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(noise,
                                   stack_kernel,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise
    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
Code example #16
def train_vgg16(log_steps,save_summaries_sec,save_interval_secs,num_iterations = num_iterations_vgg):
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        global_step = slim.get_or_create_global_step()

        #dataset = plantclef2015.get_split('train', plant_data_dir)
        dataset = plantclef2015_all_labels.get_split('train', plant_data_dir)

        images,labels = load_batch(dataset, batch_size = batch_size, k=num_patches_vgg, r=r_rotations_vgg)

        # Add Images to summaries
        summaries.add(tf.summary.image("input_images", images, batch_size))

        # Create the models
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
             logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False)


        # Specify the loss function:
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        loss(logits, one_hot_labels)
        #slim.losses.softmax_cross_entropy(logits, one_hot_labels)
        #tf.losses.softmax_cross_entropy(one_hot_labels, logits)
        total_loss = slim.losses.get_total_loss()

        # Create some summaries to visualize the training process:
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))
        summaries.add(tf.summary.scalar('losses/Total_Loss', total_loss))

        # Specify the optimizer and create the train op:
        learning_rate = tf.train.exponential_decay(start_learning_rate, global_step,updating_iteration_for_learning_rate, updating_gamma, staircase=True)

        optimizer = tf.train.MomentumOptimizer(learning_rate= learning_rate, momentum=momentum)

        train_op = slim.learning.create_train_op(total_loss, optimizer)

        summaries.add(tf.summary.scalar('training/Learning_Rate', learning_rate))

        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Run the training:
        final_loss = slim.learning.train(
            train_op,
            logdir=train_vgg16_dir,
            log_every_n_steps=log_steps,
            global_step=global_step,
            number_of_steps= num_iterations,
            summary_op=summary_op,
            init_fn=get_init_fn_vgg(),
            save_summaries_secs=save_summaries_sec,
            save_interval_secs=save_interval_secs)

    print('Finished training. Last batch loss %f' % final_loss)
Code example #17
def vgg_19(inputs):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_19(
            inputs,
            num_classes=None,
            is_training=False,
            fc_conv_padding='VALID',
            global_pool=True)
    return logits, end_points, vgg_19_ckpt_path
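A short, hedged sketch of how this wrapper might be used for feature extraction; the placeholder tensor and the use of slim.assign_from_checkpoint_fn with the returned checkpoint path are assumptions, not part of the original snippet.

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
# With num_classes=None and global_pool=True the wrapper returns the
# global-pooled fc7 features of shape [batch, 1, 1, 4096].
net, end_points, ckpt_path = vgg_19(images)

init_fn = slim.assign_from_checkpoint_fn(ckpt_path,
                                         slim.get_model_variables('vgg_19'))
with tf.Session() as sess:
    init_fn(sess)
    feats = sess.run(net,
                     feed_dict={images: np.zeros((1, 224, 224, 3), np.float32)})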
Code example #18
def extract_features(input_file_path):
  image_names = os.listdir(input_file_path)
  print 'len image list', len(image_names)

  image_list = list()

  num_images = len(image_names)
  for i in range(num_images):
    img = image_names[i]
    image_list.append(os.path.join(input_file_path, img))
  print image_list[:10]
  images = image_list
  
  slim = tf.contrib.slim

  # Get the image size that vgg_19 accepts
  image_size = vgg.vgg_19.default_image_size
  preprocessed_images = list()
  out_features_list = []


  total_count = 0
  batch = 0
  while total_count < len(images):
    batch += 1
    print 'batch number', batch
  
    preprocessed_images = list()

    with tf.Graph().as_default():
      # This allows for default parameters
      with slim.arg_scope(vgg.vgg_arg_scope()):

        for c in range(10): 
          if total_count >= len(images):
            break  ##
          print total_count
          print images[total_count]
          image = tf.read_file(image_list[total_count])
          decoded_image = tf.image.decode_jpeg(image, channels=3)
          preprocessed_images.append(preproc.preprocess_image(decoded_image, image_size, image_size, is_training=True))
          total_count += 1

        stacked_images = tf.stack(preprocessed_images)
        print 'stacked images', stacked_images
        _, end_points = vgg.vgg_19(stacked_images, is_training=False)

      with tf.Session() as sess:
        print 'inside tf sess'
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        out_features = sess.run(stacked_images)

    out_features_list.extend(out_features)

    print 'accumulated features array'
    print np.array(out_features_list).shape
Code example #19
File: grad_cam.py  Project: yaoyidian/ad_img
def grad_cam(x_input, sess, image):
    image = (image + 1.0) * 0.5 * 255.0
    img_vgg = preprocess_for_model(x_input, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(img_vgg,
                                                      num_classes=110,
                                                      is_training=True,
                                                      scope='vgg_16',
                                                      reuse=True)
    end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']
    end_points_vgg_16['pool5'] = end_points_vgg_16['vgg_16/pool5']
    end_points_vgg_16['probs'] = tf.nn.softmax(end_points_vgg_16['logits'])
    predict = tf.argmax(end_points_vgg_16['probs'], 1)
    logits = end_points_vgg_16['logits']
    before_fc = end_points_vgg_16['pool5']
    probs = end_points_vgg_16['probs']
    nb_classes = 110
    conv_layer = before_fc
    one_hot = tf.one_hot(predict, 110)
    signal = tf.multiply(logits, one_hot)
    loss = tf.reduce_mean(signal)
    #loss = tf.losses.softmax_cross_entropy(one_hot,
    #                                          logits,
    #                                          label_smoothing=0.0,
    #                                          weights=1.0)
    grads = tf.gradients(loss, conv_layer)[0]
    norm_grads = tf.div(
        grads,
        tf.sqrt(tf.reduce_mean(tf.square(grads))) + tf.constant(1e-5))
    output, grads_val = sess.run([conv_layer, norm_grads],
                                 feed_dict={x_input: image})
    grads_val = grads_val[0]
    output = output[0]
    weights = np.mean(grads_val, axis=(0, 1))  # [512]
    cam = np.ones(output.shape[0:2], dtype=np.float32)  # [7,7]

    # Taking a weighted average
    for i, w in enumerate(weights):
        cam += w * output[:, :, i]
    # Passing through ReLU
    #cam = imresize(cam, (224,224))
    cam = np.maximum(cam, 0)
    cam = cam / np.max(cam)
    cam = imresize(cam, (224, 224))

    # Converting grayscale to 3-D
    cam3 = np.expand_dims(cam, axis=2)
    cam3 = np.tile(cam3, [1, 1, 3])

    img = image[0]
    img = img / np.max(img)

    # Superimposing the visualization with the image.
    new_img = img + cam3
    new_img = new_img / np.max(new_img)
    #new_img = new_img.astype(np.uint8)
    return cam3
Code example #20
def evaluate_vgg16(batch_size):
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)
        global_step = slim.get_or_create_global_step()


        dataset = plantclef2015.get_split('validation', plant_data_dir)
        images,labels = load_batch(dataset, batch_size = batch_size, k=num_patches_vgg, r=r_rotations_vgg, is_training =False)

        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
             logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False)


        total_output = []
        total_labels = []
        total_images = []

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(train_vgg16_dir))
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            for i in range(batch_size):
                print('step: %d/%d' % (i, batch_size))
                o, l, image = sess.run([logits, labels, images[0]])
                o = tf.reduce_sum(o, 0)/float(40)
                total_output.append(o)
                total_labels.append(l[0])
                total_images.append(image)
            coord.request_stop()
            coord.join(threads)


            total_output = tf.stack(total_output,0)
            total_output = tf.nn.softmax(total_output)
            labels = tf.constant(total_labels)
            total_images = sess.run(tf.stack(total_images,0))

            top1_op = tf.nn.in_top_k(total_output, labels, 1)
            top1_acc = sess.run(tf.reduce_mean(tf.cast(top1_op, tf.float32)))
            print(top1_acc)


            top5_op = tf.nn.in_top_k(total_output, labels, 5)
            top5_acc = sess.run(tf.reduce_mean(tf.cast(top5_op, tf.float32)))
            print(top5_acc)

            accuracy1_sum = tf.summary.scalar('top1_accuracy', top1_acc)
            accuracy5_sum = tf.summary.scalar('top5_accuracy', top5_acc)
            images_sum = tf.summary.image("input_images", total_images, batch_size)

            accuracy1, accuracy5, image_batch, step = sess.run([accuracy1_sum,accuracy5_sum,images_sum, global_step])
            writer = tf.summary.FileWriter(eval_vgg16_dir)
            writer.add_summary(accuracy1, step)
            writer.add_summary(accuracy5, step)
            writer.add_summary(image_batch)
Code example #21
def vgg_16(inputs, is_training, opts):
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=opts.weight_decay)):
        return vgg.vgg_16(
            inputs,
            num_classes=opts.num_classes,
            dropout_keep_prob=opts.dropout_keep_prob,
            spatial_squeeze=opts.spatial_squeeze,
            is_training=is_training,
            fc_conv_padding='VALID',
            global_pool=opts.global_pool)
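A brief usage sketch for the wrapper above; the SimpleNamespace of options is a stand-in for whatever configuration object (for example, parsed flags) the original project passes in, and the values shown are assumptions.

import types
import tensorflow as tf

# Hypothetical options; real values come from the project's configuration.
opts = types.SimpleNamespace(weight_decay=5e-4,
                             num_classes=1000,
                             dropout_keep_prob=0.5,
                             spatial_squeeze=True,
                             global_pool=False)

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = vgg_16(images, is_training=False, opts=opts)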
Code example #22
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
            'layer_depth': [-1, -1, -1, -1, -1, -1],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }
        '''
    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        _, image_features = inception_v2.inception_v2_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Mixed_5c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
    '''

        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0)):
            with tf.variable_scope('vgg16',
                                   reuse=self._reuse_weights) as var_scope:
                _, image_features = vgg.vgg_16_ssd(preprocessed_inputs,
                                                   num_classes=3,
                                                   is_training=True,
                                                   dropout_keep_prob=0.9,
                                                   spatial_squeeze=False,
                                                   scope=var_scope,
                                                   fc_conv_padding='VALID',
                                                   global_pool=False,
                                                   end_point='pool5')
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

        return feature_maps.values()
Code example #23
File: VGG19.py  Project: NovasMax/BigEarthNet
 def create_network(self):
     with tf.contrib.slim.arg_scope(vgg_arg_scope()):
         logits, end_points = vgg_19(self.img,
                                     num_classes=self.nb_class,
                                     is_training=self.is_training,
                                     fc_conv_padding='SAME',
                                     global_pool=True)
     self.logits = logits
     self.probabilities = tf.nn.sigmoid(self.logits)
     self.predictions = tf.cast(
         self.probabilities >= self.prediction_threshold, tf.float32)
Code example #24
def vgg_net(image, reuse=tf.AUTO_REUSE, keep_prop=0.5):
    image = tf.reshape(image, [-1, 224, 224, 3])
    with tf.variable_scope(name_or_scope='VGG16', reuse=reuse):
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_point = vgg.vgg_16(image,
                                           1000,
                                           is_training=True,
                                           dropout_keep_prob=keep_prop)
            probs = tf.nn.softmax(logits)  # probabilities
    return logits, probs, end_point
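A minimal, hedged driver for vgg_net on a dummy batch with freshly initialized weights. Note that because the layers live under the extra 'VGG16' variable scope, restoring the published vgg_16.ckpt directly would require a name mapping that strips the 'VGG16/' prefix; no restore is shown here.

import numpy as np
import tensorflow as tf

dummy = tf.constant(np.zeros((1, 224, 224, 3), dtype=np.float32))
logits, probs, end_point = vgg_net(dummy, keep_prop=1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(probs).shape)  # (1, 1000)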
Code example #25
def eval(params):
    batch_size = params['batch_size']
    num_examples = len(params['test_files'][0])
    with tf.Graph().as_default():
        batch = dut.distorted_inputs(params,is_training=is_training)

        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_19(batch[0], num_classes=params['n_output'], is_training=is_training)

        init_fn=ut.get_init_fn(slim,params)
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = params['per_process_gpu_memory_fraction']

        with tf.Session(config=config) as sess:
            # sess.run(tf.initialize_all_variables())
            sess.run(tf.initialize_local_variables())
            coord = tf.train.Coordinator()
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True))

            init_fn(sess)
            num_iter = int(math.ceil(num_examples / batch_size))
            print('%s: Testing started.' % (datetime.now()))

            step = 0
            loss_lst=[]
            run_lst=[]
            run_lst.append(logits)
            [run_lst.append(lst) for lst in batch[1:len(batch)]]

            while step < num_iter and not coord.should_stop():
                try:
                    batch_res= sess.run(run_lst)
                except tf.errors.OutOfRangeError:
                    print ('Testing finished....%d'%step)
                    break
                if(params['write_est']==True):
                    ut.write_est(params,batch_res)
                est=batch_res[0]
                gt=batch_res[1]
                loss= ut.get_loss(params,gt,est)
                loss_lst.append(loss)
                s ='VAL --> batch %i/%i | error %f'%(step,num_iter,loss)
                ut.log_write(s,params)
                # joint_list=['/'.join(p1.split('/')[0:-1]).replace('joints','img').replace('.cdf','')+'/frame_'+(p1.split('/')[-1].replace('.txt','')).zfill(5)+'.png' for p1 in image_names]
                # print ('List equality check:')
                # print len(label_names) == len(set(label_names))
                # print sum(joint_list==label_names)==(len(est))
                # print(len(label_names))
                step += 1
            coord.request_stop()
            coord.join(threads)
            return np.mean(loss_lst)
Code example #26
def get_network_logits_and_endpoints(network, images):
  if(network == 'inceptionV1'):
    with slim.arg_scope(inception.inception_v1_arg_scope(weight_decay=weight_decay)):
      logits, endpoints = inception.inception_v1(images, num_classes=1000, is_training=False)

  elif(network == 'vgg16'):

    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
      logits, endpoints = vgg.vgg_16(images, num_classes=1000, is_training=False)

  return logits,endpoints
Code example #27
async def process_image(image_path):
    image_size = vgg.vgg_16.default_image_size

    with tf.Graph().as_default():
        # Convert filepath string to string tensor
        #tf_filepath = tf.convert_to_tensor(image_path, dtype=tf.string)
        #tf_filepath = tf.convert_to_tensor(str(image_path), dtype=tf.string)

        # Read .JPEG image
        #tf_img_string = tf.read_file(tf_filepath)

        tf_img_string = tf.read_file(str(image_path))
        image = tf.image.decode_jpeg(tf_img_string, channels=3)

        processed_image = vgg_preprocessing.preprocess_image(image,
                                                             image_size,
                                                             image_size,
                                                             is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # 1000 classes instead of 1001.
            logits, _ = vgg.vgg_16(processed_images,
                                   num_classes=1000,
                                   is_training=False)
        probabilities = tf.nn.softmax(logits)

        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))

        with tf.Session() as sess:
            init_fn(sess)
            np_image, probabilities = sess.run([image, probabilities])
            probabilities = probabilities[0, 0:]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
            ]

        names = imagenet.create_readable_names_for_imagenet_labels()
        animals_found = []
        for i in range(5):
            index = sorted_inds[i]
            # Shift the index of a class name by one.
            # print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index+1]))
            animals_found.append(names[index + 1])
        return animals_found
Code example #28
def classify(url):

    with tf.Graph().as_default():
        image_string = req.urlopen(url).read()
        image = tf.image.decode_jpeg(image_string, channels=3)
        processed_image = vgg_preprocessing.preprocess_image(image,
                                                             image_size,
                                                             image_size,
                                                             is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model, use the default arg scope to configure
        # the batch norm parameters. arg_scope is a very convenient
        # feature of the slim library -- you can define default
        # parameters for layers -- like stride, padding etc.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, _ = vgg.vgg_16(processed_images,
                                   num_classes=1000,
                                   is_training=False)

        # In order to get probabilities we apply softmax on the output.
        probabilities = tf.nn.softmax(logits)

        # Create a function that reads the network weights
        # from the checkpoint file that you downloaded.
        # We will run it in session later.
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))

        with tf.Session() as sess:
            writer = tf.summary.FileWriter("/temp/logs", sess.graph)
            # Load weights
            init_fn(sess)

            # We want to get predictions, image as numpy matrix
            # and resized and cropped piece that is actually
            # being fed to the network.
            np_image, network_input, probabilities = sess.run(
                [image, processed_image, probabilities])
            probabilities = probabilities[0, 0:]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
            ]
        rst = ""
        for i in range(5):
            index = sorted_inds[i]
            pos = probabilities[index]
            name = names[str(index + 1)]
            rst += (name + ":" + str(pos) + "\n")
        return rst
Code example #29
def VGG_16(image_batch_tensor, is_training):
    '''
    Returns the VGG16 model definition for use within the FCN model.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, channels] Tensor
        Tensor containing a batch of input images.

    is_training : bool
        True if network is being trained, False otherwise. This controls whether
        dropout layers should be enabled. (Dropout is only enabled during training.)

    Returns
    -------
    conv7_features:
        Features with a stride length of 32 (The coarsest layer in the VGG16
        network). The layer is referred to as 'fc7' in the original VGG16 network.
        These features feed into the fc8 logits layer in the original network;
        however the 'fc8' layer has been removed in this implementation.

    pool4_features:
        Features with a stride length of 16. (Output of the 'pool4' layer.)

    pool3_features:
        Features with a stride length of 8. (Output of the 'pool3' layer.)
    '''
    # Convert image to float32 before subtracting the mean pixel values
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    with slim.arg_scope(vgg.vgg_arg_scope()):
        # By setting num_classes to 0 the logits layer is omitted and the input
        # features to the logits layer are returned instead. This logits layer
        # will be added as part of the FCN_32s model. (Note: Some FCN
        # implementations choose to use the 'fc8' logits layer that is already
        # present in the VGG16 network instead.)

        # fc_conv_padding = 'SAME' is necessary to ensure that downsampling/
        # upsampling work as expected. So, if an image with dimensions that are
        # multiples of 32 is fed into the network, the resultant FCN pixel
        # classification will have the same dimensions as the original image.
        conv7_features, end_points = vgg.vgg_16(mean_centered_image_batch,
                                                num_classes=0,
                                                is_training=is_training,
                                                spatial_squeeze=False,
                                                fc_conv_padding='SAME')

    return conv7_features, end_points['vgg_16/pool4'], end_points[
        'vgg_16/pool3']
Code example #30
def VGG16(image, reuse=tf.AUTO_REUSE):
    preprocess = lambda x: preprocess_image(x, 224, 224, is_training=False)
    preprocessed = tf.map_fn(preprocess, elems=image)
    # preprocessed = preprocess_for_eval(image, 224, 224, 256)
    arg_scope = vgg.vgg_arg_scope(weight_decay=0.0)
    with tf.variable_scope(name_or_scope='', reuse=reuse):
        with slim.arg_scope(arg_scope):
            logits, end_point = vgg.vgg_16(preprocessed,
                                           1000,
                                           is_training=False,
                                           dropout_keep_prob=1.0)
            probs = tf.nn.softmax(logits)  # probabilities
    return logits, probs, end_point
Code example #31
File: read_image.py  Project: databricks/tensorframes
    # the network.
    processed_image = vgg_preprocessing.preprocess_image(image,
                                                         image_size,
                                                         image_size,
                                                         is_training=False)

    # Networks accept images in batches.
    # The first dimension usually represents the batch size.
    # In our case the batch size is one.
    processed_images  = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure
    # the batch norm parameters. arg_scope is a very convenient
    # feature of the slim library -- you can define default
    # parameters for layers -- like stride, padding etc.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)

    # In order to get probabilities we apply softmax on the output.
    probabilities = tf.nn.softmax(logits)

    # Just focus on the top predictions
    top_pred = tf.nn.top_k(tf.squeeze(probabilities), 5, name="top_predictions")

    output_nodes = [probabilities, top_pred.indices, top_pred.values]


# Create the saver
with g.as_default():
Code example #32
def FCN_32s(image_batch_tensor,
            number_of_classes,
            is_training):
    """Returns the FCN-32s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses a bilinear
    upsampling kernel to upsample the prediction by a factor of 32. This means
    that if the image size is not a multiple of 32, a prediction of a different
    size will be delivered. To adapt the network to inputs of any size, use
    adapt_network_for_any_size_input(FCN_32s, 32). Note: the upsampling kernel
    is fixed in this model definition, because making it trainable didn't give
    significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the work of underlying dropout layer of VGG-16.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input;
        use adapt_network_for_any_size_input to adapt the network for any input size.
        Otherwise, the input image sizes should be multiples of 32.
    vgg_16_variables_mapping : dict {string: variable}
        Dict which maps the FCN-32s model's variables to VGG-16 checkpoint variable
        names. We need this to initialize the weights of the FCN-32s model from a
        VGG-16 checkpoint file. Look at the ipython notebook for examples.
    """
    
    with tf.variable_scope("fcn_32s") as fcn_32s_scope:

        upsample_factor = 32

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(vgg.vgg_arg_scope()):

            logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                            num_classes=number_of_classes,
                                            is_training=is_training,
                                            spatial_squeeze=False,
                                            fc_conv_padding='SAME')

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        upsampled_logits_shape = tf.stack([
                                          downsampled_logits_shape[0],
                                          downsampled_logits_shape[1] * upsample_factor,
                                          downsampled_logits_shape[2] * upsample_factor,
                                          downsampled_logits_shape[3]
                                         ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original vgg-16 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        vgg_16_variables_mapping = {}

        vgg_16_variables = slim.get_variables(fcn_32s_scope)

        for variable in vgg_16_variables:

            # Here we remove the part of a name of the variable
            # that is responsible for the current variable scope
            # original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.original_name_scope):-2]
            
            # Updated: changed .name_scope to .name because name_scope only affects operations
            # and variable scope is actually represented by .name
            original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.name)+1:-2]
            vgg_16_variables_mapping[original_vgg_16_checkpoint_string] = variable

    return upsampled_logits, vgg_16_variables_mapping
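Following the docstring's pointer to the notebook, a hedged sketch of how the returned variable mapping might be used to initialize from a VGG-16 checkpoint; the checkpoint path is a placeholder, and excluding fc8 (whose shape depends on number_of_classes) is a common but assumed step.

import tensorflow as tf
import tensorflow.contrib.slim as slim

images = tf.placeholder(tf.uint8, [1, 384, 384, 3])
upsampled_logits, vgg_16_variables_mapping = FCN_32s(images,
                                                     number_of_classes=21,
                                                     is_training=False)

# fc8 is re-shaped for the new class count, so it is usually excluded
# before restoring the ImageNet weights.
mapping_without_fc8 = {name: var
                       for name, var in vgg_16_variables_mapping.items()
                       if 'fc8' not in name}
init_fn = slim.assign_from_checkpoint_fn('path/to/vgg_16.ckpt',
                                         mapping_without_fc8)
with tf.Session() as sess:
    init_fn(sess)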
Code example #33
File: fcn_8s.py  Project: lighttxu/island-seg-fcn
def FCN_8s(image_batch_tensor,
           number_of_classes,
           is_training):
    """Returns the FCN-8s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses three bilinear
    upsampling layers to upsample the prediction by a total factor of 32. This
    means that if the image size is not a multiple of 32, a prediction of a
    different size will be delivered. To adapt the network to inputs of any size, use
    adapt_network_for_any_size_input(FCN_8s, 32). Note: the upsampling kernels
    are fixed in this model definition, because making them trainable didn't give
    significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the work of underlying dropout layer of VGG-16.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input;
        use adapt_network_for_any_size_input to adapt the network for any input size.
        Otherwise, the input image sizes should be multiples of 32.
    fcn_16s_variables_mapping : dict {string: variable}
        Dict which maps the FCN-8s model's variables to FCN-16s checkpoint variable
        names. We need this to initialize the weights of the FCN-8s model from an
        FCN-16s checkpoint file. Look at the ipython notebook for examples.
    """

    # Convert image to float32 before subtracting the
    # mean pixel value
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    upsample_filter_factor_2_np = bilinear_upsample_weights(factor=2,
                                                            number_of_classes=number_of_classes)

    upsample_filter_factor_8_np = bilinear_upsample_weights(factor=8,
                                                             number_of_classes=number_of_classes)

    upsample_filter_factor_2_tensor = tf.constant(upsample_filter_factor_2_np)
    upsample_filter_factor_8_tensor = tf.constant(upsample_filter_factor_8_np)

    with tf.variable_scope("fcn_8s", reuse = None)  as fcn_8s_scope:
        # Define the model that we want to use -- specify to use only two classes at the last layer
        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(vgg.vgg_arg_scope()):

            ## Original FCN-32s model definition

            last_layer_logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                                       num_classes=number_of_classes,
                                                       is_training=is_training,
                                                       spatial_squeeze=False,
                                                       fc_conv_padding='SAME')


            last_layer_logits_shape = tf.shape(last_layer_logits)


            # Calculate the output size of the upsampled tensor
            last_layer_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                  last_layer_logits_shape[0],
                                                                  last_layer_logits_shape[1] * 2,
                                                                  last_layer_logits_shape[2] * 2,
                                                                  last_layer_logits_shape[3]
                                                                 ])

            # Perform the upsampling
            last_layer_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(last_layer_logits,
                                                                             upsample_filter_factor_2_tensor,
                                                                             output_shape=last_layer_upsampled_by_factor_2_logits_shape,
                                                                             strides=[1, 2, 2, 1])

            ## Adding the skip here for FCN-16s model
            
            # We created vgg in the fcn_8s name scope -- so
            # all the vgg endpoints now are prepended with fcn_8s name
            pool4_features = end_points['fcn_8s/vgg_16/pool4']

            # We zero initialize the weights to start training with the same
            # accuracy that we ended training FCN-32s

            pool4_logits = slim.conv2d(pool4_features,
                                       number_of_classes,
                                       [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       weights_initializer=tf.zeros_initializer,
                                       scope='pool4_fc')

            fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits

            fused_last_layer_and_pool4_logits_shape = tf.shape(fused_last_layer_and_pool4_logits)
            
            
            

            # Calculate the output size of the upsampled tensor
            fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                          fused_last_layer_and_pool4_logits_shape[0],
                                                                          fused_last_layer_and_pool4_logits_shape[1] * 2,
                                                                          fused_last_layer_and_pool4_logits_shape[2] * 2,
                                                                          fused_last_layer_and_pool4_logits_shape[3]
                                                                         ])

            # Perform the upsampling
            fused_last_layer_and_pool4_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits,
                                                                        upsample_filter_factor_2_tensor,
                                                                        output_shape=fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape,
                                                                        strides=[1, 2, 2, 1])
            
            
            ## Adding the skip here for FCN-8s model

            pool3_features = end_points['fcn_8s/vgg_16/pool3']
            
            # We zero initialize the weights to start training with the same
            # accuracy that we ended training FCN-32s

            pool3_logits = slim.conv2d(pool3_features,
                                       number_of_classes,
                                       [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       weights_initializer=tf.zeros_initializer,
                                       scope='pool3_fc')
            
            
            fused_last_layer_and_pool4_logits_and_pool_3_logits = pool3_logits + \
                                            fused_last_layer_and_pool4_upsampled_by_factor_2_logits
            
            
            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape = tf.shape(fused_last_layer_and_pool4_logits_and_pool_3_logits)
            
            
            # Calculate the output size of the upsampled tensor
            fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.stack([
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[0],
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[1] * 8,
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[2] * 8,
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[3]
                                                                         ])

            # Perform the upsampling
            fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits_and_pool_3_logits,
                                                                        upsample_filter_factor_8_tensor,
                                                                        output_shape=fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape,
                                                                        strides=[1, 8, 8, 1])
            
            
            
            
            fcn_16s_variables_mapping = {}

            fcn_8s_variables = slim.get_variables(fcn_8s_scope)

            for variable in fcn_8s_variables:
                
                # We only need FCN-16s variables to restore from the checkpoint
                # Variables of FCN-8s should be initialized
                if 'pool3_fc' in variable.name:
                    continue

                # Here we remove the part of a name of the variable
                # that is responsible for the current variable scope
                original_fcn_16s_checkpoint_string = 'fcn_16s/' +  variable.name[len(fcn_8s_scope.original_name_scope):-2]
                fcn_16s_variables_mapping[original_fcn_16s_checkpoint_string] = variable

    return fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, fcn_16s_variables_mapping