def choose_model(x, model):
    """
    选择模型
    :param x:
    :param model:
    :return:
    """
    # 模型保存路径,模型名,预训练文件路径,前向传播
    if model == 'Alex':
        log_dir = "E:/alum/log/Alex"
        y, _ = alexnet.alexnet_v2(
            x,
            num_classes=CLASSES,  # number of output classes
            is_training=True,  # whether the network is training
            dropout_keep_prob=1.0,  # dropout keep probability
            spatial_squeeze=True,  # squeeze out singleton spatial dimensions
            global_pool=GLOBAL_POOL)  # global_pool is needed when the input is not the standard size
    elif model == 'VGG':
        log_dir = "E:/alum/log/VGG"
        y, _ = vgg.vgg_16(x,
                          num_classes=CLASSES,
                          is_training=True,
                          dropout_keep_prob=1.0,
                          spatial_squeeze=True,
                          global_pool=GLOBAL_POOL)
    elif model == 'VGG2':
        log_dir = "E:/alum/log/VGG2"
        y, _ = vgg.vgg_16(x,
                          num_classes=CLASSES,
                          is_training=True,
                          dropout_keep_prob=1.0,
                          spatial_squeeze=True,
                          global_pool=GLOBAL_POOL)
    elif model == 'Incep4':
        log_dir = "E:/alum/log/Incep4"
        y, _ = inception_v4.inception_v4(x,
                                         num_classes=CLASSES,
                                         is_training=True,
                                         dropout_keep_prob=1.0,
                                         reuse=None,
                                         scope='InceptionV4',
                                         create_aux_logits=True)
    elif model == 'Res':
        log_dir = "E:/alum/log/Res"
        y, _ = resnet_v2.resnet_v2_50(x,
                                      num_classes=CLASSES,
                                      is_training=True,
                                      global_pool=GLOBAL_POOL,
                                      output_stride=None,
                                      spatial_squeeze=True,
                                      reuse=None,
                                      scope='resnet_v2_50')
    else:
        print('Error: model name does not exist')
        return

    return y, log_dir
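
A minimal usage sketch (hedged: CLASSES, GLOBAL_POOL and the slim nets imports are assumed to be defined elsewhere in this project):

    x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='input')
    y, log_dir = choose_model(x, 'VGG')  # logits and log directory for VGG-16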
Example #2
def model(image):
    # Subtract the per-channel image mean
    image = mean_image_subtraction(image)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # Note: this assumes a project-local vgg_16 variant that returns the
        # conv5_3 feature map directly (the stock slim vgg_16 returns (net, end_points)).
        conv5_3 = vgg.vgg_16(image)

    # 3x3 convolution on top of conv5_3
    rpn_conv = slim.conv2d(conv5_3, 512, 3)

    lstm_output = Bilstm(rpn_conv, 512, 128, 512, scope_name='BiLSTM')

    bbox_pred = lstm_fc(lstm_output, 512, 10 * 4, scope_name="bbox_pred")
    cls_pred = lstm_fc(lstm_output, 512, 10 * 2, scope_name="cls_pred")

    # reshape: (1, H, W, A x d) -> (1, H, W x A, d)
    cls_pred_shape = tf.shape(cls_pred)
    cls_pred_reshape = tf.reshape(
        cls_pred, [cls_pred_shape[0], cls_pred_shape[1], -1, 2])

    cls_pred_reshape_shape = tf.shape(cls_pred_reshape)
    cls_prob = tf.reshape(
        tf.nn.softmax(
            tf.reshape(cls_pred_reshape, [-1, cls_pred_reshape_shape[3]])),
        [-1, cls_pred_reshape_shape[1], cls_pred_reshape_shape[2],
         cls_pred_reshape_shape[3]],
        name="cls_prob")

    return bbox_pred, cls_pred, cls_prob
def test_vgg_16(img_dir):
    """
    Test VGG-16 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(vgg_arg_scope()):
        _, _ = vgg_16(inputs, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/vgg_16.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name('vgg_16/fc8/squeezed:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of VGG-16:', name, prob)
    return name, prob
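
A hedged call example (the image path is hypothetical; label_dict and the ./models/vgg_16.ckpt checkpoint come from the surrounding script):

    name, prob = test_vgg_16('./images/sample.jpg')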
Example #4
def specific_object_recognition(image_size, num_classes_s):
    # Define placeholders
    with tf.name_scope('input'):
        with tf.name_scope('cropped_images'):
            cropped_images_placeholder = tf.placeholder(dtype="float32",
                                                        shape=(None,
                                                               image_size,
                                                               image_size, 3))
        with tf.name_scope('labels'):
            labels_placeholder = tf.placeholder(dtype="float32",
                                                shape=(None, num_classes_s))
        keep_prob = tf.placeholder(dtype="float32")
        is_training = tf.placeholder(dtype="bool")  # train flag

    # Build the graph
    with slim.arg_scope(vgg_arg_scope()):
        logits, _ = vgg_16(cropped_images_placeholder,
                           num_classes=num_classes_s,
                           is_training=True,
                           reuse=None)

    predictions = tf.nn.softmax(logits, name='Predictions')
    predict_labels = tf.argmax(predictions, 1)

    return predict_labels, [cropped_images_placeholder, keep_prob, is_training]
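
A hedged driver sketch (restoring pretrained weights is omitted; images_np is an assumed numpy batch of shape [N, 224, 224, 3]):

    predict_op, (images_ph, keep_prob_ph, is_training_ph) = specific_object_recognition(224, 10)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        preds = sess.run(predict_op, feed_dict={images_ph: images_np,
                                                keep_prob_ph: 1.0,
                                                is_training_ph: False})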
Example #5
 def testNoClasses(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = None
     with self.test_session():
         inputs = tf.random_uniform((batch_size, height, width, 3))
         net, end_points = vgg.vgg_16(inputs, num_classes)
         expected_names = [
             'vgg_16/conv1/conv1_1',
             'vgg_16/conv1/conv1_2',
             'vgg_16/pool1',
             'vgg_16/conv2/conv2_1',
             'vgg_16/conv2/conv2_2',
             'vgg_16/pool2',
             'vgg_16/conv3/conv3_1',
             'vgg_16/conv3/conv3_2',
             'vgg_16/conv3/conv3_3',
             'vgg_16/pool3',
             'vgg_16/conv4/conv4_1',
             'vgg_16/conv4/conv4_2',
             'vgg_16/conv4/conv4_3',
             'vgg_16/pool4',
             'vgg_16/conv5/conv5_1',
             'vgg_16/conv5/conv5_2',
             'vgg_16/conv5/conv5_3',
             'vgg_16/pool5',
             'vgg_16/fc6',
             'vgg_16/fc7',
         ]
         self.assertSetEqual(set(end_points.keys()), set(expected_names))
         self.assertTrue(net.op.name.startswith('vgg_16/fc7'))
Example #6
    def build(self, cost, model, train):
        if model == "MobilenetV1":
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                self.logits, self.end_points = mobilenet_v1.mobilenet_v1(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=train)
        elif model == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                self.logits, self.end_points = vgg.vgg_16(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=train)  # use the train flag, as in the MobilenetV1 branch

        self.prob = tf.nn.softmax(self.logits, name="prob")
        self.loss = tf.reduce_mean(
            tf.reduce_sum(tf.pow(self.prob - self.target, 2), axis=1))
        tf.summary.scalar('loss', self.loss)
        if cost == "mse":
            self.cost = self.loss
        else:
            self.xtarget = self.target * (1 - 1e-11) + 1e-12
            assert self.xtarget.get_shape().as_list()[1] == self.numb_logits
            self.xprob = self.prob * (1 - 1e-11) + 1e-12
            assert self.xprob.get_shape().as_list()[1] == self.numb_logits
            # use the smoothed xprob (not the raw prob) so the log ratio stays finite
            self.cost = tf.reduce_mean(
                tf.reduce_sum(self.xtarget * tf.log(self.xtarget / self.xprob),
                              axis=1))
            tf.summary.scalar('cost_kl', self.cost)
Example #7
def build_train_op(image_tensor, label_tensor, is_training):
    vgg_argscope = vgg_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(name="global_step",
                                  shape=[],
                                  dtype=tf.int32,
                                  trainable=False)
    with slim.arg_scope(vgg_argscope):
        logits, end_points = vgg_16(image_tensor,
                                    is_training=is_training,
                                    num_classes=10)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=label_tensor))
    accuracy = tf.reduce_sum(
        tf.cast(
            tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor),
            tf.int32))
    end_points['loss'], end_points['accuracy'] = loss, accuracy
    if is_training:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op, end_points
    else:
        return None, end_points
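
A hedged usage sketch (the placeholder shapes and FLAGS values are assumptions):

    images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
    labels = tf.placeholder(tf.int32, shape=[None])
    train_op, end_points = build_train_op(images, labels, is_training=True)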
Example #8
    def __call__(self, x_input, batch_size, is_training=False):
        """Constructs model and return probabilities for given input."""
        reuse = True if self.built else None

        # ResNet V1 and VGG have different preprocessing
        preproc = tf.map_fn(
            lambda img: vgg_preprocess(0.5 * 255.0 * (img + 1.0),
                                       vgg.vgg_16.default_image_size,
                                       vgg.vgg_16.default_image_size),
            x_input)

        with tf.variable_scope(self.name):
            logits, end_points = vgg.vgg_16(preproc,
                                            num_classes=self.num_classes - 1,
                                            is_training=is_training)

        # VGG and ResNetV1 don't have a background class
        background_class = tf.constant(-np.inf,
                                       dtype=tf.float32,
                                       shape=[batch_size, 1])
        logits = tf.concat([background_class, logits], axis=1)

        preds = tf.argmax(logits, axis=1)

        self.built = True
        self.logits = logits
        self.preds = preds
        return logits
Example #9
 def get_feature_map(self):
     input_image = self._processor()
     net, endpoints = vgg.vgg_16(input_image,
                                 num_classes=None,
                                 is_training=self.is_training)
     feature_map = endpoints['vgg_16/conv5/conv5_3']
     return feature_map
def graph(x, y, i, x_max, x_min, grad):
    eps = FLAGS.max_epsilon
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1000

    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(x,
                                        num_classes=num_classes,
                                        is_training=False)

    pred = tf.argmax(logits, 1)

    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    cross_entropy = tf.losses.softmax_cross_entropy(one_hot,
                                                    logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(noise,
                                   stack_kernel,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise
    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
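
graph is written as a tf.while_loop body; a hedged driver sketch in the style of the usual MI-FGSM attack scripts (the [-1, 1] input range and the initial values are assumptions):

    x_max = tf.clip_by_value(x_input + FLAGS.max_epsilon, -1.0, 1.0)
    x_min = tf.clip_by_value(x_input - FLAGS.max_epsilon, -1.0, 1.0)
    y = tf.zeros(shape=[tf.shape(x_input)[0]], dtype=tf.int64)  # filled on the first iteration
    i = tf.constant(0)
    grad = tf.zeros_like(x_input)
    x_adv, _, _, _, _, _ = tf.while_loop(
        lambda x, y, i, x_max, x_min, grad: tf.less(i, FLAGS.num_iter),
        graph, [x_input, y, i, x_max, x_min, grad])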
 def testEndPoints(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     inputs = tf.random_uniform((batch_size, height, width, 3))
     _, end_points = vgg.vgg_16(inputs, num_classes)
     expected_names = ['vgg_16/conv1/conv1_1',
                       'vgg_16/conv1/conv1_2',
                       'vgg_16/pool1',
                       'vgg_16/conv2/conv2_1',
                       'vgg_16/conv2/conv2_2',
                       'vgg_16/pool2',
                       'vgg_16/conv3/conv3_1',
                       'vgg_16/conv3/conv3_2',
                       'vgg_16/conv3/conv3_3',
                       'vgg_16/pool3',
                       'vgg_16/conv4/conv4_1',
                       'vgg_16/conv4/conv4_2',
                       'vgg_16/conv4/conv4_3',
                       'vgg_16/pool4',
                       'vgg_16/conv5/conv5_1',
                       'vgg_16/conv5/conv5_2',
                       'vgg_16/conv5/conv5_3',
                       'vgg_16/pool5',
                       'vgg_16/fc6',
                       'vgg_16/fc7',
                       'vgg_16/fc8'
                      ]
     self.assertSetEqual(set(end_points.keys()), set(expected_names))
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        '''
    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }
    '''
        feature_map_layout = {
            'from_layer': [
                'FeatureExtractor/vgg_16/conv4/conv4_3',
                'FeatureExtractor/vgg_16/fc7', '', '', '', ''
            ],
            'layer_depth': [-1, -1, 256, 128, 128, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }
        with tf.variable_scope('vgg_16', reuse=self._reuse_weights) as scope:
            with slim.arg_scope(vgg.vgg_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    # TODO(skligys): Enable fused batch norm once quantization supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):

                        _, image_features = vgg.vgg_16(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            num_classes=None,
                            is_training=self._is_training,
                            scope=scope)
            print(image_features.keys())
            print(image_features.values())
            with slim.arg_scope(self._conv_hyperparams_fn()):
                # TODO(skligys): Enable fused batch norm once quantization supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
Example #13
    def __init__(self):
        from nets import vgg

        self.image_size = 224
        self.num_classes = 1000
        self.predictions_is_correct = False
        self.use_larger_step_size = False
        self.use_smoothed_grad = False

        # For dataprior attacks. gamma = A^2 * D / d in the paper
        self.gamma = 4.5

        batch_shape = [None, self.image_size, self.image_size, 3]
        self.x_input = tf.placeholder(tf.float32, shape=batch_shape)
        self.target_label = tf.placeholder(tf.int32, shape=[None])
        target_onehot = tf.one_hot(self.target_label, self.num_classes)

        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(self.x_input,
                                            num_classes=self.num_classes,
                                            is_training=False)

        self.predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1)
        #logits -= tf.reduce_min(logits)
        #real = tf.reduce_max(logits * target_onehot, 1)
        #other = tf.reduce_max(logits * (1 - target_onehot), 1)
        #self.loss = other - real
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=target_onehot, logits=logits)
        self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0]

        saver = tf.train.Saver(slim.get_model_variables(scope='vgg_16'))
        self.sess = tf.get_default_session()
        saver.restore(self.sess, 'vgg_16.ckpt')
Example #16
def models(inputs, is_training=True, dropout_keep_prob=0.5):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        net, endpoints = vgg.vgg_16(inputs,
                                    num_classes,
                                    is_training=is_training,
                                    dropout_keep_prob=dropout_keep_prob)
    return net
def train_vgg16(log_steps, save_summaries_sec, save_interval_secs, num_iterations=num_iterations_vgg):
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        global_step = slim.get_or_create_global_step()

        #dataset = plantclef2015.get_split('train', plant_data_dir)
        dataset = plantclef2015_all_labels.get_split('train', plant_data_dir)

        images, labels = load_batch(dataset, batch_size=batch_size, k=num_patches_vgg, r=r_rotations_vgg)

        # Add Images to summaries
        summaries.add(tf.summary.image("input_images", images, batch_size))

        # Create the models
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
            logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False)

        # Specify the loss function:
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        loss(logits, one_hot_labels)
        #slim.losses.softmax_cross_entropy(logits, one_hot_labels)
        #tf.losses.softmax_cross_entropy(one_hot_labels, logits)
        total_loss = slim.losses.get_total_loss()

        # Create some summaries to visualize the training process:
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))
        summaries.add(tf.summary.scalar('losses/Total_Loss', total_loss))

        # Specify the optimizer and create the train op:
        learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                   global_step,
                                                   updating_iteration_for_learning_rate,
                                                   updating_gamma,
                                                   staircase=True)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum)

        train_op = slim.learning.create_train_op(total_loss, optimizer)

        summaries.add(tf.summary.scalar('training/Learning_Rate', learning_rate))

        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Run the training:
        final_loss = slim.learning.train(
            train_op,
            logdir=train_vgg16_dir,
            log_every_n_steps=log_steps,
            global_step=global_step,
            number_of_steps=num_iterations,
            summary_op=summary_op,
            init_fn=get_init_fn_vgg(),
            save_summaries_secs=save_summaries_sec,
            save_interval_secs=save_interval_secs)

    print('Finished training. Last batch loss %f' % final_loss)
Example #18
def grad_cam(x_input, sess, image):
    image = (image + 1.0) * 0.5 * 255.0
    img_vgg = preprocess_for_model(x_input, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(img_vgg,
                                                      num_classes=110,
                                                      is_training=True,
                                                      scope='vgg_16',
                                                      reuse=True)
    end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']
    end_points_vgg_16['pool5'] = end_points_vgg_16['vgg_16/pool5']
    end_points_vgg_16['probs'] = tf.nn.softmax(end_points_vgg_16['logits'])
    predict = tf.argmax(end_points_vgg_16['probs'], 1)
    logits = end_points_vgg_16['logits']
    before_fc = end_points_vgg_16['pool5']
    probs = end_points_vgg_16['probs']
    nb_classes = 110
    conv_layer = before_fc
    one_hot = tf.one_hot(predict, 110)
    signal = tf.multiply(logits, one_hot)
    loss = tf.reduce_mean(signal)
    #loss = tf.losses.softmax_cross_entropy(one_hot,
    #                                          logits,
    #                                          label_smoothing=0.0,
    #                                          weights=1.0)
    grads = tf.gradients(loss, conv_layer)[0]
    norm_grads = tf.div(
        grads,
        tf.sqrt(tf.reduce_mean(tf.square(grads))) + tf.constant(1e-5))
    output, grads_val = sess.run([conv_layer, norm_grads],
                                 feed_dict={x_input: image})
    grads_val = grads_val[0]
    output = output[0]
    weights = np.mean(grads_val, axis=(0, 1))  # [512]
    cam = np.ones(output.shape[0:2], dtype=np.float32)  # [7,7]

    # Taking a weighted average
    for i, w in enumerate(weights):
        cam += w * output[:, :, i]
    # Passing through ReLU
    #cam = imresize(cam, (224,224))
    cam = np.maximum(cam, 0)
    cam = cam / np.max(cam)
    cam = imresize(cam, (224, 224))

    # Converting grayscale to 3-D
    cam3 = np.expand_dims(cam, axis=2)
    cam3 = np.tile(cam3, [1, 1, 3])

    img = image[0]
    img = img / np.max(img)

    # Superimposing the visualization with the image.
    new_img = img + cam3
    new_img = new_img / np.max(new_img)
    #new_img = new_img.astype(np.uint8)
    return cam3
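
A hedged usage note: grad_cam assumes a vgg_16 graph in scope 'vgg_16' already exists (hence reuse=True) and that sess holds restored weights; the batch shape is an assumption:

    cam3 = grad_cam(x_input, sess, images)  # images: a [1, 224, 224, 3] batch scaled to [-1, 1]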
Example #19
 def testForward(self):
     batch_size = 1
     height, width = 224, 224
     with self.test_session() as sess:
         inputs = tf.random_uniform((batch_size, height, width, 3))
         logits, _ = vgg.vgg_16(inputs)
         sess.run(tf.global_variables_initializer())
         output = sess.run(logits)
         self.assertTrue(output.any())
def evaluate_vgg16(batch_size):
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)
        global_step = slim.get_or_create_global_step()


        dataset = plantclef2015.get_split('validation', plant_data_dir)
        images, labels = load_batch(dataset, batch_size=batch_size, k=num_patches_vgg, r=r_rotations_vgg, is_training=False)

        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
            logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False)


        total_output = []
        total_labels = []
        total_images = []

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(train_vgg16_dir))
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            for i in range(batch_size):
                print('step: %d/%d' % (i, batch_size))
                o, l, image = sess.run([logits, labels, images[0]])
                o = tf.reduce_sum(o, 0) / float(40)
                total_output.append(o)
                total_labels.append(l[0])
                total_images.append(image)
            coord.request_stop()
            coord.join(threads)


            total_output = tf.stack(total_output, 0)
            total_output = tf.nn.softmax(total_output)
            labels = tf.constant(total_labels)
            total_images = sess.run(tf.stack(total_images, 0))

            top1_op = tf.nn.in_top_k(total_output, labels, 1)
            top1_acc = sess.run(tf.reduce_mean(tf.cast(top1_op, tf.float32)))
            print(top1_acc)


            top5_op = tf.nn.in_top_k(total_output, labels, 5)
            top5_acc = sess.run(tf.reduce_mean(tf.cast(top5_op, tf.float32)))
            print(top5_acc)

            accuracy1_sum = tf.summary.scalar('top1_accuracy', top1_acc)
            accuracy5_sum = tf.summary.scalar('top5_accuracy', top5_acc)
            images_sum = tf.summary.image("input_images", total_images, batch_size)

            accuracy1, accuracy5, image_batch, step = sess.run(
                [accuracy1_sum, accuracy5_sum, images_sum, global_step])
            writer = tf.summary.FileWriter(eval_vgg16_dir)
            writer.add_summary(accuracy1, step)
            writer.add_summary(accuracy5, step)
            writer.add_summary(image_batch)
 def testBuild(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     inputs = tf.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(inputs, num_classes)
     self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, num_classes])
Example #23
 def testFullyConvolutional(self):
     batch_size = 1
     height, width = 256, 256
     num_classes = 1000
     with self.test_session():
         inputs = tf.random_uniform((batch_size, height, width, 3))
         logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
         self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
         self.assertListEqual(logits.get_shape().as_list(),
                              [batch_size, 2, 2, num_classes])
Example #25
 def testModelVariables(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = 1000
     with self.test_session():
         inputs = tf.random_uniform((batch_size, height, width, 3))
         vgg.vgg_16(inputs, num_classes)
         expected_names = [
             'vgg_16/conv1/conv1_1/weights',
             'vgg_16/conv1/conv1_1/biases',
             'vgg_16/conv1/conv1_2/weights',
             'vgg_16/conv1/conv1_2/biases',
             'vgg_16/conv2/conv2_1/weights',
             'vgg_16/conv2/conv2_1/biases',
             'vgg_16/conv2/conv2_2/weights',
             'vgg_16/conv2/conv2_2/biases',
             'vgg_16/conv3/conv3_1/weights',
             'vgg_16/conv3/conv3_1/biases',
             'vgg_16/conv3/conv3_2/weights',
             'vgg_16/conv3/conv3_2/biases',
             'vgg_16/conv3/conv3_3/weights',
             'vgg_16/conv3/conv3_3/biases',
             'vgg_16/conv4/conv4_1/weights',
             'vgg_16/conv4/conv4_1/biases',
             'vgg_16/conv4/conv4_2/weights',
             'vgg_16/conv4/conv4_2/biases',
             'vgg_16/conv4/conv4_3/weights',
             'vgg_16/conv4/conv4_3/biases',
             'vgg_16/conv5/conv5_1/weights',
             'vgg_16/conv5/conv5_1/biases',
             'vgg_16/conv5/conv5_2/weights',
             'vgg_16/conv5/conv5_2/biases',
             'vgg_16/conv5/conv5_3/weights',
             'vgg_16/conv5/conv5_3/biases',
             'vgg_16/fc6/weights',
             'vgg_16/fc6/biases',
             'vgg_16/fc7/weights',
             'vgg_16/fc7/biases',
             'vgg_16/fc8/weights',
             'vgg_16/fc8/biases',
         ]
         model_variables = [v.op.name for v in slim.get_model_variables()]
         self.assertSetEqual(set(model_variables), set(expected_names))
Example #26
def vgg_16(inputs, is_training, opts):
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=opts.weight_decay)):
        return vgg.vgg_16(
            inputs,
            num_classes=opts.num_classes,
            dropout_keep_prob=opts.dropout_keep_prob,
            spatial_squeeze=opts.spatial_squeeze,
            is_training=is_training,
            fc_conv_padding='VALID',
            global_pool=opts.global_pool)
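
A hedged usage sketch; opts only needs the attributes read above, so a SimpleNamespace stand-in works (the values and the images tensor are assumptions):

    from types import SimpleNamespace

    opts = SimpleNamespace(weight_decay=5e-4, num_classes=1000, dropout_keep_prob=0.5,
                           spatial_squeeze=True, global_pool=False)
    logits, end_points = vgg_16(images, is_training=True, opts=opts)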
Example #29
def get_network_logits_and_endpoints(network, images):
  if network == 'inceptionV1':
    with slim.arg_scope(inception.inception_v1_arg_scope(weight_decay=weight_decay)):
      logits, endpoints = inception.inception_v1(images, num_classes=1000, is_training=False)

  elif network == 'vgg16':
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
      logits, endpoints = vgg.vgg_16(images, num_classes=1000, is_training=False)

  return logits, endpoints
Example #30
 def testEvaluation(self):
   batch_size = 2
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     eval_inputs = tf.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, num_classes])
     predictions = tf.argmax(logits, 1)
     self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
Example #31
def vgg_net(image, reuse=tf.AUTO_REUSE, keep_prob=0.5):
    image = tf.reshape(image, [-1, 224, 224, 3])
    with tf.variable_scope(name_or_scope='VGG16', reuse=reuse):
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_point = vgg.vgg_16(image,
                                           1000,
                                           is_training=True,
                                           dropout_keep_prob=keep_prob)
            probs = tf.nn.softmax(logits)  # probabilities
    return logits, probs, end_point
Example #33
def classify(url):

    with tf.Graph().as_default():
        image_string = req.urlopen(url).read()
        image = tf.image.decode_jpeg(image_string, channels=3)
        processed_image = vgg_preprocessing.preprocess_image(image,
                                                             image_size,
                                                             image_size,
                                                             is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model, use the default arg scope to configure
        # the batch norm parameters. arg_scope is a very convenient
        # feature of the slim library -- you can define default
        # parameters for layers -- like stride, padding etc.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, _ = vgg.vgg_16(processed_images,
                                   num_classes=1000,
                                   is_training=False)

        # In order to get probabilities we apply softmax on the output.
        probabilities = tf.nn.softmax(logits)

        # Create a function that reads the network weights
        # from the checkpoint file that you downloaded.
        # We will run it in session later.
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))

        with tf.Session() as sess:
            writer = tf.summary.FileWriter("/temp/logs", sess.graph)
            # Load weights
            init_fn(sess)

            # We want to get predictions, image as numpy matrix
            # and resized and cropped piece that is actually
            # being fed to the network.
            np_image, network_input, probabilities = sess.run(
                [image, processed_image, probabilities])
            probabilities = probabilities[0, 0:]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
            ]
        rst = ""
        for i in range(5):
            index = sorted_inds[i]
            pos = probabilities[index]
            name = names[str(index + 1)]
            rst += (name + ":" + str(pos) + "\n")
        return rst
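
A hedged call (the URL is hypothetical; checkpoints_dir and the names mapping come from the surrounding script):

    print(classify('http://example.com/some_image.jpg'))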
Example #34
async def process_image(image_path):
    image_size = vgg.vgg_16.default_image_size

    with tf.Graph().as_default():
        # Read the .JPEG image from disk and decode it
        tf_img_string = tf.read_file(str(image_path))
        image = tf.image.decode_jpeg(tf_img_string, channels=3)

        processed_image = vgg_preprocessing.preprocess_image(image,
                                                             image_size,
                                                             image_size,
                                                             is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # 1000 classes instead of 1001.
            logits, _ = vgg.vgg_16(processed_images,
                                   num_classes=1000,
                                   is_training=False)
        probabilities = tf.nn.softmax(logits)

        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))

        with tf.Session() as sess:
            init_fn(sess)
            np_image, probabilities = sess.run([image, probabilities])
            probabilities = probabilities[0, 0:]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
            ]

        names = imagenet.create_readable_names_for_imagenet_labels()
        animals_found = []
        for i in range(5):
            index = sorted_inds[i]
            # Shift the index of a class name by one.
            # print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index+1]))
            animals_found.append(names[index + 1])
        return animals_found
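
Since process_image is a coroutine, it has to be driven by an event loop; a hedged sketch (the image path is hypothetical):

    import asyncio

    animals = asyncio.get_event_loop().run_until_complete(
        process_image('/path/to/photo.jpg'))
    print(animals)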
Example #35
def VGG_16(image_batch_tensor, is_training):
    '''
    Returns the VGG16 model definition for use within the FCN model.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, channels] Tensor
        Tensor containing a batch of input images.

    is_training : bool
        True if network is being trained, False otherwise. This controls whether
        dropout layers should be enabled. (Dropout is only enabled during training.)

    Returns
    -------
    conv7_features:
        Features with a stride length of 32 (The coarsest layer in the VGG16
        network). The layer is referred to as 'fc7' in the original VGG16 network.
        These features feed into the fc8 logits layer in the original network;
        however the 'fc8' layer has been removed in this implementation.

    pool4_features:
        Features with a stride length of 16. (Output of the 'pool4' layer.)

    pool3_features:
        Features with a stride length of 8. (Output of the 'pool3' layer.)
    '''
    # Convert image to float32 before subtracting the mean pixel values
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    with slim.arg_scope(vgg.vgg_arg_scope()):
        # By setting num_classes to 0 the logits layer is omitted and the input
        # features to the logits layer are returned instead. This logits layer
        # will be added as part of the FCN_32s model. (Note: Some FCN
        # implementations choose to use the 'fc8' logits layer that is already
        # present in the VGG16 network instead.)

        # fc_conv_padding = 'SAME' is necessary to ensure that downsampling/
        # upsampling work as expected. So, if an image with dimensions that are
        # multiples of 32 is fed into the network, the resultant FCN pixel
        # classification will have the same dimensions as the original image.
        conv7_features, end_points = vgg.vgg_16(mean_centered_image_batch,
                                                num_classes=0,
                                                is_training=is_training,
                                                spatial_squeeze=False,
                                                fc_conv_padding='SAME')

    return (conv7_features,
            end_points['vgg_16/pool4'],
            end_points['vgg_16/pool3'])
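
A hedged usage sketch (images is an assumed image batch tensor):

    conv7, pool4, pool3 = VGG_16(images, is_training=True)  # strides 32, 16 and 8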
def VGG16(image, reuse=tf.AUTO_REUSE):
    preprocess = lambda x: preprocess_image(x, 224, 224, is_training=False)
    preprocessed = tf.map_fn(preprocess, elems=image)
    # preprocessed = preprocess_for_eval(image, 224, 224, 256)
    arg_scope = vgg.vgg_arg_scope(weight_decay=0.0)
    with tf.variable_scope(name_or_scope='', reuse=reuse):
        with slim.arg_scope(arg_scope):
            logits, end_point = vgg.vgg_16(preprocessed,
                                           1000,
                                           is_training=False,
                                           dropout_keep_prob=1.0)
            probs = tf.nn.softmax(logits)  # probabilities
    return logits, probs, end_point
 def testTrainEvalWithReuse(self):
   train_batch_size = 2
   eval_batch_size = 1
   train_height, train_width = 224, 224
   eval_height, eval_width = 256, 256
   num_classes = 1000
   with self.test_session():
     train_inputs = tf.random_uniform(
         (train_batch_size, train_height, train_width, 3))
     logits, _ = vgg.vgg_16(train_inputs)
     self.assertListEqual(logits.get_shape().as_list(),
                          [train_batch_size, num_classes])
     tf.get_variable_scope().reuse_variables()
     eval_inputs = tf.random_uniform(
         (eval_batch_size, eval_height, eval_width, 3))
     logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
                            spatial_squeeze=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [eval_batch_size, 2, 2, num_classes])
     logits = tf.reduce_mean(logits, [1, 2])
     predictions = tf.argmax(logits, 1)
     self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
Example #39
def main():
    """
    You can also run these commands manually to generate the pb file
    1. git clone https://github.com/tensorflow/models.git
    2. export PYTHONPATH=Path_to_your_model_folder
    3. python alexnet.py
    """
    height, width = 224, 224
    inputs = tf.Variable(tf.random_uniform((1, height, width, 3)), name='input')
    inputs = tf.identity(inputs, "input_node")
    net, end_points = vgg.vgg_16(inputs, is_training=False)
    print("nodes in the graph")
    for n in end_points:
        print(n + " => " + str(end_points[n]))
    net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x), argv[2].split(','))
    run_model(net_outputs, argv[1], 'vgg_16', argv[3] == 'True')
Example #40
def FCN_8s(image_batch_tensor,
           number_of_classes,
           is_training):
    """Returns the FCN-8s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses three bilinear
    upsampling layers to upsample the prediction by a factor of 32. This means
    that if the image size is not a multiple of 32, a prediction of a different
    size will be delivered. To adapt the network to inputs of any size, use
    adapt_network_for_any_size_input(FCN_8s, 32). Note: the upsampling kernel
    is fixed in this model definition, because it didn't give significant
    improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the behavior of the underlying VGG-16 dropout layers.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input;
        use adapt_network_for_any_size_input to adapt the network to any
        input size. Otherwise, the input image sizes should be multiples of 32.
    fcn_16s_variables_mapping : dict {string: variable}
        Dict which maps the FCN-8s model's variables to FCN-16s checkpoint
        variable names. We need this to initialize the weights of the FCN-8s
        model from an FCN-16s checkpoint file. Look at the ipython notebook
        for examples.
    """

    # Convert image to float32 before subtracting the
    # mean pixel value
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    upsample_filter_factor_2_np = bilinear_upsample_weights(factor=2,
                                                            number_of_classes=number_of_classes)

    upsample_filter_factor_8_np = bilinear_upsample_weights(factor=8,
                                                             number_of_classes=number_of_classes)

    upsample_filter_factor_2_tensor = tf.constant(upsample_filter_factor_2_np)
    upsample_filter_factor_8_tensor = tf.constant(upsample_filter_factor_8_np)

    with tf.variable_scope("fcn_8s", reuse = None)  as fcn_8s_scope:
        # Define the model that we want to use -- specify to use only two classes at the last layer
        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(vgg.vgg_arg_scope()):

            ## Original FCN-32s model definition

            last_layer_logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                                       num_classes=number_of_classes,
                                                       is_training=is_training,
                                                       spatial_squeeze=False,
                                                       fc_conv_padding='SAME')


            last_layer_logits_shape = tf.shape(last_layer_logits)


            # Calculate the output size of the upsampled tensor
            last_layer_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                  last_layer_logits_shape[0],
                                                                  last_layer_logits_shape[1] * 2,
                                                                  last_layer_logits_shape[2] * 2,
                                                                  last_layer_logits_shape[3]
                                                                 ])

            # Perform the upsampling
            last_layer_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(last_layer_logits,
                                                                             upsample_filter_factor_2_tensor,
                                                                             output_shape=last_layer_upsampled_by_factor_2_logits_shape,
                                                                             strides=[1, 2, 2, 1])

            ## Adding the skip here for FCN-16s model
            
            # We created vgg in the fcn_8s name scope -- so
            # all the vgg endpoints now are prepended with fcn_8s name
            pool4_features = end_points['fcn_8s/vgg_16/pool4']

            # We zero initialize the weights to start training with the same
            # accuracy that we ended training FCN-32s

            pool4_logits = slim.conv2d(pool4_features,
                                       number_of_classes,
                                       [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       weights_initializer=tf.zeros_initializer,
                                       scope='pool4_fc')

            fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits

            fused_last_layer_and_pool4_logits_shape = tf.shape(fused_last_layer_and_pool4_logits)

            # Calculate the output size of the upsampled tensor
            fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                          fused_last_layer_and_pool4_logits_shape[0],
                                                                          fused_last_layer_and_pool4_logits_shape[1] * 2,
                                                                          fused_last_layer_and_pool4_logits_shape[2] * 2,
                                                                          fused_last_layer_and_pool4_logits_shape[3]
                                                                         ])

            # Perform the upsampling
            fused_last_layer_and_pool4_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits,
                                                                        upsample_filter_factor_2_tensor,
                                                                        output_shape=fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape,
                                                                        strides=[1, 2, 2, 1])
            
            
            ## Adding the skip here for FCN-8s model

            pool3_features = end_points['fcn_8s/vgg_16/pool3']
            
            # We zero initialize the weights to start training with the same
            # accuracy that we ended training FCN-32s

            pool3_logits = slim.conv2d(pool3_features,
                                       number_of_classes,
                                       [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       weights_initializer=tf.zeros_initializer,
                                       scope='pool3_fc')
            
            
            fused_last_layer_and_pool4_logits_and_pool_3_logits = pool3_logits + \
                                            fused_last_layer_and_pool4_upsampled_by_factor_2_logits
            
            
            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape = tf.shape(
                fused_last_layer_and_pool4_logits_and_pool_3_logits)

            # Calculate the output size of the upsampled tensor
            fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.stack([
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[0],
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[1] * 8,
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[2] * 8,
                                                                          fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[3]
                                                                         ])

            # Perform the upsampling
            fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits_and_pool_3_logits,
                                                                        upsample_filter_factor_8_tensor,
                                                                        output_shape=fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape,
                                                                        strides=[1, 8, 8, 1])

            fcn_16s_variables_mapping = {}

            fcn_8s_variables = slim.get_variables(fcn_8s_scope)

            for variable in fcn_8s_variables:
                
                # We only need the FCN-16s variables to restore from the
                # checkpoint; variables new in FCN-8s (pool3_fc) are
                # initialized from scratch (zeros, see above).
                if 'pool3_fc' in variable.name:
                    continue

                # Here we strip the part of the variable's name that comes
                # from the current variable scope ('fcn_8s/').
                original_fcn_16s_checkpoint_string = 'fcn_16s/' +  variable.name[len(fcn_8s_scope.original_name_scope):-2]
                fcn_16s_variables_mapping[original_fcn_16s_checkpoint_string] = variable

    return fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, fcn_16s_variables_mapping
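
# A minimal usage sketch (not part of the original): restore the FCN-16s
# weights into this graph via the mapping returned above. It assumes the
# enclosing definition is FCN_8s(image_batch_tensor, number_of_classes,
# is_training), mirroring FCN_32s below; 'fcn_16s.ckpt' is a hypothetical
# checkpoint path.
def _fcn_8s_restore_sketch():
    image_batch = tf.placeholder(tf.uint8, shape=(1, 384, 512, 3))
    upsampled_logits, fcn_16s_mapping = FCN_8s(image_batch,
                                               number_of_classes=21,
                                               is_training=False)
    init_fn = slim.assign_from_checkpoint_fn('fcn_16s.ckpt', fcn_16s_mapping)
    with tf.Session() as sess:
        # Run every variable's own initializer first (pool3_fc starts at
        # zero), then overwrite the mapped variables with FCN-16s weights.
        sess.run(tf.global_variables_initializer())
        init_fn(sess)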
Exemple #41
    # (snippet truncated above -- assuming slim's standard vgg_preprocessing
    # here, which resizes and central-crops the decoded `image` for evaluation)
    processed_image = vgg_preprocessing.preprocess_image(image,
                                                         image_size,
                                                         image_size,
                                                         is_training=False)

    # Networks accept images in batches.
    # The first dimension usually represents the batch size.
    # In our case the batch size is one.
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure
    # the batch norm parameters. arg_scope is a very convenient
    # feature of the slim library -- you can define default
    # parameters for layers, like stride, padding, etc.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)

    # In order to get probabilities we apply softmax on the output.
    probabilities = tf.nn.softmax(logits)

    # Just focus on the top predictions
    top_pred = tf.nn.top_k(tf.squeeze(probabilities), 5, name="top_predictions")

    output_nodes = [probabilities, top_pred.indices, top_pred.values]


# Create the saver
with g.as_default():

    # Create a function that reads the network weights (truncated in the
    # original; `checkpoint_path` is a hypothetical VGG-16 checkpoint path)
    init_fn = slim.assign_from_checkpoint_fn(checkpoint_path,
                                             slim.get_model_variables('vgg_16'))
    saver = tf.train.Saver()
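
# A minimal sketch (not in the original) of running the inference graph above;
# it assumes the input image was built as a constant tensor inside `g` (as in
# the slim walkthrough), so no feed_dict is needed:
with tf.Session(graph=g) as sess:
    init_fn(sess)  # load the pre-trained VGG-16 weights
    probs, top_indices, top_values = sess.run(output_nodes)
    for idx, val in zip(top_indices, top_values):
        print('class %d: probability %.4f' % (idx, val))
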
def FCN_32s(image_batch_tensor,
            number_of_classes,
            is_training):
    """Returns the FCN-32s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses a bilinear
    upsampling kernel to upsample the prediction by a factor of 32. This means
    that if the input size is not a multiple of 32, a prediction of a
    different size will be delivered. To adapt the network to inputs of any
    size, use adapt_network_for_any_size_input(FCN_32s, 32). Note: the
    upsampling kernel is fixed in this model definition, because learning it
    didn't give significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the behavior of the underlying dropout layers of VGG-16.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input. Use
        adapt_network_for_any_size_input to adapt the network for any input
        size. Otherwise, the input image sizes should be multiples of 32.
    vgg_16_variables_mapping : dict {string: variable}
        Dict which maps the FCN-32s model's variables to the VGG-16 checkpoint
        variable names. We need this to initialize the weights of the FCN-32s
        model from a VGG-16 checkpoint file; see the usage sketch below.
    """
    
    with tf.variable_scope("fcn_32s") as fcn_32s_scope:

        upsample_factor = 32

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)
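
        # bilinear_upsample_weights (defined elsewhere in this file's repo)
        # presumably builds a fixed numpy kernel of shape
        # [2 * factor - factor % 2, 2 * factor - factor % 2,
        #  number_of_classes, number_of_classes], whose spatial slices are
        # bilinear interpolation filters; the conv2d_transpose below then
        # performs plain, non-learned bilinear upsampling of each class map.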

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(vgg.vgg_arg_scope()):

            logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                            num_classes=number_of_classes,
                                            is_training=is_training,
                                            spatial_squeeze=False,
                                            fc_conv_padding='SAME')

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        upsampled_logits_shape = tf.stack([
                                          downsampled_logits_shape[0],
                                          downsampled_logits_shape[1] * upsample_factor,
                                          downsampled_logits_shape[2] * upsample_factor,
                                          downsampled_logits_shape[3]
                                         ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original vgg-16 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        vgg_16_variables_mapping = {}

        vgg_16_variables = slim.get_variables(fcn_32s_scope)

        for variable in vgg_16_variables:

            # Here we strip the part of the variable's name that comes from
            # the current variable scope ('fcn_32s/') and the trailing ':0'.
            # We use fcn_32s_scope.name rather than .original_name_scope,
            # because a name scope only affects operations; the variable
            # scope prefix is given by .name.
            original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.name)+1:-2]
            vgg_16_variables_mapping[original_vgg_16_checkpoint_string] = variable

    return upsampled_logits, vgg_16_variables_mapping
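
# A minimal usage sketch (not part of the original): build FCN-32s and
# initialize its VGG-16 layers from a checkpoint through the returned mapping.
# 'vgg_16.ckpt' is a hypothetical path to the slim VGG-16 checkpoint.
def _fcn_32s_usage_sketch():
    import numpy as np

    image_batch = tf.placeholder(tf.uint8, shape=(1, 384, 512, 3))
    upsampled_logits, vgg_16_mapping = FCN_32s(image_batch,
                                               number_of_classes=21,
                                               is_training=False)
    pred = tf.argmax(upsampled_logits, axis=3)  # per-pixel class labels

    # The checkpoint's fc8 layer has 1000 classes, not 21, so skip it
    # when restoring.
    vgg_16_without_fc8 = {name: var for name, var in vgg_16_mapping.items()
                          if 'fc8' not in name}
    init_fn = slim.assign_from_checkpoint_fn('vgg_16.ckpt', vgg_16_without_fc8)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        init_fn(sess)
        dummy_batch = np.zeros((1, 384, 512, 3), dtype=np.uint8)
        return sess.run(pred, feed_dict={image_batch: dummy_batch})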
def _construct_model(model_type='resnet_v1_50'):
  """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
  # Placeholder input.
  images = array_ops.placeholder(
      dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

  # Construct model.
  if model_type == 'inception_resnet_v2':
    _, end_points = inception.inception_resnet_v2_base(images)
  elif model_type == 'inception_resnet_v2-same':
    _, end_points = inception.inception_resnet_v2_base(
        images, align_feature_maps=True)
  elif model_type == 'inception_v2':
    _, end_points = inception.inception_v2_base(images)
  elif model_type == 'inception_v2-no-separable-conv':
    _, end_points = inception.inception_v2_base(
        images, use_separable_conv=False)
  elif model_type == 'inception_v3':
    _, end_points = inception.inception_v3_base(images)
  elif model_type == 'inception_v4':
    _, end_points = inception.inception_v4_base(images)
  elif model_type == 'alexnet_v2':
    _, end_points = alexnet.alexnet_v2(images)
  elif model_type == 'vgg_a':
    _, end_points = vgg.vgg_a(images)
  elif model_type == 'vgg_16':
    _, end_points = vgg.vgg_16(images)
  elif model_type == 'mobilenet_v1':
    _, end_points = mobilenet_v1.mobilenet_v1_base(images)
  elif model_type == 'mobilenet_v1_075':
    _, end_points = mobilenet_v1.mobilenet_v1_base(
        images, depth_multiplier=0.75)
  elif model_type == 'resnet_v1_50':
    _, end_points = resnet_v1.resnet_v1_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_101':
    _, end_points = resnet_v1.resnet_v1_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_152':
    _, end_points = resnet_v1.resnet_v1_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_200':
    _, end_points = resnet_v1.resnet_v1_200(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_50':
    _, end_points = resnet_v2.resnet_v2_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_101':
    _, end_points = resnet_v2.resnet_v2_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_152':
    _, end_points = resnet_v2.resnet_v2_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_200':
    _, end_points = resnet_v2.resnet_v2_200(
        images, num_classes=None, is_training=False, global_pool=False)
  else:
    raise ValueError('Unsupported model_type %s.' % model_type)

  return end_points
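
# A brief usage sketch (not in the original): build a backbone and inspect its
# endpoint activations, e.g. when computing receptive-field parameters.
def _print_end_points(model_type='resnet_v1_50'):
  end_points = _construct_model(model_type)
  for name, tensor in sorted(end_points.items()):
    print('%s: %s' % (name, tensor.get_shape()))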