Example 1
    def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):

        """Create the graph of the resnetv1_50 model.
        """

        # Parse input arguments into class variables
        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_50.ckpt"
        else:
            self.WEIGHTS_PATH = weights_path
        self.train_layers = train_layers

        with tf.variable_scope("input"):
            self.image_size = resnet_v1.resnet_v1_50.default_image_size
            self.x_input = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3], name="x_input")
            self.y_input = tf.placeholder(tf.float32, [None, num_classes], name="y_input")
            self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

        # train
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                    num_classes=num_classes,
                                                    is_training=True,
                                                    reuse=tf.AUTO_REUSE
                                                    )

        # validation
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits_val, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                        num_classes=num_classes,
                                                        is_training=False,
                                                        reuse=tf.AUTO_REUSE
                                                        )

        with tf.name_scope("loss"):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.y_input))
            self.loss_val = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_val, labels=self.y_input))

        with tf.name_scope("train"):

            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            var_list = [v for v in tf.trainable_variables()
                        if v.name.split('/')[-2] in train_layers
                        or v.name.split('/')[-3] in train_layers]
            gradients = tf.gradients(self.loss, var_list)
            self.grads_and_vars = list(zip(gradients, var_list))
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(grads_and_vars=self.grads_and_vars, global_step=self.global_step)

        with tf.name_scope("probability"):
            self.probability = tf.nn.softmax(self.logits_val, name="probability")

        with tf.name_scope("prediction"):
            self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(self.prediction, tf.argmax(self.y_input, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
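
A note on initialization: the constructor above builds the graph but does not itself restore the pre-trained checkpoint. A minimal restore sketch, assuming TF 1.x with tf.contrib.slim imported as slim; the load_initial_weights method and its exclude list are hypothetical additions, not part of the original class:

    def load_initial_weights(self, session):
        # Restore every variable except the re-trained classifier head and the
        # global step, so those keep their fresh initialization (hypothetical helper).
        exclude = ['resnet_v1_50/logits', 'global_step']
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
        tf.train.Saver(var_list=variables_to_restore).restore(session, self.WEIGHTS_PATH)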
Example 2
def test_resnet_v1_50(img_dir):
    """
    Test ResNet-V1-50 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_50(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_50.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v1_50/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of ResNet-V1-50:', name, prob)
    return name, prob
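
Note that label_dict is assumed to be an external mapping from the 1-based ImageNet class index to a class name, and that the image is fed as raw resized pixels. The slim resnet_v1 checkpoints expect VGG-style preprocessing, so a per-channel mean subtraction is usually inserted before the reshape; a minimal sketch, with the mean values taken from slim's vgg_preprocessing:

_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94
img = img.astype(np.float32) - np.array([_R_MEAN, _G_MEAN, _B_MEAN], dtype=np.float32)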
Example 3
    def __init__(self):
        from nets import resnet_v1

        self.image_size = 224
        self.num_classes = 1000
        self.predictions_is_correct = False
        self.use_larger_step_size = False
        self.use_smoothed_grad = False

        # For dataprior attacks. gamma = A^2 * D / d in the paper
        self.gamma = 2.7

        batch_shape = [None, self.image_size, self.image_size, 3]
        self.x_input = tf.placeholder(tf.float32, shape=batch_shape)
        self.target_label = tf.placeholder(tf.int32, shape=[None])
        target_onehot = tf.one_hot(self.target_label, self.num_classes)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(
                self.x_input, num_classes=self.num_classes, is_training=False)

        self.predicted_labels = tf.argmax(end_points['predictions'], 1)
        #logits -= tf.reduce_min(logits)
        #real = tf.reduce_max(logits * target_onehot, 1)
        #other = tf.reduce_max(logits * (1 - target_onehot), 1)
        #self.loss = other - real
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=target_onehot, logits=logits)
        self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0]

        saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v1'))
        self.sess = tf.get_default_session()
        saver.restore(self.sess, 'resnet_v1_50.ckpt')
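
A usage sketch for the model above: the constructor calls tf.get_default_session(), so a session must already be active when the object is built. AttackModel, images and targets are hypothetical names:

with tf.Session().as_default() as sess:
    model = AttackModel()  # hypothetical class name for the snippet above
    grad = sess.run(model.grad, feed_dict={model.x_input: images,
                                           model.target_label: targets})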
Example 4
 def _resnet_v1_50(self,
                   X,
                   num_classes,
                   dropout_keep_prob=0.8,
                   is_train=False):
     arg_scope = resnet_arg_scope()
     with slim.arg_scope(arg_scope):
         net, end_points = resnet_v1_50(X, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                         stride=1,
                         padding='SAME'):
         with tf.variable_scope('Logits_out'):
             net = slim.conv2d(net,
                               1000, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out0')
             net = slim.dropout(net,
                                dropout_keep_prob,
                                scope='Dropout_1b_out0')
             net = slim.conv2d(net,
                               200, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out1')
             net = slim.dropout(net,
                                dropout_keep_prob,
                                scope='Dropout_1b_out1')
             net = slim.conv2d(net,
                               num_classes, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out2')
             net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
     return net
Example 5
def mag(inputs,
        num_classes=3,
        num_channels=1000,
        is_training=True,
        global_pool=False,
        output_stride=16,
        upsample_ratio=2,
        spatial_squeeze=False,
        reuse=tf.AUTO_REUSE,
        scope='graspnet'):
    with tf.variable_scope(scope, 'graspnet', [inputs], reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(inputs=inputs,
                                                     num_classes=num_channels,
                                                     is_training=is_training,
                                                     global_pool=global_pool,
                                                     output_stride=output_stride,
                                                     spatial_squeeze=spatial_squeeze,
                                                     scope='feature_extractor')
        with tf.variable_scope('prediction', [net]) as sc:
            end_points_collection = sc.original_name_scope + '_end_points'
            # to do: add batch normalization to the following conv layers.
            with slim.arg_scope([slim.conv2d],
                                outputs_collections=end_points_collection):
                net = slim.conv2d(net, 512, [1, 1], scope='conv1')
                net = slim.conv2d(net, 128, [1, 1], scope='conv2')
                net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
                height, width = net.get_shape().as_list()[1:3]
                net = tf.image.resize_bilinear(net,
                                               [height * upsample_ratio, width * upsample_ratio],
                                               name='resize_bilinear')
                end_points.update(slim.utils.convert_collection_to_dict(end_points_collection))
    end_points['logits'] = net
    return net, end_points
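
A usage sketch for mag above; with output_stride=16 and upsample_ratio=2, a 320x320 input yields a 40x40 per-pixel logits map (320 / 16 * 2). The input placeholder is illustrative:

images = tf.placeholder(tf.float32, [None, 320, 320, 3])
net, end_points = mag(images, num_classes=3, is_training=False)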
Example 6
def generate_graph(output_root):
    os.makedirs(output_root, exist_ok=True)

    slim_dir = os.path.join(output_root, "models/slim")
    if not os.path.exists(slim_dir):
        clone_slim(output_root)

    sys.path.append(slim_dir)
    from nets import resnet_v1
    image_size = resnet_v1.resnet_v1.default_image_size

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        x = tf.placeholder(tf.float32, [1, image_size, image_size, 3])
        logits, _ = resnet_v1.resnet_v1_50(x,
                                           num_classes=1000,
                                           is_training=False)
        y = tf.nn.softmax(logits)

    model_path = download_model(output_root)
    sess = tf.Session()
    slim.assign_from_checkpoint_fn(model_path,
                                   slim.get_model_variables())(sess)

    graph = TensorFlowConverter(sess, batch_size=1).convert([x], [y])
    return sess, x, y, graph
Example 7
def build_train_op(image_tensor, label_tensor, is_training):
    resnet_argscope = resnet_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(name="global_step",
                                  shape=[],
                                  dtype=tf.int32,
                                  trainable=False)
    with slim.arg_scope(resnet_argscope):
        logits, end_points = resnet_v1_50(image_tensor,
                                          is_training=is_training,
                                          num_classes=100)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=label_tensor))
    accuracy = tf.reduce_sum(
        tf.cast(
            tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor),
            tf.int32))
    end_points['loss'], end_points['accuracy'] = loss, accuracy
    if is_training:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op, end_points
    else:
        return None, end_points
Example 8
def network_entire(images):
	'''
	A tensorflow operation that extracts features for a batch of images.

	Args:
		images: Numpy array of shape (n, h, w, 3).

	Returns:
		embedding: Tensor of shape (n, 128).
	'''
	# Normalization.
	images = images - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

	# Travel through the network and get the embedding.
	with slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
		_, endpoints = resnet_v1_50(images, num_classes=None, is_training=False, global_pool=True)

	endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
		endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False)

	with tf.name_scope('head'):
		endpoints = head(endpoints, embedding_dim, is_training=False)

	embedding = endpoints['emb']

	return embedding
Example 9
    def __call__(self, x_input, batch_size, is_training=False):
        """Constructs model and return probabilities for given input."""
        reuse = True if self.built else None

        # ResNet V1 and VGG have different preprocessing
        preproc = tf.map_fn(
            lambda img: vgg_preprocess(
                0.5 * 255.0 * (img + 1.0),
                resnet_v1.resnet_v1.default_image_size,
                resnet_v1.resnet_v1.default_image_size),
            x_input)

        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            with tf.variable_scope(self.name):
                logits, end_points = resnet_v1.resnet_v1_50(
                    preproc,
                    num_classes=self.num_classes - 1,
                    is_training=is_training,
                    reuse=reuse)

                # VGG and ResNetV1 don't have a background class
                background_class = tf.constant(-np.inf,
                                               dtype=tf.float32,
                                               shape=[batch_size, 1])
                logits = tf.concat([background_class, logits], axis=1)

            preds = tf.argmax(logits, axis=1)
        self.built = True
        self.logits = logits
        self.preds = preds
        return logits
Example 10
    def init_network(self):
        image = tf.image.resize_images(self.image, self.size, 0)
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        # bone network
        net, end_points = resnet_v1.resnet_v1_50(
            image,
            is_training=self.is_training,
            global_pool=self.global_pool,
            output_stride=self.output_stride,
            spatial_squeeze=self.spatial_squeeze,
            num_classes=self.num_classes,
            reuse=self.reuse,
            scope='resnet_v1_50'
            )

        self.feature = end_points['global_pool']
        
        # embedding
        # with tf.variable_scope('embedding'):
        #     net = end_points['global_pool']
        #     net = slim.flatten(net)
        #     net = slim.fully_connected(net, 512, activation_fn=None)
        #     net = slim.batch_norm(net, activation_fn=None)
        #     net = LeakyRelu(net, leak=0.1)
        #     net = slim.dropout(net, 0.5)
        #     net = slim.fully_connected(net, self.num_classes, activation_fn=None, scope='logits')

        # pred = slim.softmax(net)
        # end_points['logits'] = net
        # end_points['prediction'] = pred
        self.end_points = end_points
Example 11
def model(images, weight_decay=1e-5, is_training=True, eval=False):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images, eval)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
           
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                   # logging.info(i)
                   # logging.info(g[i-1].get_shape().as_list())
                    #logging.info(f[i].get_shape().as_list())
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))
            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)
            '''
            F_score = slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            geo_map = slim.conv2d(end_points['pool2'], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            '''
            
    return F_score, F_geometry
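
Several of these EAST-style examples call an external unpool helper that is not shown. The inline definition in Example 18 below suggests it is a plain 2x bilinear upsample; a matching sketch:

def unpool(inputs):
    # 2x bilinear upsampling of an NHWC feature map
    return tf.image.resize_bilinear(
        inputs, size=[tf.shape(inputs)[1] * 2, tf.shape(inputs)[2] * 2])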
Example 12
def model(colors,
          depths,
          num_classes=3,
          num_channels=1000,
          is_training=True,
          global_pool=False,
          output_stride=16,
          spatial_squeeze=False,
          color_scope='color_tower',
          depth_scope='depth_tower',
          scope='urnet'):
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        color_net, color_end_points = resnet_v1.resnet_v1_50(
            inputs=colors,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=color_scope)
        depth_net, depth_end_points = resnet_v1.resnet_v1_50(
            inputs=depths,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=depth_scope)
        net = tf.concat([color_net, depth_net], axis=3)
    with tf.variable_scope(scope, 'arcnet', [net]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # to do: add batch normalization to the following conv layers.
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            net = slim.conv2d(net, 512, [1, 1], scope='conv1')
            net = slim.conv2d(net, 128, [1, 1], scope='conv2')
            net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
            height, width = net.get_shape().as_list()[1:3]
            net = tf.image.resize_bilinear(net, [height * 2, width * 2],
                                           name='resize_bilinear')
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
    end_points.update(color_end_points)
    end_points.update(depth_end_points)
    end_points['logits'] = net
    return net, end_points
Example 13
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
            F_score = slim.conv2d(g[3],
                                  1,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Example 14
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None, None]
            h = [None, None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    ################ Modified by Xiaolong March. 9th ####################
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))
            h[4] = GlobalAveragePooling2D()(g[3])
            # the predicted class score is mapped back to the previous convolutional
            # layer to generate the class activation map (CAM); the CAM highlights
            # the discriminative image regions
            h4_tile = tf.tile(tf.reshape(h[4],[-1, 1, num_outputs[3], 1]), [1, tf.shape(g[3])[1], 1, 1])
            ram = tf.matmul(g[3],h4_tile)
            g[4] = slim.conv2d(ram, num_outputs[3], 3)
            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[4], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, ram
Example 15
def ResNet50Model(input_tensor, weight_decay=1e-5, is_training=True):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        input_tensor = tf.image.resize_images(input_tensor, [224, 224])
        logits, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')
        feature = tf.reduce_mean(logits, reduction_indices=[1, 2])
        fc1 = tf.contrib.layers.fully_connected(feature, num_outputs=512)
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=10)
    return fc2
Example 16
def main():
    sys.setrecursionlimit(10000)

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="resnet50", choices=["resnet50"])
    parser.add_argument('--out',
                        '-o',
                        default='output_tensorflow',
                        help='Directory to output the graph descriptor')
    parser.add_argument("--encoding", help="name of weight encoder")
    parser.add_argument("--backend",
                        default="webgpu,webgl,webassembly,fallback",
                        help="backend")
    args = parser.parse_args()

    os.makedirs(args.out, exist_ok=True)
    slim_dir = os.path.join(args.out, "models/slim")
    if not os.path.exists(slim_dir):
        clone_slim(args.out)

    model_path = download_model(args.out)

    sys.path.append(slim_dir)
    from nets import resnet_v1
    image_size = resnet_v1.resnet_v1.default_image_size

    checkpoints_dir = args.out
    sess = tf.Session()
    processed_images = tf.placeholder(tf.float32,
                                      [1, image_size, image_size, 3])

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(model_path,
                                             slim.get_model_variables())

    init_fn(sess)

    graph = TensorFlowConverter(sess, batch_size=1).convert([processed_images],
                                                            [probabilities])

    from webdnn.graph import traverse
    traverse.dump(graph)

    for backend in args.backend.split(","):
        graph_exec_data = generate_descriptor(
            backend, graph, constant_encoder_name=args.encoding)
        graph_exec_data.save(args.out)

    console.stderr("Done.")
Example 17
 def create_network(self):
     with tf.contrib.slim.arg_scope(resnet_arg_scope()):
         logits, end_points = resnet_v1_50(self.img,
                                           num_classes=self.nb_class,
                                           is_training=self.is_training,
                                           global_pool=True,
                                           spatial_squeeze=True)
     self.logits = logits
     self.probabilities = tf.nn.sigmoid(self.logits)
     self.predictions = tf.cast(
         self.probabilities >= self.prediction_threshold, tf.float32)
Example 18
def model(images, weight_decay=1e-5, is_training=True):

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]

            for i in range(4):
                print('Shape of f_{} = {}'.format(i, f[i].shape))

            h = [f[0], None, None, None]
            num_outputs = [None, 128, 64, 32]

            def unpool(data):
                return tf.image.resize_bilinear(data,
                        size=[tf.shape(data)[1]*2, tf.shape(data)[2]*2])

            def feature_merge(data, d_concat, num_output):
                concat_res = tf.concat([unpool(data), d_concat], axis=-1)
                conv1x1_res = slim.conv2d(concat_res, num_output, 1)
                conv3x3_res = slim.conv2d(conv1x1_res, num_output, 3)
                return conv3x3_res
            
            for i in range(1,4):
                h[i] = feature_merge(h[i-1], f[i], num_outputs[i])
            
            feature = slim.conv2d(h[3], 32, 3)
            F_score = slim.conv2d(feature, 1, 1,
                    activation_fn=tf.nn.sigmoid,
                    normalizer_fn=None)

            geo_map = slim.conv2d(feature, 4, 1,
                    activation_fn=tf.nn.sigmoid,
                    normalizer_fn=None) * FLAGS.text_scale
            angle_map = slim.conv2d(feature, 1, 1,
                    activation_fn=tf.nn.sigmoid,
                    normalizer_fn=None)
            angle_map = (angle_map - 0.5) * np.pi/2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Example 19
    def __init__(self,
                 model_dir,
                 load_queries=False,
                 queries_number=None,
                 gpu_id=0):

        self.load_queries = load_queries
        self.frames = tf.placeholder(tf.uint8,
                                     shape=(None, None, None, 3),
                                     name='input')
        processed_frames = self.preprocess_video(self.frames)

        with tf.device('/gpu:%i' % gpu_id):
            with tf.contrib.slim.arg_scope(resnet_v1.resnet_arg_scope()):
                _, network = resnet_v1.resnet_v1_50(processed_frames,
                                                    num_classes=None,
                                                    is_training=False)
            self.region_vectors = self.extract_region_vectors(network)

            if self.load_queries:
                print('Queries will be loaded to the gpu')
                self.queries = [
                    tf.Variable(np.zeros((1, 9, 3840)),
                                dtype=tf.float32,
                                validate_shape=False)
                    for _ in range(queries_number)
                ]
                self.candidate = tf.placeholder(tf.float32, [None, None, None],
                                                name='candidate')
                self.similarities = []
                for q in self.queries:
                    sim_matrix = self.frame_to_frame_similarity(
                        q, self.candidate)
                    sim_matrix = self.video_to_video_similarity(sim_matrix)
                    sim_matrix = tf.squeeze(sim_matrix, [0, 3])
                    self.similarities.append(
                        self.chamfer_similarity(sim_matrix))
            else:
                self.query = tf.placeholder(tf.float32, [None, None, None],
                                            name='query')
                self.candidate = tf.Variable(np.zeros((1, 9, 3840)),
                                             dtype=tf.float32,
                                             validate_shape=False)
                sim_matrix = self.frame_to_frame_similarity(
                    self.query, self.candidate)
                sim_matrix = self.video_to_video_similarity(sim_matrix)
                sim_matrix = tf.squeeze(sim_matrix, [0, 3])
                self.similarity = self.chamfer_similarity(sim_matrix)

        init = self.load_model(model_dir)
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(init)
Example 20
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score_nrow = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)

            F_score_ncol = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)

            F_score_row = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)

            F_score_col = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)

    return F_score_nrow, F_score_ncol, F_score_row, F_score_col
Example 21
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_50(image, num_classes=None, is_training=is_training, global_pool=True)
    
    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_50'
Example 22
def inference(
        hypes,
        images,
        train=True,
        num_classes=1000,
        num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
        preprocess=True,
        bottleneck=True):
    # if preprocess is True, input should be RGB [0,1], otherwise BGR with mean
    # subtracted

    layers = hypes['arch']['layers']

    if layers == 50:
        num_blocks = [3, 4, 6, 3]
    elif layers == 101:
        num_blocks = [3, 4, 23, 3]
    elif layers == 152:
        num_blocks = [3, 8, 36, 3]
    else:
        raise ValueError('Unsupported number of layers: %s' % layers)

    if preprocess:
        images = _imagenet_preprocess(images)

    is_training = tf.convert_to_tensor(train, dtype='bool', name='is_training')

    logits = {}

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(images,
                                                 num_classes=num_classes,
                                                 is_training=is_training,
                                                 global_pool=False,
                                                 spatial_squeeze=False)

    logits['images'] = images

    logits['fcn_in'] = end_points['resnet_v1_50/block4']
    logits['feed2'] = end_points['resnet_v1_50/block3']
    logits['feed4'] = end_points['resnet_v1_50/block2']

    logits['early_feat'] = end_points['resnet_v1_50/block3']
    logits['deep_feat'] = end_points['resnet_v1_50/block4']

    if train:
        restore = slim.get_variables_to_restore()
        hypes['init_function'] = _initalize_variables
        hypes['restore'] = restore

    return logits
Example 23
    def classify_image(self, image_string, ext='png', ret=None):

        if ext not in self.valid_ext:
            # print "wrong image format"
            ret['result'] = (False, "please input a valid image format", "png,jpg,jpeg,gif")
            return ret['result']
        try:

            image_size = resnet_v1.resnet_v1.default_image_size
            with self.g.as_default():
                #if image is from local then read file firstly
                if os.path.splitext(image_string)[1].strip(".") in self.valid_ext:
                    # print "image from local"
                    image_string = tf.read_file(image_string)
                if ext == "jpeg" or "jpg":
                    # print "jpg"
                    image = tf.image.decode_jpeg(image_string, channels=3)
                if ext == "png":
                    image = tf.image.decode_png(image_string,channels=3)
                if ext == 'gif':
                    image = tf.image.decode_gif(image_string,channels=3)
                processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
                processed_images = tf.expand_dims(processed_image, 0)
                # print "1"
                # Create the model, use the default arg scope to configure the batch norm parameters.
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    logits, _ = resnet_v1.resnet_v1_50(processed_images, num_classes=1000, is_training=False)
                probabilities = tf.nn.softmax(logits)

                init_fn = slim.assign_from_checkpoint_fn(
                    os.path.join(self.checkpoints_dir, 'resnet_v1_50.ckpt'),
                    slim.get_model_variables())
                # print "2"
            with self.g.as_default():
                with tf.Session() as sess:
                    init_fn(sess)
                    starttime = time.time()
                    np_image, probabilities = sess.run([image, probabilities])
                    endtime = time.time()
                    probabilities = probabilities[0,0,0,0:]
                    sorted_inds = np.argsort(probabilities)[::-1]
            indices = sorted_inds[:5]
            predictions = synset[indices]
            meta = [(p, '%.5f' % probabilities[i]) for i, p in zip(indices, predictions)]
            ret['result'] = (True, meta, '%.3f' % (endtime - starttime))
            return ret['result']
        except Exception as err:
            # print "error"
            ret['result'] = (False,"someting went wrong when classifying the image,", "Maybe try another one?")
            return ret['result']
Example 24
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Example 25
def eval(adv_imgs, labels, x_inputs, total_score, total_count):
    image = (((adv_imgs + 1.0) * 0.5) * 255.0)
    processed_imgs_inv1 = preprocess_for_model(image, 'inception_v1')
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        logits_inc_v1, end_points_inc_v1 = inception.inception_v1(
            processed_imgs_inv1, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionV1', reuse=True)
    pred_inception = tf.argmax(end_points_inc_v1['Predictions'], 1)

    # rescale pixel range from [-1, 1] to [0, 255] for resnet_v1 and vgg's input
    processed_imgs_res_v1_50 = preprocess_for_model(image, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
            processed_imgs_res_v1_50, num_classes=FLAGS.num_classes, is_training=False, scope='resnet_v1_50', reuse=True)

    end_points_res_v1_50['logits'] = tf.squeeze(end_points_res_v1_50['resnet_v1_50/logits'], [1, 2])
    end_points_res_v1_50['probs'] = tf.nn.softmax(end_points_res_v1_50['logits'])
    pred_resnet = tf.argmax(end_points_res_v1_50['probs'], 1)

    processed_imgs_inv3 = preprocess_for_model(image, 'inception_v3')
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_res_inception_v3, end_points_inception_v3 = inception_v3.inception_v3(
            processed_imgs_inv3, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionV3', reuse=True)
    pred_inception_v3 = tf.argmax(end_points_inception_v3['Predictions'], 1)

    processed_imgs_inv_res = preprocess_for_model(image, 'inception_resnet_v2')
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_inception_resnet, end_points_inception_resnet = inception_resnet_v2.inception_resnet_v2(
            processed_imgs_inv_res, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionResnetV2')
    pred_ince_res = tf.argmax(end_points_inception_resnet['Predictions'], 1)


    for i in range(adv_imgs.shape[0]):
        def f1(total_score, total_count):
            total_score = tf.add(total_score, 64)
            return total_score, total_count

        def f2(total_score, total_count):
            adv = (((adv_imgs[i] + 1.0) * 0.5) * 255.0)
            ori = (((x_inputs[i] + 1.0) * 0.5) * 255.0)
            diff = tf.reshape(adv, [-1, 3]) - tf.reshape(ori, [-1, 3])
            distance = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(diff), axis=1)))
            total_score = tf.add(total_score, distance)
            total_count = tf.add(total_count, 1)
            return total_score, total_count
        total_score, total_count = tf.cond(tf.equal(pred_inception[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count))
        total_score, total_count = tf.cond(tf.equal(pred_resnet[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count))
        # total_score, total_count = tf.cond(tf.equal(pred_inception_v3[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count))
        total_score, total_count = tf.cond(tf.equal(pred_ince_res[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count))

    return total_score, total_count
Example 26
def resnet_v1_50(inputs, is_training, opts):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(
            weight_decay=opts.weight_decay,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon,
            activation_fn=tf.nn.relu)):
        return resnet_v1.resnet_v1_50(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
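
A hypothetical call site for the wrapper above; opts only needs the attributes the wrapper reads, so a SimpleNamespace is enough for a sketch, and the numeric values are illustrative defaults:

from types import SimpleNamespace

opts = SimpleNamespace(weight_decay=1e-4, batch_norm_decay=0.997,
                       batch_norm_epsilon=1e-5, num_classes=1000,
                       global_pool=True, spatial_squeeze=True)
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = resnet_v1_50(images, is_training=False, opts=opts)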
Example 27
def build_FPN(images, config, is_training, backbone='resnet50'):
    # images: [batch, h, w, channels]
    # Return: pyramid_feature Dict{P2, P3, P4, P5} of feature maps from different levels of the
    # 		  feature pyramid. Each is [batch, height, width, channels]
    pyramid = {}
    # build backbone network
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        if backbone == "resnet50":
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
            pyramid['C2'] = end_points[
                'resnet_v1_50/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_50/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_50/block3/unit_5/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_50/block4/unit_3/bottleneck_v1']
        elif backbone == "resnet101":
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
            pyramid['C2'] = end_points[
                'resnet_v1_101/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_101/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_101/block3/unit_22/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_101/block4/unit_3/bottleneck_v1']
        else:
            print("Unkown backbone : ", backbone)
    # build FPN
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(pyramid['C5'],
                                                config.TOP_DOWN_PYRAMID_SIZE,
                                                1)
            for i in range(4, 1, -1):
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i+1)], \
                 size = (upshape[1], upshape[2]))
                c = slim.conv2d(pyramid['C%d' % i],
                                config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
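
A usage sketch for build_FPN; the config object only needs TOP_DOWN_PYRAMID_SIZE here, and 256 is an illustrative value:

from types import SimpleNamespace

config = SimpleNamespace(TOP_DOWN_PYRAMID_SIZE=256)
images = tf.placeholder(tf.float32, [None, 512, 512, 3])
pyramid_feature = build_FPN(images, config, is_training=False, backbone='resnet50')
# pyramid_feature maps 'P2'..'P5' to feature maps at strides 4 to 32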
Example 28
def _tower_fn(is_training, weight_decay, feature, label, data_format,
              num_layers, batch_norm_decay, batch_norm_epsilon):
    """Build computation tower (Resnet).

  Args:
    is_training: true if is training graph.
    weight_decay: weight regularization strength, a float.
    feature: a Tensor.
    label: a Tensor.
    data_format: channels_last (NHWC) or channels_first (NCHW).
    num_layers: number of layers, an int.
    batch_norm_decay: decay for batch normalization, a float.
    batch_norm_epsilon: epsilon for batch normalization, a float.

  Returns:
    A tuple with the loss for the tower, the gradients and parameters, and
    predictions.

  """
    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(batch_norm_decay=batch_norm_decay,
                                       batch_norm_epsilon=batch_norm_epsilon)):
        net, end_points = resnet_v1.resnet_v1_50(feature,
                                                 is_training=is_training)
        net = slim.conv2d(net,
                          397, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='logits')
        end_points['logits'] = net
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['spatial_squeeze'] = net
    logits = net
    tower_pred = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits)
    }

    tower_loss = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                        labels=label)
    tower_loss = tf.reduce_mean(tower_loss)

    model_params = tf.trainable_variables()
    tower_loss += weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in model_params])

    tower_grad = tf.gradients(tower_loss, model_params)

    return tower_loss, zip(tower_grad, model_params), tower_pred
Example 29
def model(images, weight_decay=1e-5, is_training=True):
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {'decay': 0.997,'epsilon': 1e-5,'scale': True,'is_training': is_training}
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):

            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]

            for i in range(4):
                h[i]=slim.conv2d(f[i], 256, 1)
            for i in range(4):
                print('Shape of h_{} {}'.format(i, h[i].shape))

            g[0]=RefineBlock(high_inputs=None,low_inputs=h[0])
            print('Shape of g_{} {}'.format(0, g[0].shape))
            g[1]=RefineBlock(g[0],h[1])
            print('Shape of g_{} {}'.format(1, g[1].shape))
            g[2]=RefineBlock(g[1],h[2])
            print('Shape of g_{} {}'.format(2, g[2].shape))
            g[3]=RefineBlock(g[2],h[3])
            g[3] = slim.conv2d(g[3], 128, 3)
            g[3] = slim.conv2d(g[3], 64, 3)
            g[3] = slim.conv2d(g[3], 32, 3)
            print('Shape of g_{} {}'.format(3, g[3].shape))

            #g[3]=unpool(g[3],scale=4)
            #g[3] = horizontal_vertical_lstm_together(g[3], 128, scope_n="layer1")
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)
            #F_score = slim.conv2d(g[3], 21, 1, activation_fn=tf.nn.relu, normalizer_fn=None)

    return F_score, F_geometry
Example 30
def endpoints(image, block4_units, is_training, embedding_dim=128):

    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_50(image, block4_units=block4_units, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False)

    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')

    return endpoints
Example 31
def make_resnetv1_50_multi_embeddings(batch_imgs,
                                      embedding_dims,
                                      n_heads,
                                      phase_is_train,
                                      uniform_bias=False,
                                      weight_decay=0.00004):
    blocks = 4
    units = [3, 4, 6, 3]
    emb_info = [
        'resnet_v1_50/block1', 'resnet_v1_50/block2', 'resnet_v1_50/block3',
        'resnet_v1_50/block4'
    ]
    if n_heads == 16:
        emb_info = []
        for i in range(blocks):
            for j in range(units[i]):
                emb_info.append('resnet_v1_50/block' + str(i + 1) + '/unit_' +
                                str(j + 1) + '/bottleneck_v1')

    left_embedding = embedding_dims
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        net, endpoints = resnet_v1.resnet_v1_50(batch_imgs,
                                                num_classes=0,
                                                global_pool=False,
                                                is_training=phase_is_train)
        for i in range(n_heads):
            emb_dim = int(math.ceil(left_embedding / float(n_heads - i)))
            left_embedding -= emb_dim
            with tf.variable_scope('loss%d' % i) as scope:
                # change fully_connected to conv2d to use slim's regularization losses in the resnet arg scope
                endpoints['emb_%d' % i] = slim.fully_connected(
                    tf.reduce_mean(endpoints[emb_info[i]], [1, 2]),
                    emb_dim,
                    activation_fn=None)
                endpoints['embedding%d' % i] = tf.nn.l2_normalize(
                    endpoints['emb_%d' % i], dim=1)

        with tf.variable_scope('fc_embedding') as scope:
            embs = [endpoints['embedding%d' % i] for i in range(n_heads)]
            endpoints['fc_embedding'] = tf.concat(embs, 1) / np.sqrt(n_heads)
#    print('Endpoints')
#    for k,v in endpoints.items():
#        print((k,v))
    return endpoints, None
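The per-head split above hands out embedding_dims as evenly as possible, giving earlier heads one extra dimension when the total does not divide evenly. A small worked illustration of the same arithmetic (values chosen for the example only):

import math

left, dims = 128, []  # embedding_dims=128, n_heads=3
for i in range(3):
    d = int(math.ceil(left / float(3 - i)))
    dims.append(d)
    left -= d
print(dims, sum(dims))  # [43, 43, 42] 128 -- 'fc_embedding' keeps the full width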
Example no. 32
def _construct_model(model_type='resnet_v1_50'):
  """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
  # Placeholder input.
  images = array_ops.placeholder(
      dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

  # Construct model.
  if model_type == 'inception_resnet_v2':
    _, end_points = inception.inception_resnet_v2_base(images)
  elif model_type == 'inception_resnet_v2-same':
    _, end_points = inception.inception_resnet_v2_base(
        images, align_feature_maps=True)
  elif model_type == 'inception_v2':
    _, end_points = inception.inception_v2_base(images)
  elif model_type == 'inception_v2-no-separable-conv':
    _, end_points = inception.inception_v2_base(
        images, use_separable_conv=False)
  elif model_type == 'inception_v3':
    _, end_points = inception.inception_v3_base(images)
  elif model_type == 'inception_v4':
    _, end_points = inception.inception_v4_base(images)
  elif model_type == 'alexnet_v2':
    _, end_points = alexnet.alexnet_v2(images)
  elif model_type == 'vgg_a':
    _, end_points = vgg.vgg_a(images)
  elif model_type == 'vgg_16':
    _, end_points = vgg.vgg_16(images)
  elif model_type == 'mobilenet_v1':
    _, end_points = mobilenet_v1.mobilenet_v1_base(images)
  elif model_type == 'mobilenet_v1_075':
    _, end_points = mobilenet_v1.mobilenet_v1_base(
        images, depth_multiplier=0.75)
  elif model_type == 'resnet_v1_50':
    _, end_points = resnet_v1.resnet_v1_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_101':
    _, end_points = resnet_v1.resnet_v1_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_152':
    _, end_points = resnet_v1.resnet_v1_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_200':
    _, end_points = resnet_v1.resnet_v1_200(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_50':
    _, end_points = resnet_v2.resnet_v2_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_101':
    _, end_points = resnet_v2.resnet_v2_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_152':
    _, end_points = resnet_v2.resnet_v2_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_200':
    _, end_points = resnet_v2.resnet_v2_200(
        images, num_classes=None, is_training=False, global_pool=False)
  else:
    raise ValueError('Unsupported model_type %s.' % model_type)

  return end_points
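A quick way to sanity-check the construction (a hedged sketch; it assumes tensorflow is importable as tf alongside the module-level imports this snippet already relies on):

import tensorflow as tf

tf.reset_default_graph()
end_points = _construct_model('resnet_v1_50')
for name in sorted(end_points):
    print(name, end_points[name].shape)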
def resnet_v1_50_16s(image_batch_tensor,
                      number_of_classes,
                      is_training):
    """Returns the resnet_v1_50_16s model definition.
    The function returns the model definition of the network described in
    'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al.
    The network subsamples the input by a factor of 16 and uses a fixed
    bilinear kernel to upsample the prediction by the same factor. This means
    that if the input size is not a multiple of 16, a prediction of a
    different size will be delivered. To adapt the network to inputs of any
    size, use adapt_network_for_any_size_input(resnet_v1_50_16s, 16).
    Note: the upsampling kernel is fixed in this model definition because
    learning it did not give significant improvements according to the
    aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input;
        use adapt_network_for_any_size_input to adapt the network to any
        input size. Otherwise, the input image sizes should be a multiple of 16.
    resnet_v1_50_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_50_16s model's variables to the
        resnet_v1_50 checkpoint variable names. We need this to initialize the
        weights of the resnet_v1_50_16s model from a resnet_v1_50 checkpoint
        file. Look at the ipython notebook for examples.
    """
    
    with tf.variable_scope("resnet_v1_50_16s") as resnet_v1_50_16s:

        upsample_factor = 16

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make a pull request to get this custom feature accepted
        # upstream, to avoid depending on a custom slim repo.
        
        
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(mean_centered_image_batch,
                                                number_of_classes,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=16)
        

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        upsampled_logits_shape = tf.stack([
                                           downsampled_logits_shape[0],
                                           downsampled_logits_shape[1] * upsample_factor,
                                           downsampled_logits_shape[2] * upsample_factor,
                                           downsampled_logits_shape[3]
                                          ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original resnet_v1_50 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_50_16s_variables_mapping = {}

        resnet_v1_50_16s_variables = slim.get_variables(resnet_v1_50_16s)

        for variable in resnet_v1_50_16s_variables:

            # Here we remove the part of a name of the variable
            # that is responsible for the current variable scope
            original_resnet_v1_50_checkpoint_string = variable.name[len(resnet_v1_50_16s.original_name_scope):-2]
            resnet_v1_50_16s_variables_mapping[original_resnet_v1_50_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_50_16s_variables_mapping
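A minimal restore sketch using the returned mapping (hedged: the checkpoint path is hypothetical, and a TF1/slim environment with the imports used above is assumed):

image_batch = tf.placeholder(tf.float32, [1, None, None, 3])
upsampled_logits, mapping = resnet_v1_50_16s(image_batch,
                                             number_of_classes=21,
                                             is_training=False)

# slim.assign_from_checkpoint_fn builds an init function that loads the
# resnet_v1_50 weights through the name mapping computed above.
init_fn = slim.assign_from_checkpoint_fn('./resnet_v1_50.ckpt', mapping)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)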