def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnet_v1_50 model."""
    # Parse input arguments into class variables
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_50.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    self.train_layers = train_layers

    with tf.variable_scope("input"):
        self.image_size = resnet_v1.resnet_v1_50.default_image_size
        self.x_input = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3], name="x_input")
        self.y_input = tf.placeholder(tf.float32, [None, num_classes], name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # train
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                num_classes=num_classes,
                                                is_training=True,
                                                reuse=tf.AUTO_REUSE)

    # validation
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits_val, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                    num_classes=num_classes,
                                                    is_training=False,
                                                    reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.y_input))
        self.loss_val = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_val, labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Only train the variables whose scope names appear in train_layers.
        var_list = [v for v in tf.trainable_variables()
                    if v.name.split('/')[-2] in train_layers or v.name.split('/')[-3] in train_layers]
        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(grads_and_vars=self.grads_and_vars,
                                                      global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction, tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
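# Context for the constructor above: a minimal training-step sketch. The class
# name ResNet50Model is hypothetical (the snippet only shows __init__), the
# 'logits' entry passed to train_layers and the random batch are assumptions
# for illustration, not the author's training script.
import numpy as np
import tensorflow as tf

model = ResNet50Model(num_classes=5, train_layers=['logits'])  # hypothetical class name
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.random.rand(8, model.image_size, model.image_size, 3).astype(np.float32)
    y_batch = np.eye(5)[np.random.randint(0, 5, size=8)].astype(np.float32)
    _, loss = sess.run([model.train_op, model.loss],
                       feed_dict={model.x_input: x_batch,
                                  model.y_input: y_batch,
                                  model.learning_rate: 1e-3})
    print('training loss:', loss)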
def test_resnet_v1_50(img_dir): """ Test ResNet-V1-50 with a single image. :param img_dir: Path of the image to be classified :return: classification result and probability of a single image """ img = cv2.imread(img_dir) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = cv2.resize(img, (224, 224)) img = img.reshape((1, 224, 224, 3)) tf.reset_default_graph() inputs = tf.placeholder(name='input_images', shape=[None, 224, 224, 3], dtype=tf.float32) with slim.arg_scope(resnet_arg_scope()): _, _ = resnet_v1_50(inputs, 1000, is_training=False) with tf.Session() as sess: tf.train.Saver().restore(sess, './models/resnet_v1_50.ckpt') inputs = sess.graph.get_tensor_by_name('input_images:0') outputs = sess.graph.get_tensor_by_name( 'resnet_v1_50/SpatialSqueeze:0') pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0] prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0] pred, prob = sess.run([pred, prob], feed_dict={inputs: img}) name = label_dict[pred + 1] print('Result of ResNet-V1-50:', name, prob) return name, prob
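# A small usage sketch for the helper above; the image path is a placeholder,
# and label_dict plus the checkpoint under ./models/ are assumed to exist, as
# the function already requires.
if __name__ == '__main__':
    name, prob = test_resnet_v1_50('./data/example.jpg')
    print('predicted: {} (p={:.4f})'.format(name, prob))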
def __init__(self): from nets import resnet_v1 self.image_size = 224 self.num_classes = 1000 self.predictions_is_correct = False self.use_larger_step_size = False self.use_smoothed_grad = False # For dataprior attacks. gamma = A^2 * D / d in the paper self.gamma = 2.7 batch_shape = [None, self.image_size, self.image_size, 3] self.x_input = tf.placeholder(tf.float32, shape=batch_shape) self.target_label = tf.placeholder(tf.int32, shape=[None]) target_onehot = tf.one_hot(self.target_label, self.num_classes) with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, end_points = resnet_v1.resnet_v1_50( self.x_input, num_classes=self.num_classes, is_training=False) self.predicted_labels = tf.argmax(end_points['predictions'], 1) #logits -= tf.reduce_min(logits) #real = tf.reduce_max(logits * target_onehot, 1) #other = tf.reduce_max(logits * (1 - target_onehot), 1) #self.loss = other - real self.loss = tf.nn.softmax_cross_entropy_with_logits( labels=target_onehot, logits=logits) self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0] saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v1')) self.sess = tf.get_default_session() saver.restore(self.sess, 'resnet_v1_50.ckpt')
def _resnet_v1_50(self, X, num_classes, dropout_keep_prob=0.8, is_train=False): arg_scope = resnet_arg_scope() with slim.arg_scope(arg_scope): net, end_points = resnet_v1_50(X, is_training=is_train) with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): with tf.variable_scope('Logits_out'): net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, normalizer_fn=None, scope='Logits_out0') net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b_out0') net = slim.conv2d(net, 200, [1, 1], activation_fn=None, normalizer_fn=None, scope='Logits_out1') net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b_out1') net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Logits_out2') net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') return net
def mag(inputs, num_classes=3, num_channels=1000, is_training=True, global_pool=False, output_stride=16, upsample_ratio=2, spatial_squeeze=False, reuse=tf.AUTO_REUSE, scope='graspnet'): with tf.variable_scope(scope, 'graspnet', [inputs], reuse=reuse): with slim.arg_scope(resnet_v1.resnet_arg_scope()): net, end_points = resnet_v1.resnet_v1_50(inputs=inputs, num_classes=num_channels, is_training=is_training, global_pool=global_pool, output_stride=output_stride, spatial_squeeze=spatial_squeeze, scope='feature_extractor') with tf.variable_scope('prediction', [net]) as sc: end_points_collection = sc.original_name_scope + '_end_points' # to do: add batch normalization to the following conv layers. with slim.arg_scope([slim.conv2d], outputs_collections=end_points_collection): net = slim.conv2d(net, 512, [1, 1], scope='conv1') net = slim.conv2d(net, 128, [1, 1], scope='conv2') net = slim.conv2d(net, num_classes, [1, 1], scope='conv3') height, width = net.get_shape().as_list()[1:3] net = tf.image.resize_bilinear(net, [height * upsample_ratio, width * upsample_ratio], name='resize_bilinear') end_points.update(slim.utils.convert_collection_to_dict(end_points_collection)) end_points['logits'] = net return net, end_points
def generate_graph(output_root): os.makedirs(output_root, exist_ok=True) slim_dir = os.path.join(output_root, "models/slim") if not os.path.exists(slim_dir): clone_slim(output_root) sys.path.append(slim_dir) from nets import resnet_v1 image_size = resnet_v1.resnet_v1.default_image_size with slim.arg_scope(resnet_v1.resnet_arg_scope()): x = tf.placeholder(tf.float32, [1, image_size, image_size, 3]) logits, _ = resnet_v1.resnet_v1_50(x, num_classes=1000, is_training=False) y = tf.nn.softmax(logits) model_path = download_model(output_root) sess = tf.Session() slim.assign_from_checkpoint_fn(model_path, slim.get_model_variables())(sess) graph = TensorFlowConverter(sess, batch_size=1).convert([x], [y]) return sess, x, y, graph
def build_train_op(image_tensor, label_tensor, is_training): resnet_argscope = resnet_arg_scope(weight_decay=FLAGS.weight_decay) global_step = tf.get_variable(name="global_step", shape=[], dtype=tf.int32, trainable=False) with slim.arg_scope(resnet_argscope): logits, end_points = resnet_v1_50(image_tensor, is_training=is_training, num_classes=100) loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label_tensor)) accuracy = tf.reduce_sum( tf.cast( tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor), tf.int32)) end_points['loss'], end_points['accuracy'] = loss, accuracy if is_training: optimizer = tf.train.AdadeltaOptimizer( learning_rate=FLAGS.learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) return train_op, end_points else: return None, end_points
def network_entire(images): ''' A tensorflow operation that extracts features for a batch of images. Args: images: Numpy array of shape (n, h, w, 3). Returns: embedding: Tensor of shape (n, 128). ''' # Normalization. images = images - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3)) # Travel through the network and get the embedding. with slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)): _, endpoints = resnet_v1_50(images, num_classes=None, is_training=False, global_pool=True) endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean( endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False) with tf.name_scope('head'): endpoints = head(endpoints, embedding_dim, is_training=False) embedding = endpoints['emb'] return embedding
def __call__(self, x_input, batch_size, is_training=False): """Constructs model and return probabilities for given input.""" reuse = True if self.built else None # ResNet V1 and VGG have different preprocessing preproc = tf.map_fn( lambda img: vgg_preprocess(0.5 * 255.0 * ( img + 1.0), resnet_v1.resnet_v1.default_image_size, resnet_v1. resnet_v1.default_image_size), x_input) with slim.arg_scope(resnet_utils.resnet_arg_scope()): with tf.variable_scope(self.name): logits, end_points = resnet_v1.resnet_v1_50( preproc, num_classes=self.num_classes - 1, is_training=is_training, reuse=reuse) # VGG and ResNetV1 don't have a background class background_class = tf.constant(-np.inf, dtype=tf.float32, shape=[batch_size, 1]) logits = tf.concat([background_class, logits], axis=1) preds = tf.argmax(logits, axis=1) self.built = True self.logits = logits self.preds = preds return logits
def init_network(self): image = tf.image.resize_images(self.image, self.size, 0) image = tf.subtract(image, 0.5) image = tf.multiply(image, 2.0) # bone network net, end_points = resnet_v1.resnet_v1_50( image, is_training=self.is_training, global_pool=self.global_pool, output_stride=self.output_stride, spatial_squeeze=self.spatial_squeeze, num_classes=self.num_classes, reuse=self.reuse, scope='resnet_v1_50' ) self.feature = end_points['global_pool'] # embedding # with tf.variable_scope('embedding'): # net = end_points['global_pool'] # net = slim.flatten(net) # net = slim.fully_connected(net, 512, activation_fn=None) # net = slim.batch_norm(net, activation_fn=None) # net = LeakyRelu(net, leak=0.1) # net = slim.dropout(net, 0.5) # net = slim.fully_connected(net, self.num_classes, activation_fn=None, scope='logits') # pred = slim.softmax(net) # end_points['logits'] = net # end_points['prediction'] = pred self.end_points = end_points
def model(images, weight_decay=1e-5, is_training=True, eval=False): ''' define the model, we use slim's implemention of resnet ''' images = mean_image_subtraction(images, eval) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] for i in range(4): print('Shape of f_{} {}'.format(i, f[i].shape)) g = [None, None, None, None] h = [None, None, None, None] num_outputs = [None, 128, 64, 32] for i in range(4): if i == 0: h[i] = f[i] else: # logging.info(i) # logging.info(g[i-1].get_shape().as_list()) #logging.info(f[i].get_shape().as_list()) c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1) h[i] = slim.conv2d(c1_1, num_outputs[i], 3) if i <= 2: g[i] = unpool(h[i]) else: g[i] = slim.conv2d(h[i], num_outputs[i], 3) print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape)) # here we use a slightly different way for regression part, # we first use a sigmoid to limit the regression range, and also # this is do with the angle map F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) # 4 channel of axis aligned bbox and 1 channel rotation angle geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45] F_geometry = tf.concat([geo_map, angle_map], axis=-1) ''' F_score = slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) geo_map = slim.conv2d(end_points['pool2'], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = (slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45] ''' F_geometry = tf.concat([geo_map, angle_map], axis=-1) return F_score, F_geometry
def model(colors, depths, num_classes=3, num_channels=1000, is_training=True, global_pool=False, output_stride=16, spatial_squeeze=False, color_scope='color_tower', depth_scope='depth_tower', scope='urnet'): with slim.arg_scope(resnet_v1.resnet_arg_scope()): color_net, color_end_points = resnet_v1.resnet_v1_50( inputs=colors, num_classes=num_channels, is_training=is_training, global_pool=global_pool, output_stride=output_stride, spatial_squeeze=spatial_squeeze, scope=color_scope) depth_net, depth_end_points = resnet_v1.resnet_v1_50( inputs=depths, num_classes=num_channels, is_training=is_training, global_pool=global_pool, output_stride=output_stride, spatial_squeeze=spatial_squeeze, scope=depth_scope) net = tf.concat([color_net, depth_net], axis=3) with tf.variable_scope(scope, 'arcnet', [net]) as sc: end_points_collection = sc.original_name_scope + '_end_points' # to do: add batch normalization to the following conv layers. with slim.arg_scope([slim.conv2d], outputs_collections=end_points_collection): net = slim.conv2d(net, 512, [1, 1], scope='conv1') net = slim.conv2d(net, 128, [1, 1], scope='conv2') net = slim.conv2d(net, num_classes, [1, 1], scope='conv3') height, width = net.get_shape().as_list()[1:3] net = tf.image.resize_bilinear(net, [height * 2, width * 2], name='resize_bilinear') end_points = slim.utils.convert_collection_to_dict( end_points_collection) end_points.update(color_end_points) end_points.update(depth_end_points) end_points['logits'] = net return net, end_points
def model(images, weight_decay=1e-5, is_training=True): ''' define the model, we use slim's implemention of resnet ''' images = mean_image_subtraction(images) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope( [slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [ end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2'] ] g = [None, None, None, None] h = [None, None, None, None] num_outputs = [None, 128, 64, 32] for i in range(4): if i == 0: h[i] = f[i] else: c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1), num_outputs[i], 1) h[i] = slim.conv2d(c1_1, num_outputs[i], 3) if i <= 2: g[i] = unpool(h[i]) else: g[i] = slim.conv2d(h[i], num_outputs[i], 3) F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) geo_map = slim.conv2d( g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = (slim.conv2d( g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi / 2 # angle is between [-45, 45] F_geometry = tf.concat([geo_map, angle_map], axis=-1) return F_score, F_geometry
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None, None]
            h = [None, None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    ################ Modified by Xiaolong March. 9th ####################
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            h[4] = GlobalAveragePooling2D()(g[3])
            # The predicted class score is mapped back to the previous convolutional
            # layer to generate the class activation map (CAM); the CAM highlights
            # the class-specific discriminative regions.
            h4_tile = tf.tile(tf.reshape(h[4], [-1, 1, num_outputs[3], 1]),
                              [1, tf.shape(g[3])[1], 1, 1])
            ram = tf.matmul(g[3], h4_tile)
            g[4] = slim.conv2d(ram, num_outputs[3], 3)

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[4], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, ram
def ResNet50Model(input_tensor, weight_decay=1e-5, is_training=True): with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): input_tensor = tf.image.resize_images(input_tensor, [224, 224]) logits, end_points = resnet_v1.resnet_v1_50(input_tensor, is_training=is_training, scope='resnet_v1_50') feature = tf.reduce_mean(logits, reduction_indices=[1, 2]) fc1 = tf.contrib.layers.fully_connected(feature, num_outputs=512) fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=10) return fc2
def main(): sys.setrecursionlimit(10000) parser = argparse.ArgumentParser() parser.add_argument("--model", default="resnet50", choices=["resnet50"]) parser.add_argument('--out', '-o', default='output_tensorflow', help='Directory to output the graph descriptor') parser.add_argument("--encoding", help="name of weight encoder") parser.add_argument("--backend", default="webgpu,webgl,webassembly,fallback", help="backend") args = parser.parse_args() os.makedirs(args.out, exist_ok=True) slim_dir = os.path.join(args.out, "models/slim") if not os.path.exists(slim_dir): clone_slim(args.out) model_path = download_model(args.out) sys.path.append(slim_dir) from nets import resnet_v1 image_size = resnet_v1.resnet_v1.default_image_size checkpoints_dir = args.out sess = tf.Session() processed_images = tf.placeholder(tf.float32, [1, image_size, image_size, 3]) # Create the model, use the default arg scope to configure the batch norm parameters. with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, _ = resnet_v1.resnet_v1_50(processed_images, num_classes=1000, is_training=False) probabilities = tf.nn.softmax(logits) init_fn = slim.assign_from_checkpoint_fn(model_path, slim.get_model_variables()) init_fn(sess) graph = TensorFlowConverter(sess, batch_size=1).convert([processed_images], [probabilities]) from webdnn.graph import traverse traverse.dump(graph) for backend in args.backend.split(","): graph_exec_data = generate_descriptor( backend, graph, constant_encoder_name=args.encoding) graph_exec_data.save(args.out) console.stderr("Done.")
def create_network(self): with tf.contrib.slim.arg_scope(resnet_arg_scope()): logits, end_points = resnet_v1_50(self.img, num_classes=self.nb_class, is_training=self.is_training, global_pool=True, spatial_squeeze=True) self.logits = logits self.probabilities = tf.nn.sigmoid(self.logits) self.predictions = tf.cast( self.probabilities >= self.prediction_threshold, tf.float32)
def model(images, weight_decay=1e-5, is_training=True): with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] for i in range(4): print('Shape of f_{} = {}'.format(i, f[i].shape)) h = [f[0], None, None, None] num_outputs = [None, 128, 64, 32] def unpool(data): return tf.image.resize_bilinear(data, size=[tf.shape(data)[1]*2, tf.shape(data)[2]*2]) def feature_merge(data, d_concat, num_output): concat_res = tf.concat([unpool(data), d_concat], axis=-1) conv1x1_res = slim.conv2d(concat_res, num_output, 1) conv3x3_res = slim.conv2d(conv1x1_res, num_output, 3) return conv3x3_res for i in range(1,4): h[i] = feature_merge(h[i-1], f[i], num_outputs[i]) feature = slim.conv2d(h[3], 32, 3) F_score = slim.conv2d(feature, 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) geo_map = slim.conv2d(feature, 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = slim.conv2d(feature, 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) angle_map = (angle_map - 0.5) * np.pi/2 F_geometry = tf.concat([geo_map, angle_map], axis=-1) return F_score, F_geometry
def __init__(self, model_dir, load_queries=False, queries_number=None, gpu_id=0): self.load_queries = load_queries self.frames = tf.placeholder(tf.uint8, shape=(None, None, None, 3), name='input') processed_frames = self.preprocess_video(self.frames) with tf.device('/gpu:%i' % gpu_id): with tf.contrib.slim.arg_scope(resnet_v1.resnet_arg_scope()): _, network = resnet_v1.resnet_v1_50(processed_frames, num_classes=None, is_training=False) self.region_vectors = self.extract_region_vectors(network) if self.load_queries: print('Queries will be loaded to the gpu') self.queries = [ tf.Variable(np.zeros((1, 9, 3840)), dtype=tf.float32, validate_shape=False) for _ in range(queries_number) ] self.candidate = tf.placeholder(tf.float32, [None, None, None], name='candidate') self.similarities = [] for q in self.queries: sim_matrix = self.frame_to_frame_similarity( q, self.candidate) sim_matrix = self.video_to_video_similarity(sim_matrix) sim_matrix = tf.squeeze(sim_matrix, [0, 3]) self.similarities.append( self.chamfer_similarity(sim_matrix)) else: self.query = tf.placeholder(tf.float32, [None, None, None], name='query') self.candidate = tf.Variable(np.zeros((1, 9, 3840)), dtype=tf.float32, validate_shape=False) sim_matrix = self.frame_to_frame_similarity( self.query, self.candidate) sim_matrix = self.video_to_video_similarity(sim_matrix) sim_matrix = tf.squeeze(sim_matrix, [0, 3]) self.similarity = self.chamfer_similarity(sim_matrix) init = self.load_model(model_dir) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True self.sess = tf.Session(config=config) self.sess.run(init)
def model(images, weight_decay=1e-5, is_training=True): ''' define the model, we use slim's implemention of resnet ''' images = mean_image_subtraction(images) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] for i in range(4): print('Shape of f_{} {}'.format(i, f[i].shape)) g = [None, None, None, None] h = [None, None, None, None] num_outputs = [None, 128, 64, 32] for i in range(4): if i == 0: h[i] = f[i] else: c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1) h[i] = slim.conv2d(c1_1, num_outputs[i], 3) if i <= 2: g[i] = unpool(h[i]) else: g[i] = slim.conv2d(h[i], num_outputs[i], 3) print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape)) # here we use a slightly different way for regression part, # we first use a sigmoid to limit the regression range, and also # this is do with the angle map F_score_nrow = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) F_score_ncol = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) F_score_row = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) F_score_col = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) return F_score_nrow, F_score_ncol, F_score_row, F_score_col
def endpoints(image, is_training): if image.get_shape().ndims != 4: raise ValueError('Input must be of size [batch, height, width, 3]') image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3)) with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)): _, endpoints = resnet_v1_50(image, num_classes=None, is_training=is_training, global_pool=True) endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean( endpoints['resnet_v1_50/block4'], [1, 2], name='pool5') return endpoints, 'resnet_v1_50'
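# Hedged usage sketch for endpoints() above: extract 2048-D features for a batch
# of images. The checkpoint path and the input resolution are assumptions for
# illustration, not part of the original code.
import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 256, 128, 3])
endpts, backbone_scope = endpoints(images, is_training=False)

with tf.Session() as sess:
    tf.train.Saver().restore(sess, './resnet_v1_50.ckpt')  # assumed checkpoint path
    batch = np.random.rand(4, 256, 128, 3).astype(np.float32)
    feats = sess.run(endpts['model_output'], feed_dict={images: batch})
    print(feats.shape)  # (4, 2048)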
def inference(hypes, images, train=True,
              num_classes=1000,
              num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
              preprocess=True,
              bottleneck=True):
    # if preprocess is True, input should be RGB [0,1], otherwise BGR with mean
    # subtracted
    layers = hypes['arch']['layers']
    if layers == 50:
        num_blocks = [3, 4, 6, 3]
    elif layers == 101:
        num_blocks = [3, 4, 23, 3]
    elif layers == 152:
        num_blocks = [3, 8, 36, 3]
    else:
        raise ValueError('Unsupported number of layers: {}'.format(layers))

    # Apply the ImageNet preprocessing when requested; otherwise feed the images as-is.
    x = _imagenet_preprocess(images) if preprocess else images

    is_training = tf.convert_to_tensor(train, dtype='bool', name='is_training')

    logits = {}

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(x,
                                                 num_classes=num_classes,
                                                 is_training=is_training,
                                                 global_pool=False,
                                                 spatial_squeeze=False)

    logits['images'] = images
    logits['fcn_in'] = end_points['resnet_v1_50/block4']
    logits['feed2'] = end_points['resnet_v1_50/block3']
    logits['feed4'] = end_points['resnet_v1_50/block2']
    logits['early_feat'] = end_points['resnet_v1_50/block3']
    logits['deep_feat'] = end_points['resnet_v1_50/block4']

    if train:
        restore = slim.get_variables_to_restore()
        hypes['init_function'] = _initalize_variables
        hypes['restore'] = restore

    return logits
def classify_image(self, image_string, ext='png', ret=None):
    if ext not in self.valid_ext:
        # print "wrong image format"
        ret['result'] = (False, "please input a valid image format", "png,jpg,jpeg,gif")
        return ret['result']
    try:
        image_size = resnet_v1.resnet_v1.default_image_size
        with self.g.as_default():
            # if the image comes from a local path, read the file first
            if os.path.splitext(image_string)[1].strip(".") in self.valid_ext:
                # print "image from local"
                image_string = tf.read_file(image_string)
            if ext in ("jpeg", "jpg"):
                image = tf.image.decode_jpeg(image_string, channels=3)
            if ext == "png":
                image = tf.image.decode_png(image_string, channels=3)
            if ext == 'gif':
                # decode_gif returns [num_frames, height, width, 3]; use the first frame
                image = tf.image.decode_gif(image_string)[0]
            processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
            processed_images = tf.expand_dims(processed_image, 0)
            # Create the model, use the default arg scope to configure the batch norm parameters.
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, _ = resnet_v1.resnet_v1_50(processed_images, num_classes=1000, is_training=False)
            probabilities = tf.nn.softmax(logits)
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(self.checkpoints_dir, 'resnet_v1_50.ckpt'),
                slim.get_model_variables())
        with self.g.as_default():
            with tf.Session() as sess:
                init_fn(sess)
                starttime = time.time()
                np_image, probabilities = sess.run([image, probabilities])
                endtime = time.time()
                probabilities = probabilities[0, 0, 0, :]
                sorted_inds = np.argsort(probabilities)[::-1]
                indices = sorted_inds[:5]
                predictions = synset[indices]
                meta = [(p, '%.5f' % probabilities[i]) for i, p in zip(indices, predictions)]
                ret['result'] = (True, meta, '%.3f' % (endtime - starttime))
                return ret['result']
    except Exception as err:
        # print "error"
        ret['result'] = (False, "something went wrong when classifying the image,", "Maybe try another one?")
        return ret['result']
def model(images, weight_decay=1e-5, is_training=True): ''' define the model, we use slim's implemention of resnet ''' images = mean_image_subtraction(images) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] for i in range(4): print('Shape of f_{} {}'.format(i, f[i].shape)) g = [None, None, None, None] h = [None, None, None, None] num_outputs = [None, 128, 64, 32] for i in range(4): if i == 0: h[i] = f[i] else: c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1) h[i] = slim.conv2d(c1_1, num_outputs[i], 3) if i <= 2: g[i] = unpool(h[i]) else: g[i] = slim.conv2d(h[i], num_outputs[i], 3) print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape)) # here we use a slightly different way for regression part, # we first use a sigmoid to limit the regression range, and also # this is do with the angle map F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) # 4 channel of axis aligned bbox and 1 channel rotation angle geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45] F_geometry = tf.concat([geo_map, angle_map], axis=-1) return F_score, F_geometry
def eval(adv_imgs, labels, x_inputs, total_score, total_count): image = (((adv_imgs + 1.0) * 0.5) * 255.0) processed_imgs_inv1 = preprocess_for_model(image, 'inception_v1') with slim.arg_scope(inception.inception_v1_arg_scope()): logits_inc_v1, end_points_inc_v1 = inception.inception_v1( processed_imgs_inv1, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionV1', reuse=True) pred_inception = tf.argmax(end_points_inc_v1['Predictions'], 1) # rescale pixle range from [-1, 1] to [0, 255] for resnet_v1 and vgg's input processed_imgs_res_v1_50 = preprocess_for_model(image, 'resnet_v1_50') with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50( processed_imgs_res_v1_50, num_classes=FLAGS.num_classes, is_training=False, scope='resnet_v1_50', reuse=True) end_points_res_v1_50['logits'] = tf.squeeze(end_points_res_v1_50['resnet_v1_50/logits'], [1, 2]) end_points_res_v1_50['probs'] = tf.nn.softmax(end_points_res_v1_50['logits']) pred_resnet = tf.argmax(end_points_res_v1_50['probs'], 1) processed_imgs_inv3 = preprocess_for_model(image, 'inception_v3') with slim.arg_scope(inception_v3.inception_v3_arg_scope()): logits_res_inception_v3, end_points_inception_v3 = inception_v3.inception_v3( processed_imgs_inv3, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionV3', reuse=True) pred_inception_v3 = tf.argmax(end_points_inception_v3['Predictions'], 1) processed_imgs_inv_res = preprocess_for_model(image, 'inception_resnet_v2') with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()): logits_res_inception_resnet, end_points_inception_resnet = inception_resnet_v2.inception_resnet_v2( processed_imgs_inv_res, num_classes=FLAGS.num_classes, is_training=False, scope='InceptionResnetV2') pred_ince_res = tf.argmax(end_points_inception_resnet['Predictions'], 1) for i in range(adv_imgs.shape[0]): def f1(total_score, total_count): total_score = tf.add(total_score, 64) return total_score, total_count def f2(total_score, total_count): adv = (((adv_imgs[i] + 1.0) * 0.5) * 255.0) ori = (((x_inputs[i] + 1.0) * 0.5) * 255.0) diff = tf.reshape(adv, [-1, 3]) - tf.reshape(ori, [-1, 3]) distance = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(diff), axis=1))) total_score = tf.add(total_score, distance) total_count = tf.add(total_count, 1) return total_score, total_count total_score, total_count = tf.cond(tf.equal(pred_inception[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count)) total_score, total_count = tf.cond(tf.equal(pred_resnet[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count)) # total_score, total_count = tf.cond(tf.equal(pred_inception_v3[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count)) total_score, total_count = tf.cond(tf.equal(pred_ince_res[i], labels[i]), lambda: f2(total_score, total_count), lambda: f1(total_score, total_count)) return total_score, total_count
def resnet_v1_50(inputs, is_training, opts): with slim.arg_scope(resnet_v1.resnet_arg_scope( weight_decay=opts.weight_decay, batch_norm_decay=opts.batch_norm_decay, batch_norm_epsilon=opts.batch_norm_epsilon, activation_fn=tf.nn.relu)): return resnet_v1.resnet_v1_50( inputs, num_classes=opts.num_classes, is_training=is_training, global_pool=opts.global_pool, output_stride=None, spatial_squeeze=opts.spatial_squeeze, reuse=None)
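# Illustrative call of the wrapper above; opts only needs the attribute names the
# wrapper reads, so a simple namespace with assumed values is enough here.
from argparse import Namespace
import tensorflow as tf

opts = Namespace(weight_decay=1e-4, batch_norm_decay=0.997, batch_norm_epsilon=1e-5,
                 num_classes=10, global_pool=True, spatial_squeeze=True)
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = resnet_v1_50(inputs, is_training=True, opts=opts)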
def build_FPN(images, config, is_training, backbone='resnet50'): # images: [batch, h, w, channels] # Return: pyramid_feature Dict{P2, P3, P4, P5} of feature maps from different level of the # feature pyramid. Each is [batch, height, width, channels] pyramid = {} # build backbone network with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)): if backbone == "resnet50": logits, end_points = resnet_v1.resnet_v1_50( images, is_training=is_training, scope='resnet_v1_50') pyramid['C2'] = end_points[ 'resnet_v1_50/block1/unit_2/bottleneck_v1'] pyramid['C3'] = end_points[ 'resnet_v1_50/block2/unit_3/bottleneck_v1'] pyramid['C4'] = end_points[ 'resnet_v1_50/block3/unit_5/bottleneck_v1'] pyramid['C5'] = end_points[ 'resnet_v1_50/block4/unit_3/bottleneck_v1'] elif backbone == "resnet101": logits, end_points = resnet_v1.resnet_v1_101( images, is_training=is_training, scope='resnet_v1_101') pyramid['C2'] = end_points[ 'resnet_v1_101/block1/unit_2/bottleneck_v1'] pyramid['C3'] = end_points[ 'resnet_v1_101/block2/unit_3/bottleneck_v1'] pyramid['C4'] = end_points[ 'resnet_v1_101/block3/unit_22/bottleneck_v1'] pyramid['C5'] = end_points[ 'resnet_v1_101/block4/unit_3/bottleneck_v1'] else: print("Unkown backbone : ", backbone) # build FPN pyramid_feature = {} arg_scope = _extra_conv_arg_scope_with_bn() with tf.variable_scope('FPN'): with slim.arg_scope(arg_scope): pyramid_feature['P5'] = slim.conv2d(pyramid['C5'], config.TOP_DOWN_PYRAMID_SIZE, 1) for i in range(4, 1, -1): upshape = tf.shape(pyramid['C%d' % i]) u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i+1)], \ size = (upshape[1], upshape[2])) c = slim.conv2d(pyramid['C%d' % i], config.TOP_DOWN_PYRAMID_SIZE, 1) s = tf.add(c, u) pyramid_feature['P%d' % i] = slim.conv2d( s, config.TOP_DOWN_PYRAMID_SIZE, 3) return pyramid_feature
def _tower_fn(is_training, weight_decay, feature, label, data_format, num_layers, batch_norm_decay, batch_norm_epsilon): """Build computation tower (Resnet). Args: is_training: true if is training graph. weight_decay: weight regularization strength, a float. feature: a Tensor. label: a Tensor. data_format: channels_last (NHWC) or channels_first (NCHW). num_layers: number of layers, an int. batch_norm_decay: decay for batch normalization, a float. batch_norm_epsilon: epsilon for batch normalization, a float. Returns: A tuple with the loss for the tower, the gradients and parameters, and predictions. """ with slim.arg_scope( resnet_v1.resnet_arg_scope(batch_norm_decay=batch_norm_decay, batch_norm_epsilon=batch_norm_epsilon)): net, end_points = resnet_v1.resnet_v1_50(feature, is_training=is_training) net = slim.conv2d(net, 397, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') end_points['logits'] = net net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') end_points['spatial_squeeze'] = net logits = net tower_pred = { 'classes': tf.argmax(input=logits, axis=1), 'probabilities': tf.nn.softmax(logits) } tower_loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=label) tower_loss = tf.reduce_mean(tower_loss) model_params = tf.trainable_variables() tower_loss += weight_decay * tf.add_n( [tf.nn.l2_loss(v) for v in model_params]) tower_grad = tf.gradients(tower_loss, model_params) return tower_loss, zip(tower_grad, model_params), tower_pred
def model(images, weight_decay=1e-5, is_training=True): images = mean_image_subtraction(images) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = {'decay': 0.997,'epsilon': 1e-5,'scale': True,'is_training': is_training} with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] for i in range(4): print('Shape of f_{} {}'.format(i, f[i].shape)) g = [None, None, None, None] h = [None, None, None, None] for i in range(4): h[i]=slim.conv2d(f[i], 256, 1) for i in range(4): print('Shape of h_{} {}'.format(i, h[i].shape)) g[0]=RefineBlock(high_inputs=None,low_inputs=h[0]) print('Shape of g_{} {}'.format(0, g[0].shape)) g[1]=RefineBlock(g[0],h[1]) print('Shape of g_{} {}'.format(1, g[1].shape)) g[2]=RefineBlock(g[1],h[2]) print('Shape of g_{} {}'.format(2, g[2].shape)) g[3]=RefineBlock(g[2],h[3]) g[3] = slim.conv2d(g[3], 128, 3) g[3] = slim.conv2d(g[3], 64, 3) g[3] = slim.conv2d(g[3], 32, 3) print('Shape of g_{} {}'.format(3, g[3].shape)) #g[3]=unpool(g[3],scale=4) #g[3] = horizontal_vertical_lstm_together(g[3], 128, scope_n="layer1") F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45] F_geometry = tf.concat([geo_map, angle_map], axis=-1) #F_score = slim.conv2d(g[3], 21, 1, activation_fn=tf.nn.relu, normalizer_fn=None) return F_score, F_geometry
def endpoints(image, block4_units, is_training, embedding_dim=128): if image.get_shape().ndims != 4: raise ValueError('Input must be of size [batch, height, width, 3]') image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3)) with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)): _, endpoints = resnet_v1_50(image, block4_units=block4_units, num_classes=None, is_training=is_training, global_pool=True) endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean( endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False) endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected( endpoints['model_output'], embedding_dim, activation_fn=None, weights_initializer=tf.orthogonal_initializer(), scope='emb') return endpoints
def make_resnetv1_50_multi_embeddings(batch_imgs, embedding_dims, n_heads, phase_is_train, uniform_bias=False, weight_decay=0.00004): blocks = 4 units = [3, 4, 6, 3] emb_info = [ 'resnet_v1_50/block1', 'resnet_v1_50/block2', 'resnet_v1_50/block3', 'resnet_v1_50/block4' ] if (n_heads == 16): emb_info = [] for i in xrange(blocks): for j in xrange(units[i]): emb_info.append('resnet_v1_50/block' + str(i + 1) + '/unit_' + str(j + 1) + '/bottleneck_v1') left_embedding = embedding_dims with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): net, endpoints = resnet_v1.resnet_v1_50(batch_imgs, num_classes=0, global_pool=False, is_training=phase_is_train) for i in range(n_heads): emb_dim = int(math.ceil(left_embedding / float(n_heads - i))) left_embedding -= emb_dim with tf.variable_scope('loss%d' % i) as scope: # change fully connected to conv2d for using Regularization losses of slim in resent args scope endpoints['emb_%d' % i] = slim.fully_connected( tf.reduce_mean(endpoints[emb_info[i]], [1, 2]), emb_dim, activation_fn=None) endpoints['embedding%d' % i] = tf.nn.l2_normalize( endpoints['emb_%d' % i], dim=1) with tf.variable_scope('fc_embedding') as scope: embs = [endpoints['embedding%d' % i] for i in range(n_heads)] endpoints['fc_embedding'] = tf.concat(embs, 1) / np.sqrt(n_heads) # print('Endpoints') # for k,v in endpoints.items(): # print((k,v)) return endpoints, None
def _construct_model(model_type='resnet_v1_50'): """Constructs model for the desired type of CNN. Args: model_type: Type of model to be used. Returns: end_points: A dictionary from components of the network to the corresponding activations. Raises: ValueError: If the model_type is not supported. """ # Placeholder input. images = array_ops.placeholder( dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE) # Construct model. if model_type == 'inception_resnet_v2': _, end_points = inception.inception_resnet_v2_base(images) elif model_type == 'inception_resnet_v2-same': _, end_points = inception.inception_resnet_v2_base( images, align_feature_maps=True) elif model_type == 'inception_v2': _, end_points = inception.inception_v2_base(images) elif model_type == 'inception_v2-no-separable-conv': _, end_points = inception.inception_v2_base( images, use_separable_conv=False) elif model_type == 'inception_v3': _, end_points = inception.inception_v3_base(images) elif model_type == 'inception_v4': _, end_points = inception.inception_v4_base(images) elif model_type == 'alexnet_v2': _, end_points = alexnet.alexnet_v2(images) elif model_type == 'vgg_a': _, end_points = vgg.vgg_a(images) elif model_type == 'vgg_16': _, end_points = vgg.vgg_16(images) elif model_type == 'mobilenet_v1': _, end_points = mobilenet_v1.mobilenet_v1_base(images) elif model_type == 'mobilenet_v1_075': _, end_points = mobilenet_v1.mobilenet_v1_base( images, depth_multiplier=0.75) elif model_type == 'resnet_v1_50': _, end_points = resnet_v1.resnet_v1_50( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v1_101': _, end_points = resnet_v1.resnet_v1_101( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v1_152': _, end_points = resnet_v1.resnet_v1_152( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v1_200': _, end_points = resnet_v1.resnet_v1_200( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v2_50': _, end_points = resnet_v2.resnet_v2_50( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v2_101': _, end_points = resnet_v2.resnet_v2_101( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v2_152': _, end_points = resnet_v2.resnet_v2_152( images, num_classes=None, is_training=False, global_pool=False) elif model_type == 'resnet_v2_200': _, end_points = resnet_v2.resnet_v2_200( images, num_classes=None, is_training=False, global_pool=False) else: raise ValueError('Unsupported model_type %s.' % model_type) return end_points
def resnet_v1_50_16s(image_batch_tensor, number_of_classes, is_training):
    """Returns the resnet_v1_50_16s model definition.

    The function returns the model definition of a network that was described in
    'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous
    Convolution, and Fully Connected CRFs' by Chen et al. The network subsamples
    the input by a factor of 16 and uses a bilinear upsampling kernel to upsample
    the prediction by a factor of 16. This means that if the input image size is
    not a multiple of 16, a prediction of a different size will be delivered. To
    adapt the network for an input of any size, use
    adapt_network_for_any_size_input(resnet_v1_50_16s, 16).
    Note: the upsampling kernel is fixed in this model definition, because it
    didn't give significant improvements according to the aforementioned paper.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying the input image batch.
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.

    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of a different size compared to the input;
        use adapt_network_for_any_size_input to adapt the network for any input
        size. Otherwise, the input image sizes should be a multiple of 16.
    resnet_v1_50_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_50_16s model's variables to resnet_v1_50
        checkpoint variable names. We need this to initialize the weights of the
        resnet_v1_50_16s model with resnet_v1_50 from the checkpoint file.
        Look at the ipython notebook for examples.
    """
    with tf.variable_scope("resnet_v1_50_16s") as resnet_v1_50_16s:

        upsample_factor = 16

        # Convert the image to float32 before subtracting the mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(mean_centered_image_batch,
                                                        number_of_classes,
                                                        is_training=is_training,
                                                        global_pool=False,
                                                        output_stride=16)

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original resnet_v1_50 variable names to the variables in our
        # model. This is done to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_50_16s_variables_mapping = {}

        resnet_v1_50_16s_variables = slim.get_variables(resnet_v1_50_16s)

        for variable in resnet_v1_50_16s_variables:
            # Here we remove the part of the variable's name that corresponds
            # to the current variable scope
            original_resnet_v1_50_checkpoint_string = variable.name[len(resnet_v1_50_16s.original_name_scope):-2]
            resnet_v1_50_16s_variables_mapping[original_resnet_v1_50_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_50_16s_variables_mapping
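# The helper bilinear_upsample_weights used above is not shown in this snippet.
# Below is a sketch of the standard FCN-style construction, under the assumption
# that the helper builds a fixed per-class bilinear kernel of shape
# [filter_size, filter_size, number_of_classes, number_of_classes].
import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    """Create weights for tf.nn.conv2d_transpose that perform bilinear
    upsampling by `factor`, independently for each class channel."""
    filter_size = 2 * factor - factor % 2
    center = factor - 1 if filter_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    kernel = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    weights = np.zeros((filter_size, filter_size, number_of_classes, number_of_classes),
                       dtype=np.float32)
    for i in range(number_of_classes):
        weights[:, :, i, i] = kernel
    return weights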