def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnetv2_101 model.

    Two copies of ResNet-v2-101 are built on the same input placeholder
    (``is_training=True`` and ``is_training=False``) with
    ``reuse=tf.AUTO_REUSE``, followed by loss, train, probability,
    prediction and accuracy ops.

    Args:
        num_classes: Number of classes; sets the width of ``y_input`` and is
            passed to the network as ``num_classes``.
        train_layers: Container of scope names. A trainable variable is
            optimized when the second- or third-to-last ``/``-separated
            component of its name is in this container. ``None`` (the
            default) optimizes every trainable variable.
        weights_path: Stored in ``self.WEIGHTS_PATH``. The value
            ``'DEFAULT'`` is replaced by
            ``"./pre_trained_models/resnet_v2_101.ckpt"``.
    """
    # Parse input arguments into class variables
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v2_101.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    self.train_layers = train_layers

    with tf.variable_scope("input"):
        self.image_size = resnet_v2.resnet_v2_101.default_image_size
        self.x_input = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name="x_input")
        self.y_input = tf.placeholder(
            tf.float32, [None, num_classes], name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # train
    with arg_scope(resnet_v2.resnet_arg_scope()):
        self.logits, _ = resnet_v2.resnet_v2_101(
            self.x_input,
            num_classes=num_classes,
            is_training=True,
            reuse=tf.AUTO_REUSE)

    # validation
    with arg_scope(resnet_v2.resnet_arg_scope()):
        self.logits_val, _ = resnet_v2.resnet_v2_101(
            self.x_input,
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=self.y_input))
        self.loss_val = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits_val, labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        trainable = tf.trainable_variables()
        if train_layers is None:
            # The default value used to crash on the membership test below;
            # treat it as "no restriction".
            var_list = list(trainable)
        else:
            # [-3:-1] yields the third- and second-to-last name components
            # and simply gets shorter for short names (no IndexError).
            var_list = [
                v for v in trainable
                if any(part in train_layers
                       for part in v.name.split('/')[-3:-1])
            ]

        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))

        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        # Run the collected update ops before applying gradients.
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                grads_and_vars=self.grads_and_vars,
                global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction,
                                      tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, "float"), name="accuracy")
def GetAttentionPrelogit(
        self,
        images,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False,
        use_batch_norm=True):
    """Builds the attention model on top of resnet_v2_50 features.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: Weight-decay regularizer parameter for the attention
        subnetwork's arg scope.
      attention_nonlinear: Type of non-linearity applied on top of the
        attention function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether the Resnet blocks are in training mode.
      training_attention: Whether the attention part is in training mode.
      reuse: Whether the layers and their variables should be reused.
      use_batch_norm: Whether to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, not l2-normalized.
      end_points: Set of activations for external use.
    """
    # Backbone: Resnet50 features.
    backbone_scope = resnet_v2.resnet_arg_scope(use_batch_norm=use_batch_norm)
    with slim.arg_scope(backbone_scope):
        _, end_points = self.GetResnet50Subnetwork(
            images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Attention subnetwork on top of the selected feature map.
    attention_scope = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, use_batch_norm=use_batch_norm)
    with slim.arg_scope(attention_scope), \
            slim.arg_scope([slim.batch_norm], is_training=training_attention):
        attention_outputs = self._GetAttentionSubnetwork(
            feature_map,
            end_points,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            reuse=reuse)

    prelogits, attention_prob, attention_score, end_points = attention_outputs
    return prelogits, attention_prob, attention_score, feature_map, end_points
def __call__(self, x_input):
    """Return the mean logits of the six-network ensemble for `x_input`.

    The input is preprocessed separately for VGG-16, ResNet, Inception-v3,
    Inception-v4, Inception-ResNet-v2 and AlexNet. The networks built with
    ``self.num_classes + 1`` outputs have their first output column sliced
    off so all logits have ``self.num_classes`` columns before averaging.

    Args:
        x_input: Image batch tensor.
            NOTE(review): the preprocessing divides by 255 / subtracts
            per-channel means, so pixel values are presumably in [0, 255]
            and the batch size is presumably FLAGS.batch_size — confirm
            against the caller.

    Returns:
        Tensor holding the element-wise mean of the six logits tensors.
    """
    # Every call after the first one reuses the variables created earlier.
    if self.build:
        tf.get_variable_scope().reuse_variables()
    else:
        self.build = True

    # Inception-style scaling to [-1, 1] at the native resolution.
    inception_images = (x_input / 255.0 - 0.5) * 2

    # VGG and ResNet both consume the same 224x224 resize; compute it once.
    resized_224 = tf.image.resize_images(x_input, [224, 224])

    resized_images_vgg = resized_224 - tf.constant([123.68, 116.78, 103.94])
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits_vgg16, _ = self.network_fn_vgg16(
            resized_images_vgg,
            num_classes=self.num_classes,
            is_training=False)

    resized_images_res = (resized_224 / 255.0 - 0.5) * 2
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_res, _ = self.network_fn_res(
            resized_images_res,
            num_classes=self.num_classes + 1,
            is_training=False)
    # Flatten to [batch, num_classes + 1]; the width follows the value the
    # network was built with instead of a hard-coded 1001.
    logits_res = tf.reshape(logits_res, (-1, self.num_classes + 1))
    logits_res = tf.slice(logits_res, [0, 1],
                          [FLAGS.batch_size, self.num_classes])

    with slim.arg_scope(inception_utils.inception_arg_scope()):
        logits_incepv3, _ = self.network_fn_incepv3(
            inception_images,
            num_classes=self.num_classes + 1,
            is_training=False)
    logits_incepv3 = tf.slice(logits_incepv3, [0, 1],
                              [FLAGS.batch_size, self.num_classes])

    with slim.arg_scope(inception_utils.inception_arg_scope()):
        logits_incepv4, _ = self.network_fn_incepv4(
            inception_images,
            num_classes=self.num_classes + 1,
            is_training=False)
    logits_incepv4 = tf.slice(logits_incepv4, [0, 1],
                              [FLAGS.batch_size, self.num_classes])

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_incep_res, _ = self.network_fn_incep_res(
            inception_images,
            num_classes=self.num_classes + 1,
            is_training=False)
    logits_incep_res = tf.slice(logits_incep_res, [0, 1],
                                [FLAGS.batch_size, self.num_classes])

    # AlexNet branch: resize to 256, reverse the channel axis, subtract the
    # stored mean image and take the 227x227 window starting at (14, 14).
    alex_images = tf.image.resize_images(x_input, [256, 256])
    alex_images = tf.reverse(alex_images, axis=[-1])
    alex_mean_npy = np.load('model/alex_mean.npy').swapaxes(0, 1).swapaxes(
        1, 2).astype(np.float32)
    alex_mean_images = tf.constant(alex_mean_npy)
    alex_images = alex_images[:, ] - alex_mean_images
    alex_images = tf.slice(alex_images, [0, 14, 14, 0],
                           [FLAGS.batch_size, 227, 227, 3])
    _, logits_alex = self.network_fn_alex(alex_images)

    logits = [
        logits_vgg16, logits_res, logits_incepv3, logits_incepv4,
        logits_incep_res, logits_alex
    ]
    ensemble_logits = tf.reduce_mean(tf.stack(logits), 0)
    return ensemble_logits
def graph(x, y, i, x_max, x_min, grad):
    """One iteration of the sign-gradient update against ResNet-v2-101.

    The network is evaluated at ``x + momentum * grad``. On the iteration
    where ``i == 0`` the labels ``y`` are replaced by the network's own
    predictions. The cross-entropy gradient w.r.t. ``x`` is divided by its
    per-example mean absolute value, accumulated with ``momentum * grad``,
    and ``x`` is moved by ``alpha * sign(noise)`` and clipped to
    ``[x_min, x_max]``.

    Returns:
        ``(x, y, i + 1, x_max, x_min, noise)`` — same arity as the arguments,
        with ``noise`` in the ``grad`` slot.
    """
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    step_size = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            x + momentum * grad,
            num_classes=num_classes,
            is_training=False)

    # Use the model's prediction as label on the first round only.
    predicted = tf.argmax(end_points_resnet['predictions'], 1)
    is_first = tf.cast(tf.equal(i, 0), tf.int64)
    y = is_first * predicted + (1 - is_first) * y
    labels_one_hot = tf.one_hot(y, num_classes)

    scaled_logits = (logits_resnet) / 7.25
    cross_entropy = tf.losses.softmax_cross_entropy(
        labels_one_hot, scaled_logits, label_smoothing=0.0, weights=1.0)

    noise = tf.gradients(cross_entropy, x)[0]
    mean_abs = tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = noise / mean_abs
    noise = momentum * grad + noise

    x = x + step_size * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def __init__(self, source_image_size, use_smoothed_grad=False):
    """Build a ResNet-v2-152 graph exposing the loss gradient w.r.t. the input.

    Args:
        source_image_size: Stored unchanged in ``self.source_image_size``.
        use_smoothed_grad: When true, the gradient is passed through a
            depthwise convolution with the externally defined
            ``stack_kernel``.

    Side effects:
        Restores ``'resnet_v2_152.ckpt'`` into the current default session.
    """
    self.image_size = 299
    self.source_image_size = source_image_size
    self.num_classes = 1001
    self.predictions_is_correct = False

    self.x_input = tf.placeholder(
        tf.float32, shape=[None, self.image_size, self.image_size, 3])
    self.target_label = tf.placeholder(tf.int32, shape=[None])
    target_onehot = tf.one_hot(self.target_label, self.num_classes)

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_152(
            self.x_input, num_classes=self.num_classes, is_training=False)

    self.predicted_labels = tf.argmax(end_points['predictions'], 1)

    # Disabled alternative (margin-style) loss kept for reference:
    #   logits -= tf.reduce_min(logits)
    #   real = tf.reduce_max(logits * target_onehot, 1)
    #   other = tf.reduce_max(logits * (1 - target_onehot), 1)
    #   self.loss = other - real
    self.loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=target_onehot, logits=logits)

    input_grad = tf.gradients(self.loss, self.x_input)[0]
    self.grad = 2 * input_grad
    if use_smoothed_grad:
        self.grad = tf.nn.depthwise_conv2d(
            self.grad, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')

    saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))
    self.sess = tf.get_default_session()
    saver.restore(self.sess, 'resnet_v2_152.ckpt')
def build_train_op(image_tensor, label_tensor, is_training):
    """Build a 100-class ResNet-v2-50 with loss, hit count and train op.

    Args:
        image_tensor: Input passed to ``resnet_v2_50``.
        label_tensor: Sparse labels, compared against the int32 argmax
            of the logits.
        is_training: Forwarded to the network; also decides whether an
            optimizer is created.

    Returns:
        ``(train_op, end_points)`` when ``is_training`` is truthy, otherwise
        ``(None, end_points)``. ``end_points['loss']`` is the mean sparse
        softmax cross-entropy; ``end_points['accuracy']`` is the *sum* of
        correct predictions (a count, not a ratio).
    """
    resnet_argscope = resnet_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(
        name="global_step", shape=[], dtype=tf.int32, trainable=False)

    with slim.arg_scope(resnet_argscope):
        logits, end_points = resnet_v2_50(
            image_tensor, is_training=is_training, num_classes=100)

    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_tensor)
    loss = tf.reduce_mean(per_example_loss)

    predicted = tf.cast(tf.argmax(logits, 1), tf.int32)
    hits = tf.cast(tf.equal(predicted, label_tensor), tf.int32)
    accuracy = tf.reduce_sum(hits)

    end_points['loss'] = loss
    end_points['accuracy'] = accuracy

    if not is_training:
        return None, end_points

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=FLAGS.learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op, end_points
def graph(x, y, i, x_max, x_min, grad, eps_inside):
    """One sign-gradient step against ResNet-v2-50 with a supplied budget.

    The step size is ``eps_inside / FLAGS.num_iter``. On the iteration where
    ``i == 0`` the labels ``y`` are replaced by the network's predictions.
    ``grad`` is accepted but not used in the update; the raw gradient is
    returned in its slot.

    Returns:
        ``(x, y, i + 1, x_max, x_min, noise, eps_inside)``.
    """
    num_iter = FLAGS.num_iter
    step_size = eps_inside / num_iter
    momentum = FLAGS.momentum  # read like the sibling variants; unused here
    num_classes = 1001

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_50(
            x, num_classes=num_classes, is_training=False)

    predicted = tf.argmax(end_points_resnet['predictions'], 1)
    is_first = tf.cast(tf.equal(i, 0), tf.int64)
    y = is_first * predicted + (1 - is_first) * y
    labels_one_hot = tf.one_hot(y, num_classes)

    cross_entropy = tf.losses.softmax_cross_entropy(
        labels_one_hot, logits_resnet, label_smoothing=0.0, weights=1.0)
    noise = tf.gradients(cross_entropy, x)[0]

    x = x + step_size * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise, eps_inside
def fpn(img):
    """Build five pyramid feature maps on top of ResNet-v2-50 block outputs.

    ``p5``, ``p4`` and ``p3`` are derived from the block3, block2 and block1
    endpoints respectively, with bilinear top-down upsampling added into the
    next finer level. ``p6`` comes from block4 and ``p7`` from a stride-2
    convolution over ``relu(p6)``.

    Returns:
        ``[p3, p4, p5, p6, p7]``.
    """
    # NOTE(review): the original indentation was lost; only the backbone call
    # is placed inside the ResNet arg scope here — confirm that the pyramid
    # convolutions were not meant to inherit it.
    with slim.arg_scope(resnet_arg_scope()):
        _, end_points = resnet_v2_50(img)

    c1 = end_points['resnet_v2_50/block1']
    c2 = end_points['resnet_v2_50/block2']
    c3 = end_points['resnet_v2_50/block3']
    c4 = end_points['resnet_v2_50/block4']

    # The order of the slim.conv2d calls below is kept as-is because the
    # calls rely on slim's automatic scope numbering.
    p5 = slim.conv2d(c3, 256, 1, activation_fn=None)
    p5_upsample = tf.image.resize_bilinear(p5, tf.shape(c2)[1:3])
    p5 = slim.conv2d(p5, 256, 3, rate=4)
    p5 = slim.conv2d(p5, 256, 3, activation_fn=None)

    p4 = slim.conv2d(c2, 256, 1, activation_fn=None)
    p4 = p4 + p5_upsample
    p4_upsample = tf.image.resize_bilinear(p4, tf.shape(c1)[1:3])
    p4 = slim.conv2d(p4, 256, 3, rate=4)
    p4 = slim.conv2d(p4, 256, 3, activation_fn=None)

    p3 = slim.conv2d(c1, 256, 1, activation_fn=None)
    p3 = p3 + p4_upsample
    p3 = slim.conv2d(p3, 256, 3, rate=4)
    p3 = slim.conv2d(p3, 256, 3, activation_fn=None)

    p6 = slim.conv2d(c4, 1024, kernel_size=1)
    p6 = slim.conv2d(p6, 256, 3, rate=4)
    p6 = slim.conv2d(p6, 256, kernel_size=3, stride=1, activation_fn=None)

    p7 = slim.nn.relu(p6)
    p7 = slim.conv2d(p7, 256, kernel_size=3, stride=2, activation_fn=None)

    return [p3, p4, p5, p6, p7]
def get_embeddings(instances):
    """Compute a ResNet-v2-101 ``pool5`` embedding for every instance patch.

    A fresh graph is built, the model variables are initialised from
    ``'resnet_v2_101.ckpt'``, and each patch is run through the network
    after ``inception_preprocessing`` to 299x299.

    Args:
        instances: Mapping ``cls_id -> (ins, patch)``. Only ``patch`` is
            used; it is fed to a ``uint8`` placeholder of shape
            ``(None, None, 3)``.

    Returns:
        Dict mapping each ``cls_id`` to ``embedding[0, 0, 0, :]`` of the
        ``"resnet_v2_101/pool5:0"`` tensor.
    """
    image_size = 299
    query_embeddings = {}

    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        processed_image = inception_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False)
        processed_image = tf.expand_dims(processed_image, 0)

        # The return values are not needed; the call builds the graph that
        # contains the pool5 tensor looked up below.
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            resnet_v2.resnet_v2_101(processed_image, 1001, is_training=False)

        pool5 = tf.get_default_graph().get_tensor_by_name(
            "resnet_v2_101/pool5:0")

        init_fn = slim.assign_from_checkpoint_fn(
            'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            for cls_id, (_, patch) in instances.items():
                # Fetch only pool5: the preprocessed image and the logits
                # were previously fetched and then discarded.
                embedding = sess.run(pool5, feed_dict={image: patch})
                query_embeddings[cls_id] = embedding[0, 0, 0, :]

    return query_embeddings
def __init__(self):
    """Load the 5-class ResNet-v2-50 emotion model into a dedicated session.

    Sets up file logging to ``result.log``, builds the preprocessing and
    inference tensors in a new graph, restores ``model.ckpt-60000`` from the
    hard-coded checkpoint directory and keeps the session in ``self.sess``.
    """
    slim = tf.contrib.slim
    CLASSES = ['anger', ' happy ', 'neutral', ' sad ', 'surprise']
    image_size = 160
    checkpoints_dir = '/root/catkin_ws/src/ros_emotion_detect/src/models/inception_5/'

    logging.basicConfig(
        filename='result.log', filemode='w', level=logging.INFO)
    self.logger = logging.getLogger('emotion classifier')

    # loading model
    with tf.Graph().as_default():
        # Single raw image -> preprocessed image.
        self.image = tf.placeholder(tf.uint8, [None, None, 3])
        self.processed_image = inception_preprocessing.preprocess_image(
            self.image, image_size, image_size, is_training=False)

        # Batch of already-preprocessed images -> class probabilities.
        self.processed_images = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_50(
                self.processed_images,
                num_classes=len(CLASSES),
                is_training=False)
        self.probs = tf.nn.softmax(logits)

        checkpoint_path = os.path.join(checkpoints_dir, 'model.ckpt-60000')
        init_fn = slim.assign_from_checkpoint_fn(
            checkpoint_path, slim.get_model_variables('resnet_v2_50'))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        self.sess = tf.Session(config=config)
        init_fn(self.sess)
def graph(x, y, i, x_max, x_min, grad):
    """One momentum sign-gradient step against an eight-way ensemble.

    Inception-v3, Inception-v4, Inception-ResNet-v2 and ResNet-v2-152 are
    each evaluated on ``input_diversity(x)`` and on ``rotate(x)`` (the second
    call of every pair reuses the variables of the first). The eight logits
    are averaged, the auxiliary logits of the three Inception-family models
    are averaged separately, and the summed cross-entropy against ``y`` is
    differentiated w.r.t. ``x``.

    Returns:
        ``(x, y, i + 1, x_max, x_min, noise)`` with ``x`` clipped to
        ``[x_min, x_max]`` and ``noise`` the accumulated gradient.
    """
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    # should keep original x here for output
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x), num_classes=num_classes, is_training=False)
        logits_v3_rotated, _ = inception_v3.inception_v3(
            rotate(x), num_classes=num_classes, is_training=False, reuse=True)

    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x), num_classes=num_classes, is_training=False)
        logits_v4_rotated, _ = inception_v4.inception_v4(
            rotate(x), num_classes=num_classes, is_training=False, reuse=True)

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        # The first call used reuse=True, unlike the three sibling models,
        # which fails when the variables do not exist yet. AUTO_REUSE creates
        # them when missing and still reuses them when they already exist.
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)
        logits_res_v2_rotated, _ = inception_resnet_v2.inception_resnet_v2(
            rotate(x), num_classes=num_classes, is_training=False, reuse=True)

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_152(
            input_diversity(x), num_classes=num_classes, is_training=False)
        logits_resnet_rotated, _ = resnet_v2.resnet_v2_152(
            rotate(x), num_classes=num_classes, is_training=False, reuse=True)

    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet +
              logits_v3_rotated + logits_v4_rotated + logits_res_v2_rotated +
              logits_resnet_rotated) / 8
    auxlogits = (end_points_v3['AuxLogits'] + end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(
        y, logits, label_smoothing=0.0, weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(
        y, auxlogits, label_smoothing=0.0, weights=0.4)

    noise = tf.gradients(cross_entropy, x)[0]
    # Gradient smoothing is disabled in this variant:
    # noise = tf.nn.depthwise_conv2d(noise, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')
    noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise

    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def hcd_model(inputs, num_classes, is_training=True, keep_prob=0.8,
              attention_module=None, scope='HCD_model'):
    """ResNet-v2-50 followed by max/avg pooling and a three-layer FC head.

    :param inputs: N x H x W x C tensor
    :param num_classes: forwarded to the backbone and used as the width of
        the final ``'logits'`` layer
    :param is_training: forwarded to the backbone, the dropout layers and
        written into the module-level ``batch_norm_params``
    :param keep_prob: keep probability of every dropout layer
    :param attention_module: forwarded to ``resnet_v2.resnet_v2_50``
    :param scope: accepted but unused (the enclosing variable scope that
        would consume it is commented out in the original)
    :return: ``(logits, end_points)``
    """
    # NOTE(review): the original indentation was lost; only the backbone call
    # is placed inside the ResNet arg scope here — confirm.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, end_points = resnet_v2.resnet_v2_50(
            inputs,
            num_classes=num_classes,
            is_training=is_training,
            attention_module=attention_module,
            scope='resnet_v2_50')

    # Global max- and average-pooling over the two spatial axes, then
    # concatenation along the channel axis.
    max_pooled = tf.reduce_max(
        net, axis=[1, 2], keep_dims=True, name='GlobalMaxPooling2D')
    avg_pooled = tf.reduce_mean(
        net, axis=[1, 2], keep_dims=True, name='GlobalAveragePooling2D')
    net = tf.concat([max_pooled, avg_pooled], axis=-1)
    net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')

    # NOTE: this mutates the shared dict defined outside this function.
    batch_norm_params['is_training'] = is_training

    net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training)
    for width, fc_scope in ((768, 'fc1'), (256, 'fc2')):
        net = slim.fully_connected(
            net,
            width,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            scope=fc_scope)
        net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training)

    logits = slim.fully_connected(
        net, num_classes, activation_fn=None, scope='logits')
    return logits, end_points
def _GetAttentionModel(
        self,
        images,
        num_classes,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False):
    """Builds the attention classifier on resnet_v2_50.

    Calls `GetAttentionPrelogit` and adds a 1x1 convolution named 'logits'
    inside the attention variable scope, squeezed over the spatial axes.

    Args:
      images: A tensor of size [batch, height, width, channels]
      num_classes: The number of output classes.
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether the Resnet blocks are in training mode.
      training_attention: Whether the attention part is in training mode.
      reuse: Whether the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, not l2-normalized.
    """
    prelogit_outputs = self.GetAttentionPrelogit(
        images,
        weight_decay,
        attention_nonlinear=attention_nonlinear,
        attention_type=attention_type,
        kernel=kernel,
        training_resnet=training_resnet,
        training_attention=training_attention,
        reuse=reuse)
    (attention_feat, attention_prob, attention_score, feature_map,
     _) = prelogit_outputs

    classifier_scope = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_scale=True)
    with slim.arg_scope(classifier_scope), \
            slim.arg_scope([slim.batch_norm], is_training=training_attention), \
            tf.variable_scope(_ATTENTION_VARIABLE_SCOPE,
                              values=[attention_feat],
                              reuse=reuse):
        logits = slim.conv2d(
            attention_feat,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='logits')
        logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')

    return logits, attention_prob, attention_score, feature_map
def inference_resnet_v2_152(x_input, dropout_keep_prob=1, num_classes=1001):
    """Build ResNet-v2-152 in inference mode.

    Args:
        x_input: Input tensor passed to the network.
        dropout_keep_prob: Accepted but not used by this function.
        num_classes: Number of output classes.

    Returns:
        ``(probs, logits, model_vars)`` where ``probs`` is the softmax of
        ``logits`` and ``model_vars`` lists every global variable whose name
        starts with ``'resnet_v2_152/'``.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_152(
            x_input, num_classes=num_classes, is_training=False)

    probs = tf.nn.softmax(logits)

    prefix = 'resnet_v2_152/'
    model_vars = [
        variable for variable in tf.global_variables()
        if variable.name.startswith(prefix)
    ]
    return probs, logits, model_vars
def resnet_v2_50(inputs):
    """Build a headless ResNet-v2-50 in inference mode.

    The network is created with ``num_classes=None``, global pooling and
    spatial squeeze enabled.

    Returns:
        ``(logits, end_points, resnet_v2_50_ckpt_path)`` — the two network
        outputs plus the checkpoint path defined outside this function.
    """
    arg_sc = resnet_v2.resnet_arg_scope()
    with slim.arg_scope(arg_sc):
        net, activations = resnet_v2.resnet_v2_50(
            inputs,
            num_classes=None,
            is_training=False,
            global_pool=True,
            spatial_squeeze=True)
    return net, activations, resnet_v2_50_ckpt_path
def graph(x, y, i, x_max, x_min, grad, grad2):
    """One update step against a four-model ensemble with two accumulators.

    Inception-v3, Inception-v4, Inception-ResNet-v2 and ResNet-v2-101 are
    evaluated on ``input_diversity(x)`` with ``reuse=tf.AUTO_REUSE``. The
    smoothed loss gradient is accumulated linearly into ``noise1`` and
    squared into ``noise2``; the step is a scaled ``tanh`` of their ratio.

    Returns:
        ``(x, y, i + 1, x_max, x_min, noise1, noise2)``.
    """
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_classes = 1001

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_101(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v2_101',
            reuse=tf.AUTO_REUSE)

    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet) / 4
    auxlogits = (end_points_v3['AuxLogits'] + end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(
        y, logits, label_smoothing=0.0, weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(
        y, auxlogits, label_smoothing=0.0, weights=0.4)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(
        noise, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')

    # Linear and squared accumulators.
    noise1 = grad + 1.5 * noise
    noise2 = grad2 + 1.9 * noise * noise

    # Step scale depends on the iteration counter through 0.9^(i+1) and
    # 0.99^(i+1).
    step_scale = (eps / 17.6786) * (
        (1 - 0.9**(i + 1)) / tf.sqrt(1 - 0.99**(i + 1)))
    direction = tf.tanh(1.3 * noise1 / tf.sqrt(noise2))

    x = x + step_scale * direction
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise1, noise2
def graph(x, y, i, grad):
    """Build ResNet-v2-101 on ``x`` and initialise the labels on round zero.

    When ``i == 0`` the labels ``y`` are replaced by the network's argmax
    predictions; otherwise ``y`` is passed through. ``x``, ``i`` and
    ``grad`` are returned unchanged.
    """
    num_classes = 1001

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points_resnet = resnet_v2.resnet_v2_101(
            x, num_classes=num_classes, is_training=False)

    predicted = tf.argmax(end_points_resnet['predictions'], 1)
    is_first = tf.cast(tf.equal(i, 0), tf.int64)
    y = is_first * predicted + (1 - is_first) * y
    return x, y, i, grad
def get_patch_score(query_embedding, images, num_cutoff=50):
    """Rank dense ResNet-v2-101 features of each image against a query.

    The query is L2-normalised, every image is passed through a headless
    ResNet-v2-101 (``global_pool=False``, ``output_stride=8``), the feature
    map is L2-normalised along its last axis and contracted with the query.

    Args:
        query_embedding: 1-D NumPy vector to compare against.
        images: Iterable of image paths readable by ``cv2.imread``; each path
            is also used as the key of the result.
        num_cutoff: Number of best-scoring locations to keep per image.

    Returns:
        Dict ``path -> {'locs', 'embeddings', 'similarity'}`` where ``locs``
        holds the index coordinates of the ``num_cutoff`` highest
        similarities (descending), ``embeddings`` the feature vectors at
        those coordinates and ``similarity`` the full similarity array.
    """
    tiny = np.finfo(float).eps
    query_embedding = query_embedding / (
        np.linalg.norm(query_embedding, ord=2) + tiny)

    image_similar_embeddings = {}
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, (None, None, 3))
        # The placeholder is always uint8, so the conversion is always
        # required (the former "already float32" branch was unreachable).
        processed_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        processed_image = tf.subtract(processed_image, 0.5)
        processed_image = tf.multiply(processed_image, 2.0)
        processed_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            postnorm, _ = resnet_v2.resnet_v2_101(
                processed_image,
                None,
                is_training=False,
                global_pool=False,
                output_stride=8)

        init_fn = slim.assign_from_checkpoint_fn(
            'resnet_v2_101.ckpt', slim.get_model_variables('resnet_v2'))

        with tf.Session() as sess:
            init_fn(sess)
            for im in tqdm(images):
                # NOTE(review): cv2.imread presumably yields None for
                # unreadable paths and BGR channel order — confirm both are
                # acceptable for this model.
                img = cv2.imread(im)
                # Only the feature map is needed; the preprocessed image is
                # no longer fetched.
                embedding = sess.run(postnorm, feed_dict={image: img})

                norms = np.expand_dims(
                    np.linalg.norm(embedding, axis=3, ord=2), axis=3)
                embedding = embedding / (norms + tiny)

                similarity = np.tensordot(embedding, query_embedding, axes=1)
                # Index tuple of all positions, sorted by descending score.
                similarity_peaks = np.unravel_index(
                    np.argsort(-similarity, axis=None), similarity.shape)

                similarity_coords = [
                    np.expand_dims(c, axis=0) for c in similarity_peaks
                ]
                similarity_coords = np.transpose(
                    np.concatenate(similarity_coords, axis=0))

                image_similar_embeddings[im] = {
                    'locs': similarity_coords[:num_cutoff],
                    'embeddings':
                        embedding[similarity_peaks][:num_cutoff].copy(),
                    'similarity': similarity
                }
    return image_similar_embeddings
def graph(x, y, i, x_max, x_min, grad):
    """One descent step on the ensemble loss with std-normalised gradients.

    Six networks are evaluated on ``input_diversity(x)``: Inception-v3, an
    Inception-v3 under scope ``'AdvInceptionV3'`` (weighted 0.25),
    Inception-v4, Inception-ResNet-v2, an Inception-ResNet-v2 under scope
    ``'EnsAdvInceptionResnetV2'`` and ResNet-v2-50. The smoothed gradient is
    divided by its per-example standard deviation before and after momentum
    accumulation, rounded, clipped to ``[-2, 2]`` and *subtracted* from
    ``x``.

    Returns:
        ``(x, y, i + 1, x_max, x_min, noise)``.
    """

    def _divide_by_sample_std(tensor):
        # Per-example standard deviation over all non-batch elements.
        flat = tf.reshape(tensor, [FLAGS.batch_size, -1])
        std = tf.contrib.keras.backend.std(flat, axis=1)
        return tensor / tf.reshape(std, [FLAGS.batch_size, 1, 1, 1])

    eps = 2.0 * FLAGS.max_epsilon / 255.0
    num_iter = FLAGS.num_iter
    alpha = eps / num_iter
    momentum = FLAGS.momentum
    num_classes = 1001

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_v3, end_points_v3 = inception_v3.inception_v3(
            input_diversity(x), num_classes=num_classes, is_training=False)

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            scope='AdvInceptionV3')

    with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
        logits_v4, end_points_v4 = inception_v4.inception_v4(
            input_diversity(x), num_classes=num_classes, is_training=False)

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_res_v2, end_points_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x), num_classes=num_classes, is_training=False)

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
            input_diversity(x),
            num_classes=num_classes,
            is_training=False,
            scope='EnsAdvInceptionResnetV2')

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits_resnet, end_points_resnet = resnet_v2.resnet_v2_50(
            input_diversity(x), num_classes=num_classes, is_training=False)

    logits = (logits_v3 + 0.25 * logits_adv_v3 + logits_v4 +
              logits_res_v2 + logits_ensadv_res_v2 + logits_resnet) / 5.25
    auxlogits = (end_points_v3['AuxLogits'] +
                 0.25 * end_points_adv_v3['AuxLogits'] +
                 end_points_v4['AuxLogits'] +
                 end_points_res_v2['AuxLogits'] +
                 end_points_ensadv_res_v2['AuxLogits']) / 4.25

    cross_entropy = tf.losses.softmax_cross_entropy(
        y, logits, label_smoothing=0.0, weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(
        y, auxlogits, label_smoothing=0.0, weights=0.4)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(
        noise, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')

    noise = _divide_by_sample_std(noise)
    noise = momentum * grad + noise
    noise = _divide_by_sample_std(noise)

    x = x - alpha * tf.clip_by_value(tf.round(noise), -2, 2)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def Resnet(n_layers, imgs_in, weight_decay, batch_norm_momentum, is_training):
    """Build a ResNet-v2 feature extractor of the requested depth.

    Args:
        n_layers: One of 50, 101, 152 or 200; selects
            ``resnet_v2.resnet_v2_<n_layers>``.
        imgs_in: Input tensor passed to the network.
        weight_decay: Forwarded to ``resnet_arg_scope``.
        batch_norm_momentum: Forwarded as ``batch_norm_decay``.
        is_training: Forwarded to the network.

    Returns:
        The first output of the network, built with ``global_pool=False``
        and ``output_stride=16``.
    """
    assert n_layers in {50, 101, 152, 200}, 'unsupported n_layers'

    builder_name = 'resnet_v2_{}'.format(n_layers)
    build_network = getattr(resnet_v2, builder_name)

    arg_sc = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_decay=batch_norm_momentum)
    with slim.arg_scope(arg_sc):
        features, _ = build_network(
            imgs_in,
            is_training=is_training,
            global_pool=False,
            output_stride=16)
    return features
def endpoints(image, is_training):
    """Build a headless ResNet-v2-50 and expose its pooled block4 output.

    Args:
        image: Rank-4 image tensor; ``_RGB_MEAN`` is subtracted from it.
        is_training: Forwarded to the network.

    Returns:
        ``(end_points, 'resnet_v2_50')``. ``end_points['model_output']`` and
        ``end_points['global_pool']`` both refer to the mean of the
        ``'resnet_v2_50/block4'`` endpoint over axes 1 and 2.

    Raises:
        ValueError: If ``image`` is not rank 4.
    """
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    rgb_mean = tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))
    image = image - rgb_mean

    arg_sc = resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)
    with tf.contrib.slim.arg_scope(arg_sc):
        _, end_points = resnet_v2_50(
            image, num_classes=None, is_training=is_training, global_pool=True)

    pooled = tf.reduce_mean(
        end_points['resnet_v2_50/block4'], [1, 2], name='pool5',
        keep_dims=False)
    end_points['model_output'] = pooled
    end_points['global_pool'] = pooled

    return end_points, 'resnet_v2_50'
def all_feats(self, x_input):
    """Return selected intermediate activations of VGG-16 and the ResNet.

    Both networks are built on a 224x224 resize of ``x_input`` (mean
    subtraction for VGG, scaling to [-1, 1] for the ResNet). The result is
    the list of VGG-16 endpoints followed by the ResNet endpoints, in the
    order given by the two name tuples below.
    """
    if self.build:
        tf.get_variable_scope().reuse_variables()
    else:
        self.build = True

    resized_images_vgg = tf.image.resize_images(
        x_input, [224, 224]) - tf.constant([123.68, 116.78, 103.94])
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points_vgg16 = self.network_fn_vgg16(
            resized_images_vgg,
            num_classes=self.num_classes,
            is_training=False)

    resized_images_res = (
        tf.image.resize_images(x_input, [224, 224]) / 255.0 - 0.5) * 2
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points_res = self.network_fn_res(
            resized_images_res,
            num_classes=self.num_classes + 1,
            is_training=False)

    vgg_layers = (
        'vgg_16/conv1/conv1_1',
        'vgg_16/conv1/conv1_2',
        'vgg_16/conv2/conv2_1',
        'vgg_16/conv2/conv2_2',
        'vgg_16/conv3/conv3_1',
        'vgg_16/conv3/conv3_2',
        'vgg_16/conv3/conv3_3',
        'vgg_16/conv4/conv4_1',
        'vgg_16/conv4/conv4_2',
        'vgg_16/conv4/conv4_3',
        'vgg_16/conv5/conv5_1',
        'vgg_16/conv5/conv5_2',
        'vgg_16/conv5/conv5_3',
        'vgg_16/fc6',
        'vgg_16/fc7',
        'vgg_16/fc8',
    )
    res_layers = (
        'resnet_v2_152_1/block3/unit_23/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_24/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_25/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_26/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_27/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_28/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_29/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_31/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_32/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_33/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_34/bottleneck_v2',
        'resnet_v2_152_1/block3/unit_36/bottleneck_v2',
        'resnet_v2_152_1/block4/unit_3/bottleneck_v2',
    )

    probs = [end_points_vgg16[layer] for layer in vgg_layers]
    probs.extend(end_points_res[layer] for layer in res_layers)
    return probs
def __init__(self, num_classes):
    """Store the network builder functions used by the ensemble.

    Args:
        num_classes: Number of classes, stored in ``self.num_classes``.
    """
    self.num_classes = num_classes

    # NOTE(review): the arg scopes below wrap plain attribute assignments;
    # no layer is built inside them in this method.
    incep_res_scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
    with slim.arg_scope(incep_res_scope):
        self.network_fn_incep_res = inception_resnet_v2.inception_resnet_v2

    with slim.arg_scope(vgg.vgg_arg_scope()):
        self.network_fn_vgg16 = vgg.vgg_16

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        self.network_fn_res = resnet_v2.resnet_v2_152

    with slim.arg_scope(inception_utils.inception_arg_scope()):
        self.network_fn_incepv3 = inception_v3.inception_v3
        self.network_fn_incepv4 = inception_v4.inception_v4

    self.network_fn_alex = AlexNet()

    # Flipped to True by the first graph-building call so later calls reuse
    # variables.
    self.build = False
def fcn_8s_resnet_v2_50(x,
                        num_classes=1000,
                        is_training=False,
                        weight_decay=0.0005,
                        ):
    """FCN-style decoder with three transposed convolutions on ResNet-v2-50.

    The backbone logits are upsampled (stride 2) to the block2 endpoint's
    spatial size and added to a 3x3 conv of block2, upsampled again
    (stride 2) to block1's size and added to a 3x3 conv of block1, then
    upsampled (stride 8) to the spatial size of ``x``.

    Returns:
        ``(net, end_points)``; the three decoder outputs are added to
        ``end_points`` under ``'resnet50_fcn_8s/conv_transpose_{1,2,3}'``.
    """
    with tf.variable_scope('resnet50_fcn_8s'):
        # NOTE(review): the original indentation was lost; the decoder is
        # placed inside the outer variable scope but outside the ResNet arg
        # scope here — confirm.
        with slim.arg_scope(resnet_arg_scope(weight_decay=weight_decay)):
            _, end_points = resnet_v2_50(
                x,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=False,
                spatial_squeeze=False,
            )

        backbone_logits = end_points['resnet50_fcn_8s/resnet_v2_50/logits']
        block2 = end_points['resnet50_fcn_8s/resnet_v2_50/block2']
        block1 = end_points['resnet50_fcn_8s/resnet_v2_50/block1']

        with tf.variable_scope('conv_transpose_1'):
            net = conv2d_transpose(
                backbone_logits,
                filter_size=(4, 4, num_classes,
                             backbone_logits.get_shape()[3]),
                output_shape=[tf.shape(block2)[0],
                              tf.shape(block2)[1],
                              tf.shape(block2)[2],
                              num_classes],
                strides=2,
                weight_decay=weight_decay)
            pool4_conv = slim.conv2d(block2, num_classes, [3, 3])
            net = tf.add(net, pool4_conv)
            end_points['resnet50_fcn_8s/conv_transpose_1'] = net

        with tf.variable_scope('conv_transpose_2'):
            net = conv2d_transpose(
                net,
                filter_size=(4, 4, num_classes, net.get_shape()[3]),
                output_shape=[tf.shape(block1)[0],
                              tf.shape(block1)[1],
                              tf.shape(block1)[2],
                              num_classes],
                strides=2,
                weight_decay=weight_decay)
            pool3_conv = slim.conv2d(block1, num_classes, [3, 3])
            net = tf.add(net, pool3_conv)
            end_points['resnet50_fcn_8s/conv_transpose_2'] = net

        with tf.variable_scope('conv_transpose_3'):
            net = conv2d_transpose(
                net,
                filter_size=(16, 16, num_classes, net.get_shape()[3]),
                output_shape=(tf.shape(x)[0],
                              tf.shape(x)[1],
                              tf.shape(x)[2],
                              num_classes),
                strides=8,
                weight_decay=weight_decay)
            end_points['resnet50_fcn_8s/conv_transpose_3'] = net

    return net, end_points
def resnet_v2_152(inputs, is_training, opts):
    """Build ResNet-v2-152 configured from the attributes of ``opts``.

    Args:
        inputs: Input tensor passed to the network.
        is_training: Forwarded to the network.
        opts: Object providing ``weight_decay``, ``batch_norm_decay``,
            ``batch_norm_epsilon``, ``num_classes``, ``global_pool`` and
            ``spatial_squeeze``.

    Returns:
        Whatever ``resnet_v2.resnet_v2_152`` returns.
    """
    arg_sc = resnet_v2.resnet_arg_scope(
        weight_decay=opts.weight_decay,
        batch_norm_decay=opts.batch_norm_decay,
        batch_norm_epsilon=opts.batch_norm_epsilon,
        activation_fn=tf.nn.relu)
    with slim.arg_scope(arg_sc):
        return resnet_v2.resnet_v2_152(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
def model_resnet_v2_101(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model on top of slim's implementation of resnet.

    The ``pool5`` .. ``pool2`` endpoints of ResNet-v2-101 are merged
    coarse-to-fine (unpool, concat, 1x1 conv, 3x3 conv) and three sigmoid
    heads produce the outputs.

    :param images: input batch; ``mean_image_subtraction`` is applied first
    :param weight_decay: l2 weight for the backbone arg scope and the fusion
        convolutions
    :param is_training: forwarded to the backbone and to batch norm
    :return: ``(F_score, F_geometry)`` where ``F_score`` has one channel and
        ``F_geometry`` concatenates the 4-channel geometry map (scaled by
        ``FLAGS.text_scale``) with the 1-channel angle map
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v2.resnet_v2_101(
            images, is_training=is_training, scope='resnet_v2_101')

    # Pass the endpoint tensors themselves; previously the bound method
    # `end_points.values` was passed without being called.
    with tf.variable_scope('feature_fusion',
                           values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Feature maps ordered from coarsest to finest.
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(
                        tf.concat([g[i - 1], f[i]], axis=-1),
                        num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for regression part,
            # we first use a sigmoid to limit the regression range, and also
            # this is done with the angle map
            F_score = slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            # angle is between [-45, 45]
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def create(self, images, num_classes, is_training):
    """See baseclass."""
    layer_defaults = resnet_v2.resnet_arg_scope()
    with slim.arg_scope(layer_defaults):
        # spatial_squeeze=False leaves the two singleton spatial axes in place;
        # they are removed explicitly below.
        _, endpoints = resnet_v2.resnet_v2_152(
            images, num_classes, is_training=is_training,
            spatial_squeeze=False)

    # Resnet's "predictions" endpoint is (n, 1, 1, m) but callers want an
    # (n, m) "Predictions" endpoint; likewise "resnet_v2_152/logits" is
    # exposed squeezed as "Logits".
    squeezed_aliases = (
        ('Predictions', 'predictions', 'SqueezePredictions'),
        ('Logits', 'resnet_v2_152/logits', 'SqueezeLogits'),
    )
    for alias, source_key, op_name in squeezed_aliases:
        endpoints[alias] = tf.squeeze(endpoints[source_key], [1, 2], name=op_name)

    return endpoints
def _build(self, x_input=None):
    """Build the inference-mode ResNet-v2-101 graph and restore its weights once.

    Args:
        x_input: tensor to run the network on; when ``None``, ``self.input``
            is used instead.

    Side effects:
        Sets ``self.built``, ``self.end_points`` and ``self.logits``. While
        ``self.ckpt_loaded`` is falsy, restores the model variables into
        ``self.sess`` from ``ckpt_dir + 'resnet_v2_101.ckpt'`` and then sets
        ``self.ckpt_loaded`` to ``True``.
    """
    # After the first build, variables are reused rather than re-created.
    reuse = None
    if self.built:
        reuse = True

    if x_input is None:
        x_input = self.input

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_101(
            x_input,
            num_classes=self.num_classes,
            is_training=False,
            reuse=reuse)

    self.built = True
    self.end_points = end_points
    self.logits = logits

    if self.ckpt_loaded:
        return

    # `ckpt_dir` is a module-level name defined outside this method.
    checkpoint_path = ckpt_dir + 'resnet_v2_101.ckpt'
    restorer = tf.train.Saver(slim.get_model_variables())
    restorer.restore(self.sess, checkpoint_path)
    self.ckpt_loaded = True
def resnetv2_ssd(img):
    """Run slim's ResNet-v2-50 on ``img`` and return three feature maps.

    Args:
        img: input passed to ``resnet_v2.resnet_v2_50`` (built with
            ``is_training=True``).

    Returns:
        Tuple ``(c1, c2, c3, vbs)``: ``c1`` and ``c2`` are the 'block1' and
        'block2' end points; ``c3`` concatenates, along axis 3, 512-output
        1x1 convolutions of the 'block3' and 'block4' end points; ``vbs`` is
        the list returned by ``slim.get_trainable_variables()`` before those
        two convolutions are created.
    """
    # NOTE(review): the source formatting was lost; this assumes only the
    # backbone call sits inside the resnet arg scope - confirm.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_50(img, is_training=True)

    c1 = end_points['resnet_v2_50/block1']
    c2 = end_points['resnet_v2_50/block2']
    deep_features = [
        end_points['resnet_v2_50/block3'],
        end_points['resnet_v2_50/block4'],
    ]

    # Snapshot taken before the projection convs below add their variables.
    vbs = slim.get_trainable_variables()

    projected = [slim.conv2d(feature, 512, 1) for feature in deep_features]
    c3 = tf.concat(projected, axis=3)

    return c1, c2, c3, vbs
def create(self, images, num_classes, is_training):
    """See baseclass."""
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        network_outputs = resnet_v2.resnet_v2_152(
            images,
            num_classes,
            is_training=is_training,
            spatial_squeeze=False,
        )
    endpoints = network_outputs[1]

    # With spatial_squeeze disabled, "predictions" is (n, 1, 1, m); add an
    # (n, m) view of it under the "Predictions" key.
    raw_predictions = endpoints['predictions']
    endpoints['Predictions'] = tf.squeeze(
        raw_predictions, [1, 2], name='SqueezePredictions')

    # Same treatment for "resnet_v2_152/logits", exposed as "Logits".
    raw_logits = endpoints['resnet_v2_152/logits']
    endpoints['Logits'] = tf.squeeze(
        raw_logits, [1, 2], name='SqueezeLogits')

    return endpoints
def graph(x, y, i, x_max, x_min, grad, eg):
    """Perform one iteration of the ensemble attack update.

    The running mean of squared gradients ``eg`` is refreshed from ``grad``
    and used to form a look-ahead point ``x_n``. Four networks (Inception v3,
    Inception v4, Inception-ResNet v2, ResNet-v2-101) are evaluated on
    ``input_diversity(x_n)``; the loss on their averaged logits (plus the
    averaged auxiliary logits of the three Inception-family networks, weight
    0.4) is differentiated with respect to ``x_n``. The gradient is filtered
    with ``stack_kernel``, normalised by its per-example mean absolute value,
    blended with ``grad``, and its sign drives the step on ``x``.

    Args:
        x: current images.
        y: labels passed to ``tf.losses.softmax_cross_entropy``.
        i: iteration counter.
        x_max: upper clipping bound for ``x``.
        x_min: lower clipping bound for ``x``.
        grad: accumulated gradient from the previous iteration.
        eg: running mean of squared gradients from the previous iteration.

    Returns:
        ``(x, y, i, x_max, x_min, noise, eg)`` with ``x`` stepped and clipped,
        ``i`` incremented, ``noise`` the new accumulated gradient and ``eg``
        the refreshed running mean. The tuple mirrors the argument list
        (presumably so the function can serve as a loop body - confirm).
    """
    # Per-iteration step size derived from the command-line flags.
    step_size = (2.0 * FLAGS.max_epsilon / 255.0) / FLAGS.num_iter
    num_classes = 1001
    decay = 0.9
    momentum = 0.89
    mix = 0.1

    # Refresh the running mean of squared gradients and build the look-ahead point.
    eg = decay * eg + (1 - decay) * tf.square(grad)
    rms = tf.sqrt(eg + 1e-9)
    x_n = x + (step_size / rms)*grad

    def _forward(layer_defaults, network, **extra):
        # Each model gets its own input_diversity(x_n) call.
        with slim.arg_scope(layer_defaults):
            return network(
                input_diversity(x_n), num_classes=num_classes,
                is_training=False, reuse=tf.AUTO_REUSE, **extra)

    logits_v3, end_points_v3 = _forward(
        inception_v3.inception_v3_arg_scope(), inception_v3.inception_v3)
    logits_v4, end_points_v4 = _forward(
        inception_v4.inception_v4_arg_scope(), inception_v4.inception_v4)
    logits_res_v2, end_points_res_v2 = _forward(
        inception_resnet_v2.inception_resnet_v2_arg_scope(),
        inception_resnet_v2.inception_resnet_v2)
    logits_resnet, _ = _forward(
        resnet_v2.resnet_arg_scope(), resnet_v2.resnet_v2_101,
        scope='resnet_v2_101')

    # Ensemble by plain averaging; only three of the models have AuxLogits.
    logits = (logits_v3 + logits_v4 + logits_res_v2 + logits_resnet) / 4
    auxlogits = (end_points_v3['AuxLogits']
                 + end_points_v4['AuxLogits']
                 + end_points_res_v2['AuxLogits']) / 3

    cross_entropy = tf.losses.softmax_cross_entropy(
        y, logits, label_smoothing=0.0, weights=1.0)
    cross_entropy += tf.losses.softmax_cross_entropy(
        y, auxlogits, label_smoothing=0.0, weights=0.4)

    # Gradient at the look-ahead point, filtered with the module-level kernel.
    raw_gradient = tf.gradients(cross_entropy, x_n)[0]
    smoothed = tf.nn.depthwise_conv2d(
        raw_gradient, stack_kernel, strides=[1, 1, 1, 1], padding='SAME')
    normalised = smoothed / tf.reduce_mean(
        tf.abs(smoothed), [1, 2, 3], keep_dims=True)

    # Blend with the accumulated gradient, then mix the fresh gradient back in
    # to choose the step direction.
    noise = momentum * grad + (1-momentum) * normalised
    direction = (1-mix) * noise + mix * normalised

    x = x + step_size * tf.sign(direction)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise, eg