def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnetv1_152 model.

    Args:
        num_classes: Number of output classes of the classifier head.
        train_layers: Iterable of layer-scope names whose variables should be
            trained (matched against the last two scope components of each
            trainable variable's name). ``None`` selects no layers.
        weights_path: Checkpoint path of pretrained weights, or 'DEFAULT'
            to use the bundled resnet_v1_152 checkpoint.
    """
    # Parse input arguments into class variables
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_152.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    # Fix: normalize None to an empty list so the membership tests in the
    # trainable-variable filter below cannot raise
    # "argument of type 'NoneType' is not iterable".
    self.train_layers = train_layers if train_layers is not None else []

    with tf.variable_scope("input"):
        self.image_size = resnet_v1.resnet_v1_152.default_image_size
        self.x_input = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name="x_input")
        self.y_input = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # Training graph (batch norm in training mode); shares weights with the
    # validation graph via AUTO_REUSE.
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits, _ = resnet_v1.resnet_v1_152(self.x_input,
                                                 num_classes=num_classes,
                                                 is_training=True,
                                                 reuse=tf.AUTO_REUSE)

    # Validation graph (batch norm in inference mode).
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits_val, _ = resnet_v1.resnet_v1_152(self.x_input,
                                                     num_classes=num_classes,
                                                     is_training=False,
                                                     reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=self.y_input))
        self.loss_val = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits_val, labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(0, name="global_step",
                                       trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Keep only the variables whose enclosing scopes match the requested
        # layer names. Fix: use the normalized self.train_layers rather than
        # the raw (possibly None) argument.
        var_list = [v for v in tf.trainable_variables()
                    if v.name.split('/')[-2] in self.train_layers
                    or v.name.split('/')[-3] in self.train_layers]
        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        # Run batch-norm moving-average updates together with each step.
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                grads_and_vars=self.grads_and_vars,
                global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction,
                                      tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"),
                                       name="accuracy")
def build(self, weight_path, sess, input_type=InputType.BASE64_JPEG):
    """Build the inference-only graph and optionally load trained weights.

    Depending on ``input_type`` the graph consumes either a raw float image
    batch or a batch of base64-encoded JPEG strings decoded in-graph.
    """
    self.session = sess
    self.input_tensor = None

    if input_type == InputType.TENSOR:
        # Raw image batch fed directly as floats.
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, 224, 224, 3],
                                    name="input")
        self.input_tensor = self.input
    elif input_type == InputType.BASE64_JPEG:
        # Base64 JPEG strings, decoded inside the graph.
        self.input = tf.placeholder(tf.string, shape=(None, ), name='input')
        self.input_tensor = load_base64_tensor(self.input)
    else:
        raise ValueError('invalid input type')

    # only load inference model
    scope = resnet_v1.resnet_arg_scope(activation_fn=tf.nn.relu,
                                       weight_decay=0.0001)
    with arg_scope(scope):
        self.logits_val, endpoints = resnet_v1.resnet_v1_152(
            self.input_tensor,
            num_classes=self.num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    self.predictions = endpoints['predictions']
    self.output = tf.identity(self.predictions, name='outputs')

    if weight_path is not None:
        self.load_trained_weights(weight_path)
def __call__(self, x_input, batch_size, is_training=False):
    """Constructs model and return probabilities for given input."""
    reuse = True if self.built else None
    image_size = resnet_v1.resnet_v1.default_image_size

    # Inputs arrive in [-1, 1]; rescale to [0, 255] and apply the VGG-style
    # preprocessing that ResNet V1 checkpoints expect.
    def _preprocess_one(img):
        rescaled = 0.5 * 255.0 * (img + 1.0)
        return vgg_preprocess(rescaled, image_size, image_size)

    preproc = tf.map_fn(_preprocess_one, x_input)

    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with tf.variable_scope(self.name):
            logits, end_points = resnet_v1.resnet_v1_152(
                preproc,
                num_classes=self.num_classes - 1,
                is_training=is_training,
                reuse=reuse)

    # VGG and ResNetV1 don't have a background class, so prepend a column
    # of -inf logits for class 0.
    background_class = tf.constant(-np.inf, dtype=tf.float32,
                                   shape=[batch_size, 1])
    logits = tf.concat([background_class, logits], axis=1)
    preds = tf.argmax(logits, axis=1)

    self.built = True
    self.logits = logits
    self.preds = preds
    return logits
def test_resnet_v1_152(img_dir):
    """
    Test ResNet-V1-152 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    # Load, convert to RGB, resize, and shape into a 1-image batch.
    image = cv2.imread(img_dir)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224)).reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_152(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_152.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v1_152/SpatialSqueeze:0')
        softmax = tf.nn.softmax(outputs)
        top_class = tf.argmax(softmax, axis=1)[0]
        top_prob = tf.reduce_max(softmax, axis=1)[0]
        pred, prob = sess.run([top_class, top_prob],
                              feed_dict={inputs: image})

    # Checkpoint class indices are 1-based relative to label_dict.
    name = label_dict[pred + 1]
    print('Result of ResNet-V1-152:', name, prob)
    return name, prob
def extract_feature(imgList, args):
    """Extract global-pool features for a batch of images with ResNet-V1-152.

    Restores all model variables except those under the excluded 'Logits'
    scope from the checkpoint named by ``args.cnnModel``, then evaluates the
    'global_pool' endpoint and returns one 2048-d vector per entry.

    NOTE(review): the placeholder is fixed at [200, 32, 64, 3], so imgList
    presumably holds exactly 200 images of shape 32x64x3 — confirm against
    the caller.
    """
    # tf.reset_default_graph()
    # queue = tf.train.string_input_producer(imgList, num_epochs=None, shuffle=False)
    # reader = tf.WholeFileReader()
    # img_path, img_data = reader.read(queue)
    # img = vgg_preprocessing.preprocess_image(tf.image.decode_jpeg(contents=img_data, channels=3), 128, 256)
    # img = tf.expand_dims(img, 0)
    side_batch = tf.placeholder(tf.float32, [200, 32, 64, 3],
                                name='side_batch')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_152(inputs=side_batch,
                                                  is_training=True)
    # feat1 = end_points['resnet_v1_152/block4']
    feat2 = end_points['global_pool']
    # Restore everything except the classifier head (scope 'Logits').
    checkpoint_exclude_scopes = 'Logits'
    exclusions = None
    if checkpoint_exclude_scopes:
        exclusions = [
            scope.strip()
            for scope in checkpoint_exclude_scopes.split(',')
        ]
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
        if not excluded:
            variables_to_restore.append(var)
    saver = tf.train.Saver(
        var_list=variables_to_restore)
    # init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        # sess.run(init_op)
        saver.restore(sess, args.cnnModel)
        # Queue runners are started although no queue ops remain in the
        # graph (the producer above is commented out) — likely leftover.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        feats1 = []
        feats2 = []
        # NOTE(review): every iteration feeds the ENTIRE imgList and appends
        # only f2[0][0][0] (the first image's feature), so the loop computes
        # the same value len(imgList) times. It looks like the intent was
        # either a single sess.run over the batch or per-image feeds —
        # verify before relying on the returned features.
        for i in range(len(imgList)):
            # f1, f2 = sess.run([feat1, feat2])  # f1: (1, 4, 8, 2048) f2: (1, 1, 1, 2048)
            f2 = sess.run(feat2, feed_dict={side_batch: np.array(imgList)})
            # feats1.append(f1[0])
            feats2.append(f2[0][0][0])
            if (i + 1) % 1000 == 0:
                print('%s/%s' % (i + 1, len(imgList)))
        coord.request_stop()
        coord.join(threads)
    # return feats1, feats2
    return feats2
def create_network(self):
    """Build the ResNet-V1-152 multi-label head.

    Produces logits, per-class sigmoid probabilities, and binary
    predictions thresholded at ``self.prediction_threshold``.
    """
    with tf.contrib.slim.arg_scope(resnet_arg_scope()):
        logits, end_points = resnet_v1_152(self.img,
                                           num_classes=self.nb_class,
                                           is_training=self.is_training,
                                           global_pool=True,
                                           spatial_squeeze=True)
    self.logits = logits
    # Independent sigmoid per class (multi-label, not a softmax).
    self.probabilities = tf.nn.sigmoid(self.logits)
    above_threshold = self.probabilities >= self.prediction_threshold
    self.predictions = tf.cast(above_threshold, tf.float32)
def resnet_v1_152(inputs, is_training, opts):
    """Build slim's ResNet-V1-152 under an options-driven arg scope.

    Regularization and batch-norm hyperparameters, the head size, and the
    pooling/squeeze behavior all come from ``opts``.
    """
    scope = resnet_v1.resnet_arg_scope(
        weight_decay=opts.weight_decay,
        batch_norm_decay=opts.batch_norm_decay,
        batch_norm_epsilon=opts.batch_norm_epsilon,
        activation_fn=tf.nn.relu)
    with slim.arg_scope(scope):
        return resnet_v1.resnet_v1_152(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
def __call__(self, x_input):
    """Constructs model and return probabilities for given input."""
    reuse = True if self.built else None

    # Normalize and resize to the 224x224 input the network expects.
    x_input = image_normalize(x_input, normalization_method[6])
    x_input = tf.image.resize_images(x_input, [224, 224])

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_152(
            x_input,
            num_classes=self.num_classes - 1,
            is_training=False,
            reuse=reuse)
    self.built = True

    # This 1000-way model has no background class; prepend a zero column so
    # the output lines up with 1001-way label indexing.
    batch = tf.shape(x_input)[0]
    class_probs = tf.reshape(end_points['predictions'], [-1, 1000])
    end_points['predictions'] = tf.concat(
        [tf.zeros([batch, 1]), class_probs], axis=1)
    # Strip off the extra reshape op at the output
    output = end_points['predictions']
    return output
def main(_):
    """Run an ensemble of ImageNet classifiers over a directory of images.

    Builds one graph per checkpoint in a fixed list of 11 models, collects
    per-image class probabilities from each, combines them by mean or vote
    (``FLAGS.ensemble_type``), and writes "filename,label" lines to
    ``FLAGS.output_file``. ``FLAGS.test_idx`` in 1..11 restricts the run to
    a single model.
    """
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    ensemble_type = FLAGS.ensemble_type

    tf.logging.set_verbosity(tf.logging.INFO)

    # One checkpoint per model; order defines the 1-based idx used below.
    checkpoint_path_list = [
        FLAGS.checkpoint_path_inception_v1, FLAGS.checkpoint_path_inception_v2,
        FLAGS.checkpoint_path_inception_v3, FLAGS.checkpoint_path_inception_v4,
        FLAGS.checkpoint_path_inception_resnet_v2,
        FLAGS.checkpoint_path_resnet_v1_101,
        FLAGS.checkpoint_path_resnet_v1_152,
        FLAGS.checkpoint_path_resnet_v2_101,
        FLAGS.checkpoint_path_resnet_v2_152, FLAGS.checkpoint_path_vgg_16,
        FLAGS.checkpoint_path_vgg_19
    ]
    # Per-model input normalization scheme, parallel to the list above.
    normalization_method = [
        'default', 'default', 'default', 'default', 'global', 'caffe_rgb',
        'caffe_rgb', 'default', 'default', 'caffe_rgb', 'caffe_rgb'
    ]
    pred_list = []
    for idx, checkpoint_path in enumerate(checkpoint_path_list, 1):
        with tf.Graph().as_default():
            if int(FLAGS.test_idx) == 20 and idx in [3]:
                continue
            # Single-model mode: skip every model except test_idx.
            if int(FLAGS.test_idx) in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
                                       ] and int(FLAGS.test_idx) != idx:
                continue
            # Prepare graph. Models with 224x224 inputs (inception v1/v2,
            # resnet_v1, vgg) get an in-graph resize; the rest take the raw
            # batch shape.
            if idx in [1, 2, 6, 7, 10, 11]:
                _x_input = tf.placeholder(tf.float32, shape=batch_shape)
                x_input = tf.image.resize_images(_x_input, [224, 224])
            else:
                _x_input = tf.placeholder(tf.float32, shape=batch_shape)
                x_input = _x_input
            x_input = image_normalize(x_input, normalization_method[idx - 1])

            if idx == 1:
                with slim.arg_scope(inception.inception_v1_arg_scope()):
                    _, end_points = inception.inception_v1(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 2:
                with slim.arg_scope(inception.inception_v2_arg_scope()):
                    _, end_points = inception.inception_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 3:
                with slim.arg_scope(inception.inception_v3_arg_scope()):
                    _, end_points = inception.inception_v3(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 4:
                with slim.arg_scope(inception.inception_v4_arg_scope()):
                    _, end_points = inception.inception_v4(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 5:
                with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                    _, end_points = inception.inception_resnet_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 6:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_101(x_input,
                                                            num_classes=1000,
                                                            is_training=False)
            elif idx == 7:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_152(x_input,
                                                            num_classes=1000,
                                                            is_training=False)
            elif idx == 8:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_101(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 9:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_152(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 10:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_16(x_input,
                                               num_classes=1000,
                                               is_training=False)
                    # VGG endpoints expose raw fc8 logits; add softmax here.
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_16/fc8'])
            elif idx == 11:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_19(x_input,
                                               num_classes=1000,
                                               is_training=False)
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_19/fc8'])

            #end_points = tf.reduce_mean([end_points1['Predictions'], end_points2['Predictions'], end_points3['Predictions'], end_points4['Predictions']], axis=0)
            #predicted_labels = tf.argmax(end_points, 1)

            # Run computation
            saver = tf.train.Saver(slim.get_model_variables())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(saver=saver),
                checkpoint_filename_with_path=checkpoint_path,
                master=FLAGS.master)
            pred_in = []
            filenames_list = []
            with tf.train.MonitoredSession(
                    session_creator=session_creator) as sess:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    #if idx in [1,2,6,7,10,11]:
                    #  # 16x299x299x3
                    #  images = zoom(images, (1, 0.7491638795986622, 0.7491638795986622, 1), order=2)
                    filenames_list.extend(filenames)
                    end_points_dict = sess.run(end_points,
                                               feed_dict={_x_input: images})
                    # 1000-class models (resnet_v1, vgg) lack the background
                    # class; pad a zero column so shapes match num_classes.
                    if idx in [6, 7, 10, 11]:
                        end_points_dict['predictions'] = \
                            np.concatenate([np.zeros([FLAGS.batch_size, 1]),
                                            np.array(end_points_dict['predictions'].reshape(-1, 1000))],
                                           axis=1)
                    # Key differs by model family: 'Predictions' (inception)
                    # vs 'predictions' (resnet/vgg).
                    try:
                        pred_in.extend(end_points_dict['Predictions'].reshape(
                            -1, num_classes))
                    except KeyError:
                        pred_in.extend(end_points_dict['predictions'].reshape(
                            -1, num_classes))
            pred_list.append(pred_in)

    if ensemble_type == 'mean':
        # Average probabilities across models, then take the argmax.
        pred = np.mean(pred_list, axis=0)
        labels = np.argmax(
            pred, axis=1
        )  # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch
    elif ensemble_type == 'vote':
        # Per-model argmax, then a median vote across models.
        pred = np.argmax(
            pred_list, axis=2
        )  # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch
        labels = np.median(pred, axis=0)

    with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
        for filename, label in zip(filenames_list, labels):
            out_file.write('{0},{1}\n'.format(filename, label))
def _construct_model(model_type='resnet_v1_50'):
  """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the
      corresponding activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
  # Placeholder input.
  images = array_ops.placeholder(
      dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

  # Keyword bundle shared by every fully-convolutional ResNet variant.
  resnet_kwargs = dict(num_classes=None, is_training=False, global_pool=False)

  # Dispatch table: model name -> zero-arg builder returning
  # (net, end_points) for the placeholder above.
  builders = {
      'inception_resnet_v2':
          lambda: inception.inception_resnet_v2_base(images),
      'inception_resnet_v2-same':
          lambda: inception.inception_resnet_v2_base(
              images, align_feature_maps=True),
      'inception_v2':
          lambda: inception.inception_v2_base(images),
      'inception_v2-no-separable-conv':
          lambda: inception.inception_v2_base(
              images, use_separable_conv=False),
      'inception_v3':
          lambda: inception.inception_v3_base(images),
      'inception_v4':
          lambda: inception.inception_v4_base(images),
      'alexnet_v2':
          lambda: alexnet.alexnet_v2(images),
      'vgg_a':
          lambda: vgg.vgg_a(images),
      'vgg_16':
          lambda: vgg.vgg_16(images),
      'mobilenet_v1':
          lambda: mobilenet_v1.mobilenet_v1_base(images),
      'mobilenet_v1_075':
          lambda: mobilenet_v1.mobilenet_v1_base(
              images, depth_multiplier=0.75),
      'resnet_v1_50':
          lambda: resnet_v1.resnet_v1_50(images, **resnet_kwargs),
      'resnet_v1_101':
          lambda: resnet_v1.resnet_v1_101(images, **resnet_kwargs),
      'resnet_v1_152':
          lambda: resnet_v1.resnet_v1_152(images, **resnet_kwargs),
      'resnet_v1_200':
          lambda: resnet_v1.resnet_v1_200(images, **resnet_kwargs),
      'resnet_v2_50':
          lambda: resnet_v2.resnet_v2_50(images, **resnet_kwargs),
      'resnet_v2_101':
          lambda: resnet_v2.resnet_v2_101(images, **resnet_kwargs),
      'resnet_v2_152':
          lambda: resnet_v2.resnet_v2_152(images, **resnet_kwargs),
      'resnet_v2_200':
          lambda: resnet_v2.resnet_v2_200(images, **resnet_kwargs),
  }

  if model_type not in builders:
    raise ValueError('Unsupported model_type %s.' % model_type)
  _, end_points = builders[model_type]()
  return end_points
def _construct_model(model_type='resnet_v1_50'):
    """Constructs model for the desired type of CNN.

    Args:
      model_type: Type of model to be used.

    Returns:
      end_points: A dictionary from components of the network to the
        corresponding activations.

    Raises:
      ValueError: If the model_type is not supported.
    """
    # Placeholder input. Height/width are left dynamic (None) so the model
    # is built fully convolutionally for a single image.
    images = array_ops.placeholder(dtypes.float32,
                                   shape=(1, None, None, 3),
                                   name=_INPUT_NODE)

    # Construct model. Each branch builds the backbone for one architecture;
    # only the endpoint dictionary is kept. ResNet variants are built
    # headless (num_classes=None) and without global pooling so the spatial
    # activations survive into end_points.
    if model_type == 'inception_resnet_v2':
        _, end_points = inception.inception_resnet_v2_base(images)
    elif model_type == 'inception_resnet_v2-same':
        _, end_points = inception.inception_resnet_v2_base(
            images, align_feature_maps=True)
    elif model_type == 'inception_v2':
        _, end_points = inception.inception_v2_base(images)
    elif model_type == 'inception_v2-no-separable-conv':
        _, end_points = inception.inception_v2_base(images,
                                                    use_separable_conv=False)
    elif model_type == 'inception_v3':
        _, end_points = inception.inception_v3_base(images)
    elif model_type == 'inception_v4':
        _, end_points = inception.inception_v4_base(images)
    elif model_type == 'alexnet_v2':
        _, end_points = alexnet.alexnet_v2(images)
    elif model_type == 'vgg_a':
        _, end_points = vgg.vgg_a(images)
    elif model_type == 'vgg_16':
        _, end_points = vgg.vgg_16(images)
    elif model_type == 'mobilenet_v1':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images)
    elif model_type == 'mobilenet_v1_075':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images,
                                                       depth_multiplier=0.75)
    elif model_type == 'resnet_v1_50':
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v1_101':
        _, end_points = resnet_v1.resnet_v1_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_152':
        _, end_points = resnet_v1.resnet_v1_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_200':
        _, end_points = resnet_v1.resnet_v1_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_50':
        _, end_points = resnet_v2.resnet_v2_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v2_101':
        _, end_points = resnet_v2.resnet_v2_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_152':
        _, end_points = resnet_v2.resnet_v2_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_200':
        _, end_points = resnet_v2.resnet_v2_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    else:
        raise ValueError('Unsupported model_type %s.' % model_type)
    return end_points
def main(_):
    """Score adversarial images against true labels with one chosen model.

    Loads original and adversarial PNGs, flags images whose L-inf
    perturbation exceeds ``FLAGS.max_epsilon``, builds the classifier named
    by ``FLAGS.checkpoint_file_name``, and counts misclassified images
    (excluding over-epsilon ones) as the attack score.
    """
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    # max_epsilon over checking
    # get original images
    origin_img_list = np.sort(glob.glob(FLAGS.origin_img_dir + "*.png"))
    origin_imgs = np.zeros(
        (len(origin_img_list), FLAGS.image_height, FLAGS.image_width, 3),
        dtype=float)
    for i in range(len(origin_img_list)):
        origin_imgs[i] = imread(origin_img_list[i], mode='RGB').astype(np.float)

    # get adv images
    adv_img_list = np.sort(glob.glob(FLAGS.input_dir + "*.png"))
    adv_imgs = np.zeros(
        (len(adv_img_list), FLAGS.image_height, FLAGS.image_width, 3),
        dtype=float)
    for i in range(len(adv_img_list)):
        adv_imgs[i] = imread(adv_img_list[i], mode='RGB').astype(np.float)

    # Per-image L-inf norm of the perturbation.
    epsilon_list = np.linalg.norm(
        np.reshape(abs(origin_imgs - adv_imgs),
                   [-1, FLAGS.image_height * FLAGS.image_width * 3]),
        ord=np.inf, axis=1)

    # Mark images whose perturbation exceeds the allowed budget.
    over_epsilon_list = np.zeros((len(origin_img_list), 2), dtype=object)
    cnt = 0
    for i in range(len(origin_img_list)):
        file_name = origin_img_list[i].split("/")[-1]
        file_name = file_name.split(".")[0]
        over_epsilon_list[i, 0] = file_name
        if epsilon_list[i] > FLAGS.max_epsilon:
            over_epsilon_list[i, 1] = "1"
            cnt += 1

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        if FLAGS.checkpoint_file_name == "inception_v3.ckpt":
            with slim.arg_scope(inception.inception_v3_arg_scope()):
                _, end_points = inception.inception_v3(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v4.ckpt":
            with slim.arg_scope(inception.inception_v4_arg_scope()):
                _, end_points = inception.inception_v4(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_resnet_v2_2016_08_30.ckpt":
            with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                _, end_points = inception.inception_resnet_v2(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_101.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_101(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_50.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_50(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_152.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_152(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v1.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(inception.inception_v1_arg_scope()):
                _, end_points = inception.inception_v1(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v2.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224],
                                                align_corners=False)
            with slim.arg_scope(inception.inception_v2_arg_scope()):
                _, end_points = inception.inception_v2(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        # Resnet v1 and vgg are not working now
        elif FLAGS.checkpoint_file_name == "vgg_16.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_16(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            # 1000-way logits: shift by +1 to line up with 1001-way labels.
            predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "vgg_19.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_19(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['vgg_19/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_50.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                # Fix: feed the preprocessed x_input2 (as the v1_101/v1_152
                # branches do); the original passed the raw x_input, so the
                # VGG preprocessing above was built but never used.
                _, end_points = resnet_v1.resnet_v1_50(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_101.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_101(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_152.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_152(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path +
            FLAGS.checkpoint_file_name,
            master=FLAGS.master)

        # Fix: read the true-label CSV with a context manager so the file
        # handle is always closed (the original leaked it).
        with open(FLAGS.true_label, "r") as f:
            t_label_list = np.array([i[:-1].split(",") for i in f.readlines()])

        score = 0
        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    labels = sess.run(predicted_labels,
                                      feed_dict={x_input: images})
                    for filename, label in zip(filenames, labels):
                        f_name = filename.split(".")[0]
                        t_label = int(
                            t_label_list[t_label_list[:, 0] == f_name, 1][0])
                        if t_label != label:
                            # Count a successful attack only when the image
                            # stayed inside the epsilon budget.
                            if over_epsilon_list[
                                    over_epsilon_list[:, 0] == f_name,
                                    1] != "1":
                                score += 1
                        #out_file.write('{0},{1}\n'.format(filename, label))

    print("Over max epsilon#: " + str(cnt))
    print(str(FLAGS.max_epsilon) + " max epsilon Score: " + str(score))
def batch_prediction(image_id_to_path, model, sess):
    # Classify a batch of image files with the chosen slim model and return
    # {image_id: [(class_name, probability) x 5]} plus the sess.run runtime
    # and the session. Python 2 code (print statements, dict.iteritems).
    #
    # NOTE(review): graph ops (decode, preprocess, concat, model, softmax,
    # checkpoint restore) are created inside this function on every call, so
    # the default graph grows each time it is invoked — verify callers only
    # call this once per session, or reset the graph between calls.
    print "batch processing: " + str(len(image_id_to_path))
    image_id_to_predictions = {}
    image_ids = []
    count = 0
    start_time_1 = time.time()
    for image_id, path in image_id_to_path.iteritems():
        image_string = open(path, 'rb').read()
        image = tf.image.decode_jpeg(image_string, channels=3)
        # Model-family-specific preprocessing.
        if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4':
            processed_image = preprocess_for_inception(image,
                                                       image_size,
                                                       image_size,
                                                       central_fraction=1.0)
        elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
            processed_image = vgg_preprocessing.preprocess_image(
                image, image_size, image_size, is_training=False)
        start_time = time.time()
        #print processed_image.shape
        #np_val = sess.run(processed_image)
        #print np_val.shape
        #processed_image = tf.convert_to_tensor(np_val)
        #print processed_image.shape
        #print "conversion: "+str(time.time()-start_time)+" seconds"
        # Accumulate images into one batch tensor by repeated concat.
        if count == 0:
            processed_images = tf.expand_dims(processed_image, 0)
        else:
            local_matrix = tf.expand_dims(processed_image, 0)
            processed_images = tf.concat([processed_images, local_matrix], 0)
        image_ids.append(image_id)
        count = count + 1
    print "Preparation: " + str(time.time() - start_time_1) + " seconds"
    start_time = time.time()
    # Build the chosen model over the batch and prepare its checkpoint
    # restore function.
    if model == 'inception_v1':
        logits, _ = inception.inception_v1(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
            slim.get_model_variables('InceptionV1'))
    elif model == 'inception_v2':
        logits, _ = inception.inception_v2(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v2.ckpt'),
            slim.get_model_variables('InceptionV2'))
    elif model == 'inception_v3':
        logits, _ = inception.inception_v3(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables('InceptionV3'))
    elif model == 'inception_v4':
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
    elif model == 'resnet_v1_50':
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'),
            slim.get_model_variables('resnet_v1_50'))
    elif model == 'resnet_v1_101':
        logits, _ = resnet_v1.resnet_v1_101(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'),
            slim.get_model_variables('resnet_v1_101'))
    elif model == 'resnet_v1_152':
        logits, _ = resnet_v1.resnet_v1_152(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'),
            slim.get_model_variables('resnet_v1_152'))
    elif model == 'vgg_16':
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))
    print "Prediction2.1: " + str(time.time() - start_time) + " seconds"
    start_time = time.time()
    init_fn(sess)
    print "Prediction2.2: " + str(time.time() - start_time) + " seconds"
    probabilities = tf.nn.softmax(logits)
    print "Prediction1: " + str(time.time() - start_time) + " seconds"
    start_time = time.time()
    # NOTE(review): `image` here is whatever the LAST loop iteration decoded;
    # np_image is unused afterwards — presumably leftover debugging.
    np_image, probabilities = sess.run([image, probabilities])
    runtime = time.time() - start_time
    print "Prediction: " + str(runtime) + " seconds"
    # Top-5 per image: sort class indices by descending probability.
    for k in range(len(image_ids)):
        image_id = image_ids[k]
        predictions = []
        prob = probabilities[k, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1])
        ]
        for i in range(5):
            index = sorted_inds[i]
            # Inception heads are 1001-way (background at 0); VGG/ResNet-V1
            # are 1000-way, so their indices are shifted by one in `names`.
            if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4':
                name = names[index]
            elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
                name = names[index + 1]
            pr = prob[index]
            pair = (name, pr)
            predictions.append(pair)
        image_id_to_predictions[image_id] = predictions
    return image_id_to_predictions, runtime, sess
def __init__(self, options):
    """Build the resnet_v1_152 train/validation graph.

    Creates the input placeholders, the training and validation logits
    (weights shared via AUTO_REUSE), the softmax cross-entropy losses,
    the training op over the requested layers (train phase only), and
    the probability / prediction / accuracy ops.

    Args:
        options: configuration object; fields read here are
            NUM_CLASSES, IMAGE_SIZE, PHASE, WEIGHT_DECAY, OPTIMIZER
            and (train phase) TRAIN_LAYERS.
    """
    num_classes = options.NUM_CLASSES
    activation_function = tf.nn.relu

    with tf.variable_scope("input"):
        self.image_size = options.IMAGE_SIZE
        self.x_input = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name="x_input")
        self.y_input = tf.placeholder(tf.float32, [None, num_classes],
                                      name="y_input")
        self.learning_rate = tf.placeholder(tf.float32,
                                            name="learning_rate")
        self.keep_prob = None

    if options.PHASE == 'train':
        # BUG FIX: the original tested an undefined local name
        # `train_layers` (NameError as soon as PHASE == 'train').
        # Read it from the options object instead, defaulting to
        # 'DEFAULT' when the attribute is absent.
        train_layers = getattr(options, 'TRAIN_LAYERS', 'DEFAULT')
        if train_layers == 'DEFAULT':
            self.train_layers = self.DEFAULT_TRAIN_LAYERS
        else:
            self.train_layers = train_layers

        # Training tower: batch norm in training mode.
        with arg_scope(
                resnet_v1.resnet_arg_scope(
                    activation_fn=activation_function,
                    weight_decay=options.WEIGHT_DECAY)):
            self.logits, _ = resnet_v1.resnet_v1_152(
                self.x_input,
                num_classes=num_classes,
                is_training=True,
                reuse=tf.AUTO_REUSE)

        # Validation tower: shares weights (AUTO_REUSE), BN frozen.
        with arg_scope(
                resnet_v1.resnet_arg_scope(
                    activation_fn=activation_function,
                    weight_decay=options.WEIGHT_DECAY)):
            self.logits_val, _ = resnet_v1.resnet_v1_152(
                self.x_input,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        with tf.name_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits, labels=self.y_input))
            self.loss_val = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits_val, labels=self.y_input))

        with tf.name_scope("train"):
            self.global_step = tf.Variable(0, name="global_step",
                                           trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            # Train only variables whose enclosing scope name matches
            # one of the requested layers; everything else is frozen.
            var_list = [
                v for v in tf.trainable_variables()
                if v.name.split('/')[-2] in self.train_layers
                or v.name.split('/')[-3] in self.train_layers
            ]
            gradients = tf.gradients(self.loss, var_list)
            self.grads_and_vars = list(zip(gradients, var_list))
            optimizer = get_optimizer(options.OPTIMIZER,
                                      self.learning_rate)
            # Run UPDATE_OPS (e.g. BN moving averages) with each step.
            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(
                    grads_and_vars=self.grads_and_vars,
                    global_step=self.global_step)
    else:
        # Inference-only graph: no weight decay, BN frozen.
        with arg_scope(
                resnet_v1.resnet_arg_scope(
                    activation_fn=activation_function,
                    weight_decay=0.0)):
            self.logits_val, _ = resnet_v1.resnet_v1_152(
                self.x_input,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val,
                                         name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1,
                                    name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction,
                                      tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, "float"), name="accuracy")
def batch_prediction(frame_id_to_path, frame_id_to_image_ids, image_id_to_coordinates, model, image_size, sess, \
                     debug=_prediction_debug):
    """Run one slim classifier over every cropped region of every frame.

    Crops each annotated region out of its preprocessed frame, stacks
    all crops into a single batch tensor, builds the selected model
    graph over that batch, restores its checkpoint, and returns the
    top-5 (label, probability) pairs per region.

    Args:
        frame_id_to_path: dict, frame id -> JPEG file path.
        frame_id_to_image_ids: dict, frame id -> list of region ids.
        image_id_to_coordinates: dict, region id -> tab-separated
            "x\ty\tw\th" string of fractional coordinates.
        model: model name string selecting architecture + checkpoint.
        image_size: side length each crop is resized to for inference.
        sess: live tf.Session used for preprocessing and inference.
        debug: when true, shows intermediate crops via matplotlib.

    Returns:
        Tuple (image_id_to_predictions, runtime, sess) where
        image_id_to_predictions maps region id -> list of (name, prob)
        pairs and runtime is the sess.run wall time in seconds.
    """
    print "batch processing: " + str(len(image_id_to_coordinates))
    # Pick the preprocessing pipeline matching the model family.
    # NOTE(review): an unrecognized model leaves preprocessing_type
    # unset and raises NameError further down.
    if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4' or \
            model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
            model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
        preprocessing_type = 'inception'
    elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
        preprocessing_type = 'vgg'
    image_id_to_predictions = {}
    image_ids = []
    count = 0
    start_time_1 = time.time()
    for frame_id, path in frame_id_to_path.iteritems():
        frame_string = open(path, 'rb').read()
        frame = tf.image.decode_jpeg(frame_string, channels=3)
        #plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(tf.image.encode_jpeg(frame)))))
        #plt.show()
        # OpenCV read is only used to get the frame's pixel dimensions.
        frame_np = cv2.imread(path, cv2.IMREAD_COLOR)
        frame_height, frame_width = frame_np.shape[:2]
        #print frame_np.shape
        if preprocessing_type == 'inception':
            processed_frame = preprocess_for_inception(frame, frame_height, frame_width, sess, central_fraction=1.0, debug=_prediction_debug)
        elif preprocessing_type == 'vgg':
            processed_frame = preprocess_for_vgg(frame, frame_height, frame_width, frame_height, sess, debug=_prediction_debug)
        start_time = time.time()
        height, width = processed_frame.shape[:2].as_list()
        #print "Size: "+str(width)+", "+str(height)
        #plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(tf.image.encode_jpeg(tf.cast(processed_frame, tf.uint8))))))
        #plt.show()
        for image_id in frame_id_to_image_ids[frame_id]:
            # Region coordinates are fractions of the processed frame.
            fields = image_id_to_coordinates[image_id].split('\t')
            x = int(width * float(fields[0]))
            y = int(height * float(fields[1]))
            w = int(width * float(fields[2]))
            h = int(height * float(fields[3]))
            processed_image = tf.image.crop_to_bounding_box(
                processed_frame, y, x, h, w)
            if debug:
                print "object at " + str(fields)
                print str(x) + ", " + str(y) + ", " + str(w) + ", " + str(
                    h) + ", " + str(frame_height - y - h)
                if preprocessing_type == 'vgg':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(processed_image,
                                                tf.uint8))))))
                elif preprocessing_type == 'inception':
                    # Inception-style inputs are small floats; scale up
                    # before encoding so the preview is visible.
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(
                                            tf.multiply(processed_image, 255),
                                            tf.uint8))))))
                plt.show()
            processed_image = tf.image.resize_images(processed_image,
                                                     (image_size, image_size))
            if debug:
                print "resized"
                if preprocessing_type == 'vgg':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(processed_image,
                                                tf.uint8))))))
                elif preprocessing_type == 'inception':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(
                                            tf.multiply(processed_image, 255),
                                            tf.uint8))))))
                plt.show()
            # Accumulate every crop into one batch tensor.
            # NOTE(review): repeated tf.concat grows the graph on every
            # region; also, with zero regions processed_images stays
            # undefined and the model calls below raise NameError.
            if count == 0:
                processed_images = tf.expand_dims(processed_image, 0)
            else:
                local_matrix = tf.expand_dims(processed_image, 0)
                processed_images = tf.concat([processed_images, local_matrix],
                                             0)
            image_ids.append(image_id)
            count = count + 1
    print "Preparation: " + str(time.time() - start_time_1) + " seconds"
    start_time = time.time()
    # Build the selected architecture over the batch and prepare a
    # restore function for its checkpointed variables.
    if model == 'inception_v1':
        logits, _ = inception.inception_v1(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
            slim.get_model_variables('InceptionV1'))
    elif model == 'inception_v2':
        logits, _ = inception.inception_v2(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v2.ckpt'),
            slim.get_model_variables('InceptionV2'))
    elif model == 'inception_v3':
        logits, _ = inception.inception_v3(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables('InceptionV3'))
    elif model == 'inception_v4':
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
    elif model == 'resnet_v1_50':
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'),
            slim.get_model_variables('resnet_v1_50'))
    elif model == 'resnet_v1_101':
        logits, _ = resnet_v1.resnet_v1_101(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'),
            slim.get_model_variables('resnet_v1_101'))
    elif model == 'resnet_v1_152':
        logits, _ = resnet_v1.resnet_v1_152(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'),
            slim.get_model_variables('resnet_v1_152'))
    elif model == 'mobilenet_v1_0.25_128':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images,
                                              num_classes=1001,
                                              is_training=False,
                                              depth_multiplier=0.25)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.25_128.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_0.50_160':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images,
                                              num_classes=1001,
                                              is_training=False,
                                              depth_multiplier=0.50)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.50_160.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_1.0_224':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images,
                                              num_classes=1001,
                                              is_training=False,
                                              depth_multiplier=1.0)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_1.0_224.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'inception_resnet_v2':
        logits, _ = inception_resnet_v2.inception_resnet_v2(
            processed_images, num_classes=1001, is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir,
                         'inception_resnet_v2_2016_08_30.ckpt'),
            slim.get_model_variables('InceptionResnetV2'))
    elif model == 'nasnet_mobile':
        logits, _ = nasnet.build_nasnet_mobile(processed_images,
                                               num_classes=1001,
                                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'nasnet_large':
        logits, _ = nasnet.build_nasnet_large(processed_images,
                                              num_classes=1001,
                                              is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'vgg_16':
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))
    print "Prediction2.1: " + str(time.time() - start_time) + " seconds"
    start_time = time.time()
    init_fn(sess)
    print "Prediction2.2: " + str(time.time() - start_time) + " seconds"
    probabilities = tf.nn.softmax(logits)
    start_time = time.time()
    # NOTE(review): `frame` is whatever the final loop iteration
    # decoded; it is fetched here but np_image is never used.
    np_image, probabilities = sess.run([frame, probabilities])
    runtime = time.time() - start_time
    print "Prediction: " + str(runtime) + " seconds"
    for k in range(len(image_ids)):
        image_id = image_ids[k]
        predictions = []
        prob = probabilities[k, 0:]
        # Class indices sorted by descending probability.
        sorted_inds = [
            i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1])
        ]
        for i in range(5):
            index = sorted_inds[i]
            # 1001-class models index `names` directly; 1000-class
            # models are shifted by one — presumably to skip a leading
            # background entry in `names` (TODO confirm).
            if model == 'inception_v1' or model == 'inception_v2' or \
                    model == 'inception_v3' or model == 'inception_v4' or \
                    model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
                    model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
                name = names[index]
            elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
                name = names[index + 1]
            pr = prob[index]
            pair = (name, pr)
            predictions.append(pair)
        image_id_to_predictions[image_id] = predictions
    return image_id_to_predictions, runtime, sess
def main(_):
    """Generate targeted adversarial images with a large model ensemble.

    Builds clean and adversarially-trained ImageNet classifiers over a
    diversity-transformed input, forms an ensemble loss toward the
    per-image target class, smooths its gradient with a Gaussian
    kernel, and iteratively perturbs each batch within an L-infinity
    epsilon ball, saving results to FLAGS.output_dir.
    """
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    tf.logging.set_verbosity(tf.logging.INFO)
    full_start = timer()
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    all_images_taget_class, all_images_true_label = load_target_class(
        FLAGS.input_dir)
    if not os.path.exists(FLAGS.output_dir):
        os.mkdir(FLAGS.output_dir)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        target_class_input = tf.placeholder(tf.int32,
                                            shape=[FLAGS.batch_size])
        # NOTE(review): `momentum` is unused — the momentum update
        # below is commented out.
        momentum = FLAGS.momentum
        eps = 2.0 * FLAGS.max_epsilon / 255.0
        alpha = 0.2  # per-iteration step size in [-1, 1] pixel space
        num_classes = 1000  # models without a background class
        num_classes_a = 1001  # models with a leading background class
        # image = x_input
        # Input-diversity transform applied before every model.
        image = input_diversity(x_input)
        # image = batch_dct2d(image)
        """ 224 input """
        processed_imgs_res_v1_50 = preprocess_for_model(image, 'resnet_v1_50')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
                processed_imgs_res_v1_50,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        processed_imgs_res_v1_101 = preprocess_for_model(
            image, 'resnet_v1_101')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_101, end_points_res_v1_101 = resnet_v1.resnet_v1_101(
                processed_imgs_res_v1_101,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        processed_res_v1 = preprocess_for_model(image, 'resnet_v1_152')
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits_res_v1_152, end_points_res_v1 = resnet_v1.resnet_v1_152(
                processed_res_v1,
                num_classes=num_classes,
                is_training=False,
                scope='resnet_v1_152',
                reuse=tf.AUTO_REUSE)
        processed_imgs_vgg_16 = preprocess_for_model(image, 'vgg_16')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(
                processed_imgs_vgg_16,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_16')
        processed_imgs_vgg_19 = preprocess_for_model(image, 'vgg_19')
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits_vgg_19, end_points_vgg_19 = vgg.vgg_19(
                processed_imgs_vgg_19,
                num_classes=num_classes,
                is_training=False,
                scope='vgg_19')
        # Mean logits of the clean 1000-class models.
        logits_clean_a = (logits_res_v1_50 + logits_res_v1_101 +
                          logits_res_v1_152 + logits_vgg_16 +
                          logits_vgg_19) / 5.0
        processed_imgs_inception_v1 = preprocess_for_model(
            image, 'inception_v1')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            logits_inception_v1, end_points_inception_v1 = inception_v1.inception_v1(
                processed_imgs_inception_v1,
                num_classes=num_classes_a,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        """ 299 input """
        x_div = preprocess_for_model(image, 'inception_v3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_inc_v3, end_points_inc_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV3')
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits_inc_v4, end_points_inc_v4 = inception_v4.inception_v4(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionV4')
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_inc_res_v2, end_points_inc_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='InceptionResnetV2')
        # Mean logits of the clean 1001-class models.
        logits_clean_b = (logits_inc_v3 + logits_inc_v4 + logits_inc_res_v2 +
                          logits_inception_v1) / 4.0
        """ add adv model """
        # Adversarially/ensemble-adversarially trained variants, each
        # under its own checkpoint scope.
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_adv_v3, end_points_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens3_adv_v3, end_points_ens3_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens3AdvInceptionV3')
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            logits_ens4_adv_v3, end_points_ens4_adv_v3 = inception_v3.inception_v3(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='Ens4AdvInceptionV3')
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_ensadv_res_v2, end_points_ensadv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='EnsAdvInceptionResnetV2')
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits_adv_res_v2, end_points_adv_res_v2 = inception_resnet_v2.inception_resnet_v2(
                x_div,
                num_classes=num_classes_a,
                is_training=False,
                scope='AdvInceptionResnetV2')
        logits_ens_a = (logits_adv_v3 + logits_ens3_adv_v3 +
                        logits_ens4_adv_v3 + logits_ensadv_res_v2 +
                        logits_adv_res_v2) / 5.0
        # Mean auxiliary-head logits of the adversarial models.
        logits_ens_aux = (end_points_adv_v3['AuxLogits'] +
                          end_points_ens3_adv_v3['AuxLogits'] +
                          end_points_ens4_adv_v3['AuxLogits'] +
                          end_points_adv_res_v2['AuxLogits'] +
                          end_points_ensadv_res_v2['AuxLogits']) / 5.0
        label_test = tf.argmax(logits_adv_v3, axis=1)
        """ ensemble model loss """
        # Slice [:, 1:1001] drops the 1001-class models' extra leading
        # class so all logits align on 1000 classes.
        clean_logits = (logits_clean_a + logits_clean_b[:, 1:1001]) / 2.0
        adv_logits = logits_ens_a[:, 1:1001] + logits_ens_aux[:, 1:1001]
        logits = (clean_logits + adv_logits) / 2.0
        ens_labels = tf.argmax(logits, axis=1)
        one_hot = tf.one_hot(target_class_input, num_classes)
        # Per-model losses (only total_loss drives the attack; the rest
        # are available for the commented-out diagnostics below).
        loss_adv_v3 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_adv_v3[:, 1:1001],
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_ens3_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens3_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ens4_adv_v3 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ens4_adv_v3[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_ensadv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_ensadv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_adv_res_v2 = tf.losses.softmax_cross_entropy(
            one_hot,
            logits_adv_res_v2[:, 1:1001],
            label_smoothing=0.0,
            weights=1.0)
        loss_res_v1_101 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_101,
                                                          label_smoothing=0.0,
                                                          weights=1.0)
        loss_res_v1_50 = tf.losses.softmax_cross_entropy(one_hot,
                                                         logits_res_v1_50,
                                                         label_smoothing=0.0,
                                                         weights=1.0)
        loss_vgg_16 = tf.losses.softmax_cross_entropy(one_hot,
                                                      logits_vgg_16,
                                                      label_smoothing=0.0,
                                                      weights=1.0)
        loss_res_v1_152 = tf.losses.softmax_cross_entropy(one_hot,
                                                          logits_res_v1_152,
                                                          label_smoothing=0.0,
                                                          weights=1.0)
        total_loss = tf.losses.softmax_cross_entropy(one_hot,
                                                     logits,
                                                     label_smoothing=0.0,
                                                     weights=1.0)
        noise = tf.gradients(total_loss, x_input)[0]
        # Smooth the gradient with a Gaussian kernel applied per channel
        # (translation-invariant attack style).
        kernel = gkern(15, FLAGS.sig).astype(np.float32)
        stack_kernel = np.stack([kernel, kernel, kernel]).swapaxes(2, 0)
        stack_kernel = np.expand_dims(stack_kernel, 3)
        noise = tf.nn.depthwise_conv2d(noise,
                                       stack_kernel,
                                       strides=[1, 1, 1, 1],
                                       padding='SAME')
        # [batch, out_height, out_width, in_channels * channel_multiplier]
        # Normalize each example's gradient by its own std.
        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(tf.reshape(noise,
                                                    [FLAGS.batch_size, -1]),
                                         axis=1),
            [FLAGS.batch_size, 1, 1, 1])
        # noise = momentum * grad + noise
        # NOTE(review): this normalization is applied a second time —
        # looks like a leftover from the commented-out momentum step
        # (after the first pass the per-example std is already ~1).
        noise = noise / tf.reshape(
            tf.contrib.keras.backend.std(tf.reshape(noise,
                                                    [FLAGS.batch_size, -1]),
                                         axis=1),
            [FLAGS.batch_size, 1, 1, 1])
        # One saver per checkpoint scope so each model restores from
        # its own file.
        s1 = tf.train.Saver(slim.get_model_variables(scope='InceptionV1'))
        s2 = tf.train.Saver(slim.get_model_variables(scope='InceptionV3'))
        s3 = tf.train.Saver(slim.get_model_variables(scope='InceptionV4'))
        s4 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_50'))
        s5 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_101'))
        s6 = tf.train.Saver(slim.get_model_variables(scope='resnet_v1_152'))
        s7 = tf.train.Saver(slim.get_model_variables(scope='vgg_16'))
        s8 = tf.train.Saver(slim.get_model_variables(scope='vgg_19'))
        s9 = tf.train.Saver(
            slim.get_model_variables(scope='InceptionResnetV2'))
        s10 = tf.train.Saver(
            slim.get_model_variables(scope='AdvInceptionResnetV2'))
        s11 = tf.train.Saver(
            slim.get_model_variables(scope='Ens3AdvInceptionV3'))
        s12 = tf.train.Saver(
            slim.get_model_variables(scope='Ens4AdvInceptionV3'))
        s13 = tf.train.Saver(
            slim.get_model_variables(scope='EnsAdvInceptionResnetV2'))
        s14 = tf.train.Saver(slim.get_model_variables(scope='AdvInceptionV3'))
        print('Created Graph')

        with tf.Session() as sess:
            s1.restore(sess, FLAGS.checkpoint_path_inception_v1)
            s2.restore(sess, FLAGS.checkpoint_path_inception_v3)
            s3.restore(sess, FLAGS.checkpoint_path_inception_v4)
            s4.restore(sess, FLAGS.checkpoint_path_resnet_v1_50)
            s5.restore(sess, FLAGS.checkpoint_path_resnet_v1_101)
            s6.restore(sess, FLAGS.checkpoint_path_resnet_v1_152)
            s7.restore(sess, FLAGS.checkpoint_path_vgg_16)
            s8.restore(sess, FLAGS.checkpoint_path_vgg_19)
            s9.restore(sess, FLAGS.checkpoint_path_inception_resnet_v2)
            s10.restore(sess, FLAGS.checkpoint_path_adv_inception_resnet_v2)
            s11.restore(sess, FLAGS.checkpoint_path_ens3_adv_inception_v3)
            s12.restore(sess, FLAGS.checkpoint_path_ens4_adv_inception_v3)
            s13.restore(sess,
                        FLAGS.checkpoint_path_ens_adv_inception_resnet_v2)
            s14.restore(sess, FLAGS.checkpoint_path_adv_inception_v3)
            print('Initialized Models')
            processed = 0.0
            defense, tgt, untgt, final = 0.0, 0.0, 0.0, 0.0
            idx = 0
            for filenames, images in load_images(FLAGS.input_dir,
                                                 batch_shape):
                # Pad the per-image label lists out to the full batch
                # size (filenames drop their 4-char extension as keys).
                target_class_for_batch = (
                    [all_images_taget_class[n[:-4]] for n in filenames] +
                    [0] * (FLAGS.batch_size - len(filenames)))
                true_label_for_batch = (
                    [all_images_true_label[n[:-4]] for n in filenames] +
                    [0] * (FLAGS.batch_size - len(filenames)))
                # L-infinity ball around the originals, clipped to the
                # valid [-1, 1] pixel range.
                x_max = np.clip(images + eps, -1.0, 1.0)
                x_min = np.clip(images - eps, -1.0, 1.0)
                adv_img = np.copy(images)
                for i in range(FLAGS.iterations):
                    # loss_set = sess.run([loss_adv_v3,loss_ens3_adv_v3,loss_ens4_adv_v3,loss_ensadv_res_v2,
                    #                      loss_adv_res_v2,loss_res_v1_101,loss_res_v1_50,loss_vgg_16,loss_res_v1_152],
                    #                     feed_dict={x_input: batch_NLM(adv_img),
                    #                                target_class_input: target_class_for_batch})
                    # print ("loss:",loss_set)
                    # label_ens_model = sess.run([a,b,c,d],feed_dict={x_input: adv_img,target_class_input: target_class_for_batch})
                    # print ("label_ens_model:",label_ens_model)
                    # print (target_class_for_batch,true_label_for_batch)
                    # Non-local-means denoise every 5th iteration.
                    adv_img = batch_NLM(adv_img) if i % 5 == 0 else adv_img
                    ens_loss, pred, grad, pred_adv_v3 = sess.run(
                        [total_loss, ens_labels, noise, label_test],
                        feed_dict={
                            x_input: adv_img,
                            target_class_input: target_class_for_batch
                        })
                    # Step toward the target class with a rounded,
                    # clipped gradient, then project into the eps-ball.
                    adv_img = adv_img - alpha * np.clip(np.round(grad), -2, 2)
                    adv_img = np.clip(adv_img, x_min, x_max)
                    print("{} \t total_loss {}".format(i, ens_loss))
                    print('prediction :', pred)
                    print('target_label :', target_class_for_batch)
                    print('true_label :', true_label_for_batch)
                # print ("{} \t total_loss {} predction {} \t target class {} \t true label {} \t ".format(i,ens_loss,pred,target_class_for_batch,true_label_for_batch))
                # print ("model predction {} \t target class {} \t true label {} \t ".format(pred,target_class_for_batch,true_label_for_batch))
                print(
                    "final prediction {} \t target class {} \t true label {} \t "
                    .format(pred, target_class_for_batch,
                            true_label_for_batch))
                # Batch statistics: tgt = hit target class, defense =
                # still predicted true label, untgt = everything else.
                processed += FLAGS.batch_size
                tgt += sum(
                    np.equal(np.array(pred),
                             np.array(target_class_for_batch)))
                defense += sum(
                    np.equal(np.array(pred), np.array(true_label_for_batch)))
                untgt = processed - tgt - defense
                print("processed {} \t acc {} {} \t tgt {} {} \t untgt {} {} ".
                      format(processed, defense, defense / processed, tgt,
                             tgt / processed, untgt, untgt / processed))
                full_end = timer()
                print("DONE: Processed {} images in {} sec".format(
                    processed, full_end - full_start))
                save_images(adv_img, filenames, FLAGS.output_dir)
            print("DONE: Processed {} images in {} sec".format(
                processed, full_end - full_start))