def main(_):
    """Micro-benchmark: time repeated forward passes through a single layer
    (layer index 7) of VGG-16 on the configured device.

    NOTE(review): despite the "GPU compute time" log message, `device` is set
    to '/cpu:0' here — confirm which device this benchmark is meant to target.
    """
    with tf.Graph().as_default():
        device = '/cpu:0'

        # Session config: soft placement so ops fall back if the device is
        # unavailable; GPU memory capped at 90% and grown on demand.
        config = tf.ConfigProto(log_device_placement=False)
        config.allow_soft_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.90
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            with tf.device(device):
                # Project helper: builds the Keras model plus a metadata dict.
                model, model_info = get_model('vgg_16', 1, include_top=True, double_prec=False)

            # Keras backend function that evaluates only layer 7
            # (input -> output of that single layer).
            one_layer = backend.function([model.layers[7].input], [model.layers[7].output])

            # Dummy data: a full input image (unused below except in the
            # commented-out full-model run) and a zero tensor matching
            # layer 7's expected input shape (1, 56, 56, 128).
            images = tf.zeros((1, 224, 224, 3)).eval()
            inputs = tf.zeros((1, 56, 56, 128)).eval()

            #sess.run(model.outputs[0], feed_dict={model.inputs[0]:images, backend.learning_phase():0});

            # Time 32 independent single-layer evaluations; the first
            # iterations likely include warm-up/compile cost.
            for i in range(32):
                start_time = time.time()
                with tf.device(device):
                    one_layer([inputs])
                print("GPU compute time: {}".format(time.time() - start_time))
def main(_):
    """Benchmark/verify model inference in one of three modes (from `args.mode`):

    - 'tf-gpu':  plain TensorFlow inference on GPU.
    - 'tf-cpu':  plain TensorFlow inference pinned to CPU (single-threaded).
    - 'sgxdnn':  partitioned inference where part of the network would run in
                 an SGX enclave (most enclave calls are commented out here;
                 the loop currently times the GPU tail of the partition on a
                 zero-filled placeholder feature map).

    Reads validation images via an input queue, optionally quantizes inputs
    for verification, records accuracy/timing via `Results`, and dumps a
    Chrome trace ('timeline.json') in the tf-* modes.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        # Prepare graph
        num_batches = args.max_num_batches
        sgxutils = None

        if args.mode == 'tf-gpu':
            assert not args.use_sgx
            device = '/gpu:0'
            config = tf.ConfigProto(log_device_placement=False)
            config.allow_soft_placement = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.90
            config.gpu_options.allow_growth = True
        elif args.mode == 'tf-cpu':
            assert not args.verify and not args.use_sgx
            # NOTE(review): tf-cpu branch sets device = '/gpu:0'; the config
            # below disables all GPUs (device_count GPU:0), and `device` is
            # unconditionally overwritten after the session is created, so
            # this assignment is dead — but '/cpu:0' was presumably intended.
            device = '/gpu:0'
            #config = tf.ConfigProto(log_device_placement=False)
            # Force CPU-only execution and single-threaded op scheduling so
            # timings reflect one core.
            config = tf.ConfigProto(log_device_placement=False, device_count={ 'CPU': 1, 'GPU': 0 })
            config.intra_op_parallelism_threads = 1
            config.inter_op_parallelism_threads = 1
        else:
            assert args.mode == 'sgxdnn'
            device = '/gpu:0'
            config = tf.ConfigProto(log_device_placement=False)
            config.allow_soft_placement = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.9
            config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            # NOTE(review): this overwrites the per-mode device choice above
            # (including the tf-cpu case) — confirm this is intentional.
            device = '/gpu:0'
            with tf.device(device):
                # Project helper: builds the Keras model + metadata dict.
                model, model_info = get_model(args.model_name, args.batch_size, include_top=not args.no_top)
                partition_at_layer = 11  # NOTE(review): unused; see partitionAtLayer below.
                #print_summary(model_full, line_length=None, positions=None, print_fn=None)
                #sys.exit(0)

                # Backend function evaluating the full model head-to-tail;
                # used in sgxdnn mode to run the "partition 2" tail on GPU.
                get_final_layer_model_output = backend.function(
                    [model.input], [model.layers[-1].output])

                # Queue-based ImageNet validation input pipeline.
                dataset_images, labels = imagenet.load_validation(
                    args.input_dir, args.batch_size,
                    preprocess=model_info['preprocess'],
                    num_preprocessing_threads=1)

                # Project helper: rewrites the model (optionally quantized)
                # and exposes the linear ops' inputs/outputs for verification.
                model, linear_ops_in, linear_ops_out = transform(
                    model, log=False, quantize=args.verify,
                    verif_preproc=args.preproc,
                    bits_w=model_info['bits_w'], bits_x=model_info['bits_x'])

            if args.mode == 'sgxdnn':
                #sgxutils = SGXDNNUtils(args.use_sgx, num_enclaves=args.batch_size)
                #sgxutils = SGXDNNUtils(args.use_sgx, num_enclaves=2)
                #sgxutils = SGXDNNUtils(args.use_sgx)

                # Layer index at which the model would be split between
                # enclave (part 1) and GPU (part 2).
                partitionAtLayer = 18
                dtype = np.float32 if not args.verify else DTYPE_VERIFY
                #model_json_part1, weights_part1, model_json_part2, weights_part2 = model_to_json(sess, model, args.preproc, dtype=dtype, bits_w=model_info['bits_w'], bits_x=model_info['bits_x'], partitionAtLayer=partitionAtLayer)
                #sgxutils.load_model(model_json_part1, weights_part1, dtype=dtype, verify=args.verify, verify_preproc=args.preproc)

            num_classes = np.prod(model.output.get_shape().as_list()[1:])
            print("num_classes: {}".format(num_classes))
            # Expected shape of the intermediate feature map at the partition
            # point (H, W, C) — TODO confirm against partitionAtLayer.
            num_inter_feats = [28, 28, 256]
            print(
                "number of intermediate features: {}".format(num_inter_feats))

            # Only report accuracy when the head is a 1000-way classifier.
            print_acc = (num_classes == 1000)
            res = Results(acc=print_acc)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Full tracing so a Chrome timeline can be written per batch.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            #from multiprocessing.dummy import Pool as ThreadPool
            #pool = ThreadPool(3)

            for i in range(num_batches):
                images, true_labels = sess.run([dataset_images, labels])

                if args.verify:
                    # Fixed-point quantization of inputs for verification.
                    images = np.round(2**model_info['bits_x'] * images)
                print("input images: {}".format(np.sum(np.abs(images))))

                if args.mode in ['tf-gpu', 'tf-cpu']:
                    # Plain TensorFlow inference path with tracing.
                    res.start_timer()
                    preds = sess.run(model.outputs[0],
                                     feed_dict={
                                         model.inputs[0]: images,
                                         backend.learning_phase(): 0
                                     },
                                     options=run_options,
                                     run_metadata=run_metadata)
                    print(np.sum(np.abs(images)), np.sum(np.abs(preds)))
                    preds = np.reshape(preds, (args.batch_size, num_classes))
                    res.end_timer(size=len(images))
                    res.record_acc(preds, true_labels)
                    res.print_results()

                    # Dump Chrome trace (overwritten every batch).
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)
                else:
                    # sgxdnn mode.
                    res.start_timer()
                    if args.verify:
                        # Run all linear ops on GPU, then (per sample) verify
                        # them inside the enclave.
                        t1 = timer()
                        linear_outputs = sess.run(linear_ops_out,
                                                  feed_dict={
                                                      model.inputs[0]: images,
                                                      backend.learning_phase(): 0
                                                  },
                                                  options=run_options,
                                                  run_metadata=run_metadata)
                        t2 = timer()
                        print("GPU compute time: {:.4f}".format(
                            (t2 - t1) / (1.0 * args.batch_size)))
                        #mod_test(sess, model, images, linear_ops_in, linear_ops_out, verif_preproc=args.preproc)

                        # Worker for the (commented-out) thread-pool variant.
                        def func(data):
                            return sgxutils.predict_and_verify(
                                data[1], data[2], num_classes=num_classes, dtype=dtype, eid_idx=0)

                        if not args.verify_batched:
                            # Per-sample verification in the enclave.
                            start = timer()
                            # Flatten each linear output to (batch, -1) so a
                            # single sample's slice can be taken per layer.
                            linear_outputs_batched = [
                                x.reshape((args.batch_size, -1))
                                for x in linear_outputs
                            ]
                            preds = []
                            # NOTE(review): this reuses loop variable `i`,
                            # clobbering the outer batch counter — rename to
                            # avoid confusion (harmless only because the outer
                            # `i` is not read after this point).
                            for i in range(args.batch_size):
                                t1 = timer()
                                aux_data = [
                                    x[i] for x in linear_outputs_batched
                                ]
                                pred = sgxutils.predict_and_verify(
                                    images[i:i + 1], aux_data,
                                    num_classes=num_classes, dtype=dtype)
                                t2 = timer()
                                print("verify time: {:.4f}".format((t2 - t1)))
                                preds.append(pred)
                            preds = np.vstack(preds)
                            end = timer()
                            print("avg verify time: {:.4f}".format(
                                (end - start) / (1.0 * args.batch_size)))
                            #all_data = [(i, images[i:i+1], [x[i] for x in linear_outputs_batched]) for i in range(args.batch_size)]
                            #preds = np.vstack(pool.map(func, all_data))
                        else:
                            # Whole-batch verification in one enclave call.
                            preds = sgxutils.predict_and_verify(
                                images, linear_outputs,
                                num_classes=num_classes, dtype=dtype)
                    else:
                        # Worker for the (commented-out) thread-pool variant.
                        def func(data):
                            return sgxutils.predict(data[1], num_classes=num_classes, eid_idx=0)

                        #all_data = [(i, images[i:i+1]) for i in range(args.batch_size)]
                        #preds = np.vstack(pool.map(func, all_data))

                        with tf.device(device):
                            # Placeholder for the enclave's partition-1 output;
                            # the real enclave call below is commented out, so
                            # this zero tensor is what actually gets fed to the
                            # GPU tail. NOTE(review): shape (4, 224, 224, 3)
                            # does not match num_inter_feats [28, 28, 256] —
                            # confirm intended shape.
                            intermediate_feats = np.zeros((4, 224, 224, 3))
                        preds = []
                        # NOTE(review): inner loop also shadows outer `i`.
                        for i in range(args.batch_size):
                            # Overloading num_classes variable to collect intermediate feature maps
                            # Predict partition 1 in enclave
                            #intermediate_feats = sgxutils.predict(images[i:i + 1], num_classes=0, is_intermediate=True, num_inter_feats=num_inter_feats)
                            print("intermediate features shape: {}".format(
                                intermediate_feats.shape))
                            #print("intermediate features: {}".format(intermediate_feats))

                            # Send data to gpu
                            start_inter_time = time.time()
                            #device = '/cpu:0'
                            with tf.device(device):
                                # Predict partition 2 outside enclave, in GPU
                                pred = get_final_layer_model_output(
                                    [intermediate_feats])[0]
                                print("pred shape: {}".format(pred.shape))
                            print("GPU compute time: {}".format(
                                time.time() - start_inter_time))
                            preds.append(pred)
                        preds = np.vstack(preds)

                    #res.end_timer(size=len(images))
                    #res.record_acc(preds, true_labels)
                    #res.print_results()
                    #tl = timeline.Timeline(run_metadata.step_stats)
                    #ctf = tl.generate_chrome_trace_format()
                    #ctf_j = json.loads(ctf)
                    #events = [e["ts"] for e in ctf_j["traceEvents"] if "ts" in e]
                    #print("TF Timeline: {:.4f}".format((np.max(events) - np.min(events)) / (1000000.0 * args.batch_size)))

                sys.stdout.flush()

            # Shut down the input pipeline and release enclave resources.
            coord.request_stop()
            coord.join(threads)
            if sgxutils is not None:
                sgxutils.destroy()
def main(_):
    """Run Slalom-style inference: quantized model execution on GPU with
    optional input blinding (privacy) and/or integrity verification backed by
    an SGX enclave (`SGXDNNUtils`).

    Per batch: load validation images, quantize them, optionally precompute
    blinding factors and blind the inputs inside the enclave, run the
    transformed model, record accuracy/latency, and dump a Chrome trace.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        num_batches = args.max_num_batches
        batch_size = args.batch_size

        device = '/gpu:0'
        config = tf.ConfigProto(log_device_placement=False)
        config.allow_soft_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.90
        config.gpu_options.allow_growth = True

        # Feature flags derived from CLI args:
        #   quantize  — always on (Slalom operates on fixed-point values)
        #   slalom    — outsource linear layers unless disabled
        #   blinded   — privacy via input blinding
        #   integrity — verify GPU results in the enclave
        #   simulate  — skip the real blinding-factor machinery
        quantize = True
        slalom = not args.no_slalom
        blinded = args.blinding
        integrity = args.integrity
        simulate = args.simulate

        with tf.Session(config=config) as sess:
            with tf.device(device):
                # Project helper: builds the Keras model + metadata dict.
                model, model_info = get_model(args.model_name, batch_size, include_top=True, double_prec=False)

                # Queue-based ImageNet validation input pipeline.
                dataset_images, labels = imagenet.load_validation(
                    args.input_dir, batch_size,
                    preprocess=model_info['preprocess'])
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                #sgxutils = SGXDNNUtils(args.use_sgx, num_enclaves=batch_size)
                sgxutils = SGXDNNUtils(args.use_sgx, num_enclaves=1)

                # One FIFO queue per linear layer to hold precomputed
                # unblinding factors; only needed for real (non-simulated)
                # blinding.
                num_linear_layers = len(get_all_linear_layers(model))
                if blinded and not simulate:
                    queues = [
                        tf.FIFOQueue(capacity=num_batches + 1, dtypes=[tf.float32])
                        for _ in range(num_linear_layers)
                    ]
                else:
                    queues = None

                # Rewrite the model for quantized/Slalom execution; queues
                # must exist before this call so the transformed graph can
                # consume the unblinding factors.
                model, linear_ops_in, linear_ops_out = transform(
                    model, log=False, quantize=quantize, verif_preproc=True,
                    slalom=slalom, slalom_integrity=integrity,
                    slalom_privacy=blinded,
                    bits_w=model_info['bits_w'], bits_x=model_info['bits_x'],
                    sgxutils=sgxutils, queues=queues)

                # Serialize the transformed model and load it into the enclave
                # for verification.
                dtype = np.float32
                model_json, weights = model_to_json(
                    sess, model, dtype=dtype, verif_preproc=True,
                    slalom_privacy=blinded,
                    bits_w=model_info['bits_w'], bits_x=model_info['bits_x'])
                sgxutils.load_model(model_json, weights, dtype=dtype,
                                    verify=True, verify_preproc=True)

                num_classes = np.prod(model.output.get_shape().as_list()[1:])
                print("num_classes: {}".format(num_classes))
                # Only report accuracy when the head is a 1000-way classifier.
                print_acc = (num_classes == 1000)
                res = Results(acc=print_acc)

                # Full tracing so a Chrome timeline can be written per batch.
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                # Initialize the enclave's Slalom state (integrity and/or
                # privacy modes) for this batch size.
                sgxutils.slalom_init(integrity, (blinded and not simulate), batch_size)

                if blinded and not simulate:
                    # Graph ops/placeholders used to precompute blinding and
                    # unblinding factors for each linear layer.
                    in_ph, zs, out_ph, queue_ops, temps, out_funcs = build_blinding_ops(
                        model, queues, batch_size)

                for i in range(num_batches):
                    images, true_labels = sess.run([dataset_images, labels])

                    if quantize:
                        # Fixed-point quantization of inputs.
                        images = np.round(2**model_info['bits_x'] * images).astype(
                            np.float32)
                    print("input images: {}".format(np.sum(np.abs(images))))

                    if blinded and not simulate:
                        # Fill the per-layer queues with fresh unblinding
                        # factors, then blind this batch's inputs inside the
                        # enclave.
                        prepare_blinding_factors(
                            sess, model, sgxutils, in_ph, zs, out_ph,
                            queue_ops, batch_size,
                            num_batches=1,
                            #inputs=images,
                            temps=temps, out_funcs=out_funcs
                        )
                        images = sgxutils.slalom_blind_input(images)
                        # float64 cast avoids overflow when summing blinded
                        # (large-magnitude) values for the debug printout.
                        print("blinded images: {}".format(
                            (np.min(images), np.max(images),
                             np.sum(np.abs(images.astype(np.float64))))))
                        print(images.reshape(-1)[:3], images.reshape(-1)[-3:])

                    # Timed inference through the transformed model.
                    res.start_timer()
                    preds = sess.run(model.outputs[0],
                                     feed_dict={
                                         model.inputs[0]: images,
                                         backend.learning_phase(): 0
                                     },
                                     options=run_options,
                                     run_metadata=run_metadata)
                    preds = np.reshape(preds, (batch_size, -1))
                    res.end_timer(size=len(images))
                    res.record_acc(preds, true_labels)
                    res.print_results()

                    # Dump Chrome trace, named by model and device
                    # (device[1:4] is e.g. 'gpu'); overwritten every batch.
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open(
                            'timeline_{}_{}.json'.format(args.model_name,
                                                         device[1:4]),
                            'w') as f:
                        f.write(ctf)

                    sys.stdout.flush()

                # Shut down the input pipeline and release enclave resources.
                coord.request_stop()
                coord.join(threads)
                if sgxutils is not None:
                    sgxutils.destroy()