def infer_perf_pb(pb_model_file, val_data, inputs=None, outputs=None):
    """Measure accuracy, throughput and single-sample latency of a frozen pb model.

    Args:
        pb_model_file: path to the frozen-graph .pb file, loaded via alexnet.load_pb.
        val_data: 3-tuple (x_test, y_test, label_test); y_test is unpacked but unused here.
        inputs: graph input tensor names; defaults to ["x:0"].
        outputs: graph output tensor names; defaults to ["Identity:0"].

    Returns:
        (accuracy, throughput, latency) — accuracy from calc_accuracy, throughput in
        samples/sec over one full-batch run, latency in ms averaged over the
        post-warmup single-sample runs.
    """
    # Avoid mutable list defaults shared across calls.
    if inputs is None:
        inputs = ["x:0"]
    if outputs is None:
        outputs = ["Identity:0"]

    x_test, y_test, label_test = val_data
    q_model = alexnet.load_pb(pb_model_file)
    concrete_function = get_concrete_function(
        graph_def=q_model.as_graph_def(),
        inputs=inputs,
        outputs=outputs,
        print_graph=True)

    # Throughput: time one inference over the whole validation batch.
    bt = time.time()
    _frozen_graph_predictions = concrete_function(x=tf.constant(x_test))
    et = time.time()
    accuracy = calc_accuracy(_frozen_graph_predictions[0], label_test)
    print('accuracy:', accuracy)
    throughput = x_test.shape[0] / (et - bt)
    print('max throughput(fps):', throughput)

    # Latency when BS=1: run `times` iterations, start the clock after `warmup`
    # iterations so graph compilation/caching does not skew the average.
    times = 1000
    warmup = 20
    # Convert once, outside the timed loop — otherwise each iteration's
    # numpy->tensor conversion is counted as inference latency.
    single_test = tf.constant(x_test[:1])
    bt = 0
    for i in range(times):
        if i == warmup:
            bt = time.time()
        _frozen_graph_predictions = concrete_function(x=single_test)
    et = time.time()
    latency = (et - bt) * 1000 / (times - warmup)
    print('latency(ms):', latency)
    return accuracy, throughput, latency
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Quantize a frozen FP32 graph with the inc Quantization API.

    Loads the pb graph, builds a Dataloader with the given batch size, and
    runs quantization driven by the yaml configuration. Returns the
    quantized model.

    NOTE(review): a later definition named `auto_tune` in this file shadows
    this one at import time — confirm which variant is intended to be live.
    """
    dataloader = Dataloader(batch_size)
    graph_fp32 = alexnet.load_pb(input_graph_path)
    quantizer = inc.Quantization(yaml_config)
    return quantizer(
        graph_fp32,
        q_dataloader=dataloader,
        eval_func=None,
        eval_dataloader=dataloader)
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Quantize a frozen FP32 graph with the (older) ilit Tuner API.

    Loads the pb graph, builds a Dataloader with the given batch size, and
    lets the tuner search for a quantized model per the yaml configuration.
    Returns the tuned (quantized) model.
    """
    dataloader = Dataloader(batch_size)
    graph_fp32 = alexnet.load_pb(input_graph_path)
    auto_tuner = ilit.Tuner(yaml_config)
    return auto_tuner.tune(
        graph_fp32,
        q_dataloader=dataloader,
        eval_func=None,
        eval_dataloader=dataloader)