Ejemplo n.º 1
0
def infer_perf_pb(pb_model_file, val_data, inputs=["x:0"], outputs=["Identity:0"]):
    """Measure accuracy, throughput and batch-size-1 latency of a frozen graph.

    The model is loaded with ``alexnet.load_pb`` and wrapped by
    ``get_concrete_function``. The whole validation set is run once (timed,
    used for accuracy and throughput), then a single sample is run
    repeatedly to estimate latency.

    Args:
        pb_model_file: Passed unchanged to ``alexnet.load_pb``.
        val_data: A 3-item sequence ``(x_test, y_test, label_test)``. Only
            ``x_test`` and ``label_test`` are used here; ``x_test`` must
            expose ``shape[0]`` and support slicing.
        inputs: Forwarded to ``get_concrete_function``. The default list is
            only forwarded by this function, never modified here.
        outputs: Forwarded to ``get_concrete_function``; same note as
            ``inputs``.

    Returns:
        A tuple ``(accuracy, throughput, latency)`` where ``throughput`` is
        ``x_test.shape[0]`` divided by the elapsed seconds of the full-set
        run, and ``latency`` is the mean time per single-sample call in
        milliseconds, excluding the warm-up calls.
    """
    x_test, _y_test, label_test = val_data
    q_model = alexnet.load_pb(pb_model_file)
    concrete_function = get_concrete_function(graph_def=q_model.as_graph_def(),
                                              inputs=inputs,
                                              outputs=outputs,
                                              print_graph=True)

    # NOTE(review): the keyword ``x`` is hard-coded; presumably it matches the
    # default input tensor "x:0" -- confirm if callers pass other ``inputs``.
    # perf_counter() is monotonic and high resolution, so the interval cannot
    # be distorted by system clock adjustments the way time.time() can.
    bt = time.perf_counter()
    predictions = concrete_function(x=tf.constant(x_test))
    et = time.perf_counter()

    accuracy = calc_accuracy(predictions[0], label_test)
    print('accuracy:', accuracy)
    throughput = x_test.shape[0] / (et - bt)
    print('max throughput(fps):', throughput)

    # latency when BS=1
    times = 1000
    warmup = 20
    single_test = x_test[:1]

    # Untimed warm-up calls, kept out of the measurement.
    for _ in range(warmup):
        concrete_function(x=tf.constant(single_test))

    # Timed calls. tf.constant() stays inside the loop so the measured work
    # per call is the same as in the full-set run above.
    timed_runs = times - warmup
    bt = time.perf_counter()
    for _ in range(timed_runs):
        concrete_function(x=tf.constant(single_test))
    et = time.perf_counter()

    latency = (et - bt) * 1000 / timed_runs
    print('latency(ms):', latency)

    return accuracy, throughput, latency
Ejemplo n.º 2
0
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Run ``inc.Quantization`` on the graph stored at *input_graph_path*.

    Args:
        input_graph_path: Passed to ``alexnet.load_pb`` to obtain the model.
        yaml_config: Passed to the ``inc.Quantization`` constructor.
        batch_size: Passed to the ``Dataloader`` constructor.

    Returns:
        Whatever the ``inc.Quantization`` instance returns when called.
    """
    model = alexnet.load_pb(input_graph_path)
    quantizer = inc.Quantization(yaml_config)
    loader = Dataloader(batch_size)

    # The same Dataloader instance is supplied as both q_dataloader and
    # eval_dataloader; no custom evaluation function is given.
    call_kwargs = {
        "q_dataloader": loader,
        "eval_func": None,
        "eval_dataloader": loader,
    }
    return quantizer(model, **call_kwargs)
Ejemplo n.º 3
0
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Tune the graph stored at *input_graph_path* with ``ilit.Tuner``.

    Args:
        input_graph_path: Passed to ``alexnet.load_pb`` to obtain the model.
        yaml_config: Passed to the ``ilit.Tuner`` constructor.
        batch_size: Passed to the ``Dataloader`` constructor.

    Returns:
        The result of ``ilit.Tuner.tune`` for the loaded graph.
    """
    source_graph = alexnet.load_pb(input_graph_path)
    tune = ilit.Tuner(yaml_config).tune
    shared_loader = Dataloader(batch_size)

    # A single Dataloader instance is used for both the q_dataloader and
    # eval_dataloader arguments; eval_func is explicitly left as None.
    return tune(source_graph,
                q_dataloader=shared_loader,
                eval_func=None,
                eval_dataloader=shared_loader)