Example #1
import pandas as pd
from hugectr.inference import InferenceParams, CreateInferenceSession

# DENSE_FILE, SPARSE_FILES, and DATA_DIR are constants assumed to be defined
# elsewhere in the script.
def _predict(dense_features, embedding_columns, row_ptrs, config_file, model_name):
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=64,
        hit_rate_threshold=0.5,
        dense_model_file=DENSE_FILE,
        sparse_model_files=[SPARSE_FILES],
        device_id=0,
        use_gpu_embedding_cache=True,
        cache_size_percentage=0.1,
        i64_input_key=True,
        use_mixed_precision=False,
    )
    inference_session = CreateInferenceSession(config_file, inference_params)
    output = inference_session.predict(dense_features, embedding_columns, row_ptrs)

    test_data_path = DATA_DIR + "test/"
    embedding_columns_df = pd.DataFrame()
    embedding_columns_df["embedding_columns"] = embedding_columns
    embedding_columns_df.to_csv(test_data_path + "embedding_columns.csv")

    row_ptrs_df = pd.DataFrame()
    row_ptrs_df["row_ptrs"] = row_ptrs
    row_ptrs_df.to_csv(test_data_path + "row_ptrs.csv")

    output_df = pd.DataFrame()
    output_df["output"] = output
    output_df.to_csv(test_data_path + "output.csv")
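For context, a minimal sketch of how _predict might be invoked. The batch size, feature count, config path, and model name below are illustrative assumptions, not values from the original test, and the DENSE_FILE, SPARSE_FILES, and DATA_DIR constants must already be defined:

batch_size = 2
slot_num = 26
num_dense = 13

# One dense feature vector per sample.
dense_features = [0.0] * (batch_size * num_dense)
# One embedding key per slot per sample.
embedding_columns = [0] * (batch_size * slot_num)
# CSR-style row pointers: one offset per slot per sample, plus a final offset.
row_ptrs = list(range(batch_size * slot_num + 1))

_predict(dense_features, embedding_columns, row_ptrs,
         "/path/to/dcn.json", "dcn")  # hypothetical config path and model name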
Example #2
import hugectr
import hugectr2onnx
import onnxruntime as ort
from hugectr.inference import InferenceParams, CreateInferenceSession

# read_samples_for_dcn and compare_array_approx are test helpers assumed to be
# defined elsewhere in the script; a sketch of compare_array_approx follows the example.
def hugectr2onnx_dcn_test(batch_size, num_batches, data_source, data_file,
                          graph_config, dense_model, sparse_models,
                          onnx_model_path, model_name):
    hugectr2onnx.converter.convert(onnx_model_path, graph_config, dense_model,
                                   True, sparse_models)
    label, dense, keys = read_samples_for_dcn(data_file,
                                              batch_size * num_batches,
                                              slot_num=26)
    sess = ort.InferenceSession(onnx_model_path)
    res = sess.run(output_names=[sess.get_outputs()[0].name],
                   input_feed={
                       sess.get_inputs()[0].name: dense,
                       sess.get_inputs()[1].name: keys
                   })
    res = res[0].reshape(batch_size * num_batches)

    inference_params = InferenceParams(model_name=model_name,
                                       max_batchsize=batch_size,
                                       hit_rate_threshold=1,
                                       dense_model_file=dense_model,
                                       sparse_model_files=sparse_models,
                                       device_id=0,
                                       use_gpu_embedding_cache=True,
                                       cache_size_percentage=0.6,
                                       i64_input_key=False)
    inference_session = CreateInferenceSession(graph_config, inference_params)
    predictions = inference_session.predict(num_batches, data_source,
                                            hugectr.DataReaderType_t.Norm,
                                            hugectr.Check_t.Sum)
    compare_array_approx(res, predictions, model_name, 1e-3, 1e-2)
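compare_array_approx is not shown on this page. A minimal sketch consistent with how it is called above; treating the last two arguments as absolute and relative tolerances is an assumption, not the original implementation:

import numpy as np

def compare_array_approx(a, b, model_name, atol, rtol):
    # Element-wise comparison within absolute and relative tolerances.
    a, b = np.asarray(a), np.asarray(b)
    if np.allclose(a, b, rtol=rtol, atol=atol):
        print("{}: ONNX and HugeCTR predictions match".format(model_name))
    else:
        raise RuntimeError(
            "{}: ONNX and HugeCTR predictions diverge beyond tolerance".format(model_name))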
Example #3
from hugectr.inference import InferenceParams, CreateInferenceSession

def dcn_inference(config_file, model_name, data_path, use_gpu_embedding_cache):
    # read data from file: one space-separated line each of labels, dense
    # features, embedding keys, and CSR row pointers
    with open(data_path) as data_file:
        labels = [int(item) for item in data_file.readline().split(' ')]
        dense_features = [float(item) for item in data_file.readline().split(' ')]
        embedding_columns = [int(item) for item in data_file.readline().split(' ')]
        row_ptrs = [int(item) for item in data_file.readline().split(' ')]
    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=4096,
        hit_rate_threshold=0.6,
        dense_model_file="/hugectr/test/utest/_dense_10000.model",
        sparse_model_files=["/hugectr/test/utest/0_sparse_10000.model"],
        device_id=0,
        use_gpu_embedding_cache=use_gpu_embedding_cache,
        cache_size_percentage=0.9,
        i64_input_key=False)
    inference_session = CreateInferenceSession(config_file, inference_params)
    # make prediction and calculate accuracy
    output = inference_session.predict(dense_features, embedding_columns,
                                       row_ptrs)
    accuracy = calculate_accuracy(labels, output)
    if use_gpu_embedding_cache:
        print(
            "[HUGECTR][INFO] Use gpu embedding cache, prediction number samples: {}, accuracy: {}"
            .format(len(labels), accuracy))
    else:
        print(
            "[HUGECTR][INFO] Use cpu parameter server, prediction number samples: {}, accuracy: {}"
            .format(len(labels), accuracy))
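calculate_accuracy is referenced but not defined in this snippet. For binary click labels and sigmoid outputs, a plausible stand-in (an assumption, not the original helper) thresholds at 0.5:

def calculate_accuracy(labels, output):
    # Count predictions that match the label after thresholding at 0.5.
    predictions = [1 if prob > 0.5 else 0 for prob in output]
    correct = sum(1 for label, pred in zip(labels, predictions) if label == pred)
    return correct / len(labels)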
Example #4
import hugectr
import hugectr2onnx
import onnxruntime as ort
from hugectr.inference import InferenceParams, CreateInferenceSession

# read_samples_for_din and compare_array_approx are test helpers assumed to be
# defined elsewhere in the script.
def hugectr2onnx_din_test(batch_size, num_batches, data_source, data_file,
                          graph_config, dense_model, sparse_models,
                          onnx_model_path, model_name):
    hugectr2onnx.converter.convert(onnx_model_path, graph_config, dense_model,
                                   True, sparse_models)
    dense, user, good, cate = read_samples_for_din(data_file,
                                                   batch_size * num_batches,
                                                   slot_num=23)
    sess = ort.InferenceSession(onnx_model_path)
    res = sess.run(output_names=[sess.get_outputs()[0].name],
                   input_feed={
                       sess.get_inputs()[0].name: dense,
                       sess.get_inputs()[1].name: user,
                       sess.get_inputs()[2].name: good,
                       sess.get_inputs()[3].name: cate
                   })
    res = res[0].reshape(batch_size * num_batches)

    inference_params = InferenceParams(model_name=model_name,
                                       max_batchsize=batch_size,
                                       hit_rate_threshold=1,
                                       dense_model_file=dense_model,
                                       sparse_model_files=sparse_models,
                                       device_id=0,
                                       use_gpu_embedding_cache=True,
                                       cache_size_percentage=0.6,
                                       i64_input_key=True)
    inference_session = CreateInferenceSession(graph_config, inference_params)
    slot_size_array = [
        192403, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63001, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 801
    ]
    predictions = inference_session.predict(num_batches, data_source,
                                            hugectr.DataReaderType_t.Parquet,
                                            hugectr.Check_t.Non,
                                            slot_size_array)

    compare_array_approx(res, predictions, model_name, 1e-2, 1e-1)
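A hypothetical invocation of hugectr2onnx_din_test; every path below is a placeholder, not taken from the original test:

hugectr2onnx_din_test(
    batch_size=64,
    num_batches=100,
    data_source="./din_data/file_list_test.txt",                              # placeholder
    data_file="./din_data/test/part_0.parquet",                               # placeholder
    graph_config="/onnx_converter/graph_files/din.json",                      # placeholder
    dense_model="/onnx_converter/hugectr_models/din_dense_2000.model",        # placeholder
    sparse_models=["/onnx_converter/hugectr_models/din0_sparse_2000.model"],  # placeholder
    onnx_model_path="/onnx_converter/onnx_models/din.onnx",                   # placeholder
    model_name="din")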
Example #5
import hugectr
from hugectr.inference import InferenceParams, CreateInferenceSession
import numpy as np
batch_size = 16384
num_batches = 1
data_source = "./wdl_data/file_list_test.txt"
inference_params = InferenceParams(model_name="wdl",
                                   max_batchsize=batch_size,
                                   hit_rate_threshold=1.0,
                                   dense_model_file="/dump_infer/wdl_dense_2000.model",
                                   sparse_model_files=["/dump_infer/wdl0_sparse_2000.model",
                                                       "/dump_infer/wdl1_sparse_2000.model"],
                                   device_id=0,
                                   use_gpu_embedding_cache=False,
                                   cache_size_percentage=1.0,
                                   i64_input_key=False,
                                   use_mixed_precision=True,
                                   use_cuda_graph=True)
inference_session = CreateInferenceSession("/dump_infer/wdl.json", inference_params)
predictions = inference_session.predict(num_batches=num_batches,
                                        source=data_source,
                                        data_reader_type=hugectr.DataReaderType_t.Norm,
                                        check_type=hugectr.Check_t.Sum)
ground_truth = np.loadtxt("/dump_infer/wdl_pred_2000")
diff = predictions - ground_truth
mse = np.mean(diff * diff)
if mse > 1e-3:
    raise RuntimeError("Too large mse between WDL multi hot inference and training: {}".format(mse))
else:
    print("WDL multi hot inference results are consistent with those during training, mse: {}".format(mse))
Example #6
import hugectr
import numpy as np
from hugectr.inference import InferenceParams, CreateInferenceSession

inference_params = InferenceParams(
    model_name="multi_cross_entropy_loss",
    max_batchsize=1024,
    hit_rate_threshold=1.0,
    dense_model_file="/dump_infer/multi_cross_entropy_loss_dense_1000.model",
    sparse_model_files=[
        "/dump_infer/multi_cross_entropy_loss0_sparse_1000.model"
    ],
    device_id=0,
    use_gpu_embedding_cache=True,
    cache_size_percentage=0.5,
    use_mixed_precision=False,
    i64_input_key=True,
)

inference_session = CreateInferenceSession(
    '/dump_infer/multi_cross_entropy_loss.json', inference_params)

preds = inference_session.predict(
    num_batches=1,
    source="./multi_cross/data/test/_file_list.txt",
    data_reader_type=hugectr.DataReaderType_t.Parquet,
    check_type=hugectr.Check_t.Sum,
    slot_size_array=[10001, 10001, 10001, 10001],
)

ground_truth = np.loadtxt("/dump_infer/multi_cross_entropy_loss_pred_1000")
predictions = preds.flatten()
diff = predictions - ground_truth
mse = np.mean(diff * diff)
if mse > 1e-3:
    raise RuntimeError(
        "Too large mse between multi_cross_entropy_loss inference and training: {}".format(mse))
Example #7
import numpy as np
import pandas as pd
import hugectr
from hugectr.inference import InferenceParams, CreateInferenceSession

def wdl_inference(model_name, network_file, dense_file, embedding_file_list,
                  data_file, enable_cache):
    CATEGORICAL_COLUMNS = ["C" + str(x)
                           for x in range(1, 27)] + ["C1_C2", "C3_C4"]
    CONTINUOUS_COLUMNS = ["I" + str(x) for x in range(1, 14)]
    LABEL_COLUMNS = ['label']
    emb_size = [
        202546, 18795, 14099, 6889, 18577, 4, 6349, 1247, 48, 186730, 71084,
        66832, 11, 2158, 7415, 61, 4, 923, 15, 202617, 143251, 198823, 61025,
        9057, 73, 34, 225812, 354963
    ]
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]
    result = [
        0.05634006857872009, 0.04185676947236061, 0.007268941029906273,
        0.10255379974842072, 0.14059557020664215, 0.011040309444069862,
        0.005499477963894606, 0.24404558539390564, 0.012491216883063316,
        0.005486942362040281
    ]

    test_df = pd.read_csv(data_file)
    config_file = network_file
    row_ptrs = list(range(0, 21)) + list(range(0, 261))
    dense_features = list(test_df[CONTINUOUS_COLUMNS].values.flatten())
    test_df[CATEGORICAL_COLUMNS] = test_df[CATEGORICAL_COLUMNS].astype(np.int64)
    embedding_columns = list(
        (test_df[CATEGORICAL_COLUMNS] + shift).values.flatten())

    hash_map_database = hugectr.inference.VolatileDatabaseParams()
    rocksdb_database = hugectr.inference.PersistentDatabaseParams(
        path="/hugectr/test/utest/wdl_test_files/rocksdb")

    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(model_name=model_name,
                                       max_batchsize=64,
                                       hit_rate_threshold=1.0,
                                       dense_model_file=dense_file,
                                       sparse_model_files=embedding_file_list,
                                       device_id=0,
                                       use_gpu_embedding_cache=enable_cache,
                                       cache_size_percentage=0.9,
                                       i64_input_key=True,
                                       use_mixed_precision=False,
                                       number_of_worker_buffers_in_pool=4,
                                       number_of_refresh_buffers_in_pool=1,
                                       deployed_devices=[0],
                                       default_value_for_each_table=[0.0, 0.0],
                                       volatile_db=hash_map_database,
                                       persistent_db=rocksdb_database)
    inference_session = CreateInferenceSession(config_file, inference_params)
    # predict for the first time
    output1 = inference_session.predict(dense_features, embedding_columns,
                                        row_ptrs)
    miss1 = np.mean((np.array(output1) - np.array(result))**2)
    # refresh the embedding cache; a no-op here since there is no update for the parameter server
    inference_session.refresh_embedding_cache()
    # predict for the second time
    output2 = inference_session.predict(dense_features, embedding_columns,
                                        row_ptrs)
    miss2 = np.mean((np.array(output2) - np.array(result))**2)
    print("WDL multi-embedding table inference result should be {}".format(
        result))
    miss = max(miss1, miss2)
    if enable_cache:
        if miss > 0.0001:
            raise RuntimeError(
                "WDL multi-embedding table inference using GPU cache, prediction error is greater than threshold: {}, error is {}"
                .format(0.0001, miss))
        else:
            print(
                "[HUGECTR][INFO] WDL multi-embedding table inference using GPU cache, prediction error is less than threshold: {}, error is {}"
                .format(0.0001, miss))
    else:
        if miss > 0.0001:
            raise RuntimeError(
                "WDL multi-embedding table inference without GPU cache, prediction error is greater than threshold: {}, error is {}"
                .format(0.0001, miss))
        else:
            print(
                "[HUGECTR][INFO] WDL multi-embedding table inference without GPU cache, prediction error is less than threshold: {}, error is {}"
                .format(0.0001, miss))
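A hypothetical call to wdl_inference; the paths reuse the /hugectr/test/utest/wdl_test_files/ prefix already seen in the RocksDB path above, but are otherwise assumptions. Two sparse model files are passed to match the two entries in default_value_for_each_table:

wdl_inference(
    model_name="wdl",
    network_file="/hugectr/test/utest/wdl_test_files/wdl.json",       # assumed path
    dense_file="/hugectr/test/utest/wdl_test_files/wdl_dense.model",  # assumed path
    embedding_file_list=["/hugectr/test/utest/wdl_test_files/wdl0_sparse.model",   # assumed path
                         "/hugectr/test/utest/wdl_test_files/wdl1_sparse.model"],  # assumed path
    data_file="/hugectr/test/utest/wdl_test_files/test_data.csv",     # assumed path
    enable_cache=True)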
Example #8
import numpy as np
import pandas as pd
from hugectr.inference import InferenceParams, CreateInferenceSession

def movie_inference(model_name, network_file, dense_file, embedding_file_list,
                    data_file, enable_cache):
    CATEGORICAL_COLUMNS = ['userId', 'movieId']
    LABEL_COLUMNS = ['rating']
    emb_size = [162542, 56586]
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]
    result = [
        0.8336379528045654, 0.24868586659431458, 0.4039016664028168,
        0.9553083777427673, 0.6617599725723267, 0.5613522529602051,
        0.16344544291496277, 0.537512481212616, 0.5185080766677856,
        0.2947561740875244
    ]

    test_df = pd.read_parquet(data_file)
    config_file = network_file
    row_ptrs = list(range(0, 21))
    dense_features = []
    test_df[CATEGORICAL_COLUMNS] = test_df[CATEGORICAL_COLUMNS].astype(np.int64)
    embedding_columns = list(
        (test_df.head(10)[CATEGORICAL_COLUMNS] + shift).values.flatten())

    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(model_name=model_name,
                                       max_batchsize=64,
                                       hit_rate_threshold=1.0,
                                       dense_model_file=dense_file,
                                       sparse_model_files=embedding_file_list,
                                       device_id=0,
                                       use_gpu_embedding_cache=enable_cache,
                                       cache_size_percentage=0.9,
                                       i64_input_key=True,
                                       use_mixed_precision=False)
    inference_session = CreateInferenceSession(config_file, inference_params)
    output1 = inference_session.predict(dense_features, embedding_columns,
                                        row_ptrs)
    miss1 = np.mean((np.array(output1) - np.array(result))**2)
    inference_session.refresh_embedding_cache()
    output2 = inference_session.predict(dense_features, embedding_columns,
                                        row_ptrs)
    miss2 = np.mean((np.array(output2) - np.array(result))**2)
    print(
        "Movielens model(no dense input) inference result should be {}".format(
            result))
    miss = max(miss1, miss2)
    if enable_cache:
        if miss > 0.0001:
            raise RuntimeError(
                "Movielens model(no dense input) inference using GPU cache, prediction error is greater than threshold: {}, error is {}"
                .format(0.0001, miss))
        else:
            print(
                "[HUGECTR][INFO] Movielens model(no dense input) inference using GPU cache, prediction error is less than threshold: {}, error is {}"
                .format(0.0001, miss))
    else:
        if miss > 0.0001:
            raise RuntimeError(
                "Movielens model(no dense input) inference without GPU cache, prediction error is greater than threshold: {}, error is {}"
                .format(0.0001, miss))
        else:
            print(
                "[HUGECTR][INFO] Movielens model(no dense input) inference without GPU cache, prediction error is less than threshold: {}, error is {}"
                .format(0.0001, miss))
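The last two examples share the np.insert(np.cumsum(emb_size), 0, 0)[:-1] idiom, which turns per-table cardinalities into key offsets so that every table's local category IDs land in a disjoint global key range. A small self-contained demonstration with made-up table sizes:

import numpy as np

# Two embedding tables with 5 and 3 categories respectively.
emb_size = [5, 3]
shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]  # array([0, 5])

# Column 0 holds local IDs for table 0, column 1 for table 1.
local_ids = np.array([[0, 0],
                      [4, 2]])
print(local_ids + shift)
# [[0 5]
#  [4 7]]  -> table 1's keys now start after table 0's range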