def _predict(dense_features, embedding_columns, row_ptrs, config_file, model_name):
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=64,
        hit_rate_threshold=0.5,
        dense_model_file=DENSE_FILE,
        sparse_model_files=[SPARSE_FILES],
        device_id=0,
        use_gpu_embedding_cache=True,
        cache_size_percentage=0.1,
        i64_input_key=True,
        use_mixed_precision=False,
    )
    inference_session = CreateInferenceSession(config_file, inference_params)
    output = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    # dump the inputs and predictions to CSV for later comparison
    test_data_path = DATA_DIR + "test/"
    embedding_columns_df = pd.DataFrame()
    embedding_columns_df["embedding_columns"] = embedding_columns
    embedding_columns_df.to_csv(test_data_path + "embedding_columns.csv")
    row_ptrs_df = pd.DataFrame()
    row_ptrs_df["row_ptrs"] = row_ptrs
    row_ptrs_df.to_csv(test_data_path + "row_ptrs.csv")
    output_df = pd.DataFrame()
    output_df["output"] = output
    output_df.to_csv(test_data_path + "output.csv")
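# A minimal sketch of how the raw predict() inputs above can be assembled,
# assuming every slot is one-hot (exactly one key per slot per sample). The
# helper name build_one_hot_inputs is illustrative, not part of the HugeCTR API.
def build_one_hot_inputs(keys_per_sample):
    # keys_per_sample: one inner list of slot keys per sample,
    # e.g. [[1, 5], [2, 7]] for two samples with two slots each
    embedding_columns = [key for sample in keys_per_sample for key in sample]
    # CSR-style offsets: with one key per row, they are simply 0..n
    row_ptrs = list(range(len(embedding_columns) + 1))
    return embedding_columns, row_ptrs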
def hugectr2onnx_dcn_test(batch_size, num_batches, data_source, data_file, graph_config,
                          dense_model, sparse_models, onnx_model_path, model_name):
    hugectr2onnx.converter.convert(onnx_model_path, graph_config, dense_model, True, sparse_models)
    label, dense, keys = read_samples_for_dcn(data_file, batch_size * num_batches, slot_num=26)
    sess = ort.InferenceSession(onnx_model_path)
    res = sess.run(
        output_names=[sess.get_outputs()[0].name],
        input_feed={sess.get_inputs()[0].name: dense, sess.get_inputs()[1].name: keys},
    )
    res = res[0].reshape(batch_size * num_batches,)
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=batch_size,
        hit_rate_threshold=1,
        dense_model_file=dense_model,
        sparse_model_files=sparse_models,
        device_id=0,
        use_gpu_embedding_cache=True,
        cache_size_percentage=0.6,
        i64_input_key=False,
    )
    inference_session = CreateInferenceSession(graph_config, inference_params)
    predictions = inference_session.predict(
        num_batches, data_source, hugectr.DataReaderType_t.Norm, hugectr.Check_t.Sum)
    compare_array_approx(res, predictions, model_name, 1e-3, 1e-2)
def dcn_inference(config_file, model_name, data_path, use_gpu_embedding_cache):
    # read data from file
    with open(data_path) as data_file:
        labels = [int(item) for item in data_file.readline().split(' ')]
        dense_features = [float(item) for item in data_file.readline().split(' ')]
        embedding_columns = [int(item) for item in data_file.readline().split(' ')]
        row_ptrs = [int(item) for item in data_file.readline().split(' ')]
    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=4096,
        hit_rate_threshold=0.6,
        dense_model_file="/hugectr/test/utest/_dense_10000.model",
        sparse_model_files=["/hugectr/test/utest/0_sparse_10000.model"],
        device_id=0,
        use_gpu_embedding_cache=use_gpu_embedding_cache,
        cache_size_percentage=0.9,
        i64_input_key=False,
    )
    inference_session = CreateInferenceSession(config_file, inference_params)
    # make prediction and calculate accuracy
    output = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    accuracy = calculate_accuracy(labels, output)
    if use_gpu_embedding_cache:
        print("[HUGECTR][INFO] Use GPU embedding cache, prediction number samples: {}, accuracy: {}"
              .format(len(labels), accuracy))
    else:
        print("[HUGECTR][INFO] Use CPU parameter server, prediction number samples: {}, accuracy: {}"
              .format(len(labels), accuracy))
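# Hypothetical invocation of dcn_inference, exercising both the GPU embedding
# cache and the CPU parameter server paths (the config and data paths below
# are placeholders, not files shipped with HugeCTR):
if __name__ == "__main__":
    config_file = "/hugectr/test/utest/dcn.json"
    data_path = "/hugectr/test/utest/dcn_data.txt"
    dcn_inference(config_file, "dcn", data_path, True)   # GPU embedding cache
    dcn_inference(config_file, "dcn", data_path, False)  # CPU parameter server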
def hugectr2onnx_din_test(batch_size, num_batches, data_source, data_file, graph_config,
                          dense_model, sparse_models, onnx_model_path, model_name):
    hugectr2onnx.converter.convert(onnx_model_path, graph_config, dense_model, True, sparse_models)
    dense, user, good, cate = read_samples_for_din(data_file, batch_size * num_batches, slot_num=23)
    sess = ort.InferenceSession(onnx_model_path)
    res = sess.run(
        output_names=[sess.get_outputs()[0].name],
        input_feed={
            sess.get_inputs()[0].name: dense,
            sess.get_inputs()[1].name: user,
            sess.get_inputs()[2].name: good,
            sess.get_inputs()[3].name: cate,
        },
    )
    res = res[0].reshape(batch_size * num_batches,)
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=batch_size,
        hit_rate_threshold=1,
        dense_model_file=dense_model,
        sparse_model_files=sparse_models,
        device_id=0,
        use_gpu_embedding_cache=True,
        cache_size_percentage=0.6,
        i64_input_key=True,
    )
    inference_session = CreateInferenceSession(graph_config, inference_params)
    slot_size_array = [
        192403, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 801
    ]
    predictions = inference_session.predict(
        num_batches, data_source, hugectr.DataReaderType_t.Parquet, hugectr.Check_t.Non,
        slot_size_array)
    compare_array_approx(res, predictions, model_name, 1e-2, 1e-1)
model.graph_to_json(graph_config_file="/dump_infer/wdl.json")
model.fit(max_iter=2300, display=200, eval_interval=2000, snapshot=2000,
          snapshot_prefix="/dump_infer/wdl")
model.export_predictions("/dump_infer/wdl_pred_" + str(2000),
                         "/dump_infer/wdl_label_" + str(2000))

import hugectr
from hugectr.inference import InferenceParams, CreateInferenceSession
import numpy as np

batch_size = 16384
num_batches = 1
data_source = "./wdl_data/file_list_test.txt"
inference_params = InferenceParams(
    model_name="wdl",
    max_batchsize=batch_size,
    hit_rate_threshold=1.0,
    dense_model_file="/dump_infer/wdl_dense_2000.model",
    sparse_model_files=["/dump_infer/wdl0_sparse_2000.model",
                        "/dump_infer/wdl1_sparse_2000.model"],
    device_id=0,
    use_gpu_embedding_cache=False,
    cache_size_percentage=1.0,
    i64_input_key=False,
    use_mixed_precision=True,
    use_cuda_graph=True,
)
inference_session = CreateInferenceSession("/dump_infer/wdl.json", inference_params)
predictions = inference_session.predict(
    num_batches=num_batches,
    source=data_source,
    data_reader_type=hugectr.DataReaderType_t.Norm,
    check_type=hugectr.Check_t.Sum,
)
# compare inference output against the predictions exported during training
ground_truth = np.loadtxt("/dump_infer/wdl_pred_2000")
diff = predictions - ground_truth
mse = np.mean(diff * diff)
if mse > 1e-3:
    raise RuntimeError("Too large mse between WDL multi hot inference and training: {}".format(mse))
"/dump_infer/multi_cross_entropy_loss_pred_" + str(1000), "/dump_infer/multi_cross_entropy_loss_label_" + str(1000)) from hugectr.inference import InferenceParams, CreateInferenceSession from mpi4py import MPI import hugectr import pandas as pd import numpy as np inference_params = InferenceParams( model_name="multi_cross_entropy_loss", max_batchsize=1024, hit_rate_threshold=1.0, dense_model_file="/dump_infer/multi_cross_entropy_loss_dense_1000.model", sparse_model_files=[ "/dump_infer/multi_cross_entropy_loss0_sparse_1000.model" ], device_id=0, use_gpu_embedding_cache=True, cache_size_percentage=0.5, use_mixed_precision=False, i64_input_key=True, ) inference_session = CreateInferenceSession( '/dump_infer/multi_cross_entropy_loss.json', inference_params) preds = inference_session.predict( num_batches=1, source="./multi_cross/data/test/_file_list.txt", data_reader_type=hugectr.DataReaderType_t.Parquet, check_type=hugectr.Check_t.Sum,
def wdl_inference(model_name, network_file, dense_file, embedding_file_list, data_file, enable_cache):
    CATEGORICAL_COLUMNS = ["C" + str(x) for x in range(1, 27)] + ["C1_C2", "C3_C4"]
    CONTINUOUS_COLUMNS = ["I" + str(x) for x in range(1, 14)]
    LABEL_COLUMNS = ["label"]
    emb_size = [
        202546, 18795, 14099, 6889, 18577, 4, 6349, 1247, 48, 186730, 71084, 66832,
        11, 2158, 7415, 61, 4, 923, 15, 202617, 143251, 198823, 61025, 9057, 73, 34,
        225812, 354963
    ]
    # offset each slot's keys into a single global key space
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]
    # expected predictions for the ten test samples
    result = [
        0.05634006857872009, 0.04185676947236061, 0.007268941029906273,
        0.10255379974842072, 0.14059557020664215, 0.011040309444069862,
        0.005499477963894606, 0.24404558539390564, 0.012491216883063316,
        0.005486942362040281
    ]
    test_df = pd.read_csv(data_file)
    config_file = network_file
    row_ptrs = list(range(0, 21)) + list(range(0, 261))
    dense_features = list(test_df[CONTINUOUS_COLUMNS].values.flatten())
    test_df[CATEGORICAL_COLUMNS] = test_df[CATEGORICAL_COLUMNS].astype(np.int64)
    embedding_columns = list((test_df[CATEGORICAL_COLUMNS] + shift).values.flatten())
    hash_map_database = hugectr.inference.VolatileDatabaseParams()
    rocksdb_database = hugectr.inference.PersistentDatabaseParams(
        path="/hugectr/test/utest/wdl_test_files/rocksdb")
    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=64,
        hit_rate_threshold=1.0,
        dense_model_file=dense_file,
        sparse_model_files=embedding_file_list,
        device_id=0,
        use_gpu_embedding_cache=enable_cache,
        cache_size_percentage=0.9,
        i64_input_key=True,
        use_mixed_precision=False,
        number_of_worker_buffers_in_pool=4,
        number_of_refresh_buffers_in_pool=1,
        deployed_devices=[0],
        default_value_for_each_table=[0.0, 0.0],
        volatile_db=hash_map_database,
        persistent_db=rocksdb_database,
    )
    inference_session = CreateInferenceSession(config_file, inference_params)
    # predict for the first time
    output1 = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    miss1 = np.mean((np.array(output1) - np.array(result)) ** 2)
    # refresh embedding cache; a no-op here since the parameter server holds no updates
    inference_session.refresh_embedding_cache()
    # predict for the second time
    output2 = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    miss2 = np.mean((np.array(output2) - np.array(result)) ** 2)
    print("WDL multi-embedding table inference result should be {}".format(result))
    miss = max(miss1, miss2)
    cache_desc = "using GPU cache" if enable_cache else "without GPU cache"
    if miss > 0.0001:
        raise RuntimeError(
            "WDL multi-embedding table inference {}, prediction error is greater "
            "than threshold: {}, error is {}".format(cache_desc, 0.0001, miss))
    print(
        "[HUGECTR][INFO] WDL multi-embedding table inference {}, prediction error "
        "is less than threshold: {}, error is {}".format(cache_desc, 0.0001, miss))
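# Why the shift trick above works, as a self-contained sketch: keys in each
# slot start from 0 in the raw data, so adding the cumulative size of the
# preceding slots maps every slot into a disjoint range of one global key
# space (the toy sizes below are illustrative, not the WDL vocabularies):
import numpy as np
emb_size = [5, 3, 4]
shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]  # -> [0, 5, 8]
raw_keys = np.array([[2, 1, 3]])                   # one sample, one key per slot
print(raw_keys + shift)                            # -> [[2, 6, 11]], no collisions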
def movie_inference(model_name, network_file, dense_file, embedding_file_list, data_file, enable_cache):
    CATEGORICAL_COLUMNS = ["userId", "movieId"]
    LABEL_COLUMNS = ["rating"]
    emb_size = [162542, 56586]
    # offset each slot's keys into a single global key space
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]
    # expected predictions for the ten test samples
    result = [
        0.8336379528045654, 0.24868586659431458, 0.4039016664028168,
        0.9553083777427673, 0.6617599725723267, 0.5613522529602051,
        0.16344544291496277, 0.537512481212616, 0.5185080766677856,
        0.2947561740875244
    ]
    test_df = pd.read_parquet(data_file)
    config_file = network_file
    row_ptrs = list(range(0, 21))
    dense_features = []  # the model takes no dense input
    test_df[CATEGORICAL_COLUMNS] = test_df[CATEGORICAL_COLUMNS].astype(np.int64)
    embedding_columns = list((test_df.head(10)[CATEGORICAL_COLUMNS] + shift).values.flatten())
    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=64,
        hit_rate_threshold=1.0,
        dense_model_file=dense_file,
        sparse_model_files=embedding_file_list,
        device_id=0,
        use_gpu_embedding_cache=enable_cache,
        cache_size_percentage=0.9,
        i64_input_key=True,
        use_mixed_precision=False,
    )
    inference_session = CreateInferenceSession(config_file, inference_params)
    output1 = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    miss1 = np.mean((np.array(output1) - np.array(result)) ** 2)
    inference_session.refresh_embedding_cache()
    output2 = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    miss2 = np.mean((np.array(output2) - np.array(result)) ** 2)
    print("Movielens model (no dense input) inference result should be {}".format(result))
    miss = max(miss1, miss2)
    cache_desc = "using GPU cache" if enable_cache else "without GPU cache"
    if miss > 0.0001:
        raise RuntimeError(
            "Movielens model (no dense input) inference {}, prediction error is "
            "greater than threshold: {}, error is {}".format(cache_desc, 0.0001, miss))
    print(
        "[HUGECTR][INFO] Movielens model (no dense input) inference {}, prediction "
        "error is less than threshold: {}, error is {}".format(cache_desc, 0.0001, miss))
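# Hypothetical call site for movie_inference (every path below is an
# illustrative placeholder, not a file shipped with HugeCTR):
movie_inference(
    model_name="movielens",
    network_file="/dump_infer/movielens.json",
    dense_file="/dump_infer/movielens_dense_0.model",
    embedding_file_list=["/dump_infer/movielens0_sparse_0.model"],
    data_file="./movielens_data/test.parquet",
    enable_cache=True,
)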