def daal_model(df):
    # Split the rows 80/10/10 into train, test and prediction frames
    TRAIN_SIZE, TEST_SIZE, PRED_SIZE = (int(args.row_count * 0.8),
                                        int(args.row_count * 0.1),
                                        int(args.row_count * 0.1))
    train_df = df.iloc[:TRAIN_SIZE]
    test_df = df.iloc[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
    pred_df = df.iloc[-PRED_SIZE:].copy()  # copy the slice so the 'pred' column can be added safely
    train_x, train_y = train_df[args.src_cols], train_df[args.tgt_col]
    test_x, test_y = test_df[args.src_cols], test_df[args.tgt_col]
    pred_x = pred_df[args.src_cols]

    # Grid search over the daal4py GBT training parameters, scored by MAPE on the test split
    best_score = float("inf")
    best_params = None
    params_grid = ParameterGrid(args.daal_params_grid)
    for params in params_grid:
        model = d4p.gbt_regression_training(**params)
        train_result = model.compute(train_x, train_y)
        predict_alg = d4p.gbt_regression_prediction()
        pred = predict_alg.compute(test_x, train_result.model).prediction
        # flatten both sides so the (n, 1) prediction does not broadcast against the targets
        score = np.mean(np.abs(pred.ravel() - test_y.values.ravel())
                        / test_y.values.ravel())
        if score < best_score:
            best_score = score
            best_params = params

    # Retrain with the best parameters on train + test, then predict the hold-out slice
    best_model = d4p.gbt_regression_training(**best_params)
    best_train_result = best_model.compute(
        df.iloc[:TRAIN_SIZE + TEST_SIZE][args.src_cols],
        df.iloc[:TRAIN_SIZE + TEST_SIZE][args.tgt_col])
    best_predict_alg = d4p.gbt_regression_prediction()
    pred_df['pred'] = best_predict_alg.compute(
        pred_x, best_train_result.model).prediction.ravel()
    return pred_df
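# Usage sketch for daal_model() above. The `args` namespace, column names and
# parameter grid here are illustrative assumptions; the original snippet takes
# them from its surrounding script.
import argparse

import daal4py as d4p
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid

args = argparse.Namespace(
    row_count=1000,
    src_cols=['f0', 'f1', 'f2'],
    tgt_col=['target'],
    daal_params_grid={'maxIterations': [50, 100], 'maxTreeDepth': [4, 6]},
)

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.random((args.row_count, 3)), columns=args.src_cols)
df['target'] = df[args.src_cols].sum(axis=1) + 1.0  # non-zero targets keep the MAPE finite

pred_df = daal_model(df)
print(pred_df[['pred']].head())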
def run_inference(num_observations: int = 1000):
    """Run daal4py GBT inference for the specified number of observations."""
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)

    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()

        # Predict with the daal4py model
        daal_predict_algo = d4p.gbt_regression_prediction(fptype='float')
        daal_prediction = daal_predict_algo.compute(test_df, daal_model)

        end_time = timer()
        total_time = end_time - start_time
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    print(num_observations, ", ", common.calculate_stats(inference_times))
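# run_inference() relies on module-level names (common, NUM_LOOPS, timer,
# daal_model) supplied by the surrounding benchmark script. A plausible setup,
# assuming daal_model is converted from a previously trained XGBoost booster,
# could look like this; the file name is illustrative.
from timeit import default_timer as timer

import daal4py as d4p
import xgboost as xgb

NUM_LOOPS = 100

booster = xgb.Booster()
booster.load_model('model.json')                       # previously trained XGBoost model
daal_model = d4p.get_gbt_model_from_xgboost(booster)   # convert for daal4py inference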
def main():
    maxIterations = 40

    # input data files
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)

    # Read data. Let's use 13 features per observation
    data = read_csv(infile, range(13))
    deps = read_csv(infile, range(13, 14))
    train_result = train_algo.compute(data, deps)

    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # read test data (with same #features)
    pdata = read_csv(testfile, range(13))
    ptdata = read_csv(testfile, range(13, 14))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0], 1)

    return (train_result, predict_result, ptdata)
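# The main() drivers above and below call a read_csv/readcsv helper that is
# not shown here. A minimal sketch, assuming headerless CSV input as in the
# daal4py examples (the default dtype is an assumption), could be:
import numpy as np
import pandas as pd

def read_csv(f, c, t=np.float64):
    # Read only the requested columns and return them with the requested dtype
    return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t)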
def compute(train_indep_data, train_dep_data, test_indep_data, maxIterations):
    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)
    train_result = train_algo.compute(train_indep_data, train_dep_data)

    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # now predict using the model from the training above
    return predict_algo.compute(test_indep_data, train_result.model)
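# Example call for compute() above with synthetic numpy data; the shapes and
# the maxIterations value are arbitrary.
import daal4py as d4p
import numpy as np

rng = np.random.default_rng(42)
X_train = rng.random((500, 13))
y_train = X_train @ rng.random((13, 1))   # dependent data as a (n, 1) column
X_test = rng.random((50, 13))

result = compute(X_train, y_train, X_test, maxIterations=50)
print(result.prediction[:5])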
def predict(self, X):
    # Check that fit has been called
    check_is_fitted(self, ['n_features_'])

    # Input validation
    X = check_array(X, dtype=[np.single, np.double])
    if X.shape[1] != self.n_features_:
        raise ValueError('Shape of input is different from what was seen in `fit`')
    if not hasattr(self, 'daal_model_'):
        raise ValueError(("The class {} instance does not have 'daal_model_' attribute set. "
                          "Call 'fit' with appropriate arguments before using this method.").format(
                              type(self).__name__))

    # Define type of data
    fptype = getFPType(X)

    # Prediction
    predict_algo = d4p.gbt_regression_prediction(fptype=fptype)
    predict_result = predict_algo.compute(X, self.daal_model_)

    return predict_result.prediction.ravel()
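# A fit() counterpart sketch that would set the attributes predict() above
# checks for (n_features_ and daal_model_). The max_iterations hyperparameter
# name is an assumption, and the getFPType import path may differ across
# daal4py versions.
import daal4py as d4p
import numpy as np
from daal4py.sklearn._utils import getFPType
from sklearn.utils.validation import check_X_y

def fit(self, X, y):
    # Validate the input and remember the feature count for predict()
    X, y = check_X_y(X, y, dtype=[np.single, np.double])
    self.n_features_ = X.shape[1]

    # Train the daal4py gradient boosted trees regression model
    fptype = getFPType(X)
    train_algo = d4p.gbt_regression_training(
        fptype=fptype, maxIterations=self.max_iterations)
    train_result = train_algo.compute(X, y.reshape(-1, 1))
    self.daal_model_ = train_result.model
    return self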
def main(readcsv=read_csv, method='defaultDense'):
    maxIterations = 200

    # input data files
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)

    # Read data. Let's use 13 features per observation
    data = readcsv(infile, range(13), t=np.float32)
    deps = readcsv(infile, range(13, 14), t=np.float32)
    train_result = train_algo.compute(data, deps)

    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(13), t=np.float32)
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Prediction result provides prediction
    ptdata = np.loadtxt(testfile, usecols=range(13, 14),
                        delimiter=',', ndmin=2, dtype=np.float32)
    if hasattr(ptdata, 'toarray'):
        # to make the next assertion work with scipy's csr_matrix
        ptdata = ptdata.toarray()
    assert np.square(predict_result.prediction - ptdata).mean() < 1e-2, \
        np.square(predict_result.prediction - ptdata).mean()

    return (train_result, predict_result, ptdata)
t_trans, model_daal = bench.measure_function_time(
    daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)

if hasattr(params, 'n_classes'):
    predict_algo = daal4py.gbt_classification_prediction(
        nClasses=params.n_classes, resultsToEvaluate='computeClassLabels',
        fptype='float')
    t_daal_pred, daal_pred = bench.measure_function_time(
        predict_algo.compute, X_test, model_daal, params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)
else:
    predict_algo = daal4py.gbt_regression_prediction()
    t_daal_pred, daal_pred = bench.measure_function_time(
        predict_algo.compute, X_test, model_daal, params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)

utils.print_output(
    library='modelbuilders', algorithm=f'lightgbm_{task}_and_modelbuilder',
    stages=['lgbm_train', 'lgbm_predict', 'daal4py_predict'],
    params=params,
    functions=[
        'lgbm_dataset', 'lgbm_dataset', 'lgbm_train', 'lgbm_predict',
        'lgbm_to_daal', 'daal_compute'
    ],
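# Standalone sketch of the LightGBM-to-daal4py conversion that the benchmark
# above times, for the regression branch; the lgb.train parameters and data
# shapes are illustrative.
import daal4py
import lightgbm as lgb
import numpy as np

rng = np.random.default_rng(0)
X_train = rng.random((1000, 20))
y_train = X_train @ rng.random(20)
X_test = rng.random((100, 20))

# Train a LightGBM regressor, then convert it to a daal4py GBT model
lgb_train = lgb.Dataset(X_train, y_train)
model_lgbm = lgb.train({'objective': 'regression', 'verbosity': -1},
                       lgb_train, num_boost_round=50)
model_daal = daal4py.get_gbt_model_from_lightgbm(model_lgbm)

# Run inference through daal4py instead of LightGBM
daal_pred = daal4py.gbt_regression_prediction().compute(X_test, model_daal)
print(daal_pred.prediction[:5])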