Example #1
def daal_model(df):
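    # Split the frame 80/10/10 into train / validation / prediction slices;
    # `args` (row_count, src_cols, tgt_col, daal_params_grid) is assumed to come
    # from the surrounding script's argument parsing.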
    TRAIN_SIZE, TEST_SIZE, PRED_SIZE = int(args.row_count * 0.8), int(
        args.row_count * 0.1), int(args.row_count * 0.1)
    train_df, test_df, pred_df = df.iloc[:TRAIN_SIZE], df.iloc[
        TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE], df.iloc[-PRED_SIZE:]
    train_x, train_y = train_df[args.src_cols], train_df[args.tgt_col]
    test_x, test_y = test_df[args.src_cols], test_df[args.tgt_col]
    pred_x = pred_df[args.src_cols]
    best_score = float("inf")
    best_params = None
    params_grid = ParameterGrid(args.daal_params_grid)
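    # Grid search over daal4py GBT training parameters, scoring each candidate
    # by mean absolute percentage error on the validation slice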
    for params in params_grid:
        model = d4p.gbt_regression_training(**params)
        train_result = model.compute(train_x, train_y)
        predict_alg = d4p.gbt_regression_prediction()
        pred = predict_alg.compute(test_x, train_result.model).prediction
        # MAPE on the validation slice; ravel the (n, 1) prediction so it
        # broadcasts correctly against the 1-D target values
        score = np.mean(np.abs(pred.ravel() - test_y.values) / test_y.values)
        if score < best_score:
            best_score = score
            best_params = params
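    # Retrain with the best parameters on the combined train + validation data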
    best_model = d4p.gbt_regression_training(**best_params)
    best_train_result = best_model.compute(
        df.iloc[:TRAIN_SIZE + TEST_SIZE][args.src_cols],
        df.iloc[:TRAIN_SIZE + TEST_SIZE][args.tgt_col])
    best_predict_alg = d4p.gbt_regression_prediction()
    pred_df = pred_df.copy()  # materialize the slice so the assignment is safe
    pred_df['pred'] = best_predict_alg.compute(
        pred_x, best_train_result.model).prediction.ravel()
    return pred_df
Example #2
def run_inference(num_observations: int = 1000):
    """Run xgboost for specified number of observations"""
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)

    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):
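        # Re-create the prediction algorithm each iteration and time one full
        # batch prediction with the converted daal_model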

        start_time = timer()
        daal_predict_algo = d4p.gbt_regression_prediction(fptype='float')
        daal_prediction = daal_predict_algo.compute(test_df, daal_model)
        end_time = timer()

        total_time = end_time - start_time
        run_times.append(total_time * 1e3)  # batch time in milliseconds

        inference_time = total_time * 1e6 / num_rows  # per-row time in microseconds
        inference_times.append(inference_time)

    print(num_observations, ", ", common.calculate_stats(inference_times))
Example #3
def main():
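    # `read_csv` is a small CSV-reading helper presumably defined elsewhere in
    # the original script (compare Example #6, where it is passed in as `readcsv`)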
    maxIterations = 40

    # input data file
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)

    # Read data. Let's use 13 features per observation
    data = read_csv(infile, range(13))
    deps = read_csv(infile, range(13, 14))
    train_result = train_algo.compute(data, deps)

    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # read test data (with same #features)
    pdata = read_csv(testfile, range(13))
    ptdata = read_csv(testfile, range(13, 14))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Prediction result provides prediction
    assert (predict_result.prediction.shape == (pdata.shape[0], 1))

    return (train_result, predict_result, ptdata)
Example #4
def compute(train_indep_data, train_dep_data, test_indep_data, maxIterations):
    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)
    train_result = train_algo.compute(train_indep_data, train_dep_data)
    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # now predict using the model from the training above
    return predict_algo.compute(test_indep_data, train_result.model)
Example #5
    def predict(self, X):
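        # scikit-learn style predict(): validate the input, then run daal4py
        # GBT prediction with the model stored by fit()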
        # Check if fit has been called
        check_is_fitted(self, ['n_features_'])

        # Input validation
        X = check_array(X, dtype=[np.single, np.double])
        if X.shape[1] != self.n_features_:
            raise ValueError('Shape of input is different from what was seen in `fit`')

        if not hasattr(self, 'daal_model_'):
            raise ValueError(("The class {} instance does not have 'daal_model_' attribute set. "
                              "Call 'fit' with appropriate arguments before using this method.").format(type(self).__name__))

        # Define type of data
        fptype = getFPType(X)

        # Prediction
        predict_algo = d4p.gbt_regression_prediction(fptype=fptype)
        predict_result = predict_algo.compute(X, self.daal_model_)

        return predict_result.prediction.ravel()
Example #6
def main(readcsv=read_csv, method='defaultDense'):
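    # `readcsv` and `method` are presumably injected by the surrounding
    # examples harness; `method` is unused in this function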
    maxIterations = 200

    # input data file
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a training object
    train_algo = d4p.gbt_regression_training(maxIterations=maxIterations)

    # Read data. Let's use 13 features per observation
    data = readcsv(infile, range(13), t=np.float32)
    deps = readcsv(infile, range(13, 14), t=np.float32)
    train_result = train_algo.compute(data, deps)

    # Now let's do some prediction
    predict_algo = d4p.gbt_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(13), t=np.float32)
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Prediction result provides prediction
    ptdata = np.loadtxt(testfile,
                        usecols=range(13, 14),
                        delimiter=',',
                        ndmin=2,
                        dtype=np.float32)
    # ptdata = np.loadtxt('../tests/unittest_data/gradient_boosted_regression_batch.csv',
    #                     delimiter=',', ndmin=2, dtype=np.float32)
    if hasattr(ptdata, 'toarray'):
        # convert scipy's csr_matrix so the assertion below works
        ptdata = ptdata.toarray()
    assert np.square(predict_result.prediction - ptdata).mean() < 1e-2, \
        np.square(predict_result.prediction - ptdata).mean()

    return (train_result, predict_result, ptdata)
Example #7
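# Convert the trained LightGBM booster to a daal4py GBT model, then time
# daal4py prediction on the same test set; `bench`, `utils`, `metric_func`,
# `task` and the fitted `model_lgbm` come from the surrounding benchmark script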
t_trans, model_daal = bench.measure_function_time(
    daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)

if hasattr(params, 'n_classes'):
    predict_algo = daal4py.gbt_classification_prediction(
        nClasses=params.n_classes,
        resultsToEvaluate='computeClassLabels',
        fptype='float')
    t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute,
                                                         X_test,
                                                         model_daal,
                                                         params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)
else:
    predict_algo = daal4py.gbt_regression_prediction()
    t_daal_pred, daal_pred = bench.measure_function_time(predict_algo.compute,
                                                         X_test,
                                                         model_daal,
                                                         params=params)
    test_metric_daal = metric_func(y_test, daal_pred.prediction)

utils.print_output(
    library='modelbuilders',
    algorithm=f'lightgbm_{task}_and_modelbuilder',
    stages=['lgbm_train', 'lgbm_predict', 'daal4py_predict'],
    params=params,
    functions=[
        'lgbm_dataset', 'lgbm_dataset', 'lgbm_train', 'lgbm_predict',
        'lgbm_to_daal', 'daal_compute'
    ],