def main(readcsv=read_csv, method='defaultDense'):
    """Train a decision forest regressor on the sample data, then predict.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected columns of a CSV file.
        method: Computation method forwarded to the training algorithm.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)``, where ``ptdata`` is
        column 13 of the test file as loaded by ``readcsv``.
    """
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a decision forest regression training object. ``method`` is
    # forwarded so that the caller's choice is not silently ignored.
    train_algo = d4p.decision_forest_regression_training(
        nTrees=100,
        varImportance='MDA_Raw',
        bootstrap=True,
        engine=d4p.engines_mt2203(seed=777),
        resultsToCompute=
        'computeOutOfBagError|computeOutOfBagErrorPerObservation',
        method=method)

    # Read data. Let's have 13 independent, and 1 dependent variables
    # (for each observation)
    indep_data = readcsv(infile, range(13), t=np.float32)
    dep_data = readcsv(infile, range(13, 14), t=np.float32)
    # Now train/compute, the result provides the model for prediction
    train_result = train_algo.compute(indep_data, dep_data)
    # Training result provides (depending on parameters) model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance

    # Now let's do some prediction
    predict_algo = d4p.decision_forest_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(13), t=np.float32)
    ptdata = readcsv(testfile, range(13, 14), t=np.float32)
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Sanity check: one prediction row per test observation, with as many
    # columns as the dependent training data.
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

    return (train_result, predict_result, ptdata)
def run_inference(num_observations: int = 1000):
    """Time decision forest regression prediction over repeated runs.

    Loads ``num_observations`` rows of test data via ``common``, runs
    prediction ``NUM_LOOPS`` times against the module-level
    ``train_result.model``, and summarizes the scaled per-row timings with
    ``common.calculate_stats``.

    Args:
        num_observations: Number of rows requested from the test data set.

    Returns:
        Whatever ``common.calculate_stats`` returns for the collected
        per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    inference_times = []
    for _ in range(NUM_LOOPS):
        # Algorithm construction stays inside the timed region, matching the
        # existing measurement.
        start_time = timer()
        predict_algo = d4p.decision_forest_regression_prediction(fptype='float')
        predict_algo.compute(test_df, train_result.model)
        end_time = timer()

        # NOTE(review): 10e6 equals 1e7; if per-row microseconds were
        # intended the factor should be 1e6. Left unchanged so reported
        # numbers stay comparable with earlier runs -- confirm the unit.
        inference_times.append((end_time - start_time) * (10e6) / num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
Example #3
0
def df_regr_predict(X, training_result):
    """Return decision forest regression predictions for ``X``.

    Args:
        X: Observations handed to the prediction algorithm's ``compute``.
        training_result: Object exposing the trained model as ``.model``.

    Returns:
        The ``prediction`` attribute of the prediction result.
    """
    predictor = decision_forest_regression_prediction(fptype='float')
    return predictor.compute(X, training_result.model).prediction
Example #4
0
def _daal_predict_regressor(self, X):
    """Predict with the stored daal4py model and return a flattened result.

    ``X`` is first passed through ``self._validate_X_predict``; the floating
    point type handed to the algorithm is derived from the validated data.
    """
    checked = self._validate_X_predict(X)
    predictor = daal4py.decision_forest_regression_prediction(
        fptype=getFPType(checked))
    outcome = predictor.compute(checked, self.daal_model_)
    return outcome.prediction.ravel()
Example #5
0
    def daal_predict(self, X):
        """Predict with the fitted daal4py model; return a flattened result.

        Calls ``check_is_fitted`` for the ``daal_model_`` attribute and
        validates ``X`` with ``self._validate_X_predict`` before predicting.
        """
        check_is_fitted(self, 'daal_model_')
        checked = self._validate_X_predict(X)

        predictor = daal4py.decision_forest_regression_prediction(
            fptype='float')
        outcome = predictor.compute(checked, self.daal_model_)
        return outcome.prediction.ravel()
Example #6
0
def _daal_predict_regressor(self, X):
    """Predict with the stored daal4py model and return a flattened result.

    Raises:
        ValueError: If the second dimension of ``X`` differs from
            ``self.n_features_in_``.
    """
    # Reject input whose feature count does not match the fitted estimator.
    if X.shape[1] != self.n_features_in_:
        message = (f'X has {X.shape[1]} features, '
                   f'but RandomForestRegressor is expecting '
                   f'{self.n_features_in_} features as input')
        raise ValueError(message)

    predictor = daal4py.decision_forest_regression_prediction(
        fptype=getFPType(X))
    outcome = predictor.compute(X, self.daal_model_)
    return outcome.prediction.ravel()
Example #7
0
    def _daal_predict(self, X):
        """Predict with the fitted daal4py model; return a flattened result.

        The fitted check is made first, then ``X`` is validated with
        ``self._validate_X_predict`` before prediction.
        """
        # From scikit-learn 0.22 on, check_is_fitted is called without an
        # attribute name; older versions get 'daal_model_' passed explicitly.
        modern_sklearn = LooseVersion(sklearn_version) >= LooseVersion("0.22")
        fitted_args = (self,) if modern_sklearn else (self, 'daal_model_')
        check_is_fitted(*fitted_args)

        checked = self._validate_X_predict(X)

        predictor = daal4py.decision_forest_regression_prediction(
            fptype='float')
        outcome = predictor.compute(checked, self.daal_model_)
        return outcome.prediction.ravel()
Example #8
0
    def predict(cls, input):
        """For the input, do the predictions and return them.

        Algorithm parameters (``fptype``, ``method``, ``distributed``) are
        read from the JSON file at ``param_path``.

        Args:
            input (a pandas dataframe): The data on which to do the
                predictions. There will be one prediction per row in the
                dataframe.

        Returns:
            The object returned by the prediction algorithm's ``compute``.
        """
        # Only the JSON read needs the file handle; release it before the
        # algorithm and model are set up.
        with open(param_path, "r") as pf:
            params = json.load(pf)

        predict_algo = decision_forest_regression_prediction(
            fptype=params["fptype"],
            method=params["method"],
            # The flag is stored as the string "True"/"False" in the file.
            distributed=params["distributed"] == "True")

        # NOTE(review): an earlier revision derived a numpy dtype from
        # ``fptype`` here but never applied it; ``input`` is passed through
        # unconverted. Confirm whether a cast was intended.
        clf = cls.get_model()
        return predict_algo.compute(input, clf)
Example #9
0
def compute(train_data, train_labels, predict_data, method='defaultDense'):
    """Train a decision forest regressor and predict with the fitted model.

    Args:
        train_data: Training observations.
        train_labels: Dependent values for the training observations.
        predict_data: Observations to predict for.
        method: Computation method passed to the training algorithm.

    Returns:
        Tuple ``(train_result, predict_result)``.
    """
    oob_results = 'computeOutOfBagError|computeOutOfBagErrorPerObservation'

    # Set up training with a fixed seed.
    trainer = d4p.decision_forest_regression_training(
        nTrees=100,
        engine=d4p.engines_mt2203(seed=777),
        varImportance='MDA_Raw',
        bootstrap=True,
        resultsToCompute=oob_results,
        method=method,
    )
    # Depending on parameters, the training result provides model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step above.
    predictor = d4p.decision_forest_regression_prediction()
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
Example #10
0
def compute(train_data, train_labels, predict_data):
    """Train a 'hist'-method decision forest regressor, then predict.

    Args:
        train_data: Training observations.
        train_labels: Dependent values for the training observations.
        predict_data: Observations to predict for.

    Returns:
        Tuple ``(train_result, predict_result)``.
    """
    # Training configuration, gathered in one place.
    training_options = {
        'method': 'hist',
        'maxBins': 256,
        'minBinSize': 1,
        'nTrees': 100,
        'fptype': 'float',
        'varImportance': 'MDA_Raw',
        'bootstrap': True,
        'engine': d4p.engines_mt2203(seed=777),
        'resultsToCompute':
            'computeOutOfBagError|computeOutOfBagErrorPerObservation',
    }
    trainer = d4p.decision_forest_regression_training(**training_options)

    # Depending on parameters, the training result provides model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step above.
    predictor = d4p.decision_forest_regression_prediction(fptype='float')
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result