def main(readcsv=read_csv, method='defaultDense'):
    """Train a decision forest regressor on the sample data and predict.

    Args:
        readcsv: Callable invoked as ``readcsv(path, columns, t=dtype)`` to
            load the selected columns of a CSV file.
        method: Computation method forwarded to the training algorithm.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)``, where ``ptdata`` is
        the dependent-variable column read from the test file.
    """
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a decision forest regression training object. ``method`` is
    # forwarded so the caller's choice is honoured instead of being ignored.
    train_algo = d4p.decision_forest_regression_training(
        nTrees=100,
        varImportance='MDA_Raw',
        bootstrap=True,
        engine=d4p.engines_mt2203(seed=777),
        resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation',
        method=method,
    )

    # Read data: columns 0..12 are the independent variables, column 13 is
    # the dependent variable (for each observation).
    indep_data = readcsv(infile, range(13), t=np.float32)
    dep_data = readcsv(infile, range(13, 14), t=np.float32)

    # Now train/compute, the result provides the model for prediction.
    # Training result provides (depending on parameters) model, outOfBagError,
    # outOfBagErrorPerObservation and/or variableImportance.
    train_result = train_algo.compute(indep_data, dep_data)

    # Now let's do some prediction
    predict_algo = d4p.decision_forest_regression_prediction()

    # Read test data (with the same number of features)
    pdata = readcsv(testfile, range(13), t=np.float32)
    ptdata = readcsv(testfile, range(13, 14), t=np.float32)

    # Predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Sanity check: one prediction row per test observation, with as many
    # columns as the training labels.
    assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1])

    return (train_result, predict_result, ptdata)
def run_inference(num_observations: int = 1000):
    """Time decision forest regression prediction on a test sample.

    Builds a test frame of ``num_observations`` rows through
    ``common.get_test_data_df``, then repeats the prediction ``NUM_LOOPS``
    times against ``train_result.model`` (``train_result`` is not defined in
    this function and is read from the surrounding scope).

    Args:
        num_observations: Number of observations requested for the test set.

    Returns:
        Whatever ``common.calculate_stats`` returns for the collected
        per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    num_rows = len(test_df)

    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    inference_times = []
    for _ in range(NUM_LOOPS):
        # The predictor is constructed inside the timed region on purpose, so
        # the measurement stays comparable with previously recorded numbers.
        start_time = timer()
        predict_algo = d4p.decision_forest_regression_prediction(fptype='float')
        predict_algo.compute(test_df, train_result.model)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e6 equals 1e7. If microseconds per row was intended
        # the factor should be 1e6 -- confirm before changing, since other
        # benchmarks may rely on the same scaling.
        inference_times.append(total_time*(10e6)/num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def df_regr_predict(X, training_result):
    """Predict with a trained decision forest regression model.

    Args:
        X: Observations passed to the prediction algorithm.
        training_result: Training result whose ``model`` attribute is used.

    Returns:
        The ``prediction`` attribute of the prediction result.
    """
    predictor = decision_forest_regression_prediction(fptype='float')
    return predictor.compute(X, training_result.model).prediction
def _daal_predict_regressor(self, X):
    """Predict regression targets for ``X`` with the stored daal4py model.

    ``X`` is first passed through ``self._validate_X_predict``; the floating
    point type handed to the algorithm is derived from the validated data
    via ``getFPType``.

    Returns:
        The predictions flattened with ``ravel()``.
    """
    X = self._validate_X_predict(X)
    predictor = daal4py.decision_forest_regression_prediction(
        fptype=getFPType(X)
    )
    prediction_result = predictor.compute(X, self.daal_model_)
    return prediction_result.prediction.ravel()
def daal_predict(self, X):
    """Predict regression targets for ``X`` with the fitted daal4py model.

    Checks that ``daal_model_`` is present (via ``check_is_fitted``),
    validates ``X`` through ``self._validate_X_predict`` and runs the
    prediction algorithm with ``fptype='float'``.

    Returns:
        The predictions flattened with ``ravel()``.
    """
    check_is_fitted(self, 'daal_model_')
    validated = self._validate_X_predict(X)
    predictor = daal4py.decision_forest_regression_prediction(fptype='float')
    outcome = predictor.compute(validated, self.daal_model_)
    return outcome.prediction.ravel()
def _daal_predict_regressor(self, X):
    """Predict regression targets for ``X`` with the stored daal4py model.

    Raises:
        ValueError: If the number of columns of ``X`` differs from
            ``self.n_features_in_``.

    Returns:
        The predictions flattened with ``ravel()``.
    """
    n_given = X.shape[1]
    n_expected = self.n_features_in_
    if n_given != n_expected:
        message = (
            f'X has {n_given} features, '
            f'but RandomForestRegressor is expecting '
            f'{n_expected} features as input'
        )
        raise ValueError(message)

    # Floating point type is derived from the input data itself.
    predictor = daal4py.decision_forest_regression_prediction(
        fptype=getFPType(X)
    )
    outcome = predictor.compute(X, self.daal_model_)
    return outcome.prediction.ravel()
def _daal_predict(self, X):
    """Predict regression targets for ``X`` with the fitted daal4py model.

    The fitted-state check is called without an attribute name when the
    installed scikit-learn version is at least 0.22, and with
    ``'daal_model_'`` otherwise.

    Returns:
        The predictions flattened with ``ravel()``.
    """
    modern_sklearn = LooseVersion(sklearn_version) >= LooseVersion("0.22")
    # Extra positional arguments for check_is_fitted, chosen by version.
    fitted_attrs = () if modern_sklearn else ('daal_model_',)
    check_is_fitted(self, *fitted_attrs)

    validated = self._validate_X_predict(X)
    predictor = daal4py.decision_forest_regression_prediction(fptype='float')
    outcome = predictor.compute(validated, self.daal_model_)
    return outcome.prediction.ravel()
def predict(cls, input):
    """For the input, do the predictions and return them.

    The prediction algorithm is configured from the JSON file at
    ``param_path``, which must provide the keys ``"fptype"``, ``"method"``
    and ``"distributed"`` (the latter enables distributed mode only when it
    is exactly the string ``"True"``).

    Args:
        input (a pandas dataframe): The data on which to do the predictions.
            There will be one prediction per row in the dataframe.

    Returns:
        The result of the prediction algorithm's ``compute`` call for
        ``input`` and the model returned by ``cls.get_model()``.
    """
    with open(param_path, "r") as pf:
        params = json.load(pf)

    predict_algo = decision_forest_regression_prediction(
        fptype=params["fptype"],
        method=params["method"],
        distributed=params["distributed"] == "True",
    )

    clf = cls.get_model()
    return predict_algo.compute(input, clf)
def compute(train_data, train_labels, predict_data, method='defaultDense'):
    """Train a decision forest regressor and predict with the fitted model.

    Args:
        train_data: Training observations.
        train_labels: Training responses.
        predict_data: Observations to predict for.
        method: Computation method passed to the training algorithm.

    Returns:
        Tuple ``(train_result, predict_result)``.
    """
    # Training configuration, collected in one place.
    training_options = dict(
        nTrees=100,
        engine=d4p.engines_mt2203(seed=777),
        varImportance='MDA_Raw',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation',
        method=method,
    )
    trainer = d4p.decision_forest_regression_training(**training_options)

    # Depending on the options, the training result carries the model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step.
    predictor = d4p.decision_forest_regression_prediction()
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
def compute(train_data, train_labels, predict_data):
    """Train a decision forest regressor with the 'hist' method and predict.

    Args:
        train_data: Training observations.
        train_labels: Training responses.
        predict_data: Observations to predict for.

    Returns:
        Tuple ``(train_result, predict_result)``.
    """
    # Training configuration, collected in one place.
    training_options = dict(
        method='hist',
        maxBins=256,
        minBinSize=1,
        nTrees=100,
        fptype='float',
        varImportance='MDA_Raw',
        bootstrap=True,
        engine=d4p.engines_mt2203(seed=777),
        resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation',
    )
    trainer = d4p.decision_forest_regression_training(**training_options)

    # Depending on the options, the training result carries the model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step.
    predictor = d4p.decision_forest_regression_prediction(fptype='float')
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result