def main():
    """Train a 5-class decision forest on the training CSV and predict on the test CSV.

    The first three columns of each file are read as features and the
    fourth column as the label, all as ``np.float32``.

    Returns:
        tuple: ``(train_result, predict_result, plabels)`` - the training
        result, the prediction result and the labels read from the test file.
    """
    # Input data files
    infile = "./data/batch/df_classification_train.csv"
    testfile = "./data/batch/df_classification_test.csv"

    # Column selections shared by the training and the test file:
    # 3 features per observation, followed by one label column.
    feature_cols = range(3)
    label_cols = range(3, 4)

    # Configure a training object (5 classes)
    training_options = dict(
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=d4p.engines_mt19937(seed=777),
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError',
    )
    trainer = d4p.decision_forest_classification_training(5, **training_options)

    # Read the training data and fit the forest.
    # The training result provides (depending on parameters) model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    data = read_csv(infile, feature_cols, t=np.float32)
    labels = read_csv(infile, label_cols, t=np.float32)
    train_result = trainer.compute(data, labels)

    # Now do some prediction on test data with the same number of features
    predictor = d4p.decision_forest_classification_prediction(5)
    pdata = read_csv(testfile, feature_cols, t=np.float32)
    plabels = read_csv(testfile, label_cols, t=np.float32)

    # Predict using the model from the training above
    predict_result = predictor.compute(pdata, train_result.model)

    # The prediction result holds one predicted value per test observation
    expected_shape = (pdata.shape[0], 1)
    assert predict_result.prediction.shape == expected_shape

    return (train_result, predict_result, plabels)
def compute(train_data, train_labels, predict_data, method='defaultDense'):
    """Train a 5-class decision forest and run prediction with the fitted model.

    Args:
        train_data: Training observations passed to the training algorithm.
        train_labels: Labels passed alongside ``train_data``.
        predict_data: Observations to run prediction on.
        method: Value forwarded as ``method`` to the training algorithm.

    Returns:
        tuple: ``(train_result, predict_result)``.
    """
    # Training configuration (5 classes)
    training_options = dict(
        fptype='float',
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=d4p.engines_mt19937(seed=777),
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError',
        method=method,
    )
    trainer = d4p.decision_forest_classification_training(5, **training_options)

    # The training result provides (depending on parameters) model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance
    train_result = trainer.compute(train_data, train_labels)

    # Prediction uses the model from the training above and requests both
    # class labels and class probabilities.
    prediction_options = dict(
        nClasses=5,
        fptype='float',
        resultsToEvaluate="computeClassLabels|computeClassProbabilities",
        votingMethod="unweighted",
    )
    predictor = d4p.decision_forest_classification_prediction(**prediction_options)
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
def _daal_predict(self, X):
    """Predict class labels for ``X`` using the daal4py decision forest model.

    Args:
        X: Input samples; passed through ``self._validate_X_predict`` first.

    Returns:
        The entries of ``self.classes_`` selected by the class indices
        that the daal4py prediction algorithm produced.
    """
    X = self._validate_X_predict(X)

    # ``resultsToEvaluate`` is only passed when daal_version >= (2020, 1).
    if daal_version < (2020, 1):
        dfc_algorithm = daal4py.decision_forest_classification_prediction(
            nClasses=int(self.n_classes_),
            fptype='float',
        )
    else:
        dfc_algorithm = daal4py.decision_forest_classification_prediction(
            nClasses=int(self.n_classes_),
            fptype='float',
            resultsToEvaluate="computeClassLabels",
        )

    dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)
    pred = dfc_predictionResult.prediction

    # The prediction holds class indices; convert them to an integer dtype
    # so they can index ``self.classes_``. ``np.int`` (a deprecated alias
    # that was removed in NumPy 1.24) is replaced by the explicit
    # ``np.int64``.
    class_indices = pred.ravel().astype(np.int64, casting='unsafe')
    return np.take(self.classes_, class_indices)
def df_clsf_predict(X, training_result, n_classes, verbose=False):
    """Run decision forest classification prediction on ``X``.

    Args:
        X: Observations to predict on.
        training_result: Object whose ``model`` attribute is used for prediction.
        n_classes: Number of classes, forwarded as ``nClasses``.
        verbose: Accepted but not used by this function.

    Returns:
        The ``prediction`` attribute of the prediction result.
    """
    predictor = decision_forest_classification_prediction(
        nClasses=n_classes,
        # we give float here specifically to match sklearn
        fptype='float',
    )
    return predictor.compute(X, training_result.model).prediction
def daal_predict(self, X):
    """Predict class labels for ``X`` using the daal4py decision forest model.

    Args:
        X: Input samples; passed through ``self._validate_X_predict`` first.

    Returns:
        The entries of ``self.classes_`` selected by the class indices
        that the daal4py prediction algorithm produced.
    """
    X = self._validate_X_predict(X)

    dfc_algorithm = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype='float',
    )
    dfc_predictionResult = dfc_algorithm.compute(X, self.daal_model_)
    pred = dfc_predictionResult.prediction

    # The prediction holds class indices; convert them to an integer dtype
    # so they can index ``self.classes_``. ``np.int`` (a deprecated alias
    # that was removed in NumPy 1.24) is replaced by the explicit
    # ``np.int64``.
    class_indices = pred.ravel().astype(np.int64, casting='unsafe')
    return np.take(self.classes_, class_indices)
def _daal_predict_proba(self, X):
    """Compute class probabilities for ``X`` with the daal4py model.

    Args:
        X: Input samples; the floating-point type passed to daal4py is
            derived from it via ``getFPType``.

    Returns:
        The ``probabilities`` attribute of the daal4py prediction result.
    """
    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassProbabilities",
    )
    prediction_result = predictor.compute(X, self.daal_model_)
    return prediction_result.probabilities
def _daal_predict_proba(self, X):
    """Compute class probabilities for ``X`` with the daal4py model.

    ``X`` is passed through ``self._validate_X_predict`` only when
    ``daal_check_version((2021, 'P', 200))`` is false.

    Args:
        X: Input samples; the floating-point type passed to daal4py is
            derived from it via ``getFPType``.

    Returns:
        The ``probabilities`` attribute of the daal4py prediction result.
    """
    version_ok = daal_check_version((2021, 'P', 200))
    if not version_ok:
        X = self._validate_X_predict(X)

    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassProbabilities",
    )
    prediction_result = predictor.compute(X, self.daal_model_)
    return prediction_result.probabilities
def _daal_predict_classifier(self, X):
    """Predict class labels for ``X`` with the daal4py model.

    Args:
        X: Input samples; ``X.shape[1]`` must equal ``self.n_features_in_``.

    Returns:
        The entries of ``self.classes_`` selected by the predicted class
        indices.

    Raises:
        ValueError: If the number of columns in ``X`` differs from
            ``self.n_features_in_``.
    """
    fptype = getFPType(X)
    predictor = daal4py.decision_forest_classification_prediction(
        nClasses=int(self.n_classes_),
        fptype=fptype,
        resultsToEvaluate="computeClassLabels",
    )

    # Reject inputs whose feature count does not match before computing.
    if X.shape[1] != self.n_features_in_:
        message = (
            f'X has {X.shape[1]} features, but RandomForestClassifier '
            f'is expecting {self.n_features_in_} features as input'
        )
        raise ValueError(message)

    prediction_result = predictor.compute(X, self.daal_model_)

    # Convert the predicted class indices to integers so they can be used
    # to look up the actual class values.
    class_indices = prediction_result.prediction.ravel().astype(
        np.int64, casting='unsafe')
    return np.take(self.classes_, class_indices)
def predict(cls, input):
    """For the input, do the predictions and return them.

    The prediction algorithm is configured from the JSON file at
    ``param_path``, which must provide the keys ``"nClasses"``,
    ``"fptype"``, ``"method"`` and ``"distributed"``.

    Args:
        input (a pandas dataframe): The data on which to do the predictions.
            There will be one prediction per row in the dataframe

    Returns:
        The result of running the prediction algorithm on ``input`` with the
        model returned by ``cls.get_model()``.
    """
    with open(param_path, "r") as pf:
        params = json.load(pf)

    predict_algo = decision_forest_classification_prediction(
        int(params["nClasses"]),
        fptype=params["fptype"],
        method=params["method"],
        # The flag is stored as a string; only the exact text "True"
        # enables distributed mode.
        distributed=params["distributed"] == "True",
    )

    clf = cls.get_model()
    return predict_algo.compute(input, clf)