def main(readcsv=read_csv, method='defaultDense'):
    """Train and score a 5-class logistic regression on the batch logreg data,
    selecting the prediction keyword by the linked DAAL version."""
    n_classes = 5
    n_features = 6

    # Training file layout: 6 feature columns followed by one label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    training = d4p.logistic_regression_training(nClasses=n_classes,
                                                penaltyL1=0.1,
                                                penaltyL2=0.1,
                                                interceptFlag=True)
    train_result = training.compute(x_train, y_train)

    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # The result-selection keyword changed in DAAL 2020:
    # resultsToCompute/"computeClasses*" became resultsToEvaluate/"computeClass*".
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    if daal_version < (2020, 0):
        prediction = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
        )
    else:
        prediction = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
    predict_result = prediction.compute(x_test, train_result.model)

    # (Log-)probabilities come back with one row per observation, one column per class.
    assert predict_result.probabilities.shape == (x_test.shape[0], n_classes)
    assert predict_result.logProbabilities.shape == (x_test.shape[0], n_classes)

    y_test = np.loadtxt(test_file, usecols=range(n_features, n_features + 1),
                        delimiter=',', ndmin=2)
    # Require fewer than 2.5% misclassified test rows.
    error_rate = np.count_nonzero(predict_result.prediction - y_test) / y_test.shape[0]
    assert error_rate < 0.025

    return (train_result, predict_result, y_test)
def main(readcsv=read_csv, method='defaultDense'):
    """Binary-class logistic regression on the batch binary_cls data set."""
    n_classes = 2
    n_features = 20

    # Training file layout: 20 feature columns followed by one label column.
    train_file = "./data/batch/binary_cls_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Fit with an intercept term.
    training = d4p.logistic_regression_training(nClasses=n_classes,
                                                interceptFlag=True)
    train_result = training.compute(x_train, y_train)

    # Test file has the same column layout.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = readcsv(test_file, range(n_features))
    y_test = readcsv(test_file, range(n_features, n_features + 1))

    prediction = d4p.logistic_regression_prediction(nClasses=n_classes)
    predict_result = prediction.compute(x_test, train_result.model)

    # One predicted label per test observation.
    assert predict_result.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (train_result, predict_result, y_test)
def main():
    """Distributed (SPMD) binary logistic regression: each process trains on
    its own chunk of the data; prediction runs identically on every process."""
    n_classes = 2
    n_features = 20

    # Split the training data evenly across processes and keep this rank's slice.
    train_file = "./data/batch/binary_cls_train.csv"
    rank, nranks = d4p.my_procid(), d4p.num_procs()
    x_train = np.split(read_csv(train_file, range(n_features)), nranks)[rank]
    y_train = np.split(read_csv(train_file, range(n_features, n_features + 1)),
                       nranks)[rank]

    # distributed=True makes the training step cooperate across all processes.
    training = d4p.logistic_regression_training(nClasses=n_classes,
                                                interceptFlag=True,
                                                distributed=True)
    train_result = training.compute(x_train, y_train)

    # Prediction operates on the same full test data on each process.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    prediction = d4p.logistic_regression_prediction(nClasses=n_classes)
    predict_result = prediction.compute(x_test, train_result.model)

    # One predicted label per test observation.
    assert predict_result.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (train_result, predict_result, y_test)
def run_inference(num_observations: int = 1000): """Run xgboost for specified number of observations""" # Load data test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations) num_rows = len(test_df) ###################### print("_______________________________________") print("Total Number of Rows", num_rows) run_times = [] inference_times = [] for _ in range(NUM_LOOPS): start_time = timer() predict_algo = d4p.logistic_regression_prediction( nClasses=2, resultsToEvaluate= "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities" ) predict_result = predict_algo.compute(test_df, train_result.model) #predictor.compute(data, MODEL) end_time = timer() total_time = end_time - start_time run_times.append(total_time * 10e3) inference_time = total_time * (10e6) / num_rows inference_times.append(inference_time) return_elem = common.calculate_stats(inference_times) print(num_observations, ", ", return_elem) return return_elem
def main(readcsv=read_csv, method='defaultDense'):
    """Train an L1/L2-regularized 5-class logistic regression and verify its
    predictions against the held-out test labels."""
    n_classes = 5
    n_features = 6

    # Training file layout: 6 feature columns, then one class-label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    training = d4p.logistic_regression_training(nClasses=n_classes,
                                                penaltyL1=0.1,
                                                penaltyL2=0.1,
                                                interceptFlag=True)
    train_result = training.compute(x_train, y_train)

    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # Request labels, probabilities and log-probabilities (pre-2020 DAAL naming).
    prediction = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities")
    predict_result = prediction.compute(x_test, train_result.model)

    # One row per observation, one column per class.
    assert predict_result.probabilities.shape == (x_test.shape[0], n_classes)
    assert predict_result.logProbabilities.shape == (x_test.shape[0], n_classes)

    y_test = np.loadtxt(test_file, usecols=range(n_features, n_features + 1),
                        delimiter=',', ndmin=2)
    # Misclassification rate must stay below 2.5%.
    error_rate = np.count_nonzero(predict_result.prediction - y_test) / y_test.shape[0]
    assert error_rate < 0.025

    return (train_result, predict_result, y_test)
def prdct_impl(n, d, model):
    """Run binary logistic-regression prediction on a synthetic n x d matrix
    whose entries are all -21.5 (ones minus 22.5) and return the full result
    (labels, probabilities, log-probabilities)."""
    samples = np.full((n, d), -21.5, dtype=np.double)
    predictor = d4p.logistic_regression_prediction(
        2,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    return predictor.compute(samples, model)
def compute(train_data, train_labels, predict_data, nClasses):
    """Fit a logistic regression (with intercept) on the training data and
    score predict_data with it.

    Returns a (prediction_result, training_result) pair."""
    training = d4p.logistic_regression_training(nClasses=nClasses,
                                                interceptFlag=True)
    trained = training.compute(train_data, train_labels)
    prediction = d4p.logistic_regression_prediction(nClasses=nClasses)
    return prediction.compute(predict_data, trained.model), trained
def predict(self, predict_data, model):
    """Score predict_data with the given model, caching the predicted labels
    and class probabilities on self; returns self for chaining."""
    algo = d4p.logistic_regression_prediction(
        fptype=self.dtype,
        nClasses=self.nClasses,
        resultsToCompute=self.resultsToCompute)
    result = algo.compute(predict_data, model)
    self.prediction = result.prediction
    self.probabilities = result.probabilities
    return self
def compute(train_data, train_labels, predict_data, nClasses):
    """Train an L1/L2-regularized logistic regression and compute labels,
    probabilities and log-probabilities for predict_data.

    Returns a (prediction_result, training_result) pair."""
    training = d4p.logistic_regression_training(nClasses=nClasses,
                                                penaltyL1=0.1,
                                                penaltyL2=0.1,
                                                interceptFlag=True)
    trained = training.compute(train_data, train_labels)
    prediction = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities")
    return prediction.compute(predict_data, trained.model), trained
def test_breast_cancer_without_intercept(self):
    """Model-builder round-trip: daal4py predictions from sklearn-fitted
    coefficients (no intercept) must match sklearn's own predictions."""
    X, y = load_breast_cancer(return_X_y=True)
    num_classes = 2
    classifier = LogisticRegression(fit_intercept=False, max_iter=10000,
                                    random_state=0).fit(X, y)
    # Transfer the fitted betas into a daal4py model.
    builder = d4p.logistic_regression_model_builder(n_classes=num_classes,
                                                    n_features=X.shape[1])
    builder.set_beta(classifier.coef_, classifier.intercept_)
    daal_pred = d4p.logistic_regression_prediction(
        nClasses=num_classes).compute(X, builder.model).prediction.flatten()
    sklearn_pred = classifier.predict(X)
    self.assertTrue(np.allclose(daal_pred, sklearn_pred))
def main():
    """Build a daal4py logistic-regression model from sklearn-fitted iris
    coefficients and check both produce identical class labels."""
    X, y = load_iris(return_X_y=True)
    num_classes = 3

    # Fit the reference model with scikit-learn.
    classifier = LogisticRegression(fit_intercept=True, max_iter=1000,
                                    random_state=0).fit(X, y)

    # Transfer the fitted betas into a daal4py model via the model builder.
    builder = d4p.logistic_regression_model_builder(n_classes=num_classes,
                                                    n_features=X.shape[1])
    builder.set_beta(classifier.coef_, classifier.intercept_)

    # Only class labels are needed for the comparison.
    prediction = d4p.logistic_regression_prediction(
        nClasses=num_classes, resultsToEvaluate="computeClassLabels")
    daal_result = prediction.compute(X, builder.model)
    sklearn_labels = classifier.predict(X)

    assert np.allclose(daal_result.prediction.flatten(), sklearn_labels)

    return (builder, daal_result)
def main():
    """Train a 5-class logistic regression with L1/L2 penalties and
    sanity-check the shapes of all requested prediction results."""
    n_classes = 5
    n_features = 6

    # Training file layout: 6 feature columns, then one class-label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = read_csv(train_file, range(n_features))
    y_train = read_csv(train_file, range(n_features, n_features + 1))

    training = d4p.logistic_regression_training(nClasses=n_classes,
                                                penaltyL1=0.1,
                                                penaltyL2=0.1,
                                                interceptFlag=True)
    train_result = training.compute(x_train, y_train)

    test_file = "./data/batch/logreg_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    # Pre-2020 DAAL spelling: resultsToCompute / "computeClasses*".
    prediction = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    predict_result = prediction.compute(x_test, train_result.model)

    # Labels: one column; (log-)probabilities: one column per class.
    assert predict_result.prediction.shape == (x_test.shape[0], y_train.shape[1])
    assert predict_result.probabilities.shape == (x_test.shape[0], n_classes)
    assert predict_result.logProbabilities.shape == (x_test.shape[0], n_classes)

    return (train_result, predict_result, y_test)
def daal4py_predict(self, X, resultsToEvaluate):
    """Compute labels / probabilities / log-probabilities for X, using
    daal4py (oneDAL) when the estimator and input qualify, otherwise
    falling back to the stock scikit-learn implementation.

    resultsToEvaluate must be one of 'computeClassLabels',
    'computeClassProbabilities' or 'computeClassLogProbabilities'.
    """
    check_is_fitted(self)
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    try:
        fptype = getFPType(X)
    except ValueError:
        # dtype unsupported by daal4py -> forces the sklearn fallback below.
        fptype = None
    # oneDAL path requires a multinomial (or binary) problem; label-only
    # prediction is accepted for any multi_class setting.
    daal_ready = self.multi_class in ["multinomial", "warn"] or \
        self.classes_.size == 2 or resultsToEvaluate == 'computeClassLabels'
    # Additionally require dense input/coefficients and a usable float type.
    daal_ready = daal_ready and not sparse.issparse(X) and \
        not sparse.issparse(self.coef_) and fptype is not None
    if daal_ready:
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict: " + get_patch_message("daal"))
        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f'X has {X.shape[1]} features, '
                f'but LogisticRegression is expecting {n_features} features as input'
            )
        # Rebuild a daal4py model from the fitted sklearn coefficients.
        # NOTE(review): positional args here — verify the builder's
        # (n_classes, n_features) parameter order against the daal4py
        # signature; other call sites pass these by keyword.
        builder = d4p.logistic_regression_model_builder(
            X.shape[1], len(self.classes_))
        builder.set_beta(self.coef_, self.intercept_)
        predict = d4p.logistic_regression_prediction(
            nClasses=len(self.classes_),
            fptype=fptype,
            method='defaultDense',
            resultsToEvaluate=resultsToEvaluate)
        res = predict.compute(X, builder.model)
        if resultsToEvaluate == 'computeClassLabels':
            res = res.prediction
            # oneDAL returns class indices 0..k-1; map them back to the
            # original labels when values or dtype differ.
            if not np.array_equal(self.classes_, np.arange(0, len(self.classes_))) or \
                    self.classes_.dtype != X.dtype:
                res = self.classes_.take(np.asarray(res, dtype=np.intp))
        elif resultsToEvaluate == 'computeClassProbabilities':
            res = res.probabilities
        elif resultsToEvaluate == 'computeClassLogProbabilities':
            res = res.logProbabilities
        else:
            raise ValueError(
                'resultsToEvaluate must be in [computeClassLabels, \
                computeClassProbabilities, computeClassLogProbabilities]')
        # Flatten single-column results to match sklearn's 1-D output shape.
        if res.shape[1] == 1:
            res = np.ravel(res)
        return res
    # Fallbacks: delegate to the unpatched sklearn methods.
    if resultsToEvaluate == 'computeClassLabels':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict(self, X)
    if resultsToEvaluate == 'computeClassProbabilities':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict_proba: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict_proba(self, X)
    if resultsToEvaluate == 'computeClassLogProbabilities':
        logging.info("sklearn.linear_model.LogisticRegression."
                     "predict_log_proba: " + get_patch_message("sklearn"))
        return LogisticRegression_original.predict_log_proba(self, X)
    raise ValueError('resultsToEvaluate must be in [computeClassLabels, \
        computeClassProbabilities, computeClassLogProbabilities]')
def daal4py_predict(self, X, resultsToEvaluate):
    """Compute labels / probabilities / log-probabilities for X, using
    daal4py (oneDAL) when the PatchingConditionsChain checks pass, otherwise
    falling back to the stock scikit-learn implementation.

    resultsToEvaluate must be one of 'computeClassLabels',
    'computeClassProbabilities' or 'computeClassLogProbabilities'.
    """
    check_is_fitted(self)
    if sklearn_check_version('1.0'):
        # sklearn >= 1.0 validates feature names on every predict call.
        self._check_feature_names(X, reset=False)
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    try:
        fptype = getFPType(X)
    except ValueError:
        # dtype unsupported by daal4py -> fails the fptype condition below.
        fptype = None
    # Map the requested result onto the public method name for patch logging.
    if resultsToEvaluate == 'computeClassLabels':
        _function_name = 'predict'
    elif resultsToEvaluate == 'computeClassProbabilities':
        _function_name = 'predict_proba'
    elif resultsToEvaluate == 'computeClassLogProbabilities':
        _function_name = 'predict_log_proba'
    else:
        raise ValueError('resultsToEvaluate must be in [computeClassLabels, \
            computeClassProbabilities, computeClassLogProbabilities]')
    _patching_status = PatchingConditionsChain(
        f"sklearn.linear_model.LogisticRegression.{_function_name}")
    # Any ONE of these suffices (conditions_merging=any): multinomial mode,
    # a binary problem, or a labels-only request.
    _patching_status.and_conditions(
        [(self.multi_class in ["multinomial", "warn"],
          f"{self.multi_class} multiclass option is not supported. "
          "Only 'multinomial' or 'warn' options are supported."),
         (self.classes_.size == 2, "Number of classes != 2."),
         (resultsToEvaluate == 'computeClassLabels',
          "resultsToEvaluate != 'computeClassLabels'.")],
        conditions_merging=any)
    # ALL of these must additionally hold for the oneDAL path.
    _dal_ready = _patching_status.and_conditions([
        (not sparse.issparse(X), "X is sparse. Sparse input is not supported."),
        (not sparse.issparse(self.coef_),
         "self.coef_ is sparse. Sparse coefficients are not supported."),
        (fptype is not None, "Unable to get dtype.")
    ])
    _patching_status.write_log()
    if _dal_ready:
        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f'X has {X.shape[1]} features, '
                f'but LogisticRegression is expecting {n_features} features as input'
            )
        # Rebuild a daal4py model from the fitted sklearn coefficients.
        # NOTE(review): positional args here — verify the builder's
        # (n_classes, n_features) parameter order against the daal4py
        # signature; example code elsewhere passes these by keyword.
        builder = d4p.logistic_regression_model_builder(
            X.shape[1], len(self.classes_))
        builder.set_beta(self.coef_, self.intercept_)
        predict = d4p.logistic_regression_prediction(
            nClasses=len(self.classes_),
            fptype=fptype,
            method='defaultDense',
            resultsToEvaluate=resultsToEvaluate)
        res = predict.compute(X, builder.model)
        if resultsToEvaluate == 'computeClassLabels':
            res = res.prediction
            # oneDAL returns class indices 0..k-1; map them back to the
            # original labels when values or dtype differ.
            if not np.array_equal(self.classes_, np.arange(0, len(self.classes_))) or \
                    self.classes_.dtype != X.dtype:
                res = self.classes_.take(np.asarray(res, dtype=np.intp))
        elif resultsToEvaluate == 'computeClassProbabilities':
            res = res.probabilities
        elif resultsToEvaluate == 'computeClassLogProbabilities':
            res = res.logProbabilities
        else:
            raise ValueError(
                'resultsToEvaluate must be in [computeClassLabels, \
                computeClassProbabilities, computeClassLogProbabilities]')
        # Flatten single-column results to match sklearn's 1-D output shape.
        if res.shape[1] == 1:
            res = np.ravel(res)
        return res
    # Fallbacks: delegate to the unpatched sklearn methods.
    if resultsToEvaluate == 'computeClassLabels':
        return LogisticRegression_original.predict(self, X)
    if resultsToEvaluate == 'computeClassProbabilities':
        return LogisticRegression_original.predict_proba(self, X)
    if resultsToEvaluate == 'computeClassLogProbabilities':
        return LogisticRegression_original.predict_log_proba(self, X)