def main(readcsv=read_csv, method='defaultDense'):
    """Train and predict binary logistic regression on CSV data.

    Returns (train_result, predict_result, predict_labels).
    """
    n_classes = 2
    n_features = 20

    # Training set: 20 feature columns followed by one label column.
    train_file = "./data/batch/binary_cls_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Fit the model with an intercept term.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test set has the same column layout as the training set.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = readcsv(test_file, range(n_features))
    y_test = readcsv(test_file, range(n_features, n_features + 1))

    predictor = d4p.logistic_regression_prediction(nClasses=n_classes)
    predicted = predictor.compute(x_test, trained.model)

    # One predicted label per test observation.
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (trained, predicted, y_test)
def main(readcsv=read_csv, method='defaultDense'):
    """Multi-class (5-class) logistic regression with L1/L2 penalties.

    Trains on CSV data, predicts labels/probabilities/log-probabilities,
    and checks the misclassification rate.
    Returns (train_result, predict_result, predict_labels).
    """
    n_classes = 5
    n_features = 6

    # Training set: 6 feature columns followed by one label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    # Elastic-net style regularization: both L1 and L2 penalties.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test set: only the feature columns are read here.
    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # Request labels, probabilities and log-probabilities in one pass.
    predictor = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    predicted = predictor.compute(x_test, trained.model)

    # Probability matrices are (n_observations, n_classes).
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    # Ground-truth labels come straight from the last CSV column.
    y_test = np.loadtxt(test_file,
                        usecols=range(n_features, n_features + 1),
                        delimiter=',', ndmin=2)
    # Expect fewer than 2.5% misclassified observations.
    assert np.count_nonzero(predicted.prediction - y_test) / y_test.shape[0] < 0.025

    return (trained, predicted, y_test)
def main():
    """SPMD/distributed binary logistic regression.

    Each process trains on its own chunk of the training data, then every
    process predicts on the identical full test set.
    Returns (train_result, predict_result, predict_labels).
    """
    n_classes = 2
    n_features = 20

    # Slice the training CSV so each rank works on its own chunk.
    train_file = "./data/batch/binary_cls_train.csv"
    rank, nranks = d4p.my_procid(), d4p.num_procs()
    x_train = np.split(read_csv(train_file, range(n_features)), nranks)[rank]
    y_train = np.split(read_csv(train_file, range(n_features, n_features + 1)),
                       nranks)[rank]

    # distributed=True selects the SPMD training algorithm.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               interceptFlag=True,
                                               distributed=True)
    trained = trainer.compute(x_train, y_train)

    # Prediction operates on the same data on each process.
    test_file = "./data/batch/binary_cls_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    predictor = d4p.logistic_regression_prediction(nClasses=n_classes)
    predicted = predictor.compute(x_test, trained.model)

    # One predicted label per test observation.
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])

    return (trained, predicted, y_test)
def compute(train_data, train_labels, predict_data, nClasses):
    """Fit a logistic regression model, then predict on new data.

    Returns a (prediction result, training result) pair.
    """
    # Train with an intercept term.
    trained = d4p.logistic_regression_training(
        nClasses=nClasses, interceptFlag=True
    ).compute(train_data, train_labels)

    # Predict labels for the unseen observations using the fitted model.
    predictor = d4p.logistic_regression_prediction(nClasses=nClasses)
    return predictor.compute(predict_data, trained.model), trained
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py logistic regression training on a data slice.

    NOTE(review): despite the function name, the timed work is
    logistic_regression_training (model fitting), not prediction; the
    original docstring said "xgboost", which was wrong for this body.

    :param num_observations: number of rows to slice from the common dataset.
    :return: aggregate timing statistics from common.calculate_stats().
    """
    # Load the first num_observations rows of features and labels.
    train_x_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    train_y_df = common.get_test_data_df(X=common.y_dfc, size=num_observations)
    num_rows = len(train_x_df)

    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    run_times = []        # total wall time per loop (scaled)
    inference_times = []  # per-row time per loop (scaled)
    for _ in range(NUM_LOOPS):
        start_time = timer()
        model = d4p.logistic_regression_training(nClasses=2)
        model.compute(train_x_df, train_y_df)  # the timed work; result unused
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3 == 1e4 and 10e6 == 1e7 — if ms/us were intended
        # the factors should be 1e3/1e6; kept as-is to preserve reported units.
        run_times.append(total_time * 10e3)
        inference_times.append(total_time * (10e6) / num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def train_impl(n, d):
    """Fit a 2-class penalized logistic regression on synthetic data.

    Features are an n-by-d matrix of 1.5s; labels are all ones.
    Returns the daal4py training result.
    """
    features = np.full((n, d), 1.5, dtype=np.double)
    labels = np.ones((n, 1), dtype=np.double)
    trainer = d4p.logistic_regression_training(2,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    return trainer.compute(features, labels)
def compute(train_data, train_labels, predict_data, nClasses):
    """Train a penalized logistic regression model and predict on new data.

    The prediction result carries class labels, probabilities and
    log-probabilities. Returns (prediction result, training result).
    """
    # L1 + L2 penalties with an intercept term.
    trained = d4p.logistic_regression_training(
        nClasses=nClasses, penaltyL1=0.1, penaltyL2=0.1, interceptFlag=True
    ).compute(train_data, train_labels)

    # Ask for every available prediction output in one pass.
    predictor = d4p.logistic_regression_prediction(
        nClasses=nClasses,
        resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
    )
    return predictor.compute(predict_data, trained.model), trained
def main(readcsv=read_csv, method='defaultDense'):
    """Penalized multi-class logistic regression, prediction-API aware.

    daal4py releases before 2020.0 spell the prediction option
    'resultsToCompute' (with 'Classes' in the value names); newer releases
    use 'resultsToEvaluate'. The right interface is chosen at run time.
    Returns (train_result, predict_result, predict_labels).
    """
    n_classes = 5
    n_features = 6

    # Training set: 6 feature columns followed by one label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = readcsv(train_file, range(n_features))
    y_train = readcsv(train_file, range(n_features, n_features + 1))

    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test set: only the feature columns are read here.
    test_file = "./data/batch/logreg_test.csv"
    x_test = readcsv(test_file, range(n_features))

    # Pick the prediction interface matching the linked DAAL version.
    from daal4py import __daal_link_version__ as dv
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
    if daal_version < (2020, 0):
        predictor = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToCompute=
            "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
        )
    else:
        predictor = d4p.logistic_regression_prediction(
            nClasses=n_classes,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
    predicted = predictor.compute(x_test, trained.model)

    # Probability matrices are (n_observations, n_classes).
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    # Ground-truth labels come straight from the last CSV column.
    y_test = np.loadtxt(test_file,
                        usecols=range(n_features, n_features + 1),
                        delimiter=',', ndmin=2)
    # Expect fewer than 2.5% misclassified observations.
    assert np.count_nonzero(predicted.prediction - y_test) / y_test.shape[0] < 0.025

    return (trained, predicted, y_test)
def train(self, train_data, train_labels):
    """Fit the logistic regression model, optionally with a custom solver.

    Builds an optimization solver (sgd / lbfgs / adagrad) from
    self.optSolverParam and passes it to daal4py's training algorithm.
    Returns self so calls can be chained.
    """
    dtype = np.float64 if self.dtype == "double" else np.float32
    params = self.optSolverParam
    solver_name = params['solverName']

    solver = None  # no custom solver unless one of the names matches
    if solver_name == 'sgd':
        rates = np.array([[params['solverLearningRate']]], dtype=dtype)
        method = params["solverMethod"]
        # defaultDense SGD processes a single observation per step.
        batch_size = 1 if method == "defaultDense" else int(params['solverBatchSize'])
        solver = d4p.optimization_solver_sgd(
            function=None,
            learningRateSequence=rates,
            method=method,
            accuracyThreshold=dtype(params['solverAccuracyThreshold']),
            nIterations=int(params['solverMaxIterations']),
            batchSize=batch_size,
        )
    elif solver_name == 'lbfgs':
        steps = np.array([[params['solverStepLength']]], dtype=dtype)
        solver = d4p.optimization_solver_lbfgs(
            function=None,
            stepLengthSequence=steps,
            accuracyThreshold=dtype(params['solverAccuracyThreshold']),
            nIterations=int(params['solverMaxIterations']),
            batchSize=int(params['solverBatchSize']),
            correctionPairBatchSize=int(params['solverCorrectionPairBatchSize']),
            L=int(params['solverL']),
        )
    elif solver_name == 'adagrad':
        rate = np.array([[params['solverLearningRate']]], dtype=dtype)
        solver = d4p.optimization_solver_adagrad(
            function=None,
            learningRate=rate,
            accuracyThreshold=dtype(params['solverAccuracyThreshold']),
            nIterations=int(params['solverMaxIterations']),
            batchSize=int(params['solverBatchSize']),
        )

    algo = d4p.logistic_regression_training(
        nClasses=self.nClasses,
        penaltyL1=self.penaltyL1,
        penaltyL2=self.penaltyL2,
        interceptFlag=self.interceptFlag,
        fptype=self.dtype,
        optimizationSolver=solver,
    )
    self.trainingResult = algo.compute(train_data, train_labels)
    return self
def main():
    """Multi-class logistic regression requesting all prediction outputs.

    Returns (train_result, predict_result, predict_labels).
    """
    n_classes = 5
    n_features = 6

    # Training set: 6 feature columns followed by one label column.
    train_file = "./data/batch/logreg_train.csv"
    x_train = read_csv(train_file, range(n_features))
    y_train = read_csv(train_file, range(n_features, n_features + 1))

    # Elastic-net style regularization: both L1 and L2 penalties.
    trainer = d4p.logistic_regression_training(nClasses=n_classes,
                                               penaltyL1=0.1,
                                               penaltyL2=0.1,
                                               interceptFlag=True)
    trained = trainer.compute(x_train, y_train)

    # Test set has the same column layout as the training set.
    test_file = "./data/batch/logreg_test.csv"
    x_test = read_csv(test_file, range(n_features))
    y_test = read_csv(test_file, range(n_features, n_features + 1))

    # Request labels, probabilities and log-probabilities in one pass.
    predictor = d4p.logistic_regression_prediction(
        nClasses=n_classes,
        resultsToCompute=
        "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
    )
    predicted = predictor.compute(x_test, trained.model)

    # Labels are one column; probability matrices are (n_obs, n_classes).
    assert predicted.prediction.shape == (x_test.shape[0], y_train.shape[1])
    assert predicted.probabilities.shape == (x_test.shape[0], n_classes)
    assert predicted.logProbabilities.shape == (x_test.shape[0], n_classes)

    return (trained, predicted, y_test)
from timeit import default_timer as timer from sklearn.metrics import mean_squared_error import daal4py as d4p import numpy as np import pandas as pd import common NUM_LOOPS = 100 d4p.daalinit() print("Computing for Logistic Regression With Daal") MODEL = d4p.logistic_regression_training(nClasses=2) train_result = MODEL.compute(common.X_dfc, common.y_dfc) def run_inference(num_observations: int = 1000): """Run xgboost for specified number of observations""" # Load data test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations) num_rows = len(test_df) ###################### print("_______________________________________") print("Total Number of Rows", num_rows) run_times = [] inference_times = [] for _ in range(NUM_LOOPS): start_time = timer() predict_algo = d4p.logistic_regression_prediction( nClasses=2,