def test_non_contig(self):
    """Check that non-contiguous inputs predict the same as contiguous copies.

    Random matrices are allocated one row and one column larger than
    needed and then sliced, which yields views that are not C-contiguous.
    Linear regression is trained and evaluated once on contiguous copies
    and once on the sliced views; both predictions must agree.
    """
    from numpy.random import rand

    n_rows, n_features, n_responses = 10007, 1017, 77

    # Oversized parents: slicing off the last column makes the views strided.
    X = rand(n_rows + 1, n_features + 1)
    Xp = rand(n_rows + 1, n_features + 1)
    y = rand(n_rows + 1, n_responses + 1)

    strided = (
        X[:n_rows, :n_features],
        Xp[:n_rows, :n_features],
        y[:n_rows, :n_responses],
    )
    packed = tuple(np.ascontiguousarray(view) for view in strided)

    # Sanity-check the fixtures before exercising the algorithm.
    self.assertTrue(not any(view.flags['C_CONTIGUOUS'] for view in strided))
    self.assertTrue(all(copy.flags['C_CONTIGUOUS'] for copy in packed))
    self.assertTrue(
        all(np.allclose(copy, view) for copy, view in zip(packed, strided)))

    def fit_predict(train_x, new_x, train_y):
        # Train a fresh model, then predict with a fresh prediction object.
        trained = d4p.linear_regression_training().compute(train_x, train_y)
        return d4p.linear_regression_prediction().compute(new_x, trained.model)

    contiguous_result = fit_predict(*packed)
    strided_result = fit_predict(*strided)

    self.assertTrue(
        np.allclose(strided_result.prediction, contiguous_result.prediction))
def main(readcsv=read_csv, method='defaultDense'):
    """Train a linear regression on the batch CSV and predict on the test CSV.

    Parameters:
        readcsv: callable ``readcsv(path, columns)`` used to load the data.
        method: accepted for interface compatibility; this body does not
            use it.

    Returns:
        A tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        holds the dependent-variable columns read from the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Columns 0-9 are the independent variables, columns 10-11 the
    # dependent ones (for each observation).
    feature_cols = range(10)
    response_cols = range(10, 12)

    # Training: configure the algorithm, load the data, fit the model.
    trainer = d4p.linear_regression_training(interceptFlag=True)
    indep_data = readcsv(train_path, feature_cols)
    dep_data = readcsv(train_path, response_cols)
    train_result = trainer.compute(indep_data, dep_data)

    # Prediction: the test data has the same number of features.
    predictor = d4p.linear_regression_prediction()
    pdata = readcsv(test_path, feature_cols)
    ptdata = readcsv(test_path, response_cols)
    predict_result = predictor.compute(pdata, train_result.model)

    # One predicted row per test observation, one column per response.
    expected_shape = (pdata.shape[0], dep_data.shape[1])
    assert predict_result.prediction.shape == expected_shape

    return (train_result, predict_result, ptdata)
def run_inference(num_observations: int = 1000):
    """Time daal4py linear-regression prediction on a sample of test rows.

    The prediction object is constructed and run ``NUM_LOOPS`` times
    against the module-level ``train_result`` model; each iteration's
    elapsed time is scaled and divided by the number of rows.

    Parameters:
        num_observations: sample size requested from
            ``common.get_test_data_df``.

    Returns:
        Whatever ``common.calculate_stats`` returns for the collected
        per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    num_rows = len(test_df)

    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    inference_times = []
    for _ in range(NUM_LOOPS):
        # Constructing the prediction object is deliberately kept inside
        # the timed region, as in the original measurement.
        start_time = timer()
        predict_algo = d4p.linear_regression_prediction(fptype='float')
        predict_algo.compute(test_df, train_result.model)
        total_time = timer() - start_time

        # NOTE(review): 10e6 equals 1e7, not 1e6 -- confirm the intended
        # unit before relying on the absolute values reported here.
        inference_times.append(total_time * (10e6) / num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def compute(train_indep_data, train_dep_data, test_indep_data):
    """Fit a linear regression and predict on the given test features.

    Parameters:
        train_indep_data: independent variables used for training.
        train_dep_data: dependent variables used for training.
        test_indep_data: independent variables to predict for.

    Returns:
        A tuple ``(prediction_result, train_result)``.
    """
    # Train with an intercept term; the result carries the fitted model.
    trainer = d4p.linear_regression_training(interceptFlag=True)
    train_result = trainer.compute(train_indep_data, train_dep_data)

    # Predict using the model produced by the training step above.
    predictor = d4p.linear_regression_prediction()
    prediction_result = predictor.compute(test_indep_data, train_result.model)

    return prediction_result, train_result
def linearRegression(self, Data_Path, test_data_path, target, n):
    '''
    daal4py Linear Regression SPMD Mode

    Each process trains on its own CSV shard, named
    ``Data_Path + <process id + 1> + ".csv"``. Prediction and metric
    computation happen only on process 0; the other processes return
    after shutting the engine down.

    Parameters:
        Data_Path: path prefix of the per-process training shards.
        test_data_path: path of the CSV holding the test data.
        target: name of the target column in both data sets.
        n: thread count passed to ``d4p.daalinit``.

    Side effects:
        Writes timings into ``self.latency`` and, on process 0, the MSE
        and R2 score into ``self.metrics``.
    '''
    # Initialize SPMD mode
    d4p.daalinit(nthreads=n)

    # Read the rank once, while the engine is still running.
    rank = d4p.my_procid()

    # training setup
    shard_path = Data_Path + str(rank + 1) + ".csv"
    data = pd.read_csv(shard_path)
    X = data.drop(columns=target)
    y = data[target]

    train_algo = d4p.linear_regression_training(method='qrDense',
                                                distributed=True)
    self.logger.info('Training the Linear Regression in pydaal SPMD Mode')
    start = time.time()
    train_result = train_algo.compute(X, y)
    self.latency['Parallel_LinearRegression_Pydaal_Time'] = \
        time.time() - start

    # test file setup
    test = pd.read_csv(test_data_path)
    y_test = test[target]
    X_test = test.drop(target, axis=1)

    # Only process 0 predicts, so only it will have a result to score.
    predict_result = None
    if rank == 0:
        predict_algo = d4p.linear_regression_prediction()
        # now predict using the model from the training above
        predict_result = predict_algo.compute(X_test, train_result.model)
        self.latency[
            "Overall Parallel Linear Regression Prediction SPMD Time"] = \
            time.time() - start

    d4p.daalfini()
    self.logger.info('Completed Linear Regression in pydaal SPMD Mode')

    if predict_result is None:
        # Non-root processes have no prediction; touching it here would
        # raise NameError, so they stop before the metrics.
        return

    # Compute metrics
    mse = mean_squared_error(y_test, predict_result.prediction)
    r2score = r2_score(y_test, predict_result.prediction)

    # Store the time taken and model metrics
    self.metrics['MSE_Parallel_LinearRegression_Pydaal'] = mse
    self.metrics['r2score_Parallel_LinearRegression_Pydaal'] = r2score
    return
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model.

    ``X`` is passed through ``make2d`` first. The floating-point type of
    the prediction algorithm is derived from ``self.coef_``.

    Returns:
        The prediction array, flattened with ``np.ravel`` when it has
        exactly one column.
    """
    X = make2d(X)
    predictor = daal4py.linear_regression_prediction(
        fptype=getFPType(self.coef_), method='defaultDense')
    prediction = predictor.compute(X, self.daal_model_).prediction

    # A single response column is handed back as a flat array.
    return np.ravel(prediction) if prediction.shape[1] == 1 else prediction
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model after a feature-count check.

    ``X`` is passed through ``make2d`` first. The floating-point type of
    the prediction algorithm is derived from ``self.coef_``.

    Returns:
        The prediction array; it is flattened with ``np.ravel`` only when
        it has one column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: if the column count of ``X`` differs from
            ``self.n_features_in_``.
    """
    X = make2d(X)
    predictor = daal4py.linear_regression_prediction(
        fptype=getFPType(self.coef_), method='defaultDense')

    # Reject inputs whose width does not match the fitted model.
    if X.shape[1] != self.n_features_in_:
        raise ValueError((f'X has {X.shape[1]} features, '
                          f'but LinearRegression is expecting '
                          f'{self.n_features_in_} features as input'))

    prediction = predictor.compute(X, self.daal_model_).prediction

    single_column = prediction.shape[1] == 1
    if single_column and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model.

    ``X`` is passed through ``make2d`` first. The floating-point type of
    the prediction algorithm is derived from ``self.coef_``.

    Returns:
        The prediction array; it is flattened with ``np.ravel`` only when
        it has one column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: if the daal4py computation raises ``RuntimeError``;
            the original error is attached as ``__cause__``.
    """
    X = make2d(X)
    _fptype = getFPType(self.coef_)
    lr_pred = daal4py.linear_regression_prediction(
        fptype=_fptype,
        method='defaultDense'
    )
    try:
        lr_res = lr_pred.compute(X, self.daal_model_)
    except RuntimeError as err:
        # Chain explicitly so the underlying daal4py failure stays visible
        # as the direct cause of the ValueError.
        raise ValueError(
            'Input data shape {} is inconsistent with the trained model'.format(X.shape)
        ) from err

    res = lr_res.prediction
    if res.shape[1] == 1 and self.coef_.ndim == 1:
        res = np.ravel(res)
    return res
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model.

    ``X`` is passed through ``make2d`` first. The floating-point type of
    the prediction algorithm is derived from ``self.coef_``. When
    ``sklearn_check_version('0.23')`` is true, the column count of ``X``
    is validated against ``self.n_features_in_`` before computing.

    Returns:
        The prediction array; it is flattened with ``np.ravel`` only when
        it has one column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: on a feature-count mismatch, or if the daal4py
            computation raises ``RuntimeError`` (the original error is
            attached as ``__cause__``).
    """
    X = make2d(X)
    _fptype = getFPType(self.coef_)
    lr_pred = daal4py.linear_regression_prediction(fptype=_fptype,
                                                   method='defaultDense')

    # The version check comes first so ``n_features_in_`` is only read
    # when the check says the feature-count validation applies.
    if sklearn_check_version('0.23') and X.shape[1] != self.n_features_in_:
        raise ValueError(f'X has {X.shape[1]} features, '
                         f'but LinearRegression is expecting '
                         f'{self.n_features_in_} features as input')

    try:
        lr_res = lr_pred.compute(X, self.daal_model_)
    except RuntimeError as err:
        # Chain explicitly so the underlying daal4py failure stays visible
        # as the direct cause of the ValueError.
        raise ValueError(
            f'Input data shape {X.shape} is inconsistent with the trained model'
        ) from err

    res = lr_res.prediction
    if res.shape[1] == 1 and self.coef_.ndim == 1:
        res = np.ravel(res)
    return res
def main(readcsv=read_csv, method='defaultDense'):
    """Train a linear regression chunk by chunk, then predict on test data.

    Parameters:
        readcsv: callable ``readcsv(path, columns, skip_rows, n_rows)``
            for chunked reads and ``readcsv(path, columns)`` for full
            reads.
        method: accepted for interface compatibility; this body does not
            use it.

    Returns:
        A tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        holds the dependent-variable columns read from the test file.
    """
    infile = "./data/batch/linear_regression_train.csv"
    testfile = "./data/batch/linear_regression_test.csv"

    # Configure a Linear regression training object for streaming
    train_algo = d4p.linear_regression_training(interceptFlag=True,
                                                streaming=True)

    chunk_size = 250
    lines_read = 0
    # read and feed chunk by chunk
    while True:
        # Read data in chunks
        # Let's have 10 independent, and 2 dependent variables
        # (for each observation)
        try:
            indep_chunk = readcsv(infile, range(10), lines_read, chunk_size)
            dep_chunk = readcsv(infile, range(10, 12), lines_read, chunk_size)
        except Exception:
            # Presumably the reader raises once the offset passes the end
            # of the file; the exact type depends on the reader. Catching
            # Exception instead of everything lets KeyboardInterrupt and
            # SystemExit propagate.
            break
        if indep_chunk.shape[0] == 0:
            # An empty chunk would never advance lines_read and the loop
            # would spin forever; treat it as end of input.
            break
        indep_data, dep_data = indep_chunk, dep_chunk

        # Now feed chunk
        train_algo.compute(indep_data, dep_data)
        lines_read += indep_data.shape[0]

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    predict_algo = d4p.linear_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(10))
    ptdata = readcsv(testfile, range(10, 12))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # One predicted row per test observation, one column per response
    # (taken from the last training chunk that was fed).
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

    return (train_result, predict_result, ptdata)
def linearRegression(self, X_train, X_test, y_train, y_test, target):
    '''
    Train and evaluate a daal4py linear regression in batch/serial mode.

    Training time and overall (training + prediction) time are written to
    ``self.latency``; the MSE and R2 score of the predictions against
    ``y_test`` are written to ``self.metrics``. ``target`` is accepted but
    not used by this body.
    '''
    trainer = d4p.linear_regression_training(method='qrDense')

    self.logger.info(
        'Training the Linear Regression in pydaal Batch/Serial Mode')
    t0 = time.time()

    # Fit; the result carries the model used for prediction below.
    fitted = trainer.compute(X_train, y_train)
    self.latency["Serial Linear Regression Batch Time"] = time.time() - t0

    predictor = d4p.linear_regression_prediction()
    y_pred = predictor.compute(X_test, fitted.model).prediction
    # Measured from the same start point, so this includes training time.
    self.latency[
        'Overall Serial Linear Regression Prediction Batch Time'] = \
        time.time() - t0

    self.logger.info(
        'Completed Linear Regression in pydaal Batch/Serial Mode')

    # Score the predictions and record both metrics.
    self.metrics['MSE_serial_linear_regression_pydaal'] = \
        mean_squared_error(y_test, y_pred)
    self.metrics['r2_score_serial_linear_regression_pydaal'] = \
        r2_score(y_test, y_pred)

    return
# saving model to a file
# The context manager closes (and therefore flushes) the file before it is
# reopened for reading below.
with open(model_filename, "wb") as model_file:
    pickle.dump(model, model_file)


# Now let's **load up the model** and look at one of the model's features.

# In[6]:


# loading the training model from a file
# NOTE: unpickling can execute arbitrary code; only load files this script
# wrote itself.
with open(model_filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Here is one of our loaded model's features: \n\n", loaded_model.Beta)


# ## Making a Prediction and Saving the Results

# Time to **make a prediction!**

# In[7]:


# now predicting the target feature(s) using the trained model
y_pred = d4p.linear_regression_prediction().compute(X_test, loaded_model).prediction


# Now let's **export the results to a CSV file**.

# In[8]:


np.savetxt("./results/linear_regression_batch_results.csv", y_pred, delimiter=",")
print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]")
t1 = timeit.default_timer() r = func(*args, **keyArgs) t2 = timeit.default_timer() times.append(t2-t1) print(min(times)) return r return st_func p = args.size[0] n = args.size[1] X = rand(p,n) Xp = rand(p,n) y = rand(p,n) regr_train = linear_regression_training() regr_predict = linear_regression_prediction() @st_time def test_fit(X,y): regr_train.compute(X, y) @st_time def test_predict(X, m): regr_predict.compute(X, m) print (','.join([args.batchID, args.arch, args.prefix, "Linear.fit", coreString(args.num_threads), "Double", "%sx%s" % (p,n)]), end=',') test_fit(X, y) res = regr_train.compute(X, y) print (','.join([args.batchID, args.arch, args.prefix, "Linear.prediction", coreString(args.num_threads), "Double", "%sx%s" % (p,n)]), end=',') test_predict(Xp, res.model)
def test_predict(Xp, model):
    """Run linear-regression prediction for ``Xp`` using ``model``.

    The floating-point type of the prediction algorithm is chosen from
    ``Xp`` via ``getFPType``.

    Returns:
        The result object of the prediction ``compute`` call.
    """
    fptype = getFPType(Xp)
    predictor = linear_regression_prediction(fptype=fptype)
    result = predictor.compute(Xp, model)
    return result
# Each process reads its own training shard, selected by its 1-based
# process id.
infile = "./data/distributed/linear_regression_train_" + str(
    d4p.my_procid() + 1) + ".csv"

# Configure a Linear regression training object
train_algo = d4p.linear_regression_training(distributed=True)

# Read data. Let's have 10 independent, and 2 dependent variables
# (for each observation)
indep_data = loadtxt(infile, delimiter=',', usecols=range(10))
dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12))

# Now train/compute, the result provides the model for prediction
train_result = train_algo.compute(indep_data, dep_data)

# Now let's do some prediction
# It runs only on a single node
if d4p.my_procid() == 0:
    predict_algo = d4p.linear_regression_prediction()
    # read test data (with same #features)
    pdata = loadtxt("./data/distributed/linear_regression_test.csv",
                    delimiter=',',
                    usecols=range(10))
    # now predict using the model from the training above; reuse the
    # prediction object created above instead of building a second one
    predict_result = predict_algo.compute(pdata, train_result.model)

    # One predicted row per test observation, one column per response
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

print('All looks good!')
d4p.daalfini()
# loading the training model from a file
# The context manager makes sure the file handle is closed after loading.
with open(model_filename, "rb") as model_file:
    loaded_model = joblib.load(model_file)
print("Here is one of our loaded model's features: \n\n", loaded_model.Beta)


# ## Making a Prediction and Saving the Results

# Time to **make a prediction!**

# In[19]:


# read test data
test_data = pd.read_csv("./data/linear_regression_test.csv").drop(["target"], axis=1)

# now predict using the model from the training above
# NOTE(review): this uses train_result.model, not loaded_model -- confirm
# that predicting from the in-memory model is intended.
predict_result = d4p.linear_regression_prediction().compute(
    test_data, train_result.model).prediction


# Now let's **export the results to a CSV file**. We will also **stop the distribution engine.**

# In[20]:


# now export the results to a CSV file, one per process (1-based id)
results_filename = "./results/daal4py_Distributed_LinearRegression_results" + str(
    d4p.my_procid() + 1) + ".csv"
np.savetxt(results_filename, predict_result, delimiter=",")

d4p.daalfini()  # stops the distribution engine
print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]")


# In[ ]:
def lr_predict(N, D, model):
    """Run linear-regression prediction on freshly generated random data.

    Parameters:
        N: row-count base; the generated matrix has ``N // 2`` rows.
        D: number of columns of the generated matrix.
        model: trained model passed to the prediction ``compute`` call.

    Returns:
        The result object of the prediction ``compute`` call.
    """
    # Floor division keeps the row count an int: on Python 3, ``N / 2`` is
    # a float, which is not accepted as an array dimension.
    data = np.random.random_sample((N // 2, D))
    return daal4py.linear_regression_prediction().compute(data, model)