def test_svd_simple():
    '''
    Run the batch SVD (default dense method, both singular matrices requested
    in packed form) on a fixed 4x2 matrix and compare the singular values and
    both singular matrices with hard-coded expected values.
    '''
    indata = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    data_table = HomogenNumericTable(indata)
    n_columns = indata.shape[1]

    algorithm = svd.Batch(
        method=svd.defaultDense,
        leftSingularMatrix=svd.requiredInPackedForm,
        rightSingularMatrix=svd.requiredInPackedForm,
    )
    algorithm.input.set(svd.data, data_table)
    result = algorithm.compute()

    sigma = getNumpyArray(result.get(svd.singularValues))
    U = getNumpyArray(result.get(svd.leftSingularMatrix))
    V = getNumpyArray(result.get(svd.rightSingularMatrix))

    # Shape checks: one singular value per input column, U shaped like the
    # input, V square with one row/column per input column.
    assert sigma.shape[1] == n_columns
    assert indata.shape == U.shape
    assert n_columns == V.shape[0] == V.shape[1]

    # Value checks against the reference decomposition.
    expected_sigma = np.array([[14.269, 0.6268]])
    expected_u = np.array([[-0.152, -0.823],
                           [-0.350, -0.421],
                           [-0.547, -0.020],
                           [-0.745, 0.381]])
    expected_v = np.array([[-0.641, -0.767],
                           [0.767, -0.641]])
    assert_array_almost_equal(expected_sigma, sigma, decimal=4)
    assert_array_almost_equal(expected_u, U, decimal=3)
    assert_array_almost_equal(expected_v, V, decimal=3)
def test_linear_regression_simple():
    '''
    Train a linear regression (QR dense method) where the dependent variable
    is the data itself, check the beta coefficients are [0, 1], then predict
    on the training data and check the input is reproduced.
    '''
    x = np.array([[0.], [2.], [3.]])
    # The very same table serves as data and as dependent variables.
    table = HomogenNumericTable(x)

    # --- training ---------------------------------------------------------
    trainer = linear_training.Batch(method=linear_training.qrDense)
    trainer.input.set(linear_training.data, table)
    trainer.input.set(linear_training.dependentVariables, table)
    model = trainer.compute().get(linear_training.model)

    actual_beta = getNumpyArray(model.getBeta())
    expected_beta = np.array([[0, 1]])
    assert_almost_equal(expected_beta, actual_beta)

    # --- prediction -------------------------------------------------------
    predictor = linear_prediction.Batch()
    predictor.input.setModel(linear_prediction.model, model)
    predictor.input.setTable(linear_prediction.data, table)
    prediction_result = predictor.compute()

    predicted = getNumpyArray(
        prediction_result.get(linear_prediction.prediction))
    assert_array_almost_equal(x, predicted)
def test_svd_daal_vs_sklearn(rows=1000, columns=1000):
    '''
    Compare the singular values computed by the DAAL batch SVD with the ones
    returned by ``np.linalg.svd`` on the same random matrix, and print how
    long each decomposition took.

    :param rows: first argument forwarded to get_random_array
    :param columns: second argument forwarded to get_random_array

    When the CHECKPERFORMANCE environment variable is set, the test also
    asserts that DAAL was not slower than the reference decomposition.
    '''
    indata = get_random_array(rows, columns)
    daal_input = HomogenNumericTable(indata)
    algorithm = svd.Batch()
    algorithm.input.set(svd.data, daal_input)

    # Reference decomposition; only the singular values are compared.
    start_sklearn = time.time()
    _U, s, _Vh = np.linalg.svd(indata, full_matrices=False)
    sklearn_elapsed = time.time() - start_sklearn

    # Only compute() is timed for DAAL; the table was built above.
    start_daal = time.time()
    result = algorithm.compute()
    daal_elapsed = time.time() - start_daal

    # The timing comparison is opt-in because it depends on the machine.
    if os.getenv("CHECKPERFORMANCE") is not None:
        assert daal_elapsed <= sklearn_elapsed

    sigma = getNumpyArray(result.get(svd.singularValues))
    _rows, cols = sigma.shape
    # Flatten the singular values to 1-D to match the reference output.
    d_sigma = sigma.reshape(cols, )
    assert_array_almost_equal(d_sigma, s)

    print("SVD for matrix[{}][{}]".format(rows, columns))
    print("+ Sklearn SVD: {}".format(sklearn_elapsed))
    # Was mislabelled "Sklearn Daal"; this line reports the DAAL timing.
    print("+ Daal SVD: {}".format(daal_elapsed))
def test_intercept_flag(rows=10, columns=9):
    '''
    Train a ridge regression (alpha = 1.0) with DAAL and with scikit-learn on
    the same random data and check that the first DAAL beta coefficient
    matches the scikit-learn intercept.

    :param rows: first argument forwarded to get_random_array
    :param columns: second argument forwarded to get_random_array
    '''
    alpha = 1.0
    inout = get_random_array(rows, columns)
    x, y = inout[0], inout[1]

    # --- DAAL ---------------------------------------------------------------
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, ntX)
    trainer.input.set(ridge_training.dependentVariables, ntY)
    # The ridge parameter is handed over as a 1x1 numeric table.
    alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
    trainer.parameter.ridgeParameters = alpha_nt

    model = trainer.compute().get(ridge_training.model)
    np_beta = getNumpyArray(model.getBeta())
    daal_intercept = np_beta[0, 0]

    # --- scikit-learn -------------------------------------------------------
    regression = ScikitRidgeRegression(alpha=alpha, fit_intercept=True)
    regression.fit(x, y)

    assert_array_almost_equal(regression.intercept_, [daal_intercept])
def test_coeff_size(rows=10, columns=9): ''' number of beta coefficients (with intercept flag on) is the same number as size of data sample ''' inout = get_random_array(rows, columns) x = inout[0] y = inout[1] ntX = HomogenNumericTable(x) ntY = HomogenNumericTable(y) ridge_training_algorithm = ridge_training.Batch() ridge_training_algorithm.input.set(ridge_training.data, ntX) ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY) # set parameter alpha = 1.0 alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2)) ridge_training_algorithm.parameter.ridgeParameters = alpha_nt result = ridge_training_algorithm.compute() model = result.get(ridge_training.model) beta_coeff = model.getBeta() np_beta = getNumpyArray(beta_coeff) assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
def get_daal_prediction(x=np.arange(10).reshape(10,1), y=np.arange(10).reshape(10,1)):
    '''
    Train a ridge regression with a ridge parameter of 0.0 on (x, y), predict
    on x again and return the prediction as a numpy array.

    :param x: training data (also used as prediction input)
    :param y: dependent variables
    :return: numpy array with the predicted values

    The prediction is asserted to be almost equal to y before returning.
    NOTE: the default arrays are created once, when the function is defined.
    '''
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)

    # --- training -----------------------------------------------------------
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, ntX)
    trainer.input.set(ridge_training.dependentVariables, ntY)
    alpha = 0.0
    alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
    trainer.parameter.ridgeParameters = alpha_nt
    model = trainer.compute().get(ridge_training.model)

    # --- prediction on the training data --------------------------------------
    predictor = ridge_prediction.Batch()
    predictor.input.setModel(ridge_prediction.model, model)
    predictor.input.setTable(ridge_prediction.data, ntX)
    prediction_result = predictor.compute()
    np_predicted = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))

    # The prediction has to reproduce the initial dependent variable.
    assert_array_almost_equal(y, np_predicted)
    return np_predicted
def test_svd_simple_check():
    '''
    Decompose a fixed 4x3 matrix with the default batch SVD and check that
    U * diag(sigma) * V reproduces the input.
    '''
    indata = np.array([[1, 3, 4],
                       [5, 6, 9],
                       [1, 2, 3],
                       [7, 6, 8]])
    data_table = HomogenNumericTable(indata)

    algorithm = svd.Batch()
    algorithm.input.set(svd.data, data_table)
    result = algorithm.compute()

    sigma = getNumpyArray(result.get(svd.singularValues))
    U = getNumpyArray(result.get(svd.leftSingularMatrix))
    V = getNumpyArray(result.get(svd.rightSingularMatrix))

    # Turn the singular values into a diagonal matrix.
    n_values = sigma.shape[1]
    sigma_diag = np.diag(sigma.reshape(n_values, ))

    # Rebuild the input: U * (diag(sigma) * V).
    scaled_v = np.dot(sigma_diag, V)
    outdata = np.dot(U, scaled_v)
    assert_array_almost_equal(outdata, indata)
def test_zscore_multicolumns():
    '''
    Check that z_score applied to a random 10x3 table gives the same result
    as scipy's stats.zscore computed along axis 0 with ddof=1.
    '''
    samples = np.random.rand(10, 3)

    # Reference values from scipy.
    expected = stats.zscore(samples, axis=0, ddof=1)

    # Values under test, converted back to a numpy array for comparison.
    samples_table = HomogenNumericTable(samples)
    actual = getNumpyArray(z_score(samples_table))

    assert_array_almost_equal(expected, actual)
def test_ridge_regression_simple():
    '''
    Train a ridge regression (ridge parameter 1.0) where the dependent
    variable is the data itself, compare the beta coefficients with
    hard-coded expected values, then predict on the training data and check
    the input is reproduced to zero decimals.
    '''
    x = np.array([[0.], [2.], [3.]])
    # The very same table serves as data and as dependent variables.
    table = HomogenNumericTable(x)

    # --- training -----------------------------------------------------------
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, table)
    trainer.input.set(ridge_training.dependentVariables, table)

    # The ridge parameter is handed over as a 1x1 numeric table.
    alpha = 1.0
    alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
    trainer.parameter.ridgeParameters = alpha_nt

    model = trainer.compute().get(ridge_training.model)

    actual_beta = getNumpyArray(model.getBeta())
    expected_beta = np.array([[0.294118, 0.823529]])
    assert_array_almost_equal(expected_beta, actual_beta)

    # --- prediction ---------------------------------------------------------
    predictor = ridge_prediction.Batch_Float64DefaultDense()
    predictor.input.setModel(ridge_prediction.model, model)
    predictor.input.setTable(ridge_prediction.data, table)
    prediction_result = predictor.compute()

    predicted = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))
    # decimal=0: only a coarse match with the input is required here.
    assert_array_almost_equal(x, predicted, decimal=0)
def predict(self, X):
    '''
    Predict with the trained ridge model stored in self.model.

    :param X: new data; wrapped with IInput.HomogenousDaalData to obtain the
        numeric table handed to the prediction algorithm
    :return: numpy array with the predicted values

    Side effect: when 'intercept' is present in self.parameters, the first
    beta coefficient is stored as a one-element list in self.intercept_.
    '''
    data_table = IInput.HomogenousDaalData(X).getNumericTable()

    predictor = ridge_prediction.Batch()
    predictor.input.setModel(ridge_prediction.model, self.model)
    predictor.input.setTable(ridge_prediction.data, data_table)

    if 'intercept' in self.parameters:
        np_beta = getNumpyArray(self.get_beta())
        self.intercept_ = [np_beta[0, 0]]

    prediction_result = predictor.compute()
    return getNumpyArray(prediction_result.get(ridge_prediction.prediction))
def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
    '''
    Train a linear regression on (x, y), predict on x again and return the
    prediction as a numpy array.

    :param x: training data (also used as prediction input)
    :param y: dependent variables
    :return: numpy array with the predicted values

    The prediction is asserted to be almost equal to y before returning.
    NOTE: the default arrays are created once, when the function is defined.
    '''
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)

    # --- training -----------------------------------------------------------
    trainer = linear_training.Batch()
    trainer.input.set(linear_training.data, ntX)
    trainer.input.set(linear_training.dependentVariables, ntY)
    model = trainer.compute().get(linear_training.model)

    # --- prediction on the training data --------------------------------------
    predictor = linear_prediction.Batch()
    predictor.input.setModel(linear_prediction.model, model)
    predictor.input.setTable(linear_prediction.data, ntX)
    prediction_result = predictor.compute()
    np_predicted = getNumpyArray(
        prediction_result.get(linear_prediction.prediction))

    # The prediction has to reproduce the initial dependent variable.
    assert_array_almost_equal(y, np_predicted)
    return np_predicted
def test_coeff_size(rows=10, columns=9): ''' number of beta coefficients (with intercept flag on) is the same number as size of data sample ''' inout = get_random_array(rows, columns) test_overfitting(rows, columns) x = inout[0] y = inout[1] ntX = HomogenNumericTable(x) ntY = HomogenNumericTable(y) lr_train = linear_training.Batch() lr_train.input.set(linear_training.data, ntX) lr_train.input.set(linear_training.dependentVariables, ntY) result = lr_train.compute() model = result.get(linear_training.model) beta_coeff = model.getBeta() np_beta = getNumpyArray(beta_coeff) assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
def predict(self, X):
    '''
    Predict with the trained linear model stored in self.model.

    :param X: new data; wrapped with IInput.HomogenousDaalData to obtain the
        numeric table handed to the prediction algorithm
    :return: numpy array with the predicted values
    '''
    data_table = IInput.HomogenousDaalData(X).getNumericTable()

    predictor = linear_prediction.Batch()
    predictor.input.setModel(linear_prediction.model, self.model)
    predictor.input.setTable(linear_prediction.data, data_table)
    # TODO: the 'intercept' entry of self.parameters is not applied to the
    # prediction algorithm yet (parameter.interceptFlag is left untouched).

    prediction_result = predictor.compute()
    return getNumpyArray(prediction_result.get(linear_prediction.prediction))
def test_intercept_flag(rows=10, columns=9):
    '''
    Train a linear regression with DAAL and with scikit-learn on the same
    random data and check that the first DAAL beta coefficient matches the
    scikit-learn intercept.

    :param rows: first argument forwarded to get_random_array and
        test_overfitting
    :param columns: second argument forwarded to get_random_array and
        test_overfitting
    '''
    # Import from the public package: the private module
    # sklearn.linear_model.base is not available in current scikit-learn
    # releases, while this path works on old and new versions alike.
    from sklearn.linear_model import LinearRegression as ScikitLinearRegression

    inout = get_random_array(rows, columns)
    test_overfitting(rows, columns)
    x = inout[0]
    y = inout[1]

    # --- DAAL ---------------------------------------------------------------
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)
    lr_train = linear_training.Batch()
    lr_train.input.set(linear_training.data, ntX)
    lr_train.input.set(linear_training.dependentVariables, ntY)
    result = lr_train.compute()
    model = result.get(linear_training.model)
    np_beta = getNumpyArray(model.getBeta())
    # The test treats the first beta coefficient as the intercept.
    daal_intercept = np_beta[0, 0]

    # --- scikit-learn -------------------------------------------------------
    regression = ScikitLinearRegression()
    regression.fit(x, y)
    scikit_intercept = regression.intercept_

    assert_array_almost_equal(scikit_intercept, [daal_intercept])