def train(self, X, y=None):
    '''
    Fit a ridge regression model with the batch ridge training algorithm.

    :param X: training data
    :param y: dependent variables (responses)
    :return: Ridge Regression model object (also stored in ``self.model``)
    '''
    # Wrap observations and responses as numeric tables for the algorithm.
    data_table = IInput.HomogenousDaalData(X).getNumericTable()
    response_table = IInput.HomogenousDaalData(y).getNumericTable()

    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, data_table)
    trainer.input.set(ridge_training.dependentVariables, response_table)

    # NOTE(review): both branches assign True, so the presence of
    # 'intercept' in self.parameters currently has no effect on the flag.
    # Confirm the intended value for the "absent" case before changing it.
    if 'intercept' in self.parameters:
        trainer.parameter.interceptFlag = True
    else:
        trainer.parameter.interceptFlag = True

    # The ridge penalty is handed over as a 1x1 numeric table.
    trainer.parameter.ridgeParameters = HomogenNumericTable(
        np.array([self.alpha], ndmin=2))

    # Run the training and keep the resulting model on the instance.
    self.model = trainer.compute().get(ridge_training.model)
    return self.model
def test_intercept_flag(rows=10, columns=9):
    '''
    Train a ridge model (ridge parameter 1.0) on random data and check that
    the value at beta[0, 0], which this test treats as the intercept, agrees
    with ``intercept_`` of a ScikitRidgeRegression fitted on the same data
    with ``fit_intercept=True``.
    '''
    sample = get_random_array(rows, columns)
    features, targets = sample[0], sample[1]
    penalty = 1.0

    feature_table = HomogenNumericTable(features)
    target_table = HomogenNumericTable(targets)

    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, feature_table)
    trainer.input.set(ridge_training.dependentVariables, target_table)
    # The ridge penalty is passed as a 1x1 numeric table.
    trainer.parameter.ridgeParameters = HomogenNumericTable(
        np.array([penalty], ndmin=2))

    trained_model = trainer.compute().get(ridge_training.model)
    beta = getNumpyArray(trained_model.getBeta())
    daal_intercept = beta[0, 0]

    # Reference fit with the same penalty.
    reference = ScikitRidgeRegression(alpha=penalty, fit_intercept=True)
    reference.fit(features, targets)

    assert_array_almost_equal(reference.intercept_, [daal_intercept])
def test_coeff_size(rows=10, columns=9): ''' number of beta coefficients (with intercept flag on) is the same number as size of data sample ''' inout = get_random_array(rows, columns) x = inout[0] y = inout[1] ntX = HomogenNumericTable(x) ntY = HomogenNumericTable(y) ridge_training_algorithm = ridge_training.Batch() ridge_training_algorithm.input.set(ridge_training.data, ntX) ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY) # set parameter alpha = 1.0 alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2)) ridge_training_algorithm.parameter.ridgeParameters = alpha_nt result = ridge_training_algorithm.compute() model = result.get(ridge_training.model) beta_coeff = model.getBeta() np_beta = getNumpyArray(beta_coeff) assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
def get_daal_prediction(x=np.arange(10).reshape(10, 1),
                        y=np.arange(10).reshape(10, 1)):
    '''
    Train a ridge model with a ridge parameter of 0.0 on (x, y), predict on
    the same x and return the predictions as a numpy array.

    The predictions are asserted to be almost equal to ``y`` before they are
    returned, so the call fails for data the model cannot reproduce.

    :param x: training data, also used as the prediction input
    :param y: dependent variables (responses)
    :return: numpy array with the predicted responses
    '''
    data_table = HomogenNumericTable(x)
    response_table = HomogenNumericTable(y)

    # Training step.
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, data_table)
    trainer.input.set(ridge_training.dependentVariables, response_table)
    penalty = 0.0
    trainer.parameter.ridgeParameters = HomogenNumericTable(
        np.array([penalty], ndmin=2))
    trained_model = trainer.compute().get(ridge_training.model)

    # Prediction step on the training data itself.
    predictor = ridge_prediction.Batch()
    predictor.input.setModel(ridge_prediction.model, trained_model)
    predictor.input.setTable(ridge_prediction.data, data_table)
    prediction_result = predictor.compute()
    np_predicted = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))

    # The predictions must reproduce the initial dependent variable.
    assert_array_almost_equal(y, np_predicted)
    return np_predicted
def test_ridge_regression_simple():
    '''
    Train on the three points 0, 2, 3 (used as both data and responses) with
    a ridge parameter of 1.0, compare the beta coefficients with the expected
    values [0.294118, 0.823529], then predict on the same data and compare
    the predictions with the inputs at zero decimal places.
    '''
    samples = np.array([0., 2., 3.]).reshape(3, 1)
    # A single numeric table serves as both the data and the responses.
    nt_x = nt_y = HomogenNumericTable(samples)

    # Training.
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, nt_x)
    trainer.input.set(ridge_training.dependentVariables, nt_y)
    penalty = 1.0
    trainer.parameter.ridgeParameters = HomogenNumericTable(
        np.array([penalty], ndmin=2))
    trained_model = trainer.compute().get(ridge_training.model)

    # Beta coefficients of the trained model against the expected values.
    actual_beta = getNumpyArray(trained_model.getBeta())
    expected_beta = np.array([0.294118, 0.823529]).reshape(1, 2)
    assert_array_almost_equal(expected_beta, actual_beta)

    # Prediction on the training data.
    predictor = ridge_prediction.Batch_Float64DefaultDense()
    predictor.input.setModel(ridge_prediction.model, trained_model)
    predictor.input.setTable(ridge_prediction.data, nt_x)
    prediction_result = predictor.compute()
    np_predict = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))

    # decimal=0: only a coarse agreement with the inputs is required.
    assert_array_almost_equal(samples, np_predict, decimal=0)
def training(self, trainData, trainDependentVariables):
    '''
    Train a ridge regression model with the batch training algorithm.

    ``self.ridgeParameters`` is normalised to a 2-D numpy array (and stored
    back on the instance) before it is wrapped in a numeric table:

    * a list is accepted when its length equals
      ``trainDependentVariables.getNumberOfRows()``;
    * an int or float (including subclasses such as ``numpy.float64``, but
      not bool) is promoted to a 1x1 array;
    * a ``numpy.ndarray`` is used unchanged.

    :param trainData: numeric table with the training data
    :param trainDependentVariables: numeric table with the dependent
        variables (responses)
    :return: the training result returned by ``algorithm.compute()``
    :raises SystemExit: after a warning, when ``self.ridgeParameters`` has an
        unsupported type or a list of the wrong length
    '''
    # Only the dense normal-equation method is used; any other requested
    # method triggers a warning and falls back to it.
    if self.method != 'defaultDense':
        warnings.warn(
            'Invalid method, using default dense Normal Equation method')
    method = training.normEqDense

    params = self.ridgeParameters
    if isinstance(params, np.ndarray):
        # Already an array: used as provided.
        pass
    elif isinstance(params, list):
        # NOTE(review): the count is taken from getNumberOfRows() while the
        # message speaks of the number of dependent variables; confirm the
        # layout of the dependent-variables table matches this check.
        if len(params) != trainDependentVariables.getNumberOfRows():
            warnings.warn(
                'no. of ridgeParameters must be equal to no. of dependent variables'
            )
            # SystemExit is kept so existing callers see the same exception.
            raise SystemExit
        self.ridgeParameters = np.array(params, ndmin=2)
    elif isinstance(params, (int, float)) and not isinstance(params, bool):
        # isinstance (rather than an exact type match) also admits numeric
        # subclasses; bool stays rejected, as it was before.
        self.ridgeParameters = np.array(params, ndmin=2)
    else:
        warnings.warn(
            'Invalid alpha type. ridgeParameters must be type int or float '
            'or list or numpy.ndarray'
        )
        # SystemExit is kept so existing callers see the same exception.
        raise SystemExit

    nT_ridgeParams = HomogenNumericTable(self.ridgeParameters)

    algorithm = training.Batch(method=method, fptype=self.dtype)
    algorithm.parameter.ridgeParameters = nT_ridgeParams
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariables, trainDependentVariables)

    # Build the ridge regression model and return the algorithm results.
    return algorithm.compute()