Esempio n. 1
0
    def train(self, X, y=None):
        '''
        Fit a ridge regression model with the batch training algorithm.

        :param X: training data
        :param y: dependent variables (responses)
        :return: trained Ridge Regression model (also stored in self.model)
        '''
        # Wrap the raw inputs as numeric tables
        data_table = IInput.HomogenousDaalData(X).getNumericTable()
        response_table = IInput.HomogenousDaalData(y).getNumericTable()

        # Batch trainer fed with the data and the responses
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, data_table)
        trainer.input.set(ridge_training.dependentVariables, response_table)

        # NOTE(review): both branches below yield True, so the intercept is
        # always enabled regardless of self.parameters -- confirm the intent.
        has_intercept_key = 'intercept' in self.parameters
        trainer.parameter.interceptFlag = True if has_intercept_key else True

        # Ridge penalty is passed to the algorithm as a 2-D numeric table
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([self.alpha], ndmin=2))

        # Run the training and keep the resulting model on the instance
        self.model = trainer.compute().get(ridge_training.model)
        return self.model
    def test_intercept_flag(rows=10, columns=9):
        '''
        Check that the first beta coefficient of the DAAL ridge model matches
        the intercept fitted by the scikit-learn ridge regression.

        :param rows: first argument forwarded to get_random_array
        :param columns: second argument forwarded to get_random_array
        '''
        sample = get_random_array(rows, columns)
        x, y = sample[0], sample[1]

        # DAAL side: train with the default intercept setting and alpha = 1.0
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, HomogenNumericTable(x))
        trainer.input.set(ridge_training.dependentVariables,
                          HomogenNumericTable(y))

        alpha = 1.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([alpha], ndmin=2))

        trained_model = trainer.compute().get(ridge_training.model)
        betas = getNumpyArray(trained_model.getBeta())
        # The value at position [0, 0] of beta is compared as the intercept
        daal_intercept = betas[0, 0]

        # scikit-learn side: same penalty, intercept fitting enabled
        reference = ScikitRidgeRegression(alpha=1.0, fit_intercept=True)
        reference.fit(x, y)

        assert_array_almost_equal(reference.intercept_, [daal_intercept])
    def test_coeff_size(rows=10, columns=9):
        '''
        Check that the beta coefficients of the trained ridge model have the
        same shape as the transposed dependent variables.

        :param rows: first argument forwarded to get_random_array
        :param columns: second argument forwarded to get_random_array
        '''
        inout = get_random_array(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        ridge_training_algorithm = ridge_training.Batch()
        ridge_training_algorithm.input.set(ridge_training.data, ntX)
        ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY)

        # set parameter
        alpha = 1.0
        alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
        ridge_training_algorithm.parameter.ridgeParameters = alpha_nt

        result = ridge_training_algorithm.compute()
        model = result.get(ridge_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        # The original message literal was left unterminated, which broke
        # parsing of everything that follows; it is closed here.
        assert y.transpose().shape == np_beta.shape, (
            "Dependent variable size must have "
            "the same shape as the beta coefficients")
    def get_daal_prediction(x=np.arange(10).reshape(10,1), y=np.arange(10).reshape(10,1)):
        '''
        Train a ridge model with alpha = 0.0 on (x, y), predict on x and
        assert that the prediction is almost equal to y.

        :param x: training data, also used as the prediction input
        :param y: dependent variables (responses)
        :return: predicted values as a numpy array
        '''
        data_table = HomogenNumericTable(x)
        response_table = HomogenNumericTable(y)

        # Training stage
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, data_table)
        trainer.input.set(ridge_training.dependentVariables, response_table)

        alpha = 0.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([alpha], ndmin=2))

        trained_model = trainer.compute().get(ridge_training.model)

        # Prediction stage on the very same data the model was trained on
        predictor = ridge_prediction.Batch()
        predictor.input.setModel(ridge_prediction.model, trained_model)
        predictor.input.setTable(ridge_prediction.data, data_table)
        prediction_result = predictor.compute()

        np_predicted = getNumpyArray(
            prediction_result.get(ridge_prediction.prediction))
        # The prediction is expected to reproduce the given responses
        assert_array_almost_equal(y, np_predicted)
        return np_predicted
Esempio n. 5
0
    def test_ridge_regression_simple():
        '''
        Train ridge regression (alpha = 1.0) on a three-point sample that is
        used both as data and as responses, compare the beta coefficients
        with reference values, then check the prediction against the input.
        '''
        x = np.array([0., 2., 3.]).reshape(3, 1)

        # One numeric table serves as both the data and the responses
        shared_table = HomogenNumericTable(x)
        nt_x = nt_y = shared_table

        # Training stage
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, nt_x)
        trainer.input.set(ridge_training.dependentVariables, nt_y)

        alpha = 1.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([alpha], ndmin=2))

        model = trainer.compute().get(ridge_training.model)
        np_beta_coeff = getNumpyArray(model.getBeta())

        # Reference beta coefficients for this sample
        expected_beta = np.array([0.294118, 0.823529]).reshape(1, 2)
        assert_array_almost_equal(expected_beta, np_beta_coeff)

        # Prediction stage on the training data
        predictor = ridge_prediction.Batch_Float64DefaultDense()
        predictor.input.setModel(ridge_prediction.model, model)
        predictor.input.setTable(ridge_prediction.data, nt_x)

        predicted = predictor.compute().get(ridge_prediction.prediction)
        np_predict = getNumpyArray(predicted)
        # Only a coarse match (zero decimals) is required
        assert_array_almost_equal(x, np_predict, decimal=0)
Esempio n. 6
0
    def training(self, trainData, trainDependentVariables):
        '''
        Train a ridge regression model with the batch algorithm.

        self.ridgeParameters is normalized in place to a 2-D numpy array
        (unless it already is a numpy array) before being wrapped in a
        numeric table and handed to the algorithm.

        :param trainData: numeric table with the training data
        :param trainDependentVariables: numeric table with the dependent
            variables (responses)
        :return: result of the training algorithm's compute() call
        :raises SystemExit: if self.ridgeParameters is a list whose length
            differs from the number of dependent variables, or if it is not
            an int, float, list or numpy array
        '''
        if self.method != 'defaultDense':
            warnings.warn(
                'Invalid method, using default dense Normal Equation method')
        # The dense normal-equation method is used in every case
        method = training.normEqDense

        ridge_params = self.ridgeParameters
        if isinstance(ridge_params, list):
            # One parameter per dependent variable: the dependent variables
            # are the columns of the response table (rows are observations).
            expected = trainDependentVariables.getNumberOfColumns()
            if len(ridge_params) != expected:
                warnings.warn(
                    'no. of ridgeParameters must be equal to no. of dependent variables'
                )
                raise SystemExit
            self.ridgeParameters = np.array(ridge_params, ndmin=2)
        elif (isinstance(ridge_params, (int, float))
              and not isinstance(ridge_params, bool)):
            # bool is an int subclass but is not a valid ridge parameter
            self.ridgeParameters = np.array(ridge_params, ndmin=2)
        elif not isinstance(ridge_params, np.ndarray):
            warnings.warn(
                'Invalid aplha type. ridgeParameters must be type int or float or list'
            )
            raise SystemExit

        nT_ridgeParams = HomogenNumericTable(self.ridgeParameters)
        algorithm = training.Batch(method=method, fptype=self.dtype)
        algorithm.parameter.ridgeParameters = nT_ridgeParams
        algorithm.input.set(training.data, trainData)
        algorithm.input.set(training.dependentVariables,
                            trainDependentVariables)
        # Build Ridge regression model and retrieve the algorithm results
        return algorithm.compute()