Ejemplo n.º 1
0
    def test_svd_simple():
        """Check packed-form SVD of a fixed 4x2 matrix against known factors.

        Verifies the shapes of the singular values and of both singular
        matrices, then compares each of them with hard-coded reference values.
        """
        matrix = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
        _n_rows, n_columns = matrix.shape
        table = HomogenNumericTable(matrix)

        svd_alg = svd.Batch(method=svd.defaultDense,
                            leftSingularMatrix=svd.requiredInPackedForm,
                            rightSingularMatrix=svd.requiredInPackedForm)
        svd_alg.input.set(svd.data, table)
        svd_result = svd_alg.compute()

        # Pull the three result tables out in a fixed order.
        result_ids = (svd.singularValues,
                      svd.leftSingularMatrix,
                      svd.rightSingularMatrix)
        sigma, U, V = (getNumpyArray(svd_result.get(result_id))
                       for result_id in result_ids)

        # Shape checks: one singular value per column, U shaped like the
        # input, V square in the number of columns.
        assert sigma.shape[1] == n_columns
        assert matrix.shape == U.shape
        assert n_columns == V.shape[0] == V.shape[1]

        expected_sigma = np.array([[14.269, 0.6268]])
        expected_u = np.array([[-0.152, -0.823],
                               [-0.350, -0.421],
                               [-0.547, -0.020],
                               [-0.745, 0.381]])
        expected_v = np.array([[-0.641, -0.767],
                               [0.767, -0.641]])

        assert_array_almost_equal(expected_sigma, sigma, decimal=4)
        assert_array_almost_equal(expected_u, U, decimal=3)
        assert_array_almost_equal(expected_v, V, decimal=3)
Ejemplo n.º 2
0
    def test_linear_regression_simple():
        """Train QR-based linear regression on y = x and verify fit and prediction.

        The same three-sample column is used as data and as dependent variable,
        so the expected beta row is [[0, 1]] and predicting on the training
        data must reproduce it.
        """
        samples = np.array([0., 2., 3.]).reshape(3, 1)
        # One table plays both roles: features and targets.
        table = HomogenNumericTable(samples)

        # --- training ---
        trainer = linear_training.Batch(method=linear_training.qrDense)
        trainer.input.set(linear_training.data, table)
        trainer.input.set(linear_training.dependentVariables, table)
        trained_model = trainer.compute().get(linear_training.model)

        actual_beta = getNumpyArray(trained_model.getBeta())
        expected_beta = np.array([0, 1]).reshape(1, 2)
        assert_almost_equal(expected_beta, actual_beta)

        # --- prediction on the training data ---
        predictor = linear_prediction.Batch()
        predictor.input.setModel(linear_prediction.model, trained_model)
        predictor.input.setTable(linear_prediction.data, table)
        prediction_table = predictor.compute().get(linear_prediction.prediction)

        assert_array_almost_equal(samples, getNumpyArray(prediction_table))
Ejemplo n.º 3
0
    def test_svd_daal_vs_sklearn(rows=1000, columns=1000):
        """Compare DAAL singular values (and optionally speed) with NumPy's SVD.

        Both decompositions of the array returned by
        ``get_random_array(rows, columns)`` are timed. When the
        ``CHECKPERFORMANCE`` environment variable is set, the DAAL run must not
        take longer than the ``np.linalg.svd`` reference. The singular values
        of both implementations must agree; the timings are printed afterwards.

        :param rows: row count forwarded to get_random_array
        :param columns: column count forwarded to get_random_array
        """
        # default_timer is the clock meant for benchmarking; time.time() is a
        # wall clock that can jump when the system time is adjusted.
        from timeit import default_timer

        indata = get_random_array(rows, columns)
        daal_input = HomogenNumericTable(indata)
        algorithm = svd.Batch()
        algorithm.input.set(svd.data, daal_input)

        start_reference = default_timer()
        _U, s, _Vh = np.linalg.svd(indata, full_matrices=False)
        reference_elapsed = default_timer() - start_reference

        start_daal = default_timer()
        result = algorithm.compute()
        daal_elapsed = default_timer() - start_daal

        # The speed comparison is opt-in because it depends on the machine.
        if os.getenv("CHECKPERFORMANCE") is not None:
            assert daal_elapsed <= reference_elapsed

        # DAAL returns the singular values as a 2-D table; flatten it to the
        # 1-D layout produced by np.linalg.svd before comparing.
        sigma = getNumpyArray(result.get(svd.singularValues))
        _rows, cols = sigma.shape
        d_sigma = sigma.reshape(cols, )

        assert_array_almost_equal(d_sigma, s)

        # Labels name the implementation that was actually measured.
        print("SVD for matrix[{}][{}]".format(rows, columns))
        print("+ NumPy SVD: {}".format(reference_elapsed))
        print("+ DAAL SVD: {}".format(daal_elapsed))
Ejemplo n.º 4
0
    def test_intercept_flag(rows=10, columns=9):
        """Check that DAAL ridge regression and scikit-learn agree on the intercept.

        The first beta coefficient of the DAAL model is compared with the
        ``intercept_`` of a scikit-learn ridge model fitted on the same data
        with the same ridge parameter.

        :param rows: row count forwarded to get_random_array
        :param columns: column count forwarded to get_random_array
        """
        dataset = get_random_array(rows, columns)
        features, targets = dataset[0], dataset[1]

        features_table = HomogenNumericTable(features)
        targets_table = HomogenNumericTable(targets)

        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, features_table)
        trainer.input.set(ridge_training.dependentVariables, targets_table)

        # The ridge parameter is handed over as a 1x1 numeric table.
        penalty = 1.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([penalty], ndmin=2))

        trained_model = trainer.compute().get(ridge_training.model)
        beta = getNumpyArray(trained_model.getBeta())
        daal_intercept = beta[0, 0]

        reference = ScikitRidgeRegression(alpha=penalty, fit_intercept=True)
        reference.fit(features, targets)

        assert_array_almost_equal(reference.intercept_, [daal_intercept])
Ejemplo n.º 5
0
    def test_coeff_size(rows=10, columns=9):
        '''
        number of beta coefficients (with intercept flag on)
        is the same number as size of data sample
        '''
        inout = get_random_array(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        ridge_training_algorithm = ridge_training.Batch()
        ridge_training_algorithm.input.set(ridge_training.data, ntX)
        ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY)

        # set parameter
        alpha = 1.0
        alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
        ridge_training_algorithm.parameter.ridgeParameters = alpha_nt

        result = ridge_training_algorithm.compute()
        model = result.get(ridge_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
Ejemplo n.º 6
0
    def get_daal_prediction(x=np.arange(10).reshape(10,1), y=np.arange(10).reshape(10,1)):
        """Train DAAL ridge regression on (x, y) and predict on x again.

        The ridge parameter is 0.0. The prediction is asserted to be almost
        equal to ``y`` before it is returned.

        :param x: training data, also used as prediction input
        :param y: dependent variable
        :return: predictions as a numpy array
        """
        features_table = HomogenNumericTable(x)
        targets_table = HomogenNumericTable(y)

        # --- training ---
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, features_table)
        trainer.input.set(ridge_training.dependentVariables, targets_table)

        penalty = 0.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([penalty], ndmin=2))

        trained_model = trainer.compute().get(ridge_training.model)

        # --- prediction on the training data ---
        predictor = ridge_prediction.Batch()
        predictor.input.setModel(ridge_prediction.model, trained_model)
        predictor.input.setTable(ridge_prediction.data, features_table)
        prediction_table = predictor.compute().get(ridge_prediction.prediction)

        predicted = getNumpyArray(prediction_table)
        # The model is expected to reproduce the dependent variable.
        assert_array_almost_equal(y, predicted)
        return predicted
Ejemplo n.º 7
0
    def test_svd_simple_check():
        """Check that the DAAL SVD factors multiply back to the input matrix."""
        matrix = np.array([[1, 3, 4], [5, 6, 9], [1, 2, 3], [7, 6, 8]])

        svd_alg = svd.Batch()
        svd_alg.input.set(svd.data, HomogenNumericTable(matrix))
        svd_result = svd_alg.compute()

        result_ids = (svd.singularValues,
                      svd.leftSingularMatrix,
                      svd.rightSingularMatrix)
        sigma, U, V = (getNumpyArray(svd_result.get(result_id))
                       for result_id in result_ids)

        # Flatten the 2-D singular value table so np.diag builds a diagonal
        # matrix from it.
        _n_rows, n_values = sigma.shape
        sigma_diag = np.diag(sigma.reshape(n_values, ))

        # Reconstruct as U * (Sigma * V).
        scaled_v = np.dot(sigma_diag, V)
        reconstructed = np.dot(U, scaled_v)

        assert_array_almost_equal(reconstructed, matrix)
Ejemplo n.º 8
0
    def test_zscore_multicolumns():
        """Compare z_score on a random 10x3 table with scipy's column-wise z-score.

        The scipy reference is computed along axis 0 with ``ddof=1``.
        """
        samples = np.random.rand(10, 3)
        expected = stats.zscore(samples, axis=0, ddof=1)

        actual = getNumpyArray(z_score(HomogenNumericTable(samples)))

        assert_array_almost_equal(expected, actual)
Ejemplo n.º 9
0
    def test_ridge_regression_simple():
        """Train ridge regression on y = x with ridge parameter 1.0 and verify it.

        The same three-sample column is used as data and as dependent variable.
        The beta row is compared with reference values, and predictions on the
        training data must match the input after rounding (``decimal=0``).
        """
        samples = np.array([0., 2., 3.]).reshape(3, 1)
        # One table plays both roles: features and targets.
        table = HomogenNumericTable(samples)

        # --- training ---
        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, table)
        trainer.input.set(ridge_training.dependentVariables, table)
        # interceptFlag is not set explicitly here; only the ridge parameter
        # is configured, as a 1x1 numeric table.
        penalty = 1.0
        trainer.parameter.ridgeParameters = HomogenNumericTable(
            np.array([penalty], ndmin=2))

        trained_model = trainer.compute().get(ridge_training.model)

        actual_beta = getNumpyArray(trained_model.getBeta())
        expected_beta = np.array([0.294118, 0.823529]).reshape(1, 2)
        assert_array_almost_equal(expected_beta, actual_beta)

        # --- prediction on the training data ---
        predictor = ridge_prediction.Batch_Float64DefaultDense()
        predictor.input.setModel(ridge_prediction.model, trained_model)
        predictor.input.setTable(ridge_prediction.data, table)
        prediction_table = predictor.compute().get(ridge_prediction.prediction)

        # The penalty shrinks the fit, so only a coarse match is required.
        assert_array_almost_equal(samples,
                                  getNumpyArray(prediction_table),
                                  decimal=0)
Ejemplo n.º 10
0
    def predict(self, X):
        '''
        Predict with the trained ridge model stored in self.model.

        :param X: new data; converted to a numeric table through
            IInput.HomogenousDaalData
        :return: the predictions as a numpy array

        Side effect: when 'intercept' is among self.parameters,
        self.intercept_ is set to a one-element list holding the first
        beta coefficient before the prediction is computed.
        '''
        table = IInput.HomogenousDaalData(X).getNumericTable()

        predictor = ridge_prediction.Batch()
        predictor.input.setModel(ridge_prediction.model, self.model)
        predictor.input.setTable(ridge_prediction.data, table)

        if 'intercept' in self.parameters:
            beta = getNumpyArray(self.get_beta())
            self.intercept_ = [beta[0, 0]]

        prediction_table = predictor.compute().get(ridge_prediction.prediction)
        return getNumpyArray(prediction_table)
Ejemplo n.º 11
0
    def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
        """Train DAAL linear regression on (x, y) and predict on x again.

        The prediction is asserted to be almost equal to ``y`` before it is
        returned.

        :param x: training data, also used as prediction input
        :param y: dependent variable
        :return: predictions as a numpy array
        """
        features_table = HomogenNumericTable(x)
        targets_table = HomogenNumericTable(y)

        # --- training ---
        trainer = linear_training.Batch()
        trainer.input.set(linear_training.data, features_table)
        trainer.input.set(linear_training.dependentVariables, targets_table)
        trained_model = trainer.compute().get(linear_training.model)

        # --- prediction on the training data ---
        predictor = linear_prediction.Batch()
        predictor.input.setModel(linear_prediction.model, trained_model)
        predictor.input.setTable(linear_prediction.data, features_table)
        prediction_table = predictor.compute().get(linear_prediction.prediction)

        predicted = getNumpyArray(prediction_table)
        # The model is expected to reproduce the dependent variable.
        assert_array_almost_equal(y, predicted)
        return predicted
Ejemplo n.º 12
0
    def test_coeff_size(rows=10, columns=9):
        '''
        number of beta coefficients (with intercept flag on)
        is the same number as size of data sample
        '''
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
Ejemplo n.º 13
0
    def predict(self, X):
        '''
        Predict with the trained linear model stored in self.model.

        :param X: new data; converted to a numeric table through
            IInput.HomogenousDaalData
        :return: the predictions as a numpy array
        '''
        table = IInput.HomogenousDaalData(X).getNumericTable()

        predictor = linear_prediction.Batch()
        predictor.input.setModel(linear_prediction.model, self.model)
        predictor.input.setTable(linear_prediction.data, table)

        # TODO: honour 'intercept' in self.parameters by setting
        # predictor.parameter.interceptFlag; not applied yet.

        prediction_table = predictor.compute().get(linear_prediction.prediction)
        return getNumpyArray(prediction_table)
Ejemplo n.º 14
0
    def test_intercept_flag(rows=10, columns=9):
        """Check that DAAL linear regression and scikit-learn agree on the intercept.

        The first beta coefficient of the DAAL model is compared with the
        ``intercept_`` of a scikit-learn ``LinearRegression`` fitted on the
        same data. ``test_overfitting`` is run with the same dimensions first.

        :param rows: row count forwarded to get_random_array
        :param columns: column count forwarded to get_random_array
        """
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)
        # The first beta coefficient is treated as the intercept.
        daal_intercept = np_beta[0, 0]

        # Import from the public package: the private module
        # sklearn.linear_model.base was deprecated and later removed.
        from sklearn.linear_model import LinearRegression as ScikitLinearRegression
        regression = ScikitLinearRegression()
        regression.fit(x, y)

        scikit_intercept = regression.intercept_
        assert_array_almost_equal(scikit_intercept, [daal_intercept])