Example #1
0
def main(readcsv=read_csv, method='defaultDense'):
    """Batch Lasso regression example with two response variables.

    Trains on 10 independent and 2 dependent columns, predicts on the
    matching test file, and returns (prediction result, test responses).
    """
    train_file = "./data/batch/linear_regression_train.csv"
    test_file = "./data/batch/linear_regression_test.csv"

    # Features live in columns 0-9; the two responses in columns 10-11.
    x_train = readcsv(train_file, range(10))
    y_train = readcsv(train_file, range(10, 12))

    # Configure a Lasso training object and compute; the result carries
    # the trained model used below for prediction.
    training = d4p.lasso_regression_training(interceptFlag=True)
    train_result = training.compute(x_train, y_train)

    # Load the test set (same feature layout) and predict with the model.
    x_test = readcsv(test_file, range(10))
    y_test = readcsv(test_file, range(10, 12))
    predict_result = d4p.lasso_regression_prediction().compute(
        x_test, train_result.model)

    # One predicted row per test observation, one column per response.
    assert predict_result.prediction.shape == (x_test.shape[0],
                                               y_train.shape[1])

    # the example is used in tests with the scipy.sparse matrix
    # we use this trick until subtracting a sparse matrix is not supported
    if hasattr(y_test, 'toarray'):
        y_test = y_test.toarray()
    # this assertion is outdated, will be fixed in next release
    # assert np.square(predict_result.prediction - np.asarray(y_test)).mean() < 2.2

    return (predict_result, y_test)
Example #2
0
def main(readcsv=read_csv, method='defaultDense'):
    """Batch Lasso regression example with a single response variable.

    Trains on 10 independent columns and 1 dependent column, predicts on
    the test file, verifies the prediction quality, and returns
    (prediction result, test responses).
    """
    train_file = "./data/batch/linear_regression_train.csv"
    test_file = "./data/batch/linear_regression_test.csv"

    # Features live in columns 0-9; the single response in column 10.
    features = readcsv(train_file, range(10))
    response = readcsv(train_file, range(10, 11))

    # Train a Lasso model with an intercept term; the result carries the
    # model used below for prediction.
    train_result = d4p.lasso_regression_training(
        interceptFlag=True).compute(features, response)

    # Predict on the test set (same feature layout) using that model.
    test_features = readcsv(test_file, range(10))
    test_response = readcsv(test_file, range(10, 11))
    predict_result = d4p.lasso_regression_prediction().compute(
        test_features, train_result.model)

    # One prediction per test row, matching the response dimensionality,
    # and the mean squared error must stay small.
    assert predict_result.prediction.shape == (test_features.shape[0],
                                               response.shape[1])
    assert np.square(predict_result.prediction - test_response).mean() < 2.4

    return (predict_result, test_response)
def _daal4py_fit_lasso(self, X, y_, check_input):
    """Fit a Lasso model using the daal4py coordinate-descent solver.

    Mirrors sklearn's ``Lasso.fit`` preprocessing (optional centering and
    normalization), trains via daal4py, and writes the fitted attributes
    (``coef_``, ``intercept_``, ``n_iter_``, ``daal_model_``) onto *self*.
    Returns *self* on success, or ``None`` when the daal4py backend raises
    ``RuntimeError`` (presumably so the caller can fall back to the stock
    sklearn path — TODO confirm against caller).
    """

    # Validate hyper-parameters / inputs, then force 2-D arrays and pick
    # the floating-point type daal4py should compute in.
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    # only for dual_gap computation, it is not required for Intel(R) oneAPI
    # Data Analytics Library
    self._X = X
    self._y = y

    # Normalizing and centering (equivalent of sklearn's preprocess_data):
    # start from identity offsets/scales so the post-fit coefficient
    # correction below is a no-op unless normalization actually happened.
    X_offset = np.zeros(X.shape[1], dtype=X.dtype)
    X_scale = np.ones(X.shape[1], dtype=X.dtype)
    if y.ndim == 1:
        y_offset = X.dtype.type(0)
    else:
        y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    if self.fit_intercept:
        X_offset = np.average(X, axis=0)
        if self.normalize:
            # Center X (copying first when copy_X asks us not to mutate
            # the caller's array), scale columns to unit norm, center y.
            if self.copy_X:
                X = np.copy(X) - X_offset
            else:
                X -= X_offset
            X, X_scale = normalize(X, axis=0, copy=False, return_norm=True)
            y_offset = np.average(y, axis=0)
            y = y - y_offset

    # only for compliance with Sklearn: a user-supplied Gram matrix is
    # stale once X has been centered or normalized, so warn that it is
    # being recomputed.
    if isinstance(self.precompute, np.ndarray) and (
            self.fit_intercept
            and not np.allclose(X_offset, np.zeros(X.shape[1])) or
            self.normalize and not np.allclose(X_scale, np.ones(X.shape[1]))):
        warnings.warn(
            "Gram matrix was provided but X was centered"
            " to fit intercept, "
            "or X was normalized : recomputing Gram matrix.", UserWarning)

    # Mean-squared-error objective over all observations, used as the
    # function the coordinate-descent solver minimizes.
    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        seed=0 if (self.random_state is None) else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with the previous solution.
    # Row i packs [intercept_i, coef_i...] for response i; the scalar
    # vs. array cases handle single- vs. multi-response fits.
    if self.warm_start and hasattr(self, "coef_") and \
            isinstance(self.coef_, np.ndarray):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            inputArgument[i][0] = self.intercept_ if (
                n_rows == 1) else self.intercept_[i]
            inputArgument[i][1:] = self.coef_[:].copy(
                order='C') if (n_rows == 1) else self.coef_[i, :].copy(
                    order='C')
        cd_solver.setup(inputArgument)
    # 'doUse' lets daal4py work on the input buffers in place — safe only
    # when the caller allowed mutation (copy_X is False) or when we already
    # made our own copy during normalization above.
    doUse_condition = self.copy_X is False or \
        (self.fit_intercept and self.normalize and self.copy_X)
    lasso_alg = daal4py.lasso_regression_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        dataUseInComputation='doUse' if doUse_condition else 'doNotUse',
        lassoParameters=np.asarray(self.alpha, dtype=X.dtype).reshape((1, -1)),
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            lasso_res = lasso_alg.compute(data=X,
                                          dependentVariables=y,
                                          gramMatrix=self.precompute)
        else:
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y)
    except RuntimeError:
        # Backend failure — signal the caller to use a fallback path.
        return None

    # set coef_ and intercept_ results
    lasso_model = lasso_res.model
    self.daal_model_ = lasso_model

    # Undo the normalization: rescale coefficients back to the original
    # units and recompute the intercepts from the stored offsets.
    if self.fit_intercept and self.normalize:
        lasso_model.Beta[:, 1:] = lasso_model.Beta[:, 1:] / X_scale
        lasso_model.Beta[:, 0] = \
            (y_offset - np.dot(X_offset, lasso_model.Beta[:, 1:].T)).T

    # Beta column 0 holds the intercept(s); the rest are coefficients.
    coefs = lasso_model.Beta

    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn: single-response fits expose 1-D
    # coef_ and a scalar intercept_.
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_ (sklearn reports one count per response for multi-output)
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if (self.max_iter == n_iter + 1):
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.", ConvergenceWarning)

    return self
Example #4
0
def _daal4py_fit_lasso(self, X, y_, check_input):
    """Fit a Lasso model using the daal4py coordinate-descent solver.

    Validates input, trains via daal4py, and writes the fitted attributes
    (``coef_``, ``intercept_``, ``n_iter_``, ``daal_model_``) onto *self*.
    Returns *self* on success, or ``None`` when the daal4py backend raises
    ``RuntimeError`` (presumably so the caller can fall back to the stock
    sklearn path — TODO confirm against caller).
    """

    # Validate hyper-parameters / inputs, then force 2-D arrays and pick
    # the floating-point type daal4py should compute in.
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    # Mean-squared-error objective over all observations, used as the
    # function the coordinate-descent solver minimizes.
    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        # PEP 8: compare to None with `is`, not `==`.
        seed=0 if (self.random_state is None) else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with the previous solution.
    # Row i packs [intercept_i, coef_i...] for response i; the scalar
    # vs. array cases handle single- vs. multi-response fits.
    if (self.warm_start and hasattr(self, "coef_")
            and isinstance(self.coef_, np.ndarray)):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            inputArgument[i][0] = self.intercept_ if (
                n_rows == 1) else self.intercept_[i]
            inputArgument[i][1:] = self.coef_[:].copy(
                order='C') if (n_rows == 1) else self.coef_[i, :].copy(
                    order='C')
        cd_solver.setup(inputArgument)

    lasso_alg = daal4py.lasso_regression_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        # 'doUse' lets daal4py work on the input buffers in place — only
        # safe when the caller allowed mutation (copy_X is False).
        # PEP 8: identity check against the singleton, not `== False`.
        dataUseInComputation='doUse' if (self.copy_X is False) else 'doNotUse',
        lassoParameters=np.asarray(self.alpha, dtype=X.dtype).reshape((1, -1)),
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            lasso_res = lasso_alg.compute(data=X,
                                          dependentVariables=y,
                                          gramMatrix=self.precompute)
        else:
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y)
    except RuntimeError:
        # Backend failure — signal the caller to use a fallback path.
        return None

    # set coef_ and intercept_ results; Beta column 0 holds the
    # intercept(s), the remaining columns the coefficients.
    lasso_model = lasso_res.model
    self.daal_model_ = lasso_model
    coefs = lasso_model.Beta

    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn: single-response fits expose 1-D
    # coef_ and a scalar intercept_.
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_ (sklearn reports one count per response for multi-output)
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if (self.max_iter == n_iter + 1):
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.", ConvergenceWarning)

    # only for dual_gap computation, it is not required for DAAL
    self._X = X
    self._y = y

    return self