def main(readcsv=read_csv, method='defaultDense'):
    """Train and run a daal4py Lasso regression on the bundled example data.

    Reads 10 independent columns and 2 dependent columns from the train and
    test CSV files, trains a Lasso model, predicts on the test features and
    returns the prediction result together with the ground truth read from
    the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Columns 0..9 are the features, columns 10..11 the two targets.
    features = readcsv(train_path, range(10))
    targets = readcsv(train_path, range(10, 12))

    # Train; the computed result carries the model used for prediction.
    trainer = d4p.lasso_regression_training(interceptFlag=True)
    trained = trainer.compute(features, targets)

    # Predict on the test data (same number of features as training).
    test_features = readcsv(test_path, range(10))
    ground_truth = readcsv(test_path, range(10, 12))
    predictor = d4p.lasso_regression_prediction()
    predicted = predictor.compute(test_features, trained.model)

    # One prediction row per test observation, one column per target.
    assert predicted.prediction.shape == (test_features.shape[0],
                                          targets.shape[1])

    # Tests feed this example a scipy.sparse matrix; densify it here because
    # subtracting a sparse matrix is not supported downstream.
    if hasattr(ground_truth, 'toarray'):
        ground_truth = ground_truth.toarray()

    # NOTE: the accuracy assertion below is outdated; to be restored in a
    # future release:
    # assert np.square(predicted.prediction - np.asarray(ground_truth)).mean() < 2.2

    return (predicted, ground_truth)
def main(readcsv=read_csv, method='defaultDense'):
    """Train and run a daal4py Lasso regression on the bundled example data.

    Reads 10 independent columns and a single dependent column from the
    train and test CSV files, trains a Lasso model, predicts on the test
    features and returns the prediction result together with the ground
    truth read from the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Columns 0..9 are the features, column 10 the single target.
    features = readcsv(train_path, range(10))
    target = readcsv(train_path, range(10, 11))

    # Train; the computed result carries the model used for prediction.
    trainer = d4p.lasso_regression_training(interceptFlag=True)
    trained = trainer.compute(features, target)

    # Predict on the test data (same number of features as training).
    test_features = readcsv(test_path, range(10))
    ground_truth = readcsv(test_path, range(10, 11))
    predictor = d4p.lasso_regression_prediction()
    predicted = predictor.compute(test_features, trained.model)

    # One prediction row per test observation, one column for the target.
    assert predicted.prediction.shape == (test_features.shape[0],
                                          target.shape[1])
    # Loose accuracy bound on the mean squared prediction error.
    assert np.square(predicted.prediction - ground_truth).mean() < 2.4

    return (predicted, ground_truth)
def _daal4py_fit_lasso(self, X, y_, check_input):
    """Fit a Lasso model using the daal4py coordinate-descent solver.

    Mirrors sklearn's ``Lasso.fit``: validates the input, optionally centers
    and normalizes ``X``/``y``, runs ``daal4py.lasso_regression_training``,
    and maps the fitted betas onto ``coef_`` / ``intercept_`` / ``n_iter_``.
    Returns ``self`` on success, or ``None`` when the daal4py computation
    raises a RuntimeError so the caller can fall back to another
    implementation.
    """
    # appropriate checks on hyper-parameters and input arrays
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    # only for dual_gap computation, it is not required for Intel(R) oneAPI
    # Data Analytics Library
    self._X = X
    self._y = y

    # normalizing and centering: defaults are "no offset, unit scale"
    X_offset = np.zeros(X.shape[1], dtype=X.dtype)
    X_scale = np.ones(X.shape[1], dtype=X.dtype)
    if y.ndim == 1:
        y_offset = X.dtype.type(0)
    else:
        y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    if self.fit_intercept:
        X_offset = np.average(X, axis=0)
        if self.normalize:
            if self.copy_X:
                # keep the caller's X intact while centering
                X = np.copy(X) - X_offset
            else:
                X -= X_offset
            X, X_scale = normalize(X, axis=0, copy=False, return_norm=True)
        y_offset = np.average(y, axis=0)
        y = y - y_offset

    # only for compliance with Sklearn: a user-supplied Gram matrix is stale
    # once X has been centered or normalized
    if isinstance(self.precompute, np.ndarray) and (
            self.fit_intercept and not np.allclose(X_offset, np.zeros(X.shape[1])) or
            self.normalize and not np.allclose(X_scale, np.ones(X.shape[1]))):
        warnings.warn(
            "Gram matrix was provided but X was centered"
            " to fit intercept, "
            "or X was normalized : recomputing Gram matrix.",
            UserWarning)

    # mean-squared-error objective over all observations
    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    # coordinate-descent solver driving the Lasso optimization
    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        seed=0 if (self.random_state is None) else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with the previous fit's
    # [intercept | coefficients] rows, one row per target
    if self.warm_start and hasattr(self, "coef_") and \
            isinstance(self.coef_, np.ndarray):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            # column 0 holds the intercept, the rest the coefficients;
            # scalar vs. array access depends on the number of targets
            inputArgument[i][0] = self.intercept_ if (
                n_rows == 1) else self.intercept_[i]
            inputArgument[i][1:] = self.coef_[:].copy(
                order='C') if (n_rows == 1) else self.coef_[i, :].copy(
                order='C')
        cd_solver.setup(inputArgument)

    # let daal4py modify X in place only when the caller's array is already
    # protected (copy_X is False, or a copy was made above while normalizing)
    doUse_condition = self.copy_X is False or \
        (self.fit_intercept and self.normalize and self.copy_X)
    lasso_alg = daal4py.lasso_regression_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        dataUseInComputation='doUse' if doUse_condition else 'doNotUse',
        lassoParameters=np.asarray(self.alpha, dtype=X.dtype).reshape((1, -1)),
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y,
                                          gramMatrix=self.precompute)
        else:
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y)
    except RuntimeError:
        # signal the caller to fall back to another implementation
        return None

    # set coef_ and intercept_ results
    lasso_model = lasso_res.model
    self.daal_model_ = lasso_model

    # update coefficients if normalizing and centering: rescale betas back
    # to the original feature scale and recompute the intercept column
    if self.fit_intercept and self.normalize:
        lasso_model.Beta[:, 1:] = lasso_model.Beta[:, 1:] / X_scale
        lasso_model.Beta[:, 0] = \
            (y_offset - np.dot(X_offset, lasso_model.Beta[:, 1:].T)).T

    # Beta layout: column 0 is the intercept, remaining columns the coefs
    coefs = lasso_model.Beta
    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn: flatten for a single target
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_ from the solver's reported iteration count
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if (self.max_iter == n_iter + 1):
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.",
            ConvergenceWarning)
    return self
def _daal4py_fit_lasso(self, X, y_, check_input):
    """Fit a Lasso model using the daal4py coordinate-descent solver.

    Validates the input, builds an MSE objective and a coordinate-descent
    solver, runs ``daal4py.lasso_regression_training``, and maps the fitted
    betas onto sklearn-style ``coef_`` / ``intercept_`` / ``n_iter_``.
    Returns ``self`` on success, or ``None`` when the daal4py computation
    raises a RuntimeError so the caller can fall back to another
    implementation.
    """
    # appropriate checks on hyper-parameters and input arrays
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    # mean-squared-error objective over all observations
    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        # identity test (PEP 8 E711), consistent with the sibling
        # implementation in this file
        seed=0 if (self.random_state is None) else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with the previous fit's
    # [intercept | coefficients] rows, one row per target
    if (self.warm_start and hasattr(self, "coef_") and
            isinstance(self.coef_, np.ndarray)):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            # column 0 holds the intercept, the rest the coefficients;
            # scalar vs. array access depends on the number of targets
            inputArgument[i][0] = self.intercept_ if (
                n_rows == 1) else self.intercept_[i]
            inputArgument[i][1:] = self.coef_[:].copy(
                order='C') if (n_rows == 1) else self.coef_[i, :].copy(
                order='C')
        cd_solver.setup(inputArgument)

    lasso_alg = daal4py.lasso_regression_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        # identity test (PEP 8 E712), consistent with the sibling
        # implementation; daal4py may modify X in place only when the
        # caller does not require a copy
        dataUseInComputation='doUse'
        if (self.copy_X is False) else 'doNotUse',
        lassoParameters=np.asarray(self.alpha, dtype=X.dtype).reshape((1, -1)),
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y,
                                          gramMatrix=self.precompute)
        else:
            lasso_res = lasso_alg.compute(data=X, dependentVariables=y)
    except RuntimeError:
        # signal the caller to fall back to another implementation
        return None

    # set coef_ and intercept_ results;
    # Beta layout: column 0 is the intercept, remaining columns the coefs
    lasso_model = lasso_res.model
    self.daal_model_ = lasso_model
    coefs = lasso_model.Beta
    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn: flatten for a single target
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_ from the solver's reported iteration count
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if (self.max_iter == n_iter + 1):
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.",
            ConvergenceWarning)

    # only for dual_gap computation, it is not required for DAAL
    self._X = X
    self._y = y
    return self