def dual_gap_(self):
    """Return the cached duality gap, computing it on first access.

    The value is computed only while ``self.__gap`` is ``None`` and is then
    stored there.  It is derived from ``self.X``, ``self.y``, ``self.coef_``,
    ``self.alpha``, ``self.l1_ratio``, ``self.positive`` and ``self.tol``.

    Returns
    -------
    A scalar when ``self.y`` has a single column, otherwise a 1-D array with
    one entry per column of ``self.y``.
    """
    if self.__gap is None:
        fptype = getFPType(self.X)
        n_samples = self.X.shape[0]
        l1_reg = np.asarray(self.alpha * self.l1_ratio,
                            dtype=fptype) * n_samples
        l2_reg = np.asarray(self.alpha * (1.0 - self.l1_ratio),
                            dtype=fptype) * n_samples
        # Each penalty is reshaped from its own value.  The previous code
        # reshaped l2_reg into l1_reg, silently replacing the L1 penalty
        # with the L2 penalty in every formula below.
        l1_reg = l1_reg.reshape((1, -1))
        l2_reg = l2_reg.reshape((1, -1))
        n_targets = self.y.shape[1]

        # Centered data is shared by both branches and by every target.
        X_offset = np.average(self.X, axis=0)
        y_offset = np.average(self.y, axis=0)
        X_centered = self.X - X_offset

        if n_targets == 1:
            self.__gap = self.tol + 1.0
            y_centered = self.y - y_offset
            coef = np.reshape(self.coef_, (self.coef_.shape[0], 1))
            R = y_centered - np.dot(X_centered, coef)
            XtA = np.dot(X_centered.T, R) - l2_reg * coef
            R_norm2 = np.dot(R.T, R)
            coef_norm2 = np.dot(self.coef_, self.coef_)

            # With positive=True only the largest signed value matters.
            dual_norm_XtA = np.max(XtA) if self.positive else np.max(
                np.abs(XtA))
            if dual_norm_XtA > l1_reg:
                const = l1_reg / dual_norm_XtA
                A_norm2 = R_norm2 * (const**2)
                self.__gap = 0.5 * (R_norm2 + A_norm2)
            else:
                const = 1.0
                self.__gap = R_norm2

            l1_norm = np.sum(np.abs(self.coef_))
            self.__gap += (l1_reg * l1_norm -
                           const * np.dot(R.T, y_centered) +
                           0.5 * l2_reg * (1 + const**2) * coef_norm2)
            # The accumulated value is a (1, 1) array; expose a scalar.
            self.__gap = self.__gap[0][0]
        else:
            self.__gap = np.full(n_targets, self.tol + 1.0)
            for k in range(n_targets):
                y_centered_k = self.y[:, k] - y_offset[k]
                coef_k = self.coef_[k, :]
                R = y_centered_k - np.dot(X_centered, coef_k.T)
                XtA = np.dot(X_centered.T, R) - l2_reg * coef_k.T
                R_norm2 = np.dot(R.T, R)
                coef_norm2 = np.dot(coef_k, coef_k.T)

                dual_norm_XtA = np.max(XtA) if self.positive else np.max(
                    np.abs(XtA))
                if dual_norm_XtA > l1_reg:
                    const = l1_reg / dual_norm_XtA
                    A_norm2 = R_norm2 * (const**2)
                    self.__gap[k] = 0.5 * (R_norm2 + A_norm2)
                else:
                    const = 1.0
                    self.__gap[k] = R_norm2

                l1_norm = np.sum(np.abs(coef_k))
                self.__gap[k] += (
                    l1_reg * l1_norm -
                    const * np.dot(R.T, y_centered_k) +
                    0.5 * l2_reg * (1 + const**2) * coef_norm2)
    return self.__gap
def pca_fit_daal(X, n_components, method):
    """Fit PCA on ``X`` and return the result with derived quantities.

    Parameters
    ----------
    X : data passed to the PCA algorithm; ``X.shape`` is read.
    n_components : requested number of components; any value below 1 is
        replaced by ``min(X.shape)``.
    method : forwarded unchanged as the ``method`` argument of ``pca``.

    Returns
    -------
    tuple ``(pca_result, eigenvalues, eigenvectors, singular_values)`` where
    ``eigenvalues`` is flattened and ``singular_values`` equals
    ``sqrt((X.shape[0] - 1) * eigenvalues)``.
    """
    # A count below one means "take as many components as the data allows".
    n_kept = min(X.shape) if n_components < 1 else n_components

    data_fptype = getFPType(X)
    # Centering only: scaling is switched off in the z-score step.
    centering = normalization_zscore(fptype=data_fptype, doScale=False)
    algorithm = pca(
        fptype=data_fptype,
        method=method,
        normalization=centering,
        resultsToCompute='mean|variance|eigenvalue',
        isDeterministic=True,
        nComponents=n_kept,
    )

    result = algorithm.compute(X)
    components = result.eigenvectors
    eigvals = result.eigenvalues.ravel()
    sing_vals = np.sqrt((X.shape[0] - 1) * eigvals)
    return result, eigvals, components, sing_vals
def pca_transform_daal(pca_result, X, n_components, fit_n_samples,
                       eigenvalues, eigenvectors, whiten=False,
                       scale_eigenvalues=False):
    """Project ``X`` using a previously computed PCA result.

    The mean always comes from ``pca_result.dataForTransform``.  An
    ``'eigenvalue'`` entry is added to the transform data only when
    ``whiten`` or ``scale_eigenvalues`` is set:

    * ``whiten`` and ``scale_eigenvalues``: eigenvalues times
      ``fit_n_samples - 1``;
    * ``whiten`` only: the eigenvalues unchanged;
    * ``scale_eigenvalues`` only: a ``(1, n)`` array filled with
      ``fit_n_samples - 1``.

    The ``eigenvalues`` and ``eigenvectors`` parameters are not read by this
    function; the values stored on ``pca_result`` are used instead.

    Returns the ``transformedData`` attribute of the transform result.
    """
    data_fptype = getFPType(X)
    fitted_eigvals = pca_result.eigenvalues if False else None  # placeholder removed below
    del fitted_eigvals

    transform_inputs = {'mean': pca_result.dataForTransform['mean']}
    if whiten and scale_eigenvalues:
        transform_inputs['eigenvalue'] = \
            (fit_n_samples - 1) * pca_result.eigenvalues
    elif whiten:
        transform_inputs['eigenvalue'] = pca_result.eigenvalues
    elif scale_eigenvalues:
        transform_inputs['eigenvalue'] = np.full(
            (1, pca_result.eigenvalues.size),
            fit_n_samples - 1,
            dtype=X.dtype)

    transformer = pca_transform(fptype=data_fptype,
                                nComponents=n_components)
    outcome = transformer.compute(X, pca_result.eigenvectors,
                                  transform_inputs)
    return outcome.transformedData
def test_predict(X, X_init):
    """Assign ``X`` to the centroids in ``X_init`` without iterating.

    Builds a ``kmeans`` algorithm with ``maxIterations=0`` and
    ``assignFlag=True`` (cluster count taken from ``params.n_clusters``) and
    returns whatever its ``compute(X, X_init)`` call returns.
    """
    data_fptype = getFPType(X)
    assigner = kmeans(
        fptype=data_fptype,
        nClusters=params.n_clusters,
        maxIterations=0,
        assignFlag=True,
        accuracyThreshold=0.0,
    )
    return assigner.compute(X, X_init)
def _daal4py_check(self, X, y, check_input):
    """Validate the estimator's hyper-parameters before fitting.

    Checks ``alpha`` (warning only), ``l1_ratio``, ``precompute`` and
    ``selection`` in that order.  When ``precompute`` is an ndarray it is
    replaced on ``self`` by ``make2d(self.precompute)``; with
    ``check_input`` set it is first passed through ``check_array``.

    ``y`` is accepted but not inspected here.

    Raises
    ------
    ValueError
        If ``l1_ratio`` is not a number in [0, 1], if ``precompute`` is not
        an ndarray and not one of ``False``, ``True``, ``'auto'``, or if
        ``selection`` is neither ``'random'`` nor ``'cyclic'``.
    """
    fp_kind = getFPType(X)

    # alpha: zero is allowed but triggers a warning
    if self.alpha == 0:
        warnings.warn(
            "With alpha=0, this algorithm does not converge "
            "well. You are advised to use the LinearRegression "
            "estimator",
            stacklevel=2)

    # l1_ratio: must be numeric and inside [0, 1]
    ratio_ok = isinstance(self.l1_ratio, numbers.Number) and not (
        self.l1_ratio < 0 or self.l1_ratio > 1)
    if not ratio_ok:
        raise ValueError("l1_ratio must be between 0 and 1; "
                         f"got l1_ratio={self.l1_ratio}")

    # precompute: either a Gram matrix or one of the allowed flags
    gram = self.precompute
    if isinstance(gram, np.ndarray):
        if check_input:
            check_array(gram, dtype=fp_kind)
        self.precompute = make2d(gram)
    elif gram not in [False, True, 'auto']:
        raise ValueError("precompute should be one of True, False, "
                         "'auto' or array-like. Got %r" % gram)

    # selection: only the two coordinate orderings are accepted
    if self.selection not in ('random', 'cyclic'):
        raise ValueError("selection should be either random or cyclic.")
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    """Run ``daal4py.dbscan`` on ``X`` and return core indices and labels.

    Parameters
    ----------
    X : input data; validated with ``check_array`` for float64/float32.
    eps : neighbourhood radius, must be strictly positive.
    min_samples : forwarded as ``minObservations`` after ``int()``.
    sample_weight : optional weights; validated against ``X`` with
        ``_check_sample_weight`` when given.

    Returns
    -------
    tuple ``(core_ind, assignments)``, both flattened.  ``core_ind`` is an
    empty ``np.intc`` array when the result carries no core indices.

    Raises
    ------
    ValueError
        If ``eps`` is not positive.
    """
    if eps <= 0.0:
        raise ValueError("eps must be positive.")

    X = check_array(X, dtype=[np.float64, np.float32])
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        ww = make2d(sample_weight)
    else:
        ww = None

    XX = make2d(X)
    fpt = getFPType(XX)
    alg = daal4py.dbscan(
        method='defaultDense',
        fptype=fpt,
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices")
    daal_res = alg.compute(XX, ww)

    # The cluster count was previously read into a local that was never
    # used; only assignments and core indices are returned.
    assignments = daal_res.assignments.ravel()
    if daal_res.coreIndices is not None:
        core_ind = daal_res.coreIndices.ravel()
    else:
        core_ind = np.array([], dtype=np.intc)
    return (core_ind, assignments)
def df_clsf_fit(X, y, n_classes, n_trees=100, seed=12345,
                n_features_per_node=0, max_depth=0, min_impurity=0,
                bootstrap=True, verbose=False):
    """Train a decision-forest classifier and return the training result.

    ``n_features_per_node`` is honoured only when it is strictly between 0
    and ``X.shape[1]``; otherwise all ``X.shape[1]`` features are used per
    node.  ``verbose`` is accepted but not read by this function.

    Returns the object produced by the training algorithm's
    ``compute(X, y)`` call.
    """
    data_fptype = getFPType(X)

    total_features = X.shape[1]
    if n_features_per_node > 0 and n_features_per_node < total_features:
        node_features = n_features_per_node
    else:
        node_features = total_features

    rng_engine = engines_mt2203(seed=seed, fptype=data_fptype)
    trainer = decision_forest_classification_training(
        nClasses=n_classes,
        fptype=data_fptype,
        method='defaultDense',
        nTrees=n_trees,
        observationsPerTreeFraction=1.,
        featuresPerNode=node_features,
        maxTreeDepth=max_depth,
        minObservationsInLeafNode=1,
        engine=rng_engine,
        impurityThreshold=min_impurity,
        varImportance='MDI',
        resultsToCompute='',
        memorySavingMode=False,
        bootstrap=bootstrap,
    )
    return trainer.compute(X, y)
def _daal4py_predict_enet(self, X):
    """Predict with the stored elastic-net model ``self.daal_model_``.

    ``X`` is converted with ``make2d`` and the floating-point type is taken
    from ``self.coef_``.  The prediction is flattened to 1-D when it has a
    single column and ``self.coef_`` is 1-D; otherwise it is returned as
    produced by the prediction algorithm.
    """
    data = make2d(X)
    coef_fptype = getFPType(self.coef_)
    predictor = daal4py.elastic_net_prediction(fptype=coef_fptype,
                                               method='defaultDense')
    prediction = predictor.compute(data, self.daal_model_).prediction

    # Single-target fit: mirror the 1-D shape of coef_ in the output.
    if prediction.shape[1] == 1 and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def _daal4py_predict_lasso(self, X):
    """Predict with the stored lasso model ``self.daal_model_``.

    ``X`` is converted with ``make2d`` and the floating-point type is taken
    from ``self.coef_``.  The prediction is flattened to 1-D when it has a
    single column and ``self.coef_`` is 1-D; otherwise it is returned as
    produced by the prediction algorithm.
    """
    data = make2d(X)
    coef_fptype = getFPType(self.coef_)
    predictor = daal4py.lasso_regression_prediction(fptype=coef_fptype,
                                                    method='defaultDense')
    prediction = predictor.compute(data, self.daal_model_).prediction

    # Single-target fit: mirror the 1-D shape of coef_ in the output.
    if prediction.shape[1] == 1 and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def _daal4py_check(self, X, y, check_input): _fptype = getFPType(X) #check alpha if self.alpha == 0: warnings.warn("With alpha=0, this algorithm does not converge " "well. You are advised to use the LinearRegression " "estimator", stacklevel=2) #check l1_ratio if (not isinstance(self.l1_ratio, numbers.Number) or self.l1_ratio < 0 or self.l1_ratio > 1): raise ValueError("l1_ratio must be between 0 and 1; " f"got l1_ratio={self.l1_ratio}") #check precompute if isinstance(self.precompute, np.ndarray): if check_input: check_array(self.precompute, dtype=_fptype) self.precompute = make2d(self.precompute) #only for compliance with Sklearn if self.fit_intercept: X_offset = np.average(X, axis=0, weights=None) if self.normalize: X_scale = row_norms(X) if np.isscalar(X_scale): if X_scale == .0: X_scale = 1. elif isinstance(X_scale, np.ndarray): X_scale[X_scale == 0.0] = 1.0 else: X_scale = np.ones(X.shape[1], dtype=_fptype) else: X_offset = np.zeros(X.shape[1], dtype=_fptype) X_scale = np.ones(X.shape[1], dtype=_fptype) if (self.fit_intercept and not np.allclose(X_offset, np.zeros(X.shape[1])) or self.normalize and not np.allclose(X_scale, np.ones(X.shape[1]))): warnings.warn("Gram matrix was provided but X was centered" " to fit intercept, " "or X was normalized : recomputing Gram matrix.", UserWarning) else: if self.precompute not in [False, True, 'auto']: raise ValueError("precompute should be one of True, False, " "'auto' or array-like. Got %r" % self.precompute) #check selection if self.selection not in ['random', 'cyclic']: raise ValueError("selection should be either random or cyclic.")
def _daal4py_predict_lasso(self, X):
    """Predict with the stored lasso model after a feature-count check.

    ``X`` is converted with ``make2d``; its column count must equal
    ``self.n_features_in_``.  The prediction is flattened to 1-D when it has
    a single column and ``self.coef_`` is 1-D.

    Raises
    ------
    ValueError
        If ``X`` does not have ``self.n_features_in_`` columns.
    """
    X = make2d(X)
    coef_fptype = getFPType(self.coef_)
    predictor = daal4py.lasso_regression_prediction(fptype=coef_fptype,
                                                    method='defaultDense')

    # Reject inputs whose width differs from what the model was fitted on.
    if self.n_features_in_ != X.shape[1]:
        raise ValueError((f'X has {X.shape[1]} features, '
                          f'but Lasso is expecting '
                          f'{self.n_features_in_} features as input'))

    prediction = predictor.compute(X, self.daal_model_).prediction
    if prediction.shape[1] == 1 and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def _daal4py_predict_enet(self, X):
    """Predict with the stored elastic-net model after a feature-count check.

    ``X`` is converted with ``make2d``.  When ``sklearn_check_version('0.23')``
    is true, its column count must equal ``self.n_features_in_``.  The
    prediction is flattened to 1-D when it has a single column and
    ``self.coef_`` is 1-D.

    Raises
    ------
    ValueError
        If the version check passes and ``X`` does not have
        ``self.n_features_in_`` columns.
    """
    X = make2d(X)
    coef_fptype = getFPType(self.coef_)
    predictor = daal4py.elastic_net_prediction(fptype=coef_fptype,
                                               method='defaultDense')

    # The width check applies only when the version gate is satisfied.
    if sklearn_check_version('0.23') and \
            self.n_features_in_ != X.shape[1]:
        raise ValueError(f'X has {X.shape[1]} features, '
                         f'but ElasticNet is expecting '
                         f'{self.n_features_in_} features as input')

    prediction = predictor.compute(X, self.daal_model_).prediction
    if prediction.shape[1] == 1 and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    """Run ``daal4py.dbscan`` on ``X`` and return core indices and labels.

    No input validation is done here; ``X`` and, when given,
    ``sample_weight`` are only converted with ``make2d``.

    Returns
    -------
    tuple ``(core_ind, assignments)``, both flattened.  ``core_ind`` is an
    empty ``np.intc`` array when the result carries no core indices.
    """
    if sample_weight is None:
        weights_2d = None
    else:
        weights_2d = make2d(sample_weight)

    data_2d = make2d(X)
    clusterer = daal4py.dbscan(
        method='defaultDense',
        fptype=getFPType(data_2d),
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices",
    )
    result = clusterer.compute(data_2d, weights_2d)

    labels = result.assignments.ravel()
    if result.coreIndices is None:
        core_samples = np.array([], dtype=np.intc)
    else:
        core_samples = result.coreIndices.ravel()
    return (core_samples, labels)
def test_predict(Xp, model):
    """Run ridge-regression prediction on ``Xp`` with a trained ``model``.

    Returns whatever the prediction algorithm's ``compute(Xp, model)``
    call returns.
    """
    data_fptype = getFPType(Xp)
    predictor = ridge_regression_prediction(fptype=data_fptype)
    return predictor.compute(Xp, model)
def test_fit(X, y):
    """Train ridge regression on ``(X, y)`` using settings from ``params``.

    ``params.alpha`` is passed as a 1x1 array of ridge parameters and
    ``params.fit_intercept`` as the intercept flag.  Returns whatever the
    training algorithm's ``compute(X, y)`` call returns.
    """
    data_fptype = getFPType(X)
    ridge_penalty = np.array([[params.alpha]])
    trainer = ridge_regression_training(
        fptype=data_fptype,
        ridgeParameters=ridge_penalty,
        interceptFlag=params.fit_intercept,
    )
    return trainer.compute(X, y)
def _daal4py_fit_enet(self, X, y_, check_input):
    """Fit an elastic-net model with daal4py and store the results on ``self``.

    Parameters
    ----------
    X : training data; converted with ``make2d``.
    y_ : targets; converted with ``make2d``.
    check_input : forwarded to ``_daal4py_check``.

    Returns
    -------
    ``self`` on success, or ``None`` when the training ``compute`` call
    raises ``RuntimeError``.

    Side effects
    ------------
    Sets ``daal_model_``, ``coef_``, ``intercept_``, ``n_iter_``, ``_X`` and
    ``_y`` on ``self``.

    Raises
    ------
    ValueError
        If ``alpha * l1_ratio`` or ``alpha * (1 - l1_ratio)`` is not a
        single value.
    """
    # appropriate checks
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    penalty_L1 = np.asarray(self.alpha * self.l1_ratio, dtype=X.dtype)
    penalty_L2 = np.asarray(self.alpha * (1.0 - self.l1_ratio),
                            dtype=X.dtype)
    if penalty_L1.size != 1 or penalty_L2.size != 1:
        raise ValueError("alpha or l1_ratio length is wrong")
    penalty_L1 = penalty_L1.reshape((1, -1))
    penalty_L2 = penalty_L2.reshape((1, -1))

    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        # Identity test for the None singleton (was ``== None``).
        seed=0 if self.random_state is None else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with [intercept, coef...] per target
    if (self.warm_start and hasattr(self, "coef_") and
            isinstance(self.coef_, np.ndarray)):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            if n_rows == 1:
                inputArgument[i][0] = self.intercept_
                inputArgument[i][1:] = self.coef_[:].copy(order='C')
            else:
                inputArgument[i][0] = self.intercept_[i]
                inputArgument[i][1:] = self.coef_[i, :].copy(order='C')
        cd_solver.setup(inputArgument)

    elastic_net_alg = daal4py.elastic_net_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        # ``== False`` is kept deliberately: values such as 0 or np.False_
        # must keep selecting 'doUse', which ``is False`` would not do.
        dataUseInComputation='doUse' if (self.copy_X == False)  # noqa: E712
        else 'doNotUse',
        penaltyL1=penalty_L1,
        penaltyL2=penalty_L2,
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            elastic_net_res = elastic_net_alg.compute(
                data=X, dependentVariables=y, gramMatrix=self.precompute)
        else:
            elastic_net_res = elastic_net_alg.compute(data=X,
                                                      dependentVariables=y)
    except RuntimeError:
        # Signalled to the caller as "no result" rather than propagated.
        return None

    # set coef_ and intercept_ results
    elastic_net_model = elastic_net_res.model
    self.daal_model_ = elastic_net_model
    coefs = elastic_net_model.Beta

    # Column 0 of Beta holds the intercept, the rest the coefficients.
    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if self.max_iter == n_iter + 1:
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.", ConvergenceWarning)

    # only for dual_gap computation, it is not required for DAAL
    self._X = X
    self._y = y

    return self
def compute_distances(pairwise_distances, X):
    """Instantiate ``pairwise_distances`` for ``X`` and run it.

    ``pairwise_distances`` is called with the floating-point type of ``X``
    as its ``fptype`` argument; the result of ``compute(X)`` on the returned
    object is returned unchanged.
    """
    data_fptype = getFPType(X)
    distance_algo = pairwise_distances(fptype=data_fptype)
    return distance_algo.compute(X)
def test_dbscan(X):
    """Run ``dbscan`` on ``X`` using settings from ``params``.

    ``params.eps`` and ``params.min_samples`` supply the radius and minimum
    observation count, and core indices are requested.  Returns whatever
    the algorithm's ``compute(X)`` call returns.
    """
    data_fptype = getFPType(X)
    clusterer = dbscan(
        fptype=data_fptype,
        epsilon=params.eps,
        minObservations=params.min_samples,
        resultsToCompute='computeCoreIndices',
    )
    return clusterer.compute(X)
def _daal4py_check(self, X, y_, check_input):
    """Validate inputs and hyper-parameters; return the prepared ``X, y``.

    ``X`` and ``y_`` are converted with ``make2d`` (and to float64 arrays if
    they are still lists afterwards).  ``alpha``, ``precompute`` and
    ``selection`` are then checked.  With ``check_input`` set, ``X`` and
    ``y`` are passed through ``check_X_y``; otherwise ``X`` must already be
    Fortran contiguous.

    Returns
    -------
    tuple ``(X, y)`` after conversion/validation.

    Raises
    ------
    ValueError
        If ``precompute`` is not an ndarray and not one of ``False``,
        ``True``, ``'auto'``; if ``check_input`` is false and ``X`` is not
        Fortran contiguous; or if ``selection`` is neither ``'random'`` nor
        ``'cyclic'``.
    """
    # convert to 2d format
    X = make2d(X)
    y = make2d(y_)

    # convert from list type
    if isinstance(X, list):
        X = np.asarray(X, np.float64)
    if isinstance(y, list):
        y = np.asarray(y, np.float64)

    _fptype = getFPType(X)

    # check alpha: zero is allowed but only with a warning
    if self.alpha == 0:
        warnings.warn(
            "With alpha=0, this algorithm does not converge "
            "well. You are advised to use the LinearRegression "
            "estimator", stacklevel=2)

    # check precompute
    if isinstance(self.precompute, np.ndarray):
        if check_input:
            # return value is discarded; the call is used for validation only
            check_array(self.precompute, dtype=_fptype)
        self.precompute = make2d(self.precompute)
        # only for compliance with Sklearn: work out the offset/scale that
        # preprocessing would apply, purely to decide whether to warn
        if self.fit_intercept:
            X_offset = np.average(X, axis=0, weights=None)
            if self.normalize:
                # NOTE(review): row_norms(X) yields one value per row of X,
                # while the comparison below uses np.ones(X.shape[1]) -
                # confirm the intended axis against the caller.
                X_scale = row_norms(X)
                if np.isscalar(X_scale):
                    if X_scale == .0:
                        X_scale = 1.
                elif isinstance(X_scale, np.ndarray):
                    # avoid zero scales
                    X_scale[X_scale == 0.0] = 1.0
            else:
                X_scale = np.ones(X.shape[1], dtype=_fptype)
        else:
            X_offset = np.zeros(X.shape[1], dtype=_fptype)
            X_scale = np.ones(X.shape[1], dtype=_fptype)
        # Short-circuit order matters: the X_scale comparison is evaluated
        # only when the X_offset half is false.
        if (self.fit_intercept and
                not np.allclose(X_offset, np.zeros(X.shape[1])) or
                self.normalize and
                not np.allclose(X_scale, np.ones(X.shape[1]))):
            warnings.warn(
                "Gram matrix was provided but X was centered"
                " to fit intercept, "
                "or X was normalized : recomputing Gram matrix.",
                UserWarning)
    else:
        if self.precompute not in [False, True, 'auto']:
            raise ValueError("precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % self.precompute)

    # check X and y
    if check_input:
        X, y = check_X_y(X,
                         y,
                         dtype=[np.float64, np.float32],
                         multi_output=True,
                         y_numeric=True)
    else:
        # only for compliance with Sklearn, this assert is not required for DAAL
        if (X.flags['F_CONTIGUOUS'] == False):
            raise ValueError("ndarray is not Fortran contiguous")

    # check selection
    if self.selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")

    return X, y
def _daal4py_fit_enet(self, X, y_, check_input):
    """Fit an elastic-net model with daal4py, with optional centering/scaling.

    When both ``fit_intercept`` and ``normalize`` are set, ``X`` is centered
    and normalized and ``y`` is centered before training, and the trained
    ``Beta`` matrix is rescaled afterwards so the stored coefficients refer
    to the original data.

    Parameters
    ----------
    X : training data; converted with ``make2d``.
    y_ : targets; converted with ``make2d``.
    check_input : forwarded to ``_daal4py_check``.

    Returns
    -------
    ``self`` on success, or ``None`` when the training ``compute`` call
    raises ``RuntimeError``.

    Side effects
    ------------
    Sets ``_X``, ``_y``, ``daal_model_``, ``coef_``, ``intercept_`` and
    ``n_iter_`` on ``self``.

    Raises
    ------
    ValueError
        If ``alpha * l1_ratio`` or ``alpha * (1 - l1_ratio)`` is not a
        single value.
    """
    # appropriate checks
    _daal4py_check(self, X, y_, check_input)
    X = make2d(X)
    y = make2d(y_)
    _fptype = getFPType(X)

    # only for dual_gap computation, it is not required for Intel(R) oneAPI
    # Data Analytics Library
    # NOTE(review): stored before centering; when copy_X is false the
    # in-place ``X -= X_offset`` below presumably also changes self._X,
    # since both names refer to the same object - confirm.
    self._X = X
    self._y = y

    penalty_L1 = np.asarray(self.alpha * self.l1_ratio, dtype=X.dtype)
    penalty_L2 = np.asarray(self.alpha * (1.0 - self.l1_ratio),
                            dtype=X.dtype)
    if (penalty_L1.size != 1 or penalty_L2.size != 1):
        raise ValueError("alpha or l1_ratio length is wrong")
    penalty_L1 = penalty_L1.reshape((1, -1))
    penalty_L2 = penalty_L2.reshape((1, -1))

    # normalizing and centering: start from identity offset/scale
    X_offset = np.zeros(X.shape[1], dtype=X.dtype)
    X_scale = np.ones(X.shape[1], dtype=X.dtype)
    if y.ndim == 1:
        y_offset = X.dtype.type(0)
    else:
        y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    if self.fit_intercept:
        X_offset = np.average(X, axis=0)
        if self.normalize:
            # copy_X decides whether the caller's array is modified in place
            if self.copy_X:
                X = np.copy(X) - X_offset
            else:
                X -= X_offset
            X, X_scale = normalize(X, axis=0, copy=False, return_norm=True)
            y_offset = np.average(y, axis=0)
            y = y - y_offset

    # only for compliance with Sklearn
    if isinstance(self.precompute, np.ndarray) and (
            self.fit_intercept and
            not np.allclose(X_offset, np.zeros(X.shape[1])) or
            self.normalize and
            not np.allclose(X_scale, np.ones(X.shape[1]))):
        warnings.warn(
            "Gram matrix was provided but X was centered"
            " to fit intercept, "
            "or X was normalized : recomputing Gram matrix.", UserWarning)

    mse_alg = daal4py.optimization_solver_mse(numberOfTerms=X.shape[0],
                                              fptype=_fptype,
                                              method='defaultDense')
    mse_alg.setup(X, y, None)

    cd_solver = daal4py.optimization_solver_coordinate_descent(
        function=mse_alg,
        fptype=_fptype,
        method='defaultDense',
        selection=self.selection,
        seed=0 if (self.random_state is None) else self.random_state,
        nIterations=self.max_iter,
        positive=self.positive,
        accuracyThreshold=self.tol)

    # set warm_start: seed the solver with [intercept, coef...] per target
    if self.warm_start and hasattr(self, "coef_") and \
            isinstance(self.coef_, np.ndarray):
        n_rows = y.shape[1]
        n_cols = X.shape[1] + 1
        inputArgument = np.zeros((n_rows, n_cols), dtype=_fptype)
        for i in range(n_rows):
            inputArgument[i][0] = self.intercept_ if (
                n_rows == 1) else self.intercept_[i]
            inputArgument[i][1:] = self.coef_[:].copy(
                order='C') if (n_rows == 1) else self.coef_[i, :].copy(
                    order='C')
        cd_solver.setup(inputArgument)

    elastic_net_alg = daal4py.elastic_net_training(
        fptype=_fptype,
        method='defaultDense',
        interceptFlag=(self.fit_intercept is True),
        # 'doUse' when copy_X is False, or when X was already replaced by a
        # centered/normalized copy above
        dataUseInComputation='doUse' if (
            (self.copy_X is False) or
            (self.fit_intercept and self.normalize and self.copy_X)
        ) else 'doNotUse',
        penaltyL1=penalty_L1,
        penaltyL2=penalty_L2,
        optimizationSolver=cd_solver)
    try:
        if isinstance(self.precompute, np.ndarray):
            elastic_net_res = elastic_net_alg.compute(
                data=X, dependentVariables=y, gramMatrix=self.precompute)
        else:
            elastic_net_res = elastic_net_alg.compute(data=X,
                                                      dependentVariables=y)
    except RuntimeError:
        # signalled to the caller as "no result" rather than propagated
        return None

    # set coef_ and intercept_ results
    elastic_net_model = elastic_net_res.model
    self.daal_model_ = elastic_net_model

    # update coefficients if normalizing and centering; the intercept line
    # reads the coefficients rescaled on the line before it
    if self.fit_intercept and self.normalize:
        elastic_net_model.Beta[:, 1:] = elastic_net_model.Beta[:, 1:] / X_scale
        elastic_net_model.Beta[:, 0] = (
            y_offset - np.dot(X_offset, elastic_net_model.Beta[:, 1:].T)).T

    coefs = elastic_net_model.Beta

    # column 0 of Beta holds the intercept, the rest the coefficients
    self.intercept_ = coefs[:, 0].copy(order='C')
    self.coef_ = coefs[:, 1:].copy(order='C')

    # only for compliance with Sklearn
    if y.shape[1] == 1:
        self.coef_ = np.ravel(self.coef_)
    self.intercept_ = np.ravel(self.intercept_)
    if self.intercept_.shape[0] == 1:
        self.intercept_ = self.intercept_[0]

    # set n_iter_
    n_iter = cd_solver.__get_result__().nIterations[0][0]
    if y.shape[1] == 1:
        self.n_iter_ = n_iter
    else:
        self.n_iter_ = np.full(y.shape[1], n_iter)

    # only for compliance with Sklearn
    if (self.max_iter == n_iter + 1):
        warnings.warn(
            "Objective did not converge. You might want to "
            "increase the number of iterations.", ConvergenceWarning)

    return self
def test_fit(X, y):
    """Train linear regression on ``(X, y)`` using settings from ``params``.

    ``params.method`` and ``params.fit_intercept`` are forwarded to the
    training algorithm.  Returns whatever its ``compute(X, y)`` call
    returns.
    """
    data_fptype = getFPType(X)
    trainer = linear_regression_training(
        fptype=data_fptype,
        method=params.method,
        interceptFlag=params.fit_intercept,
    )
    return trainer.compute(X, y)