def fit(self, y_train, priors=(0.5, 0.5), ε=1e-6, n_epoch=100, **kwargs):
    self.θ = priors
    for idx_epoch in range(n_epoch):
        # Stop as soon as the per-iteration update falls below the tolerance ε.
        if self.single_iter(y_train, **kwargs) < ε:
            return idx_epoch
    # Report non-convergence as a warning rather than raising, so the final
    # return statement is reachable (assumes `import warnings` and
    # `from sklearn.exceptions import ConvergenceWarning` at module level).
    warnings.warn(
        'Did not converge within {} epochs'.format(n_epoch),
        ConvergenceWarning)
    return n_epoch
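# Illustrative sketch (not from the original source): since the fit loop above
# reports non-convergence via warnings.warn, a caller who prefers a hard error
# can escalate ConvergenceWarning with a warnings filter. The snippet below
# only uses scikit-learn's LogisticRegression with max_iter=1 on toy data as a
# convenient way to trigger a ConvergenceWarning.
import warnings

import numpy as np
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegression

X_toy = np.random.RandomState(0).randn(50, 5)
y_toy = (X_toy[:, 0] > 0).astype(int)

with warnings.catch_warnings():
    warnings.simplefilter("error", ConvergenceWarning)
    try:
        LogisticRegression(max_iter=1).fit(X_toy, y_toy)
    except ConvergenceWarning as w:
        print("Did not converge:", w)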
def fit(self, X, y, sample_weight=None, zero_coef_alert=True):
    """Fit non-negative linear model.

    Parameters
    ----------
    X : numpy array or sparse matrix of shape [n_samples, n_features]
        Training data.
    y : numpy array of shape [n_samples,]
        Target values.
    sample_weight : numpy array of shape [n_samples]
        Individual weights for each sample.

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, y_numeric=True, multi_output=False)

    if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
        raise ValueError("Sample weights must be 1D array or scalar")

    X, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=sample_weight)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        X, y = _rescale_data(X, y, sample_weight)

    # Solve the non-negative least squares problem.
    self.coef_, result = nnls(X, y.squeeze())

    if np.all(self.coef_ == 0) and zero_coef_alert:
        raise ConvergenceWarning("All coefficients estimated to be zero in"
                                 " the non-negative least squares fit.")

    self._set_intercept(X_offset, y_offset, X_scale)
    self.opt_result_ = OptimizeResult(success=True, status=0, x=self.coef_,
                                      fun=result)
    return self
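# Illustrative sketch (not from the original source): the core of the fit
# method above is scipy.optimize.nnls, which solves argmin_w ||Xw - y||_2
# subject to w >= 0. A minimal standalone call on toy data:
import numpy as np
from scipy.optimize import nnls

X_demo = np.array([[1.0, 0.0],
                   [1.0, 1.0],
                   [0.0, 2.0]])
y_demo = np.array([1.0, 2.0, 3.0])

coef, residual_norm = nnls(X_demo, y_demo)  # coefficients are all >= 0
print(coef, residual_norm)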
def _partial_fit(self, currentItem, X, topK):
    # Silence scikit-learn convergence warnings for this fit (assumes
    # `import warnings` and
    # `from sklearn.exceptions import ConvergenceWarning` at module level).
    warnings.simplefilter("ignore", category=ConvergenceWarning)

    model = ElasticNet(alpha=1.0,
                       l1_ratio=self.l1_ratio,
                       positive=self.positive_only,
                       fit_intercept=False,
                       copy_X=False,
                       precompute=True,
                       selection='random',
                       max_iter=100,
                       tol=1e-4)

    # WARNING: make a copy of X to avoid race conditions on column j.
    # TODO: We can probably come up with something better here.
    X_j = X.copy()

    # Get the target column.
    y = X_j[:, currentItem].toarray()

    # Set the j-th column of X to zero so the item cannot predict itself.
    X_j.data[X_j.indptr[currentItem]:X_j.indptr[currentItem + 1]] = 0.0

    # Fit one ElasticNet model per column.
    model.fit(X_j, y)

    # model.coef_ contains the coefficients of the ElasticNet model;
    # keep only the topK largest non-zero values.
    # nnz_idx = model.coef_ > 0.0
    relevant_items_partition = (-model.coef_).argpartition(topK)[0:topK]
    relevant_items_partition_sorting = np.argsort(
        -model.coef_[relevant_items_partition])
    ranking = relevant_items_partition[relevant_items_partition_sorting]

    notZerosMask = model.coef_[ranking] > 0.0
    ranking = ranking[notZerosMask]

    values = model.coef_[ranking]
    rows = ranking
    cols = [currentItem] * len(ranking)

    return values, rows, cols
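# Illustrative sketch (not from the original source): the (values, rows, cols)
# triples returned by _partial_fit for each item column can be assembled into a
# sparse item-item weight matrix. `n_items` and the accumulated lists below are
# assumptions about how the surrounding SLIM-style trainer collects the output.
import scipy.sparse as sps

n_items = 4
all_values = [0.8, 0.3, 0.5]   # concatenated `values` from each call
all_rows = [1, 2, 0]           # concatenated `rows`
all_cols = [0, 0, 2]           # concatenated `cols` (one target item per call)

W_sparse = sps.csr_matrix((all_values, (all_rows, all_cols)),
                          shape=(n_items, n_items))
print(W_sparse.toarray())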
def _final_centroids(self, Xs, centroids):
    r'''
    Computes the final cluster centroids based on consensus samples across
    both views. Consensus samples are those that are assigned to the same
    partition in both views.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        - Xs length: n_views
        - Xs[i] shape: (n_samples, n_features_i)

        This list must be of size 2, corresponding to the two views of the
        data. The two views can each have a different number of features,
        but they must have the same number of samples.

    centroids : list of array-likes
        - centroids length: n_views
        - centroids[i] shape: (n_clusters, n_features_i)

        The cluster centroids for each of the two views. centroids[0]
        corresponds to the centroids of view 1 and centroids[1] corresponds
        to the centroids of view 2. These are not yet the final cluster
        centroids.
    '''
    # Assign each sample to its closest centroid in each view. These
    # assignments do not depend on the cluster index, so compute them once.
    v1_distances = self._compute_dist(Xs[0], centroids[0])
    v1_partitions = np.argmin(v1_distances, axis=1).flatten()
    v2_distances = self._compute_dist(Xs[1], centroids[1])
    v2_partitions = np.argmin(v2_distances, axis=1).flatten()

    # Compute consensus vectors for final clustering
    v1_consensus = list()
    v2_consensus = list()

    for clust in range(self.n_clusters):
        # Find data points in the same partition in both views
        part_indices = (v1_partitions == clust) * (v2_partitions == clust)

        # Recompute centroids based on these data points
        if np.sum(part_indices) != 0:
            cent1 = np.mean(Xs[0][part_indices], axis=0)
            v1_consensus.append(cent1)

            cent2 = np.mean(Xs[1][part_indices], axis=0)
            v2_consensus.append(cent2)

    # Check if there are no consensus vectors
    self.centroids_ = [None, None]
    if len(v1_consensus) == 0:
        msg = 'No distinct cluster centroids have been found.'
        raise ConvergenceWarning(msg)
    else:
        self.centroids_[0] = np.vstack(v1_consensus)
        self.centroids_[1] = np.vstack(v2_consensus)

        # Check if the number of consensus clusters is less than n_clusters
        if self.centroids_[0].shape[0] < self.n_clusters:
            msg = ('Number of distinct cluster centroids ('
                   + str(self.centroids_[0].shape[0])
                   + ') found is smaller than n_clusters ('
                   + str(self.n_clusters) + ').')
            raise ConvergenceWarning(msg)

        # Update n_clusters if the number of consensus clusters is smaller
        # than the original value
        self.n_clusters = self.centroids_[0].shape[0]
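# Illustrative sketch (not from the original source): _final_centroids relies
# on a per-view distance matrix between samples and centroids. Assuming
# _compute_dist returns Euclidean distances, the assignment step is equivalent
# to the following, using scipy's cdist on toy data.
import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
X_view = rng.randn(10, 3)          # one view: 10 samples, 3 features
view_centroids = rng.randn(2, 3)   # 2 cluster centroids for this view

distances = cdist(X_view, view_centroids)             # shape (10, 2)
partitions = np.argmin(distances, axis=1).flatten()   # closest centroid index
print(partitions)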
import logging
import warnings
from copy import deepcopy
from typing import Callable, Tuple

import numpy as np
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegression

from pandas_ml_common import Typing
from pandas_ml_utils.ml.data.extraction import FeaturesAndLabels
from pandas_ml_utils.ml.summary import Summary
from .base_model import Model

_log = logging.getLogger(__name__)

# Suppress scikit-learn convergence warnings emitted by the wrapped models.
warnings.filterwarnings('ignore', category=ConvergenceWarning)


class SkModel(Model):

    def __init__(self,
                 skit_model,
                 features_and_labels: FeaturesAndLabels,
                 summary_provider: Callable[[Typing.PatchedDataFrame], Summary] = Summary,
                 **kwargs):
        super().__init__(features_and_labels, summary_provider, **kwargs)
        self.skit_model = skit_model
        self.label_shape = None

    def fit_fold(self,
                 fold_nr: int,
                 x: np.ndarray,
                 y: np.ndarray,
                 x_val: np.ndarray,
                 y_val: np.ndarray,