def __init__(self, **params):
    '''
    Install the default parameter set, validate any user overrides and
    build the wrapped MeanShift estimator.

    Parameters
    ----------
    ms_h : float, list of floats or None, sets the bandwidth of the mean
           shift algorithm (a list is averaged to a scalar); defaults to
           None, whereby the algorithm automatically determines the
           bandwidth
    automatic_ms_h : bool, if True forces the algorithm to determine its
                     own bandwidth, overrides any ms_h setting
    ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data
             points supplied to self.__call__ that are used to compute
             the ms seed points
    rho_threshold : float (0 < rho_threshold < 1), threshold on the ratio
                    of 2nd largest to largest cluster eigenvalues used
                    when filtering cluster centers
    '''
    super(lpcMeanShift, self).__init__()
    self._lpcParameters = {
        'ms_h': None,
        'automatic_ms_h': False,
        'ms_sub': 30,
        'rho_threshold': 0.2,
    }
    self._prm_list = [self._lpcParameters]
    self.user_prm = None  # extension of parameter set disallowed
    positive = lpcMeanShift._positivityCheck
    self._type_check.update({
        'ms_h': lambda x: (x is None)
                          or positive(x)
                          or (isinstance(x, list) and all(map(positive, x))),
        'automatic_ms_h': (bool,),
        # The check must be *called*: a bare function reference is always
        # truthy and would let zero/negative values through.
        'ms_sub': lambda x: positive(x) and x <= 100,
        'rho_threshold': lambda x: positive(x) and x < 1,
    })
    self.set(**params)
    if self._lpcParameters['automatic_ms_h'] or self._lpcParameters['ms_h'] is None:
        # Let MeanShift estimate its own bandwidth.
        self._meanShift = MeanShift()
    else:
        # MeanShift only accepts a scalar bandwidth, so a list is averaged.
        self._meanShift = MeanShift(bandwidth=mean(self._lpcParameters['ms_h']))
def calculate_cluster(lena, lena_mat, quantile):
    """Cluster the rows of ``lena_mat`` with mean shift and paint the result.

    The bandwidth is estimated with ``estimate_bandwidth`` (using the given
    ``quantile`` and 500 samples) and a ``MeanShift`` model with bin seeding
    is fitted to ``lena_mat``.  For every row of ``lena_mat`` the first two
    columns are used directly as indices into a copy of ``lena`` and that
    element is overwritten with column 2 of the row's cluster centre.

    Parameters
    ----------
    lena : array
        Image to copy and paint into; it is not modified.
    lena_mat : array
        One row per sample.  Columns 0 and 1 must be valid integer indices
        into ``lena``.  NOTE(review): rows are presumably
        (row, col, intensity) -- confirm against the caller.
    quantile : float
        Passed through to ``estimate_bandwidth``.

    Returns
    -------
    dict
        ``{"image": painted copy of lena, "quantile": quantile,
        "clusters": number of distinct labels}``.
    """
    bandwidth = estimate_bandwidth(lena_mat, quantile=quantile, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(lena_mat)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))

    # Only the centre-valued image is returned, so the label image and the
    # relabelled copies of lena_mat that used to be built here are dropped.
    lena_clustered_value = lena.copy()
    for point, label in zip(lena_mat, labels):
        lena_clustered_value[point[0], point[1]] = cluster_centers[label, 2]

    return {
        "image": lena_clustered_value,
        "quantile": quantile,
        "clusters": n_clusters_,
    }
def calculate_cluster(camera, camera_mat, quantile):
    """Cluster the rows of ``camera_mat`` with mean shift and paint the result.

    The bandwidth is estimated with ``estimate_bandwidth`` (using the given
    ``quantile`` and 500 samples) and a ``MeanShift`` model with bin seeding
    is fitted to ``camera_mat``.  For every row of ``camera_mat`` the first
    two columns are used directly as indices into a copy of ``camera`` and
    that element is overwritten with column 2 of the row's cluster centre.

    Parameters
    ----------
    camera : array
        Image to copy and paint into; it is not modified.
    camera_mat : array
        One row per sample.  Columns 0 and 1 must be valid integer indices
        into ``camera``.  NOTE(review): rows are presumably
        (row, col, intensity) -- confirm against the caller.
    quantile : float
        Passed through to ``estimate_bandwidth``.

    Returns
    -------
    dict
        ``{"image": painted copy of camera, "quantile": quantile,
        "clusters": number of distinct labels}``.
    """
    bandwidth = estimate_bandwidth(camera_mat, quantile=quantile, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(camera_mat)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))

    # Only the centre-valued image is returned, so the label image and the
    # relabelled copies of camera_mat that used to be built here are dropped.
    camera_clustered_value = camera.copy()
    for point, label in zip(camera_mat, labels):
        camera_clustered_value[point[0], point[1]] = cluster_centers[label, 2]

    return {
        "image": camera_clustered_value,
        "quantile": quantile,
        "clusters": n_clusters_,
    }
def meanshift(desc, quantile, hs=16, hr=16, copy=True):
    """
    Fit a mean shift model to rescaled descriptors.

    The first two columns of ``desc`` are divided by ``hs`` and all
    remaining columns by ``hr`` before the bandwidth is estimated (with
    ``quantile`` and 500 samples) and ``MeanShift`` (bin seeding enabled)
    is fitted.  The fitted cluster centres are multiplied back by the same
    factors so they are expressed in the original units.

    Parameters
    ----------
    desc : 2-D array
        One descriptor per row.
    quantile : float
        Passed through to ``estimate_bandwidth``.
    hs : number
        Divisor for columns 0 and 1.  NOTE(review): presumably the spatial
        scale -- confirm.
    hr : number
        Divisor for columns 2 onwards.  NOTE(review): presumably the
        range/colour scale -- confirm.
    copy : bool
        If True (default) ``desc`` is left untouched.  If False and
        ``desc`` is a floating-point array, it is rescaled IN PLACE and is
        left rescaled on return.

    Returns
    -------
    MeanShift
        The fitted estimator, with ``cluster_centers_`` in original units.
        ``labels_`` refers to the rows of ``desc``.
    """
    if desc.dtype.kind in 'iub':
        # In-place true division cannot store its result in an integer or
        # boolean array, so work on a float copy instead of failing.
        desc = desc.astype(float)
    elif copy:
        desc = desc.copy()

    desc[:, :2] /= hs
    desc[:, 2:] /= hr

    bandwidth = estimate_bandwidth(desc, quantile=quantile, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(desc)

    # Undo the rescaling on the centres only.
    ms.cluster_centers_[:, :2] *= hs
    ms.cluster_centers_[:, 2:] *= hr
    return ms
def setScaleParameters(self, ms_h=None):
    '''
    Initial setter for the scale parameters; does nothing when
    self._lpcParameters['automatic_ms_h'] is True.

    Parameters
    ----------
    ms_h : float or None, bandwidth of the meanshift algorithm; the default
           (None) leaves everything unchanged

    When a value is accepted it is stored through set_in_dict and a new
    MeanShift estimator is created whose bandwidth is the mean of the
    stored 'ms_h' entry.
    '''
    if self._lpcParameters['automatic_ms_h'] or ms_h is None:
        return
    self.set_in_dict('ms_h', ms_h, self._lpcParameters)
    self._meanShift = MeanShift(bandwidth=mean(self._lpcParameters['ms_h']))
class lpcMeanShift(PrmDictBase):
    '''
    Wrapper around the scikit-learn class sklearn.cluster.MeanShift to
    approximately mimic the behavior of the LPCM CRAN package.

    Callable that generates n starting points based on local density modes
    of X. Seed points for modes are given by x0; ms.h controls the kernel
    bandwidth (the original CRAN package allowed a vector with a separate
    bandwidth per dimension, the sklearn MeanShift class allows only a
    scalar bandwidth, so the mean is taken); ms.sub is the percentage of
    data points that should be used as seeds for selecting local density
    modes.
    '''

    @staticmethod
    def _positivityCheck(x):
        '''Return True when x is an int or float strictly greater than 0.'''
        return isinstance(x, (int, float)) and x > 0

    def _removeNonTracklikeClusterCenters(self):
        '''
        Build one representative point per mean shift cluster.

        For each cluster the scatter matrix of its members about the
        cluster center is formed and rho, the ratio of its 2nd largest to
        largest eigenvalue, is computed. If rho is below
        'rho_threshold' the cluster center itself is the representative,
        otherwise a random member of the cluster (drawn with
        lpcRandomStartPoints) replaces it.

        Requires self._Xi and a fitted self._meanShift; returns the
        representatives as an array with one row per cluster.

        NOTE : Much of this code is copied from
        LPCMImpl.followXSingleDirection (factor out?)
        '''
        labels = self._meanShift.labels_
        cluster_centers = self._meanShift.cluster_centers_
        rho_threshold = self._lpcParameters['rho_threshold']
        rsp = lpcRandomStartPoints()
        cluster_representatives = []
        # Iterate over the labels actually assigned, not 0..len-1: a center
        # that attracted no points leaves a gap in the label sequence.
        for k in unique(labels):
            cluster_center = cluster_centers[k]
            cluster = self._Xi[labels == k, :]
            mean_sub = cluster - cluster_center
            cov_x = dot(transpose(mean_sub), mean_sub)
            # Only the eigenvalues are needed, largest first.
            eigenvalues = sorted(eigh(cov_x)[0], reverse=True)
            rho = eigenvalues[1] / eigenvalues[0]  # ratio of two largest eigenvalues
            if rho < rho_threshold:
                cluster_representatives.append(cluster_center)
            else:
                # append a random element of the cluster
                cluster_representatives.append(rsp(cluster, 1)[0])
        return array(cluster_representatives)

    def __init__(self, **params):
        '''
        Install the default parameter set, validate any user overrides and
        build the wrapped MeanShift estimator.

        Parameters
        ----------
        ms_h : float, list of floats or None, sets the bandwidth of the
               mean shift algorithm (a list is averaged to a scalar);
               defaults to None, whereby the algorithm automatically
               determines the bandwidth
        automatic_ms_h : bool, if True forces the algorithm to determine
                         its own bandwidth, overrides any ms_h setting
        ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data
                 points supplied to self.__call__ that are used to compute
                 the ms seed points
        rho_threshold : float (0 < rho_threshold < 1), ratio of 2nd largest
                        to largest cluster eigenvalues at or above which a
                        cluster center is replaced by a random member of
                        its cluster
        '''
        super(lpcMeanShift, self).__init__()
        self._lpcParameters = {
            'ms_h': None,
            'automatic_ms_h': False,
            'ms_sub': 30,
            'rho_threshold': 0.2,
        }
        self._prm_list = [self._lpcParameters]
        self.user_prm = None  # extension of parameter set disallowed
        positive = lpcMeanShift._positivityCheck
        self._type_check.update({
            'ms_h': lambda x: (x is None)
                              or positive(x)
                              or (isinstance(x, list) and all(map(positive, x))),
            'automatic_ms_h': (bool,),
            # The check must be *called*: a bare function reference is
            # always truthy and would let zero/negative values through.
            'ms_sub': lambda x: positive(x) and x <= 100,
            'rho_threshold': lambda x: positive(x) and x < 1,
        })
        self.set(**params)
        if self._lpcParameters['automatic_ms_h'] or self._lpcParameters['ms_h'] is None:
            # Let MeanShift estimate its own bandwidth.
            self._meanShift = MeanShift()
        else:
            # MeanShift only accepts a scalar bandwidth, so a list is averaged.
            self._meanShift = MeanShift(bandwidth=mean(self._lpcParameters['ms_h']))

    def __call__(self, X, n=None, x0=None):
        '''
        Generates seed points for the lpc algorithm.

        X, 2 dimensional [#points, #dimension of points] array containing
           the data for which local density modes are to be calculated
        n, required number of seed points; lpcRandomStartPoints is called
           with x0 equal to the cluster representatives and with this n,
           or with n = 2 when n is None
           NOTE(review): the previous docstring stated that n = None
           returns exactly the local density modes -- confirm against
           lpcRandomStartPoints
        x0, 2-dimensional array containing #rows equal to number of
            explicitly defined mean shift seed points and #columns equal to
            dimension of the individual data points (called number of
            features in MeanShift docs); if None, a random subset of X is
            used as seeds

        Returns the lpc seed points as produced by lpcRandomStartPoints.
        '''
        self._Xi = X
        if x0 is None:
            N = self._Xi.shape[0]
            ms_sub = float(self._lpcParameters['ms_sub'])
            # guarantees ms_sub <= ms_sub % of N <= 10 * ms_sub seed points
            # (could give the option of using seed point binning in MeanShift)
            Nsub = int(min(max(ms_sub, floor(ms_sub * N / 100)), 10 * ms_sub))
            # Never ask for more seeds than there are points; sample()
            # raises ValueError otherwise.
            Nsub = min(Nsub, N)
            ms_seeds = self._Xi[sample(range(N), Nsub), :]
        else:
            ms_seeds = x0
        self._meanShift.seeds = ms_seeds
        self._meanShift.fit(self._Xi)
        cluster_representatives = self._removeNonTracklikeClusterCenters()
        if len(cluster_representatives) == 0:
            cluster_representatives = None
        lpcRSP = lpcRandomStartPoints()
        if n is None:
            return lpcRSP(self._Xi, n=2, x0=cluster_representatives)
        return lpcRSP(self._Xi, n=n, x0=cluster_representatives)

    def setScaleParameters(self, ms_h=None):
        '''
        This is for initially setting the scale parameters, and only has
        an effect if self._lpcParameters['automatic_ms_h'] is False.

        Parameters
        ----------
        ms_h : float or None, sets the bandwidth of the meanshift
               algorithm, default (None) has no effect
        '''
        if not self._lpcParameters['automatic_ms_h'] and ms_h is not None:
            self.set_in_dict('ms_h', ms_h, self._lpcParameters)
            bandwidth = mean(self._lpcParameters['ms_h'])
            # A new estimator is needed for the new bandwidth.
            self._meanShift = MeanShift(bandwidth=bandwidth)

    def getClusterLabels(self):
        '''Return the labels_ attribute of the wrapped MeanShift estimator.'''
        return self._meanShift.labels_
'LassoLarsCV':LassoLarsCV(), 'LassoLarsIC':LassoLarsIC(), 'LatentDirichletAllocation':LatentDirichletAllocation(), 'LedoitWolf':LedoitWolf(), 'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(), 'LinearRegression':LinearRegression(), 'LinearSVC':LinearSVC(), 'LinearSVR':LinearSVR(), 'LocallyLinearEmbedding':LocallyLinearEmbedding(), 'LogisticRegression':LogisticRegression(), 'LogisticRegressionCV':LogisticRegressionCV(), 'MDS':MDS(), 'MLPClassifier':MLPClassifier(), 'MLPRegressor':MLPRegressor(), 'MaxAbsScaler':MaxAbsScaler(), 'MeanShift':MeanShift(), 'MinCovDet':MinCovDet(), 'MinMaxScaler':MinMaxScaler(), 'MiniBatchDictionaryLearning':MiniBatchDictionaryLearning(), 'MiniBatchKMeans':MiniBatchKMeans(), 'MiniBatchSparsePCA':MiniBatchSparsePCA(), 'MultiTaskElasticNet':MultiTaskElasticNet(), 'MultiTaskElasticNetCV':MultiTaskElasticNetCV(), 'MultiTaskLasso':MultiTaskLasso(), 'MultiTaskLassoCV':MultiTaskLassoCV(), 'MultinomialNB':MultinomialNB(), 'NMF':NMF(), 'NearestCentroid':NearestCentroid(), 'NearestNeighbors':NearestNeighbors(), 'Normalizer':Normalizer(), 'NuSVC':NuSVC(),