Esempio n. 1
0
 def __init__(self, **params):
   '''
   Sets up the default parameters, registers a validator for each of them, applies the user supplied
   overrides and constructs the wrapped MeanShift instance.
   
   Parameters
   ----------
   
   ms_h : positive float, non-empty list of positive floats or None, sets the bandwidth of the mean shift
   algorithm (the mean is taken if a list is given), defaults to None, whereby MeanShift is constructed
   without a bandwidth argument
   
   automatic_ms_h : bool, if True MeanShift is constructed without a bandwidth argument, overriding any ms_h setting
   
   ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data points supplied to self.__call__ that are used to compute the ms seed
   points
   
   rho_threshold : float (0 < rho_threshold < 1), ratio of 2nd largest to largest cluster eigenvalues, above which cluster centers are
   removed from the output
   ''' 
   super(lpcMeanShift, self).__init__()
   self._lpcParameters = { 'ms_h': None,
                           'automatic_ms_h': False, 
                           'ms_sub': 30,
                           'rho_threshold': 0.2
                         }
   self._prm_list = [self._lpcParameters] 
   self.user_prm = None #extension of parameter set disallowed
   positive = lpcMeanShift._positivityCheck
   #NOTE: the positivity check has to be *called*; referring to the bare function object is always truthy and
   #would let zero and negative values through
   #NOTE(review): these validators are presumably consulted by self.set - confirm against PrmDictBase
   self._type_check.update({ 'ms_h': lambda x: (x is None) or positive(x) or (isinstance(x, list) and len(x) > 0 and all(map(positive, x))),
                             'automatic_ms_h': (bool,), 
                             'ms_sub': lambda x: positive(x) and x <= 100,
                             'rho_threshold': lambda x: positive(x) and x < 1
                           })
   self.set(**params)
   ms_h = self._lpcParameters['ms_h']
   if self._lpcParameters['automatic_ms_h'] or ms_h is None:
     self._meanShift = MeanShift()
   else:
     #MeanShift only accepts a scalar bandwidth, so a list of bandwidths is collapsed to its mean
     self._meanShift = MeanShift(bandwidth = mean(ms_h))
Esempio n. 2
0
def calculate_cluster(lena, lena_mat, quantile):
    """Cluster the rows of ``lena_mat`` with mean shift and paint the result.

    Every row of ``lena_mat`` is assigned to a cluster; the pixel addressed by
    the row's first two entries is then overwritten, in a copy of ``lena``,
    with column 2 of that cluster's center.

    Parameters
    ----------
    lena : array
        Image indexed as ``lena[row[0], row[1]]``. It is copied, never
        modified in place.
    lena_mat : array
        One row per sample. Columns 0 and 1 are used as indices into
        ``lena``; the fitted cluster centers must have a column 2.
        (Presumably ``(row, col, intensity)`` triples -- confirm with caller.)
    quantile : float
        Forwarded to ``estimate_bandwidth``.

    Returns
    -------
    dict
        ``"image"``: the repainted copy of ``lena``;
        ``"quantile"``: the ``quantile`` argument, unchanged;
        ``"clusters"``: number of distinct labels produced by the fit.
    """
    bandwidth = estimate_bandwidth(lena_mat, quantile=quantile, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(lena_mat)
    labels = ms.labels_
    # Only column 2 of each center is ever written into the output image.
    center_values = ms.cluster_centers_[:, 2]

    lena_clustered_value = lena.copy()
    # An explicit loop (rather than one fancy-indexed assignment) keeps the
    # "last row wins" outcome deterministic if two rows address the same pixel.
    for point, label in zip(lena_mat, labels):
        lena_clustered_value[point[0], point[1]] = center_values[label]

    return {"image": lena_clustered_value,
            "quantile": quantile,
            "clusters": len(np.unique(labels))}
Esempio n. 3
0
def calculate_cluster(camera, camera_mat, quantile):
    """Run mean shift on ``camera_mat`` and build the cluster-valued image.

    Each row of ``camera_mat`` receives a cluster label from the fit. In a
    copy of ``camera``, the element addressed by the row's first two entries
    is replaced by column 2 of the corresponding cluster center.

    Parameters
    ----------
    camera : array
        Image indexed as ``camera[row[0], row[1]]``; left untouched, the
        function works on a copy.
    camera_mat : array
        One row per sample; columns 0 and 1 index into ``camera`` and the
        fitted cluster centers must expose a column 2.
        (Presumably ``(row, col, intensity)`` triples -- confirm with caller.)
    quantile : float
        Passed through to ``estimate_bandwidth``.

    Returns
    -------
    dict
        ``"image"`` (repainted copy of ``camera``), ``"quantile"`` (the
        argument as given) and ``"clusters"`` (count of distinct labels).
    """
    bandwidth = estimate_bandwidth(camera_mat,
                                   quantile=quantile,
                                   n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(camera_mat)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    n_clusters_ = len(np.unique(labels))

    # Only this copy reaches the caller; the other intermediate copies the
    # function used to fill were never read and have been dropped.
    camera_clustered_value = camera.copy()

    # Row-by-row assignment keeps the outcome well defined (last row wins)
    # should two rows point at the same pixel.
    for point, label in zip(camera_mat, labels):
        camera_clustered_value[point[0], point[1]] = cluster_centers[label, 2]

    image = {
        "image": camera_clustered_value,
        "quantile": quantile,
        "clusters": n_clusters_
    }
    return image
Esempio n. 4
0
def meanshift(desc, quantile, hs=16, hr=16, copy=True):
    """
    Fit a MeanShift estimator on rescaled descriptors and return it.

    The first two columns of ``desc`` are divided by ``hs`` and the remaining
    columns by ``hr`` before the bandwidth is estimated and the estimator is
    fitted. After fitting, the cluster centers are multiplied back by the
    same factors, in place, on the returned estimator.

    Parameters
    ----------
    desc : array with at least two dimensions, sliced as ``desc[:, :2]`` and
        ``desc[:, 2:]``. Divided in place, so it is modified when ``copy``
        is false.
    quantile : forwarded to ``estimate_bandwidth``.
    hs : divisor for the first two columns.
    hr : divisor for all remaining columns.
    copy : when true, work on ``desc.copy()`` so the caller's array is left
        alone.

    Returns
    -------
    The fitted ``MeanShift`` instance, with ``cluster_centers_`` rescaled.
    """
    data = desc.copy() if copy else desc
    data[:, :2] /= hs
    data[:, 2:] /= hr

    estimator = MeanShift(
        bandwidth=estimate_bandwidth(data, quantile=quantile, n_samples=500),
        bin_seeding=True,
    )
    estimator.fit(data)

    # Undo the column scaling on the fitted centers.
    centers = estimator.cluster_centers_
    centers[:, :2] *= hs
    centers[:, 2:] *= hr

    return estimator
Esempio n. 5
0
 def setScaleParameters(self, ms_h = None):
   '''Stores a new bandwidth and rebuilds the wrapped MeanShift instance with it. Does nothing when
   self._lpcParameters['automatic_ms_h'] is True or when ms_h is None.
   
   Parameters
   ----------
   ms_h : float or None, sets the bandwidth of meanshift algorithm, default (None) has no effect
   '''
   if self._lpcParameters['automatic_ms_h'] or ms_h is None:
     return
   #store the value through set_in_dict, then read it back from the parameter dictionary
   self.set_in_dict('ms_h', ms_h, self._lpcParameters)
   #a fresh MeanShift is created with the mean of the stored ms_h as its scalar bandwidth
   self._meanShift = MeanShift(bandwidth = mean(self._lpcParameters['ms_h']))
Esempio n. 6
0
class lpcMeanShift(PrmDictBase):
  '''
  Wrapper around the scikit-learn class sklearn.cluster.MeanShift to approximately mimic the behavior of the LPCM CRAN package.
  Callable that generates n starting points based on local density modes of X. Seed points for modes are given by x0;
  ms.h controls the kernel bandwidth (the original CRAN package allowed a vector with a separate bandwidth per dimension,
  the sklearn MeanShift class allows only a scalar bandwidth, so the mean is taken), ms.sub is the percentage of data points 
  that should be used as seeds for selecting local density modes
  '''
  @staticmethod
  def _positivityCheck(x):
    '''Returns True if x is an int or float that is strictly greater than zero'''
    return isinstance(x, (int, float)) and x > 0
 
  def _removeNonTracklikeClusterCenters(self):
    '''Returns one representative point per cluster of the last fit, as a 2 dimensional array.
    
    For every cluster the ratio rho of the 2nd largest to the largest eigenvalue of the scatter matrix of the
    cluster members about the cluster center is computed. If rho is below self._lpcParameters['rho_threshold'] the
    cluster center is the representative, otherwise a cluster member drawn by lpcRandomStartPoints is used instead.
    
    Requires self._Xi to be set and self._meanShift to have been fitted (both done in self.__call__).
    
    NOTE : Much of this code is copied from LPCMImpl.followXSingleDirection (factor out?)
    '''
    labels = self._meanShift.labels_
    cluster_centers = self._meanShift.cluster_centers_
    rho_threshold = self._lpcParameters['rho_threshold']
    rsp = lpcRandomStartPoints()
    cluster_representatives = []
    #cluster labels are taken to be 0, 1, ..., (number of distinct labels - 1)
    for k in range(len(unique(labels))):
      cluster_center = cluster_centers[k]
      cluster = self._Xi[labels == k,:]
      mean_sub = cluster - cluster_center 
      cov_x = dot(transpose(mean_sub), mean_sub) 
      #only the eigenvalues are needed; sorting them explicitly avoids depending on the order eigh returns them in
      eigenvalues = sorted(eigh(cov_x)[0], reverse = True)
      rho = eigenvalues[1] / eigenvalues[0] #Ratio of two largest eigenvalues   
      if rho < rho_threshold:
        cluster_representatives.append(cluster_center)
      else: #append a random element of the cluster
        cluster_representatives.append(rsp(cluster, 1)[0])
    
    return array(cluster_representatives)
  
  def __init__(self, **params):
    '''
    Sets up the default parameters, registers a validator for each of them, applies the user supplied
    overrides and constructs the wrapped MeanShift instance.
    
    Parameters
    ----------
    
    ms_h : positive float, non-empty list of positive floats or None, sets the bandwidth of the mean shift
    algorithm (the mean is taken if a list is given), defaults to None, whereby MeanShift is constructed
    without a bandwidth argument
    
    automatic_ms_h : bool, if True MeanShift is constructed without a bandwidth argument, overriding any ms_h setting
    
    ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data points supplied to self.__call__ that are used to compute the ms seed
    points
    
    rho_threshold : float (0 < rho_threshold < 1), ratio of 2nd largest to largest cluster eigenvalues, at or above which
    the cluster center is replaced in the output by a randomly drawn member of the cluster
    ''' 
    super(lpcMeanShift, self).__init__()
    self._lpcParameters = { 'ms_h': None,
                            'automatic_ms_h': False, 
                            'ms_sub': 30,
                            'rho_threshold': 0.2
                          }
    self._prm_list = [self._lpcParameters] 
    self.user_prm = None #extension of parameter set disallowed
    positive = lpcMeanShift._positivityCheck
    #NOTE: the positivity check has to be *called*; referring to the bare function object is always truthy and
    #would let zero and negative values through
    #NOTE(review): these validators are presumably consulted by self.set - confirm against PrmDictBase
    self._type_check.update({ 'ms_h': lambda x: (x is None) or positive(x) or (isinstance(x, list) and len(x) > 0 and all(map(positive, x))),
                              'automatic_ms_h': (bool,), 
                              'ms_sub': lambda x: positive(x) and x <= 100,
                              'rho_threshold': lambda x: positive(x) and x < 1
                            })
    self.set(**params)
    ms_h = self._lpcParameters['ms_h']
    if self._lpcParameters['automatic_ms_h'] or ms_h is None:
      self._meanShift = MeanShift()
    else:
      #MeanShift only accepts a scalar bandwidth, so a list of bandwidths is collapsed to its mean
      self._meanShift = MeanShift(bandwidth = mean(ms_h))
    
  def __call__(self, X, n = None, x0 = None):
    '''
    Generates seed points for the lpc algorithm. 
    X, 2 dimensional [#points, #dimension of points] array containing the data for which local density modes are to be calculated
    n, required number of seed points passed on to lpcRandomStartPoints together with x0 equal to the cluster representatives
    (the cluster centers, or a random cluster member where the cluster is not track-like); if n = None, n = 2 is passed on
    NOTE(review): earlier documentation stated that n = None returns exactly the local density modes - confirm against
    lpcRandomStartPoints
    x0, 2-dimensional array containing #rows equal to number of explicitly defined mean shift seed points and #columns equal 
    to dimension of the individual data points (called number of features in MeanShift docs). If None, the mean shift seeds
    are drawn at random from the rows of X.
    
    Returns the lpc seed points as produced by lpcRandomStartPoints (a 2 dimensional [#seed points, #dimension of seed points] array)
    '''
    self._Xi = X
    if x0 is None:
      N = self._Xi.shape[0]
      ms_sub = float(self._lpcParameters['ms_sub'])
      #guarantees ms_sub <= ms_sub % of N <= 10 * ms_sub seed points (could give the option of using seed point binning in MeanShift)
      Nsub = int(min(max(ms_sub, floor(ms_sub * N / 100)), 10 * ms_sub))
      #sample raises ValueError when asked for more items than there are rows, so cap at N (and use at least one seed)
      Nsub = max(1, min(Nsub, N))
      ms_seeds = self._Xi[sample(range(N), Nsub),:]
    else:
      ms_seeds = x0
    self._meanShift.seeds = ms_seeds
    self._meanShift.fit(self._Xi)
    
    cluster_representatives = self._removeNonTracklikeClusterCenters()
    if len(cluster_representatives) == 0:
      cluster_representatives = None
    lpcRSP = lpcRandomStartPoints()
    return lpcRSP(self._Xi, n = 2 if n is None else n, x0 = cluster_representatives)
  
  def setScaleParameters(self, ms_h = None):
    '''This is for initially setting the scale parameters, and only has an effect if 
    self._lpcParameters['automatic_ms_h'] is False
    
    Parameters
    ----------
    ms_h : float or None, sets the bandwidth of meanshift algorithm, default (None) has no effect
    '''
    if not self._lpcParameters['automatic_ms_h'] and ms_h is not None:
      self.set_in_dict('ms_h', ms_h, self._lpcParameters)  
      #the wrapped MeanShift is rebuilt so that the new bandwidth takes effect
      bandwidth = mean(self._lpcParameters['ms_h'])
      self._meanShift = MeanShift(bandwidth = bandwidth)
      
  def getClusterLabels(self):
    '''Returns the labels_ attribute of the wrapped MeanShift instance (self.__call__ performs the fit)'''
    return self._meanShift.labels_
    
    
Esempio n. 7
0
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
			'LinearSVC':LinearSVC(),
			'LinearSVR':LinearSVR(),
			'LocallyLinearEmbedding':LocallyLinearEmbedding(),
			'LogisticRegression':LogisticRegression(),
			'LogisticRegressionCV':LogisticRegressionCV(),
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
			'MaxAbsScaler':MaxAbsScaler(),
			'MeanShift':MeanShift(),
			'MinCovDet':MinCovDet(),
			'MinMaxScaler':MinMaxScaler(),
			'MiniBatchDictionaryLearning':MiniBatchDictionaryLearning(),
			'MiniBatchKMeans':MiniBatchKMeans(),
			'MiniBatchSparsePCA':MiniBatchSparsePCA(),
			'MultiTaskElasticNet':MultiTaskElasticNet(),
			'MultiTaskElasticNetCV':MultiTaskElasticNetCV(),
			'MultiTaskLasso':MultiTaskLasso(),
			'MultiTaskLassoCV':MultiTaskLassoCV(),
			'MultinomialNB':MultinomialNB(),
			'NMF':NMF(),
			'NearestCentroid':NearestCentroid(),
			'NearestNeighbors':NearestNeighbors(),
			'Normalizer':Normalizer(),
			'NuSVC':NuSVC(),