Beispiel #1
0
    def __init__(self,
                 n_clusters=8,
                 init='k-means++',
                 n_init=10,
                 max_iter=300,
                 tol=1e-4,
                 precompute_distances='auto',
                 verbose=0,
                 random_state=None,
                 copy_x=True,
                 n_jobs=None,
                 algorithm='full',
                 norm='L2'):
        """
        Configure the parent ``KMeans`` estimator and record the norm.

        Every argument except *norm* is forwarded unchanged to
        ``KMeans.__init__``.

        :param norm: name of the norm; stored lower-cased in ``self.norm``
        :raises NotImplementedError: if the lower-cased norm is ``'l1'``
            while ``self.algorithm`` is anything other than ``'full'``
        """
        parent_settings = dict(
            n_clusters=n_clusters,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            precompute_distances=precompute_distances,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            n_jobs=n_jobs,
            algorithm=algorithm,
        )
        KMeans.__init__(self, **parent_settings)

        # The norm name is compared case-insensitively from here on.
        self.norm = norm.lower()
        unsupported = self.norm == 'l1' and self.algorithm != 'full'
        if unsupported:
            raise NotImplementedError(  # pragma no cover
                "Only algorithm 'full' is implemented with norm 'l1'.")
Beispiel #2
0
    def __init__(self, rank=10, clusters=1, iterations=3, metric='euclidean'):
        """
        Set up the parent k-means estimator and store the ranking options.

        ``clusters`` is handed to the parent as ``n_clusters`` and
        ``iterations`` as ``max_iter`` (the maximum number of iterations).
        ``rank`` and ``metric`` are stored on the instance unchanged.
        """
        parent_options = {'n_clusters': clusters, 'max_iter': iterations}
        sk_kmeans.__init__(self, **parent_options)

        # Original author's note: cluster ranks is a list of lists of
        # knn-sorted elements for each cluster w.r.t. the cluster mean.
        self.rank, self.metric = rank, metric
 def __init__(self, n_clusters=8, **kwargs):
     """
     Initialize the parent KMeans estimator and reset every data holder.

     Parameters
     ----------
     n_clusters: int, number of clusters to use. Forwarded to KMeans as
                 n_clusters.
     **kwargs: any other keyword arguments, forwarded to KMeans unchanged.
     """
     KMeans.__init__(self, n_clusters=n_clusters, **kwargs)
     # Input data files that data was read from; empty until something is loaded
     self.source_files = []
     # Index and (lon, lat) coordinate holders, in 3D and 2D variants
     self.ind = self.ind2d = None
     self.coords = self.coords2d = None
     # Null-value mask holder; per the original note, True marks
     # null values / values out of domain
     self.mask = None
     # Data holders, in 3D and 2D variants
     self.raw_data = self.data2d = None
Beispiel #4
0
    def __init__(self,
                 n_clusters=8,
                 m=1,
                 init='k-means++',
                 n_init=10,
                 max_iter=300,
                 tol=1e-4,
                 precompute_distances='auto',
                 verbose=0,
                 random_state=None,
                 copy_x=True,
                 n_jobs=None,
                 algorithm='auto'):
        """
        Initialize both parents: ``FuzzyKMeans`` first, then ``KMeans``.

        ``n_clusters`` is passed to ``FuzzyKMeans`` as ``k`` together with
        ``m``; ``max_iter``, ``random_state`` and ``tol`` go to both
        initializers; the remaining arguments go to ``KMeans`` only.
        """
        # Settings that both parent initializers receive.
        shared = dict(max_iter=max_iter, random_state=random_state, tol=tol)

        FuzzyKMeans.__init__(self, k=n_clusters, m=m, **shared)

        # KMeans runs second, so it has the final say on any attribute
        # that both initializers assign.
        KMeans.__init__(self,
                        n_clusters=n_clusters,
                        init=init,
                        n_init=n_init,
                        precompute_distances=precompute_distances,
                        verbose=verbose,
                        copy_x=copy_x,
                        n_jobs=n_jobs,
                        algorithm=algorithm,
                        **shared)
Beispiel #5
0
 def __init__(self, n_clusters=8, **kwargs):
     """
     Set up the underlying KMeans estimator and create empty placeholders.

     Parameters
     ----------
     n_clusters: int, number of clusters to use. Passed to KMeans as
                 n_clusters.
     **kwargs: remaining keyword arguments, passed to KMeans as given.
     """
     KMeans.__init__(self, n_clusters=n_clusters, **kwargs)
     # List of input data files from which data was read and stored
     self.source_files = []
     # Placeholders that start as None, in the order they are created:
     #   ind, ind2d       - indices (3D and 2D formats)
     #   coords, coords2d - (lon, lat) coordinates (3D and 2D formats)
     #   mask             - null-value mask; True means null / out of domain
     #   raw_data, data2d - data (3D and 2D formats)
     placeholders = ('ind', 'ind2d', 'coords', 'coords2d',
                     'mask', 'raw_data', 'data2d')
     for attr_name in placeholders:
         setattr(self, attr_name, None)
 def __init__(self,
              n_clusters=8,
              init='k-means++',
              n_init=10,
              max_iter=500,
              tol=0.0001,
              precompute_distances='deprecated',
              verbose=0,
              random_state=None,
              copy_x=True,
              n_jobs=1,
              algorithm='auto',
              balanced_predictions=False,
              strategy='gain',
              kmeans0=True,
              learning_rate=1.,
              history=False):
     """
     Stores the constraint options after initializing the
     :epkg:`k-means` parent with the standard parameters.

     @param      n_clusters              number of clusters
     @param      init                    used by :epkg:`k-means`
     @param      n_init                  used by :epkg:`k-means`
     @param      max_iter                used by :epkg:`k-means`
     @param      tol                     used by :epkg:`k-means`
     @param      precompute_distances    used by :epkg:`k-means`
     @param      verbose                 used by :epkg:`k-means`
     @param      random_state            used by :epkg:`k-means`
     @param      copy_x                  used by :epkg:`k-means`
     @param      n_jobs                  used by :epkg:`k-means`
     @param      algorithm               used by :epkg:`k-means`
     @param      balanced_predictions    produces balanced predictions
                                         or the regular ones
     @param      strategy                strategy or algorithm used to abide
                                         by the constraint, must be one of
                                         ``ConstraintKMeans._strategy_value``
     @param      kmeans0                 if True, applies *k-means* algorithm first
     @param      learning_rate           learning rate, used by strategy `'weights'`
     @param      history                 keeps centers across iterations

     Raises *ValueError* if *strategy* is not an allowed value.
     """
     kmeans_params = dict(
         n_clusters=n_clusters,
         init=init,
         n_init=n_init,
         max_iter=max_iter,
         tol=tol,
         precompute_distances=precompute_distances,
         verbose=verbose,
         random_state=random_state,
         copy_x=copy_x,
         n_jobs=n_jobs,
         algorithm=algorithm,
     )
     KMeans.__init__(self, **kmeans_params)

     # options specific to the constrained variant
     self.balanced_predictions = balanced_predictions
     self.strategy = strategy
     self.kmeans0 = kmeans0
     self.history = history
     self.learning_rate = learning_rate
     # internal thread count, unset at construction time
     self._n_threads = None

     allowed = ConstraintKMeans._strategy_value
     if strategy not in allowed:
         raise ValueError('strategy must be in {0}'.format(allowed))
 def __init__(self, dataset, labels=None, inertia=0.):
     """
     Build an estimator whose fitted attributes are filled in directly.

     :param dataset: stored as ``cluster_centers_``; ``dataset.shape[0]``
         becomes ``n_clusters``
     :param labels: stored as ``labels_``; when None, the labels are
         ``numpy.arange(dataset.shape[0])`` cast to ``numpy.int32``
     :param inertia: stored as ``inertia_``
     """
     KMeans.__init__(self)
     self.cluster_centers_ = dataset
     n_centers = dataset.shape[0]
     self.n_clusters = n_centers
     self.labels_ = (
         numpy.arange(n_centers).astype(numpy.int32)
         if labels is None else labels)
     self.inertia_ = inertia
 def __init__(self, dataset, labels=None, inertia=0.):
     """
     Wrap *dataset* as the cluster centers of a default ``KMeans``.

     ``n_clusters`` is taken from ``dataset.shape[0]``. If *labels* is
     None, one int32 label per row of *dataset* is generated with
     ``numpy.arange``. *inertia* is stored as ``inertia_``.
     """
     KMeans.__init__(self)
     self.cluster_centers_ = dataset
     self.n_clusters = dataset.shape[0]
     if labels is None:
         # default: label i for row i
         labels = numpy.arange(dataset.shape[0]).astype(numpy.int32)
     self.labels_ = labels
     self.inertia_ = inertia
Beispiel #9
0
 def __init__(self):
     """
     Initialize the parent ``KMeans`` with its defaults, then overwrite
     the model settings with fixed values and create empty result holders.
     """
     KMeans.__init__(self)
     # Setting of k-means model, applied in this order
     model_settings = (
         ('n_clusters', 15),
         ('init', 'k-means++'),
         ('max_iter', 300),
         ('n_init', 10),
         ('random_state', 0),
     )
     for setting_name, setting_value in model_settings:
         setattr(self, setting_name, setting_value)
     # Other attributes, empty at construction time
     self.users_cluster = self.clusters_movies_df = None
Beispiel #10
0
 def __init__(
     self,
     data,
     max_k=8,
     min_k=2,
     init="k-means++",
     n_clusters=8,
     n_init=10,
     max_iter=300,
     tol=0.0001,
     precompute_distances="auto",
     verbose=0,
     random_state=None,
     copy_x=True,
     n_jobs=None,
     algorithm="auto",
 ):
     """
     Initialize the estimator and fit one KMeans per k in [min_k, max_k].

     Parameters
     ----------
     data : the data handed to ``KMeans.fit_predict`` for every k; also
         stored as ``self.data``.
     max_k : largest number of clusters tried (inclusive).
     min_k : smallest number of clusters tried (inclusive).
     init, n_clusters, n_init, max_iter, tol, precompute_distances,
     verbose, random_state, copy_x, n_jobs, algorithm :
         forwarded to ``KMeans.__init__`` for this instance.

     After construction ``self.km_results[k]`` holds the keys
     ``"cluster_labels"``, ``"centers"`` and ``"inertia"`` for each k in
     ``self.k_range``.
     """
     # Forward the caller's arguments; the previous version passed
     # hard-coded literals here, so every KMeans setting given to this
     # constructor was silently ignored (and ``init`` was dropped).
     KMeans.__init__(
         self,
         n_clusters=n_clusters,
         init=init,
         n_init=n_init,
         max_iter=max_iter,
         tol=tol,
         precompute_distances=precompute_distances,
         verbose=verbose,
         random_state=random_state,
         copy_x=copy_x,
         n_jobs=n_jobs,
         algorithm=algorithm,
     )
     self.data = data
     self.k_range = range(min_k, max_k + 1)
     self.km_results = {k: {} for k in self.k_range}
     for k in tqdm(self.k_range):
         # NOTE(review): each sweep model uses KMeans defaults apart from
         # n_clusters, so e.g. random_state is not applied to the sweep.
         # Confirm whether the constructor settings should be used here too.
         kms = KMeans(n_clusters=k)
         result = self.km_results[k]
         result["cluster_labels"] = kms.fit_predict(self.data)
         result["centers"] = kms.cluster_centers_
         result["inertia"] = kms.inertia_
 def __init__(self, *args, **kwargs):
     """Forward all positional and keyword arguments to ``KMeans.__init__``."""
     KMeans.__init__(self, *args, **kwargs)