Beispiel #1
0
    def search_for_clusters(self):
        """Label the entries with MeanShift clusters, spatially and in time.

        Nothing happens unless ``self.n_entries`` exceeds twice
        ``threshold_n_points``.  Otherwise two default ``cluster.MeanShift``
        models are fitted on attributes read from ``self.entries``:

        * ``ra_deg``, ``dec_deg``, ``rfc_score`` -> labels stored in
          ``self.clf["Spatial"]``
        * the same three plus ``date_mjd`` -> labels stored in
          ``self.clf["Time"]``

        A ``ValueError`` raised at any point is swallowed; whatever was
        stored before the failure stays in ``self.clf``.
        """
        if not self.n_entries > 2 * threshold_n_points:
            return

        spatial_fields = ("ra_deg", "dec_deg", "rfc_score")
        feature_sets = (
            ("Spatial", spatial_fields),
            ("Time", spatial_fields + ("date_mjd",)),
        )

        try:
            for clf_key, fields in feature_sets:
                # One row per entry, one column per requested attribute.
                features = np.array([[getattr(entry, name) for name in fields]
                                     for entry in self.entries])
                model = cluster.MeanShift()
                model.fit(features)
                self.clf[clf_key] = model.labels_
        except ValueError:
            pass
Beispiel #2
0
def definition_clusters(subset):
    """Build the unfitted clustering estimators to be compared.

    ``subset`` is L2-normalised with ``preprocessing.normalize``; the
    normalised copy is used only to estimate the MeanShift bandwidth.
    None of the estimators is fitted here.

    Returns
    -------
    tuple of (str, estimator)
        K-Means, MeanShift, DBSCAN, Birch and SpectralClustering, in that
        order.
    """
    # Important: normalise the data set we work with.
    scaled = preprocessing.normalize(subset, norm='l2')

    print("-------- Definiendo los clusteres...")

    # Bandwidth for mean shift is estimated from the normalised data.
    ms_bandwidth = cl.estimate_bandwidth(scaled, quantile=0.3)

    return (
        ('K-Means', cl.KMeans(init='k-means++', n_clusters=5, n_init=100)),
        ('MeanShift', cl.MeanShift(bandwidth=ms_bandwidth, bin_seeding=True)),
        ('DBSCAN', cl.DBSCAN(eps=0.1)),
        # Low threshold chosen because fit_predict emitted a warning otherwise.
        ('Birch', cl.Birch(n_clusters=5, threshold=0.1)),
        # Intended for small case studies.
        ('SpectralClustering',
         cl.SpectralClustering(n_clusters=5, affinity="rbf")),
    )
Beispiel #3
0
def definition_clusters(subset):
    """Return the (name, estimator) pairs of clustering algorithms to compare.

    ``subset`` is L2-normalised with ``preprocessing.normalize``; the
    normalised copy is used only to estimate the MeanShift bandwidth.
    None of the returned estimators is fitted here.

    The Ward estimator is created without a ``connectivity`` argument, so no
    k-neighbours graph is computed: building one would be discarded work.

    Returns
    -------
    tuple of (str, estimator)
        K-Means, MiniBatchKMeans, MeanShift, Agglomerative (Ward) and Birch,
        in that order.
    """
    # Important: normalise the data set we work with.
    normalized_set = preprocessing.normalize(subset, norm='l2')

    print("-------- Definiendo los clusteres...")

    k_means = cl.KMeans(init='k-means++', n_clusters=5, n_init=100)

    # Estimate the bandwidth for mean shift from the normalised data.
    bandwidth = cl.estimate_bandwidth(normalized_set, quantile=0.3)
    ms = cl.MeanShift(bandwidth=bandwidth)

    two_means = cl.MiniBatchKMeans(n_clusters=5, init='k-means++')

    ward = cl.AgglomerativeClustering(n_clusters=5, linkage='ward')

    brc = cl.Birch(n_clusters=5, threshold=0.1)

    # Collect everything as (name, estimator) pairs.
    clustering_algorithms = (('K-Means', k_means),
                             ('MiniBatchKMeans', two_means), ('MeanShift', ms),
                             ('Agglomerative', ward), ('Birch', brc))

    return clustering_algorithms
Beispiel #4
0
    def ClusterHouses(matches, plot_groups=False):
        """Group houses by location using MeanShift clustering.

        Parameters
        ----------
        matches : sequence
            Items accepted by ``RFAPI.house_location`` and
            ``RFAPI.house_address``.
        plot_groups : bool
            When true, ``HouseScore._plot_groups`` is called with the
            coordinates and the predicted labels.

        Returns
        -------
        dict
            Maps ``str(cluster_label)`` to a list of
            ``{"adress": ..., "location": [x, y]}`` dicts.  If anything
            raises, the message is stored under ``"error"`` (alongside any
            groups built so far), logged, and the dict is still returned.
        """
        groups = {}
        try:
            # One (x, y) row per match, taken from the first two components
            # of the reported location.
            X = np.zeros((len(matches), 2))
            for m, match in enumerate(matches):
                loc = RFAPI.house_location(match)
                X[m] = (loc[0], loc[1])

            bandwidth = cluster.estimate_bandwidth(X, quantile=.3)
            ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
            ms.fit(X)

            # The builtin int replaces the np.int alias, which no longer
            # exists in current NumPy releases.
            y_pred = ms.labels_.astype(int)

            for m, match in enumerate(matches):
                groups.setdefault(str(y_pred[m]), []).append({
                    "adress": RFAPI.house_address(match),
                    "location": [X[m][0], X[m][1]]
                })
            logging.debug("groups = {}".format(groups))
            if plot_groups:
                HouseScore._plot_groups(X, y_pred)
        except Exception as e:
            # Best effort: report the failure to the caller instead of raising.
            groups["error"] = str(e)
            logging.error(groups["error"])
        return groups
Beispiel #5
0
def definition_clusters(subset):
    """Assemble the unfitted clustering estimators used for comparison.

    ``subset`` is L2-normalised with ``preprocessing.normalize``; the
    normalised copy only feeds the MeanShift bandwidth estimate.  No
    estimator is fitted here.

    Returns
    -------
    tuple of (str, estimator)
        K-Means, MeanShift, MiniBatchMeans, AgglomerativeWard and
        AgglomerativeAverage, in that order.
    """
    # Important: normalise the data set we work with.
    scaled = preprocessing.normalize(subset, norm='l2')

    print("-------- Definiendo los clusteres...")

    # Bandwidth for mean shift, estimated on the normalised data.
    ms_bandwidth = cl.estimate_bandwidth(scaled, quantile=0.3)

    candidates = [
        ('K-Means', cl.KMeans(init='k-means++', n_clusters=5, n_init=100)),
        ('MeanShift', cl.MeanShift(bandwidth=ms_bandwidth)),
        ('MiniBatchMeans',
         cl.MiniBatchKMeans(n_clusters=5, init='k-means++')),
        # Both agglomerative variants are unstructured (no connectivity).
        ('AgglomerativeWard',
         cl.AgglomerativeClustering(n_clusters=100, linkage='ward')),
        ('AgglomerativeAverage',
         cl.AgglomerativeClustering(n_clusters=100, linkage='average')),
    ]
    return tuple(candidates)
Beispiel #6
0
    def fit_meanshift(self, data, bandwidth=None, bin_seeding=False, **kwargs):
        """
        Fit MeanShift clustering algorithm to data.

        Parameters
        ----------
        data : array-like
            A dataset formatted by `classifier.fitting_data`.
        bandwidth : float
            The bandwidth value used during clustering.
            If None, determined automatically with
            `sklearn.cluster.estimate_bandwidth`. Note:
            the data are scaled before clustering, so
            this is not in the same units as the data.
        bin_seeding : bool
            Whether or not to use 'bin_seeding'. See
            documentation for `sklearn.cluster.MeanShift`.
        **kwargs
            passed to `sklearn.cluster.MeanShift`.

        Returns
        -------
        Fitted `sklearn.cluster.MeanShift` object.
        """
        if bandwidth is None:
            bandwidth = cl.estimate_bandwidth(data)
        # Forward the extra keyword arguments, as documented above.
        ms = cl.MeanShift(bandwidth=bandwidth, bin_seeding=bin_seeding,
                          **kwargs)
        ms.fit(data)
        return ms
Beispiel #7
0
def investigateOptimalAlgorithms(kmerId, kmerPca):
    """Plot the labels assigned by several clustering algorithms.

    The ``PCA_DATA_COL_NAMES`` columns of ``kmerPca`` are labelled by each
    candidate algorithm through ``_getLabels``; for each one a scatter plot
    grouped by cluster label is built.  The plots are meant for visual
    inspection, to decide by hand which algorithm suits the data.

    Every algorithm has a unique name, so no plot overwrites another in the
    result (the two agglomerative linkages are named separately).

    Returns
    -------
    The collated ``hv.HoloMap`` of the scatter plots, keyed by algorithm name
    along the ``'algo'`` dimension.
    """
    plot.setLibrary('bokeh')

    pca = kmerPca.loc[:, PCA_DATA_COL_NAMES]
    plots = {}
    algos = (
        ('KMeans', cluster.KMeans()),
        ('Affinity', cluster.AffinityPropagation()),
        ('MeanShift', cluster.MeanShift()),
        ('Spectral', cluster.SpectralClustering()),
        ('AgglomerativeAverage',
         cluster.AgglomerativeClustering(linkage='average')),
        ('AgglomerativeWard',
         cluster.AgglomerativeClustering(linkage='ward')),
        ('DBSCAN', cluster.DBSCAN()),
        ('Gaussian', GaussianMixture()),
    )

    ## Visualise data and manually determine which algorithm will be good
    for name, algo in algos:
        labels = _getLabels(algo, pca)
        labels = pd.DataFrame(labels, columns=[CLABEL_COL_NAME])
        kmerDf = pd.concat([kmerId, pca, labels], axis=1)

        dataset = hv.Dataset(kmerDf, PCA_DATA_COL_NAMES)
        scatter = dataset.to(hv.Scatter,
                             PCA_DATA_COL_NAMES,
                             groupby=CLABEL_COL_NAME).overlay()
        scatter.opts(opts.Scatter(size=10, show_legend=True))
        plots[name] = scatter

    plots = hv.HoloMap(plots, kdims='algo')
    plots = plots.collate()
    return plots
Beispiel #8
0
def clusterization(cluster_radius, number_of_processors, atom_coords):
    """Cluster atomic coordinates with MeanShift.

    PARAMETERS
    ----------
    cluster_radius : int
                     value passed to MeanShift as its bandwidth.
    number_of_processors: int
                          number of jobs requested for MeanShift. When it is
                          greater than 2 and equal to ``cpu_count()``, only
                          half of it is used.
    atom_coords : list
                  atom coordinates to clusterize.

    RETURNS
    -------
    estimator : sklearn.cluster.MeanShift object
                the fitted MeanShift estimator (``cluster_all=True``).
    results : labels returned by ``estimator.fit_predict(atom_coords)``,
              one per element of ``atom_coords``.
    """
    n_jobs = number_of_processors
    if n_jobs > 2 and n_jobs == cpu_count():
        n_jobs = int(n_jobs / 2)

    estimator = cluster.MeanShift(cluster_all=True,
                                  n_jobs=n_jobs,
                                  bandwidth=cluster_radius)
    return estimator, estimator.fit_predict(atom_coords)
Beispiel #9
0
def meanshift(samples, samples_to_predict):
    """Fit MeanShift on ``samples`` and label ``samples_to_predict``.

    The bandwidth is fixed at 60 (it is not estimated from the data) and is
    printed before fitting.  All processors are requested via ``n_jobs=-1``.

    Returns the result of ``predict(samples_to_predict)`` on the fitted model.
    """
    bandwidth = 60
    print('bandwidth: {}'.format(bandwidth))
    model = cluster.MeanShift(n_jobs=-1, bandwidth=bandwidth)
    model.fit(samples)
    predicted = model.predict(samples_to_predict)
    return predicted
Beispiel #10
0
def cluster_model(newdata, data, model_name, input_param):
    """Cluster ``data`` with the named model and store the labels.

    Parameters
    ----------
    newdata : mapping-like (e.g. a DataFrame)
        Receives the result under the key ``model_name``.
    data : array-like
        Samples to cluster.
    model_name : str
        Case-insensitive name: KMeans, MiniBatchKMeans, SpectralClustering,
        MeanShift, DBSCAN, AffinityPropagation, Birch or GaussianMixture.
    input_param : dict
        Hyper-parameters; only the keys needed by the chosen model are read
        (``n_clusters``, ``bandwidth``, ``eps``, ``damping``, ``preference``).

    Returns
    -------
    ``newdata``, with ``newdata[model_name]`` set to a ``pd.DataFrame`` of the
    predicted labels.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    key = str.lower(model_name)

    # Factories are lazy so that only the selected model's parameters are
    # looked up in ``input_param``.
    factories = {
        'kmeans': lambda: cluster.KMeans(
            n_clusters=input_param['n_clusters']),
        'minibatchkmeans': lambda: cluster.MiniBatchKMeans(
            n_clusters=input_param['n_clusters']),
        'spectralclustering': lambda: cluster.SpectralClustering(
            n_clusters=input_param['n_clusters']),
        'meanshift': lambda: cluster.MeanShift(
            bandwidth=input_param['bandwidth']),
        'dbscan': lambda: cluster.DBSCAN(eps=input_param['eps']),
        'affinitypropagation': lambda: cluster.AffinityPropagation(
            damping=input_param['damping'],
            preference=input_param['preference']),
        'birch': lambda: cluster.Birch(
            n_clusters=input_param['n_clusters']),
        'gaussianmixture': lambda: mixture.GaussianMixture(
            n_components=input_param['n_clusters'],
            covariance_type='full'),
    }
    if key not in factories:
        raise ValueError('Unknown clustering model: {!r}'.format(model_name))

    cluster_obj = factories[key]()

    if key in ('affinitypropagation', 'gaussianmixture'):
        # These two are fitted first and then asked to predict.
        cluster_obj.fit(data)
        model_result = cluster_obj.predict(data)
    else:
        model_result = cluster_obj.fit_predict(data)

    newdata[model_name] = pd.DataFrame(model_result)

    return newdata
Beispiel #11
0
    def update_data(self, attrname, old, new):
        """Refit every clustering model and refresh the point colours.

        The three arguments are not used by the body (presumably the
        signature required of a property-change callback -- confirm against
        the caller).  Four models are rebuilt from the current widget values
        and ``self.bandwidth``, fitted on ``self.X``, and the resulting labels
        are mapped onto ``Spectral6`` and written to ``self.colors[i]`` and
        ``self.source[i].data['colors']``.
        """
        # Store the models here; position i feeds colors[i] / source[i].
        models = [
            cluster.MiniBatchKMeans(n_clusters=self.k_means_slider.value),
            cluster.DBSCAN(eps=self.DBSCAN_slider.value),
            cluster.Birch(n_clusters=self.birch_slider.value),
            cluster.MeanShift(bandwidth=self.bandwidth, bin_seeding=True)
        ]

        # Fit everything first so a failing fit leaves all colours untouched.
        for model in models:
            model.fit(self.X)

        for i, model in enumerate(models):
            # Inspect the model being coloured, not whichever one the fitting
            # loop happened to end on.
            if hasattr(model, 'labels_'):
                # Builtin int: the np.int alias no longer exists in NumPy.
                y_pred = model.labels_.astype(int)
            else:
                y_pred = model.predict(self.X)

            self.colors[i] = [Spectral6[f % 6] for f in y_pred]

            self.source[i].data['colors'] = self.colors[i]
def meanshift(feat, bw, num_process, min_bin_freq, **kwargs):
    """Cluster ``feat`` with MeanShift and return the labels.

    ``bw`` is the bandwidth, ``num_process`` the ``n_jobs`` value and
    ``min_bin_freq`` is forwarded unchanged; the two latter are printed
    first.  Extra keyword arguments are accepted but not used.
    """
    print('#num_process:', num_process)
    print('min_bin_freq:', min_bin_freq)
    estimator = cluster.MeanShift(min_bin_freq=min_bin_freq,
                                  n_jobs=num_process,
                                  bandwidth=bw)
    estimator.fit(feat)
    return estimator.labels_
def get_data(session_ds, inc_eval_ds, ms_band, db_eps):
    """Cluster session + evaluation embeddings and score the evaluation part.

    Each dataset item ``d`` contributes the embedding ``utils.t2a(d[0][0])``
    and the label ``d[1]``.  The embeddings of both datasets are stacked and
    clustered with MeanShift (one run per bandwidth in ``ms_band``), DBSCAN
    (one run per eps in ``db_eps``, ``min_samples=1``) and OPTICS
    (``min_samples=1``).  Only the predictions for the evaluation samples are
    scored against the evaluation labels.

    Returns
    -------
    (aris, amis, inc_pred, inc_eval_lab)
        Adjusted Rand and adjusted mutual information scores per clustering
        run (MeanShift runs, then DBSCAN runs, then OPTICS), the evaluation
        predictions of every run, and the evaluation labels.
    """
    session_items = list(session_ds)
    eval_items = list(inc_eval_ds)

    def split(items):
        # Separate the embeddings from the labels of one dataset.
        embeddings = np.squeeze([utils.t2a(item[0][0]) for item in items])
        labels = np.squeeze([item[1] for item in items])
        return embeddings, labels

    session_emb, session_lab = split(session_items)
    inc_eval_emb, inc_eval_lab = split(eval_items)

    X = np.concatenate((session_emb, inc_eval_emb))
    # Stacked labels are built as well, although nothing below reads them.
    y = np.concatenate((session_lab, inc_eval_lab))

    meanshift_runs = []
    for band in ms_band:
        meanshift_runs.append(cl.MeanShift(bandwidth=band).fit_predict(X))
    optics_run = cl.OPTICS(min_samples=1).fit_predict(X)
    dbscan_runs = []
    for eps in db_eps:
        dbscan_runs.append(cl.DBSCAN(eps=eps, min_samples=1).fit_predict(X))

    all_runs = np.array(meanshift_runs + dbscan_runs + [optics_run])
    # Keep only the columns belonging to the evaluation samples.
    inc_pred = all_runs[:, session_lab.size:]

    aris = []
    amis = []
    for pred in inc_pred:
        aris.append(adjusted_rand_score(pred, inc_eval_lab))
    for pred in inc_pred:
        amis.append(
            adjusted_mutual_info_score(pred, inc_eval_lab,
                                       average_method='max'))

    return np.array(aris), np.array(amis), inc_pred, inc_eval_lab
def use_meanShift(mat, n_cluster):
    """Cluster ``mat`` with default MeanShift, print a summary, return labels.

    The ``n_cluster`` argument is ignored: the number of clusters is derived
    from the fitted labels (largest label + 1).  The labels and the number of
    samples per cluster are printed.
    """
    clusters = cls.MeanShift().fit(mat)
    labels = clusters.labels_
    n_found = max(labels) + 1
    # One histogram bin per cluster label.
    hist, _ = np.histogram(labels, bins=np.arange(n_found + 1))
    print('Mean Shift clustering:', labels)
    print(hist)
    return labels
Beispiel #15
0
def configuraciones_meanshift(subset):
    """Build three unfitted MeanShift estimators with different bandwidths.

    ``subset`` is L2-normalised and the bandwidth is estimated on the
    normalised data at quantiles 0.3, 0.4 and 0.5.

    Returns
    -------
    tuple of (str, estimator)
        ``('MeanShift-1', ...)``, ``('MeanShift-2', ...)`` and
        ``('MeanShift-3', ...)``, one per quantile in the order above.
    """
    scaled = preprocessing.normalize(subset, norm='l2')

    # Estimate every bandwidth first, then wrap each one in an estimator.
    bandwidths = [cl.estimate_bandwidth(scaled, quantile=q)
                  for q in (0.3, 0.4, 0.5)]

    return tuple(('MeanShift-{}'.format(number), cl.MeanShift(bandwidth=bw))
                 for number, bw in enumerate(bandwidths, 1))
Beispiel #16
0
def meanshift_1():
    """Cluster 500 generated blob samples with MeanShift and plot them.

    Points are coloured by predicted cluster.  The three-colour palette is
    cycled, so the plot still works if MeanShift finds more than three
    clusters, and all points are drawn with a single ``scatter`` call.
    """
    x_data, _ = datasets.make_blobs(n_samples=500, random_state=20)
    y_predict = cluster.MeanShift().fit_predict(x_data)

    palette = ['red', 'green', 'blue']
    point_colors = [palette[label % len(palette)] for label in y_predict]
    plt.scatter(x_data[:, 0], x_data[:, 1], c=point_colors)
    plt.show()
    def getSortedRowClusters(self, objs):
        '''
        Determine row clusters and their order.

        The ``baseline`` of every object in ``objs`` is clustered with the
        algorithm named by ``self.row_algorithm`` ('affinity', 'DBSCAN' or
        'MeanShift'), constructed with ``self.row_params``.

        Returns a 3-tuple ``(clusters, count, fuzzy)``: an iterator yielding
        one array of ``objs`` indices per cluster (in order), the number of
        clusters, and whether any sample was labelled -1 (always False for
        'affinity').

        Raises ValueError if ``self.row_algorithm`` is not recognised.
        '''
        name = self.row_algorithm
        if name not in ('affinity', 'DBSCAN', 'MeanShift'):
            raise ValueError('Unknown row algorithm: {!r}'.format(name))

        if name == 'affinity':
            algorithm = cluster.AffinityPropagation(**self.row_params)
        elif name == 'DBSCAN':
            algorithm = cluster.DBSCAN(**self.row_params)
        else:
            algorithm = cluster.MeanShift(**self.row_params)

        # One single-feature sample per object.
        Y = np.array([[y.baseline] for y in objs], dtype=np.float64)
        rows = algorithm.fit_predict(Y)

        if name == 'affinity':
            # Here, samples are the found location, so just sort directly.
            row_set = set(rows)

            def ordered_clusters():
                # ABBYY coordinates are bottom-to-top, so reverse list.
                for i in sorted(row_set, reverse=True):
                    yield np.where(rows == i)[0]

            return ordered_clusters(), len(row_set), False

        # DBSCAN and MeanShift: samples are labelled; -1 marks samples that
        # were not assigned to any cluster.
        fuzzy = -1 in rows
        num_clusters = len(set(rows)) - (1 if fuzzy else 0)
        clusters = [np.where(rows == i)[0] for i in range(num_clusters)]

        if name == 'DBSCAN':
            # Go back and find the original locations of each cluster.
            centres = [np.mean(Y[members]) for members in clusters]
        else:
            # MeanShift provides the cluster locations (one feature each).
            centres = [centre[0] for centre in algorithm.cluster_centers_]

        # Sort on the centre alone: comparing whole (centre, indices) tuples
        # would compare the index arrays whenever two centres tie, which
        # raises for arrays with more than one element.
        ranked = sorted(zip(centres, clusters), key=lambda pair: pair[0])
        ordered_clusters = (clust for _, clust in ranked)
        return ordered_clusters, num_clusters, fuzzy
def plot_clusters(data, algorithm, args, kwds):
    '''
    Cluster ``data`` with the selected algorithm and scatter-plot the result.

    ``algorithm`` selects the estimator: 'k' -> KMeans, 'mean' -> MeanShift,
    'spec' -> SpectralClustering, anything else -> AgglomerativeClustering.
    The estimator is built as ``Estimator(*args, **kwds)``.

    ``data[0]`` and ``data[1]`` are plotted against each other, coloured by
    cluster label; negative labels are drawn in black.  The plot is drawn on
    the current matplotlib axes; nothing is returned.
    '''

    sns.set_context('poster')
    sns.set_color_codes()
    plot_kwds = {'alpha': 0.25, 's': 80, 'linewidths': 0}

    # The branches differ only in which estimator class is used.
    if algorithm == 'k':
        estimator_cls = cluster.KMeans
    elif algorithm == 'mean':
        estimator_cls = cluster.MeanShift
    elif algorithm == 'spec':
        estimator_cls = cluster.SpectralClustering
    else:
        estimator_cls = cluster.AgglomerativeClustering

    labels = estimator_cls(*args, **kwds).fit_predict(data)

    # One palette entry per label value from 0 up to the largest label.
    palette = sns.color_palette('deep', np.unique(labels).max() + 1)
    colors = [palette[x] if x >= 0 else (0.0, 0.0, 0.0) for x in labels]
    plt.scatter(data[0], data[1], c=colors, **plot_kwds)

    # Hide both axes.
    frame = plt.gca()
    frame.axes.get_xaxis().set_visible(False)
    frame.axes.get_yaxis().set_visible(False)
    plt.title('Clusters found by {}'.format(str(algorithm)), fontsize=24)
Beispiel #19
0
def get_centroids(X):
    """Return MeanShift cluster centres, flagging the least populated one.

    ``X`` is clustered with a fixed bandwidth of 4.  The result is a ``zip``
    iterator yielding, per cluster, the centre coordinates followed by a bool
    that is True for the cluster with the fewest samples.
    """
    # TODO another method could be used, as this does not ensure the amount of clusters
    # TODO Birch for example (but then you would get more centroids
    model = cluster.MeanShift(bandwidth=4)
    model.fit(X)

    labels, counts = np.unique(model.labels_, return_counts=True)
    rarest = labels[np.argmin(counts)]
    is_fraud = [bool(label == rarest) for label in labels]

    # Transposing gives one list per coordinate axis, ready to be zipped.
    per_axis = model.cluster_centers_.T.tolist()
    return zip(*per_axis, is_fraud)
Beispiel #20
0
def mean_shift(data, bandwith=None, n_samples=500, quantile=0.3):
    """Cluster ``data`` with MeanShift and return the labels.

    Parameters
    ----------
    data : array-like
        Samples to cluster.
    bandwith : float, optional
        Bandwidth to use (the parameter name keeps its historical spelling).
        When None, it is estimated with ``skcluster.estimate_bandwidth``
        using ``quantile`` and ``n_samples``.
    n_samples : int
        Passed to ``estimate_bandwidth``; only used when ``bandwith`` is None.
    quantile : float
        Passed to ``estimate_bandwidth``; only used when ``bandwith`` is None.

    Returns
    -------
    The ``labels_`` of the fitted MeanShift model.
    """
    bandwidth = bandwith
    if bandwidth is None:
        bandwidth = skcluster.estimate_bandwidth(data,
                                                 quantile=quantile,
                                                 n_samples=n_samples)

    ms = skcluster.MeanShift(bandwidth=bandwidth).fit(data)
    return ms.labels_
Beispiel #21
0
 def _cluster(self, acts, method='KM', param_dict=None):
     """Cluster activations and return assignments, costs and centers.

     Parameters
     ----------
     acts : array of shape (n_samples, n_features)
         Activations to cluster.
     method : str
         'KM' (KMeans), 'AP' (AffinityPropagation), 'MS' (MeanShift),
         'SC' (SpectralClustering) or 'DB' (DBSCAN).
     param_dict : dict, optional
         Method options: ``n_clusters`` (KM, SC; default 25), ``damping``
         (AP; default 0.5), ``eps`` and ``min_samples`` (DB; defaults 0.5
         and 20).  The dict is only read, never modified.

     Returns
     -------
     asg : cluster index of every activation.
     cost : Euclidean distance of every activation to its cluster center.
     centers : one center per cluster.  For KM and AP these are the
         estimator's centers and ``asg`` is the nearest center; for the
         other methods the center is the cluster medoid.

     Raises
     ------
     ValueError
         If ``method`` is not one of the supported codes.
     """
     print('Starting clustering with {} for {} activations'.format(
         method, acts.shape[0]))
     # Options are read with .get() so the caller's dict is left intact and
     # can be reused across calls.
     params = {} if param_dict is None else param_dict
     centers = None
     if method == 'KM':
         km = cluster.KMeans(n_clusters=params.get('n_clusters', 25))
         km.fit(acts)
         centers = km.cluster_centers_
     elif method == 'AP':
         # damping is keyword-only in current scikit-learn releases.
         ca = cluster.AffinityPropagation(damping=params.get('damping', 0.5))
         ca.fit(acts)
         centers = ca.cluster_centers_
     elif method == 'MS':
         ms = cluster.MeanShift(n_jobs=self.num_workers)
         asg = ms.fit_predict(acts)
     elif method == 'SC':
         sc = cluster.SpectralClustering(
             n_clusters=params.get('n_clusters', 25),
             n_jobs=self.num_workers)
         asg = sc.fit_predict(acts)
     elif method == 'DB':
         # min_samples is keyword-only in current scikit-learn releases.
         db = cluster.DBSCAN(eps=params.get('eps', 0.5),
                             min_samples=params.get('min_samples', 20),
                             n_jobs=self.num_workers)
         asg = db.fit_predict(acts)
     else:
         raise ValueError('Invalid Clustering Method!')
     if centers is not None:
         # The estimator supplied centers: assign each activation to the
         # nearest one and use that distance as its cost.
         d = np.linalg.norm(np.expand_dims(acts, 1) -
                            np.expand_dims(centers, 0),
                            ord=2,
                            axis=-1)
         asg, cost = np.argmin(d, -1), np.min(d, -1)
     else:
         # No centers were returned by the estimator: use medoids, i.e. the
         # member with the smallest summed distance to the other members.
         centers = np.zeros((asg.max() + 1, acts.shape[1]))
         cost = np.zeros(len(acts))
         for cluster_label in range(asg.max() + 1):
             cluster_idxs = np.where(asg == cluster_label)[0]
             cluster_points = acts[cluster_idxs]
             pw_distances = metrics.euclidean_distances(cluster_points)
             centers[cluster_label] = cluster_points[np.argmin(
                 np.sum(pw_distances, -1))]
             cost[cluster_idxs] = np.linalg.norm(
                 acts[cluster_idxs] -
                 np.expand_dims(centers[cluster_label], 0),
                 ord=2,
                 axis=-1)
     print('Created {} clusters'.format(len(np.unique(asg))))
     return asg, cost, centers
def MeanShift(data):
    """Cluster ``data`` with MeanShift using bin seeding.

    The bandwidth is estimated from ``data`` at quantile 0.2.

    Returns
    -------
    (labels, n_clusters)
        The label of every sample and the number of distinct labels.
    """
    estimator = cls.MeanShift(
        bandwidth=cls.estimate_bandwidth(data, quantile=0.2),
        bin_seeding=True)
    estimator.fit(data)
    labels = estimator.labels_
    return labels, len(np.unique(labels))
Beispiel #23
0
def cluster(image):
    '''Inputs bird's eye view skeleton and outputs clusters of the skeleton.

    The non-zero pixels of ``image`` are located with ``np.nonzero`` and
    clustered with MeanShift on their second index only (``Y``).

    The first attempt uses an estimated bandwidth (quantile 0.15), bin
    seeding with ``min_bin_freq=15`` and ``cluster_all=False``.  If that
    raises, a default ``MeanShift()`` is fitted instead.

    Returns ``(X, Y, labels)``: the two index arrays from ``np.nonzero`` and
    the cluster label of every non-zero pixel.
    '''

    X, Y = np.nonzero(image)
    # MeanShift expects a 2-D array: one single-feature sample per pixel.
    samples = Y.reshape(-1, 1)

    try:
        bandwidth = clstr.estimate_bandwidth(samples, quantile=0.15)
        ms = clstr.MeanShift(bandwidth=bandwidth,
                             bin_seeding=True,
                             min_bin_freq=15,
                             cluster_all=False)
        ms.fit(samples)
    except Exception:
        # Best-effort fallback.  ``Exception`` rather than a bare ``except``
        # so that KeyboardInterrupt and SystemExit still propagate.
        ms = clstr.MeanShift()
        ms.fit(samples)

    return X, Y, ms.labels_
Beispiel #24
0
def clustering(X, algorithm, n_clusters=2):
    """Standardise ``X`` and cluster it with the named algorithm.

    Parameters
    ----------
    X : array-like
        Input data; it is transposed before use, so samples are expected
        along the second axis.
    algorithm : str
        One of 'KMeans', 'Birch', 'DBSCAN', 'AffinityPropagation',
        'MeanShift', 'SpectralClustering', 'Ward' or
        'AgglomerativeClustering'.
    n_clusters : int
        Number of clusters, for the algorithms that take one.

    Returns
    -------
    (X, y_pred)
        The transposed, standardised data and the integer label of every
        sample.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    supported = ('KMeans', 'Birch', 'DBSCAN', 'AffinityPropagation',
                 'MeanShift', 'SpectralClustering', 'Ward',
                 'AgglomerativeClustering')
    if algorithm not in supported:
        raise ValueError('Unknown clustering algorithm: {!r}'.format(algorithm))

    X = np.transpose(X)

    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    # The connectivity matrix is only built for the algorithms that use it.
    if algorithm in ('Ward', 'AgglomerativeClustering'):
        connectivity = kneighbors_graph(X, n_neighbors=5, include_self=False)
        # make connectivity symmetric
        connectivity = 0.5 * (connectivity + connectivity.T)

    if algorithm == 'KMeans':
        model = cluster.KMeans(n_clusters=n_clusters, random_state=0)

    elif algorithm == 'Birch':
        model = cluster.Birch(n_clusters=n_clusters)

    elif algorithm == 'DBSCAN':
        model = cluster.DBSCAN(eps=.2)

    elif algorithm == 'AffinityPropagation':
        model = cluster.AffinityPropagation(damping=.9, preference=-200)

    elif algorithm == 'MeanShift':
        # The bandwidth is only estimated when mean shift is selected.
        bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)
        model = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)

    elif algorithm == 'SpectralClustering':
        model = cluster.SpectralClustering(n_clusters=n_clusters,
                                           eigen_solver='arpack',
                                           affinity="nearest_neighbors")

    elif algorithm == 'Ward':
        model = cluster.AgglomerativeClustering(n_clusters=n_clusters,
                                                linkage='ward',
                                                connectivity=connectivity)

    else:  # 'AgglomerativeClustering'
        # NOTE(review): recent scikit-learn releases renamed ``affinity`` to
        # ``metric`` for this estimator -- confirm against the pinned version.
        model = cluster.AgglomerativeClustering(linkage="average",
                                                affinity="cityblock",
                                                n_clusters=n_clusters,
                                                connectivity=connectivity)

    model.fit(X)

    if hasattr(model, 'labels_'):
        # Builtin int: the np.int alias no longer exists in NumPy.
        y_pred = model.labels_.astype(int)
    else:
        y_pred = model.predict(X)

    return X, y_pred
Beispiel #25
0
def mean_shift():
    """Demo: cluster two random 2-D normal blobs with MeanShift and plot them.

    100 points are drawn around 0 and 100 around 1 (scale 0.1 each).  They
    are clustered with a fixed bandwidth of 0.01 and shown in a scatter plot
    coloured by predicted label.
    """
    blobs = [
        numpy.random.normal(loc=0.0, scale=0.1, size=[100, 2]),
        numpy.random.normal(loc=1, scale=0.1, size=[100, 2]),
    ]
    data = numpy.concatenate(blobs, axis=0)
    # The bandwidth is fixed here rather than estimated from the data.
    y_pre = cluster.MeanShift(bandwidth=0.01).fit_predict(data)
    plt.scatter(data[:, 0], data[:, 1], c=y_pre)
    plt.show()
Beispiel #26
0
def meanshift_function(dataAx, bandwidth, quantile):
    """Cluster ``dataAx`` with MeanShift and return the labels as an array.

    NOTE: the ``bandwidth`` argument is not used.  The bandwidth is always
    estimated from ``dataAx`` with ``estimate_bandwidth`` at the given
    ``quantile``.  All processors are requested via ``n_jobs=-1``.
    """
    estimated = estimate_bandwidth(dataAx, quantile=quantile)
    model = ms.MeanShift(n_jobs=-1, bandwidth=estimated)
    model.fit(dataAx)
    return np.array(model.labels_)
Beispiel #27
0
    def mean_shift(matrix):
        """Cluster ``matrix`` with default MeanShift and return the labels.

        The number of distinct labels, not counting -1 (noise) if present,
        is printed.
        """
        model = skcluster.MeanShift()
        model.fit(matrix)
        labels = model.labels_

        # Count the clusters, leaving out the noise label when it occurs.
        distinct = set(labels)
        distinct.discard(-1)
        print('Estimated number of clusters:', len(distinct))

        return labels
Beispiel #28
0
 def __init__(self,
              *,
              use_gpu=True,
              use_semantic=False,
              ignore_semantic_labels=None,
              **kwargs):
     """Create the clusterer and initialise the base class.

     ``use_gpu`` selects ``MeanShiftCosine`` over ``cluster.MeanShift``;
     either one is built with ``**kwargs``.  ``use_semantic`` and
     ``ignore_semantic_labels`` are passed on to the parent initialiser.
     """
     clusterer_cls = MeanShiftCosine if use_gpu else cluster.MeanShift
     self.clusterer = clusterer_cls(**kwargs)
     super().__init__(use_semantic, ignore_semantic_labels)
 def MeanShift(self, parameters):
     """Run MeanShift (with bin seeding) on min-max scaled data.

     Parameters
     ----------
     parameters : dict
         ``'data'`` holds the samples (required).  ``'bandwidth'`` is
         optional; when absent or None a bandwidth of 3 is used.

     Returns
     -------
     dict
         ``{'labels': <label of every sample>}``.
     """
     data = np.array(parameters['data'])
     data = preprocessing.MinMaxScaler().fit_transform(data)

     bandwidth = 3
     requested = parameters.get('bandwidth')
     if requested is not None:
         # float, not int: the data are scaled by MinMaxScaler, so
         # fractional bandwidths are meaningful and must not be truncated.
         bandwidth = float(requested)

     model = skc.MeanShift(bandwidth=bandwidth, bin_seeding=True)
     model.fit(data)
     return {'labels': model.labels_}
Beispiel #30
0
def findClusters_meanShift(data):
    '''
        Cluster data using Mean Shift method.

        The bandwidth is estimated from the data (quantile 0.25,
        500 samples) and bin seeding is enabled.  Returns the fitted
        MeanShift estimator.
    '''
    estimator = cl.MeanShift(
        bandwidth=cl.estimate_bandwidth(data, quantile=0.25, n_samples=500),
        bin_seeding=True)
    estimator.fit(data)
    return estimator