Example #1
def participants_clustering(group, block=None, *args, **kwargs):
    '''
    Using sklearn.cluster.mean_shift, cluster the participants into
    groups for each frame.

    Optional positional and keyword arguments are passed as-is to
    the mean_shift function.

    Returns
    -------
    (clusters_centroids, clusters_partition):
    clusters_centroids is, for each frame, an array of 2D cluster centers.
    clusters_partition is, for each frame, a mapping from cluster index
    to the set of participant numbers in that cluster.
    '''
    participants_data = data.participants_data(group, block)
    participants_nums = data.get_participants(group)
    # a list of 2d point for each frame
    clusters_centroids = []
    # for each frame, a mapping from cluster index to the set
    # of participant numbers in that cluster
    clusters_partition = []
    for frame, row in participants_data.iterrows():
        pas = np.array(utils.list_to_chunks(row.values, 2))
        centroids, allocations = cluster.mean_shift(pas, *args, **kwargs)
        clusters_centroids.append(centroids)
        partition = collections.defaultdict(set)
        for participant, allocation in zip(participants_nums, allocations):
            partition[allocation].add(participant)
        clusters_partition.append(partition)

    return clusters_centroids, clusters_partition
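
A hypothetical call, assuming the surrounding data and utils modules are
importable; the group name and the bandwidth keyword below are illustrative
only (bandwidth is simply forwarded to mean_shift):

# hypothetical usage -- 'groupA' and bandwidth=0.5 are made-up values
centroids, partitions = participants_clustering('groupA', bandwidth=0.5)
frame0_groups = partitions[0]  # {cluster_index: {participant_number, ...}}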
Example #2
def mean_shift_labels(pointcloud, bandwidth=None, max_iter=300, n_jobs=1):
    '''
    Find an array of point labels for the clusters found by the mean
    shift algorithm. The underlying sklearn.cluster.mean_shift call is
    made with seeds=None, bin_seeding=False, min_bin_freq=1 and
    cluster_all=True.
    Parameters
    ----------
    pointcloud : array-like, shape=[n_samples, n_features]
        Input data.
    bandwidth : float, optional
        Kernel bandwidth.
        If bandwidth is not given (or is 0), it is determined using a
        heuristic based on the median of all pairwise distances. This
        takes quadratic time in the number of samples. The
        sklearn.cluster.estimate_bandwidth function can be used to do
        this more efficiently.
    max_iter : int, default 300
        Maximum number of iterations, per seed point, before the
        clustering operation terminates (for that seed point) if it has
        not converged yet.
    n_jobs : int
        The number of jobs to use for the computation.
    Returns
    -------
    labels : array, shape=[n_samples]
        Cluster labels for each point.
    '''

    # Set bandwidth to None if it is 0
    if bandwidth == 0:
        bandwidth = None

    _, labels = mean_shift(np.asarray(pointcloud),
                           bandwidth=bandwidth,
                           seeds=None,
                           bin_seeding=False,
                           min_bin_freq=1,
                           cluster_all=True,
                           max_iter=max_iter,
                           n_jobs=n_jobs)
    return labels
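
For reference, a minimal sketch of calling sklearn.cluster.mean_shift
directly on a small point set; the coordinates and bandwidth are
illustrative:

import numpy as np
from sklearn.cluster import mean_shift

points = np.array([[0.0, 0.0], [0.2, 0.1], [0.1, 0.2],
                   [5.0, 5.0], [5.1, 4.9]])
centers, labels = mean_shift(points, bandwidth=1.0)
print(centers)  # one row per cluster center
print(labels)   # e.g. [0 0 0 1 1]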
Example #3
def meanshift(bw, pixels):
    # `revert` is assumed to be defined elsewhere in the source module
    pixels = np.array(revert(pixels))
    centers, labels = mean_shift(pixels, bandwidth=bw, n_jobs=-1)

    # replace each pixel with the integer coordinates of its cluster center
    out = []
    for label in labels:
        out.append(centers[label].astype(int))
    return out
Example #4
def test_max_iter(max_iter):
    clusters1, _ = mean_shift(X, max_iter=max_iter)
    ms = MeanShift(max_iter=max_iter).fit(X)
    clusters2 = ms.cluster_centers_

    assert ms.n_iter_ <= ms.max_iter
    assert len(clusters1) == len(clusters2)

    for c1, c2 in zip(clusters1, clusters2):
        assert np.allclose(c1, c2)
Example #5
    def test_mean_shift(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.cluster.mean_shift()
        expected = cluster.mean_shift(iris.data)

        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assertTrue(isinstance(result[1], pdml.ModelSeries))
        self.assert_index_equal(result[1].index, df.index)
        self.assert_numpy_array_equal(result[1].values, expected[1])
Example #6
    def test_mean_shift(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.cluster.mean_shift()
        expected = cluster.mean_shift(iris.data)

        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assertIsInstance(result[1], pdml.ModelSeries)
        tm.assert_index_equal(result[1].index, df.index)
        tm.assert_numpy_array_equal(result[1].values, expected[1])
Example #7
def test_mean_shift():
    # Test MeanShift algorithm
    bandwidth = 1.2

    ms = MeanShift(bandwidth=bandwidth)
    labels = ms.fit(X).labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)

    cluster_centers, labels = mean_shift(X, bandwidth=bandwidth)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)
Example #8
def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label):
    # Test MeanShift algorithm
    ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all)
    labels = ms.fit(X).labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert n_clusters_ == expected
    assert labels_unique[0] == first_cluster_label

    cluster_centers, labels_mean_shift = mean_shift(X, cluster_all=cluster_all)
    labels_mean_shift_unique = np.unique(labels_mean_shift)
    n_clusters_mean_shift = len(labels_mean_shift_unique)
    assert n_clusters_mean_shift == expected
    assert labels_mean_shift_unique[0] == first_cluster_label
Example #9
def test_mean_shift():
    # Test MeanShift algorithm
    bandwidth = 1.2

    ms = MeanShift(bandwidth=bandwidth)
    labels = ms.fit(X).labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)

    cluster_centers, labels = mean_shift(X, bandwidth=bandwidth)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)
Example #10
def test_mean_shift():
    """ Test MeanShift algorithm
    """
    bandwidth = 1.2

    bandwidth_ = estimate_bandwidth(X, n_samples=300)
    assert_true(0.9 <= bandwidth_ <= 1.5)

    ms = MeanShift(bandwidth=bandwidth)
    labels = ms.fit(X).labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)

    cluster_centers, labels = mean_shift(X, bandwidth=bandwidth)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)
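
A small sketch, using only numpy and sklearn, of how the quantile
parameter steers estimate_bandwidth; the data and quantile values are
illustrative:

import numpy as np
from sklearn.cluster import estimate_bandwidth

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.normal(0, 0.3, (50, 2)),
                    rng.normal(5, 0.3, (50, 2))])
for q in (0.1, 0.3, 0.5):
    # smaller quantiles give smaller bandwidths, hence more, finer clusters
    print(q, estimate_bandwidth(X_demo, quantile=q))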
Example #11
def test_mean_shift(global_dtype, bandwidth, cluster_all, expected,
                    first_cluster_label):
    # Test MeanShift algorithm
    X_with_global_dtype = X.astype(global_dtype, copy=False)
    ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all)
    labels = ms.fit(X_with_global_dtype).labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert n_clusters_ == expected
    assert labels_unique[0] == first_cluster_label
    assert ms.cluster_centers_.dtype == global_dtype

    cluster_centers, labels_mean_shift = mean_shift(X_with_global_dtype,
                                                    cluster_all=cluster_all)
    labels_mean_shift_unique = np.unique(labels_mean_shift)
    n_clusters_mean_shift = len(labels_mean_shift_unique)
    assert n_clusters_mean_shift == expected
    assert labels_mean_shift_unique[0] == first_cluster_label
    assert cluster_centers.dtype == global_dtype
Example #12
def test_mean_shift():
    """ Test MeanShift algorithm
    """
    bandwidth = 1.2

    bandwidth_ = estimate_bandwidth(X, n_samples=300)
    assert_true(0.9 <= bandwidth_ <= 1.5)

    ms = MeanShift(bandwidth=bandwidth)
    labels = ms.fit(X).labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)

    cluster_centers, labels = mean_shift(X, bandwidth=bandwidth)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    assert_equal(n_clusters_, n_clusters)
Example #13
    cut = img[y:y + h, x:x + w]
    # extract feature vector
    density = misc.calc_density(cut, blob)
    moments = misc.calc_hu_moments(cut, blob)
    circularity = misc.calc_circularity(cut, blob)
    features.append([density, circularity, moments[0]])

# convert list to numpy array
features = np.asarray(features)
# scale the features to comparable ranges
features = MinMaxScaler().fit_transform(features)

# now use the feature vector for clustering:
# predict = AggloClust(n_clusters=3).fit_predict(features.reshape(-1, 1))
# predict = KMeans(n_clusters=3).fit_predict(features.reshape(-1, 1))
predict = mean_shift(features, estimate_bandwidth(features))[1]

# now draw the classified objects:
# iterate through blobs
for i, blob in enumerate(blobs):
    [x, y, w, h] = cv2.boundingRect(blob)
    cv2.rectangle(img, (x, y), (x + w, y + h), colors[predict[i]], 2)
    txt = '{} d={:.2f},c={:.2f},m={:.2f}'.format(i, *features[i])
    cv2.putText(img, txt, (x + w - 75, y + h + 15), *text_opts)


def pca():
    names = ['circle', 'rectangle', 'triangle']
    f_r = PCA(n_components=2).fit_transform(features)
    y = np.array(targets)
    for i, color in enumerate(['red', 'green', 'blue']):
Example #14
    clust = MeanShift(bandwidth = 10)
    res = clust.fit_predict(data[['Start']].values)
    data['Cluster'] = res
    cluster_data = concat([cluster_data, data], axis = 0, ignore_index = True)



# <codecell>

res = crosstab(index = [cluster_data['Patient ID'], cluster_data['Visit Number']],
               columns = [cluster_data['TFName'], cluster_data['Cluster']])

# <codecell>

from sklearn.cluster import k_means, mean_shift

centroids, labels = mean_shift(res.values)

labels = Series(labels, index = res.index)
labels = labels.sort_values()

plt.figure(figsize = (20,20))

plt.imshow(res.loc[labels.index].values)

# <codecell>

labels

# <codecell>

Example #15
    def action_execute_button_clicked(self):
        # open the input image
        input_img = gdal.Open(self.input_file_path.text())
        img_rows = input_img.RasterYSize
        img_cols = input_img.RasterXSize
        img_bands = input_img.RasterCount
        img_geotrans = input_img.GetGeoTransform()
        img_proj = input_img.GetProjection()

        # reshape the image into the (n_samples, n_features) layout mean_shift expects
        input_features = []
        for i in range(1, img_bands + 1):
            band_img = input_img.GetRasterBand(i).ReadAsArray(
                0, 0, img_cols, img_rows)
            input_features.append(band_img.reshape(-1))
        input_features = np.array(input_features).T

        # run the mean_shift algorithm
        bandwidth = estimate_bandwidth(
            input_features,
            quantile=float(self.bandwidth_estimate.currentText()),
            n_samples=int(img_rows * img_cols * 0.05))
        #print(bandwidth)
        meanshift_result = mean_shift(input_features,
                                      bandwidth=bandwidth,
                                      bin_seeding=True,
                                      max_iter=int(
                                          self.iter_num.currentText()))

        # map each clustered sample to the color of its cluster center
        cluster_centers, clustered_points = meanshift_result
        output_feature = []
        for index, item in enumerate(clustered_points):
            while item > len(self.color_list) - 1:
                self.color_list.append(list(np.random.randint(256, size=3)))
            output_feature.append(self.color_list[item])
        output_feature = np.array(output_feature).T
        output_feature = np.array(
            list(map(lambda x: x.reshape((img_rows, img_cols)),
                     output_feature)))

        # write the clustered image to disk
        driver = gdal.GetDriverByName("GTiff")
        output_img = driver.Create(self.output_file_path.text(), img_cols,
                                   img_rows, 3, gdal.GDT_Byte)
        output_img.SetGeoTransform(img_geotrans)
        output_img.SetProjection(img_proj)
        for i in range(1, 4):
            output_img.GetRasterBand(i).WriteArray(output_feature[i - 1])
        del output_img

        layer_legends = []  # legend entries
        for i in range(len(cluster_centers)):
            layer_legends.append({
                'name':
                'Cluster' + str(i + 1),
                'color':
                QColor(self.color_list[i][0], self.color_list[i][1],
                       self.color_list[i][2])
            })

        if (QMessageBox.question(self, "Message", "Clustering finished. Add the result as a layer?",
                                 QMessageBox.Yes | QMessageBox.No,
                                 QMessageBox.Yes) == QMessageBox.Yes):
            self.add_layer_signal.emit(self.output_file_path.text(),
                                       layer_legends)
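
The reshape pattern above generalizes; a minimal sketch on a synthetic
3-band image, assuming only numpy and sklearn (all sizes and parameters
are illustrative):

import numpy as np
from sklearn.cluster import estimate_bandwidth, mean_shift

rows, cols, bands = 40, 50, 3
img = np.random.rand(bands, rows, cols)      # band-major, as read from GDAL
features = img.reshape(bands, -1).T          # (n_pixels, n_bands)
bw = estimate_bandwidth(features, quantile=0.2, n_samples=500)
centers, labels = mean_shift(features, bandwidth=bw, bin_seeding=True)
label_img = labels.reshape(rows, cols)       # back to image layout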
Example #16
def cluster(points, labels=None):
    #km = KMeans(n_clusters=3, init='k-means++', max_iter=100, n_init=1,
    #            verbose=1)
    #km.fit(points)
    return mean_shift(points)
Example #17
def _clusterized_column(df, column, **kwargs):
    res = cluster.mean_shift(pd.DataFrame(df[column]).values, **kwargs)
    return res[1]
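
A hypothetical usage, assuming pandas is imported as pd and sklearn's
cluster module as the snippet implies; the column name and bandwidth are
made up:

df_demo = pd.DataFrame({'Start': [1.0, 1.1, 0.9, 9.8, 10.2, 10.0, 1.2, 9.9]})
df_demo['Cluster'] = _clusterized_column(df_demo, 'Start', bandwidth=1.0)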
Example #18
def mean_shift_cluster(data):  # mean_shift
    # mean_shift returns (cluster_centers, labels); keep only the labels
    mean_shift_label = mean_shift(np.array(MinMaxScaler().fit_transform(data)))
    return list(mean_shift_label[-1])
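
A hypothetical usage sketch, assuming numpy, MinMaxScaler and mean_shift
are imported as the snippet implies; the two-blob data is made up:

import numpy as np

rng = np.random.RandomState(0)
pts = np.vstack([rng.normal(0.0, 0.05, (10, 2)),
                 rng.normal(1.0, 0.05, (10, 2))])
print(mean_shift_cluster(pts))  # typically two distinct labels, one per blob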