Beispiel #1
0
def test_cluster_cutoff():
    """Check the label set produced when a minimum cluster size is enforced.

    Fits the model to 100 seeded random 2-D points with
    ``min_cluster_size=10`` and asserts that the distinct labels are exactly
    ``-1, 0, ..., n_components_ - 1``.
    """
    # Fixed seed so the random point set is reproducible across runs.
    np.random.seed(0)
    n_points = 100
    points = np.random.random((n_points, 2))

    clusterer = HierarchicalClustering(8, edge_cutoff=0.9, min_cluster_size=10)
    clusterer.fit(points)

    # NOTE(review): -1 presumably labels points whose cluster fell below
    # min_cluster_size -- confirm against HierarchicalClustering's docs.
    observed_labels = np.unique(clusterer.labels_)
    expected_labels = np.arange(-1, clusterer.n_components_)
    assert_allclose(observed_labels, expected_labels)
Beispiel #2
0
def test_simple_clustering():
    """Check cluster counts and graph sizes on a small seeded point set.

    Fits the model to 10 random 2-D points with ``edge_cutoff=0.5`` and
    asserts that:

    * the number of components is half the number of points,
    * the full tree has ``N - 1`` positive entries,
    * the truncated cluster graph has ``N // 2`` positive entries,
    * the distinct labels are exactly ``0 .. N // 2 - 1``.
    """
    # Fixed seed so the random point set is reproducible across runs.
    np.random.seed(0)
    N = 10
    X = np.random.random((N, 2))
    model = HierarchicalClustering(8, edge_cutoff=0.5)
    model.fit(X)

    # Floor division keeps the expected counts integral: under Python 3,
    # ``N / 2`` is a float, which is the wrong type for a component count
    # and for the stop value of a label range.
    half = N // 2

    assert_(model.n_components_ == half)
    assert_(np.sum(model.full_tree_.toarray() > 0) == N - 1)
    assert_(np.sum(model.cluster_graph_.toarray() > 0) == half)
    assert_allclose(np.unique(model.labels_), np.arange(half))
Beispiel #3
0
def mst_clustering(make_plots, x, y, band1, band2, band3, band4, p_path):
    """Cluster the points ``(x, y)`` with an MST-based model and map density.

    The points are clustered with ``HierarchicalClustering``; a mixture
    model is then fitted to each cluster and evaluated on a regular grid
    spanning the data, the per-cluster densities being normalised to their
    own maximum and summed. If requested, the results are handed to
    ``mst_plots``.

    Parameters
    ----------
    make_plots : container of str
        Plot selection; plotting happens only when ``"mst"`` is in it.
    x, y : sequence of float
        Point coordinates, stacked column-wise into the training matrix.
        NOTE(review): assumed to be equal-length 1-D sequences -- confirm
        against the caller.
    band1, band2, band3, band4
        Passed through unchanged to ``mst_plots``.
    p_path
        Passed through unchanged to ``mst_plots``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff``-th percentile of the values stored in
        ``model.full_tree_.data``.
    """
    X = np.vstack([x, y]).T

    # Boundaries for plots
    xmin, xmax = (min(x), max(x))
    ymin, ymax = (min(y), max(y))

    # Compute MST clustering
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)
    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    # Evaluation grid: Ny rows by Nx columns, flattened to (Ny * Nx, 2).
    # The stack is built from an explicit list because np.vstack requires a
    # sequence; a bare ``map`` object is a one-shot iterator on Python 3 and
    # is rejected by current NumPy.
    Nx = 100
    Ny = 250
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T

    # Fit a GMM to each individual cluster
    density = np.zeros(Xgrid.shape[0])
    for i in range(n_components):
        ind = (labels == i)
        Npts = ind.sum()
        # Floor division: the component count must be an integer, and
        # ``/`` would produce a float under Python 3.
        Nclusters = min(12, Npts // 5)
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): this relies on ``score`` returning one
        # log-likelihood per grid point -- confirm for the GMM class
        # imported by this file.
        dens = np.exp(gmm.score(Xgrid))
        # Normalise each cluster to its own peak before accumulating.
        density += dens / dens.max()
    density = density.reshape((Ny, Nx))

    if "mst" in make_plots:
        mst_plots(X, ymin, ymax, xmin, xmax, T_x, T_y, T_trunc_x, T_trunc_y,
                  density, band1, band2, band3, band4, p_path)

    return scale
# Configure plot text rendering before any figure is created.
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# get data
X = fetch_great_wall()

# Fixed coordinate window used by the rest of the script.
xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Pass the named setting rather than repeating the literal, so that changing
# ``n_neighbors`` above actually takes effect.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the tree edge value at the ``edge_cutoff`` percentile.
print(" scale: %2g Mpc" %
      np.percentile(model.full_tree_.data, 100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_, model.cluster_graph_)

#------------------------------------------------------------
# Fit a GaussianMixture to each individual cluster