Example #1
0
def test_cluster_cutoff():
    """Check the set of labels produced with ``min_cluster_size=10``.

    Fits a HierarchicalClustering model (8 as first argument,
    ``edge_cutoff=0.9``) to 100 seeded random 2-D points and asserts that
    the distinct labels are exactly ``-1, 0, ..., n_components_ - 1``.
    """
    np.random.seed(0)
    n_points = 100
    points = np.random.random((n_points, 2))

    clusterer = HierarchicalClustering(8, edge_cutoff=0.9,
                                       min_cluster_size=10)
    clusterer.fit(points)

    found_labels = np.unique(clusterer.labels_)
    expected_labels = np.arange(-1, clusterer.n_components_)
    assert_allclose(found_labels, expected_labels)
Example #2
0
def test_cluster_cutoff():
    """Verify the label range of a fit that uses ``min_cluster_size=10``."""
    np.random.seed(0)
    sample_count = 100
    samples = np.random.random((sample_count, 2))

    hc = HierarchicalClustering(8, edge_cutoff=0.9, min_cluster_size=10)
    hc.fit(samples)

    # The distinct labels must run from -1 up to n_components_ - 1.
    distinct = np.unique(hc.labels_)
    assert_allclose(distinct, np.arange(-1, hc.n_components_))
Example #3
0
def test_simple_clustering():
    """Check component, edge and label counts on 10 seeded random points.

    With ``edge_cutoff=0.5`` the test expects ``N // 2`` components,
    ``N - 1`` positive entries in ``full_tree_``, ``N // 2`` positive
    entries in ``cluster_graph_`` and labels ``0 .. N // 2 - 1``.
    """
    np.random.seed(0)
    N = 10
    X = np.random.random((N, 2))
    model = HierarchicalClustering(8, edge_cutoff=0.5)
    model.fit(X)

    # These are counts, so use floor division: on Python 3 ``N / 2`` is a
    # float, which would compare ints to 5.0 and hand np.arange a float.
    half = N // 2

    assert_(model.n_components_ == half)
    assert_(np.sum(model.full_tree_.toarray() > 0) == N - 1)
    assert_(np.sum(model.cluster_graph_.toarray() > 0) == half)
    assert_allclose(np.unique(model.labels_), np.arange(half))
Example #4
0
def test_simple_clustering():
    """Fit 10 seeded random 2-D points and check the resulting counts.

    Asserted with ``edge_cutoff=0.5``: ``n_components_`` equals ``N // 2``,
    ``full_tree_`` has ``N - 1`` positive entries, ``cluster_graph_`` has
    ``N // 2`` positive entries, and the labels are ``0 .. N // 2 - 1``.
    """
    np.random.seed(0)
    N = 10
    X = np.random.random((N, 2))
    model = HierarchicalClustering(8, edge_cutoff=0.5)
    model.fit(X)

    # Integer floor division keeps the expected counts as ints on Python 3
    # (true division would yield 5.0 and a float-valued np.arange).
    n_expected = N // 2

    assert_(model.n_components_ == n_expected)
    assert_(np.sum(model.full_tree_.toarray() > 0) == N - 1)
    assert_(np.sum(model.cluster_graph_.toarray() > 0) == n_expected)
    assert_allclose(np.unique(model.labels_), np.arange(n_expected))
Example #5
0
def mst_clustering(make_plots, x, y, band1, band2, band3, band4, p_path):
    """Cluster (x, y) points with an MST model and return the edge scale.

    Parameters
    ----------
    make_plots
        Checked with ``"mst" in make_plots``; when true, ``mst_plots`` is
        called with the clustering results.
    x, y
        Point coordinates, stacked column-wise into the array that is
        fitted (assumed to be equal-length 1-D sequences -- TODO confirm).
    band1, band2, band3, band4, p_path
        Passed through unchanged to ``mst_plots``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``.
    """
    X = np.vstack([x, y]).T

    # Boundaries for plots
    xmin, xmax = min(x), max(x)
    ymin, ymax = min(y), max(y)

    # Compute MST clustering
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)
    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    # Build the evaluation grid from an explicit list: np.vstack needs a
    # sequence of arrays, and map() is a lazy iterator on Python 3.
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])
    for i in range(n_components):
        ind = (labels == i)
        Npts = ind.sum()
        # The number of mixture components must be an integer; true
        # division would hand GMM a float on Python 3.
        Nclusters = min(12, int(Npts) // 5)
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes GMM.score returns per-sample
        # log-likelihoods; if GMM is bound to a class whose score() returns
        # a single mean value, score_samples() is needed here -- confirm.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()
    density = density.reshape((Ny, Nx))

    if "mst" in make_plots:
        mst_plots(X, ymin, ymax, xmin, xmax, T_x, T_y, T_trunc_x, T_trunc_y,
                  density, band1, band2, band3, band4, p_path)

    return scale
Example #6
0
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# get data
X = fetch_great_wall()

xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Pass the n_neighbors variable rather than repeating the literal, so that
# changing the setting above actually changes the model.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the full-tree edge value at the edge_cutoff percentile.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_,
                              model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)
# NOTE(review): from here to the get_graph_segments calls, these statements
# repeat the setup/fit sequence that already appears directly above in this
# file -- confirm whether both copies are intended.
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# get data
X = fetch_great_wall()

xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Use the n_neighbors variable instead of a second hard-coded 10 so the
# setting is defined in exactly one place.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the full-tree edge value at the edge_cutoff percentile.
print(" scale: %2g Mpc" %
      np.percentile(model.full_tree_.data, 100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)

#------------------------------------------------------------
# Fit a GaussianMixture to each individual cluster
def mst_clustering(data_to_cluster):
    """Run MST clustering on a selection of ``data`` and plot the result.

    Parameters
    ----------
    data_to_cluster
        Selector applied to ``data['x']`` and ``data['y']`` to pick the
        points to cluster. ``data`` is read from the enclosing module scope.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``.

    Notes
    -----
    Draws a three-panel figure (points, full tree, truncated tree over the
    density image) and calls ``plt.show()`` before returning.
    """
    x = data['x'][data_to_cluster]
    y = data['y'][data_to_cluster]

    # Stack the coordinates column-wise into one array. A plain ``[x, y]``
    # list cannot be indexed with ``X[ind]`` or ``X[:, 1]`` below.
    # (assumes x and y are equal-length 1-D selections -- TODO confirm)
    X = np.vstack([x, y]).T

    xmin, xmax = (0, 5000)
    ymin, ymax = (0, 5000)

    #------------------------------------------------------------
    # Compute the MST clustering model
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)

    n_components = model.n_components_
    labels = model.labels_

    #------------------------------------------------------------
    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    #------------------------------------------------------------
    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    # np.vstack needs a real sequence of arrays; map() is lazy on Python 3.
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = ind.sum()
        # The component count must be an integer (true division gives float).
        Nclusters = min(12, int(Npts) // 5)

        # The fit and accumulation belong inside the loop so that every
        # cluster contributes to the density, and nothing runs (or fails on
        # undefined names) when there are no clusters.
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes GMM.score returns per-sample
        # log-likelihoods; if it returns a single mean value,
        # score_samples() is needed instead -- confirm.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    #----------------------------------------------------------------------
    # Plot the results
    fig = plt.figure(figsize=(5, 6))
    fig.subplots_adjust(hspace=0, left=0.1, right=0.95, bottom=0.1, top=0.9)

    # Panel 1: the input points (second column on the horizontal axis).
    ax = fig.add_subplot(311, aspect='equal')
    ax.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k')
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Panel 2: segments of the full tree.
    ax = fig.add_subplot(312, aspect='equal')
    ax.plot(T_y, T_x, c='k', lw=0.5)
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Panel 3: segments of the cluster graph over the summed density image.
    ax = fig.add_subplot(313, aspect='equal')
    ax.plot(T_trunc_y, T_trunc_x, c='k', lw=0.5)
    ax.imshow(density.T, origin='lower', cmap=plt.cm.hot_r,
              extent=[ymin, ymax, xmin, xmax])

    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.set_xlabel('(Mpc)')
    ax.set_ylabel('(Mpc)')

    plt.show()

    return scale