Ejemplo n.º 1
0
def mst_clustering(make_plots, x, y, band1, band2, band3, band4, p_path):
    """Run MST-based hierarchical clustering on the (x, y) points.

    Parameters
    ----------
    make_plots : container of str (or str)
        The MST figure is produced only when ``"mst" in make_plots``.
    x, y : array-like
        Point coordinates of equal length; stacked into an ``(N, 2)`` array.
    band1, band2, band3, band4, p_path
        Forwarded unchanged to ``mst_plots``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``
        (presumably the MST edge lengths -- confirm against the
        ``HierarchicalClustering`` implementation in use).
    """
    X = np.vstack([x, y]).T

    # Compute MST clustering
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)
    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)

    # Everything below only feeds the figure, so it is skipped entirely
    # when the plot was not requested.
    if "mst" in make_plots:
        # Boundaries for plots
        xmin, xmax = (min(x), max(x))
        ymin, ymax = (min(y), max(y))

        n_components = model.n_components_
        labels = model.labels_

        # Get the x, y coordinates of the beginning and end of each line
        # segment, for the full tree and for the truncated cluster graph.
        T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
        T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                                  model.cluster_graph_)

        # Evaluation grid for the density image.  np.vstack needs a real
        # sequence: handing it a bare ``map`` iterator is deprecated and is
        # rejected by newer NumPy releases.
        Nx = 100
        Ny = 250
        grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                     np.linspace(ymin, ymax, Ny))
        Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T

        # Fit a GMM to each individual cluster and accumulate the
        # peak-normalised densities.
        density = np.zeros(Xgrid.shape[0])
        for i in range(n_components):
            ind = (labels == i)
            Npts = ind.sum()
            # Floor division: the component count must be an integer
            # (``/`` yields a float on Python 3).
            Nclusters = int(min(12, Npts // 5))
            gmm = GMM(Nclusters).fit(X[ind])
            dens = np.exp(gmm.score(Xgrid))
            density += dens / dens.max()
        density = density.reshape((Ny, Nx))

        mst_plots(X, ymin, ymax, xmin, xmax, T_x, T_y, T_trunc_x, T_trunc_y,
                  density, band1, band2, band3, band4, p_path)

    return scale
Ejemplo n.º 2
0
def test_graph_segments():
    """Check get_graph_segments against hand-built segment endpoints."""
    np.random.seed(0)
    n_points = 4
    points = np.random.random((n_points, 2))

    # (row, column) positions of the non-zero entries of the graph matrix.
    edges = [(0, 1), (2, 1), (2, 3)]
    graph = np.zeros([n_points, n_points])
    for row, col in edges:
        graph[row, col] = 1

    # Row 0 holds the first endpoint of every edge, row 1 the second.
    endpoints = np.array(edges).T
    expected_x = points[endpoints, 0]
    expected_y = points[endpoints, 1]

    actual_x, actual_y = get_graph_segments(points, graph)

    assert_allclose(actual_x, expected_x)
    assert_allclose(actual_y, expected_y)
Ejemplo n.º 3
0
def test_graph_segments():
    """Compare get_graph_segments output with the expected endpoints."""
    np.random.seed(0)
    size = 4
    coords = np.random.random((size, 2))

    # Non-zero graph entries, given as parallel row / column index arrays.
    rows = np.array([0, 2, 2])
    cols = np.array([1, 1, 3])
    adjacency = np.zeros([size, size])
    adjacency[rows, cols] = 1

    # Stack the indices so each column is one (start, end) pair.
    pair_index = np.vstack([rows, cols])
    want_x = coords[pair_index, 0]
    want_y = coords[pair_index, 1]

    got_x, got_y = get_graph_segments(coords, adjacency)

    assert_allclose(got_x, want_x)
    assert_allclose(got_y, want_y)
Ejemplo n.º 4
0
#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Pass the variable rather than repeating the literal so the setting above
# is the single source of truth.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the edge_cutoff percentile of the values in the full tree.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_,
                              model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)

#------------------------------------------------------------
# Fit a GMM to each individual cluster
Nx = 100
Ny = 250
# np.vstack needs a real sequence; a bare ``map`` iterator is deprecated and
# rejected by newer NumPy releases, so build the list explicitly.
grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                             np.linspace(ymin, ymax, Ny))
Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
density = np.zeros(Xgrid.shape[0])

for i in range(n_components):
    ind = (labels == i)
    Npts = ind.sum()
    # At most 12 mixture components, and no more than one per 5 points.
    Nclusters = min(12, Npts // 5)
Ejemplo n.º 5
0
#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Use the n_neighbors variable instead of a duplicated literal so the two
# cannot drift apart.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the edge_cutoff percentile of the values in the full tree.
print(" scale: %2g Mpc" %
      np.percentile(model.full_tree_.data, 100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_, model.cluster_graph_)

#------------------------------------------------------------
# Fit a GaussianMixture to each individual cluster
Nx = 100
Ny = 250
# Stack an explicit list: np.vstack no longer accepts a bare ``map``
# iterator on newer NumPy releases.
x_axis = np.linspace(xmin, xmax, Nx)
y_axis = np.linspace(ymin, ymax, Ny)
Xgrid = np.vstack([axis.ravel() for axis in np.meshgrid(x_axis, y_axis)]).T
density = np.zeros(Xgrid.shape[0])

for i in range(n_components):
    ind = (labels == i)
    Npts = ind.sum()
Ejemplo n.º 6
0
def mst_clustering(data_to_cluster):
    """Cluster a selection of the global ``data`` with an MST and plot it.

    Parameters
    ----------
    data_to_cluster
        Index or mask applied to ``data['x']`` and ``data['y']`` to select
        the points to cluster.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``
        (presumably the MST edge lengths -- confirm against the
        ``HierarchicalClustering`` implementation in use).

    Notes
    -----
    Reads the module-level ``data`` object and shows a three-panel figure
    via ``plt.show()`` before returning.
    """
    x = data['x'][data_to_cluster]
    y = data['y'][data_to_cluster]

    # One row per point.  A plain ``[x, y]`` list cannot be indexed with a
    # boolean mask or ``X[:, 0]`` below, and would hand ``fit`` a (2, N)
    # layout instead of (N, 2).
    X = np.vstack([x, y]).T

    xmin, xmax = (0, 5000)
    ymin, ymax = (0, 5000)

    #------------------------------------------------------------
    # Compute the MST clustering model
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)

    n_components = model.n_components_
    labels = model.labels_

    #------------------------------------------------------------
    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    #------------------------------------------------------------
    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    # np.vstack needs a real sequence; a bare ``map`` iterator is deprecated
    # and rejected by newer NumPy releases.
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = ind.sum()
        # Floor division: the component count must be an integer
        # (``/`` yields a float on Python 3).
        Nclusters = int(min(12, Npts // 5))

        # The fit belongs inside the loop so that every cluster contributes
        # its peak-normalised density, not just the last one.
        gmm = GMM(Nclusters).fit(X[ind])
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    #----------------------------------------------------------------------
    # Plot the results
    fig = plt.figure(figsize=(5, 6))
    fig.subplots_adjust(hspace=0, left=0.1, right=0.95, bottom=0.1, top=0.9)

    # Top panel: the raw points (y on the horizontal axis).
    ax = fig.add_subplot(311, aspect='equal')
    ax.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k')
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Middle panel: segments of the full tree.
    ax = fig.add_subplot(312, aspect='equal')
    ax.plot(T_y, T_x, c='k', lw=0.5)
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Bottom panel: segments of the cluster graph over the density image.
    ax = fig.add_subplot(313, aspect='equal')
    ax.plot(T_trunc_y, T_trunc_x, c='k', lw=0.5)
    ax.imshow(density.T, origin='lower', cmap=plt.cm.hot_r,
              extent=[ymin, ymax, xmin, xmax])

    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.set_xlabel('(Mpc)')
    ax.set_ylabel('(Mpc)')

    plt.show()

    return scale