def mst_clustering(make_plots, x, y, band1, band2, band3, band4, p_path):
    """Cluster 2-D points with a truncated minimum spanning tree.

    A ``HierarchicalClustering`` model is fitted to the ``(x, y)`` points,
    a Gaussian mixture is fitted to every cluster the model reports, and the
    per-cluster densities (each normalised to a peak of 1) are summed on a
    regular grid spanning the data.

    Parameters
    ----------
    make_plots : container of str
        ``mst_plots`` is called only when ``"mst"`` is in this container.
    x, y : sequence of float
        Coordinates of the points to cluster; stacked into an ``(N, 2)``
        array.
    band1, band2, band3, band4, p_path
        Not used here; forwarded unchanged to ``mst_plots``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile (here the 90th) of
        ``model.full_tree_.data``.
    """
    X = np.vstack([x, y]).T

    # Boundaries for plots
    xmin, xmax = min(x), max(x)
    ymin, ymax = min(y), max(y)

    # Compute MST clustering
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    # Build the stack from an explicit list: np.vstack needs a sequence, and
    # on Python 3 ``map`` only yields an iterator.
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = int(ind.sum())
        # Floor division keeps the component count an integer on Python 3;
        # the lower bound of 1 stops a tiny cluster requesting 0 components.
        Nclusters = max(1, min(12, Npts // 5))
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes gmm.score returns one log-likelihood per grid
        # point (as the legacy GMM class did) -- confirm for the class in use.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    if "mst" in make_plots:
        mst_plots(X, ymin, ymax, xmin, xmax, T_x, T_y, T_trunc_x, T_trunc_y,
                  density, band1, band2, band3, band4, p_path)

    return scale
def test_graph_segments():
    """Check get_graph_segments against hand-built segment endpoints.

    Four random points (fixed seed) are connected by three edges stored in a
    dense adjacency matrix; the returned x and y segment arrays must match
    the coordinates of each edge's two endpoints.
    """
    n_points = 4
    np.random.seed(0)
    points = np.random.random((n_points, 2))

    # (row, column) positions of the non-zero entries in the graph matrix
    edges = [(0, 1), (2, 1), (2, 3)]
    graph = np.zeros([n_points, n_points])
    for src, dst in edges:
        graph[src, dst] = 1

    # First row: start index of each edge; second row: end index
    endpoints = np.array(edges).T
    expected_x = points[endpoints, 0]
    expected_y = points[endpoints, 1]

    segments_x, segments_y = get_graph_segments(points, graph)

    assert_allclose(segments_x, expected_x)
    assert_allclose(segments_y, expected_y)
# Script fragment: fit the MST clustering model, extract the tree segments and
# start building a per-cluster density on a regular grid.
# NOTE(review): X, xmin, xmax, ymin and ymax are not defined in this fragment;
# they must come from code outside it.
# NOTE(review): n_neighbors is assigned here, but the constructor call below
# passes the literal 10 instead of the variable.
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
model = HierarchicalClustering(n_neighbors=10,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the (100 * edge_cutoff)-th percentile of the full tree's data values.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)

#------------------------------------------------------------
# Fit a GMM to each individual cluster
Nx = 100
Ny = 250
# Flatten both meshgrid outputs and stack them into (Nx * Ny, 2) grid points.
# NOTE(review): on Python 3 map() returns an iterator, and recent NumPy
# releases require a sequence for np.vstack -- confirm against the NumPy
# version in use.
Xgrid = np.vstack(map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx),
                                            np.linspace(ymin, ymax, Ny)))).T
density = np.zeros(Xgrid.shape[0])

for i in range(n_components):
    # Boolean mask selecting the points labelled as cluster i
    ind = (labels == i)
    Npts = ind.sum()
    # At most 12 components, and no more than one per five points
    Nclusters = min(12, Npts // 5)
    # NOTE(review): the fragment ends here; the rest of the loop body is not
    # visible.
# Script fragment: fit the MST clustering model, extract the tree segments and
# start building a per-cluster density on a regular grid.
# NOTE(review): X, xmin, xmax, ymin and ymax are not defined in this fragment;
# they must come from code outside it.
# NOTE(review): n_neighbors is assigned here, but the constructor call below
# passes the literal 10 instead of the variable.
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
model = HierarchicalClustering(n_neighbors=10,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the (100 * edge_cutoff)-th percentile of the full tree's data values.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)

#------------------------------------------------------------
# Fit a GaussianMixture to each individual cluster
Nx = 100
Ny = 250
# Flatten both meshgrid outputs and stack them into (Nx * Ny, 2) grid points.
# NOTE(review): on Python 3 map() returns an iterator, and recent NumPy
# releases require a sequence for np.vstack -- confirm against the NumPy
# version in use.
Xgrid = np.vstack(
    map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx),
                              np.linspace(ymin, ymax, Ny)))).T
density = np.zeros(Xgrid.shape[0])

for i in range(n_components):
    # Boolean mask selecting the points labelled as cluster i
    ind = (labels == i)
    Npts = ind.sum()
    # NOTE(review): the fragment ends here; the rest of the loop body is not
    # visible.
def mst_clustering(data_to_cluster):
    """Cluster a selection of points with a truncated MST and plot the result.

    The ``x`` and ``y`` columns of the module-level ``data`` are indexed with
    ``data_to_cluster``, a ``HierarchicalClustering`` model is fitted to the
    selected points, and a Gaussian mixture is fitted to each reported
    cluster. A three-panel figure is then shown: the points, the full tree,
    and the truncated tree drawn over the summed cluster density.

    Parameters
    ----------
    data_to_cluster
        Index or mask applied to ``data['x']`` and ``data['y']``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile (here the 90th) of
        ``model.full_tree_.data``.
    """
    x = data['x'][data_to_cluster]
    y = data['y'][data_to_cluster]
    # Stack into an (N, 2) array: the code below indexes X with a boolean
    # mask and with X[:, k], neither of which a plain Python list supports.
    X = np.vstack([x, y]).T

    # Fixed plot/grid boundaries
    xmin, xmax = (0, 5000)
    ymin, ymax = (0, 5000)

    #------------------------------------------------------------
    # Compute the MST clustering model
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    #------------------------------------------------------------
    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    #------------------------------------------------------------
    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    # Build the stack from an explicit list: np.vstack needs a sequence, and
    # on Python 3 ``map`` only yields an iterator.
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = int(ind.sum())
        # Floor division keeps the component count an integer on Python 3;
        # the lower bound of 1 stops a tiny cluster requesting 0 components.
        Nclusters = max(1, min(12, Npts // 5))
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes gmm.score returns one log-likelihood per grid
        # point (as the legacy GMM class did) -- confirm for the class in use.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    #----------------------------------------------------------------------
    # Plot the results (y is drawn on the horizontal axis, x on the vertical)
    fig = plt.figure(figsize=(5, 6))
    fig.subplots_adjust(hspace=0, left=0.1, right=0.95, bottom=0.1, top=0.9)

    # Top panel: the raw points
    ax = fig.add_subplot(311, aspect='equal')
    ax.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k')
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Middle panel: the full tree
    ax = fig.add_subplot(312, aspect='equal')
    ax.plot(T_y, T_x, c='k', lw=0.5)
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Bottom panel: the truncated tree over the summed cluster density
    ax = fig.add_subplot(313, aspect='equal')
    ax.plot(T_trunc_y, T_trunc_x, c='k', lw=0.5)
    ax.imshow(density.T, origin='lower', cmap=plt.cm.hot_r,
              extent=[ymin, ymax, xmin, xmax])
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.set_xlabel('(Mpc)')
    ax.set_ylabel('(Mpc)')

    plt.show()

    return scale