def test_cluster_cutoff():
    """Check the set of labels produced when a minimum cluster size is set.

    Fits the model to 100 seeded uniform random 2-D points with
    ``min_cluster_size=10`` and asserts that the distinct labels are
    exactly ``-1, 0, ..., n_components_ - 1``.
    """
    n_points = 100
    np.random.seed(0)
    points = np.random.random((n_points, 2))

    clusterer = HierarchicalClustering(8, edge_cutoff=0.9,
                                       min_cluster_size=10)
    clusterer.fit(points)

    # NOTE(review): -1 presumably labels points whose cluster fell below
    # min_cluster_size -- confirm against HierarchicalClustering.
    found_labels = np.unique(clusterer.labels_)
    expected_labels = np.arange(-1, clusterer.n_components_)
    assert_allclose(found_labels, expected_labels)
def test_simple_clustering():
    """Check component, tree-edge and cluster-edge counts on a tiny data set.

    Fits the model to 10 seeded uniform random 2-D points with
    ``edge_cutoff=0.5`` and asserts that:

    * the number of components is ``N // 2``;
    * the full tree has ``N - 1`` positive entries;
    * the cluster graph has ``N // 2`` positive entries;
    * the distinct labels are ``0 .. N // 2 - 1``.
    """
    np.random.seed(0)
    N = 10
    # Floor division keeps the expected counts (and the arange bound)
    # integral on Python 3, where ``N / 2`` would be the float 5.0.
    half = N // 2

    X = np.random.random((N, 2))
    model = HierarchicalClustering(8, edge_cutoff=0.5)
    model.fit(X)

    assert_(model.n_components_ == half)
    assert_(np.sum(model.full_tree_.toarray() > 0) == N - 1)
    assert_(np.sum(model.cluster_graph_.toarray() > 0) == half)
    assert_allclose(np.unique(model.labels_), np.arange(half))
def mst_clustering(make_plots, x, y, band1, band2, band3, band4, p_path):
    """Cluster the (x, y) points with a minimum-spanning-tree model.

    Fits a ``HierarchicalClustering`` model to the points, evaluates a
    per-cluster mixture-model density on a regular grid spanning the data,
    and optionally passes the results to ``mst_plots``.

    Parameters
    ----------
    make_plots
        Plots are produced only when ``"mst" in make_plots`` is true.
    x, y : sequence of float
        Point coordinates; stacked into an ``(n_points, 2)`` array, so
        both must have the same length.
    band1, band2, band3, band4, p_path
        Forwarded unchanged to ``mst_plots``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``.
    """
    X = np.vstack([x, y]).T

    # Boundaries for plots
    xmin, xmax = min(x), max(x)
    ymin, ymax = min(y), max(y)

    # Compute MST clustering
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    # Build the grid from an explicit list: np.vstack needs a sequence,
    # and ``map`` returns a one-shot iterator on Python 3.
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = int(ind.sum())
        # Floor division: the number of mixture components must be an int.
        Nclusters = min(12, Npts // 5)
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes gmm.score returns one log-likelihood per
        # grid point (it is exponentiated and normalised by its max) --
        # confirm for the GMM class imported by this file.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    if "mst" in make_plots:
        mst_plots(X, ymin, ymax, xmin, xmax, T_x, T_y, T_trunc_x, T_trunc_y,
                  density, band1, band2, band3, band4, p_path)

    return scale
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# get data
X = fetch_great_wall()

xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Pass the n_neighbors variable (not a repeated literal) so that editing
# the setting above actually changes the model.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the edge value at the cutoff percentile of the full tree.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_,
                              model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# get data
X = fetch_great_wall()

xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Compute the MST clustering model
n_neighbors = 10
edge_cutoff = 0.9
cluster_cutoff = 10
# Use the n_neighbors variable instead of duplicating the literal, so the
# setting above is the single source of truth for the model.
model = HierarchicalClustering(n_neighbors=n_neighbors,
                               edge_cutoff=edge_cutoff,
                               min_cluster_size=cluster_cutoff)
model.fit(X)
# Report the edge value at the cutoff percentile of the full tree.
print(" scale: %2g Mpc" % np.percentile(model.full_tree_.data,
                                        100 * edge_cutoff))

n_components = model.n_components_
labels = model.labels_

#------------------------------------------------------------
# Get the x, y coordinates of the beginning and end of each line segment
T_x, T_y = get_graph_segments(model.X_train_,
                              model.full_tree_)
T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                          model.cluster_graph_)

#------------------------------------------------------------
# Fit a GaussianMixture to each individual cluster
def mst_clustering(data_to_cluster):
    """Cluster a selection of the module-level ``data`` and plot the result.

    Selects ``data['x']`` and ``data['y']`` with ``data_to_cluster``, fits
    a ``HierarchicalClustering`` model, evaluates a per-cluster
    mixture-model density on a fixed 0..5000 grid, and shows a three-panel
    figure: the points, the full tree, and the truncated cluster graph
    drawn over the density image.

    Parameters
    ----------
    data_to_cluster
        Index or mask applied to ``data['x']`` and ``data['y']``.

    Returns
    -------
    scale
        The ``100 * edge_cutoff`` percentile of ``model.full_tree_.data``.
    """
    x = data['x'][data_to_cluster]
    y = data['y'][data_to_cluster]
    # Stack into an (n_points, 2) array: the code below indexes X with a
    # boolean mask and with X[:, k], which a plain list cannot support.
    X = np.vstack([x, y]).T

    xmin, xmax = (0, 5000)
    ymin, ymax = (0, 5000)

    #------------------------------------------------------------
    # Compute the MST clustering model
    n_neighbors = 5
    edge_cutoff = 0.9
    cluster_cutoff = 20
    model = HierarchicalClustering(n_neighbors=n_neighbors,
                                   edge_cutoff=edge_cutoff,
                                   min_cluster_size=cluster_cutoff)
    model.fit(X)

    scale = np.percentile(model.full_tree_.data, 100 * edge_cutoff)
    n_components = model.n_components_
    labels = model.labels_

    #------------------------------------------------------------
    # Get the x, y coordinates of the beginning and end of each line segment
    T_x, T_y = get_graph_segments(model.X_train_,
                                  model.full_tree_)
    T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_,
                                              model.cluster_graph_)

    #------------------------------------------------------------
    # Fit a GMM to each individual cluster
    Nx = 100
    Ny = 250
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    # Explicit list: np.vstack needs a sequence, and ``map`` returns a
    # one-shot iterator on Python 3.
    Xgrid = np.vstack([grid_x.ravel(), grid_y.ravel()]).T
    density = np.zeros(Xgrid.shape[0])

    for i in range(n_components):
        ind = (labels == i)
        Npts = int(ind.sum())
        # Floor division: the number of mixture components must be an int.
        Nclusters = min(12, Npts // 5)
        gmm = GMM(Nclusters).fit(X[ind])
        # NOTE(review): assumes gmm.score returns one log-likelihood per
        # grid point -- confirm for the GMM class imported by this file.
        dens = np.exp(gmm.score(Xgrid))
        density += dens / dens.max()

    density = density.reshape((Ny, Nx))

    #----------------------------------------------------------------------
    # Plot the results
    # Every panel puts the y coordinate on the horizontal axis and the x
    # coordinate on the vertical axis.
    fig = plt.figure(figsize=(5, 6))
    fig.subplots_adjust(hspace=0, left=0.1, right=0.95, bottom=0.1, top=0.9)

    # Top panel: the raw points
    ax = fig.add_subplot(311, aspect='equal')
    ax.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k')
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Middle panel: the full tree
    ax = fig.add_subplot(312, aspect='equal')
    ax.plot(T_y, T_x, c='k', lw=0.5)
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.set_ylabel('(Mpc)')

    # Bottom panel: the truncated cluster graph over the density image
    ax = fig.add_subplot(313, aspect='equal')
    ax.plot(T_trunc_y, T_trunc_x, c='k', lw=0.5)
    ax.imshow(density.T, origin='lower', cmap=plt.cm.hot_r,
              extent=[ymin, ymax, xmin, xmax])
    ax.set_xlim(ymin, ymax)
    ax.set_ylim(xmin, xmax)
    ax.set_xlabel('(Mpc)')
    ax.set_ylabel('(Mpc)')

    plt.show()

    return scale