def LapEigenmap(affinity_matrix, dim, random_state):
    """Spectrally embed a precomputed affinity matrix and rescale the result.

    Parameters
    ----------
    affinity_matrix:
        Affinities handed to ``SpectralEmbedding`` with
        ``affinity="precomputed"``.

    dim: int
        Number of embedding components requested from ``SpectralEmbedding``.

    random_state:
        Forwarded to ``SpectralEmbedding``. When ``None``, a fresh
        ``np.random.RandomState()`` is created and used instead.

    Returns
    -------
    The output of ``fit_transform`` divided in place by its largest entry.
    """
    rng = np.random.RandomState() if random_state is None else random_state

    embedder = SpectralEmbedding(
        n_components=dim,
        affinity="precomputed",
        random_state=rng,
    )
    layout = embedder.fit_transform(affinity_matrix)

    # Rescale in place so that the largest coordinate becomes 1.
    layout /= layout.max()
    return layout
def component_layout(data, n_components, component_labels, dim,
                     metric="euclidean", metric_kwds={}):
    """Lay out the connected components relative to one another.

    Each component is summarised by the centroid of its member rows; the
    centroids are then spectrally embedded using an ``exp(-d**2)`` affinity
    built from their pairwise distances.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- needed to compute one centroid per connected
        component of the graph.

    n_components: int
        The number of distinct components to be laid out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph, the label of the component to which
        the vertex belongs.

    dim: int
        The chosen embedding dimension.

    metric: string or callable (optional, default 'euclidean')
        The metric passed to ``pairwise_distances`` to measure distances
        between the centroids.

    metric_kwds: dict (optional, default {})
        Keyword arguments forwarded to ``pairwise_distances``. The dict is
        only read, never modified.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components, divided by its largest entry.
    """
    n_features = data.shape[1]
    centroids = np.empty((n_components, n_features), dtype=np.float64)
    for component in range(n_components):
        members = data[component_labels == component]
        centroids[component] = members.mean(axis=0)

    centroid_distances = pairwise_distances(
        centroids, metric=metric, **metric_kwds
    )
    affinities = np.exp(-(centroid_distances ** 2))

    embedder = SpectralEmbedding(n_components=dim, affinity="precomputed")
    layout = embedder.fit_transform(affinities)

    # Rescale in place so that the largest coordinate becomes 1.
    layout /= layout.max()
    return layout
def component_layout(
    data,
    n_components,
    component_labels,
    dim,
    random_state,
    metric="euclidean",
    metric_kwds={},
):
    """Provide a layout relating the separate connected components.

    This is done by taking the centroid of each component and then performing
    a spectral embedding of the centroids. For precomputed distances, where
    centroids cannot be formed, the distance between two components is the
    chosen linkage (mean, max or min) of the cross-component distances.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- required so we can generate centroids for each
        connected component of the graph. When ``metric`` is 'precomputed'
        it is indexed as a sample-by-sample distance matrix instead.

    n_components: int
        The number of distinct components to be laid out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph the label of the component to
        which the vertex belongs.

    dim: int
        The chosen embedding dimension.

    random_state:
        Forwarded unchanged as ``random_state`` to ``SpectralEmbedding``.

    metric: string or callable (optional, default 'euclidean')
        The metric used to measure distances among the source data points.

    metric_kwds: dict (optional, default {})
        Keyword arguments to be passed to the metric function.
        If metric is 'precomputed', 'linkage' keyword can be used to specify
        'average', 'complete', or 'single' linkage. Default is 'average'.
        The dict is only read, never modified.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components, divided by its largest entry.

    Raises
    ------
    ValueError
        If ``metric`` is 'precomputed' and the requested linkage is not one
        of 'average', 'complete' or 'single'.
    NotImplementedError
        If ``metric`` is a callable, ``data`` is a scipy sparse matrix, and
        the callable is not one of the values of ``sparse_named_distances``.
    """
    if metric == "precomputed":
        # cannot compute centroids from precomputed distances
        # instead, compute centroid distances using linkage
        linkage_reducers = {
            "average": np.mean,
            "complete": np.max,
            "single": np.min,
        }
        linkage = metric_kwds.get("linkage", "average")
        try:
            reduce_block = linkage_reducers[linkage]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are equally invalid.
            raise ValueError(
                "Unrecognized linkage '%s'. Please choose from "
                "'average', 'complete', or 'single'" % linkage
            ) from None

        distance_matrix = np.zeros(
            (n_components, n_components), dtype=np.float64
        )
        for c_i in range(n_components):
            dm_i = data[component_labels == c_i]
            # Only the upper triangle is computed; the result is mirrored.
            for c_j in range(c_i + 1, n_components):
                dist = reduce_block(dm_i[:, component_labels == c_j])
                distance_matrix[c_i, c_j] = dist
                distance_matrix[c_j, c_i] = dist
    else:
        # The centroid buffer is only needed on this path, so it is not
        # allocated for precomputed distances. Rows are written into a
        # preallocated ndarray, so the centroids are always dense.
        component_centroids = np.empty(
            (n_components, data.shape[1]), dtype=np.float64
        )
        for label in range(n_components):
            component_centroids[label] = data[component_labels == label].mean(
                axis=0
            )

        if metric in SPECIAL_METRICS:
            distance_matrix = pairwise_special_metric(
                component_centroids, metric=metric
            )
        elif metric in SPARSE_SPECIAL_METRICS:
            distance_matrix = pairwise_special_metric(
                component_centroids, metric=SPARSE_SPECIAL_METRICS[metric]
            )
        elif callable(metric) and scipy.sparse.isspmatrix(data):
            # Translate the sparse metric callable back to its registered
            # name, which is then passed to pairwise_distances for the
            # dense centroids.
            function_to_name_mapping = {
                v: k for k, v in sparse_named_distances.items()
            }
            try:
                metric_name = function_to_name_mapping[metric]
            except KeyError:
                raise NotImplementedError(
                    "Multicomponent layout for custom "
                    "sparse metrics is not implemented at "
                    "this time."
                ) from None
            distance_matrix = pairwise_distances(
                component_centroids, metric=metric_name, **metric_kwds
            )
        else:
            distance_matrix = pairwise_distances(
                component_centroids, metric=metric, **metric_kwds
            )

    affinity_matrix = np.exp(-(distance_matrix ** 2))

    component_embedding = SpectralEmbedding(
        n_components=dim, affinity="precomputed", random_state=random_state
    ).fit_transform(affinity_matrix)
    # Rescale in place so that the largest coordinate becomes 1.
    component_embedding /= component_embedding.max()

    return component_embedding
def component_layout(
    data,
    n_components,
    component_labels,
    dim,
    random_state,
    metric="euclidean",
    metric_kwds={},
):
    """Provide a layout relating the separate connected components.

    This is done by taking the centroid of each component and then performing
    a spectral embedding of the centroids. For precomputed distances, where
    centroids cannot be formed, the distance between two components is the
    chosen linkage (mean, max or min) of the cross-component distances.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- required so we can generate centroids for each
        connected component of the graph. When ``metric`` is 'precomputed'
        it is indexed as a sample-by-sample distance matrix instead.

    n_components: int
        The number of distinct components to be laid out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph the label of the component to
        which the vertex belongs.

    dim: int
        The chosen embedding dimension.

    random_state:
        Forwarded unchanged as ``random_state`` to ``SpectralEmbedding``.

    metric: string or callable (optional, default 'euclidean')
        The metric used to measure distances among the source data points.

    metric_kwds: dict (optional, default {})
        Keyword arguments to be passed to the metric function.
        If metric is 'precomputed', 'linkage' keyword can be used to specify
        'average', 'complete', or 'single' linkage. Default is 'average'.
        The dict is only read, never modified.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components, divided by its largest entry.

    Raises
    ------
    ValueError
        If ``metric`` is 'precomputed' and the requested linkage is not one
        of 'average', 'complete' or 'single'.
    """
    if metric == "precomputed":
        # cannot compute centroids from precomputed distances
        # instead, compute centroid distances using linkage
        linkage_reducers = {
            "average": np.mean,
            "complete": np.max,
            "single": np.min,
        }
        linkage = metric_kwds.get("linkage", "average")
        try:
            reduce_block = linkage_reducers[linkage]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are equally invalid.
            raise ValueError(
                "Unrecognized linkage '%s'. Please choose from "
                "'average', 'complete', or 'single'" % linkage
            ) from None

        distance_matrix = np.zeros(
            (n_components, n_components), dtype=np.float64
        )
        for c_i in range(n_components):
            dm_i = data[component_labels == c_i]
            # Only the upper triangle is computed; the result is mirrored.
            for c_j in range(c_i + 1, n_components):
                dist = reduce_block(dm_i[:, component_labels == c_j])
                distance_matrix[c_i, c_j] = dist
                distance_matrix[c_j, c_i] = dist
    else:
        # The centroid buffer is only needed on this path, so it is not
        # allocated for precomputed distances.
        component_centroids = np.empty(
            (n_components, data.shape[1]), dtype=np.float64
        )
        for label in range(n_components):
            component_centroids[label] = data[component_labels == label].mean(
                axis=0
            )

        if metric in ("hellinger", "ll_dirichlet"):
            distance_matrix = pairwise_special_metric(
                component_centroids, metric=metric
            )
        else:
            distance_matrix = pairwise_distances(
                component_centroids, metric=metric, **metric_kwds
            )

    affinity_matrix = np.exp(-(distance_matrix ** 2))

    component_embedding = SpectralEmbedding(
        n_components=dim, affinity="precomputed", random_state=random_state
    ).fit_transform(affinity_matrix)
    # Rescale in place so that the largest coordinate becomes 1.
    component_embedding /= component_embedding.max()

    return component_embedding