def test_spectral_embedding_two_components(seed=36): # Test spectral embedding with two components random_state = np.random.RandomState(seed) n_sample = 100 affinity = np.zeros(shape=[n_sample * 2, n_sample * 2]) # first component affinity[0:n_sample, 0:n_sample] = np.abs(random_state.randn(n_sample, n_sample)) + 2 # second component affinity[n_sample::, n_sample::] = np.abs(random_state.randn(n_sample, n_sample)) + 2 # Test of internal _graph_connected_component before connection component = _graph_connected_component(affinity, 0) assert_true(component[:n_sample].all()) assert_true(not component[n_sample:].any()) component = _graph_connected_component(affinity, -1) assert_true(not component[:n_sample].any()) assert_true(component[n_sample:].all()) # connection affinity[0, n_sample + 1] = 1 affinity[n_sample + 1, 0] = 1 affinity.flat[:: 2 * n_sample + 1] = 0 affinity = 0.5 * (affinity + affinity.T) true_label = np.zeros(shape=2 * n_sample) true_label[0:n_sample] = 1 se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed", random_state=np.random.RandomState(seed)) embedded_coordinate = se_precomp.fit_transform(affinity) # Some numpy versions are touchy with types embedded_coordinate = se_precomp.fit_transform(affinity.astype(np.float32)) # thresholding on the first components using 0. label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float") assert_equal(normalized_mutual_info_score(true_label, label_), 1.0)
def test_spectral_embedding_two_components(seed=36): # Test spectral embedding with two components random_state = np.random.RandomState(seed) n_sample = 100 affinity = np.zeros(shape=[n_sample * 2, n_sample * 2]) # first component affinity[0:n_sample, 0:n_sample] = np.abs(random_state.randn(n_sample, n_sample)) + 2 # second component affinity[n_sample::, n_sample::] = np.abs(random_state.randn(n_sample, n_sample)) + 2 # Test of internal _graph_connected_component before connection component = _graph_connected_component(affinity, 0) assert component[:n_sample].all() assert not component[n_sample:].any() component = _graph_connected_component(affinity, -1) assert not component[:n_sample].any() assert component[n_sample:].all() # connection affinity[0, n_sample + 1] = 1 affinity[n_sample + 1, 0] = 1 affinity.flat[::2 * n_sample + 1] = 0 affinity = 0.5 * (affinity + affinity.T) true_label = np.zeros(shape=2 * n_sample) true_label[0:n_sample] = 1 se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed", random_state=np.random.RandomState(seed)) embedded_coordinate = se_precomp.fit_transform(affinity) # Some numpy versions are touchy with types embedded_coordinate = \ se_precomp.fit_transform(affinity.astype(np.float32)) # thresholding on the first components using 0. label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float") assert normalized_mutual_info_score(true_label, label_) == 1.0
def test_sparse_graph_connected_component(): rng = np.random.RandomState(42) n_samples = 300 boundaries = [0, 42, 121, 200, n_samples] p = rng.permutation(n_samples) connections = [] for start, stop in zip(boundaries[:-1], boundaries[1:]): group = p[start:stop] # Connect all elements within the group at least once via an # arbitrary path that spans the group. for i in range(len(group) - 1): connections.append((group[i], group[i + 1])) # Add some more random connections within the group min_idx, max_idx = 0, len(group) - 1 n_random_connections = 1000 source = rng.randint(min_idx, max_idx, size=n_random_connections) target = rng.randint(min_idx, max_idx, size=n_random_connections) connections.extend(zip(group[source], group[target])) # Build a symmetric affinity matrix row_idx, column_idx = tuple(np.array(connections).T) data = rng.uniform(.1, 42, size=len(connections)) affinity = sparse.coo_matrix((data, (row_idx, column_idx))) affinity = 0.5 * (affinity + affinity.T) for start, stop in zip(boundaries[:-1], boundaries[1:]): component_1 = _graph_connected_component(affinity, p[start]) component_size = stop - start assert component_1.sum() == component_size # We should retrieve the same component mask by starting by both ends # of the group component_2 = _graph_connected_component(affinity, p[stop - 1]) assert component_2.sum() == component_size assert_array_equal(component_1, component_2)
def test_sparse_graph_connected_component(): rng = np.random.RandomState(42) n_samples = 300 boundaries = [0, 42, 121, 200, n_samples] p = rng.permutation(n_samples) connections = [] for start, stop in zip(boundaries[:-1], boundaries[1:]): group = p[start:stop] # Connect all elements within the group at least once via an # arbitrary path that spans the group. for i in range(len(group) - 1): connections.append((group[i], group[i + 1])) # Add some more random connections within the group min_idx, max_idx = 0, len(group) - 1 n_random_connections = 1000 source = rng.randint(min_idx, max_idx, size=n_random_connections) target = rng.randint(min_idx, max_idx, size=n_random_connections) connections.extend(zip(group[source], group[target])) # Build a symmetric affinity matrix row_idx, column_idx = tuple(np.array(connections).T) data = rng.uniform(.1, 42, size=len(connections)) affinity = sparse.coo_matrix((data, (row_idx, column_idx))) affinity = 0.5 * (affinity + affinity.T) for start, stop in zip(boundaries[:-1], boundaries[1:]): component_1 = _graph_connected_component(affinity, p[start]) component_size = stop - start assert_equal(component_1.sum(), component_size) # We should retrieve the same component mask by starting by both ends # of the group component_2 = _graph_connected_component(affinity, p[stop - 1]) assert_equal(component_2.sum(), component_size) assert_array_equal(component_1, component_2)