def _generate_word_network(self, progress_callback):
    """Build the word network and the table of word frequencies.

    Every value of ``self.word_matrix`` is halved, and the entries whose
    halved value reaches ``self.word_threshold`` become weighted edges of an
    ``s x s`` sparse matrix, where ``s = len(self.word_freqs)``.  The network
    restricted to ``self.mask`` is stored in ``self.word_network`` and the
    matching rows (frequency as attribute, word as meta) are stored in
    ``self.word_items``.

    Args:
        progress_callback: optional callable accepting a progress value;
            it is called with 90.0 on entry and 100.0 on exit and is
            skipped when falsy.
    """
    if progress_callback:
        progress_callback(90.0)

    th = self.word_threshold
    # One pass over the matrix: the halved weight is computed once per entry
    # and weights and indices cannot drift out of step.
    # NOTE(review): values are halved presumably because every pair is
    # counted twice upstream -- confirm against the code filling word_matrix.
    rows, cols, weights = [], [], []
    for key, value in self.word_matrix.items():
        weight = value / 2
        if weight >= th:
            rows.append(key[0])
            cols.append(key[1])
            weights.append(weight)

    data = np.array(weights, dtype=np.float64)
    # Row/column positions are indices, so keep them integral instead of
    # routing them through float64.
    row_ind = np.array(rows, dtype=np.intp)
    col_ind = np.array(cols, dtype=np.intp)

    s = len(self.word_freqs)
    edges = csr_matrix((data, (row_ind, col_ind)), shape=(s, s))

    # Order words by their index so that node i corresponds to row/column i.
    ind2word = {v: k for k, v in self.word2ind.items()}
    ordered_words = [ind2word[ind] for ind in range(s)]
    words = np.array(ordered_words)
    freqs = np.array([self.word_freqs[word] for word in ordered_words],
                     dtype=np.float64)

    network = Network(nodes=words, edges=edges, name='Word Network')
    self.word_network = network.subgraph(self.mask)

    domain = Domain([ContinuousVariable('word_frequency')],
                    None,
                    [StringVariable('word')])
    self.word_items = Table(domain,
                            freqs.reshape((-1, 1))[self.mask],
                            None,
                            words.reshape((-1, 1))[self.mask])
    if progress_callback:
        progress_callback(100.0)
def setUp(self):
    """Create the directed and undirected 8-node toy networks for the tests."""
    # (source, target, weight) triples of the directed network
    directed_triples = (
        (1, 2, 1.0), (1, 3, 3.0), (2, 3, 1.0), (2, 6, 0.5), (3, 4, 1.0),
        (4, 5, 1.0), (4, 7, -1.0), (5, 6, 0.0), (6, 5, 0.1), (6, 2, 0.1))
    sources, targets, weights = zip(*directed_triples)
    directed_matrix = sp.csr_matrix((weights, (sources, targets)),
                                    shape=(8, 8))
    self.toy_directed = Network(np.arange(8), DirectedEdges(directed_matrix))

    # (source, target, weight) triples of the undirected network
    undirected_triples = (
        (1, 2, 1.0), (1, 3, 3.0), (2, 3, 1.0), (2, 6, 0.5), (3, 4, 1.0),
        (4, 5, 1.0), (4, 7, -1.0), (5, 6, 0.1))
    sources, targets, weights = zip(*undirected_triples)
    undirected_matrix = sp.csr_matrix((weights, (sources, targets)),
                                      shape=(8, 8))
    self.toy_undirected = Network(np.arange(8),
                                  UndirectedEdges(undirected_matrix))
def test_call(self):
    """Calling Node2Vec on a network yields a table with one row per node."""
    emb_size = 300
    n2v = Node2Vec(num_walks=10, walk_len=80, emb_size=emb_size)

    embeddings = n2v(self.toy_directed)
    n_nodes = self.toy_directed.number_of_nodes()
    self.assertEqual(embeddings.X.shape, (n_nodes, emb_size))

    # check that domain is extended and that the existing attributes
    # do not change places
    no_columns = np.empty((8, 0))
    single_column = np.arange(8).reshape(-1, 1)
    data = Table(Domain([ContinuousVariable("var1")]),
                 single_column, no_columns, no_columns)
    toy_net_with_data = Network(data, self.toy_directed.edges[0])

    extended_data = n2v(toy_net_with_data)
    self.assertEqual(
        extended_data.X.shape,
        (toy_net_with_data.number_of_nodes(), 1 + emb_size))
    np.testing.assert_array_almost_equal(extended_data.X[:, 0],
                                         np.arange(8))
def _create_net(edges, n=None, directed=False):
    """Build a network from an iterable of (row, col, weight) triples.

    Args:
        edges: iterable of (row, col, weight) triples
        n: number of nodes; when None, it is the largest index in `edges`
            plus one
        directed: wrap the matrix in DirectedEdges if true, otherwise in
            UndirectedEdges

    Returns:
        Network whose nodes are `np.arange(n)`
    """
    row, col, data = zip(*edges)
    if n is None:
        n = max(row + col) + 1
    matrix = sp.coo_matrix((data, (row, col)), shape=(n, n))
    if directed:
        net_edges = DirectedEdges(matrix)
    else:
        net_edges = UndirectedEdges(matrix)
    return Network(np.arange(n), net_edges)
def wrapped(*args):
    """Call `f` (from the enclosing scope) and wrap its result in a Network.

    `f` must return `(row, col)` or `(row, col, n)`; without `n`, the number
    of nodes is the largest index in `row` and `col` plus one. All edges get
    weight 1. The network is named after the function and its arguments.
    """
    row, col, *rest = f(*args)
    if rest:
        n = rest[0]
    else:
        n = max(np.max(row), np.max(col)) + 1
    unit_weights = np.ones(len(row))
    edges = sp.csr_matrix((unit_weights, (row, col)), shape=(n, n))
    # a single argument is shown as "name(x)" rather than "name(x,)"
    label = f"{f.__name__}{args}".replace(",)", ")")
    return Network(range(n), edges, name=label)
def geometric(n_nodes, n_edges):
    """Create a network connecting the closest pairs of random points.

    `n_nodes` points are drawn uniformly from the unit square and the
    `n_edges` pairs with the smallest squared Euclidean distance are
    connected by edges of weight 1. Edges are stored only above the
    diagonal; the points are passed to the network as `coordinates`.

    Args:
        n_nodes (int): number of points (nodes)
        n_edges (int): number of edges

    Returns:
        Network with `n_nodes` nodes and `n_edges` edges (more only if
        several pairs tie exactly at the threshold distance)

    Raises:
        ValueError: if `n_edges` is negative or exceeds the number of
            distinct pairs of points
    """
    if n_edges < 0:
        raise ValueError(
            f"Number of edges must be non-negative (got {n_edges})")
    n_pairs = n_nodes * (n_nodes - 1) // 2
    if n_edges > n_pairs:
        raise ValueError(
            f"There are only {n_pairs} (< {n_edges}) possible edges between "
            f"{n_nodes} points")

    xy = np.random.random((n_nodes, 2))
    # squared distances: |a - b|^2 = |a|^2 + |b|^2 - 2 a.b
    xx = row_norms(xy, squared=True)[:, np.newaxis]
    distances = np.dot(xy, xy.T)
    distances *= -2
    distances += xx
    distances += xx.T

    if n_edges:
        ur = np.triu_indices(n_nodes, k=1)  # skip zeros and repetitions
        # The threshold is the n_edges-th smallest distance, i.e. the element
        # at sorted position n_edges - 1. Taking position n_edges together
        # with `<=` would select one edge too many and would index out of
        # bounds when all pairs are requested.
        kth = n_edges - 1
        dist_threshold = np.partition(distances[ur], kth)[kth]
        mask = distances <= dist_threshold
        mask[np.tril_indices(n_nodes)] = False
    else:
        # no edges requested: nothing may pass the threshold
        mask = np.zeros((n_nodes, n_nodes), dtype=bool)

    row, col = mask.nonzero()
    edges = sp.csr_matrix((np.ones(len(row)), (row, col)),
                          shape=(n_nodes, n_nodes))
    return Network(
        range(n_nodes), edges,
        name=f"geometric({n_nodes},{n_edges})",
        coordinates=xy
    )
def test_show_errors(self):
    """Check which widget errors are shown for various inputs and settings."""
    widget = self.widget
    model = widget.controls.variable.model()
    a, b, c, d = self.a, self.b, self.c, self.d
    cb_connector = widget.controls.connector_value
    # order: no_data, no_categorical, same_values
    error_checks = (widget.Error.no_data.is_shown,
                    widget.Error.no_categorical.is_shown,
                    widget.Error.same_values.is_shown)

    def assert_errors(*expected):
        # verify the three error states, in the order of `error_checks`
        for is_shown, state in zip(error_checks, expected):
            if state:
                self.assertTrue(is_shown())
            else:
                self.assertFalse(is_shown())

    def send_abcd():
        self._set_graph(Table(Domain([a, b, c, d])))

    def send_bd():
        self._set_graph(Table(Domain([b, d])))

    def set_connector():
        widget.connector_value = widget.connect_value + 1

    def activate_connector():
        cb_connector.activated[int].emit(widget.connector_value)

    # all four variables: only a and c are expected in the model
    send_abcd()
    self.assertSequenceEqual(model, [a, c])
    assert_errors(False, False, False)

    # only b and d: the model is expected to be empty
    send_bd()
    self.assertSequenceEqual(model, [])
    assert_errors(False, True, False)

    send_abcd()
    self.assertSequenceEqual(model, [a, c])
    assert_errors(False, False, False)

    # choosing the connector through the combo triggers same_values
    set_connector()
    activate_connector()
    assert_errors(False, False, True)

    # a network whose nodes are a plain range instead of a table
    net = Network(range(3), sp.csr_matrix([[0, 1], [1, 2]]))
    self.send_signal(widget.Inputs.network, net)
    assert_errors(True, False, False)

    # removing the network clears all errors
    send_abcd()
    set_connector()
    self.send_signal(widget.Inputs.network, None)
    assert_errors(False, False, False)

    send_abcd()
    set_connector()
    activate_connector()
    assert_errors(False, False, True)

    send_bd()
    assert_errors(False, True, False)
class TestEmbeddings(unittest.TestCase):
    """Tests of Node2Vec on small directed and undirected toy networks."""

    def setUp(self):
        """Create the two 8-node toy networks; node 0 has no edges."""
        directed_triples = (
            (1, 2, 1.0), (1, 3, 3.0), (2, 3, 1.0), (2, 6, 0.5), (3, 4, 1.0),
            (4, 5, 1.0), (4, 7, -1.0), (5, 6, 0.0), (6, 5, 0.1), (6, 2, 0.1))
        sources, targets, weights = zip(*directed_triples)
        directed_matrix = sp.csr_matrix((weights, (sources, targets)),
                                        shape=(8, 8))
        self.toy_directed = Network(np.arange(8),
                                    DirectedEdges(directed_matrix))

        undirected_triples = (
            (1, 2, 1.0), (1, 3, 3.0), (2, 3, 1.0), (2, 6, 0.5), (3, 4, 1.0),
            (4, 5, 1.0), (4, 7, -1.0), (5, 6, 0.1))
        sources, targets, weights = zip(*undirected_triples)
        undirected_matrix = sp.csr_matrix((weights, (sources, targets)),
                                          shape=(8, 8))
        self.toy_undirected = Network(np.arange(8),
                                      UndirectedEdges(undirected_matrix))

    def test_node_probas(self):
        """
        Test that node probabilities get calculated correctly
        """
        n2v = Node2Vec()
        directed, undirected = self.toy_directed, self.toy_undirected

        # nowhere to go from isolated node
        from_isolated = n2v.node_probas(directed, 0)
        self.assertEqual(len(from_isolated), 0)

        # should not have division by zero when weights of edges
        # to neighbors sum to 0
        self.assertDictEqual(n2v.node_probas(directed, 5), {6: 1.0})

        probas = n2v.node_probas(directed, 4)
        for neighbor, expected in ((5, 0.881), (7, 0.119)):
            self.assertAlmostEqual(probas[neighbor], expected, places=3)

        self.assertDictEqual(n2v.node_probas(directed, 1),
                             {2: 0.25, 3: 0.75})
        self.assertDictEqual(n2v.node_probas(undirected, 3),
                             {1: 0.6, 2: 0.2, 4: 0.2})

    def test_edge_probas(self):
        """
        Test that edge probabilities get calculated appropriately based on
        shortest distance between previous node and next node (equations in
        'Search bias' section of node2vec paper)
        """
        n2v = Node2Vec(p=0.8, q=0.5)
        directed, undirected = self.toy_directed, self.toy_undirected

        edge_probas = n2v.edge_probas(directed, 3, 4)
        # d_tx = 2 for both edges
        for edge, expected in (((4, 5), 0.982), ((4, 7), 0.018)):
            self.assertAlmostEqual(edge_probas[edge], expected, places=3)

        edge_probas = n2v.edge_probas(directed, 1, 2)
        self.assertAlmostEqual(edge_probas[(2, 3)], 0.5)  # d_tx = 1

        edge_probas = n2v.edge_probas(directed, 5, 6)
        # d_tx = 0
        self.assertAlmostEqual(edge_probas[(6, 5)], 0.385, places=3)

        edge_probas = n2v.edge_probas(undirected, 1, 2)
        expected_probas = (
            ((2, 1), 0.385),  # d_tx = 0
            ((2, 3), 0.308),  # d_tx = 1
            ((2, 6), 0.308),  # d_tx = 2
        )
        for edge, expected in expected_probas:
            self.assertAlmostEqual(edge_probas[edge], expected, places=3)

    def test_call(self):
        """Calling Node2Vec yields a table with one row per node."""
        emb_size = 300
        n2v = Node2Vec(num_walks=10, walk_len=80, emb_size=emb_size)

        embeddings = n2v(self.toy_directed)
        n_nodes = self.toy_directed.number_of_nodes()
        self.assertEqual(embeddings.X.shape, (n_nodes, emb_size))

        # check that domain is extended and that the existing attributes
        # do not change places
        no_columns = np.empty((8, 0))
        single_column = np.arange(8).reshape(-1, 1)
        data = Table(Domain([ContinuousVariable("var1")]),
                     single_column, no_columns, no_columns)
        toy_net_with_data = Network(data, self.toy_directed.edges[0])

        extended_data = n2v(toy_net_with_data)
        self.assertEqual(
            extended_data.X.shape,
            (toy_net_with_data.number_of_nodes(), 1 + emb_size))
        np.testing.assert_array_almost_equal(extended_data.X[:, 0],
                                             np.arange(8))
def _generate_document_network(self, progress_callback):
    """Build the document network from the thresholded document matrix.

    Entries of a copy of ``self.document_matrix`` that lie below
    ``self.document_threshold`` are set to zero; the result is used as the
    edges of a network whose nodes are the corpus titles. The network is
    stored in ``self.document_network``.

    Args:
        progress_callback: optional callable accepting a progress value;
            called with 90.0 on entry and 100.0 on exit, skipped when falsy.
    """
    if progress_callback:
        progress_callback(90.0)

    # work on a copy so that the original matrix keeps all its values
    thresholded = self.document_matrix.copy()
    below_threshold = thresholded < self.document_threshold
    thresholded[below_threshold] = 0

    titles = np.array(self.corpus.titles)
    self.document_network = Network(
        nodes=titles, edges=csr_matrix(thresholded), name='Document Network')

    if progress_callback:
        progress_callback(100.0)
def _map_network(self):
    """Construct a new network from the first edge set of `self.network`.

    Edge weights are used only when `self.weighting` equals
    `self.WeightByWeights`; otherwise `None` is passed on instead.

    Returns:
        Network built from `_construct_items()` and `_construct_edges(...)`
    """
    coo = self.network.edges[0].edges.tocoo()
    row, col = coo.row, coo.col
    weights = coo.data if self.weighting == self.WeightByWeights else None

    if self.normalize:
        # NOTE(review): the return value is not used, so the helper
        # presumably changes `weights` in place -- confirm
        self._normalize_weights(row, col, weights)

    row, col = self._map_into_feature_values(row, col)
    items = self._construct_items()
    new_edges = self._construct_edges(row, col, weights)
    return Network(items, new_edges)
def test_filtered_edges(self):
    """Check `tm._filtered_edges` for various combinations of mode masks."""
    def mask(pattern):
        # fresh boolean array for every call; "T" stands for True
        return np.array([flag == "T" for flag in pattern])

    def assert_edges(actual, expected):
        self.assertEqual(len(actual.data), len(expected))
        self.assertEqual(actual.data.dtype, float)
        triples = set(zip(actual.row, actual.col, actual.data))
        self.assertEqual(triples, set(expected))

    net = _create_net(((0, 4, 1.), (4, 1, 5), (1, 5, 3),
                       (2, 4, 4), (2, 5, 2), (3, 6, 6)))

    # All edges
    filtered = tm._filtered_edges(net, mask("TTTTFFF"), mask("FFFFTTT"))
    assert_edges(
        filtered,
        ((0, 4, 1.), (1, 4, 5.), (1, 5, 3.),
         (2, 4, 4.), (2, 5, 2.), (3, 6, 6.)))

    # All edges, opposite mode roles
    filtered = tm._filtered_edges(net, mask("FFFFTTT"), mask("TTTTFFF"))
    assert_edges(
        filtered,
        ((0, 0, 1.), (0, 1, 5.), (1, 1, 3.),
         (0, 2, 4.), (1, 2, 2.), (2, 3, 6.)))

    # Not all edges
    filtered = tm._filtered_edges(net, mask("TTTTFFF"), mask("FFFFFTT"))
    assert_edges(filtered, ((1, 5, 3.), (2, 5, 2.), (3, 6, 6.)))

    # One mode is empty
    filtered = tm._filtered_edges(net, mask("TTTTFFF"), mask("FFFFFFF"))
    assert_edges(filtered, ())

    # The other mode is empty
    filtered = tm._filtered_edges(net, mask("FFFFFFF"), mask("FFFFFTT"))
    assert_edges(filtered, ())

    # Both modes are empty
    filtered = tm._filtered_edges(net, mask("FFFFFFF"), mask("FFFFFFF"))
    assert_edges(filtered, ())

    # Graph is empty
    net = Network(range(7), sp.csr_matrix((7, 7)))
    filtered = tm._filtered_edges(net, mask("TTTTFFF"), mask("FFFFFFF"))
    assert_edges(filtered, ())
def summarize_(net: Network):
    """Return a summary of a network: node count plus an HTML description.

    A network with exactly one set of edges is described in a single line
    with its number of nodes and edges; otherwise the description gives the
    number of nodes and, if there are any edge sets, a list with the size
    and directedness of each.

    Args:
        net: the network to summarize

    Returns:
        PartialSummary constructed from the number of nodes and the
        HTML-formatted details
    """
    n = net.number_of_nodes()
    if len(net.edges) == 1:
        nettype = ['Network', 'Directed network'][net.edges[0].directed]
        details = f"<nobr>{nettype} with {n} nodes " \
                  f"and {net.number_of_edges()} edges</nobr>"
    elif net.edges:
        edge_items = "".join(
            f"<li>{len(edges)} edges, "
            f"{['undirected', 'directed'][edges.directed]}</li>"
            for edges in net.edges)
        # This has to be an f-string, otherwise the placeholder for the
        # number of edge types is shown verbatim; the list is also closed.
        details = f"<nobr>Network with {n} nodes " \
                  f"and {len(net.edges)} edge types:</nobr>" \
                  f"<ul>{edge_items}</ul>"
    else:
        # no edges at all: close the tag opened for the node count
        details = f"<nobr>Network with {n} nodes</nobr>"
    return PartialSummary(n, details)
def to_single_mode(net, mode_mask, conn_mask, weighting):
    """
    Convert a two-mode network into a single-mode network

    Args:
        net: network to convert
        mode_mask (boolean array): a mask with nodes to connect
        conn_mask (boolean array): a mask with nodes to use for connecting
        weighting (int): index into `Weighting`; selects the function that
            computes the new edges from the filtered ones

    Returns:
        single-mode network on the nodes selected by `mode_mask`
    """
    filtered = _filtered_edges(net, mode_mask, conn_mask)
    compute_edges = Weighting[weighting].func
    single_mode_edges = compute_edges(filtered)
    kept_nodes = net.nodes[mode_mask]
    return Network(kept_nodes, single_mode_edges)
def test_weights(self):
    """Check the grouped edge weights for every weighting option."""
    label_var = DiscreteVariable("label", values=tuple("abcde"))
    labels = np.array([[0, 0, 1, 3, 4, 0, 1, 2, 4]]).T
    items = Table.from_numpy(Domain([label_var], []), labels)

    # rows are (source, target, weight)
    edge_triples = np.array([[0, 1, 5], [1, 2, 4], [3, 4, 6],
                             [0, 5, 3], [1, 6, 1], [2, 7, 2],
                             [3, 8, 8], [4, 8, 7], [5, 6, 2]])
    src, dst, weights = edge_triples.T
    edges = sp.coo_matrix((weights.astype(float), (src, dst)), shape=(9, 9))
    network = Network(items, edges)

    widget = self.widget
    buttons = widget.controls.weighting.buttons
    expected = np.zeros((5, 5), dtype=float)

    def assert_mapped_edges():
        # compare the edges of the mapped network with `expected`
        groups = widget._map_network()
        np.testing.assert_equal(groups.edges[0].edges.todense(), expected)

    self.send_signal(widget.Inputs.network, network)

    buttons[widget.NoWeights].click()
    for i, j in [(0, 1), (1, 2), (3, 4)]:
        expected[i, j] = 1
    assert_mapped_edges()

    buttons[widget.WeightByDegrees].click()
    expected[0, 1] = 3
    expected[1, 2] = 1
    expected[3, 4] = 2
    assert_mapped_edges()

    buttons[widget.WeightByWeights].click()
    widget.normalize = False
    expected[0, 1] = 7
    expected[1, 2] = 2
    expected[3, 4] = 14
    assert_mapped_edges()

    widget.normalize = True
    expected[0, 1] = 1 / sqrt(10 * 3) + 4 / sqrt(10 * 6) + 2 / sqrt(5 * 3)
    expected[1, 2] = 2 / sqrt(6 * 2)
    expected[3, 4] = 6 / sqrt(14 * 13) + 8 / sqrt(14 * 15)
    assert_mapped_edges()
def _set_graph(self, data, edges=None):
    """Send the widget a network with `data` as nodes.

    Args:
        data: nodes of the network
        edges: edges of the network; when None, an empty sparse matrix of
            shape (len(data), len(data)) is used
    """
    if edges is None:
        n_nodes = len(data)
        edges = sp.csr_matrix((n_nodes, n_nodes))
    net = Network(data, edges)
    self.send_signal(self.widget.Inputs.network, net)
def test_empty_network(self):
    """The widget must accept a network without nodes and edges."""
    empty_net = Network([], [])
    network_input = self.widget.Inputs.network
    # should not crash
    self.send_signal(network_input, empty_net)
def wrapped(*args):
    """Call `f` (from the enclosing scope) and wrap its matrix in a Network.

    The result of `f` is converted to a sparse CSR matrix of edges; there
    are `len(m)` nodes. The network is named after the function and its
    arguments.
    """
    matrix = f(*args)
    nodes = range(len(matrix))
    edges = sp.csr_matrix(matrix)
    # a single argument is shown as "name(x)" rather than "name(x,)"
    label = f"{f.__name__}{args}".replace(",)", ")")
    return Network(nodes, edges, name=label)
def wrapped(*args):
    """Call `f` (from the enclosing scope) and wrap its edges in a Network.

    The number of nodes is the first dimension of the returned edges. The
    network is named after the function and its arguments.
    """
    net_edges = f(*args)
    n_nodes = net_edges.shape[0]
    # a single argument is shown as "name(x)" rather than "name(x,)"
    label = f"{f.__name__}{args}".replace(",)", ")")
    return Network(range(n_nodes), net_edges, name=label)
def _create_net(edges, n=None):
    """Build a network from an iterable of (row, col, weight) triples.

    Args:
        edges: iterable of (row, col, weight) triples
        n: number of nodes; when None, it is the largest index in `edges`
            plus one

    Returns:
        Network whose nodes are `np.arange(n)` and whose edges are a sparse
        COO matrix of shape (n, n)
    """
    row, col, data = zip(*edges)
    if n is None:
        n = max(row + col) + 1
    matrix = sp.coo_matrix((data, (row, col)), shape=(n, n))
    return Network(np.arange(n), matrix)