def test_edge_update_nested_lists():
    """In-place edits to nested edge features are visible on later reads.

    Stores a feature mapping containing a nested dict, a list and a dict
    inside that list, mutates each level through ``graph.edge_features`` and
    reads the values back through the same property.
    """
    source = Document(text='Document0')
    target = Document(text='Document1')
    graph = GraphDocument(force_undirected=True)
    graph.add_single_edge(source, target)
    key = graph._get_edge_key(source.id, target.id)

    def features():
        # Go through the property on every access, exactly like a caller would.
        return graph.edge_features[key]

    nested_payload = {'nested': True, 'list': ['elem1', 'elem2', {'inlist': 'here'}]}
    graph.edge_features[key] = {'hey': nested_payload, 'hoy': [0, 1]}

    features()['hey']['nested'] = False
    features()['hey']['list'][1] = True
    features()['hey']['list'][2]['inlist'] = 'not here'
    features()['hoy'][0] = 1

    assert features()['hey']['nested'] is False
    assert features()['hey']['list'][1] is True
    assert features()['hey']['list'][2]['inlist'] == 'not here'
    assert features()['hoy'][0] == 1
def test_graph_add_multiple_nodes():
    """``add_nodes`` with four documents yields four nodes and zero edges."""
    texts = ('Document0', 'Document1', 'Document2', 'Document3')
    documents = [Document(text=text) for text in texts]

    graph = GraphDocument()
    graph.add_nodes(documents)

    assert graph.num_nodes == 4
    assert graph.num_edges == 0
def test_add_single_edge_from_id_strings_non_existing_nodes():
    """Connecting two ids that were never added as nodes raises ``AssertionError``."""
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')
    edge_features = {'text': 'I connect Doc0 and Doc1'}

    # Neither document has been added to the graph, only their ids are passed.
    with pytest.raises(AssertionError):
        graph.add_single_edge(source.id, target.id, features=edge_features)
def test_undirected_graph():
    """The ``undirected`` flag defaults to False and survives copy constructors.

    A graph created with ``force_undirected=True`` reports ``undirected`` as
    True, and so do graphs built from that graph or from its ``proto`` without
    passing the flag again.
    """
    default_graph = GraphDocument()
    assert default_graph.undirected is False

    forced = GraphDocument(force_undirected=True)
    assert forced.undirected is True

    rebuilt_from_graph = GraphDocument(forced)
    assert rebuilt_from_graph.undirected is True

    rebuilt_from_proto = GraphDocument(forced.proto)
    assert rebuilt_from_proto.undirected is True
def test_from_dgl_graph_without_edges():
    """A DGL graph holding two nodes and no edges loads as a two-node graph."""
    from dgl import DGLGraph

    edgeless = DGLGraph()
    edgeless.add_nodes(2)

    loaded = GraphDocument.load_from_dgl_graph(edgeless)
    assert loaded.num_nodes == 2
def test_from_dgl_graph(graph):
    """Round-trip a graph through DGL and compare it with the original.

    The graph is exported with ``to_dgl_graph`` and re-imported with
    ``GraphDocument.load_from_dgl_graph``; node count, edge count and both
    coordinate arrays of the adjacency must match the original.

    :param graph: the ``GraphDocument`` under test (supplied by the caller,
        presumably a pytest fixture -- confirm against the surrounding file)
    """
    dgl_graph = graph.to_dgl_graph()
    jina_graph = GraphDocument.load_from_dgl_graph(dgl_graph)

    assert graph.num_nodes == jina_graph.num_nodes
    assert graph.num_edges == jina_graph.num_edges
    # Check `row` as well as `col`: comparing `col` twice left one coordinate
    # array of the adjacency completely unverified.
    assert (graph.adjacency.row == jina_graph.adjacency.row).all()
    assert (graph.adjacency.col == jina_graph.adjacency.col).all()
def node_and_graph_encode(self, docs: DocumentArray, **kwargs):
    """
    Assign an embedding to every node and to the graph of each document.

    Each node first receives a one-element embedding holding its out-degree
    plus its in-degree. The graph embedding is then a one-element array with
    the sum, over all nodes, of the node embedding multiplied by the `weight`
    feature of every outgoing and every incoming edge of that node. Edge
    features are looked up under the key `<source id>-<target id>`.

    .. # noqa: DAR201
    :param docs: Array of GraphDocuments
    :param kwargs: additional arguments, not used here
    """
    for doc in docs:
        graph = GraphDocument(doc)

        # First pass: degree-based node embeddings.
        for node in graph.nodes:
            degree = graph.get_out_degree(node) + graph.get_in_degree(node)
            node.embedding = np.array([degree])

        # Second pass: weight each node embedding by all of its edges.
        graph_total = 0
        for node in graph.nodes:
            weighted = node.embedding[0]
            edge_keys = [
                f'{node.id}-{target.id}'
                for target in graph.get_outgoing_nodes(node)
            ]
            edge_keys.extend(
                f'{source.id}-{node.id}'
                for source in graph.get_incoming_nodes(node)
            )
            for key in edge_keys:
                weighted = weighted * graph.edge_features[key]['weight']
            graph_total += weighted

        graph.embedding = np.array([graph_total])
def graph():
    """Build a four-node directed graph whose edges carry a ``weight`` feature.

    Edges (by document index): 0->1, 0->2, 2->1, 1->3 and 2->3. All weights
    are 1 except 2->1, which has weight 10.

    :return: the populated ``GraphDocument``
    """
    texts = ('Document0', 'Document1', 'Document2', 'Document3')
    documents = [Document(text=text) for text in texts]
    weighted_edges = (
        (0, 1, 1),
        (0, 2, 1),
        (2, 1, 10),
        (1, 3, 1),
        (2, 3, 1),
    )

    result = GraphDocument()
    for source, target, weight in weighted_edges:
        result.add_single_edge(
            documents[source], documents[target], features={'weight': weight}
        )
    return result
def test_add_multiple_edges_from_string():
    """``add_edges`` accepts node ids once the nodes themselves were added.

    Adds four documents as nodes, connects them with five edges given as
    parallel lists of source and target ids, and hands the result to
    ``validate_graph``.
    """
    graph = GraphDocument()
    doc0, doc1, doc2, doc3 = (
        Document(text=text)
        for text in ('Document0', 'Document1', 'Document2', 'Document3')
    )
    graph.add_nodes([doc0, doc1, doc2, doc3])

    source_ids = [doc0.id, doc0.id, doc2.id, doc1.id, doc2.id]
    target_ids = [doc1.id, doc2.id, doc1.id, doc3.id, doc3.id]
    features = [
        {'text': 'I connect Doc0 and Doc1'},
        {'text': 'I connect Doc0 and Doc2'},
        {'text': 'I connect Doc2 and Doc1'},
        {'text': 'I connect Doc1 and Doc3'},
        {'text': 'I connect Doc2 and Doc3'},
    ]
    graph.add_edges(source_ids, target_ids, edge_features=features)

    validate_graph(graph)
def graph():
    """Build a four-node directed graph whose edges carry a ``text`` feature.

    Edges (by document index): 0->1, 0->2, 2->1, 1->3 and 2->3; each edge's
    ``text`` feature names the two documents it connects.

    Edges are created with ``add_single_edge`` rather than ``add_edge``: the
    latter is only exercised by the dedicated ``*_deprecated`` test, and the
    other graph builders in this file already use ``add_single_edge``.

    :return: the populated ``GraphDocument``
    """
    graph = GraphDocument()
    doc0 = Document(text='Document0')
    doc1 = Document(text='Document1')
    doc2 = Document(text='Document2')
    doc3 = Document(text='Document3')

    edges = (
        (doc0, doc1, 'I connect Doc0 and Doc1'),
        (doc0, doc2, 'I connect Doc0 and Doc2'),
        (doc2, doc1, 'I connect Doc2 and Doc1'),
        (doc1, doc3, 'I connect Doc1 and Doc3'),
        (doc2, doc3, 'I connect Doc2 and Doc3'),
    )
    for source, target, text in edges:
        graph.add_single_edge(source, target, features={'text': text})
    return graph
def test_undirected_graph_to_dgl(graph):
    """Export a graph to DGL, as given and after forcing it undirected.

    For the graph as given, the DGL node count, edge count and COO
    row/col arrays must equal the graph's own. For the copy built with
    ``force_undirected=True``, DGL must report twice as many edges, with
    rows equal to ``row`` followed by ``col`` and columns equal to ``col``
    followed by ``row``.

    :param graph: the ``GraphDocument`` under test
    """
    exported = graph.to_dgl_graph()
    exported_coo = exported.adjacency_matrix(scipy_fmt='coo')

    assert exported.num_nodes() == graph.num_nodes
    assert exported.num_edges() == graph.num_edges
    assert (graph.adjacency.row == exported_coo.row).all()
    assert (graph.adjacency.col == exported_coo.col).all()

    undirected_graph = GraphDocument(graph, force_undirected=True)
    exported_undirected = undirected_graph.to_dgl_graph()
    undirected_coo = exported_undirected.adjacency_matrix(scipy_fmt='coo')

    assert exported_undirected.num_nodes() == graph.num_nodes
    assert exported_undirected.num_edges() == graph.num_edges * 2

    expected_rows = np.concatenate(
        (undirected_graph.adjacency.row, undirected_graph.adjacency.col)
    )
    assert (expected_rows == undirected_coo.row).all()

    expected_cols = np.concatenate(
        (undirected_graph.adjacency.col, undirected_graph.adjacency.row)
    )
    assert (expected_cols == undirected_coo.col).all()
def test_remove_single_node_from_string_non_existing():
    """Removing an id that is not in the graph leaves the node count unchanged.

    The node is added with ``add_single_node`` so that this test does not go
    through the deprecated ``add_node`` entry point, which has its own
    dedicated ``*_deprecated`` test.
    """
    graph = GraphDocument()
    doc0 = Document(text='Document0')
    doc1 = Document(text='Document1')

    graph.add_single_node(doc0)
    assert graph.num_nodes == 1

    # doc1 was never added; removing its id must not affect doc0.
    graph.remove_single_node(doc1.id)
    assert graph.num_nodes == 1
def test_remove_single_node_from_string():
    """``remove_single_node`` called with an id drops exactly that node."""
    graph = GraphDocument()
    documents = [
        Document(text=text)
        for text in ('Document0', 'Document1', 'Document2', 'Document3')
    ]
    for document in documents:
        graph.add_single_node(document)

    removed = documents[0]
    assert len(graph.nodes) == 4
    assert removed.id in graph.nodes

    graph.remove_single_node(removed.id)

    assert len(graph.nodes) == 3
    assert removed.id not in graph.nodes
def validate_resp(resp):
    """Check the embeddings found in a response holding one graph document.

    The response must contain exactly one document. Wrapped as a
    ``GraphDocument`` it must have a graph embedding whose first value is 64
    and four nodes whose first embedding values are 2, 3, 3 and 2, in order.

    :param resp: response object exposing ``data.docs``
    """
    expected_node_values = (2, 3, 3, 2)

    assert len(resp.data.docs) == 1
    for doc in resp.data.docs:
        graph = GraphDocument(doc)
        assert graph.embedding[0] == 64
        assert len(graph.nodes) == 4
        for position, node in enumerate(graph.nodes):
            # Only the first four positions have an expected value.
            if position < len(expected_node_values):
                assert node.embedding[0] == expected_node_values[position]
def test_add_remove_node_deprecated():
    """The ``add_node`` / ``remove_node`` entry points still add and remove nodes."""
    graph = GraphDocument()
    first = Document(id='1')
    second = Document(id='2')

    for document in (first, second):
        graph.add_node(document)
    assert len(graph.nodes) == 2

    for document in (first, second):
        graph.remove_node(document)
    assert len(graph.nodes) == 0
def test_add_remove_edge_deprecated():
    """The ``add_edge`` / ``remove_edge`` entry points still add and remove edges.

    Removing the edge must keep both of its nodes in the graph.
    """
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')

    graph.add_edge(source, target, features={'text': 'I connect Doc0 and Doc1'})
    assert graph.num_nodes == 2
    assert graph.num_edges == 1

    graph.remove_edge(source, target)
    assert graph.num_nodes == 2
    assert graph.num_edges == 0
def test_remove_single_edge_from_string():
    """``remove_single_edge`` called with ids removes one edge and no nodes.

    Builds a four-node, five-edge graph using node ids, removes the edge
    between the first two documents and checks the counts before and after.
    """
    graph = GraphDocument()
    doc0, doc1, doc2, doc3 = (
        Document(text=text)
        for text in ('Document0', 'Document1', 'Document2', 'Document3')
    )
    graph.add_nodes([doc0, doc1, doc2, doc3])

    edges = (
        (doc0, doc1, 'I connect Doc0 and Doc1'),
        (doc0, doc2, 'I connect Doc0 and Doc2'),
        (doc2, doc1, 'I connect Doc2 and Doc1'),
        (doc1, doc3, 'I connect Doc1 and Doc3'),
        (doc2, doc3, 'I connect Doc2 and Doc3'),
    )
    for source, target, text in edges:
        graph.add_single_edge(source.id, target.id, features={'text': text})

    assert graph.num_nodes == 4
    assert graph.num_edges == 5

    graph.remove_single_edge(doc0.id, doc1.id)

    assert graph.num_nodes == 4
    assert graph.num_edges == 4
def test_add_single_edge_from_id_strings():
    """``add_single_edge`` accepts node ids once the nodes themselves were added.

    Adds four documents as nodes, connects them one edge at a time using
    their ids, and hands the result to ``validate_graph``.
    """
    graph = GraphDocument()
    doc0, doc1, doc2, doc3 = (
        Document(text=text)
        for text in ('Document0', 'Document1', 'Document2', 'Document3')
    )
    graph.add_nodes([doc0, doc1, doc2, doc3])

    edges = (
        (doc0, doc1, 'I connect Doc0 and Doc1'),
        (doc0, doc2, 'I connect Doc0 and Doc2'),
        (doc2, doc1, 'I connect Doc2 and Doc1'),
        (doc1, doc3, 'I connect Doc1 and Doc3'),
        (doc2, doc3, 'I connect Doc2 and Doc3'),
    )
    for source, target, text in edges:
        graph.add_single_edge(source.id, target.id, features={'text': text})

    validate_graph(graph)
# NOTE(review): the statements up to the first decorator are the tail of a
# definition whose `def` line lies outside this view; they are kept as-is.
dgl_undirected_adj_coo = dgl_undirected_graph.adjacency_matrix(
    scipy_fmt='coo')
assert dgl_undirected_graph.num_nodes() == graph.num_nodes
assert dgl_undirected_graph.num_edges() == graph.num_edges * 2
assert (np.concatenate(
    (undirected_graph.adjacency.row, undirected_graph.adjacency.col))
    == dgl_undirected_adj_coo.row).all()
assert (np.concatenate(
    (undirected_graph.adjacency.col, undirected_graph.adjacency.row))
    == dgl_undirected_adj_coo.col).all()


@pytest.mark.parametrize(
    'graph, expected_output',
    [(GraphDocument(force_undirected=True), 1), (GraphDocument(), 2)],
)
def test_graph_edge_behaviour_creation(graph, expected_output):
    """Add the same pair of documents as an edge in both directions.

    The parametrization expects one edge for the graph created with
    ``force_undirected=True`` and two edges for the default graph.

    :param graph: the ``GraphDocument`` instance supplied by the parametrization
    :param expected_output: the edge count expected after both insertions
    """
    doc0 = Document(text='Document0')
    doc1 = Document(text='Document1')
    graph.add_single_edge(doc0, doc1)
    # Same two documents, opposite direction.
    graph.add_single_edge(doc1, doc0)
    assert graph.num_edges == expected_output


# NOTE(review): the decorator below belongs to a definition that is not
# visible in this view.
@pytest.mark.parametrize(
    'graph, expected_output',
    [(GraphDocument(force_undirected=True), 1), (GraphDocument(), 2)],
)
def test_graph_add_multiple_edges():
    """``add_edges`` with documents creates the nodes and the edges in order.

    Five edges are added between four documents that were not added as nodes
    beforehand. The test then checks the node and edge counts, the text of
    each chunk, and, while iterating the graph, the endpoints and ``text``
    feature of each of the first five edges.
    """
    graph = GraphDocument()
    texts = ('Document0', 'Document1', 'Document2', 'Document3')
    doc0, doc1, doc2, doc3 = (Document(text=text) for text in texts)

    graph.add_edges(
        [doc0, doc0, doc2, doc1, doc2],
        [doc1, doc2, doc1, doc3, doc3],
        edge_features=[
            {'text': 'I connect Doc0 and Doc1'},
            {'text': 'I connect Doc0 and Doc2'},
            {'text': 'I connect Doc2 and Doc1'},
            {'text': 'I connect Doc1 and Doc3'},
            {'text': 'I connect Doc2 and Doc3'},
        ],
    )

    assert graph.num_nodes == 4
    assert graph.num_edges == 5

    for position, text in enumerate(texts):
        chunk = graph.chunks[position]
        assert chunk.text == text

    # (edge text, source text, target text) for each edge, in iteration order.
    expected_edges = (
        ('I connect Doc0 and Doc1', 'Document0', 'Document1'),
        ('I connect Doc0 and Doc2', 'Document0', 'Document2'),
        ('I connect Doc2 and Doc1', 'Document2', 'Document1'),
        ('I connect Doc1 and Doc3', 'Document1', 'Document3'),
        ('I connect Doc2 and Doc3', 'Document2', 'Document3'),
    )
    edge_features = graph.edge_features
    for position, (source, target) in enumerate(graph):
        if position >= len(expected_edges):
            continue
        edge_text, source_text, target_text = expected_edges[position]
        assert edge_features[f'{source.id}-{target.id}']['text'] == edge_text
        assert source.text == source_text
        assert target.text == target_text
def test_graph_document_from_graph(graph):
    """A ``GraphDocument`` built from another graph passes ``validate_graph``.

    :param graph: the source ``GraphDocument``
    """
    rebuilt = GraphDocument(graph)
    validate_graph(rebuilt)
def test_graph_document_from_proto(graph):
    """A ``GraphDocument`` built from a graph's ``_pb_body`` passes ``validate_graph``.

    :param graph: the ``GraphDocument`` whose ``_pb_body`` is wrapped
    """
    rebuilt = GraphDocument(graph._pb_body)
    validate_graph(rebuilt)
def test_validate_iteration_graph_without_edges():
    """Iterating a freshly created graph yields no items."""
    empty_graph = GraphDocument()
    yielded = [item for item in empty_graph]
    assert len(yielded) == 0