def test_meta_knowledge_graph_of_complex_graph_data():
    """
    Test the generate meta knowledge graph operation on complex graph data.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "complex_graph_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "complex_graph_edges.tsv"),
        ],
        "format": "tsv",
    }
    transformer = Transformer()
    transformer.transform(input_args)
    output_filename = os.path.join(TARGET_DIR, "test_meta_knowledge_graph-1.json")
    generate_meta_knowledge_graph(
        graph=transformer.store.graph,
        name="Complex Test Graph",
        filename=output_filename,
        edge_facet_properties=["aggregator_knowledge_source"],
    )
    data = json.load(open(output_filename))
    assert data["name"] == "Complex Test Graph"
    print(f"\n{json.dumps(data, indent=4)}")

def test_summarize_graph_inspector():
    """
    Test for Inspector-sourced graph stats, comparing the resulting stats.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'graph_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'graph_edges.tsv'),
        ],
        'format': 'tsv',
    }
    transformer = Transformer(stream=True)
    inspector = GraphSummary('Test Graph Summary - Streamed')
    transformer.transform(input_args=input_args, inspector=inspector)
    output_filename = os.path.join(TARGET_DIR, 'test_graph-summary-from-inspection.json')
    with open(output_filename, 'w') as gsh:
        # write the summary to the open file handle
        # (the original passed the filename here, leaving gsh unused)
        inspector.save(gsh)
    data = json.load(open(output_filename))
    assert data['name'] == 'Test Graph Summary - Streamed'
    assert 'NCBIGene' in data['nodes']['biolink:Gene']['id_prefixes']
    assert 'REACT' in data['nodes']['biolink:Pathway']['id_prefixes']
    assert 'HP' in data['nodes']['biolink:PhenotypicFeature']['id_prefixes']
    assert data['nodes']['biolink:Gene']['count'] == 178
    assert len(data['nodes']) == 8
    assert len(data['edges']) == 13

def test_validate_by_stream_inspector():
    """
    Test the validate function by streaming graph data
    through a Transformer.process() Inspector.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "graph_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "graph_edges.tsv"),
        ],
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }

    # the Validator validates against the currently set Biolink Model release
    Validator.set_biolink_model("1.8.2")
    validator = Validator()

    transformer = Transformer(stream=True)
    transformer.transform(
        input_args=input_args,
        # streaming processing discards the graph data after inspection
        output_args={"format": "null"},
        # ...then we inject the Inspector into the transform() call,
        # for the underlying Transformer.process() to use
        inspector=validator,
    )
    validator.write_report()
    e = validator.get_errors()
    assert len(e) == 0

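# A minimal sketch of a custom inspector, under the assumption (suggested by
# the Validator and GraphSummary usage above) that an inspector is simply a
# callable that Transformer.process() invokes once per streamed record:
class CountingInspector:
    def __init__(self):
        self.records_seen = 0

    def __call__(self, *record):
        # tally every node/edge record streamed through the Transformer
        self.records_seen += 1
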
def test_clique_merge():
    """
    Test for clique merge.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'cm_nodes.csv'),
            os.path.join(RESOURCE_DIR, 'cm_edges.csv'),
        ],
        'format': 'csv',
    }
    t = Transformer()
    t.transform(input_args)
    updated_graph, clique_graph = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )
    leaders = NxGraph.get_node_attributes(updated_graph, 'clique_leader')
    leader_list = list(leaders.keys())
    leader_list.sort()
    assert len(leader_list) == 2
    n1 = updated_graph.nodes()[leader_list[0]]
    assert n1['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:100302240' in n1['same_as']
    assert 'ENSEMBL:ENSG00000284458' in n1['same_as']
    n2 = updated_graph.nodes()[leader_list[1]]
    assert n2['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:8202' in n2['same_as']
    assert 'OMIM:601937' in n2['same_as']
    assert 'ENSEMBL:ENSG00000124151' not in n2['same_as']

def _stream_transform(query):
    """
    Transform an input to an output via Transformer where streaming is enabled.
    """
    t1 = Transformer(stream=True)
    t1.transform(query[0], query[1])
    output = query[1]
    if output["format"] in {"tsv", "csv", "jsonl"}:
        input_args = {
            "filename": [
                f"{output['filename']}_nodes.{output['format']}",
                f"{output['filename']}_edges.{output['format']}",
            ],
            "format": output["format"],
        }
    elif output["format"] in {"neo4j"}:
        input_args = {
            "uri": DEFAULT_NEO4J_URL,
            "username": DEFAULT_NEO4J_USERNAME,
            "password": DEFAULT_NEO4J_PASSWORD,
            "format": "neo4j",
        }
    else:
        input_args = {
            "filename": [f"{output['filename']}"],
            "format": output["format"],
        }
    t2 = Transformer()
    t2.transform(input_args)
    assert t2.store.graph.number_of_nodes() == query[2]
    assert t2.store.graph.number_of_edges() == query[3]

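# Hypothetical `query` tuple for _stream_transform, shaped as
# (input_args, output_args, expected_node_count, expected_edge_count). The
# counts reuse the graph_nodes/graph_edges totals asserted by the streamed
# MetaKnowledgeGraph test below, but this parameterization itself is
# illustrative only, not a fixture from this suite:
example_query = (
    {
        "filename": [
            os.path.join(RESOURCE_DIR, "graph_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "graph_edges.tsv"),
        ],
        "format": "tsv",
    },
    {"filename": os.path.join(TARGET_DIR, "streamed"), "format": "tsv"},
    512,  # expected node count (per the streamed MKG test below)
    540,  # expected edge count (per the streamed MKG test below)
)
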
def test_transformer_infores_parser_prefix_rewrite():
    """
    Test infores coercion with a (regex, substitution, prefix) rewrite rule
    applied to 'provided_by' and 'aggregator_knowledge_source'.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "test_infores_coercion_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "test_infores_coercion_edges.tsv"),
        ],
        "format": "tsv",
        "provided_by": (r"\(.+\)", "", "Monarch"),
        "aggregator_knowledge_source": (r"\(.+\)", "", "Monarch"),
    }
    t = Transformer()
    t.transform(input_args=input_args)
    n1 = t.store.graph.nodes()["FlyBase:FBgn0000008"]
    assert "provided_by" in n1
    assert len(n1["provided_by"]) == 1
    assert "infores:monarch-flybase" in n1["provided_by"]
    n2 = t.store.graph.nodes()["GO:0005912"]
    assert "provided_by" in n2
    assert len(n2["provided_by"]) == 1
    assert "infores:monarch-gene-ontology" in n2["provided_by"]
    et = list(t.store.graph.get_edge("FlyBase:FBgn0000008", "GO:0005912").values())[0]
    assert "infores:monarch-gene-ontology" in et["aggregator_knowledge_source"]
    irc = t.get_infores_catalog()
    assert len(irc) == 2
    assert "Gene Ontology (Monarch version 202012)" in irc
    assert "infores:monarch-gene-ontology" in irc["Gene Ontology (Monarch version 202012)"]

def test_transformer_infores_basic_formatting():
    """
    Test infores coercion with boolean-valued flags: knowledge source
    names are formatted into default infores identifiers.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "test_infores_coercion_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "test_infores_coercion_edges.tsv"),
        ],
        "format": "tsv",
        "provided_by": True,
        "aggregator_knowledge_source": "true",
    }
    t = Transformer()
    t.transform(input_args=input_args)
    n1 = t.store.graph.nodes()["FlyBase:FBgn0000008"]
    assert "provided_by" in n1
    assert len(n1["provided_by"]) == 1
    assert "infores:flybase-monarch-version-202012" in n1["provided_by"]
    n2 = t.store.graph.nodes()["GO:0005912"]
    assert "provided_by" in n2
    assert len(n2["provided_by"]) == 1
    assert "infores:gene-ontology-monarch-version-202012" in n2["provided_by"]
    et = list(t.store.graph.get_edge("FlyBase:FBgn0000008", "GO:0005912").values())[0]
    assert "infores:gene-ontology-monarch-version-202012" in et["aggregator_knowledge_source"]

def test_transformer_infores_suppression():
    """
    Test that False-valued flags suppress the 'provided_by' and
    'aggregator_knowledge_source' annotations.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "test_infores_coercion_nodes.tsv"),
            os.path.join(RESOURCE_DIR, "test_infores_coercion_edges.tsv"),
        ],
        "format": "tsv",
        "provided_by": "False",
        "aggregator_knowledge_source": False,
    }
    t = Transformer()
    t.transform(input_args=input_args)
    n1 = t.store.graph.nodes()["FlyBase:FBgn0000008"]
    assert "provided_by" not in n1
    n2 = t.store.graph.nodes()["GO:0005912"]
    assert "provided_by" not in n2
    et = list(t.store.graph.get_edge("FlyBase:FBgn0000008", "GO:0005912").values())[0]
    assert "aggregator_knowledge_source" not in et

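# Summary of the value shapes accepted by the 'provided_by' /
# 'aggregator_knowledge_source' input arguments, as exercised by the
# three infores coercion tests above:
#
#   (r"\(.+\)", "", "Monarch")  -> regex rewrite: strip matched text, prefix "Monarch"
#   True or "true"              -> coerce source names into default infores identifiers
#   False or "False"            -> suppress the annotation entirely
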
def test_generate_classical_meta_knowledge_graph():
    """
    Test generate meta knowledge graph operation.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'graph_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'graph_edges.tsv'),
        ],
        'format': 'tsv',
    }
    transformer = Transformer()
    transformer.transform(input_args)
    output_filename = os.path.join(TARGET_DIR, 'test_meta_knowledge_graph-1.json')
    generate_meta_knowledge_graph(transformer.store.graph, 'Test Graph', output_filename)
    data = json.load(open(output_filename))
    assert data['name'] == 'Test Graph'
    assert 'NCBIGene' in data['nodes']['biolink:Gene']['id_prefixes']
    assert 'REACT' in data['nodes']['biolink:Pathway']['id_prefixes']
    assert 'HP' in data['nodes']['biolink:PhenotypicFeature']['id_prefixes']
    assert data['nodes']['biolink:Gene']['count'] == 178
    assert len(data['nodes']) == 8
    assert len(data['edges']) == 13

def test_generate_streaming_meta_knowledge_graph_direct():
    """
    Test the generate meta knowledge graph operation, with
    MetaKnowledgeGraph as a direct Transformer.transform Inspector.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'graph_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'graph_edges.tsv'),
        ],
        'format': 'tsv',
    }
    transformer = Transformer(stream=True)
    mkg = MetaKnowledgeGraph('Test Graph - Streamed')
    transformer.transform(input_args=input_args, inspector=mkg)
    assert mkg.get_name() == 'Test Graph - Streamed'
    assert mkg.get_total_nodes_count() == 512
    assert mkg.get_number_of_categories() == 8
    assert mkg.get_total_edges_count() == 540
    assert mkg.get_edge_mapping_count() == 13
    assert 'NCBIGene' in mkg.get_category('biolink:Gene').get_id_prefixes()
    assert 'REACT' in mkg.get_category('biolink:Pathway').get_id_prefixes()
    assert 'HP' in mkg.get_category('biolink:PhenotypicFeature').get_id_prefixes()
    assert mkg.get_category('biolink:Gene').get_count() == 178

def parse_source(
    key: str,
    source: dict,
    output_directory: str,
    prefix_map: Optional[Dict[str, str]] = None,
    node_property_predicates: Optional[Set[str]] = None,
    predicate_mappings: Optional[Dict[str, str]] = None,
    checkpoint: bool = False,
) -> Sink:
    """
    Parse a source from a merge config YAML.

    Parameters
    ----------
    key: str
        Source key
    source: Dict
        Source configuration
    output_directory: str
        Location to write output to
    prefix_map: Optional[Dict[str, str]]
        Non-canonical CURIE mappings
    node_property_predicates: Optional[Set[str]]
        A set of predicates that ought to be treated as node properties
        (This is applicable for RDF)
    predicate_mappings: Optional[Dict[str, str]]
        A mapping of predicate IRIs to property names
        (This is applicable for RDF)
    checkpoint: bool
        Whether to serialize each individual source to a TSV

    Returns
    -------
    kgx.sink.sink.Sink
        Returns an instance of Sink

    """
    log.info(f"Processing source '{key}'")
    if not key:
        key = os.path.basename(source["input"]["filename"][0])
    input_args = prepare_input_args(
        key,
        source,
        output_directory,
        prefix_map,
        node_property_predicates,
        predicate_mappings,
    )
    transformer = Transformer(stream=True)
    transformer.transform(input_args)
    transformer.store.graph.name = key
    if checkpoint:
        log.info(f"Writing checkpoint for source '{key}'")
        checkpoint_output = f"{output_directory}/{key}" if output_directory else key
        transformer.save({"filename": checkpoint_output, "format": "tsv"})
    # The current "Callable" metadata is not needed at this point,
    # but causes peculiar problems downstream, so we clear it.
    transformer.store.clear_graph_metadata()
    return transformer.store

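# Illustrative `source` entry as it might appear in a merge config YAML,
# mirroring the source["input"]["filename"] access above (the keys and
# paths shown are assumptions for demonstration, not a fixture):
example_source = {
    "input": {
        "format": "tsv",
        "filename": [
            "data/example_nodes.tsv",
            "data/example_edges.tsv",
        ],
    },
}
# sink = parse_source("example-source", example_source, output_directory="output")
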
def neo4j_download(
    uri: str,
    username: str,
    password: str,
    output: str,
    output_format: Optional[str],
    output_compression: Optional[str],
    stream: bool,
    node_filters: Optional[Tuple] = None,
    edge_filters: Optional[Tuple] = None,
) -> Transformer:
    """
    Download nodes and edges from a Neo4j database.

    Parameters
    ----------
    uri: str
        Neo4j URI. For example, https://localhost:7474
    username: str
        Username for authentication
    password: str
        Password for authentication
    output: str
        Where to write the output (stdout, by default)
    output_format: Optional[str]
        The output format (``tsv``, by default)
    output_compression: Optional[str]
        The output compression type
    stream: bool
        Whether to parse input as a stream
    node_filters: Optional[Tuple]
        Node filters
    edge_filters: Optional[Tuple]
        Edge filters

    Returns
    -------
    kgx.Transformer
        The Transformer

    """
    transformer = Transformer(stream=stream)
    transformer.transform(
        {
            "uri": uri,
            "username": username,
            "password": password,
            "format": "neo4j",
            "node_filters": node_filters,
            "edge_filters": edge_filters,
        }
    )
    if not output_format:
        output_format = "tsv"
    transformer.save(
        {
            "filename": output,
            "format": output_format,
            "compression": output_compression,
        }
    )
    return transformer

def neo4j_upload(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    uri: str,
    username: str,
    password: str,
    stream: bool,
    node_filters: Optional[Tuple] = None,
    edge_filters: Optional[Tuple] = None,
) -> Transformer:
    """
    Upload a set of nodes/edges to a Neo4j database.

    Parameters
    ----------
    inputs: List[str]
        A list of files that contain nodes/edges
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    uri: str
        The full HTTP address for the Neo4j database
    username: str
        Username for authentication
    password: str
        Password for authentication
    stream: bool
        Whether to parse input as a stream
    node_filters: Optional[Tuple]
        Node filters
    edge_filters: Optional[Tuple]
        Edge filters

    Returns
    -------
    kgx.Transformer
        The Transformer

    """
    transformer = Transformer(stream=stream)
    transformer.transform(
        {
            "filename": inputs,
            "format": input_format,
            "compression": input_compression,
            "node_filters": node_filters,
            "edge_filters": edge_filters,
        }
    )
    transformer.save(
        {
            "uri": uri,
            "username": username,
            "password": password,
            "format": "neo4j",
        }
    )
    return transformer

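# Hedged usage sketch: a CSV-to-Neo4j-and-back round trip built from the two
# helpers above. The paths are hypothetical; the DEFAULT_NEO4J_* constants are
# the same ones used by the tests in this suite. Kept as comments so importing
# this module never triggers network calls:
#
#   neo4j_upload(
#       inputs=["data/example_nodes.csv", "data/example_edges.csv"],
#       input_format="csv",
#       input_compression=None,
#       uri=DEFAULT_NEO4J_URL,
#       username=DEFAULT_NEO4J_USERNAME,
#       password=DEFAULT_NEO4J_PASSWORD,
#       stream=True,
#   )
#   neo4j_download(
#       uri=DEFAULT_NEO4J_URL,
#       username=DEFAULT_NEO4J_USERNAME,
#       password=DEFAULT_NEO4J_PASSWORD,
#       output="output/neo_dump",
#       output_format="tsv",
#       output_compression=None,
#       stream=True,
#   )
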
def test_validate_json():
    """
    Validate against a valid representative Biolink Model compliant JSON.
    """
    input_args = {
        "filename": [os.path.join(RESOURCE_DIR, "valid.json")],
        "format": "json",
    }
    t = Transformer()
    t.transform(input_args)
    validator = Validator()
    validator.validate(t.store.graph)
    assert len(validator.get_errors()) == 0

def test_meta_knowledge_graph_multiple_category_and_predicate_parsing():
    """
    Test meta knowledge graph parsing of multiple categories.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'graph_multi_category_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'graph_multi_category_edges.tsv'),
        ],
        'format': 'tsv',
    }
    t = Transformer(stream=True)
    mkg = MetaKnowledgeGraph(name='Test Graph - Multiple Node Categories')
    t.transform(input_args=input_args, inspector=mkg)
    assert mkg.get_name() == 'Test Graph - Multiple Node Categories'
    assert mkg.get_total_nodes_count() == 10

    # unique set, including (shared) parent classes
    # (not including category 'unknown')
    assert mkg.get_number_of_categories() == 7

    assert mkg.get_node_count_by_category("biolink:Disease") == 1
    assert mkg.get_node_count_by_category("biolink:BiologicalEntity") == 5
    assert mkg.get_node_count_by_category("biolink:AnatomicalEntityEntity") == 0

    # sums up all the counts of node mappings across
    # all categories (not including category 'unknown')
    assert mkg.get_total_node_counts_across_categories() == 35

    # only counts 'valid' edges for which
    # subject and object nodes are in the nodes file
    assert mkg.get_total_edges_count() == 8

    # total number of distinct predicates
    assert mkg.get_predicate_count() == 2

    # counts edges with a given predicate
    # (ignoring edges with unknown subject or object identifiers)
    assert mkg.get_edge_count_by_predicate("biolink:has_phenotype") == 4
    assert mkg.get_edge_count_by_predicate("biolink:involved_in") == 0
    assert mkg.get_edge_mapping_count() == 25
    assert mkg.get_total_edge_counts_across_mappings() == 100

def test_rdf_transform3():
    """
    Test parsing an RDF N-Triples file and round-tripping.
    """
    input_args1 = {
        'filename': [os.path.join(RESOURCE_DIR, 'rdf', 'test1.nt')],
        'format': 'nt'
    }
    t1 = Transformer()
    t1.transform(input_args1)
    assert t1.store.graph.number_of_nodes() == 2
    assert t1.store.graph.number_of_edges() == 1

    output_args1 = {
        'filename': os.path.join(TARGET_DIR, 'test1-export.nt'),
        'format': 'nt'
    }
    t1.save(output_args1)

    input_args2 = {
        'filename': [os.path.join(TARGET_DIR, 'test1-export.nt')],
        'format': 'nt'
    }
    t2 = Transformer()
    t2.transform(input_args2)
    assert t2.store.graph.number_of_nodes() == 2
    assert t2.store.graph.number_of_edges() == 1

    # compare the original node against its round-tripped counterpart
    # (the original code also bound an n1t3 that re-read the same node
    # from t2, which was redundant and has been dropped)
    n1t1 = t1.store.graph.nodes()['ENSEMBL:ENSG0000000000001']
    n1t2 = t2.store.graph.nodes()['ENSEMBL:ENSG0000000000001']
    assert n1t1['type'] == n1t2['type'] == 'SO:0000704'
    assert len(n1t1['category']) == len(n1t2['category']) == 4
    assert 'biolink:Gene' in n1t1['category'] and 'biolink:Gene' in n1t2['category']
    assert ('biolink:GenomicEntity' in n1t1['category']
            and 'biolink:GenomicEntity' in n1t2['category'])
    assert ('biolink:NamedThing' in n1t1['category']
            and 'biolink:NamedThing' in n1t2['category'])
    assert n1t1['name'] == n1t2['name'] == 'Test Gene 123'
    assert n1t1['description'] == n1t2['description'] == 'This is a Test Gene 123'
    assert ('Test Dataset' in n1t1['provided_by']
            and 'Test Dataset' in n1t2['provided_by'])

def test_rdf_transform_with_filters1(query):
    """
    Test RDF transform with filters.
    """
    input_args = {
        'filename': [os.path.join(RESOURCE_DIR, 'rdf', 'test3.nt')],
        'format': 'nt',
        'node_filters': query[0],
        'edge_filters': query[1],
    }
    t = Transformer()
    t.transform(input_args)
    assert t.store.graph.number_of_nodes() == query[2]
    assert t.store.graph.number_of_edges() == query[3]

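# Hypothetical parameterization for the filtered RDF transform above. The
# filter shapes (property-to-value-set mappings) and the expected counts are
# assumptions for illustration, not fixtures from this suite:
example_rdf_filter_query = (
    {'category': {'biolink:Gene'}},             # query[0]: node_filters (assumed shape)
    {'predicate': {'biolink:interacts_with'}},  # query[1]: edge_filters (assumed shape)
    2,                                          # query[2]: expected node count (assumed)
    1,                                          # query[3]: expected edge count (assumed)
)
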
def test_neo_to_graph_transform():
    """
    Test to read from Neo4j and write to CSV.
    """
    input_args = {
        'uri': DEFAULT_NEO4J_URL,
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
        'format': 'neo4j',
    }
    output_filename = os.path.join(TARGET_DIR, 'neo_graph')
    output_args = {'filename': output_filename, 'format': 'csv'}
    t = Transformer()
    t.transform(input_args, output_args)
    assert t.store.graph.number_of_nodes() == 10
    assert t.store.graph.number_of_edges() == 11
    assert os.path.exists(f"{output_filename}_nodes.csv")
    assert os.path.exists(f"{output_filename}_edges.csv")

def test_clique_generation():
    """
    Test for generation of cliques.
    """
    input_args = {
        "filename": [
            os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
            os.path.join(RESOURCE_DIR, "cm_edges.csv"),
        ],
        "format": "csv",
    }
    t = Transformer()
    t.transform(input_args)
    updated_graph, clique_graph = clique_merge(
        target_graph=t.store.graph,
        prefix_prioritization_map=prefix_prioritization_map,
    )
    cliques = list(nx.strongly_connected_components(clique_graph))
    assert len(cliques) == 2

def test_transform_filters1(query):
    """
    Test transform with filters.
    """
    input_args = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'test2_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'test2_edges.tsv'),
        ],
        'format': 'tsv',
        'node_filters': query[0],
        'edge_filters': query[1],
    }
    t = Transformer()
    t.transform(input_args)
    assert t.store.graph.number_of_nodes() == query[2]
    assert t.store.graph.number_of_edges() == query[3]

def test_csv_to_neo4j_load_to_graph_transform(clean_database):
    """
    Test to load a CSV KGX file into Neo4j.
    """
    logger.debug("test_csv_to_neo4j_load...")
    input_args1 = {
        "filename": [
            os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
            os.path.join(RESOURCE_DIR, "cm_edges.csv"),
        ],
        "format": "csv",
    }
    t1 = Transformer()
    t1.transform(input_args1)
    output_args = {
        "uri": DEFAULT_NEO4J_URL,
        "username": DEFAULT_NEO4J_USERNAME,
        "password": DEFAULT_NEO4J_PASSWORD,
        "format": "neo4j",
    }
    t1.save(output_args)

    # Continue sequentially to test reading from Neo4j
    # and writing back out to CSV.
    logger.debug("test_neo4j_to_graph_transform")
    input_args = {
        "uri": DEFAULT_NEO4J_URL,
        "username": DEFAULT_NEO4J_USERNAME,
        "password": DEFAULT_NEO4J_PASSWORD,
        "format": "neo4j",
    }
    output_filename = os.path.join(TARGET_DIR, "neo_graph")
    output_args = {"filename": output_filename, "format": "csv"}
    t = Transformer()
    t.transform(input_args, output_args)
    assert t.store.graph.number_of_nodes() == 10
    assert t.store.graph.number_of_edges() == 11
    assert os.path.exists(f"{output_filename}_nodes.csv")
    assert os.path.exists(f"{output_filename}_edges.csv")

def test_merge():
    """
    Test for merging graphs.
    """
    input_args1 = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'merge', 'test1_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'merge', 'test1_edges.tsv'),
        ],
        'format': 'tsv',
    }
    t1 = Transformer()
    t1.transform(input_args1)
    input_args2 = {
        'filename': [
            os.path.join(RESOURCE_DIR, 'merge', 'test2_nodes.tsv'),
            os.path.join(RESOURCE_DIR, 'merge', 'test2_edges.tsv'),
        ],
        'format': 'tsv',
    }
    t2 = Transformer()
    t2.transform(input_args2)
    merged_graph = merge_all_graphs([t1.store.graph, t2.store.graph], preserve=True)
    assert len(merged_graph.nodes()) == 6
    assert len(merged_graph.edges()) == 8
    x1 = merged_graph.nodes()['x1']
    assert x1['name'] == 'node x1'
    assert isinstance(x1['category'], list)
    assert 'a' in x1['p1']
    assert '1' in x1['p1']
    x10 = merged_graph.nodes()['x10']
    assert x10['id'] == 'x10'
    assert x10['name'] == 'node x10'

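# Note on the merge semantics exercised above: with preserve=True, conflicting
# node property values from the source graphs are kept side by side rather
# than one overwriting the other, which is why both 'a' and '1' survive in
# x1['p1'] after the merge.
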