def test_skip_limit(database):
    """Check that the ``skip`` and ``limit`` options page through results.

    The same treats query is run twice -- first with ``limit=2``, then with
    ``skip=2, limit=2`` -- and the two pages must contain 2 and 1 results
    respectively.  Together they must bind ``n1`` to exactly the three
    expected CHEBI identifiers.
    """
    disease = {
        "categories": "biolink:Disease",
        "ids": "MONDO:0005148",
    }
    chemical = {"categories": "biolink:ChemicalSubstance"}
    treats = {
        "subject": "n1",
        "object": "n0",
        "predicates": "biolink:treats",
    }
    qgraph = {
        "nodes": {"n0": disease, "n1": chemical},
        "edges": {"e01": treats},
    }

    collected = []
    # Each entry pairs the query options with the expected page size.
    pages = (
        ({"limit": 2}, 2),
        ({"skip": 2, "limit": 2}, 1),
    )
    for options, page_size in pages:
        for record in database.run(get_query(qgraph, **options)):
            page = record["results"]
            collected.extend(page)
            assert len(page) == page_size

    bound_ids = {
        result["node_bindings"]["n1"][0]["id"]
        for result in collected
    }
    assert {
        "CHEBI:6801",
        "CHEBI:47612",
        "CHEBI:136043",
    } == bound_ids
def test_unknown_operator():
    """Check that an unrecognized compound-qgraph operator is rejected.

    A qgraph list whose first element is ``"DNE"`` must make ``get_query``
    raise ``ValueError`` with "Unrecognized operator" in its message.
    """
    empty_graph = {"nodes": {}, "edges": {}}
    qgraph = ["DNE", empty_graph]

    with pytest.raises(ValueError) as raised:
        get_query(qgraph)

    message = str(raised.value)
    assert "Unrecognized operator" in message
def test_invalid_node():
    """Check that a dict-valued node property makes ``get_query`` raise.

    The node ``n0`` carries a ``"dict"`` property whose value is itself a
    dictionary; ``get_query`` is expected to raise ``ValueError`` for it.
    """
    bad_node = {
        "categories": "biolink:BiologicalEntity",
        "dict": {"a": 1},
    }
    qgraph = {
        "nodes": {"n0": bad_node},
        "edges": {},
    }

    with pytest.raises(ValueError):
        get_query(qgraph)
def test_max_connectivity(database):
    """Check the ``max_connectivity`` option of ``get_query``.

    Queries for diseases treated by CHEBI:6801 with ``max_connectivity=5``
    and expects two results whose knowledge-graph nodes, sorted by name,
    are carcinoma, metformin and obesity disorder.
    """
    qgraph = {
        "nodes": {
            "n0": {
                "categories": "biolink:Disease",
            },
            "n1": {
                "categories": "biolink:ChemicalSubstance",
                "ids": "CHEBI:6801",
            },
        },
        "edges": {
            "e01": {
                "predicates": "biolink:treats",
                "subject": "n1",
                "object": "n0",
            },
        },
    }
    cypher = get_query(qgraph, max_connectivity=5)
    records = list(database.run(cypher))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    expected_names = ["carcinoma", "metformin", "obesity disorder"]
    for record in records:
        assert len(record["results"]) == 2
        ordered_nodes = sorted(
            record["knowledge_graph"]["nodes"].values(),
            key=lambda node: node["name"],
        )
        for position, node in enumerate(ordered_nodes):
            assert node["name"] == expected_names[position]
def test_too_many_not():
    """Check that ``NOT`` with two operands is rejected.

    ``get_query`` must raise ``ValueError`` whose message contains
    "NOT must have exactly one operand".
    """
    operands = [{"nodes": {}, "edges": {}} for _ in range(2)]
    qgraph = ["NOT", *operands]

    with pytest.raises(ValueError) as raised:
        get_query(qgraph)

    message = str(raised.value)
    assert "NOT must have exactly one operand" in message
def test_publications(database):
    """Check how edge publications appear in the knowledge graph.

    The single edge between NCBIGene:836 and NCBIGene:841 must carry exactly
    one attribute, describing its ``publications`` value.
    """
    qgraph = {
        "nodes": {
            "n0": {"ids": "NCBIGene:836"},
            "n1": {"ids": "NCBIGene:841"},
        },
        "edges": {
            "e01": {"subject": "n0", "object": "n1"},
        },
    }

    first_record = list(database.run(get_query(qgraph)))[0]
    kg_edges = first_record["knowledge_graph"]["edges"]
    assert len(kg_edges) == 1

    only_edge = list(kg_edges.values())[0]
    edge_attributes = only_edge["attributes"]
    assert len(edge_attributes) == 1

    expected_attribute = {
        "original_attribute_name": "publications",
        "attribute_type_id": "EDAM:data_0971",
        "value": ["xxx"],
    }
    assert edge_attributes[0] == expected_attribute
def test_bool(database):
    """Check querying on a boolean edge property.

    The treats edge is constrained with ``"fda_approved": True``; exactly
    one result is expected, and the knowledge-graph nodes sorted by name
    must be metformin and type 2 diabetes mellitus.
    """
    qgraph = {
        "nodes": {
            "n0": {
                "categories": "biolink:ChemicalSubstance",
            },
            "n1": {
                "categories": "biolink:Disease",
            },
        },
        "edges": {
            "e01": {
                "subject": "n0",
                "object": "n1",
                "predicates": "biolink:treats",
                "fda_approved": True,
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    expected_names = [
        "metformin",
        "type 2 diabetes mellitus",
    ]
    for record in records:
        assert len(record["results"]) == 1
        ordered_nodes = sorted(
            record["knowledge_graph"]["nodes"].values(),
            key=lambda node: node["name"],
        )
        for position, node in enumerate(ordered_nodes):
            assert node["name"] == expected_names[position]
def test_use_hints(database):
    """Check that ``use_hints=True`` puts an index hint into the Cypher.

    The query is generated with ``use_hints=True`` and ``reasoner=False``;
    it must contain "USING INDEX" and is then submitted to the database.
    """
    gene = {
        "ids": [
            "NCBIGene:841",
        ],
        "categories": "biolink:Gene",
    }
    edge = {
        "predicates": [
            "biolink:molecularly_interacts_with",
            "biolink:increases_expression_of",
        ],
        "subject": "n1",
        "object": "n0",
    }
    qgraph = {
        "nodes": {"n0": gene, "n1": {}},
        "edges": {"e01": edge},
    }

    cypher = get_query(qgraph, use_hints=True, reasoner=False)
    assert "USING INDEX" in cypher
    # Submit the hinted query to the database as well.
    database.run(cypher)
def test_onehop_subclass_categories():
    """Check that the category is dropped from a pinned node's pattern.

    In the part of the generated query before the first ``WHERE``, no
    ``-``-separated piece that mentions ``n0`` may contain
    ``PhenotypicFeature``, and at least one such piece must exist.
    """
    pinned = {"ids": ["HP:0011015"], "categories": ["biolink:PhenotypicFeature"]}
    edge = {
        "subject": "n1",
        "object": "n0",
        "predicates": ["biolink:treats"],
    }
    qgraph = {
        "nodes": {"n0": pinned, "n1": {}},
        "edges": {"e01": edge},
    }

    # Only the text before the first WHERE is inspected.
    match_part = get_query(qgraph).split('WHERE')[0]
    n0_pieces = [piece for piece in match_part.split('-') if 'n0' in piece]
    for piece in n0_pieces:
        assert 'PhenotypicFeature' not in piece
    # Make sure the check above actually looked at something.
    assert n0_pieces
def test_predicate_list():
    """Check the Cypher generated for an edge with a list of predicates.

    With ``reasoner=False`` the clause must contain the expected edge
    pattern (which also lists ``biolink:has_capability``) and the expected
    ``startNode`` conditions that pin down the edge direction.
    """
    qgraph = {
        "nodes": {
            "n0": {"categories": "biolink:Disease"},
            "n1": {"categories": "biolink:PhenotypicFeature"},
        },
        "edges": {
            "e01": {
                "predicates": ["biolink:capable_of", "biolink:affects_expression_in"],
                "subject": "n0",
                "object": "n1",
            },
        },
    }
    cypher = get_query(qgraph, reasoner=False)

    # The pattern itself is written without an arrow ...
    expected_pattern = (
        "(`n0`:`biolink:Disease`)"
        "-[`e01`:`biolink:capable_of`|`biolink:affects_expression_in`|`biolink:has_capability`]-"
        "(`n1`:`biolink:PhenotypicFeature`)"
    )
    assert expected_pattern in cypher

    # ... and the direction is expressed through startNode() conditions.
    expected_direction = (
        '(type(`e01`) in ["biolink:capable_of", "biolink:affects_expression_in"] AND startNode(`e01`) = `n0`) '
        'OR (type(`e01`) in ["biolink:has_capability"] AND startNode(`e01`) = `n1`))'
    )
    assert expected_direction in cypher
def test_multiedge_or_complicated(database):
    """Check an ``OR`` of two qgraphs that share the unpinned node ``n0``.

    The first operand has one edge, the second has two.  Two results are
    expected, with five knowledge-graph nodes whose names (sorted) match
    the expected list.
    """
    first_branch = {
        "nodes": {
            "n0": {},
            "n1a": {
                "ids": ["NCBIGene:836"]
            },
        },
        "edges": {
            "e10a": {
                "subject": "n0",
                "object": "n1a",
                "predicates": "biolink:genetic_association",
            },
        },
    }
    second_branch = {
        "nodes": {
            "n0": {},
            "n1b": {
                "ids": "NCBIGene:841"
            },
            "n2b": {
                "ids": "HP:0012592"
            },
        },
        "edges": {
            "e10b": {
                "subject": "n0",
                "object": "n1b",
                "predicates": "biolink:genetic_association",
            },
            "e20b": {
                "subject": "n0",
                "object": "n2b",
                "predicates": "biolink:has_phenotype",
            },
        },
    }
    qgraph = ["OR", first_branch, second_branch]

    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    expected_names = [
        "CASP3",
        "CASP8",
        "albuminaria",
        "obesity disorder",
        "type 2 diabetes mellitus",
    ]
    for record in records:
        assert len(record["results"]) == 2
        ordered_nodes = sorted(
            record["knowledge_graph"]["nodes"].values(),
            key=lambda node: node["name"],
        )
        assert len(record["knowledge_graph"]["nodes"]) == 5
        for position, node in enumerate(ordered_nodes):
            assert node["name"] == expected_names[position]
def test_too_many_xor():
    """Check that ``XOR`` with three operands is rejected.

    ``get_query`` must raise ``ValueError`` whose message contains
    "XOR must have exactly two operands".
    """
    operands = [{"nodes": {}, "edges": {}} for _ in range(3)]
    qgraph = ["XOR", *operands]

    with pytest.raises(ValueError) as raised:
        get_query(qgraph)

    message = str(raised.value)
    assert "XOR must have exactly two operands" in message
def test_empty(database):
    """Check the output for a qgraph with no nodes and no edges.

    One result with empty node and edge bindings is expected, and the
    knowledge graph's nodes and edges must both compare equal to ``[]``.
    """
    qgraph = {"nodes": {}, "edges": {}}

    first_record = list(database.run(get_query(qgraph)))[0]
    results = first_record["results"]
    knowledge_graph = first_record["knowledge_graph"]

    assert len(results) == 1
    assert results[0]["node_bindings"] == {}
    assert results[0]["edge_bindings"] == {}
    assert knowledge_graph["nodes"] == []
    assert knowledge_graph["edges"] == []
def test_dont_subclass(database):
    """Check a node-only query with ``subclass=False``.

    With subclassing disabled, the single result must bind ``n0`` to the
    queried id MONDO:0000001 only.
    """
    qgraph = {
        "nodes": {"n0": {"ids": ["MONDO:0000001"]}},
        "edges": {},
    }
    cypher = get_query(qgraph, subclass=False)

    first_record = list(database.run(cypher))[0]
    results = first_record["results"]
    assert len(results) == 1

    expected_bindings = {
        "n0": [{"id": "MONDO:0000001"}],
    }
    assert results[0]["node_bindings"] == expected_bindings
def test_not(database):
    """Check an ``AND`` qgraph that contains a nested ``NOT`` operand.

    The positive operand asks for chemical substances that treat
    MONDO:0005148; the ``NOT`` operand describes the same ``n0`` treating
    MONDO:0011122.  The knowledge-graph nodes, sorted by name, must match
    the expected names.
    """
    positive = {
        "nodes": {
            "n0": {
                "categories": "biolink:ChemicalSubstance",
            },
            "n1": {
                "categories": "biolink:Disease",
                "ids": "MONDO:0005148",
            },
        },
        "edges": {
            "e01": {
                "subject": "n0",
                "object": "n1",
                "predicates": "biolink:treats",
            },
        },
    }
    negated = {
        "nodes": {
            "n2": {
                "categories": [
                    "biolink:Disease",
                ],
                "ids": "MONDO:0011122"
            },
        },
        "edges": {
            "e20": {
                "subject": "n0",
                "object": "n2",
                "predicates": "biolink:treats",
            },
        },
    }
    qgraph = ["AND", positive, ["NOT", negated]]

    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    expected_names = ["anagliptin", "type 2 diabetes mellitus"]
    for record in records:
        ordered_nodes = sorted(
            record["knowledge_graph"]["nodes"].values(),
            key=lambda node: node["name"],
        )
        for position, node in enumerate(ordered_nodes):
            assert node["name"] == expected_names[position]
def test_xor(database):
    """Check an ``AND`` qgraph that contains a nested ``XOR`` operand.

    ``n0`` is any disease; the ``XOR`` branches describe it being treated
    by CHEBI:6801 or by CHEBI:136043.  Two results and three
    knowledge-graph nodes are expected.
    """
    any_disease = {
        "nodes": {
            "n0": {
                "categories": "biolink:Disease",
            },
        },
        "edges": {},
    }
    treated_by_first = {
        "nodes": {
            "n1": {
                "categories": "biolink:ChemicalSubstance",
                "ids": "CHEBI:6801",
            }
        },
        "edges": {
            "e01": {
                "subject": "n1",
                "object": "n0",
                "predicates": "biolink:treats",
            },
        },
    }
    treated_by_second = {
        "nodes": {
            "n2": {
                "categories": "biolink:ChemicalSubstance",
                "ids": "CHEBI:136043",
            }
        },
        "edges": {
            "e02": {
                "subject": "n2",
                "object": "n0",
                "predicates": "biolink:treats",
            },
        },
    }
    qgraph = ["AND", any_disease, ["XOR", treated_by_first, treated_by_second]]

    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    for record in records:
        assert len(record["results"]) == 2
        assert len(record["knowledge_graph"]["nodes"]) == 3
def test_category_none(database):
    """Check a node whose ``categories`` value is ``None``.

    The node is pinned to MONDO:0005148 and exactly one result is expected.
    """
    pinned = {
        "ids": "MONDO:0005148",
        "categories": None,
    }
    qgraph = {
        "nodes": {"n0": pinned},
        "edges": {},
    }

    first_record = list(database.run(get_query(qgraph)))[0]
    assert len(first_record["results"]) == 1
def test_curie_int():
    """Check that an integer ``ids`` value ends up as a string in Cypher.

    The node is given ``"ids": 12``; the clause generated with
    ``reasoner=False`` must contain the id as the quoted string ``"12"``.
    """
    node = {
        "categories": "biolink:Disease",
        "ids": 12,
    }
    qgraph = {
        "nodes": {"n0": node},
        "edges": {},
    }

    cypher = get_query(qgraph, reasoner=False)
    # The integer must show up as a quoted string in the property map.
    assert "{`id`: \"12\"}" in cypher
def test_node_subclass(database):
    """Check a node-only query that is expanded to subclasses.

    Also verifies that ``get_query`` leaves the input qgraph unmodified.
    Two results are expected, at least one of which binds ``n0`` to
    MONDO:0005148 with ``qnode_id`` MONDO:0000001.
    """
    qgraph = {
        "nodes": {"n0": {"ids": ["MONDO:0000001"]}},
        "edges": {},
    }
    # Snapshot the input so mutation by get_query can be detected.
    snapshot = copy.deepcopy(qgraph)
    cypher = get_query(qgraph)
    assert qgraph == snapshot

    first_record = list(database.run(cypher))[0]
    assert len(first_record['results']) == 2

    subclass_binding = [{"id": "MONDO:0005148", "qnode_id": "MONDO:0000001"}]
    assert any(
        result["node_bindings"]["n0"] == subclass_binding
        for result in first_record["results"]
    )
def test_categories(database):
    """Check a node query that lists two categories.

    A single node with categories Disease and Gene is expected to yield
    seven results.
    """
    node = {
        "categories": [
            "biolink:Disease",
            "biolink:Gene",
        ]
    }
    qgraph = {
        "nodes": {"n0": node},
        "edges": {},
    }

    records = list(database.run(get_query(qgraph)))
    first_record = records[0]
    assert len(first_record['results']) == 7
def test_index_usage_multiple_labels():
    """Check the index hint for a pinned node with several categories.

    With two categories and ``use_hints=True`` the generated Cypher must
    hint the ``biolink:NamedThing`` id index on ``n0_superclass``.
    """
    pinned = {
        "ids": ["MONDO:0005148"],
        "categories": ["biolink:Disease", "biolink:PhenotypicFeature"],
    }
    qgraph = {
        "nodes": {"n0": pinned},
        "edges": {},
    }

    cypher = get_query(qgraph, use_hints=True)
    # The hinted variable is the node id with a ``_superclass`` suffix.
    expected_hint = "USING INDEX `n0_superclass`:`biolink:NamedThing`(id)"
    assert expected_hint in cypher
def test_index_usage_single_labels():
    """Check the index hint for a pinned node with a single category.

    With one category and ``use_hints=True`` the generated Cypher must hint
    the id index of that category (``biolink:Disease``) on
    ``n0_superclass``.
    """
    pinned = {
        "ids": ["MONDO:0005148"],
        "categories": "biolink:Disease",
    }
    qgraph = {
        "nodes": {"n0": pinned},
        "edges": {},
    }

    cypher = get_query(qgraph, use_hints=True)
    # The hinted variable is the node id with a ``_superclass`` suffix.
    expected_hint = "USING INDEX `n0_superclass`:`biolink:Disease`(id)"
    assert expected_hint in cypher
def test_backward_subclass(database):
    """Check a one-hop subclass query whose pinned node is the edge object.

    ``n0`` is pinned to MONDO:0000001 and is the object of ``e01``; nine
    results are expected.
    """
    qgraph = {
        "nodes": {
            "n0": {"ids": ["MONDO:0000001"]},
            "n1": {},
        },
        "edges": {
            "e01": {"subject": "n1", "object": "n0"},
        },
    }

    records = list(database.run(get_query(qgraph)))
    first_record = records[0]
    assert len(first_record['results']) == 9
def test_any(database):
    """Check a one-hop query whose edge has no predicate constraint.

    With ``n1`` pinned to NCBIGene:836 and no ``predicates`` on the edge,
    four results are expected.
    """
    qgraph = {
        "nodes": {
            "n0": {},
            "n1": {
                "ids": "NCBIGene:836"
            },
        },
        "edges": {
            "e10a": {
                "subject": "n1",
                "object": "n0",
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and its
    # assertion) would never execute.
    assert records, "query returned no records"

    for record in records:
        assert len(record["results"]) == 4
def test_inverse(database):
    """Check a one-hop query on ``biolink:gene_associated_with_condition``.

    With ``n0`` pinned to NCBIGene:672 exactly one result is expected.
    Judging by the test's name, this exercises inverse-predicate handling.
    """
    qgraph = {
        "nodes": {
            "n0": {
                "ids": "NCBIGene:672",
            },
            "n1": {},
        },
        "edges": {
            "e10": {
                "subject": "n0",
                "object": "n1",
                "predicates": "biolink:gene_associated_with_condition",
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and its
    # assertion) would never execute.
    assert records, "query returned no records"

    for record in records:
        assert len(record["results"]) == 1
def test_symmetric(database):
    """Check a one-hop query on ``biolink:genetic_association``.

    With ``n1`` pinned to NCBIGene:836 as the edge subject, two results are
    expected.  Judging by the test's name, this exercises symmetric-predicate
    handling.
    """
    qgraph = {
        "nodes": {
            "n0": {},
            "n1": {
                "ids": "NCBIGene:836"
            },
        },
        "edges": {
            "e10a": {
                "subject": "n1",
                "object": "n0",
                "predicates": "biolink:genetic_association",
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and its
    # assertion) would never execute.
    assert records, "query returned no records"

    for record in records:
        assert len(record["results"]) == 2
def test_sub(database):
    """Check a one-hop ``biolink:genetic_association`` query from a disease.

    With ``n0`` pinned to MONDO:0004993 two results are expected.  Judging
    by the test's name, this exercises sub-predicate handling.
    """
    qgraph = {
        "nodes": {
            "n0": {
                "ids": "MONDO:0004993",
            },
            "n1": {},
        },
        "edges": {
            "e10": {
                "subject": "n0",
                "object": "n1",
                "predicates": "biolink:genetic_association",
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and its
    # assertion) would never execute.
    assert records, "query returned no records"

    for record in records:
        assert len(record["results"]) == 2
def test_curie_formats(database):
    """Check a query whose ``ids`` and ``predicates`` are given as lists.

    ``n0`` is pinned to two MONDO ids and the treats predicate is wrapped in
    a single-element list.  Five results are expected, and the
    knowledge-graph nodes sorted by name must match the expected names.
    """
    qgraph = {
        "nodes": {
            "n0": {
                "ids": [
                    "MONDO:0005148",
                    "MONDO:0011122",
                ],
                "categories": "biolink:Disease",
            },
            "n1": {
                "categories": "biolink:ChemicalSubstance",
            },
        },
        "edges": {
            "e01": {
                "predicates": [
                    "biolink:treats",
                ],
                "subject": "n1",
                "object": "n0",
            },
        },
    }
    records = list(database.run(get_query(qgraph)))
    # Guard against a vacuous pass: with no records the loop body (and all
    # of its assertions) would never execute.
    assert records, "query returned no records"

    expected_names = [
        "anagliptin",
        "bezafibrate",
        "metformin",
        "obesity disorder",
        "type 2 diabetes mellitus",
    ]
    for record in records:
        assert len(record["results"]) == 5
        ordered_nodes = sorted(
            record["knowledge_graph"]["nodes"].values(),
            key=lambda node: node["name"],
        )
        for position, node in enumerate(ordered_nodes):
            assert node["name"] == expected_names[position]
def test_fancy_key(database):
    """Check qnode and qedge keys that contain spaces and hyphens.

    The node key ``"type-2 diabetes"`` and the edge key ``"interacts with"``
    are used; five results are expected.
    """
    disease_key = "type-2 diabetes"
    qgraph = {
        "nodes": {
            disease_key: {"categories": "biolink:Disease"},
            "n1": {"categories": "biolink:Gene"},
        },
        "edges": {
            "interacts with": {
                "subject": disease_key,
                "object": "n1",
            }
        },
    }

    records = list(database.run(get_query(qgraph)))
    first_record = records[0]
    assert len(first_record["results"]) == 5
def test_pinned_subclass(database):
    """Check a one-hop subclass query with both nodes pinned.

    ``n0`` is pinned to MONDO:0000001 and ``n1`` to HP:0000118.  The single
    expected result binds them to MONDO:0005148 and HP:0012592, each
    carrying the originally queried id as ``qnode_id``.
    """
    qgraph = {
        "nodes": {
            "n0": {"ids": ["MONDO:0000001"]},
            "n1": {"ids": ["HP:0000118"]},
        },
        "edges": {
            "e01": {"subject": "n0", "object": "n1"},
        },
    }

    first_record = list(database.run(get_query(qgraph)))[0]
    results = first_record['results']
    assert len(results) == 1

    expected_bindings = {
        "n0": [{"id": "MONDO:0005148", "qnode_id": "MONDO:0000001"}],
        "n1": [{"id": "HP:0012592", "qnode_id": "HP:0000118"}],
    }
    assert results[0]["node_bindings"] == expected_bindings