def assert_parse_tree(code, expected):
    """Check that parsing ``code`` produces the ``expected`` parse tree.

    Args:
        code: Program text handed to the TranQL parser.
        expected: Parse tree the parser output is compared against with
            ``assert_lists_equal``.
    """
    interpreter = TranQL()
    # Name resolution is switched off on this instance before parsing
    # (presumably so the parse does not depend on a lookup service -- confirm).
    interpreter.resolve_names = False
    parsed = interpreter.parser.parse(code)
    assert_lists_equal(parsed.parse_tree, expected)
def test_ast_implicit_conversion(requests_mock):
    """Plan a drug_exposure->chemical_substance query issued against '/schema'.

    The first planned statement must keep the two concepts in query order
    and report "implicit_conversion" as its schema name.
    """
    set_mock(requests_mock, "workflow-5")
    interpreter = TranQL()
    program = interpreter.parse(""" SELECT drug_exposure->chemical_substance FROM '/schema' """)
    select_stmt = program.statements[0]

    # Build the query plan first, then expand it into concrete statements.
    query_plan = select_stmt.planner.plan(select_stmt.query)
    planned = select_stmt.plan(query_plan)

    first = planned[0]
    assert_lists_equal(first.query.order, ["drug_exposure", "chemical_substance"])
    assert first.get_schema_name(interpreter) == "implicit_conversion"
def test_ast_resolve_name(requests_mock):
    """Validate SelectStatement.resolve_name.

    Validate that
        -- The SelectStatement::resolve_name method will correctly retrieve
           equivalent identifiers from a given name

    The description is the first statement of the function so that it is a
    real docstring (visible through ``__doc__``) rather than a discarded
    string expression.
    """
    set_mock(requests_mock, "resolve_name")
    print("test_ast_resolve_name ()")

    resolved = SelectStatement.resolve_name("ibuprofen", "chemical_substance")
    # NOTE(review): 'CHEBI:5855' is listed twice; kept exactly as written
    # because it may mirror the mocked service response -- confirm.
    assert_lists_equal(
        resolved,
        [
            'CHEBI:132922',
            'CHEBI:5855',
            'CHEBI:43415',
            'PUBCHEM:3672',
            'MESH:D007052',
            'CHEBI:5855',
            'CHEMBL:CHEMBL521',
        ],
    )
def test_ast_plan_statements(requests_mock):
    """Validate the statements planned for a constrained '/schema' query.

    The query is expected to be planned into exactly two statements. Each
    one must carry both concepts, the bound ``cohort_diagnosis`` node, the
    WHERE constraints and no SET statements; between them the two
    statements must target "/graph/gamma/quick" and "/graph/rtx", in either
    order.
    """
    set_mock(requests_mock, "workflow-5")
    print("test_ast_plan_statements ()")
    tranql = TranQL()
    tranql.resolve_names = False
    # QueryPlanStrategy always uses /schema regardless of the `FROM` clause.
    #
    # The query is written one clause per line on purpose: the trailing
    # `--asthma` comment must be followed by a line break. Presumably a
    # TranQL `--` comment runs to the end of its line (confirm against the
    # grammar), so on a single line it would hide the AND/SET clauses that
    # the assertions below expect to have been parsed.
    ast = tranql.parse("""
        SELECT cohort_diagnosis:disease->diagnoses:disease
          FROM '/schema'
         WHERE cohort_diagnosis = 'MONDO:0004979' --asthma
           AND Sex = '0'
           AND cohort = 'all_patients'
           AND max_p_value = '0.5'
           SET '$.knowledge_graph.nodes.[*].id' AS diagnoses
    """)

    select = ast.statements[0]
    statements = select.plan(select.planner.plan(select.query))

    assert len(statements) == 2

    for statement in statements:
        assert_lists_equal(
            list(statement.query.concepts.keys()),
            [
                "cohort_diagnosis",
                "diagnoses",
            ],
        )

        assert statement.query.concepts['cohort_diagnosis'].nodes == ["MONDO:0004979"]
        assert statement.query.concepts['diagnoses'].nodes == []
        # TODO: figure out why there are duplicates generated??
        assert_lists_equal(statement.where, [
            ['cohort_diagnosis', '=', 'MONDO:0004979'],
            ['Sex', '=', '0'],
            ['Sex', '=', '0'],
            ['cohort', '=', 'all_patients'],
            ['cohort', '=', 'all_patients'],
            ['max_p_value', '=', '0.5'],
            ['max_p_value', '=', '0.5'],
        ])
        assert statement.set_statements == []

    # Exactly one statement per service, in either order. Sorting makes the
    # comparison order-independent and shows both values on failure.
    services = sorted(planned.service for planned in statements)
    assert services == ["/graph/gamma/quick", "/graph/rtx"]
def test_ast_format_constraints(requests_mock):
    """Validate reasoner-prefixed constraints in the WHERE clause.

    Validate that
        -- The syntax to pass values to reasoners in the where clause
           (e.g. "icees.foo = bar") functions properly

    After ``format_constraints`` runs, the expected ``where`` list has the
    ``icees.`` prefix removed from ``should_format`` while
    ``robokop.should_not_format`` keeps its prefix.

    The description is the first statement of the function so that it is a
    real docstring rather than a discarded string expression.
    """
    set_mock(requests_mock, "workflow-5")
    print("test_ast_format_constraints ()")
    tranql = TranQL()
    ast = tranql.parse(""" SELECT population_of_individual_organisms->chemical_substance FROM "/clinical/cohort/disease_to_chemical_exposure" WHERE icees.should_format = 1 AND robokop.should_not_format = 0 """)

    select = ast.statements[0]
    # format_constraints is checked through its effect on select.where.
    select.format_constraints(tranql)
    print(select.where)

    # Each constraint appears twice in the expected value; the duplication
    # is asserted as-is (its cause is not visible from this test).
    assert_lists_equal(select.where, [
        ['should_format', '=', 1],
        ['should_format', '=', 1],
        ['robokop.should_not_format', '=', 0],
        ['robokop.should_not_format', '=', 0],
    ])
def test_ast_decorate_element(requests_mock):
    """Validate SelectStatement.decorate for nodes and edges.

    Validate that
        -- The SelectStatement::decorate method properly decorates both
           nodes and edges

    A sample node and a sample edge are decorated in place; afterwards both
    must carry ``reasoner == ["robokop"]`` and the edge must carry
    ``source_database == ["unknown"]``.

    The description is the first statement of the function so that it is a
    real docstring rather than a discarded string expression.
    """
    set_mock(requests_mock, "workflow-5")
    print("test_ast_decorate_element ()")
    tranql = TranQL()
    ast = tranql.parse(""" SELECT chemical_substance->disease FROM "/graph/gamma/quick" """)
    select = ast.statements[0]

    node = {
        "id": "CHEBI:36314",
        "name": "glycerophosphoethanolamine",
        "omnicorp_article_count": 288,
        "type": "chemical_substance",
    }
    edge = {
        "ctime": [
            1544077522.7678425
        ],
        "edge_source": [
            "chembio.graph_pubchem_to_ncbigene"
        ],
        "id": "df662e2842d44fa2c0b5d945044317e3",
        "predicate_id": "SIO:000203",
        "publications": [
            "PMID:16217747"
        ],
        "relation": [
            "CTD:interacts_with"
        ],
        "relation_label": [
            "interacts"
        ],
        "source_id": "CHEBI:36314",
        "target_id": "HGNC:8971",
        "type": "directly_interacts_with",
        "weight": 0.4071474314830641,
    }

    # Look the schema name up once; each call still receives its own
    # options dict in case decorate modifies the dict it is given.
    schema_name = select.get_schema_name(tranql)
    # The second positional argument is True for the node and False for
    # the edge (presumably an "is node" flag -- confirm against decorate).
    select.decorate(node, True, {"schema": schema_name})
    select.decorate(edge, False, {"schema": schema_name})

    assert_lists_equal(node["reasoner"], ["robokop"])

    assert_lists_equal(edge["reasoner"], ["robokop"])
    assert_lists_equal(edge["source_database"], ["unknown"])
def test_ast_multiple_reasoners(requests_mock):
    """Validate planning of a query that spans several reasoners.

    Validate that
        -- A query spanning multiple reasoners will query multiple reasoners.
        -- A transition that multiple reasoners support will query each
           reasoner that supports it.

    The description is the first statement of the function so that it is a
    real docstring rather than a discarded string expression.
    """
    set_mock(requests_mock, "workflow-5")
    print("test_ast_multiple_reasoners ()")
    tranql = TranQL()
    ast = tranql.parse(""" SELECT chemical_substance->disease->gene FROM "/schema" """)
    # RTX and Robokop both support transitions between chemical_substance->disease
    # and only Robokop supports transitions between disease->gene
    select = ast.statements[0]
    statements = select.plan(select.planner.plan(select.query))

    # (concept order, schema name) expected at each position of the plan.
    expected_plan = [
        (['chemical_substance', 'disease'], "robokop"),
        (['chemical_substance', 'disease'], "rtx"),
        (['disease', 'gene'], "robokop"),
    ]
    # Index explicitly (not zip) so a plan with too few statements still
    # fails with an IndexError instead of passing silently.
    for position, (order, schema_name) in enumerate(expected_plan):
        planned = statements[position]
        assert_lists_equal(planned.query.order, order)
        assert planned.get_schema_name(tranql) == schema_name
def test_ast_merge_knowledge_maps(requests_mock):
    """Check SelectStatement.connect_knowledge_maps on hand-built responses.

    Five partial responses, each with a ``knowledge_map`` and a
    ``question_order``, are connected along the concept chain
    chemical_substance -> disease -> gene -> metabolite -> protein.
    The result is compared, after ``ordered`` normalisation, with the two
    expected merged answers.
    """
    set_mock(requests_mock, "workflow-5")
    interpreter = TranQL()
    interpreter.asynchronous = False
    interpreter.resolve_names = False
    # The parse result is not used by the assertions below; the call is
    # kept because the original test performs it.
    interpreter.parse(""" select chemical_substance->disease->gene from "/schema" where chemical_substance="CHEMBL:CHEMBL3" """)

    concept_chain = [
        'chemical_substance',
        'disease',
        'gene',
        'metabolite',
        'protein',
    ]

    partial_answers = [
        {
            'knowledge_map': [
                {
                    'node_bindings': {
                        'chemical_substance': 'CHEBI:100',
                        'disease': 'MONDO:50',
                    },
                    'edge_bindings': {
                        'e0': 'ROOT_EDGE',
                    },
                },
            ],
            'question_order': ['chemical_substance', 'disease'],
        },
        {
            'knowledge_map': [
                {
                    'node_bindings': {
                        'disease': 'MONDO:50',
                        'gene': 'HGNC:1',
                        'metabolite': 'KEGG:C00017',
                    },
                    'edge_bindings': {
                        'e1': 'TEST_EDGE',
                    },
                },
            ],
            'question_order': ['disease', 'gene', 'metabolite'],
        },
        {
            'knowledge_map': [
                {
                    'node_bindings': {
                        'disease': 'MONDO:50',
                        'gene': 'HGNC:1',
                        'metabolite': 'KEGG:FOOBAR',
                    },
                    'edge_bindings': {},
                },
            ],
            'question_order': ['disease', 'gene', 'metabolite'],
        },
        {
            'knowledge_map': [
                {
                    'node_bindings': {
                        'metabolite': 'KEGG:FOOBAR',
                        'protein': 'UniProtKB:TESTING',
                    },
                    'edge_bindings': {},
                },
            ],
            'question_order': ['metabolite', 'protein'],
        },
        {
            'knowledge_map': [
                {
                    'node_bindings': {
                        'metabolite': 'KEGG:C00017',
                        'protein': 'UniProtKB:Q9NZJ5',
                    },
                    'edge_bindings': {},
                },
            ],
            'question_order': ['metabolite', 'protein'],
        },
    ]

    expected_answers = [
        {
            "node_bindings": {
                "chemical_substance": "CHEBI:100",
                "disease": "MONDO:50",
                "gene": "HGNC:1",
                "metabolite": "KEGG:FOOBAR",
                "protein": "UniProtKB:TESTING",
            },
            "edge_bindings": {
                "e0": "ROOT_EDGE",
            },
        },
        {
            "node_bindings": {
                "chemical_substance": "CHEBI:100",
                "disease": "MONDO:50",
                "gene": "HGNC:1",
                "metabolite": "KEGG:C00017",
                "protein": "UniProtKB:Q9NZJ5",
            },
            "edge_bindings": {
                "e0": "ROOT_EDGE",
                "e1": "TEST_EDGE",
            },
        },
    ]

    connected = SelectStatement.connect_knowledge_maps(partial_answers, concept_chain)

    assert_lists_equal(ordered(connected), ordered(expected_answers))