def test_graph_to_sparql(self):
    # Builds an annotation subgraph by hand and searches the generated model
    # for it. The subgraph describes the translation of a regulation of gene
    # expression has_regulation_target extension:
    #
    #   GO:0003674-1 enabled_by (RO:0002333) WB:WBGene00003167-1
    #   GO:0003674-1 part_of (BFO:0000050) GO:0045944-1
    #   GO:0003674-1 has_input (RO:0002233) WB:WBGene00003167-2
    #   GO:0003674-2 enabled_by (RO:0002333) WB:WBGene00003167-2
    #   GO:0003674-1 causally upstream of, positive effect (RO:0002304) GO:0003674-2
    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00003167",
        test_gene="WB:WBGene00003167",
        filter_rule=WBFilterRule(),
    )

    subgraph = AnnotationSubgraph({"source_line": "fake annotation"})

    # Individuals are created in a fixed order: function 1, gene product 1,
    # process 1, gene product 2, function 2.
    function_1 = subgraph.add_instance_of_class("GO:0003674")
    gene_product_1 = subgraph.add_instance_of_class("WB:WBGene00003167")
    process_1 = subgraph.add_instance_of_class("GO:0045944")
    gene_product_2 = subgraph.add_instance_of_class("WB:WBGene00003167")
    function_2 = subgraph.add_instance_of_class("GO:0003674")

    # One (subject, relation, object) row per edge listed above.
    expected_edges = (
        (function_1, "RO:0002333", gene_product_1),
        (function_1, "BFO:0000050", process_1),
        (function_1, "RO:0002233", gene_product_2),
        (function_2, "RO:0002333", gene_product_2),
        (function_1, "RO:0002304", function_2),
    )
    for subject, relation, target in expected_edges:
        subgraph.add_edge(subject, relation, target)

    # Debugging aid: subgraph.print_matches_in_model(model)
    matches = subgraph.find_matches_in_model(model)

    # TODO: This example extension is going to be converted to has_input in
    # near future, so the match count is not checked yet:
    # self.assertEqual(len(matches), 1)
    self.assertEqual(1, 1)
def test_has_input(self):
    # Background:
    # https://github.com/geneontology/gocamgen/issues/39#issuecomment-479988904
    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00003167",
        test_gene="WB:WBGene00003167",
        filter_rule=WBFilterRule(),
    )

    # The extension 'GO:0000977 has_direct_input(WB:WBGene00036254)' should
    # show up in the model as at least one RO:0002233 triple between the two.
    has_input_relation = URIRef(expand_uri_wrapper("RO:0002233"))
    matching_triples = model.triples_by_ids(
        "GO:0000977", has_input_relation, "WB:WBGene00036254"
    )

    self.assertGreaterEqual(
        len(matching_triples), 1, "No has_input extensions translated"
    )
def test_sparql(self):
    # Exploratory check of what can be pulled out of the generated model's
    # graph with SPARQL: exactly one "involved in translated" result is
    # expected for this gene product / term pair.
    gene_product = "WB:WBGene00003167"
    go_term = "GO:0007638"

    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00003167",
        test_gene="WB:WBGene00003167",
        filter_rule=WBFilterRule(),
    )

    query_results = RdflibSparqlWrapper().find_involved_in_translated(
        model.graph, gene_product, go_term
    )
    result_count = len(query_results)

    self.assertEqual(result_count, 1)
def test_has_regulation_target(self):
    # Placeholder: currently this only generates the model; the assertion at
    # the end is a no-op.
    #
    # Example annotations carrying has_regulation_target extensions:
    #   F - MGI:MGI:107771 GO:0005096
    #       'has_regulation_target(MGI:MGI:97846)|has_regulation_target(MGI:MGI:2180784)'
    #   P - WB:WBGene00013591 GO:0042594
    #       'causally_upstream_of(GO:0001934),has_regulation_target(WB:WBGene00008480)'
    #
    # Which has_regulation_target bucket does this fall into? None so far
    # (GO:0042594 is "response to starvation").
    #
    # Disabled experiment kept for reference:
    #   bucket = gocamgen.gocamgen.has_regulation_target_bucket(ont, "GO:0001934")
    #   model = gen_model(gpad_file="resources/test/wb.gpad.WBGene00013591",
    #                     test_gene="WB:WBGene00013591",
    #                     filter_rule=WBFilterRule())
    self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00003167",
        test_gene="WB:WBGene00003167",
        filter_rule=WBFilterRule(),
    )

    self.assertEqual(1, 1)
def test_extension_pipe_separation(self):
    # See https://github.com/geneontology/gocamgen/issues/40
    gene = "WB:WBGene00003167"
    go_term = "GO:0000977"

    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00003167",
        test_gene="WB:WBGene00003167",
        filter_rule=WBFilterRule(),
    )

    # Count the 'WB:WBGene00003167 contributes_to GO:0000977' triples; at
    # least three are expected.
    contributes_to_triples = model.triples_by_ids(
        gene, gocamgen.gocamgen.CONTRIBUTES_TO, go_term
    )
    triple_count = len(contributes_to_triples)

    self.assertGreaterEqual(
        triple_count,
        3,
        "Less than 3 annotations for WB:WBGene00003167 contributes_to GO:0000977",
    )
def test_triple_finder(self):
    # Exercises TriplePatternFinder against a generated model: first with
    # single- and two-triple patterns (results not asserted), then with a
    # connected pair of triples, which must yield at least one chain.
    #
    # Open notes from the original author:
    #   - Get model.writer.graph and check for loose evidence (not attached
    #     to axioms). Orphaned evidence lives somewhere in the rdflib writer;
    #     unclear how to find it in a debugger.
    #   - Can a "(MF, ENABLED_BY, GP) & (same MF, has input, GP)" pattern work?
    test_gene = "WB:WBGene00006498"
    shared_function = "GO:0003674"

    model = self.gen_model(
        gpad_file="resources/test/wb_6498.gpad",
        test_gene=test_gene,
        filter_rule=WBFilterRule(),
    )

    # The two triples under test share their subject class (GO:0003674).
    enabled_by_triple = (
        shared_function,
        gocamgen.gocamgen.ENABLED_BY,
        test_gene,
    )
    part_of_triple = (
        shared_function,
        URIRef(expand_uri_wrapper("BFO:0000050")),
        "GO:0019953",
    )

    pattern_a = TriplePattern([enabled_by_triple])
    pattern_b = TriplePattern([part_of_triple])
    whole_pattern = TriplePattern([enabled_by_triple, part_of_triple])

    finder = TriplePatternFinder()

    # These three lookups are run for their own sake; nothing is asserted
    # about what they return. Inspect the return values when debugging.
    finder.find_pattern_recursive(model, pattern_a)
    finder.find_pattern_recursive(model, pattern_b)
    finder.find_pattern_recursive(model, whole_pattern)

    # Join the first triple of each single pattern on the shared class and
    # look the pair up as a connected chain.
    pairs_to_find = TriplePairCollection()
    pairs_to_find.chain_collection.append(
        TriplePair(
            pattern_a.ordered_triples[0],
            pattern_b.ordered_triples[0],
            connecting_entity=shared_function,
        )
    )
    found_pairs = finder.find_connected_pattern(model, pairs_to_find)

    self.assertGreaterEqual(len(found_pairs.chain_collection), 1)
def test_with_from_evidence(self):
    # Check to ensure with/from values are appropriately added to evidence
    # or translated to has_input edges.
    sparql_wrapper = RdflibSparqlWrapper()
    enabled_by = "RO:0002333"
    has_input = "RO:0002233"

    # --- Non-protein binding case -------------------------------------
    # The with/from value should be an evidence-with property on the
    # evidence individual.
    model = self.gen_model(
        gpad_file="resources/test/mgi.gpa.mgi_1859682",
        test_gene="MGI:MGI:1859682",
        filter_rule=MGIFilterRule(),
    )
    evidence_rows = sparql_wrapper.find_evidence_with(
        model.graph, "GO:0008520", enabled_by, "MGI:MGI:1859682"
    )
    evidence_withs = [str(row["evi_with"]) for row in evidence_rows]
    self.assertIn("MGI:MGI:2180333", evidence_withs)

    # --- Non-protein binding case w/ commas ---------------------------
    model = self.gen_model(
        gpad_file="resources/test/mgi.gpa.MGI_87859",
        test_gene="MGI:MGI:87859",
        filter_rule=MGIFilterRule(),
    )
    evidence_rows = sparql_wrapper.find_evidence_with(
        model.graph, "GO:0004713", enabled_by, "MGI:MGI:87859"
    )
    # The values are still collected (so the query and row access are
    # exercised), but nothing is asserted: the previous assertions are
    # invalid now that commas are no longer split. They failed with
    #   AssertionError: 'MGI:MGI:3525886' not found in ['MGI:MGI:87860',
    #   'MGI:MGI:87860', 'MGI:MGI:2176222,MGI:MGI:3525886',
    #   'MGI:MGI:104738', 'MGI:MGI:87860']
    # self.assertIn("MGI:MGI:3525886", evidence_withs)
    # self.assertIn("MGI:MGI:2176222", evidence_withs)
    evidence_withs = [str(row["evi_with"]) for row in evidence_rows]

    # --- Protein binding case -----------------------------------------
    # The with/from value should appear as term -has_input-> with/from.
    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00000018",
        test_gene="WB:WBGene00000018",
        filter_rule=WBFilterRule(),
    )
    input_rows = sparql_wrapper.find_triple_by_class(
        model.graph, "GO:0005515", has_input, "WB:WBGene00015146"
    )
    self.assertEqual(len(input_rows), 1)

    # --- DNA binding case ---------------------------------------------
    # Same as protein binding: term -has_input-> with/from.
    model = self.gen_model(
        gpad_file="resources/test/mgi.gpa.MGI_97747",
        test_gene="MGI:MGI:97747",
        filter_rule=MGIFilterRule(),
    )
    input_rows = sparql_wrapper.find_triple_by_class(
        model.graph, "GO:0003677", has_input, "UniProtKB:P28700"
    )
    self.assertEqual(len(input_rows), 1)

    # --- Protein binding descendant case ------------------------------
    # GO:0019894 - kinesin binding; term -has_input-> with/from.
    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00000099",
        test_gene="WB:WBGene00000099",
        filter_rule=WBFilterRule(),
    )
    input_rows = sparql_wrapper.find_triple_by_class(
        model.graph, "GO:0019894", has_input, "WB:WBGene00006974"
    )
    self.assertEqual(len(input_rows), 1)

    # --- Protein binding w/ pipes -------------------------------------
    # GO:0017022 - myosin binding.
    model = self.gen_model(
        gpad_file="resources/test/wb.gpad.WBGene00002173",
        test_gene="WB:WBGene00002173",
        filter_rule=WBFilterRule(),
    )
    input_rows = sparql_wrapper.find_triple_by_class(
        model.graph, "GO:0017022", has_input
    )
    # There must be 3 distinct individuals of GO:0017022 among the result
    # subjects; the set collapses repeated subject IRIs.
    distinct_subjects = {row["s"] for row in input_rows}
    self.assertEqual(len(distinct_subjects), 3)