def test_obsolete_replair_of_withfrom():
    """Check how GO ids in the With/From column are handled against OBSOLETE_ONT.

    Two scenarios are exercised:

    1. Parsing the ZFIN fixture: the first association's With/From must come
       out as the single CURIE ``GO:0005912`` (presumably the replacement for
       an obsolete id in the fixture -- confirm against the fixture file).
    2. Parsing a single GAF 2.2 line whose With/From is ``GO:0016458``: no
       association may be produced and the id must be reported under
       ``gorule-0000020``.
    """
    p = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    # Close the fixture deterministically instead of leaking the handle.
    with open(ZFIN_GAF, "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    assert assocs[0].evidence.with_support_from == [
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')])
    ]

    # Reset parser report
    p = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    p.version = "2.2"
    obsolete_no_replacement_line = "FB\tFBgn0003334\tScm\tlocated_in\tGO:0005634\tFB:FBrf0179383|PMID:15280237\tIC\tGO:0016458\tC\tSex comb on midleg\tCG9495|SCM|Sex Comb on Midleg|Sex Comb on the Midleg|Sex combs on midleg|Sex combs on midlegs|Su(z)302|l(3)85Ef|scm|sex comb on midleg\tprotein\ttaxon:7227\t20050203\tUniProt\t\t"
    assoc_result = p.parse_line(obsolete_no_replacement_line)
    assert assoc_result.associations == []
    first_message = p.report.to_report_json()["messages"]["gorule-0000020"][0]
    assert first_message["obj"] == "GO:0016458"
def test_validate_go_idspaces():
    """Check that restricting ``class_idspaces`` rejects every POMBASE row.

    With the only allowed class id space set to ``FOOZ``, parsing the POMBASE
    fixture must yield no associations, more than one report message, and a
    summary showing zero associations over more than 300 lines. A freshly
    built parser must not inherit the modified configuration.
    """
    # NOTE(review): the ontology is loaded but never passed to the parser;
    # kept so the test still exercises loading ONT -- confirm whether needed.
    ont = OntologyFactory().create(ONT)
    p = GafParser()
    p.config.class_idspaces = ['FOOZ']
    # Close the fixture deterministically instead of leaking the handle.
    with open(POMBASE, "r") as gaf_file:
        assocs = p.parse(gaf_file)
    for m in p.report.messages:
        print("MESSAGE: {}".format(m))
    assert len(assocs) == 0
    assert len(p.report.messages) > 1
    summary = p.report.to_report_json()['summary']
    assert summary['association_count'] == 0
    assert summary['line_count'] > 300
    print(p.report.to_markdown())

    # ensure config is not preserved
    p = GafParser()
    assert p.config.class_idspaces is None
def test_qualifiers_gaf_2_2():
    """Check qualifier validation and mapping for the GAF 2.2 fixture.

    Expects exactly one ``gorule-0000001`` message for each of the qualifier
    values ``NOT`` and ``contributes_to|enables``, and exactly one parsed
    association whose qualifiers contain the CURIE ``RO:0004035``.
    """
    # NOTE(review): SOURCE contains another function with this exact name; if
    # both live in one module the later definition replaces the earlier one
    # and only one of them runs -- confirm and rename if so.
    p = GafParser()
    # Close the fixture deterministically instead of leaking the handle.
    with open("tests/resources/test-qualifiers-2.2.gaf") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)

    # Build the report once; both message assertions read the same rule.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # NOT by itself is not allowed
    assert len([e for e in rule_messages if e["obj"] == "NOT"]) == 1
    assert len([
        e for e in rule_messages if e["obj"] == "contributes_to|enables"
    ]) == 1

    expected_qualifier = association.Curie.from_str("RO:0004035")
    assert len([a for a in assocs if expected_qualifier in a.qualifiers]) == 1
def test_qualifiers_gaf():
    """Check negation, relation mapping and qualifier errors for QGAF.

    Expects three negated associations, exactly one association whose relation
    renders as ``RO:0002326``, and exactly one ``gorule-0000001`` message for
    each of the qualifier values ``colocalizes with`` and ``involved_in``.
    """
    p = GafParser()
    # Close the fixture deterministically instead of leaking the handle.
    with open(QGAF, "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)

    neg_assocs = [a for a in assocs if a.negated]
    assert len(neg_assocs) == 3
    for a in assocs:
        print('REL: {}'.format(str(a.relation)))

    assert len([a for a in assocs if str(a.relation) == 'RO:0002326']) == 1

    # Build the report once; both message assertions read the same rule.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # For the space in `colocalizes with`
    assert len([
        e for e in rule_messages if e["obj"] == "colocalizes with"
    ]) == 1
    assert len([e for e in rule_messages if e["obj"] == "involved_in"]) == 1
def test_qualifiers_gaf_2_2():
    """Check qualifier validation for the GAF 2.2 fixture (dict associations).

    Expects exactly one ``gorule-0000001`` message for each of the qualifier
    values ``NOT`` and ``contributes_to|enables``, and exactly one parsed
    association whose ``qualifiers`` entry contains
    ``acts_upstream_of_negative_effect``.
    """
    # NOTE(review): SOURCE contains another function with this exact name; if
    # both live in one module the later definition replaces the earlier one
    # and only one of them runs -- confirm and rename if so.
    p = GafParser()
    # Close the fixture deterministically instead of leaking the handle.
    with open("tests/resources/test-qualifiers-2.2.gaf") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)

    # Build the report once; both message assertions read the same rule.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # NOT by itself is not allowed
    assert len([e for e in rule_messages if e["obj"] == "NOT"]) == 1
    assert len([
        e for e in rule_messages if e["obj"] == "contributes_to|enables"
    ]) == 1

    assert len([
        a for a in assocs
        if "acts_upstream_of_negative_effect" in a["qualifiers"]
    ]) == 1
def test_errors_gaf():
    """Check error reporting and filtering for the ``errors.gaf`` fixture.

    Expects 15 report messages and 7 surviving associations, verifies the
    associations can be handed to ``GafWriter.write``, and checks that the
    three remaining object extensions all have property ``foo`` and filler
    ``X:1``.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Close the fixture deterministically instead of leaking the handle.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # Seven associations are expected to survive validation.
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    xs = [x for a in assocs for x in a.get('object_extensions', [])]
    for x in xs:
        print('X: {}'.format(x))
        # ensure that invalid expressions have been eliminated
        assert x['property'] == 'foo'
        assert x['filler'] == 'X:1'
    assert len(xs) == 3
def parse(self, limit=None):
    """
    Override Source.parse()

    Reads the 'rat_gene2mammalian_phenotype' file from self.rawdir with
    GafParser and calls self.make_association() for every parsed row that
    carries a 'relation' key.

    Args:
        :param limit (int, optional) limit the number of rows processed;
            rows without a 'relation' key still count toward the limit
    Returns:
        :return None
    """
    if limit is not None:
        LOG.info("Only parsing first %d rows", limit)

    rgd_file = '/'.join(
        (self.rawdir, self.files['rat_gene2mammalian_phenotype']['file']))
    # ontobio gafparser implemented here
    p = GafParser()
    # Keep the handle open while rows are consumed, then close it reliably.
    with open(rgd_file, "r") as gaf_handle:
        assocs = p.parse(gaf_handle)
        for i, assoc in enumerate(assocs):
            # Check before processing so exactly `limit` rows are handled;
            # testing `i > limit` after the work let limit + 2 rows through.
            if limit is not None and i >= limit:
                break
            if 'relation' in assoc.keys():
                self.make_association(assoc)
    return
def parse(self, limit=None):
    """
    Override Source.parse()

    Reads the 'rat_gene2mammalian_phenotype' file from self.rawdir with
    GafParser and calls self.make_association() for every parsed row that
    carries a 'relation' key.

    Args:
        :param limit (int, optional) limit the number of rows processed;
            rows without a 'relation' key still count toward the limit
    Returns:
        :return None
    """
    if limit is not None:
        LOG.info("Only parsing first %d rows", limit)

    rgd_file = '/'.join(
        (self.rawdir, self.files['rat_gene2mammalian_phenotype']['file']))
    # ontobio gafparser implemented here
    p = GafParser()
    # Keep the handle open while rows are consumed, then close it reliably.
    with open(rgd_file, "r") as gaf_handle:
        assocs = p.parse(gaf_handle)
        for i, assoc in enumerate(assocs):
            # Check before processing so exactly `limit` rows are handled;
            # testing `i > limit` after the work let limit + 2 rows through.
            if limit is not None and i >= limit:
                break
            if 'relation' in assoc.keys():
                self.make_association(assoc)
    return
def test_map2slim_gaf():
    """Check mapping of POMBASE annotations onto the SUBSET slim.

    Builds a slim mapping from the ontology, verifies a few known entries,
    writes the mapped annotations to a temporary file via
    ``GafParser.map_to_subset``, then re-parses that file and asserts that
    more than 100 associations come back and every object id is in SUBSET.
    """
    f = POMBASE
    p = GafParser()
    ont = OntologyFactory().create(ONT)
    relations = ['subClassOf', 'BFO:0000050']

    # creates a basic JSON dictionary
    m = ont.create_slim_mapping(subset_nodes=SUBSET, relations=relations)

    assert m['GO:0071423'] == ['GO:0006810']
    assert len(m[NESRA]) == 2
    assert 'GO:0051169' in m[NESRA]
    assert 'GO:0003674' in m[NESRA]

    # delete=False so the file can be reopened by name after it is closed.
    outfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
    # Close the input fixture deterministically instead of leaking the handle.
    with open(f, "r") as gaf_file:
        p.map_to_subset(gaf_file, class_map=m, outfile=outfile)
    # Distinct loop name so the slim mapping `m` is not overwritten.
    for msg in p.report.messages:
        logging.debug("MESSAGE1: {}".format(msg))
    logging.info("MESSAGES: {}".format(len(p.report.messages)))

    p = GafParser()
    logging.info("CLOSING: {}".format(outfile))
    outfile.close()
    logging.info("Reading from: {}".format(outfile.name))
    assocs = p.parse(outfile.name)
    for msg in p.report.messages:
        logging.debug("MESSAGE2: {}".format(msg))
    assert len(assocs) > 100

    cls_ids = set()
    for a in assocs:
        cid = a['object']['id']
        assert cid in SUBSET
        cls_ids.add(cid)
        print(str(a))
    print(cls_ids)
def test_parse():
    """Parse the POMBASE fixture and pass the result to ``gen`` with a
    ``CamRdfTransform`` and the label ``'cam'``.
    """
    # NOTE(review): the ontology is loaded but never passed to the parser;
    # kept so the test still exercises loading ONT -- confirm whether needed.
    ont = OntologyFactory().create(ONT)
    p = GafParser()
    # Keep the handle open until `gen` has consumed the associations, then
    # close it reliably instead of leaking it.
    with open(POMBASE, "r") as gaf_file:
        assocs = p.parse(gaf_file)
        # The SimpleAssocRdfTransform variant is currently disabled:
        # gen(assocs, SimpleAssocRdfTransform(), 'simple')
        gen(assocs, CamRdfTransform(), 'cam')
def test_default_gaf_version():
    """Check that the parser reports version ``2.1`` after parsing the
    ``test-qualifiers-no-version.gaf`` fixture.
    """
    p = GafParser()
    # Close the fixture deterministically instead of leaking the handle; the
    # parsed associations themselves are not needed here.
    with open("tests/resources/test-qualifiers-no-version.gaf") as gaf_file:
        p.parse(gaf_file, skipheader=True)
    assert p.version == "2.1"