예제 #1
0
def test_gaf_2_1_simple_terms():
    """Check the qualifier chosen when an empty GAF 2.1 qualifier is upgraded.

    Every case feeds the same SGD annotation row, differing only in its GO
    term, through ``gafparser.to_association`` and
    ``GafParser.upgrade_empty_qualifier`` and asserts the first resulting
    qualifier.
    """
    # (GO term placed in the row, identity of the expected RO qualifier)
    scenarios = [
        ("GO:0006259", "0002264"),
        ("GO:0042393", "0002327"),
        ("GO:0005773", "0001025"),
    ]

    for go_term, expected_identity in scenarios:
        row = [
            "SGD", "S000000819", "AFG3", "", go_term,
            "PMID:8681382|SGD_REF:S000055187", "IMP", "", "P",
            "Mitochondrial inner membrane m-AAA protease component",
            "YER017C|AAA family ATPase AFG3|YTA10", "gene",
            "taxon:559292", "20170428", "SGD",
        ]
        # Each scenario builds its own ontology and parser so that no state
        # is shared between cases.
        slim = OntologyFactory().create("tests/resources/goslim_generic.json")
        parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))
        parser.make_internal_cell_component_closure()

        result = gafparser.to_association(row)
        upgraded = parser.upgrade_empty_qualifier(result.associations[0])
        assert upgraded.qualifiers[0] == association.Curie(
            namespace="RO", identity=expected_identity)
예제 #2
0
    def load_associations_from_file(self, associations_type: DataType,
                                    associations_url: str,
                                    associations_cache_path: str,
                                    config: GenedescConfigParser) -> None:
        """Parse an association file and store the filtered set on this object.

        The cached copy of the file is parsed as GAF (header skipped), turned
        into an association set bound to the ontology of the requested type,
        and then passed through ``remove_blacklisted_annotations`` with the
        ``EXCLUDE_TERMS`` property of the matching config module. Any type
        other than GO, DO or EXPR leaves the object untouched.

        Args:
            associations_type (DataType): the type of associations to set
            associations_url (str): url to the association file
            associations_cache_path (str): path to cache file for the associations
            config (GenedescConfigParser): configuration object where to read properties
        """
        assoc_config = AssocParserConfig(remove_double_prefixes=True,
                                         paint=True)

        # Pick the per-type pieces; everything after this is type-agnostic.
        if associations_type == DataType.GO:
            log_message = "Loading GO associations from file"
            assoc_attr, ontology_attr = "go_associations", "go_ontology"
            module = Module.GO
        elif associations_type == DataType.DO:
            log_message = "Loading DO associations from file"
            assoc_attr, ontology_attr = "do_associations", "do_ontology"
            module = Module.DO_EXP_AND_BIO
        elif associations_type == DataType.EXPR:
            log_message = "Loading Expression associations from file"
            assoc_attr = "expression_associations"
            ontology_attr = "expression_ontology"
            module = Module.EXPRESSION
        else:
            return

        logger.info(log_message)

        set_factory = AssociationSetFactory()
        gaf_parser = GafParser(config=assoc_config)
        cached_file = self._get_cached_file(
            cache_path=associations_cache_path,
            file_source_url=associations_url)
        parsed = gaf_parser.parse(file=cached_file, skipheader=True)
        # The unfiltered set is stored first, then replaced by the filtered one.
        setattr(self, assoc_attr, set_factory.create_from_assocs(
            assocs=parsed, ontology=getattr(self, ontology_attr)))

        filtered = self.remove_blacklisted_annotations(
            association_set=getattr(self, assoc_attr),
            ontology=getattr(self, ontology_attr),
            terms_blacklist=config.get_module_property(
                module=module, prop=ConfigModuleProperty.EXCLUDE_TERMS))
        setattr(self, assoc_attr, filtered)
예제 #3
0
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown"):
    """Validate a source GAF and write the validated and filtered outputs.

    Next to ``source_gaf`` this writes ``<dataset>_valid.gaf`` (associations
    yielded by the parser), ``<dataset>_noiea.gaf`` (handed to the parser
    config as ``filtered_evidence_file``), and the parser report as
    ``<dataset>.report.md`` and ``<dataset>.report.json``.

    Args:
        dataset: name used to build the output file names and passed to the parser.
        source_gaf: path of the GAF file to validate.
        ontology_graph: ontology passed to ``AssocParserConfig``.
        gpipath: forwarded as ``gpi_authority_path``.
        paint: forwarded as ``paint``.
        group: forwarded to ``GafParser``.

    Returns:
        list: ``[validated_gaf_path, filtered_gaf_path]``.
    """
    base_dir = os.path.split(source_gaf)[0]
    filtered_path = os.path.join(base_dir, "{}_noiea.gaf".format(dataset))
    validated_gaf_path = os.path.join(base_dir, "{}_valid.gaf".format(dataset))

    # Both output files are managed by the same ``with`` so they are closed
    # even when parsing or writing raises part-way through.
    with open(filtered_path, "w") as filtered_associations, \
            open(validated_gaf_path, "w") as outfile:
        config = assocparser.AssocParserConfig(
            ontology=ontology_graph,
            filter_out_evidence=["IEA"],
            filtered_evidence_file=filtered_associations,
            gpi_authority_path=gpipath,
            paint=paint
        )
        gafwriter = GafWriter(file=outfile)

        click.echo("Validating source GAF: {}".format(source_gaf))
        parser = GafParser(config=config, group=group, dataset=dataset)

        # First pass only counts lines so the progress bar has a length.
        with open(source_gaf) as sg:
            lines = sum(1 for _ in sg)

        with open(source_gaf) as gaf:
            with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
                for assoc in associations:
                    gafwriter.write_assoc(assoc)

    with open(os.path.join(base_dir, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(base_dir, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    # ``filtered_path`` is the same string the original exposed as ``file.name``.
    return [validated_gaf_path, filtered_path]
 def load_associations(self, taxon) -> None:
     """Load the ontology named by ``self.ont`` and its associations.

     For the ``'go'`` ontology a GAF file is parsed (mouse or human source,
     chosen by ``taxon``), the raw parse result is kept on ``self.assocs``,
     and ``self.associations`` is built from the entries whose object id is
     among the descendants of GO:0008150 or GO:0003674. For any other
     ontology ``self.associations`` is created through ``self.afactory``
     for gene/phenotype associations of the requested taxon.
     """
     self.ontology = OntologyFactory().create(self.ont)
     gaf_parser = GafParser()

     if self.ont != 'go':
         ncbi_taxa = {
             'human': 'NCBITaxon:9606',
             'mouse': 'NCBITaxon:10090',
         }
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=ncbi_taxa[taxon],
         )
         return

     # GO:0008150 is biological_process and GO:0003674 is molecular_function,
     # two of the three top-level GO terms. Only their descendants are kept,
     # so cellular_component annotations are left out.
     kept_terms = set(
         self.ontology.descendants('GO:0008150')
         + self.ontology.descendants('GO:0003674')
     )
     restricted_ontology = self.ontology.subontology(kept_terms)

     if taxon == 'human':
         source = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
     elif taxon == 'mouse':
         source = "http://current.geneontology.org/annotations/mgi.gaf.gz"
     else:
         source = ''

     parsed = gaf_parser.parse(source)
     # The unfiltered parse result stays available on the instance.
     self.assocs = parsed
     without_headers = [entry for entry in parsed if 'header' not in entry.keys()]
     selected = [entry for entry in without_headers
                 if entry['object']['id'] in kept_terms]
     self.associations = self.afactory.create_from_assocs(
         selected, ontology=restricted_ontology)
예제 #5
0
def test_errors_gaf():
    """Parse the error-laden GAF fixture and check what survives validation.

    Asserts the number of report messages and parsed associations, writes
    the associations with ``GafWriter``, and checks that any remaining
    object extension is the single valid ``foo(X:1)`` expression.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Close the fixture as soon as parsing is done instead of leaking the handle.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # seven associations are expected to come out of the parse
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:

                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
예제 #6
0
def test_qualifiers_gaf():
    """Check negation, relation ids and qualifier errors for the QGAF fixture."""
    p = GafParser()
    # Close the fixture once parsing is finished.
    with open(QGAF, "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)

    # Explicit comparison kept on purpose: only values equal to True count.
    neg_assocs = [a for a in assocs if a['negated'] == True]  # noqa: E712
    assert len(neg_assocs) == 3
    for a in assocs:
        print('REL: {}'.format(a['relation']))

    assert len([a for a in assocs
                if a['relation']['id'] == 'contributes_to']) == 1

    # Build the report once and reuse it for both qualifier checks.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # For the space in `colocalizes with`
    assert len([m for m in rule_messages
                if m["obj"] == "colocalizes with"]) == 1
    assert len([m for m in rule_messages
                if m["obj"] == "involved_in"]) == 1
예제 #7
0
def produce_ttl(dataset, target_dir, gaf_path, ontology_graph):
    """Translate the associations of a GAF file into a ``<dataset>_cam.ttl`` file.

    The turtle file is written next to ``gaf_path``. Header entries yielded
    by the parser are skipped; every other association is passed to the
    ``CamRdfTransform``.

    Args:
        dataset: name used to build the output file name.
        target_dir: not used by this function.
        gaf_path: path of the GAF file to read.
        ontology_graph: ontology passed to ``AssocParserConfig``.

    Returns:
        str: path of the written turtle file.
    """
    # Local names avoid shadowing the ``gafparser`` / ``association`` modules.
    parser = GafParser()
    parser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # First pass only counts lines so the progress bar has a length.
    with open(gaf_path) as sg:
        lines = sum(1 for _ in sg)

    ttl_path = os.path.join(
        os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset))
    click.echo("Producing ttl: {}".format(ttl_path))
    rdf_writer = assoc_rdfgen.TurtleRdfWriter()
    transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)

    with open(gaf_path) as gf:
        with click.progressbar(
                iterable=parser.association_generator(file=gf),
                length=lines) as associations:
            for assoc in associations:
                if "header" not in assoc or not assoc["header"]:
                    transformer.provenance()
                    transformer.translate(assoc)

    with open(ttl_path, "wb") as ttl:
        click.echo("Writing ttl to disk")
        rdf_writer.serialize(destination=ttl)

    return ttl_path
예제 #8
0
def test_errors_gaf():
    """Parse the error-laden GAF fixture and check messages and associations.

    Asserts the total number of report messages, the number of
    ``INVALID_IDSPACE`` messages and the number of parsed associations, then
    checks that any remaining object extension is the single expected unit.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Close the fixture as soon as parsing is done instead of leaking the handle.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))
    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 13
    assert n_invalid_idspace == 1
    assert len(assocs) == 2

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a.object_extensions != []:
            # our test file has no ORs, so in DNF this is always the first
            xs = a.object_extensions[0].elements
            print(xs)
            for x in xs:

                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x.relation == association.Curie("BFO", "0000050")
                assert x.term == association.Curie.from_str('X:1')
            assert len(xs) == 1
예제 #9
0
def test_errors_gaf():
    """Parse the error-laden GAF fixture and check messages and associations.

    Asserts the total number of report messages, the number of
    ``INVALID_IDSPACE`` messages and the number of parsed associations, then
    checks that any remaining object extension is the single valid
    ``foo(X:1)`` expression.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Close the fixture as soon as parsing is done instead of leaking the handle.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))
    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 16
    assert n_invalid_idspace == 1
    assert len(assocs) == 5

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a['object_extensions'] != {}:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:

                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
예제 #10
0
def test_no_flag_valid_id():
    """Validating a known GO class id must not add any report message."""
    ontology = OntologyFactory().create(ONT)
    parser = GafParser()
    parser.config.ontology = ontology

    # Placeholder line: 17 empty columns, only needed as validation context.
    dummy_line = assocparser.SplitLine("fake", [""] * 17, taxon="foo")
    parser._validate_ontology_class_id("GO:0000785", dummy_line)

    message_count = len(parser.report.messages)
    assert message_count == 0
예제 #11
0
def test_bad_date():
    """A line whose date column is ``TODAY`` is skipped and yields no association."""
    gaf_line = (
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\t"
        "SGD:S000001583\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896\tTODAY\tPomBase\tfoo(X:1)"
    )
    parser = GafParser()
    outcome = parser.parse_line(gaf_line)
    assert outcome.skipped == True
    assert outcome.associations == []
예제 #12
0
def test_semsearch():
    """Run the semantic search engine over the PomBase associations of GENES.

    Builds an association set restricted to its own subontology, computes
    all MICAs, then scores every ordered pair of subjects; a subject
    compared with itself must have a cosine score above 0.9999.
    """
    set_factory = AssociationSetFactory()
    gaf_source = POMBASE
    full_ontology = OntologyFactory().create(ONT)
    gaf_parser = GafParser()

    parsed = gaf_parser.parse(gaf_source, skipheader=True)
    gene_assocs = [entry for entry in parsed
                   if entry['subject']['label'] in GENES]
    aset = set_factory.create_from_assocs(gene_assocs, ontology=full_ontology)

    # Shrink the ontology to what the association set actually uses.
    pruned_ontology = aset.subontology()
    aset.ontology = pruned_ontology
    logging.info('Genes={} Terms={}'.format(len(aset.subjects),
                                            len(pruned_ontology.nodes())))

    print('STATS={}'.format(aset.as_dataframe().describe()))

    engine = SemSearchEngine(assocmodel=aset)

    logging.info('Calculating all MICAs')
    engine.calculate_all_micas()

    logging.info('Doing pairwise')
    for left in aset.subjects:
        for right in aset.subjects:
            cosine = engine.pw_score_cosine(left, right)
            if left == right:
                assert (cosine > 0.9999)
            best_matches = engine.pw_score_resnik_bestmatches(left, right)
            print('{} x {} = {} // {}'.format(left, right, cosine, best_matches))
예제 #13
0
 def retrieve_associations(self, ont, group):
     """Load the ontology ``ont`` and the associations for ``group``.

     For the ``'go'`` ontology the GAF file of the requested group is parsed,
     the raw parse result is kept on ``self.assocs``, and
     ``self.associations`` is built from the entries whose object id is among
     the descendants of GO:0008150 or GO:0003674. For any other ontology
     ``self.associations`` is created through ``self.afactory`` for
     gene/phenotype associations of the group's taxon.

     Raises:
         ValueError: if ``ont`` is ``'go'`` and no GAF source is known for ``group``.
         KeyError: if ``ont`` is not ``'go'`` and ``group`` has no taxon mapping.
     """
     taxon_map = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     gaf_urls = {
         'mouse': "http://current.geneontology.org/annotations/mgi.gaf.gz",
         'human': "http://current.geneontology.org/annotations/goa_human.gaf.gz",
     }
     ofactory = OntologyFactory()
     self.ontology = ofactory.create(ont)
     if ont == 'go':
         if group not in gaf_urls:
             raise ValueError(
                 "No GAF source is known for group {!r}".format(group))
         go_roots = set(
             self.ontology.descendants('GO:0008150') +
             self.ontology.descendants('GO:0003674'))
         sub_ont = self.ontology.subontology(go_roots)
         # Parse the source selected for the group; a hard-coded local human
         # file used to be parsed here regardless of ``group``.
         assocs = GafParser().parse(gaf_urls[group])
         # The unfiltered parse result stays available on the instance.
         self.assocs = assocs
         assocs = [x for x in assocs if 'header' not in x.keys()]
         assocs = [x for x in assocs if x['object']['id'] in go_roots]
         self.associations = self.afactory.create_from_assocs(
             assocs, ontology=sub_ont)
     else:
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=taxon_map[group])
예제 #14
0
    def create_from_file(self, file=None, fmt='gaf', skim=True, **args):
        """
        Creates from a file.

        Arguments
        ---------
        file : str or file
            input file or filename
        fmt : str
            name of format: 'gaf', 'gpad' or 'hpoa'
        skim : bool
            if True the parser's ``skim`` result is passed to
            ``create_from_tuples``; otherwise the file is parsed (header
            skipped) and passed to ``create_from_assocs``
        **args
            forwarded to ``create_from_tuples`` / ``create_from_assocs``

        Raises
        ------
        ValueError
            if ``fmt`` is not one of the recognized formats

        """
        if fmt == 'gaf':
            p = GafParser()
        elif fmt == 'gpad':
            p = GpadParser()
        elif fmt == 'hpoa':
            p = HpoaParser()
        else:
            # Fail here with a clear message instead of continuing with no
            # parser and crashing later on ``None.skim`` / ``None.parse``.
            message = "Format not recognized: {}".format(fmt)
            logging.error(message)
            raise ValueError(message)

        logging.info("Parsing {} with {}/{}".format(file, fmt, p))
        if skim:
            results = p.skim(file)
            return self.create_from_tuples(results, **args)
        else:
            assocs = p.parse(file, skipheader=True)
            return self.create_from_assocs(assocs, **args)
예제 #15
0
def produce_gpi(dataset, target_dir, gaf_path, ontology_graph):
    """Derive a ``<dataset>.gpi`` file from the associations of a GAF file.

    Each association is converted to an entity through ``GafGpiBridge``; every
    distinct non-``None`` entity is written once, in order of first
    appearance. ``target_dir`` is not used by this function.

    Returns the path of the written GPI file, located next to ``gaf_path``.
    """
    parser = GafParser()
    parser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # First pass only counts lines so the progress bar has a length.
    line_count = 0
    with open(gaf_path) as counting_handle:
        for _ in counting_handle:
            line_count += 1

    gpi_path = os.path.join(
        os.path.dirname(gaf_path), "{}.gpi".format(dataset))
    with open(gaf_path) as gaf_handle:
        with open(gpi_path, "w") as gpi_handle:
            click.echo("Using {} as the gaf to build gpi with".format(gaf_path))
            bridge = gafgpibridge.GafGpiBridge()
            writer = entitywriter.GpiWriter(file=gpi_handle)
            written = set()

            generator = parser.association_generator(file=gaf_handle)
            with click.progressbar(iterable=generator,
                                   length=line_count) as assoc_stream:
                for assoc in assoc_stream:
                    entity = bridge.convert_association(assoc)
                    if entity in written or entity is None:
                        continue
                    # First time this entity is seen: remember it and emit it
                    written.add(entity)
                    writer.write_entity(entity)

    return gpi_path
예제 #16
0
def test_default_gaf_version():
    """A GAF file without a version header leaves the parser at version 2.1."""
    p = GafParser()

    # Only the parser state matters here; close the fixture after parsing.
    with open("tests/resources/test-qualifiers-no-version.gaf") as gaf_file:
        p.parse(gaf_file, skipheader=True)

    assert p.version == "2.1"
예제 #17
0
def test_subject_extensions():
    """Column 17 of a GAF line ends up as an ``isoform`` subject extension."""
    gaf_line = (
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\t"
        "SGD:S000001583\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tfoo(X:1)\tUniProtKB:P12345"
    )
    parser = GafParser()
    outcome = parser.parse_line(gaf_line)

    print(json.dumps(outcome.associations[0], indent=4))
    assert "subject_extensions" in outcome.associations[0]

    extensions = outcome.associations[0]['subject_extensions']
    isoform_fillers = [ext["filler"] for ext in extensions
                       if ext["property"] == "isoform"]
    assert isoform_fillers[0] == "UniProtKB:P12345"
예제 #18
0
def test_subject_extensions():
    """Column 17 of a GAF line yields exactly one subject extension with that term."""
    gaf_line = (
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\t"
        "SGD:S000001583\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    )
    parser = GafParser()
    outcome = parser.parse_line(gaf_line)
    assert len(outcome.associations[0].subject_extensions) == 1

    extensions = outcome.associations[0].subject_extensions
    form_term = extensions[0].term
    assert form_term == association.Curie.from_str("UniProtKB:P12345")
예제 #19
0
def test_validate_go_idspaces():
    """Restricting class id spaces to an unused prefix rejects every line.

    Also checks that a freshly constructed parser does not inherit the
    ``class_idspaces`` setting of an earlier instance.
    """
    p = GafParser()
    p.config.class_idspaces = ['FOOZ']
    # Close the fixture as soon as parsing is done instead of leaking the handle.
    with open(POMBASE, "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    for m in p.report.messages:
        print("MESSAGE: {}".format(m))
    assert len(assocs) == 0
    assert len(p.report.messages) > 1
    summary = p.report.to_report_json()
    assert summary['associations'] == 0
    assert summary['lines'] > 300
    print(p.report.to_markdown())

    # ensure config is not preserved
    p = GafParser()
    assert p.config.class_idspaces is None
예제 #20
0
def test_obsolete_replair_of_withfrom():
    """Check handling of obsolete GO terms in the with/from column.

    The first association of the ZFIN fixture must carry GO:0005912 as its
    with/from value. A line whose with/from term is GO:0016458 must produce
    no association and a gorule-0000020 message naming that term.
    """
    p = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    # Close the fixture as soon as parsing is done instead of leaking the handle.
    with open(ZFIN_GAF, "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    assert assocs[0].evidence.with_support_from == [
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')])
    ]

    # Reset parser report
    p = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    p.version = "2.2"
    obsolete_no_replacement_line = "FB\tFBgn0003334\tScm\tlocated_in\tGO:0005634\tFB:FBrf0179383|PMID:15280237\tIC\tGO:0016458\tC\tSex comb on midleg\tCG9495|SCM|Sex Comb on Midleg|Sex Comb on the Midleg|Sex combs on midleg|Sex combs on midlegs|Su(z)302|l(3)85Ef|scm|sex comb on midleg\tprotein\ttaxon:7227\t20050203\tUniProt\t\t"
    assoc_result = p.parse_line(obsolete_no_replacement_line)
    assert assoc_result.associations == []
    assert p.report.to_report_json(
    )["messages"]["gorule-0000020"][0]["obj"] == "GO:0016458"
예제 #21
0
def test_gaf_2_1_upconvert_in_parse():
    """``parse`` itself fills in the relation for a GAF 2.1 line with a blank qualifier."""
    gaf_text = (
        "!gaf-version: 2.1\n"
        "SGD\tS000000819\tAFG3\t\tGO:0005840\tPMID:8681382|SGD_REF:S000055187\t"
        "IMP\t\tP\tMitochondrial inner membrane m-AAA protease component\t"
        "YER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD"
    )
    gaf_stream = io.StringIO(gaf_text)
    slim = OntologyFactory().create("tests/resources/goslim_generic.json")
    parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))

    # Version 2.1, blank qualifier and an ontology configured: the parsed
    # association is expected to come back with BFO:0000050 as its relation.
    parsed = parser.parse(gaf_stream, skipheader=True)
    expected_relation = association.Curie(namespace="BFO", identity="0000050")
    assert parsed[0].relation == expected_relation
예제 #22
0
def test_one_line():
    """Smoke test: parsing one PomBase GAF line with an ontology must not raise."""
    slim = OntologyFactory().create("tests/resources/goslim_generic.json")
    parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))

    # Tab-separated GAF columns; the qualifier and with/from columns are empty.
    gaf_line = (
        "PomBase\tSPBC16D10.09\tpcn1\t\tGO:0009536\tPMID:8663159\tIDA\t\tC\t"
        "PCNA\tpcn\tprotein\ttaxon:4896\t20150326\tPomBase"
    )
    parsed = parser.parse_line(gaf_line)
예제 #23
0
def test_bad_withfrom():
    """A with/from value with no identity part rejects the line under gorule-0000001."""
    # The with/from column is "SGD:": a namespace with nothing after the colon.
    gaf_line = (
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\t"
        "SGD:\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896\t20181024\tPomBase"
    )
    parser = GafParser()
    outcome = parser.parse_line(gaf_line)
    assert outcome.associations == []

    rule_messages = parser.report.to_report_json()["messages"]["gorule-0000001"]
    assert rule_messages[0]["obj"] == "SGD:"
예제 #24
0
def test_errors_gaf():
    """Parsing the error-laden GAF fixture must report exactly eight messages."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Only the report matters here; close the fixture after parsing.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 8
예제 #25
0
def test_alt_id_repair():
    """Parsing with the ALT_ID_ONT ontology yields an association on GO:0043623."""
    parser = GafParser()
    alt_id_ontology = OntologyFactory().create(ALT_ID_ONT)
    parser.config.ecomap = EcoMap()
    parser.config.ontology = alt_id_ontology

    gaf_text = (
        "SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\t"
        "IMP\t\tP\tMitochondrial inner membrane m-AAA protease component\t"
        "YER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD"
    )
    parsed = parser.parse(io.StringIO(gaf_text), skipheader=True)

    assert len(parsed) > 0
    first = parsed[0]
    assert first["object"]["id"] == "GO:0043623"
예제 #26
0
 def create_from_remote_file(self, group, snapshot=True, **args):
     """
     Creates from remote GAF

     The GAF for ``group`` is streamed from snapshot.geneontology.org,
     skimmed with ``GafParser`` and passed to ``create_from_tuples`` together
     with ``**args``. ``snapshot`` is accepted but not used: the snapshot
     URL is always requested.

     Raises ``requests.HTTPError`` when the server answers with an error status.
     """
     import requests
     url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
     user_agent = get_user_agent(modules=[requests], caller_name=__name__)
     # A streamed response holds its connection until it is closed, so it is
     # used as a context manager and released once skimming is done.
     with requests.get(url, stream=True, headers={'User-Agent': user_agent}) as r:
         # Do not skim the body of an error page as if it were a GAF file.
         r.raise_for_status()
         p = GafParser()
         results = p.skim(r.raw)
     return self.create_from_tuples(results, **args)
예제 #27
0
 def create_from_remote_file(self, group, snapshot=True, **args):
     """
     Creates from remote GAF

     The GAF for ``group`` is streamed from snapshot.geneontology.org,
     skimmed with ``GafParser`` and passed to ``create_from_tuples`` together
     with ``**args``. ``snapshot`` is accepted but not used: the snapshot
     URL is always requested.

     Raises ``requests.HTTPError`` when the server answers with an error status.
     """
     import requests
     url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
     # A streamed response holds its connection until it is closed, so it is
     # used as a context manager and released once skimming is done.
     with requests.get(url, stream=True) as r:
         # Do not skim the body of an error page as if it were a GAF file.
         r.raise_for_status()
         p = GafParser()
         results = p.skim(r.raw)
     return self.create_from_tuples(results, **args)
예제 #28
0
def test_skim_gaf():
    """Skimming the PomBase fixture yields 370 (subject, label, object) tuples."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Close the fixture as soon as skimming is done instead of leaking the handle.
    with open(POMBASE, "r") as gaf_file:
        results = p.skim(gaf_file)
    assert len(results) == 370
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:')
예제 #29
0
def test_object_extensions():
    """``part_of(X:1)`` in column 16 parses to one BFO:0000050 / X:1 extension unit."""
    gaf_line = (
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\t"
        "SGD:S000001583\tC\t"
        "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\t"
        "ppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    )
    parser = GafParser()
    outcome = parser.parse_line(gaf_line)
    print(parser.report.to_markdown())
    assert len(outcome.associations[0].object_extensions) > 0

    expected_unit = association.ExtensionUnit(
        association.Curie("BFO", "0000050"), association.Curie("X", "1"))
    expected_extensions = [association.ConjunctiveSet([expected_unit])]
    assert outcome.associations[0].object_extensions == expected_extensions
예제 #30
0
 def load_associations(self, group):
     """Parse the gene-association file of ``group`` into ``self.associations``.

     The human group uses the ``goa_human.gaf.gz`` file; every other group
     uses ``gene_association.<group>.gz``. Header entries are dropped before
     the association set is created against ``self.ontology``.
     """
     gaf_parser = GafParser()
     set_factory = AssociationSetFactory()

     if group == 'human':
         source = "http://geneontology.org/gene-associations/goa_human.gaf.gz"
     else:
         source = "http://geneontology.org/gene-associations/gene_association.{}.gz".format(
             group)

     parsed = gaf_parser.parse(source)
     without_headers = [entry for entry in parsed
                        if 'header' not in entry.keys()]
     self.associations = set_factory.create_from_assocs(
         without_headers, ontology=self.ontology)