def _run(obj):
    """Check annotation reduction for the default and explicit options.

    Prints a banner naming the annotation object, reduces its associations
    with the default AnnoOptions and with every keep_ND/keep_NOT combination
    (passing a label to obj.hms after each step), then asserts that:
      * excluding IEA equals including every other evidence code,
      * including the 'High_Throughput' name equals including its codes,
      * the sizes of the reduced lists agree with the NOT and ND counts
        held by the _Chk helper.
    """
    #pylint: disable=superfluous-parens
    print('\nTESTING: {T} --------------------------------------------------'.
          format(T=obj.name))
    chk = _Chk(obj)
    obj.chk_qualifiers()
    if obj.name != 'id2gos':
        assert chk.num_nots != 0, '{NAME} HAS 0 NOT Qualifiers'.format(
            NAME=obj.name)
    annos = obj.associations
    num_annos = len(annos)

    tic = timeit.default_timer()
    evobj = obj.evobj
    annos_dflt = obj.reduce_annotations(annos, AnnoOptions(evobj))
    tic = obj.hms('Default', tic)

    # (timing-message pattern, keep_ND, keep_NOT), listed in execution order
    combos = (
        ('{N:6,} assc_nd0_not0', False, False),
        ('{N:6,} assc_nd0_not1', False, True),
        ('{N:6,} assc_nd1_not0', True, False),
        ('{N:6,} assc_nd1_not1', True, True),
    )
    reduced = {}
    for pattern, keep_nd, keep_not in combos:
        nts = obj.reduce_annotations(
            annos, AnnoOptions(evobj, keep_ND=keep_nd, keep_NOT=keep_not))
        tic = obj.hms(pattern.format(N=len(nts)), tic)
        reduced[(keep_nd, keep_not)] = nts

    reduce_ = obj.reduce_annotations
    # Excluding IEA must give the same result as including all other codes
    non_iea = set(evobj.code2nt.keys()) - {'IEA'}
    # pylint: disable=line-too-long
    lhs = reduce_(annos, AnnoOptions(obj.evobj, ev_exclude={'IEA'}))
    rhs = reduce_(annos, AnnoOptions(obj.evobj, ev_include=non_iea))
    assert lhs == rhs
    # Including the 'High_Throughput' name must match including its codes
    htp_codes = evobj.grp2codes['High_Throughput']
    lhs = reduce_(annos, AnnoOptions(obj.evobj, ev_include={'High_Throughput'}))
    rhs = reduce_(annos, AnnoOptions(obj.evobj, ev_include=htp_codes))
    assert lhs == rhs

    # Default options behave like keep_ND=False, keep_NOT=False
    assert len(annos_dflt) == len(reduced[(False, False)])
    # Keeping both ND and NOT drops nothing
    assert len(reduced[(True, True)]) == num_annos
    # Dropping only NOT removes exactly the NOT annotations
    num_nd1_not0 = len(reduced[(True, False)])
    assert num_nd1_not0 == num_annos - chk.num_nots, '{N}: ACT({A:,}) != EXP({E:,})'.format(
        N=obj.name, A=num_nd1_not0, E=num_annos - chk.num_nots)
    # Dropping only ND removes exactly the ND annotations
    num_nd0_not1 = len(reduced[(False, True)])
    assert num_nd0_not1 == num_annos - chk.num_nds, '{N}: ACT({A:,}) != EXP({E:,})'.format(
        N=obj.name, A=num_nd0_not1, E=num_annos - chk.num_nds)
Beispiel #2
0
 def _get_id2gos(self, associations, **kws):
     """Reduce the given associations and return them as a dict.

     Keyword arguments are forwarded to AnnoOptions. The result comes from
     self.get_dbid2goids when options.b_geneid2gos is true, otherwise from
     self.get_goid2dbids.
     """
     opts = AnnoOptions(self.evobj, **kws)
     # By default the reduction removes the annotations below.
     # For all options, see goatools/anno/opts.py:
     #   * Evidence_Code == ND -> No biological Data available
     #   * Qualifiers contain NOT
     reduced = self.reduce_annotations(associations, opts)
     if opts.b_geneid2gos:
         return self.get_dbid2goids(reduced)
     return self.get_goid2dbids(reduced)
Beispiel #3
0
 def get_taxid2asscs(self, taxids=None, **kws):
     """Return reduced associations per taxid as 'ID2GOs' and 'GO2IDs' dicts.

     For every taxid yielded by self._get_taxids(taxids), the annotations in
     self.taxid2asscs[taxid] are reduced with AnnoOptions built from kws and
     stored under the keys 'ID2GOs' and 'GO2IDs' of a nested defaultdict.
     """
     # WAS: get_annotations_taxid2dct
     def _new_taxid_entry():
         """Create the per-taxid dict; missing keys default to dict-of-sets."""
         return cx.defaultdict(lambda: cx.defaultdict(set))

     result = cx.defaultdict(_new_taxid_entry)
     opts = AnnoOptions(self.evobj, **kws)
     for taxid in self._get_taxids(taxids):
         reduced = self.reduce_annotations(self.taxid2asscs[taxid], opts)
         entry = result[taxid]
         entry['ID2GOs'] = self.get_dbid2goids(reduced)
         entry['GO2IDs'] = self.get_goid2dbids(reduced)
     return result
 def _get_id2gos(self, associations, **kws):
     """Reduce the given associations and return them as a dict.

     Keyword arguments are forwarded to AnnoOptions. The result comes from
     self.get_dbid2goids when options.b_geneid2gos is true, otherwise from
     self.get_goid2dbids. An empty result is returned as-is.
     """
     opts = AnnoOptions(self.evobj, **kws)
     # By default the reduction removes the annotations below.
     # For all options, see goatools/anno/opts.py:
     #   * Evidence_Code == ND -> No biological Data available
     #   * Qualifiers contain NOT
     reduced = self.reduce_annotations(associations, opts)
     if opts.b_geneid2gos:
         mapping = self.get_dbid2goids(reduced)
     else:
         mapping = self.get_goid2dbids(reduced)
     # No error is raised here when no associations were found
     return mapping
Beispiel #5
0
 def _get_id2gos(self, ntannos_usr, propagate_counts=False, relationships=None, prt=sys.stdout, **kws):
     """Reduce ntannos_usr and return them as a dict.

     Remaining keyword arguments are forwarded to AnnoOptions. The reduced
     annotations are passed to self.get_dbid2goids together with
     propagate_counts, relationships and prt. That result is returned when
     options.b_geneid2gos is true; otherwise it is converted with
     self._get_goid2dbids and the converted dict is returned.
     """
     opts = AnnoOptions(self.evobj, **kws)
     # By default the reduction removes the annotations below.
     # For all options, see goatools/anno/opts.py:
     #   * Evidence_Code == ND -> No biological Data available
     #   * Qualifiers contain NOT
     reduced = self.reduce_annotations(ntannos_usr, opts)
     id2gos = self.get_dbid2goids(reduced, propagate_counts, relationships, prt)
     # No error is raised here when no associations were found
     return id2gos if opts.b_geneid2gos else self._get_goid2dbids(id2gos)
Beispiel #6
0
def _tst_ns2(obj, idx):
    """Test functions which use ns2 functions.

    Splits the annotations of `obj` by namespace, reduces each namespace's
    annotations with ``ev_include=INC_GOOD`` and compares the step-by-step
    result with the top-level ``obj.get_ns2assc``.

    Args:
        obj: annotation object providing get_associations, get_ns2ntsanno,
            reduce_annotations, get_dbid2goids, get_ns2assc, get_desc,
            evobj and name.
        idx: value printed on each summary line to identify the run.

    Raises:
        AssertionError: if any of the consistency checks fails.
    """
    # ALL annotations for a species
    nts_all = obj.get_associations()
    num_nts_all = len(nts_all)
    num_nts_act = 0

    # Separate ALL annotations into BP MF CC
    ns2ntsanno = obj.get_ns2ntsanno()
    assert set(ns2ntsanno.keys()).issubset({'BP', 'MF',
                                            'CC'}), ns2ntsanno.keys()

    # Reduce annotations to the evidence codes in INC_GOOD
    ns2anno_exp = {}
    kws = {'ev_include': INC_GOOD}
    for nspc, nts_orig in sorted(ns2ntsanno.items()):
        opt = AnnoOptions(obj.evobj, **kws)
        nts_redu = obj.reduce_annotations(nts_orig, opt)
        num_nts_orig = len(nts_orig)
        # Check that only current namespace is seen on namedtuples.
        # The set is built from every namedtuple: filtering on
        # `nt.NS == nspc` first would hide any foreign namespace.
        ns_seen = {nt.NS for nt in nts_orig}
        assert ns_seen == {nspc}, '{NS}: UNEXPECTED NAMESPACES {SEEN}'.format(
            NS=nspc, SEEN=sorted(ns_seen))
        num_nts_act += num_nts_orig
        num_nts_redu = len(nts_redu)
        # pylint: disable=line-too-long
        print(
            '{OPT} {IDX}) ns2ntanno {NS} {ALL:7,}=Loaded -> {N:7,} -> {R:7,} annos: {TYPE}'
            .format(OPT=opt,
                    IDX=idx,
                    NS=nspc,
                    ALL=num_nts_all,
                    N=num_nts_orig,
                    R=num_nts_redu,
                    TYPE=obj.get_desc()))
        ns2anno_exp[nspc] = obj.get_dbid2goids(nts_redu)

        assert num_nts_all >= num_nts_orig
        # 'id2gos' objects are expected to lose nothing in the reduction;
        # every other kind is expected to lose at least one annotation
        if obj.name == 'id2gos':
            assert num_nts_orig == num_nts_redu
        else:
            assert num_nts_orig > num_nts_redu

    # The per-namespace lists can never hold more than was loaded
    assert num_nts_all >= num_nts_act

    # Compare step-by-step transformation with top-level function, id2gos
    ns2anno_act = obj.get_ns2assc(**kws)
    for nspc, anno_exp in ns2anno_exp.items():
        anno_act = ns2anno_act[nspc]
        assert set(anno_exp.keys()) == set(anno_act.keys())
        for geneid, gos_exp in anno_exp.items():
            gos_act = anno_act[geneid]
            assert gos_exp == gos_act
Beispiel #7
0
def read_ncbi_gene2go(fin_gene2go, taxids=None, **kws):
    """Read NCBI's gene2go. Return gene2go data for user-specified taxids.

    A Gene2GoReader is created from fin_gene2go and taxids, and all keyword
    arguments are passed to AnnoOptions.

    When 'taxid2asscs' is not among the keyword arguments and the reader
    holds exactly one taxid, the result of obj.get_annotations_dct for that
    taxid is returned. Otherwise the per-taxid associations are collected,
    handed to obj.fill_taxid2asscs together with the user's 'taxid2asscs'
    object if one was given, and obj.get_id2gos_all of them is returned.
    """
    reader = Gene2GoReader(fin_gene2go, taxids)
    opts = AnnoOptions(**kws)
    usr_wants_taxid2asscs = 'taxid2asscs' in kws
    # By default, return id2gos. User can cause go2geneids to be returned by:
    #   >>> read_ncbi_gene2go(..., go2geneids=True)
    if not usr_wants_taxid2asscs and len(reader.taxid2asscs) == 1:
        only_taxid = next(iter(reader.taxid2asscs))
        return reader.get_annotations_dct(only_taxid, opts)
    # Optional detailed associations split by taxid and having both ID2GOs & GO2IDs
    # e.g., taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    taxid2dct = reader.get_annotations_taxid2dct(opts)
    if usr_wants_taxid2asscs:
        reader.fill_taxid2asscs(kws['taxid2asscs'], taxid2dct)
    return reader.get_id2gos_all(taxid2dct)