Esempio n. 1
0
class AnnoReaderBase(object):
    """Base class for readers of Gene Association (GO annotation) files.

    ``__init__`` calls ``self._init_associations(filename, **kws)``, which is
    not defined in this class and must be supplied by a subclass; its return
    value is stored in ``self.associations``.  The methods below read these
    fields from each annotation record: ``DB_ID``, ``GO_ID``, ``Qualifier``,
    ``Evidence_Code`` and, when present, ``NS``.
    """
    # pylint: disable=broad-except,line-too-long,too-many-instance-attributes

    # Timer value captured when the class body runs; default start for hms()
    tic = timeit.default_timer()

    # Expected values for a Qualifier (seen in both GAF and gene2go)
    exp_qualifiers = {
        'not',
        'contributes_to',
        'colocalizes_with',
    }

    # pylint: disable=too-many-instance-attributes
    def __init__(self, name, filename=None, **kws):
        """Store settings, then load annotations through the subclass hook.

        Args:
            name: Annotation type name; compared against 'gpad' and 'id2gos'
                elsewhere in this class.
            filename: Annotation file, handed to ``_init_associations``.
            **kws: ``godag`` and ``namespaces`` are read here; every keyword
                (e.g. allow_missing_symbol) is forwarded to
                ``_init_associations``.
        """
        self.name = name
        self.filename = filename
        self.godag = kws.get('godag')
        self.namespaces = kws.get('namespaces')
        self.evobj = EvidenceCodes()
        # Read annotation file, store namedtuples:
        #     Gene2GoReader(filename=None, taxids=None):
        #     GafReader(filename=None, hdr_only=False, prt=sys.stdout, allow_missing_symbol=False):
        #     GpadReader(filename=None, hdr_only=False):
        self.hdr = None
        self.datobj = None
        # pylint: disable=no-member
        self.associations = self._init_associations(filename, **kws)
        # assert self.associations, 'NO ANNOTATIONS FOUND: {ANNO}'.format(ANNO=filename)
        assert self.namespaces is None or isinstance(self.namespaces, set)

    def get_desc(self):
        """Return a one-line description: name, namespaces and a 'godag' flag."""
        # Sort: self.namespaces is a set, so an unsorted join has no stable order
        nss = '' if self.namespaces is None else ','.join(sorted(self.namespaces))
        godag = '' if self.godag is None else 'godag'
        return '{NAME} {NSs} {GODAG}'.format(NAME=self.name, NSs=nss, GODAG=godag)

    # pylint: disable=unused-argument
    def get_associations(self, taxid=None):
        """Return the stored annotations; taxid is ignored in this base class."""
        # taxid is for NCBI's gene2gos
        return self.associations

    def prt_summary_anno2ev(self, prt=sys.stdout):
        """Print annotation/evidence code summary."""
        self.evobj.prt_summary_anno2ev(self.associations, prt)

    def get_name(self):
        """Return type of annotation"""
        return self.name

    # pylint: disable=no-self-use
    def get_taxid(self):
        """Return taxid, if one was provided, otherwise return -1"""
        # The base class never holds a taxid
        return -1

    def get_ns2assc(self, **kws):
        """Return a dict mapping each namespace (BP, MF, CC) to its id2gos dict."""
        ns2nts = self.get_ns2ntsanno(kws.get('taxid'))
        return {ns: self._get_id2gos(nts, **kws) for ns, nts in ns2nts.items()}

    # pylint: disable=unused-argument
    def get_ns2ntsanno(self, taxid=None):
        """Split the stored annotations into per-namespace lists: BP, MF, CC"""
        return self._get_ns2ntsanno(self.associations)

    def _get_ns2ntsanno(self, annotations):
        """Group annotations by their NS field, keeping only BP, MF and CC.

        Namespaces with no annotation are absent from the returned dict.
        """
        if self.name in {'gpad', 'id2gos'}:
            assert self.godag is not None, "{T}: LOAD godag TO USE {C}::ns2ntsanno".format(
                C=self.__class__.__name__, T=self.name)
        ns2nts = cx.defaultdict(list)
        for nta in annotations:
            ns2nts[nta.NS].append(nta)
        return {ns: ns2nts[ns] for ns in {'BP', 'MF', 'CC'}.intersection(ns2nts)}

    def get_id2gos_nss(self, **kws):
        """Return all associations in a dict, id2gos, regardless of namespace"""
        return self._get_id2gos(self.associations, **kws)

    def get_id2gos(self, namespace='BP', **kws):
        """Return associations from the specified namespace in a dict, id2gos.

        When the annotation records carry no NS field, the namespace is
        ignored and every association is used.
        """
        # pylint: disable=superfluous-parens
        assoc = self.associations
        if self.has_ns():
            assoc = [nt for nt in assoc if nt.NS == namespace]
        elif assoc:
            # Records exist but lack NS; stay quiet when there is simply no data
            print('**ERROR: GODAG NOT LOADED. IGNORING namespace({NS})'.format(
                NS=namespace))
        id2gos = self._get_id2gos(assoc, **kws)
        print('{N} IDs in association branch, {NS}'.format(N=len(id2gos),
                                                           NS=namespace))
        return id2gos

    def has_ns(self):
        """Return True if the first annotation record has an NS field.

        Returns False when there are no annotations at all, rather than
        letting next() raise StopIteration.
        """
        first = next(iter(self.associations), None)
        return first is not None and hasattr(first, 'NS')

    def _get_id2gos(self, associations, **kws):
        """Return given associations in a dict, id2gos (or go2ids, per options)."""
        options = AnnoOptions(self.evobj, **kws)
        # Default reduction is to remove. For all options, see goatools/anno/opts.py:
        #   * Evidence_Code == ND -> No biological data No biological Data available
        #   * Qualifiers contain NOT
        assc = self.reduce_annotations(associations, options)
        if options.b_geneid2gos:
            return self.get_dbid2goids(assc)
        return self.get_goid2dbids(assc)

    def _get_namespaces(self, nts):
        """Get the set of namespaces seen in the namedtuples."""
        return set(nt.NS for nt in nts) if self.has_ns() else set()

    # Qualifier (column 4)
    # Flags that modify the interpretation of an annotation one (or more) of NOT, contributes_to, colocalizes_with
    # This field is not mandatory;
    #     * cardinality 0, 1, >1;
    #     * for cardinality >1 use a pipe to separate entries (e.g. NOT|contributes_to)
    def prt_qualifiers(self, prt=sys.stdout):
        """Print Qualifiers: 1,462 colocalizes_with; 1,454 contributes_to; 1,157 not"""
        # 13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
        #  4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        self._prt_qualifiers(self.associations, prt)

    @staticmethod
    def _prt_qualifiers(associations, prt=sys.stdout):
        """Print Qualifiers found in the annotations, most common first.
           QUALIFIERS:
                1,462 colocalizes_with
                1,454 contributes_to
                1,157 not
                   13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
                    4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        """
        prt.write('QUALIFIERS:\n')
        qual2cnt = cx.Counter(q for nt in associations for q in nt.Qualifier)
        for fld, cnt in qual2cnt.most_common():
            prt.write('    {N:6,} {FLD}\n'.format(N=cnt, FLD=fld))

    def reduce_annotations(self, annotations, options):
        """Reduce annotations to ones used to identify enrichment (normally exclude ND and NOT)."""
        # The predicate is built by the options object; it is given each
        # record's Qualifier and Evidence_Code
        getfnc_qual_ev = options.getfnc_qual_ev()
        return [
            nt for nt in annotations
            if getfnc_qual_ev(nt.Qualifier, nt.Evidence_Code)
        ]

    @staticmethod
    def get_dbid2goids(associations):
        """Return a dict mapping each DB_ID to the set of its GO_IDs."""
        id2gos = cx.defaultdict(set)
        for ntd in associations:
            id2gos[ntd.DB_ID].add(ntd.GO_ID)
        return dict(id2gos)

    @staticmethod
    def get_goid2dbids(associations):
        """Return a dict mapping each GO_ID to the set of its DB_IDs."""
        go2ids = cx.defaultdict(set)
        for ntd in associations:
            go2ids[ntd.GO_ID].add(ntd.DB_ID)
        return dict(go2ids)

    def hms(self, msg, tic=None, prt=sys.stdout):
        """Print elapsed time and message; return the current timer value.

        Elapsed time is measured from tic, or from the class-level tic when
        tic is None.
        """
        if tic is None:
            tic = self.tic
        now = timeit.default_timer()
        hms = str(datetime.timedelta(seconds=(now - tic)))
        prt.write('{HMS}: {MSG}\n'.format(HMS=hms, MSG=msg))
        return now

    def chk_associations(self, fout_err=None):
        """Check that associations are in expected format."""
        # No checks are done in the base class
        pass

    def nts_ev_nd(self):
        """Get annotations where Evidence_code == 'ND' (No biological data)"""
        return [nt for nt in self.associations if nt.Evidence_Code == 'ND']

    def nts_qual_not(self):
        """Get annotations having Qualifiers containing NOT"""
        return [nt for nt in self.associations if self._has_not_qual(nt)]

    def chk_qualifiers(self):
        """Check format of qualifier: a set that is neither {''} nor {'-'}.

        Nothing is checked when the annotation type is 'id2gos'.
        """
        if self.name == 'id2gos':
            return
        for ntd in self.associations:
            qual = ntd.Qualifier
            # The check requires a set, so the message names a set
            assert isinstance(
                qual, set), '{NAME}: QUALIFIER MUST BE A SET: {NT}'.format(
                    NAME=self.name, NT=ntd)
            assert qual != set(['']), ntd
            assert qual != set(['-']), ntd
            assert 'always' not in qual, 'SPEC SAID IT WOULD BE THERE'

    @staticmethod
    def _has_not_qual(ntd):
        """Return True if any qualifier contains the substring 'not' or 'NOT'"""
        return any('not' in qual or 'NOT' in qual for qual in ntd.Qualifier)
Esempio n. 2
0
class AnnoReaderBase(object):
    """Base class for readers of Gene Association (GO annotation) files.

    ``__init__`` calls ``self._init_associations(filename, **kws)``, which is
    not defined in this class and must be supplied by a subclass; its return
    value is stored in ``self.associations``.  The methods below read these
    fields from each annotation record: ``DB_ID``, ``GO_ID``, ``Qualifier``,
    ``Evidence_Code`` and, when present, ``NS``.
    """
    # pylint: disable=broad-except,line-too-long,too-many-instance-attributes

    # Timer value captured when the class body runs; default start for hms()
    tic = timeit.default_timer()

    # Expected values for a Qualifier (seen in both GAF and gene2go)
    exp_qualifiers = {
        'not',
        'contributes_to',
        'colocalizes_with',
    }

    valid_formats = {'gpad', 'gaf', 'gene2go', 'id2gos'}

    # Namespaces kept by _get_ns2ntsanno
    exp_nss = {'BP', 'MF', 'CC'}

    def __init__(self, name, filename=None, **kws):
        """Store settings, then load annotations through the subclass hook.

        Args:
            name: Annotation type name; one of valid_formats.
            filename: Annotation file, handed to ``_init_associations``.
            **kws: ``godag`` and ``namespaces`` are read here; every keyword
                (e.g. allow_missing_symbol) is forwarded to
                ``_init_associations``.
        """
        self.name = name  # name is one of valid_formats
        self.filename = filename
        self.godag = kws.get('godag')
        self.namespaces = kws.get('namespaces')
        self.evobj = EvidenceCodes()
        # Read annotation file, store namedtuples:
        #     Gene2GoReader(filename=None, taxids=None):
        #     GafReader(filename=None, hdr_only=False, prt=sys.stdout, allow_missing_symbol=False):
        #     GpadReader(filename=None, hdr_only=False):
        self.hdr = None
        self.datobj = None
        # pylint: disable=no-member
        self.associations = self._init_associations(filename, **kws)
        # assert self.associations, 'NO ANNOTATIONS FOUND: {ANNO}'.format(ANNO=filename)
        assert self.namespaces is None or isinstance(self.namespaces, set)

    def get_desc(self):
        """Return a one-line description: name, namespaces and a 'godag' flag."""
        # Sort: self.namespaces is a set, so an unsorted join has no stable order
        nss = '' if self.namespaces is None else ','.join(sorted(self.namespaces))
        godag = '' if self.godag is None else 'godag'
        return '{NAME} {NSs} {GODAG}'.format(NAME=self.name, NSs=nss, GODAG=godag)

    # pylint: disable=unused-argument
    def get_associations(self, taxid=None):
        """Return the stored annotations; taxid is ignored in this base class."""
        # taxid is for NCBI's gene2gos
        return self.associations

    def prt_summary_anno2ev(self, prt=sys.stdout):
        """Print annotation/evidence code summary."""
        self.evobj.prt_summary_anno2ev(self.associations, prt)

    def get_name(self):
        """Return type of annotation"""
        return self.name

    # pylint: disable=no-self-use
    def get_taxid(self):
        """Return taxid, if one was provided, otherwise return -1"""
        # The base class never holds a taxid
        return -1

    # Arg, taxid, is used by NCBI's annotations, but not by gpad, gaf, etc.
    def get_ns2assc(self, taxid=None, **kws):
        """Return a dict mapping each namespace (BP, MF, CC) to its id2gos dict."""
        ns2nts = self.get_ns2ntsanno()
        return {ns: self._get_id2gos(nts, **kws) for ns, nts in ns2nts.items()}

    # pylint: disable=unused-argument
    # Arg, taxid, is used by NCBI's annotations, but not by gpad, gaf, etc.
    def get_ns2ntsanno(self, taxid=None):
        """Split the stored annotations into per-namespace lists: BP, MF, CC"""
        return self._get_ns2ntsanno(self.associations)

    # Used by gpad, gaf, etc., but not used by NCBI's annotation reader
    def _get_ns2ntsanno(self, annotations):
        """Group annotations by their NS field, keeping only exp_nss members.

        Namespaces with no annotation are absent from the returned dict.
        """
        if self.name in {'gpad', 'id2gos'}:
            assert self.godag is not None, "{T}: LOAD godag TO USE {C}::ns2ntsanno".format(
                C=self.__class__.__name__, T=self.name)
        ns2nts = cx.defaultdict(list)
        for nta in annotations:
            ns2nts[nta.NS].append(nta)
        return {ns: ns2nts[ns] for ns in self.exp_nss.intersection(ns2nts)}

    def get_id2gos_nss(self, **kws):
        """Return all associations in a dict, id2gos, regardless of namespace"""
        return self._get_id2gos(self.associations, **kws)

    def get_id2gos(self, namespace=None, prt=sys.stdout, **kws):
        """Return associations from the specified namespace in a dict, id2gos.

        Args:
            namespace: Requested namespace; resolved by ``_get_1ns_assn`` when
                the annotation records have an NS field, ignored otherwise.
            prt: Stream for status messages; a falsy value silences them.
            **kws: Forwarded to ``_get_id2gos``.
        """
        # pylint: disable=superfluous-parens
        if self.has_ns():  # Anno namedtuple has NS field
            nspc, assoc = self._get_1ns_assn(namespace)
            id2gos = self._get_id2gos(assoc, **kws)
            if prt:
                prt.write(
                    '{N} IDs in loaded association branch, {NS}\n'.format(
                        N=len(id2gos), NS=nspc))
            return id2gos
        if prt and namespace is not None:
            # Written to prt, like the other messages of this method, so that
            # a caller-supplied stream receives it (message already ends in \n)
            prt.write(
                '**ERROR {CLS}(..., godag=None).get_id2gos: GODAG is None. IGNORING namespace({NS})\n'
                .format(NS=namespace, CLS=type(self).__name__))
        id2gos = self._get_id2gos(self.associations, **kws)
        if prt:
            prt.write('{N} IDs in all associations\n'.format(N=len(id2gos)))
        return id2gos

    def _get_1ns_assn(self, namespace_usr):
        """Get one namespace, given a user-provided namespace or a default.

        Returns a 2-tuple: (namespace label, list of annotations).
        """
        # If all namespaces were loaded
        if self.namespaces is None:
            # Return user-specified namespace, if provided. Otherwise BP
            nspc = 'BP' if namespace_usr is None else namespace_usr
            # Return one namespace
            if nspc in set(NAMESPACE2NS.values()):
                return nspc, [nt for nt in self.associations if nt.NS == nspc]
            # Return all namespaces
            return nspc, self.associations
        # If one namespace was loaded, use that regardless of what user specifies
        if len(self.namespaces) == 1:
            nspc = next(iter(self.namespaces))
            if namespace_usr is not None and nspc != namespace_usr:
                print('**WARNING: IGNORING {ns}; ONLY {NS} WAS LOADED'.format(
                    ns=namespace_usr, NS=nspc))
            return nspc, self.associations
        # Otherwise every loaded association is returned unfiltered
        # NOTE(review): namespace_usr is not used to filter here -- confirm intent
        if namespace_usr is None:
            print('**ERROR get_id2gos: GODAG NOT LOADED. USING: {NSs}'.format(
                NSs=' '.join(sorted(self.namespaces))))
        return namespace_usr, self.associations

    def has_ns(self):
        """Return True if namespace field, NS exists on annotation namedtuples"""
        assert self.associations, 'NO ASSOCIATIONS IN file({}): {}'.format(
            self.filename, self.associations)
        # Only the first record is inspected
        return hasattr(next(iter(self.associations)), 'NS')

    def _get_id2gos(self,
                    ntannos_usr,
                    propagate_counts=False,
                    relationships=None,
                    prt=sys.stdout,
                    **kws):
        """Return given ntannos_usr in a dict, id2gos (or go2ids, per options).

        Args:
            ntannos_usr: Annotation records to convert.
            propagate_counts: If True, ancestor GO IDs are added to each ID's set.
            relationships: Forwarded to the ancestor lookup.
            prt: Stream for status messages from the ancestor lookup.
            **kws: Passed to AnnoOptions.
        """
        options = AnnoOptions(self.evobj, **kws)
        # Default reduction is to remove. For all options, see goatools/anno/opts.py:
        #   * Evidence_Code == ND -> No biological data No biological Data available
        #   * Qualifiers contain NOT
        ntannos_m = self.reduce_annotations(ntannos_usr, options)
        dbid2goids = self.get_dbid2goids(ntannos_m, propagate_counts,
                                         relationships, prt)
        if options.b_geneid2gos:
            return dbid2goids
        # if not a2bs:
        #     raise RuntimeError('**ERROR: NO ASSOCATIONS FOUND: {FILE}'.format(FILE=self.filename))
        return self._get_goid2dbids(dbid2goids)

    @staticmethod
    def _get_goid2dbids(dbid2goids):
        """Return dict of GO ID keys and a set of gene products as values"""
        goid2dbids = cx.defaultdict(set)
        for dbid, goids in dbid2goids.items():
            for goid in goids:
                goid2dbids[goid].add(dbid)
        return dict(goid2dbids)

    def _get_namespaces(self, nts):
        """Get the set of namespaces seen in the namedtuples."""
        return set(nt.NS for nt in nts) if self.has_ns() else set()

    # Qualifier (column 4)
    # Flags that modify the interpretation of an annotation one (or more) of NOT, contributes_to, colocalizes_with
    # This field is not mandatory;
    #     * cardinality 0, 1, >1;
    #     * for cardinality >1 use a pipe to separate entries (e.g. NOT|contributes_to)
    def prt_qualifiers(self, prt=sys.stdout):
        """Print Qualifiers: 1,462 colocalizes_with; 1,454 contributes_to; 1,157 not"""
        # 13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
        #  4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        self._prt_qualifiers(self.associations, prt)

    @staticmethod
    def _prt_qualifiers(associations, prt=sys.stdout):
        """Print Qualifiers found in the annotations, most common first.
           QUALIFIERS:
                1,462 colocalizes_with
                1,454 contributes_to
                1,157 not
                   13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
                    4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        """
        prt.write('QUALIFIERS:\n')
        qual2cnt = cx.Counter(q for nt in associations for q in nt.Qualifier)
        for fld, cnt in qual2cnt.most_common():
            prt.write('    {N:6,} {FLD}\n'.format(N=cnt, FLD=fld))

    def reduce_annotations(self, annotations, options):
        """Reduce annotations to ones used to identify enrichment (normally exclude ND and NOT)."""
        # The predicate is built by the options object; it is given each
        # record's Qualifier and Evidence_Code
        getfnc_qual_ev = options.getfnc_qual_ev()
        return [
            nt for nt in annotations
            if getfnc_qual_ev(nt.Qualifier, nt.Evidence_Code)
        ]

    @staticmethod
    def update_association(assc_goidsets, go2ancestors, prt=sys.stdout):
        """Update, in place, each GO ID set to include all its GO ancestors.

        GO IDs that are not keys of go2ancestors are left as they are.
        """
        goids_avail = set(go2ancestors)
        # assc_gos is assc_gene2gos.values()
        for assc_goids_cur in assc_goidsets:
            # Collect first: the set must not change while it is being walked
            parents = set()
            for goid in assc_goids_cur.intersection(goids_avail):
                parents.update(go2ancestors[goid])
            assc_goids_cur.update(parents)

    def _get_go2ancestors(self,
                          goids_assoc_usr,
                          relationships,
                          prt=sys.stdout):
        """Return go2ancestors (set of parent GO IDs) for all GO ID keys in go2obj.

        Only the GO IDs of goids_assoc_usr that are present in self.godag are
        looked up; the others are reported by _rpt_goids_notfound.
        """
        assert self.godag is not None
        _godag = self.godag
        # Get GO IDs in annotations that are in GO DAG
        goids_avail = set(_godag)
        self._rpt_goids_notfound(goids_assoc_usr, goids_avail)
        goids_assoc_cur = goids_assoc_usr.intersection(goids_avail)
        # Get GO Term for each current GO ID in the annotations
        _go2obj_assc = {go: _godag[go] for go in goids_assoc_cur}
        go2ancestors = get_go2parents_go2obj(_go2obj_assc, relationships, prt)
        if prt:
            prt.write('{N} GO IDs -> {M} go2ancestors\n'.format(
                N=len(goids_avail), M=len(go2ancestors)))
        return go2ancestors

    @staticmethod
    def _rpt_goids_notfound(goids_assoc_all, goids_avail):
        """Report the number of GO IDs in the association, but not in the GODAG"""
        goids_missing = goids_assoc_all.difference(goids_avail)
        if goids_missing:
            # The missing IDs come from the association and are absent from the
            # GODAG; sorted so that the report is reproducible
            print("{N} GO IDs IN ASSOCIATION NOT FOUND IN GODAG: {GOs}".format(
                N=len(goids_missing), GOs=" ".join(sorted(goids_missing))))

    def get_dbid2goids(self,
                       ntannos,
                       propagate_counts=False,
                       relationships=None,
                       prt=sys.stdout):
        """Return a dict mapping each DB_ID to the set of its GO_IDs.

        With propagate_counts, ancestor GO IDs are added to each set.
        """
        if propagate_counts:
            return self._get_dbid2goids_p1(ntannos, relationships, prt)
        return self._get_dbid2goids_p0(ntannos)

    @staticmethod
    def _get_dbid2goids_p0(associations):
        """Return gene2goids with annotations as-is (propagate_counts == False)"""
        id2gos = cx.defaultdict(set)
        for ntd in associations:
            id2gos[ntd.DB_ID].add(ntd.GO_ID)
        return dict(id2gos)

    def _get_dbid2goids_p1(self, ntannos, relationships=None, prt=sys.stdout):
        """Return gene2goids with propagate_counts == True.

        A GO ID with no entry in go2ancestors (e.g. one that is not in the
        GODAG) is kept without ancestors instead of raising KeyError.
        """
        id2gos = cx.defaultdict(set)
        goids_annos = set(nt.GO_ID for nt in ntannos)
        go2ancestors = self._get_go2ancestors(goids_annos, relationships, prt)
        # https://github.com/geneontology/go-annotation/issues/3523
        exclude = {'GO:2000325', 'GO:2000327'}
        for ntd in ntannos:
            goid = ntd.GO_ID
            # https://github.com/geneontology/go-annotation/issues/3523
            if goid in exclude:
                print('**WARNING: OBSOLETE GO ID({GO})'.format(GO=goid))
                continue
            goids = id2gos[ntd.DB_ID]
            goids.add(goid)
            goids.update(go2ancestors.get(goid, ()))
        return dict(id2gos)

    @staticmethod
    def get_goid2dbids(associations):
        """Return a dict mapping each GO_ID to the set of its DB_IDs."""
        go2ids = cx.defaultdict(set)
        for ntd in associations:
            go2ids[ntd.GO_ID].add(ntd.DB_ID)
        return dict(go2ids)

    def hms(self, msg, tic=None, prt=sys.stdout):
        """Print elapsed time and message; return the current timer value.

        Elapsed time is measured from tic, or from the class-level tic when
        tic is None.
        """
        if tic is None:
            tic = self.tic
        now = timeit.default_timer()
        hms = str(datetime.timedelta(seconds=(now - tic)))
        prt.write('{HMS}: {MSG}\n'.format(HMS=hms, MSG=msg))
        return now

    def chk_associations(self, fout_err=None):
        """Check that associations are in expected format."""
        # No checks are done in the base class
        # pylint: disable=unnecessary-pass
        pass

    def nts_ev_nd(self):
        """Get annotations where Evidence_code == 'ND' (No biological data)"""
        return [nt for nt in self.associations if nt.Evidence_Code == 'ND']

    def nts_qual_not(self):
        """Get annotations having Qualifiers containing NOT"""
        return [nt for nt in self.associations if self._has_not_qual(nt)]

    def chk_qualifiers(self):
        """Check format of qualifier: a set that is neither {''} nor {'-'}.

        Nothing is checked when the annotation type is 'id2gos'.
        """
        if self.name == 'id2gos':
            return
        for ntd in self.associations:
            qual = ntd.Qualifier
            # The check requires a set, so the message names a set
            assert isinstance(
                qual, set), '{NAME}: QUALIFIER MUST BE A SET: {NT}'.format(
                    NAME=self.name, NT=ntd)
            assert qual != set(['']), ntd
            assert qual != set(['-']), ntd
            assert 'always' not in qual, 'SPEC SAID IT WOULD BE THERE'

    def chk_godag(self):
        """Check that a GODag was loaded; raise RuntimeError otherwise."""
        if not self.godag:
            raise RuntimeError(
                '{CLS} MUST INCLUDE GODag: {CLS}(file.anno, godag=godag)'.
                format(CLS=self.__class__.__name__))

    @staticmethod
    def _has_not_qual(ntd):
        """Return True if any qualifier contains the substring 'not' or 'NOT'"""
        return any('not' in qual or 'NOT' in qual for qual in ntd.Qualifier)

    def prt_counts(self, prt=sys.stdout):
        """Print the number of annotations stored."""
        num_annos = len(self.associations)
        # 792,891 annotations for 3 taxids stored: 10090 7227 9606
        prt.write('{A:8,} annotations\n'.format(A=num_annos))
Esempio n. 3
0
class AnnoReaderBase(object):
    """Base class for readers of Gene Association (GO annotation) files.

    ``__init__`` calls ``self._init_associations(filename, **kws)``, which is
    not defined in this class and must be supplied by a subclass; its return
    value is stored in ``self.associations``.  The methods below read these
    fields from each annotation record: ``DB_ID``, ``GO_ID``, ``Qualifier``,
    ``Evidence_Code`` and, when present, ``NS``.
    """
    # pylint: disable=broad-except,line-too-long,too-many-instance-attributes

    # Timer value captured when the class body runs; default start for hms()
    tic = timeit.default_timer()

    # Expected values for a Qualifier (seen in both GAF and gene2go)
    exp_qualifiers = {'not', 'contributes_to', 'colocalizes_with'}

    # pylint: disable=too-many-instance-attributes
    def __init__(self, name, filename=None, **kws):
        """Store settings, then load annotations through the subclass hook.

        Args:
            name: Annotation type name; compared against 'gpad' and 'id2gos'
                elsewhere in this class.
            filename: Annotation file, handed to ``_init_associations``.
            **kws: ``godag`` and ``namespaces`` are read here; every keyword
                (e.g. allow_missing_symbol) is forwarded to
                ``_init_associations``.
        """
        self.name = name
        self.filename = filename
        self.godag = kws.get('godag')
        self.namespaces = kws.get('namespaces')
        self.evobj = EvidenceCodes()
        # Read annotation file, store namedtuples:
        #     Gene2GoReader(filename=None, taxids=None):
        #     GafReader(filename=None, hdr_only=False, prt=sys.stdout, allow_missing_symbol=False):
        #     GpadReader(filename=None, hdr_only=False):
        self.hdr = None
        self.datobj = None
        # pylint: disable=no-member
        self.associations = self._init_associations(filename, **kws)
        # assert self.associations, 'NO ANNOTATIONS FOUND: {ANNO}'.format(ANNO=filename)
        assert self.namespaces is None or isinstance(self.namespaces, set)

    def get_desc(self):
        """Return a one-line description: name, namespaces and a 'godag' flag."""
        # Sort: self.namespaces is a set, so an unsorted join has no stable order
        nss = '' if self.namespaces is None else ','.join(sorted(self.namespaces))
        godag = '' if self.godag is None else 'godag'
        return '{NAME} {NSs} {GODAG}'.format(NAME=self.name, NSs=nss, GODAG=godag)

    # pylint: disable=unused-argument
    def get_associations(self, taxid=None):
        """Return the stored annotations; taxid is ignored in this base class."""
        # taxid is for NCBI's gene2gos
        return self.associations

    def prt_summary_anno2ev(self, prt=sys.stdout):
        """Print annotation/evidence code summary."""
        self.evobj.prt_summary_anno2ev(self.associations, prt)

    def get_name(self):
        """Return type of annotation"""
        return self.name

    # pylint: disable=no-self-use
    def get_taxid(self):
        """Return taxid, if one was provided, otherwise return -1"""
        # The base class never holds a taxid
        return -1

    def get_ns2assc(self, **kws):
        """Return a dict mapping each namespace (BP, MF, CC) to its id2gos dict."""
        ns2nts = self.get_ns2ntsanno(kws.get('taxid'))
        return {ns: self._get_id2gos(nts, **kws) for ns, nts in ns2nts.items()}

    # pylint: disable=unused-argument
    def get_ns2ntsanno(self, taxid=None):
        """Split the stored annotations into per-namespace lists: BP, MF, CC"""
        return self._get_ns2ntsanno(self.associations)

    def _get_ns2ntsanno(self, annotations):
        """Group annotations by their NS field, keeping only BP, MF and CC.

        Namespaces with no annotation are absent from the returned dict.
        """
        if self.name in {'gpad', 'id2gos'}:
            assert self.godag is not None, "{T}: LOAD godag TO USE {C}::ns2ntsanno".format(
                C=self.__class__.__name__, T=self.name)
        ns2nts = cx.defaultdict(list)
        for nta in annotations:
            ns2nts[nta.NS].append(nta)
        return {ns: ns2nts[ns] for ns in {'BP', 'MF', 'CC'}.intersection(ns2nts)}

    def get_id2gos_nss(self, **kws):
        """Return all associations in a dict, id2gos, regardless of namespace"""
        return self._get_id2gos(self.associations, **kws)

    def get_id2gos(self, namespace='BP', **kws):
        """Return associations from the specified namespace in a dict, id2gos.

        When the annotation records carry no NS field, the namespace is
        ignored and every association is used.
        """
        # pylint: disable=superfluous-parens
        assoc = self.associations
        if self.has_ns():
            assoc = [nt for nt in assoc if nt.NS == namespace]
        elif assoc:
            # Records exist but lack NS; stay quiet when there is simply no data
            print('**ERROR: GODAG NOT LOADED. IGNORING namespace({NS})'.format(NS=namespace))
        id2gos = self._get_id2gos(assoc, **kws)
        print('{N} IDs in association branch, {NS}'.format(N=len(id2gos), NS=namespace))
        return id2gos

    def has_ns(self):
        """Return True if the first annotation record has an NS field.

        Returns False when there are no annotations at all, rather than
        letting next() raise StopIteration.
        """
        first = next(iter(self.associations), None)
        return first is not None and hasattr(first, 'NS')

    def _get_id2gos(self, associations, **kws):
        """Return given associations in a dict, id2gos (or go2ids, per options)."""
        options = AnnoOptions(self.evobj, **kws)
        # Default reduction is to remove. For all options, see goatools/anno/opts.py:
        #   * Evidence_Code == ND -> No biological data No biological Data available
        #   * Qualifiers contain NOT
        assc = self.reduce_annotations(associations, options)
        if options.b_geneid2gos:
            return self.get_dbid2goids(assc)
        return self.get_goid2dbids(assc)

    def _get_namespaces(self, nts):
        """Get the set of namespaces seen in the namedtuples."""
        return set(nt.NS for nt in nts) if self.has_ns() else set()

    # Qualifier (column 4)
    # Flags that modify the interpretation of an annotation one (or more) of NOT, contributes_to, colocalizes_with
    # This field is not mandatory;
    #     * cardinality 0, 1, >1;
    #     * for cardinality >1 use a pipe to separate entries (e.g. NOT|contributes_to)
    def prt_qualifiers(self, prt=sys.stdout):
        """Print Qualifiers: 1,462 colocalizes_with; 1,454 contributes_to; 1,157 not"""
        # 13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
        #  4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        self._prt_qualifiers(self.associations, prt)

    @staticmethod
    def _prt_qualifiers(associations, prt=sys.stdout):
        """Print Qualifiers found in the annotations, most common first.
           QUALIFIERS:
                1,462 colocalizes_with
                1,454 contributes_to
                1,157 not
                   13 not colocalizes_with   (TBD: CHK - Seen in gene2go, but not gafs)
                    4 not contributes_to     (TBD: CHK - Seen in gene2go, but not gafs)
        """
        prt.write('QUALIFIERS:\n')
        qual2cnt = cx.Counter(q for nt in associations for q in nt.Qualifier)
        for fld, cnt in qual2cnt.most_common():
            prt.write('    {N:6,} {FLD}\n'.format(N=cnt, FLD=fld))

    def reduce_annotations(self, annotations, options):
        """Reduce annotations to ones used to identify enrichment (normally exclude ND and NOT)."""
        # The predicate is built by the options object; it is given each
        # record's Qualifier and Evidence_Code
        getfnc_qual_ev = options.getfnc_qual_ev()
        return [nt for nt in annotations if getfnc_qual_ev(nt.Qualifier, nt.Evidence_Code)]

    @staticmethod
    def get_dbid2goids(associations):
        """Return a dict mapping each DB_ID to the set of its GO_IDs."""
        id2gos = cx.defaultdict(set)
        for ntd in associations:
            id2gos[ntd.DB_ID].add(ntd.GO_ID)
        return dict(id2gos)

    @staticmethod
    def get_goid2dbids(associations):
        """Return a dict mapping each GO_ID to the set of its DB_IDs."""
        go2ids = cx.defaultdict(set)
        for ntd in associations:
            go2ids[ntd.GO_ID].add(ntd.DB_ID)
        return dict(go2ids)

    def hms(self, msg, tic=None, prt=sys.stdout):
        """Print elapsed time and message; return the current timer value.

        Elapsed time is measured from tic, or from the class-level tic when
        tic is None.
        """
        if tic is None:
            tic = self.tic
        now = timeit.default_timer()
        hms = str(datetime.timedelta(seconds=(now-tic)))
        prt.write('{HMS}: {MSG}\n'.format(HMS=hms, MSG=msg))
        return now

    def chk_associations(self, fout_err=None):
        """Check that associations are in expected format."""
        # No checks are done in the base class
        pass

    def nts_ev_nd(self):
        """Get annotations where Evidence_code == 'ND' (No biological data)"""
        return [nt for nt in self.associations if nt.Evidence_Code == 'ND']

    def nts_qual_not(self):
        """Get annotations having Qualifiers containing NOT"""
        return [nt for nt in self.associations if self._has_not_qual(nt)]

    def chk_qualifiers(self):
        """Check format of qualifier: a set that is neither {''} nor {'-'}.

        Nothing is checked when the annotation type is 'id2gos'.
        """
        if self.name == 'id2gos':
            return
        for ntd in self.associations:
            qual = ntd.Qualifier
            # The check requires a set, so the message names a set
            assert isinstance(qual, set), '{NAME}: QUALIFIER MUST BE A SET: {NT}'.format(
                NAME=self.name, NT=ntd)
            assert qual != set(['']), ntd
            assert qual != set(['-']), ntd
            assert 'always' not in qual, 'SPEC SAID IT WOULD BE THERE'

    @staticmethod
    def _has_not_qual(ntd):
        """Return True if any qualifier contains the substring 'not' or 'NOT'"""
        return any('not' in qual or 'NOT' in qual for qual in ntd.Qualifier)