コード例 #1
0
def get_hmmdb_ec_nums(acc, c):
    """
    Look up the EC entries linked to the HMM whose accession is `acc`.

    The query is executed on the cursor `c` (hmm_ec joined to hmm on the HMM id) and
    one annotation.ECAnnotation is built from the first column of every result row.

    Returns a list of those ECAnnotation objects; the list is empty when nothing matches.
    """
    sql = "SELECT he.ec_id FROM hmm_ec he JOIN hmm ON he.hmm_id=hmm.id WHERE hmm.accession = ?"
    c.execute(sql, (acc, ))

    # the cursor is iterated directly, so rows are consumed in the order returned
    return [annotation.ECAnnotation(number=record[0]) for record in c]
コード例 #2
0
def get_uniref_ec_nums(acc, c):
    """
    Look up the EC numbers stored in the uniref_ec table for the entry whose id is `acc`.

    The query is executed on the cursor `c` and one annotation.ECAnnotation is built
    from the ec_num column of every result row.

    Returns a list of those ECAnnotation objects; the list is empty when nothing matches.
    """
    sql = """
          SELECT us_ec.ec_num
            FROM uniref_ec us_ec
           WHERE us_ec.id = ?
          """
    c.execute(sql, (acc, ))

    # the cursor is iterated directly, so rows are consumed in the order returned
    return [annotation.ECAnnotation(number=record[0]) for record in c]
コード例 #3
0
def get_uspdb_ec_nums(acc, c):
    """
    Look up the EC numbers linked to the accession `acc` in the uniprot_sprot tables.

    The query is executed on the cursor `c` (uniprot_sprot_ec joined to uniprot_sprot_acc
    on id) and one annotation.ECAnnotation is built from the ec_num column of every
    result row.

    Returns a list of those ECAnnotation objects; the list is empty when nothing matches.
    """
    sql = """
          SELECT us_ec.ec_num
            FROM uniprot_sprot_ec us_ec
                 JOIN uniprot_sprot_acc us_acc ON us_ec.id=us_acc.id
           WHERE us_acc.accession = ?
          """
    c.execute(sql, (acc, ))

    # the cursor is iterated directly, so rows are consumed in the order returned
    return [annotation.ECAnnotation(number=record[0]) for record in c]
コード例 #4
0
def parse_kegg_blast_evidence(log_fh, polypeptides, blast_list, eval_cutoff):
    '''
    Reads a list file of NCBI BLAST evidence against KEGG and a dict of polypeptides,
    populating each with Annotation evidence where appropriate.  Only attaches evidence if
    the product name is the default.

    Currently only considers the top BLAST hit for each query which doesn't have
    'uncharacterized' or hypothetical in the product name and which passes the cutoff.

    Arguments:
        log_fh: writable handle; one INFO line is written for each product name update
        polypeptides: dict keyed on query ID (column 0 of the BLAST rows); each value
            must expose an .annotation attribute
        blast_list: passed to utils.read_list_file(), which supplies the BLAST file paths
        eval_cutoff: rows whose column 19 value (presumably the E-value) is greater than
            this are skipped

    Returns nothing; the annotations reachable through `polypeptides` are modified in place.

    Blank lines are skipped.  Any other line is expected to be tab-delimited with at least
    20 columns (IndexError otherwise), and a qualifying hit whose query ID is missing from
    `polypeptides` raises KeyError.
    '''
    # Compiled once instead of once per row.  Raw strings keep the regex escapes from
    # being interpreted as (invalid) string escape sequences.
    #
    # the product field looks like this:
    # dam; adenine-specific DNA methyltransferase; K06223 DNA adenine methylase [EC:2.1.1.72]
    # troponin I type 1 (skeletal, slow); K10371 troponin I, slow skeletal muscle
    kegg_with_ec_re = re.compile(r"; (K\d+)\s+(.+) \[EC:(.+)\]")
    kegg_re = re.compile(r"; (K\d+)\s+(.+)")

    for blast_file in utils.read_list_file(blast_list):
        last_qry_id = None

        # context manager so every BLAST file is closed as soon as it has been read
        with open(blast_file) as blast_fh:
            for line in blast_fh:
                line = line.rstrip()

                # tolerate blank lines (e.g. a stray newline at the end of the file)
                if not line:
                    continue

                cols = line.split("\t")
                product_field = cols[15]

                # We're going to ignore any lines which have a few keywords in the name
                # First character left off for initcap reasons
                if 'ncharacterized' in product_field or 'ypothetical' in product_field:
                    continue

                this_qry_id = cols[0]

                # skip this line if it doesn't meet the cutoff
                if float(cols[19]) > eval_cutoff:
                    continue

                # the BLAST hits are sorted already with the top hit for each query first,
                # so any further qualifying row for the same query is a lower-ranked hit
                if last_qry_id == this_qry_id:
                    continue

                # remember the ID we just saw
                last_qry_id = this_qry_id

                annot = polypeptides[this_qry_id].annotation

                # leave it alone if the gene product name has already changed from the default
                if annot.product_name != DEFAULT_PRODUCT_NAME:
                    continue

                accession = cols[5]

                if ' [EC' in product_field and product_field.endswith(']'):
                    m = kegg_with_ec_re.search(product_field)
                    ec_num = m.group(3) if m else None
                else:
                    m = kegg_re.search(product_field)
                    ec_num = None

                if not m:
                    continue

                kegg_id = m.group(1)
                annot.product_name = m.group(2)
                log_fh.write(
                    "INFO: {0}: Updated product name to '{1}' based on BLAST hit to KEGG accession '{2}'\n"
                    .format(this_qry_id, annot.product_name, accession))

                # truthiness covers both None and the empty string
                if ec_num:
                    annot.add_ec_number(annotation.ECAnnotation(number=ec_num))

                annot.add_dbxref(annotation.Dbxref(db='KEGG', identifier=kegg_id))