Exemple #1
0
def mrnaExt(ID):
    """Fetch and parse the GenBank record for the given sequence ID.

    The record is requested from the "nucleotide" database through
    ``Entrez.efetch`` with ``rettype="gb"`` and parsed with
    ``SeqIO.read(..., 'genbank')``.

    Args:
        ID: Sequence identifier, passed unchanged as the ``id`` argument
            of ``Entrez.efetch``.

    Returns:
        The record object produced by ``SeqIO.read``.
    """
    # NOTE(review): `warning` does not look like a documented EFetch
    # parameter; it is kept so the request stays unchanged -- confirm whether
    # it is still needed.
    recData = Entrez.efetch(db="nucleotide",
                            id=ID,
                            rettype="gb",
                            warning=False)
    try:
        record = SeqIO.read(recData, 'genbank')
    finally:
        # Release the connection even when parsing fails.
        recData.close()
    return record
Exemple #2
0
# Prefix of a CDS line in the feature table of a GenBank flat file; the
# location text follows it directly.
_CDS_FEATURE_PREFIX = '     CDS             '


def _parseCdsRange(featureLine):
    """Return ``[start, end]`` from a plain ``start..end`` CDS feature line.

    ``<`` / ``>`` markers around either bound are ignored.

    Raises:
        ValueError: if the text after the CDS prefix is not a plain
            ``start..end`` span (e.g. ``join(...)`` or ``complement(...)``).
    """
    location = featureLine.partition(_CDS_FEATURE_PREFIX)[2]
    bounds = location.split('..')
    if len(bounds) < 2:
        # No ".." separator: report it like any other unparsable location
        # instead of failing with an IndexError.
        raise ValueError('no start..end span in %r' % location)
    return [
        int(bounds[0].strip().strip('<>')),
        int(bounds[1].strip().strip('<>')),
    ]


def cdsExt(ID, geneName):
    """Return the sequence record for ``ID`` trimmed to its CDS region.

    The GenBank flat file for ``ID`` is fetched as text and scanned for CDS
    feature lines. When several CDS lines can be parsed, the last one is
    used. The record is then fetched again, parsed with ``SeqIO.read`` and
    its ``seq`` is replaced by the slice ``[start - 1:end]``.

    Side effects:
        Opens ``Align/<geneName up to its first '.'>.log`` in append mode
        (creating it if missing) and writes a line to it when the fetched
        text contains ``LOW QUALITY PROTEIN``. For every CDS line that
        cannot be parsed, a message is printed to standard output.

    Args:
        ID: Sequence identifier passed as ``id`` to ``Entrez.efetch``.
        geneName: Gene name; the part before the first ``.`` names the log
            file.

    Returns:
        The record produced by ``SeqIO.read`` with ``seq`` cut down to the
        CDS range.

    Raises:
        ValueError: if no CDS line with a plain ``start..end`` location was
            found in the fetched text.
    """
    textHandle = Entrez.efetch(db="nucleotide",
                               id=ID,
                               rettype='gb',
                               retmode='text')
    try:
        retdata = textHandle.read()
    finally:
        textHandle.close()

    with open("Align/" + geneName.split('.')[0] + ".log", "a") as fp:
        if 'LOW QUALITY PROTEIN' in retdata:
            fp.write('%s CDS is of low quality\n' % ID)

    cdsRange = None
    for obj in retdata.split('\n'):
        if _CDS_FEATURE_PREFIX not in obj:
            continue
        try:
            cdsRange = _parseCdsRange(obj)
        except ValueError:
            # Keep any range parsed from an earlier CDS line and move on.
            print(
                "Problem found while extracting cds from %s. Please report this issue to ambuj (at) ufl (dot) edu"
                % obj)

    if cdsRange is None:
        # Fail with a clear message before spending a second request.
        raise ValueError('No usable CDS range found for %s' % ID)

    # NOTE(review): `warning` does not look like a documented EFetch
    # parameter; it is kept so the request stays unchanged -- confirm whether
    # it is still needed.
    recData = Entrez.efetch(db="nucleotide",
                            id=ID,
                            rettype="gb",
                            warning=False)
    try:
        record = SeqIO.read(recData, 'genbank')
    finally:
        recData.close()

    # The parsed bounds are used as 1-based and inclusive, hence the
    # ``start - 1`` when converting to a Python slice.
    record.seq = record.seq[cdsRange[0] - 1:cdsRange[1]]

    return record