Python SeqIO Examples, Utils.Bio.SeqIO Python Examples

Example #1

0

Show file

File: functions.py Project: Ambuj-UF/ConCat-1.0

def Convert(input, output, filename):
    """
    Convert an alignment file from one format to another.

    Supported format names are the keys of ``formDict`` (fasta, nexus,
    phylip, phylip-sequential and phylip-relaxed). The result is written to
    a file named after the text before the first '.' in ``filename`` plus
    the extension registered for ``output``.

    Side effect: the working directory is changed to its parent before any
    file is opened and is not restored afterwards.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    stem = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        # fasta -> nexus goes through AlignIO with an explicit gapped
        # protein alphabet; errors in this branch propagate to the caller.
        outname = stem + '.nex'
        with open(filename) as source:
            alignment = AlignIO.read(source, "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outname, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            outname = stem + '.' + formDict[output].split('.')[1]
            with open(filename, 'rU') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outname, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Best-effort conversion: report the failure and stop here so
            # that no success message is printed for a file that was not
            # written.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outname)

Example #2

0

Show file

    def nexML(self, filename):
        """
            Convert a Nexus alignment file into 'Results.xml'.

            The records are written with Biopython's "seqxml" writer.

            @parameter filename - path of the Nexus file to convert.
        """

        # Parse first so that an unreadable input does not leave behind a
        # freshly truncated Results.xml.
        with open(filename, 'rU') as handleXML:
            recordsXML = list(SeqIO.parse(handleXML, "nexus"))

        with open('Results.xml', 'w') as fp:
            SeqIO.write(recordsXML, fp, "seqxml")

Example #3

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

 def nexML(self, filename):

     """
         Convert a Nexus alignment file into 'Results.xml'.

         The records are written with Biopython's "seqxml" writer.

         @parameter filename - path of the Nexus file to convert.
     """

     # Parse first so that an unreadable input does not leave behind a
     # freshly truncated Results.xml.
     with open(filename, 'rU') as handleXML:
         recordsXML = list(SeqIO.parse(handleXML, "nexus"))

     with open('Results.xml', 'w') as fp:
         SeqIO.write(recordsXML, fp, "seqxml")

Example #4

0

Show file

    def fileOpenID(self):
        """
           Quick record-ID import for every alignment file in the working
           directory.

           ``self.file_format`` is a 1-based index selecting both the glob
           pattern and the Biopython format name (1 fasta, 2 nexus, 3 phylip,
           4 phylip-sequential, 5 phylip-relaxed).

           Returns - dict mapping each file name (text before its first '.')
           to the list of record IDs found in that file.
        """

        file_format = self.file_format

        extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
        typeList = [
            "fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"
        ]

        pattern = extList[file_format - 1]
        seqFormat = typeList[file_format - 1]

        idsByGene = {}

        for filename in glob.glob(pattern):
            # The context manager closes the file even if parsing fails.
            with open(filename, "rU") as handle:
                idsByGene[filename.split(".")[0]] = [
                    record.id for record in SeqIO.parse(handle, seqFormat)
                ]

        return idsByGene

Example #5

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

 def fileOpenID(self):

     """
        Quick record-ID import for every alignment file in the working
        directory.

        ``self.file_format`` is a 1-based index selecting both the glob
        pattern and the Biopython format name (1 fasta, 2 nexus, 3 phylip,
        4 phylip-sequential, 5 phylip-relaxed).

        Returns - dict mapping each file name (text before its first '.')
        to the list of record IDs found in that file.
     """

     file_format = self.file_format

     extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"]

     pattern = extList[file_format - 1]
     seqFormat = typeList[file_format - 1]

     idsByGene = {}

     for filename in glob.glob(pattern):
         # The context manager closes the file even if parsing fails.
         with open(filename, "rU") as handle:
             idsByGene[filename.split(".")[0]] = [
                 record.id for record in SeqIO.parse(handle, seqFormat)
             ]

     return idsByGene

Example #6

0

Show file

def mrnaExt(ID):
    """
    Fetch the GenBank entry for ``ID`` from the NCBI nucleotide database.

    Returns the single sequence record parsed from the response.
    """
    recData = Entrez.efetch(db="nucleotide",
                            id=ID,
                            rettype="gb",
                            warning=False)
    try:
        return SeqIO.read(recData, 'genbank')
    finally:
        # Release the network handle whether or not parsing succeeded.
        recData.close()

Example #7

0

Show file

def cdsExt(ID, geneName):
    """
    Return the sequence record for ``ID`` trimmed to its CDS range.

    The GenBank flat file is fetched as text and scanned for CDS feature
    lines of the form ``start..end`` (partial markers '<' and '>' are
    ignored). When several CDS lines parse, the last one wins. The record
    is then fetched again, parsed with SeqIO and sliced to that range.

    A line "<ID> CDS is of low quality" is appended to
    ``Align/<geneName stem>.log`` when the entry is flagged
    'LOW QUALITY PROTEIN'.

    @parameter ID - sequence identifier passed to Entrez.efetch.
    @parameter geneName - gene (file) name; the text before its first '.'
                          names the log file.

    Raises ValueError when no CDS line yields a usable start..end range.
    """
    marker = '     CDS             '

    textHandle = Entrez.efetch(db="nucleotide",
                               id=ID,
                               rettype='gb',
                               retmode='text')
    try:
        retdata = textHandle.read()
    finally:
        textHandle.close()

    # The log is opened (and therefore created) unconditionally, even when
    # nothing is written to it.
    with open("Align/" + geneName.split('.')[0] + ".log", "a") as fp:
        if 'LOW QUALITY PROTEIN' in retdata:
            fp.write('%s CDS is of low quality\n' % ID)

    cdsRange = None
    for obj in retdata.split('\n'):
        if marker not in obj:
            continue

        # Take what follows the feature key. str.lstrip() would strip a
        # *set of characters*, not this prefix, so split on it instead.
        bounds = obj.split(marker, 1)[1].split('..')
        try:
            cdsRange = [
                int(bounds[0].strip('<>')),
                int(bounds[1].strip('<>'))
            ]
        except (ValueError, IndexError):
            # Compound locations (join/complement/...) or single-base
            # locations cannot be reduced to a plain start..end pair.
            print(
                "Problem found while extracting cds from %s. Please report this issue to ambuj (at) ufl (dot) edu"
                % obj)
            continue

    if cdsRange is None:
        raise ValueError("No usable CDS range found for %s" % ID)

    recData = Entrez.efetch(db="nucleotide",
                            id=ID,
                            rettype="gb",
                            warning=False)
    try:
        record = SeqIO.read(recData, 'genbank')
    finally:
        recData.close()

    # GenBank coordinates are 1-based and inclusive.
    record.seq = record.seq[cdsRange[0] - 1:cdsRange[1]]

    return record

Example #8

0

Show file

def Convert(input, output, filename):
    """
    Convert an alignment file from one format to another.

    Supported format names are the keys of ``formDict`` (fasta, nexus,
    phylip, phylip-sequential and phylip-relaxed). The result is written to
    a file named after the text before the first '.' in ``filename`` plus
    the extension registered for ``output``.

    Side effect: the working directory is changed to its parent before any
    file is opened and is not restored afterwards.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    stem = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        # fasta -> nexus goes through AlignIO with an explicit gapped
        # protein alphabet; errors in this branch propagate to the caller.
        outname = stem + '.nex'
        with open(filename) as source:
            alignment = AlignIO.read(source,
                                     "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outname, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            outname = stem + '.' + formDict[output].split('.')[1]
            with open(filename, 'rU') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outname, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Best-effort conversion: report the failure and stop here so
            # that no success message is printed for a file that was not
            # written.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outname)

Example #9

0

Show file

def _cleanAli(recordNuc, omit, fileName):
    """
    Thread nucleotide sequences back onto the protein alignment stored in
    'tAligned.fas' and write the result as a Nexus file under 'Input/'.

    For each aligned protein record the nucleotide sequence whose id is a
    substring of the protein id is split with ``_spliter(seq, 3)``
    (presumably into codons - confirm against the helper). A gap '-'
    becomes '---', 'Z' becomes 'NNN' and consumes one chunk, any other
    residue takes the next chunk. Records for which a chunk cannot be
    appended are dropped and reported.

    @parameter recordNuc - nucleotide records providing ``id`` and ``seq``.
    @parameter omit - when equal to False the output is '<stem>.nex',
                      otherwise '<stem>_omited.nex'.
    @parameter fileName - input file name; the text before its first '.'
                          names the output.

    The temporary files 'translated.fas' and 'tAligned.fas' are removed.
    """
    # Close the alignment before it is deleted at the end of the function.
    with open('tAligned.fas', 'rU') as handleP:
        records = list(SeqIO.parse(handleP, 'fasta'))

    store = list()
    for rec in records:
        nucData = [x.seq for x in recordNuc if x.id in rec.id]
        nucSeqData = _spliter(nucData[0], 3)
        sequence = Seq("", generic_dna)
        pos = 0
        for amino in rec.seq:
            if amino == '-':
                sequence = sequence + Seq("---", generic_dna)
            elif amino == 'Z':
                sequence = sequence + Seq("NNN", generic_dna)
                pos = pos + 1
            else:
                try:
                    sequence = sequence + nucSeqData[pos]
                    pos = pos + 1
                except Exception:
                    # Typically the nucleotide chunks ran out; remember the
                    # record so it can be excluded from the output.
                    if rec.id not in store:
                        store.append(rec.id)

        rec.seq = Seq(str(sequence), generic_dna)

    records = [x for x in records if x.id not in store]
    if store != []:
        print("Failed to align following sequences: %s" % store)

    suffix = ".nex" if omit == False else "_omited.nex"
    with open("Input/" + fileName.split('.')[0] + suffix, 'w') as fp:
        SeqIO.write(records, fp, "nexus")

    os.remove('translated.fas')
    os.remove('tAligned.fas')

Example #10

0

Show file

    def RNAfoldConsensus(self):
        """
           Creates RNA structure data from the alignments in the 'RNAdata'
           directory using the RNAalifold program.

           Every '*.nex' file found in 'RNAdata' is written out in Clustal
           format as '<stem>.aln' in the starting directory and piped to
           RNAalifold on standard input. The collected output is stored in
           'RNAConsensus.txt'.

        """

        os.chdir("RNAdata")
        try:
            fileList = glob.glob('*.nex')
            recordList = self.fileOpenConcNex()
        finally:
            # Always return to the starting directory, even on failure.
            os.chdir("..")

        newFileList = [name.split('.')[0] + '.aln' for name in fileList]

        # Records are paired with file names by position; this relies on
        # fileOpenConcNex listing the directory in the same order as above.
        for newName, record in zip(newFileList, recordList):
            with open(newName, 'w') as file_Write:
                SeqIO.write(record, file_Write, "clustal")

        with open("RNAConsensus.txt", 'w') as fp:
            for name in newFileList:
                print("RNA structure     |    %s|    %s" %
                      (time.strftime("%c"), name))
                fp.write("[ %s ]\n" % name.split('.')[0])
                # Feed the alignment on stdin without going through a shell
                # and request text output so it can be written to fp.
                with open(name) as alnInput:
                    fp.write(
                        subprocess.check_output(["RNAalifold"],
                                                stdin=alnInput,
                                                universal_newlines=True))
                fp.write('\n\n')

Example #11

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

    def RNAfoldConsensus(self):

        """
           Creates RNA structure data from the alignments in the 'RNAdata'
           directory using the RNAalifold program.

           Every '*.nex' file found in 'RNAdata' is written out in Clustal
           format as '<stem>.aln' in the starting directory and piped to
           RNAalifold on standard input. The collected output is stored in
           'RNAConsensus.txt'.

        """

        os.chdir("RNAdata")
        try:
            fileList = glob.glob('*.nex')
            recordList = self.fileOpenConcNex()
        finally:
            # Always return to the starting directory, even on failure.
            os.chdir("..")

        newFileList = [name.split('.')[0] + '.aln' for name in fileList]

        # Records are paired with file names by position; this relies on
        # fileOpenConcNex listing the directory in the same order as above.
        for newName, record in zip(newFileList, recordList):
            with open(newName, 'w') as file_Write:
                SeqIO.write(record, file_Write, "clustal")

        with open("RNAConsensus.txt", 'w') as fp:
            for name in newFileList:
                print("RNA structure     |    %s|    %s" %(time.strftime("%c"), name))
                fp.write("[ %s ]\n" % name.split('.')[0])
                # Feed the alignment on stdin without going through a shell
                # and request text output so it can be written to fp.
                with open(name) as alnInput:
                    fp.write(subprocess.check_output(["RNAalifold"],
                                                     stdin=alnInput,
                                                     universal_newlines=True))
                fp.write('\n\n')

Example #12

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

 def alignOutput(self, combine):

     """
        alignOutput creates an output file in the user defined file format.

        ``self.file_format`` selects the format: 1 writes FASTA to
        'Result.fasta' (via the intermediate 'Result1.fasta', which is left
        in place); 2, 3 and 4 write 'Result.phy' as phylip,
        phylip-sequential and phylip-relaxed respectively. Any other value
        terminates the program through sys.exit.

        @parameter combine - concatenated alignment matrix

     """

     output_format = self.file_format

     if output_format == 1:
         with open("Result1.fasta", 'w') as file_Write:
             SeqIO.write(combine, file_Write, "fasta")

         # Clean any unknown description tag from the final fasta file.
         with open("Result1.fasta", "r") as fin, open("Result.fasta", "w") as fout:
             for line in fin:
                 fout.write(line.replace("<unknown description>", ""))
         return

     phylipFormats = {2: "phylip", 3: "phylip-sequential", 4: "phylip-relaxed"}

     if output_format not in phylipFormats:
         sys.exit("You have enetered wrong value \n Program Terminated...")

     with open("Result.phy", 'w') as file_Write:
         SeqIO.write(combine, file_Write, phylipFormats[output_format])

Example #13

0

Show file

    def alignOutput(self, combine):
        """
           alignOutput creates an output file in the user defined file format.

           ``self.file_format`` selects the format: 1 writes FASTA to
           'Result.fasta' (via the intermediate 'Result1.fasta', which is
           left in place); 2, 3 and 4 write 'Result.phy' as phylip,
           phylip-sequential and phylip-relaxed respectively. Any other
           value terminates the program through sys.exit.

           @parameter combine - concatenated alignment matrix

        """

        output_format = self.file_format

        if output_format == 1:
            with open("Result1.fasta", 'w') as file_Write:
                SeqIO.write(combine, file_Write, "fasta")

            # Clean any unknown description tag from the final fasta file.
            with open("Result1.fasta", "r") as fin, \
                    open("Result.fasta", "w") as fout:
                for line in fin:
                    fout.write(line.replace("<unknown description>", ""))
            return

        phylipFormats = {
            2: "phylip",
            3: "phylip-sequential",
            4: "phylip-relaxed"
        }

        if output_format not in phylipFormats:
            sys.exit("You have enetered wrong value \n Program Terminated...")

        with open("Result.phy", 'w') as file_Write:
            SeqIO.write(combine, file_Write, phylipFormats[output_format])

Example #14

0

Show file

def mrnaAlign(inputFile, pkg, arguments=None):
    """
    Align 'Align/<inputFile>' with an external aligner and store the result
    as a Nexus file under 'Input/'.

    MUSCLE is used when ``pkg`` is 'muscle', and also as a fallback when
    another package is requested without ``arguments``. Otherwise the
    bundled MAFFT script is run with ``arguments`` (square brackets are
    stripped from the string first). The aligner's FASTA output
    'Input/<inputFile>' is converted to 'Input/<stem>.nex' and then removed.

    @parameter inputFile - FASTA file name inside 'Align/'.
    @parameter pkg - aligner name; 'muscle' selects MUSCLE.
    @parameter arguments - extra command-line arguments for MAFFT.
    """
    if pkg != 'muscle' and arguments is None:
        pkg = 'muscle'

    source = "Align/" + inputFile
    aligned = "Input/" + inputFile

    if pkg == 'muscle':
        # A different bundled binary is used on macOS than elsewhere.
        if 'Darwin' in platform.system():
            binary = "./src/muscle/muscle"
        else:
            binary = "./src/muscle/muscleLinux"
        command = "%s -in %s -out %s" % (binary, source, aligned)
    else:
        arguments = arguments.replace('[', '').replace(']', '')
        command = "./src/mafft/mafft.bat %s %s > %s" % (arguments, source,
                                                        aligned)

    # NOTE(review): the command is a shell string, so file names or
    # arguments containing spaces or shell metacharacters are interpreted
    # by the shell.
    # Aligner chatter is discarded. It is sent to the null device instead of
    # an unread PIPE, which could block the child once the pipe fills up.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command,
                        shell=True,
                        stdout=devnull,
                        stderr=subprocess.STDOUT)

    # Close the aligner output before it is deleted below.
    with open(aligned, 'rU') as handle:
        record = list(SeqIO.parse(handle, 'fasta'))
    with open("Input/" + inputFile.split('.')[0] + ".nex", 'w') as fp:
        SeqIO.write(record, fp, 'nexus')
    os.remove(aligned)

Example #15

0

Show file

    def fileOpenConcNex(self):
        """
           Parse every '*.nex' file in the current working directory.

           Returns - list with one entry per file, each entry being the list
           of records parsed from that file (in glob order).
        """

        recordList = []
        for filename in glob.glob("*.nex"):
            # Records are materialised inside the with-block so the file
            # can be closed straight away.
            with open(filename, "rU") as handle:
                recordList.append(list(SeqIO.parse(handle, "nexus")))

        return recordList

Example #16

0

Show file

def cdsAlign(inputFile, pkg='muscle', omit=False, ign=False, CT=None):
    """
    Run the coding-sequence alignment pipeline on 'Align/<inputFile>'.

    A trailing stop codon (TAA, TGA or TAG) is removed from each record.
    With ``omit`` equal to True, records whose id field after the first '|'
    is listed in 'Align/<stem>.log' are dropped. The remaining records are
    handed to ``_translator``, ``_alignP`` and ``_cleanAli`` in turn.

    @parameter inputFile - FASTA file name inside 'Align/'.
    @parameter pkg - aligner name forwarded to ``_alignP``.
    @parameter omit - drop records listed in the log file.
    @parameter ign - forwarded to ``_translator``.
    @parameter CT - codon table name; None selects table id 1, and a name
                    not in ``codonTables`` falls back to 'Standard'.
    """

    codonTables = [
        'Ascidian Mitochondrial', 'SGC9', 'Coelenterate Mitochondrial',
        'Protozoan Mitochondrial', 'Vertebrate Mitochondrial', 'Plant Plastid',
        'Thraustochytrium Mitochondrial', 'Blepharisma Macronuclear',
        'Mold Mitochondrial', 'Invertebrate Mitochondrial', 'Standard',
        'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial',
        'Euplotid Nuclear', 'Yeast Mitochondrial', 'Spiroplasma',
        'Alternative Flatworm Mitochondrial', 'Ciliate Nuclear', 'SGC8',
        'Alternative Yeast Nuclear', 'Hexamita Nuclear', 'SGC5', 'SGC4',
        'SGC3', 'SGC2', 'SGC1', 'SGC0', 'Flatworm Mitochondrial',
        'Dasycladacean Nuclear', 'Chlorophycean Mitochondrial', 'Mycoplasma',
        'Bacterial', 'Echinoderm Mitochondrial'
    ]

    if CT is None:
        table = CodonTable.ambiguous_dna_by_id[1]
    elif CT in codonTables:
        table = CodonTable.ambiguous_generic_by_name[CT]
    else:
        table = CodonTable.ambiguous_generic_by_name['Standard']

    with open("Align/" + inputFile, 'rU') as handle:
        records = list(SeqIO.parse(handle, 'fasta'))

    for rec in records:
        if str(rec.seq[-3:]) in ('TAA', 'TGA', 'TAG'):
            rec.seq = rec.seq[0:-3]

    if omit == True:
        # The first space-separated token of every log line is treated as
        # the identifier of a low-quality sequence.
        with open("Align/" + inputFile.split('.')[0] + '.log',
                  'r') as logFile:
            badQuality = [lines.split(' ')[0] for lines in logFile]

        records = [
            rec for rec in records
            if rec.id.split('|')[1] not in badQuality
        ]

    records = _translator(records, ign, omit, table)
    _alignP(pkg)
    _cleanAli(records, omit, inputFile)

Example #17

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

 def fileOpenConcNex(self):

     """
        Parse every '*.nex' file in the current working directory.

        Returns - list with one entry per file, each entry being the list
        of records parsed from that file (in glob order).
     """

     recordList = []
     for filename in glob.glob("*.nex"):
         # Records are materialised inside the with-block so the file can
         # be closed straight away.
         with open(filename, "rU") as handle:
             recordList.append(list(SeqIO.parse(handle, "nexus")))

     return recordList

Example #18

0

Show file

    def fileOpenConc(self):
        """
            Open the input alignment files used for concatenation when they
            are not in Nexus format.

            ``self.file_format`` is a 1-based index: 1 fasta ('*.fas'),
            2 phylip, 3 phylip-sequential, 4 phylip-relaxed (all '*.phy').

            Returns - list with one entry per matching file, each entry
            being the list of records parsed from that file.
        """

        file_format = self.file_format

        extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
        typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

        pattern = extList[file_format - 1]
        seqFormat = typeList[file_format - 1]

        recordList = []
        for filename in glob.glob(pattern):
            # Close each file as soon as its records have been read.
            with open(filename, "rU") as handle:
                recordList.append(list(SeqIO.parse(handle, seqFormat)))

        return recordList

Example #19

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

 def fileOpenConc(self):

     """
         Open the input alignment files used for concatenation when they
         are not in Nexus format.

         ``self.file_format`` is a 1-based index: 1 fasta ('*.fas'),
         2 phylip, 3 phylip-sequential, 4 phylip-relaxed (all '*.phy').

         Returns - list with one entry per matching file, each entry being
         the list of records parsed from that file.
     """

     file_format = self.file_format

     extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

     pattern = extList[file_format - 1]
     seqFormat = typeList[file_format - 1]

     recordList = []
     for filename in glob.glob(pattern):
         # Close each file as soon as its records have been read.
         with open(filename, "rU") as handle:
             recordList.append(list(SeqIO.parse(handle, seqFormat)))

     return recordList

Example #20

0

Show file

def mrnaImport(geneName, group, ortho):
    """
    Download the longest mRNA of every ortholog of a gene and write them to
    'Align/<geneName>.fas'.

    The gene is looked up at NCBI by symbol (``ortho`` if given, otherwise
    ``geneName``) and organism ``group``. The first hit seeds an
    "ortholog_gene_<id>[group]" search; for each resulting gene the
    reference sequences listed by ``_xmlcreate``/``_xmlparser`` are fetched
    with ``mrnaExt`` and the longest one is kept. FASTA headers are then
    rewritten to '<word>_<word>|<accession>' using the two words that
    follow the accession (skipping a leading 'PREDICTED:' token).

    @parameter geneName - name of the gene; also names the output file.
    @parameter group - organism name.
    @parameter ortho - optional gene symbol used instead of ``geneName``.

    Raises ValueError when neither ``ortho`` nor ``group`` is given and
    RuntimeError when NCBI cannot be queried or returns no gene.
    """

    if ortho is not None:
        symbol = ortho
    elif group is not None:
        symbol = geneName
    else:
        raise ValueError(
            "mrnaImport needs an organism group to build the NCBI query")
    inpTerm = symbol + "[sym] AND " + group + "[orgn]"

    Entrez.email = '*****@*****.**'

    try:
        handle = Entrez.esearch(db="gene",
                                term=inpTerm,
                                rettype='xml',
                                RetMax=300)
    except Exception:
        raise RuntimeError(
            "Failed to import sequence from NCBI. Check your internet connection.\nThis might also occur due to NCBI failure"
        )

    try:
        records = Entrez.read(handle)
    finally:
        handle.close()
    idList = records["IdList"]
    if not idList:
        raise RuntimeError("No gene found at NCBI for query: %s" % inpTerm)

    inpTerm = "ortholog_gene_" + str(idList[0]) + "[group]"
    handle = Entrez.esearch(db="gene",
                            term=inpTerm,
                            rettype='xml',
                            RetMax=300,
                            warning=False)
    try:
        records = Entrez.read(handle)
    finally:
        handle.close()
    idList = records["IdList"]

    outRecord = list()
    for ids in idList:
        _xmlcreate(ids)
        refIds = _xmlparser()
        os.remove('export.xml')
        recordList = [mrnaExt(inIDs) for inIDs in refIds]

        if not recordList:
            continue
        # max() keeps the first record among equally long ones.
        longestRec = max(recordList, key=lambda rec: len(rec.seq))
        print("%s" % longestRec.description)
        outRecord.append(longestRec)

    fastaPath = "Align/" + geneName + '.fas'
    with open(fastaPath, 'w') as fp:
        SeqIO.write(outRecord, fp, 'fasta')

    # Second pass over the file just written: shorten the header lines.
    with open(fastaPath, 'r') as fp:
        fdata = fp.readlines()
    with open(fastaPath, 'w') as fp:
        for lines in fdata:
            if '>' not in lines:
                fp.write('%s' % lines)
                continue
            fields = lines.split(' ')
            first = 2 if 'PREDICTED' in lines else 1
            newLine = ('>' + fields[first] + '_' + fields[first + 1] + '|' +
                       fields[0].lstrip('>'))
            fp.write('%s\n' % newLine)

Example #21

0

Show file

def _translator(recordData, ign, omit, table):
    """
    Translate nucleotide records to protein and write them to
    'translated.fas'.

    Each record is translated with ``_translate_str(str(seq), table)``
    (presumably a codon-table translation - confirm against the helper).
    Unless ``ign`` is set, a translation containing '*' triggers a retry
    with the sequence shifted by one and then two bases (padded with 'N' /
    'NN' at the end). The first shift that yields no '*' is adopted and the
    record's sequence is shifted accordingly. If all three attempts contain
    '*' and ``omit`` is False, the attempt with the fewest '*' is used.
    Remaining '*' characters are replaced by 'Z' in the protein string.

    @parameter recordData - list of nucleotide records; modified in place.
    @parameter ign - when equal to False, alternative reading frames are
                     tried as described above.
    @parameter omit - when equal to False, the best of three stop-containing
                      frames is selected.
    @parameter table - codon table passed through to ``_translate_str``.

    Returns the (possibly frame-shifted) input list.
    """
    proteinSeqList = list()
    # Alias, not a copy: assignments below change the caller's records.
    recordsFunc = recordData

    for i, rec in enumerate(recordsFunc):
        # Number of '*' seen per attempted frame, keyed 'one'/'two'/'three'.
        counter = dict()
        seqT = _translate_str(str(rec.seq), table)

        if ign == False:
            if "*" in seqT:
                counter['one'] = seqT.count('*')
                # Second attempt: drop the first base, pad with one 'N'.
                seqT = _translate_str(
                    str(rec.seq[1:len(rec.seq)] + Seq("N", generic_dna)),
                    table)
                if "*" in seqT:
                    counter['two'] = seqT.count('*')
                    # Third attempt: drop two bases, pad with 'NN'.
                    seqT = _translate_str(
                        str(rec.seq[2:len(rec.seq)] + Seq("NN", generic_dna)),
                        table)
                    if "*" in seqT:
                        counter['three'] = seqT.count('*')
                        # All three attempts contain '*'. With omit set,
                        # seqT stays the third attempt and the record's
                        # sequence is left unshifted.
                        if omit == False:
                            # Pick the attempt with the fewest '*'; the
                            # translation is computed before the record's
                            # sequence is replaced.
                            if min(counter, key=counter.get) == 'one':
                                seqT = _translate_str(str(rec.seq), table)
                            elif min(counter, key=counter.get) == 'two':
                                seqT = _translate_str(
                                    str(rec.seq[1:len(rec.seq)] +
                                        Seq("N", generic_dna)), table)
                                recordsFunc[i].seq = recordsFunc[i].seq[
                                    1:len(rec.seq)] + Seq("N", generic_dna)
                            elif min(counter, key=counter.get) == 'three':
                                seqT = _translate_str(
                                    str(rec.seq[2:len(rec.seq)] +
                                        Seq("NN", generic_dna)), table)
                                recordsFunc[i].seq = recordsFunc[i].seq[
                                    2:len(rec.seq)] + Seq("NN", generic_dna)

                    else:
                        # Third attempt has no '*': adopt it and shift the
                        # record by two bases.
                        seqT = _translate_str(
                            str(rec.seq[2:len(rec.seq)] +
                                Seq("NN", generic_dna)), table)
                        recordsFunc[
                            i].seq = recordsFunc[i].seq[2:len(rec.seq)] + Seq(
                                "NN", generic_dna)
                else:
                    # Second attempt has no '*': adopt it and shift the
                    # record by one base.
                    seqT = _translate_str(
                        str(rec.seq[1:len(rec.seq)] + Seq("N", generic_dna)),
                        table)
                    recordsFunc[
                        i].seq = recordsFunc[i].seq[1:len(rec.seq)] + Seq(
                            "N", generic_dna)

            else:
                pass

        # Replace every remaining '*' with 'Z' (same length, one character
        # at a time).
        for j, obj in enumerate(seqT):
            if '*' in obj:
                seqT = seqT[:j] + 'Z' + seqT[j + 1:]

        proteinSeqList.append(
            SeqRecord(Seq(seqT, IUPAC.protein),
                      id=rec.id,
                      name=rec.name,
                      description=rec.description))

    with open('translated.fas', 'w') as fp:
        SeqIO.write(proteinSeqList, fp, 'fasta')

    return recordsFunc

Example #22

0

Show file

File: handler.py Project: Ambuj-UF/ConCat-1.0

    def RYcoding(self, file, position, msaObject):

        """
           RY-coding program: replaces A & G with R and C & T with Y, either
           at every column or at every third column starting from a user
           defined position.

           Record ids, names and descriptions are taken, by row index, from
           the records stored in 'Results.nex'.

           @parameter file - unused by this method.
           @parameter position - 'all', or the 1-based column at which
                                 recoding starts (then every third column).
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises ValueError if a numeric position is smaller than 1.
        """

        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def recode(string, start, step):
            # ``start`` is 1-based. The letter that is tested is the letter
            # that gets replaced, and the last column is included.
            letters = list(string)
            for idx in range(start - 1, len(letters), step):
                letters[idx] = ryCode.get(letters[idx], letters[idx])
            return "".join(letters)

        with open("Results.nex", "rU") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        if position == 'all':
            start, step = 1, 1
        else:
            start, step = int(position), 3
            if start < 1:
                raise ValueError(
                    "RY coding position must be 'all' or a column number >= 1")

        data = []
        for idx, row in enumerate(msaObject):
            data.append(SeqRecord(Seq(recode(str(row.seq), start, step), generic_dna),
                                  id = records[idx].id, name = records[idx].name,
                                  description = records[idx].description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa

Example #23

0

Show file

    def RYcoding(self, file, position, msaObject):
        """
           RY-coding program: replaces A & G with R and C & T with Y, either
           at every column or at every third column starting from a user
           defined position.

           Record ids, names and descriptions are taken, by row index, from
           the records stored in 'Results.nex'.

           @parameter file - unused by this method.
           @parameter position - 'all', or the 1-based column at which
                                 recoding starts (then every third column).
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises ValueError if a numeric position is smaller than 1.
        """
        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def recode(string, start, step):
            # ``start`` is 1-based. The letter that is tested is the letter
            # that gets replaced, and the last column is included.
            letters = list(string)
            for idx in range(start - 1, len(letters), step):
                letters[idx] = ryCode.get(letters[idx], letters[idx])
            return "".join(letters)

        with open("Results.nex", "rU") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        if position == 'all':
            start, step = 1, 1
        else:
            start, step = int(position), 3
            if start < 1:
                raise ValueError(
                    "RY coding position must be 'all' or a column number >= 1")

        data = []
        for idx, row in enumerate(msaObject):
            data.append(
                SeqRecord(Seq(recode(str(row.seq), start, step), generic_dna),
                          id=records[idx].id,
                          name=records[idx].name,
                          description=records[idx].description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa