def fileOpenID(self):
    """
    Quick Record Import.

    Collects the record IDs of every alignment file in the current working
    directory whose extension matches the format selected by
    self.file_format (1 = fasta, 2 = nexus, 3 = phylip,
    4 = phylip-sequential, 5 = phylip-relaxed).

    Return - Dictionary mapping each file's base name (the text before the
             first '.') to the list of record IDs parsed from that file.
    """
    file_format = self.file_format
    extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
    typeList = ["fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"]
    pattern = extList[file_format - 1]
    seqFormat = typeList[file_format - 1]

    idsByGene = {}
    for filename in glob.glob(pattern):
        # Plain text mode already translates newlines on Python 3; the old
        # "rU" flag is rejected by open() from Python 3.11 onwards.  The
        # context manager closes the handle even if parsing fails.
        with open(filename) as handle:
            idList = [record.id for record in SeqIO.parse(handle, seqFormat)]
        gene = filename.split(".")[0]
        idsByGene[gene] = idList
    return idsByGene
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip,
    relaxed-phylip and nexus).

    The working directory is changed to the parent directory before any file
    is touched, so filename is resolved relative to that directory. The
    result is written next to the input, using the text before the first '.'
    of filename plus the extension registered for the output format.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.

    A failed generic conversion prints "Bad Alignment" and returns without
    announcing an output file. An output format that is not listed in
    formDict raises KeyError.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy',
    }

    os.chdir('..')

    stem = filename.split('.')[0]
    target = stem + '.' + formDict[output].split('.')[1]

    if input == 'fasta' and output == 'nexus':
        # Nexus output needs an explicit alphabet, so this pair goes through
        # AlignIO instead of the generic SeqIO path below.
        with open(filename) as source:
            alignment = AlignIO.read(source, "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(target, 'w') as g:
            g.write(alignment.format("nexus"))
    else:
        try:
            # Default text mode replaces "rU", which open() rejects since
            # Python 3.11.
            with open(filename) as handle:
                record = list(SeqIO.parse(handle, input))
            with open(target, 'w') as fp:
                SeqIO.write(record, fp, output)
        except Exception:
            # Best-effort conversion: report the failure, but do not claim
            # that an output file was saved.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % target)
def fileOpenID(self):
    """
    Quick Record Import.

    Collects the record IDs of every alignment file in the current working
    directory whose extension matches the format selected by
    self.file_format (1 = fasta, 2 = nexus, 3 = phylip,
    4 = phylip-sequential, 5 = phylip-relaxed).

    Return - Dictionary mapping each file's base name (the text before the
             first '.') to the list of record IDs parsed from that file.
    """
    file_format = self.file_format
    extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
    typeList = ["fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"]
    pattern = extList[file_format - 1]
    seqFormat = typeList[file_format - 1]

    idsByGene = {}
    for filename in glob.glob(pattern):
        # Plain text mode already translates newlines on Python 3; the old
        # "rU" flag is rejected by open() from Python 3.11 onwards.  The
        # context manager closes the handle even if parsing fails.
        with open(filename) as handle:
            idList = [record.id for record in SeqIO.parse(handle, seqFormat)]
        gene = filename.split(".")[0]
        idsByGene[gene] = idList
    return idsByGene
def nexML(self, filename):
    """
    Exports a Nexus alignment as an XML file.

    Parses filename as Nexus and writes every record to 'Results.xml' in the
    current working directory using Biopython's "seqxml" writer.

    @parameter filename - Path of the Nexus file to export.

    The input is read completely before the output is opened, so a missing
    or unreadable input no longer leaves an empty 'Results.xml' behind.
    """
    # Default text mode replaces "rU", which open() rejects since Python 3.11.
    with open(filename) as handleXML:
        recordsXML = list(SeqIO.parse(handleXML, "nexus"))

    with open('Results.xml', 'w') as fp:
        SeqIO.write(recordsXML, fp, "seqxml")
def mrnaAlign(inputFile, pkg, arguments=None):
    """
    Aligns 'Align/<inputFile>' with an external aligner and stores the
    result as Nexus in 'Input/<base>.nex', where <base> is the text before
    the first '.' of inputFile.

    @parameter inputFile - Name of the FASTA file inside the 'Align' directory.
    @parameter pkg - Aligner to use. 'muscle' runs the bundled muscle binary
                     (macOS or Linux build chosen from platform.system());
                     any other value runs './src/mafft/mafft.bat'.
    @parameter arguments - Extra mafft options as a string; '[' and ']' are
                           stripped. When it is None, muscle is used
                           regardless of pkg.

    The aligner writes FASTA to 'Input/<inputFile>'; that intermediate file
    is removed once the Nexus file has been written.
    """
    if pkg != 'muscle' and arguments is None:
        pkg = 'muscle'

    source = "Align/" + inputFile
    aligned = "Input/" + inputFile

    if pkg == 'muscle':
        if 'Darwin' in platform.system():
            binary = "./src/muscle/muscle"
        else:
            binary = "./src/muscle/muscleLinux"
        command = "%s -in %s -out %s" % (binary, source, aligned)
    else:
        arguments = arguments.replace('[', '').replace(']', '')
        command = "./src/mafft/mafft.bat %s %s > %s" % (arguments, source,
                                                         aligned)

    # NOTE(review): inputFile and arguments are interpolated into a shell
    # command line; callers must only pass trusted values.
    # The aligner's console output is discarded.  It goes to os.devnull
    # rather than an unread PIPE, because a pipe nobody drains can fill up
    # and block the child process forever.
    with open(os.devnull, 'w') as sink:
        subprocess.call(command, shell=True, stdout=sink,
                        stderr=subprocess.STDOUT)

    # Default text mode replaces "rU", which open() rejects since Python 3.11.
    with open(aligned) as handle:
        record = list(SeqIO.parse(handle, 'fasta'))
    with open("Input/" + inputFile.split('.')[0] + ".nex", 'w') as fp:
        SeqIO.write(record, fp, 'nexus')

    os.remove(aligned)
def fileOpenConcNex(self):
    """
    Creates a list of alignment records from every '*.nex' file found in
    the current working directory.

    Returns - List with one entry per file; each entry is the list of
              records parsed from that file.
    """
    recordList = []
    for filename in glob.glob("*.nex"):
        # The context manager closes each file as soon as it is parsed (the
        # handles used to stay open).  Default text mode replaces "rU",
        # which open() rejects since Python 3.11.
        with open(filename) as handle:
            recordList.append(list(SeqIO.parse(handle, "nexus")))
    return recordList
def cdsAlign(inputFile, pkg='muscle', omit=False, ign=False, CT=None):
    """
    Prepares the coding sequences in 'Align/<inputFile>' and hands them to
    the translation / alignment helpers.

    Steps: pick a codon table, read the FASTA records, drop a trailing stop
    codon (TAA, TGA or TAG) from each record, optionally discard records
    listed in the '.log' file, then call _translator, _alignP and _cleanAli.

    @parameter inputFile - Name of the FASTA file inside the 'Align' directory.
    @parameter pkg - Aligner name forwarded to _alignP.
    @parameter omit - Flag; when set, records whose ID field after the first
                      '|' appears as the first space-separated token of a
                      line in 'Align/<base>.log' are removed.
    @parameter ign - Flag forwarded to _translator.
    @parameter CT - Codon table name. None selects table id 1; a name that
                    is not in the supported list falls back to 'Standard'.
    """
    codonTables = [
        'Ascidian Mitochondrial', 'SGC9', 'Coelenterate Mitochondrial',
        'Protozoan Mitochondrial', 'Vertebrate Mitochondrial',
        'Plant Plastid', 'Thraustochytrium Mitochondrial',
        'Blepharisma Macronuclear', 'Mold Mitochondrial',
        'Invertebrate Mitochondrial', 'Standard', 'Trematode Mitochondrial',
        'Scenedesmus obliquus Mitochondrial', 'Euplotid Nuclear',
        'Yeast Mitochondrial', 'Spiroplasma',
        'Alternative Flatworm Mitochondrial', 'Ciliate Nuclear', 'SGC8',
        'Alternative Yeast Nuclear', 'Hexamita Nuclear', 'SGC5', 'SGC4',
        'SGC3', 'SGC2', 'SGC1', 'SGC0', 'Flatworm Mitochondrial',
        'Dasycladacean Nuclear', 'Chlorophycean Mitochondrial',
        'Mycoplasma', 'Bacterial', 'Echinoderm Mitochondrial'
    ]

    if CT is None:
        table = CodonTable.ambiguous_dna_by_id[1]
    elif CT in codonTables:
        table = CodonTable.ambiguous_generic_by_name[CT]
    else:
        table = CodonTable.ambiguous_generic_by_name['Standard']

    # Default text mode replaces "rU", which open() rejects since Python 3.11;
    # the context manager also closes the handle, which used to leak.
    with open("Align/" + inputFile) as handle:
        records = list(SeqIO.parse(handle, 'fasta'))

    # Drop a terminal stop codon before translation.
    for rec in records:
        if str(rec.seq[-3:]) in ('TAA', 'TGA', 'TAG'):
            rec.seq = rec.seq[0:-3]

    if omit:
        with open("Align/" + inputFile.split('.')[0] + '.log', 'r') as logFile:
            badQuality = set(line.split(' ')[0] for line in logFile)
        records = [
            rec for rec in records
            if rec.id.split('|')[1] not in badQuality
        ]

    records = _translator(records, ign, omit, table)
    _alignP(pkg)
    _cleanAli(records, omit, inputFile)
def fileOpenConc(self):
    """
    Opens the input alignment files used for concatenation when the input
    is not Nexus.

    The format is selected by self.file_format (1 = fasta, 2 = phylip,
    3 = phylip-sequential, 4 = phylip-relaxed); every file in the current
    working directory with the matching extension is parsed.

    Returns - List with one entry per file; each entry is the list of
              records parsed from that file.
    """
    file_format = self.file_format
    extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
    typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]
    pattern = extList[file_format - 1]
    seqFormat = typeList[file_format - 1]

    recordList = []
    for filename in glob.glob(pattern):
        # The context manager closes each file as soon as it is parsed (the
        # handles used to stay open).  Default text mode replaces "rU",
        # which open() rejects since Python 3.11.
        with open(filename) as handle:
            recordList.append(list(SeqIO.parse(handle, seqFormat)))
    return recordList
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip,
    relaxed-phylip and nexus).

    The working directory is changed to the parent directory before any file
    is touched, so filename is resolved relative to that directory. The
    result is written next to the input, using the text before the first '.'
    of filename plus the extension registered for the output format.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.

    A failed generic conversion prints "Bad Alignment" and returns without
    announcing an output file. An output format that is not listed in
    formDict raises KeyError.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy',
    }

    os.chdir('..')

    stem = filename.split('.')[0]
    target = stem + '.' + formDict[output].split('.')[1]

    if input == 'fasta' and output == 'nexus':
        # Nexus output needs an explicit alphabet, so this pair goes through
        # AlignIO instead of the generic SeqIO path below.
        with open(filename) as source:
            alignment = AlignIO.read(source, "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(target, 'w') as g:
            g.write(alignment.format("nexus"))
    else:
        try:
            # Default text mode replaces "rU", which open() rejects since
            # Python 3.11.
            with open(filename) as handle:
                record = list(SeqIO.parse(handle, input))
            with open(target, 'w') as fp:
                SeqIO.write(record, fp, output)
        except Exception:
            # Best-effort conversion: report the failure, but do not claim
            # that an output file was saved.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % target)
def fileOpenConc(self):
    """
    Opens the input alignment files used for concatenation when the input
    is not Nexus.

    The format is selected by self.file_format (1 = fasta, 2 = phylip,
    3 = phylip-sequential, 4 = phylip-relaxed); every file in the current
    working directory with the matching extension is parsed.

    Returns - List with one entry per file; each entry is the list of
              records parsed from that file.
    """
    file_format = self.file_format
    extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
    typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]
    pattern = extList[file_format - 1]
    seqFormat = typeList[file_format - 1]

    recordList = []
    for filename in glob.glob(pattern):
        # The context manager closes each file as soon as it is parsed (the
        # handles used to stay open).  Default text mode replaces "rU",
        # which open() rejects since Python 3.11.
        with open(filename) as handle:
            recordList.append(list(SeqIO.parse(handle, seqFormat)))
    return recordList
def _cleanAli(recordNuc, omit, fileName):
    """
    Maps the aligned protein sequences in 'tAligned.fas' back onto their
    nucleotide sequences and writes the result as Nexus.

    For every aligned protein record the nucleotide sequence of the first
    record in recordNuc whose ID is contained in the protein record's ID is
    split with _spliter(..., 3). Each '-' in the protein becomes '---', each
    'Z' becomes 'NNN' (and consumes one chunk), and every other residue is
    replaced by the next chunk.

    @parameter recordNuc - Nucleotide records matching the aligned proteins.
    @parameter omit - Flag; selects the output name 'Input/<base>.nex' when
                      unset and 'Input/<base>_omited.nex' when set.
    @parameter fileName - Input file name; <base> is the text before the
                          first '.'.

    Records for which a chunk could not be appended are reported and left
    out of the output. 'translated.fas' and 'tAligned.fas' are deleted at
    the end.
    """
    # Default text mode replaces "rU", which open() rejects since Python 3.11;
    # the context manager also closes the handle, which used to leak.
    with open('tAligned.fas') as handleP:
        records = list(SeqIO.parse(handleP, 'fasta'))

    store = []
    for rec in records:
        nucData = [x.seq for x in recordNuc if x.id in rec.id]
        nucSeqData = _spliter(nucData[0], 3)
        sequence = Seq("", generic_dna)
        pos = 0
        for amino in rec.seq:
            if amino == '-':
                sequence = sequence + Seq("---", generic_dna)
            elif amino == 'Z':
                sequence = sequence + Seq("NNN", generic_dna)
                pos = pos + 1
            else:
                try:
                    sequence = sequence + nucSeqData[pos]
                    pos = pos + 1
                except Exception:
                    # Best effort: remember the record and keep going.
                    # Unlike a bare except, this lets KeyboardInterrupt and
                    # SystemExit propagate.
                    if rec.id not in store:
                        store.append(rec.id)
        rec.seq = Seq(str(sequence), generic_dna)

    records = [x for x in records if x.id not in store]
    if store:
        print("Failed to align following sequences: %s" % store)

    suffix = "_omited.nex" if omit else ".nex"
    with open("Input/" + fileName.split('.')[0] + suffix, 'w') as fp:
        SeqIO.write(records, fp, "nexus")

    os.remove('translated.fas')
    os.remove('tAligned.fas')
def RYcoding(self, file, position, msaObject):
    """
    RY-coding program: It replaces A & G with R and C & T with Y, either at
    a user defined codon position or at all positions.

    @parameter file - Unused; kept for interface compatibility.
    @parameter position - 'all' to recode every column, otherwise the
                          1-based column to start from (recoding then
                          continues every third column, e.g. 3 recodes
                          columns 3, 6, 9, ...).
    @parameter msaObject - Input multiple sequence alignment matrix data.

    Return - Multiple sequence alignment object with RY coding. The id,
             name and description of each row are taken from the record at
             the same index in 'Results.nex'.

    Raises ValueError when position is a number smaller than 1.

    Only upper-case A, C, G and T are recoded; every other character is
    copied unchanged.
    """
    ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

    def recode(sequence, start, step):
        # Each column is inspected and rewritten at the same index.  The
        # earlier helpers tested column i but overwrote column i - 1, which
        # coded the wrong base and never reached the final column.
        letters = list(sequence)
        for idx in range(start, len(letters), step):
            letters[idx] = ryCode.get(letters[idx], letters[idx])
        return "".join(letters)

    # Default text mode replaces "rU", which open() rejects since Python 3.11.
    with open("Results.nex") as handle:
        records = list(SeqIO.parse(handle, "nexus"))

    if position == 'all':
        start, step = 0, 1
    else:
        start, step = int(position) - 1, 3
        if start < 0:
            raise ValueError(
                "position must be 'all' or a number >= 1, got %r" % (position,))

    data = []
    for counter, row in enumerate(msaObject):
        # Each row's own sequence is used, so a one-row alignment works
        # (the row length used to be read from row index 1).
        recoded = recode(str(row.seq), start, step)
        data.append(SeqRecord(Seq(recoded, generic_dna),
                              id=records[counter].id,
                              name=records[counter].name,
                              description=records[counter].description))

    newmsa = MultipleSeqAlignment(data)
    return newmsa
def RYcoding(self, file, position, msaObject):
    """
    RY-coding program: It replaces A & G with R and C & T with Y, either at
    a user defined codon position or at all positions.

    @parameter file - Unused; kept for interface compatibility.
    @parameter position - 'all' to recode every column, otherwise the
                          1-based column to start from (recoding then
                          continues every third column, e.g. 3 recodes
                          columns 3, 6, 9, ...).
    @parameter msaObject - Input multiple sequence alignment matrix data.

    Return - Multiple sequence alignment object with RY coding. The id,
             name and description of each row are taken from the record at
             the same index in 'Results.nex'.

    Raises ValueError when position is a number smaller than 1.

    Only upper-case A, C, G and T are recoded; every other character is
    copied unchanged.
    """
    ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

    def recode(sequence, start, step):
        # Each column is inspected and rewritten at the same index.  The
        # earlier helpers tested column i but overwrote column i - 1, which
        # coded the wrong base and never reached the final column.
        letters = list(sequence)
        for idx in range(start, len(letters), step):
            letters[idx] = ryCode.get(letters[idx], letters[idx])
        return "".join(letters)

    # Default text mode replaces "rU", which open() rejects since Python 3.11.
    with open("Results.nex") as handle:
        records = list(SeqIO.parse(handle, "nexus"))

    if position == 'all':
        start, step = 0, 1
    else:
        start, step = int(position) - 1, 3
        if start < 0:
            raise ValueError(
                "position must be 'all' or a number >= 1, got %r" % (position,))

    data = []
    for counter, row in enumerate(msaObject):
        # Each row's own sequence is used, so a one-row alignment works
        # (the row length used to be read from row index 1).
        recoded = recode(str(row.seq), start, step)
        data.append(SeqRecord(Seq(recoded, generic_dna),
                              id=records[counter].id,
                              name=records[counter].name,
                              description=records[counter].description))

    newmsa = MultipleSeqAlignment(data)
    return newmsa