def getInput(self):
    '''
    Open a single .seq, .fasta, .fastq, .ztr, .scf, .ab1 file (or even a text file
    with a DNA sequence) and set variables accordingly.

    The reader is chosen from self.input_type:
      'TXT' or None         - the whole file is read as text, newlines are removed and
                              the result is passed to setDNA (skipped when
                              self.filename is 'allseqs.txt').
      'SEQ', 'SEQ.CLIPPED'  - parsed with fasta.parseFile; every (id, seq) record
                              yielded is passed to setDNA.
      'AB1', 'ABI', 'ABIF'  - read with ABIreader.Trace(trimming=True); sets the DNA,
                              the quality values and the four raw traces.
      'FASTA'               - parsed with fasta.parseFile.
      'FASTQ'               - parsed with fastq.parse; sets DNA and quality values.
    Any other type only prints a message. Nothing is returned.
    '''
    # read the input
    if self.input_type in ['TXT', None] and self.filename not in ['allseqs.txt']:
        # 'with' guarantees the handle is closed even if read() fails
        with open(self.filepath, 'r') as handle:
            contents = handle.read()
        self.setDNA(contents.replace('\n', ''))

    elif self.input_type in ['SEQ', 'SEQ.CLIPPED']:
        # parse the fasta file. File should contain ONE entry
        for record in fasta.parseFile(self.filepath):
            _record_id, seq = record
            self.setDNA(seq)

    elif self.input_type in ['AB1', 'ABI', 'ABIF']:
        ab1 = ABIreader.Trace(self.filepath, trimming=True)
        self.setDNA(ab1.seq)
        self.setQualVal(ab1.qual_val)
        self.setTrace([ab1.data['raw1'], ab1.data['raw2'], ab1.data['raw3'], ab1.data['raw4']])  # need to RC this too

    # NOTE: there is no branch for 'ZTR' or 'SCF'; those types fall through to
    # the message at the bottom.

    # Compare strings by value: 'is' tests object identity, which is not
    # guaranteed to hold for equal strings built at runtime.
    elif self.input_type == 'FASTA':
        # parse the fasta file. File should contain ONE entry
        # NOTE(review): the SEQ branch above iterates over the result of
        # fasta.parseFile while this branch unpacks it directly - confirm
        # which shape parseFile actually returns.
        _record_id, seq = fasta.parseFile(self.filepath)
        self.setDNA(seq)

    elif self.input_type == 'FASTQ':
        # parse the fastq file. File should contain ONE entry
        _record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
        self.setDNA(seq)
        self.setQualVal(qual_val)

    else:
        print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % self.filename)
def getInput(self):
    '''
    Open a single .seq, .fasta, .fastq, .ztr, .scf, .ab1 file (or even a text file
    with a DNA sequence) and set variables accordingly.

    The reader is chosen from self.input_type:
      'TXT', 'SEQ', 'SEQ.CLIPPED' or None
                            - the whole file is read as text, newlines are removed
                              and the result is passed to setDNA (skipped when
                              self.filename is 'allseqs.txt').
      'AB1', 'ABI', 'ABIF'  - read with ABIreader.Trace(trimming=True); sets the DNA,
                              the quality values and the four raw traces.
      'FASTA'               - parsed with fasta.parseFile.
      'FASTQ'               - parsed with fastq.parse; sets DNA and quality values.
    Any other type only prints a message. Nothing is returned.
    '''
    plain_text_types = ['TXT', 'SEQ', 'SEQ.CLIPPED', None]

    # read the input
    if self.input_type in plain_text_types and self.filename not in ['allseqs.txt']:
        # 'with' guarantees the handle is closed even if read() fails
        with open(self.filepath, 'r') as handle:
            contents = handle.read()
        self.setDNA(contents.replace('\n', ''))

    elif self.input_type in ['AB1', 'ABI', 'ABIF']:
        ab1 = ABIreader.Trace(self.filepath, trimming=True)
        self.setDNA(ab1.seq)
        self.setQualVal(ab1.qual_val)
        self.setTrace([ab1.data['raw1'], ab1.data['raw2'], ab1.data['raw3'], ab1.data['raw4']])  # need to RC this too

    # NOTE: there is no branch for 'ZTR' or 'SCF'; those types fall through to
    # the message at the bottom.

    # Compare strings by value: 'is' tests object identity, which is not
    # guaranteed to hold for equal strings built at runtime.
    elif self.input_type == 'FASTA':
        # parse the fasta file. File should contain ONE entry
        _record_id, seq = fasta.parseFile(self.filepath)
        self.setDNA(seq)

    elif self.input_type == 'FASTQ':
        # parse the fastq file. File should contain ONE entry
        _record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
        self.setDNA(seq)
        self.setQualVal(qual_val)

    else:
        print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % self.filename)
def getInput(self):
    """
    Open a single .seq, .fasta, .fastq, .ztr, .scf, .ab1 file (or even a text file
    with a DNA sequence) and set variables accordingly.

    The reader is chosen from self.input_type:
      "TXT" or None         - the whole file is read as text, newlines are removed and
                              the result is passed to setDNA (skipped when
                              self.filename is "allseqs.txt").
      "SEQ", "SEQ.CLIPPED"  - parsed with fasta.parseFile; every (id, seq) record
                              yielded is passed to setDNA.
      "AB1", "ABI", "ABIF"  - read with ABIreader.Trace(trimming=True); sets the DNA,
                              the quality values and the four raw traces.
      "FASTA"               - parsed with fasta.parseFile.
      "FASTQ"               - parsed with fastq.parse; sets DNA and quality values.
    Any other type only prints a message. Nothing is returned.
    """
    # read the input
    if self.input_type in ["TXT", None] and self.filename not in ["allseqs.txt"]:
        # "with" guarantees the handle is closed even if read() fails
        with open(self.filepath, "r") as handle:
            contents = handle.read()
        self.setDNA(contents.replace("\n", ""))

    elif self.input_type in ["SEQ", "SEQ.CLIPPED"]:
        # parse the fasta file. File should contain ONE entry
        for record in fasta.parseFile(self.filepath):
            _record_id, seq = record
            self.setDNA(seq)

    elif self.input_type in ["AB1", "ABI", "ABIF"]:
        ab1 = ABIreader.Trace(self.filepath, trimming=True)
        self.setDNA(ab1.seq)
        self.setQualVal(ab1.qual_val)
        self.setTrace(
            [ab1.data["raw1"], ab1.data["raw2"], ab1.data["raw3"], ab1.data["raw4"]]
        )  # need to RC this too

    # NOTE: there is no branch for "ZTR" or "SCF"; those types fall through to
    # the message at the bottom.

    # Compare strings by value: "is" tests object identity, which is not
    # guaranteed to hold for equal strings built at runtime.
    elif self.input_type == "FASTA":
        # parse the fasta file. File should contain ONE entry
        # NOTE(review): the SEQ branch above iterates over the result of
        # fasta.parseFile while this branch unpacks it directly - confirm
        # which shape parseFile actually returns.
        _record_id, seq = fasta.parseFile(self.filepath)
        self.setDNA(seq)

    elif self.input_type == "FASTQ":
        # parse the fastq file. File should contain ONE entry
        _record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
        self.setDNA(seq)
        self.setQualVal(qual_val)

    else:
        print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % self.filename)
def make_codon_freq_table(file):
    '''
    Input is a file path.
    Counts the usage of each codon in a FASTA file of DNA sequences.
    Then converts that as codon usage per 1000 codons.
    Good for generating codon tables.

    Output is a dictionary mapping each of the 64 RNA codons to a string of the
    form 'freq/thousand(number)', e.g. '12.5(3)'. When no codons were counted at
    all (for instance an empty file) every entry is '0.0(0)' instead of the
    function failing with a ZeroDivisionError.

    Raises KeyError if count_codons reports a key that is not one of the 64
    codons in the table.
    '''
    # Start every codon at zero. The loop order reproduces the historical key
    # order of the table (UUU, UUC, UUA, UUG, CUU, ... GGG).
    num_table = {first + second + third: 0
                 for second in 'UACG'
                 for first in 'UCAG'
                 for third in 'UCAG'}

    # Each record is indexed as record[1] for the coding sequence.
    for record in fasta.parseFile(file):
        codons = count_codons(record[1])
        for key in codons:
            num_table[key] += codons[key]

    # sum codons; float so the division below is never integer division
    total = float(sum(num_table.values()))

    # divide each by the sum and multiply by 1000
    freq_table = {}
    for key in num_table:
        count = num_table[key]
        per_thousand = 1000 * (count / total) if total else 0.0
        freq_table[key] = '%s(%s)' % (per_thousand, count)  # output is following format: freq/thousand(number)
    return freq_table
def getInput(self):
    '''Open a single .seq, .fasta, .fastq, .ztr, .scf, .ab1 file (or even a text file
    with a DNA sequence) and set variables accordingly.

    The file name is the last '/'-separated component of self.filepath.
    self.input_type is set to the upper-cased text after the last '.' of the
    file name, or to None when the name contains no '.'.

    The two characters just before the first '.' of the file name must be
    'fw' or 'rv' (any case); they determine the orientation that is stored
    with setOrientation. For text and ABI input a 'rv' sequence is passed
    through DNA.RC and setRC(True) is called.

    Raises TypeError when the file name does not end in fw/rv for a
    recognised file type. Unrecognised file types only print a message.
    '''
    filename = self.filepath.split('/')[-1]  # get filename

    def set_orientation_from_filename():
        '''Set the orientation from the fw/rv tag in the file name, or raise TypeError.'''
        tag = filename.split('.')[0][-2:].upper()
        if tag == 'FW':
            self.setOrientation('fw')
        elif tag == 'RV':
            self.setOrientation('rv')
        else:
            # call-style raise works on Python 2 and 3 (the 'raise E, msg'
            # form is a SyntaxError on Python 3)
            raise TypeError('The last two characters of the filename (before the .) must specify whether the sequence is fw or rv. Pleace rename file %s accordingly' % filename)

    # establish type of input file
    if '.' in filename:
        self.input_type = filename.split('.')[-1].upper()
        print('type', self.input_type)
    else:
        self.input_type = None

    # read the input
    if self.input_type in ['TXT', 'SEQ', None] and filename not in ['allseqs.txt']:
        set_orientation_from_filename()
        self.setName(filename)

        # 'with' guarantees the handle is closed even if read() fails
        with open(self.filepath, 'r') as handle:
            contents = handle.read()

        if self.getOrientation() == 'fw':
            self.setDNA(contents.replace('\n', ''))
        elif self.getOrientation() == 'rv':
            self.setDNA(DNA.RC(contents.replace('\n', '')))
            self.setRC(True)

    elif self.input_type in ['AB1', 'ABI', 'ABIF']:
        set_orientation_from_filename()
        self.setName(filename)

        ab1 = ABIreader.Trace(self.filepath, trimming=True)
        if self.getOrientation() == 'fw':
            self.setDNA(ab1.seq)
            self.setQualVal(ab1.qual_val)  # need to RC this too
            self.setTrace([ab1.data['raw1'], ab1.data['raw2'], ab1.data['raw3'], ab1.data['raw4']])  # need to RC this too
        elif self.getOrientation() == 'rv':
            self.setDNA(DNA.RC(ab1.seq))
            self.setQualVal(ab1.qual_val)  # need to RC this too
            self.setTrace([ab1.data['raw1'], ab1.data['raw2'], ab1.data['raw3'], ab1.data['raw4']])  # need to RC this too
            self.setRC(True)

    # NOTE: there is no branch for .ztr or .scf files; they fall through to
    # the message at the bottom.

    # NOTE(review): these two branches match the lower-case extension with
    # fnmatch while the branches above use the upper-cased self.input_type,
    # and they store 'rv' sequences without reverse-complementing - confirm
    # whether that difference is intended.
    elif fnmatch.fnmatch(filename, '*.fasta'):
        self.setName(filename)
        # parse the fasta file. File should contain ONE entry
        _record_id, dna = fasta.parseFile(self.filepath)
        self.setDNA(dna)
        set_orientation_from_filename()

    elif fnmatch.fnmatch(filename, '*.fastq'):
        self.setName(filename)
        # parse the fastq file. File should contain ONE entry
        _record_id, dna, _record_id2, qual_val = fastq.parse(self.filepath)
        self.setDNA(dna)
        self.setQualVal(qual_val)
        set_orientation_from_filename()

    else:
        print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % filename)