Exemple #1
0
    def getInput(self):
        '''
        Read the file at self.filepath and store its contents on this object.

        The branch taken is selected by self.input_type:

        * 'TXT' or None: the file text, with newlines removed, is passed to
          setDNA. A file named 'allseqs.txt' is not read this way and ends up
          in the error message below.
        * 'SEQ', 'SEQ.CLIPPED', 'FASTA': the file is parsed with
          fasta.parseFile and the sequence of the last record is passed to
          setDNA (the file is expected to hold ONE entry).
        * 'AB1', 'ABI', 'ABIF': the file is read with ABIreader.Trace
          (trimming=True); sequence, quality values and the four raw channels
          are stored via setDNA, setQualVal and setTrace.
        * 'FASTQ': the file is parsed with fastq.parse; sequence and quality
          values are stored via setDNA and setQualVal.

        Any other type (including .ztr and .scf, which are not implemented)
        only prints a message naming the file.

        Raises:
            ValueError: if a SEQ/FASTA file yields no records.
        '''
        input_type = self.input_type

        if input_type in ('TXT', None) and self.filename != 'allseqs.txt':
            # 'with' closes the handle even if read() raises.
            with open(self.filepath, 'r') as handle:
                text = handle.read()
            self.setDNA(text.replace('\n', ''))

        # String types are compared by value. The previous `is 'FASTA'` test
        # compared object identity and therefore depended on interning.
        # FASTA goes through the same record loop as SEQ because both are
        # read with fasta.parseFile.
        elif input_type in ('SEQ', 'SEQ.CLIPPED', 'FASTA'):
            seq = None
            for _record_id, seq in fasta.parseFile(self.filepath):
                pass  # keep the last record; the file should contain ONE entry
            if seq is None:
                raise ValueError(
                    'No sequence record found in "%s"' % self.filename)
            self.setDNA(seq)

        elif input_type in ('AB1', 'ABI', 'ABIF'):
            ab1 = ABIreader.Trace(self.filepath, trimming=True)
            self.setDNA(ab1.seq)
            self.setQualVal(ab1.qual_val)
            # Four raw channels, in the order raw1..raw4.
            self.setTrace([ab1.data[key]
                           for key in ('raw1', 'raw2', 'raw3', 'raw4')])

        elif input_type == 'FASTQ':
            # The file should contain ONE entry.
            _record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
            self.setDNA(seq)
            self.setQualVal(qual_val)

        else:
            print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file'
                  % self.filename)
Exemple #2
0
	def getInput(self):
		'''
		Read the file at self.filepath and store its contents on this object.

		The branch taken is selected by self.input_type:

		* 'TXT', 'SEQ', 'SEQ.CLIPPED' or None: the file text, with newlines
		  removed, is passed to setDNA. A file named 'allseqs.txt' is not read
		  this way and ends up in the error message below.
		* 'AB1', 'ABI', 'ABIF': the file is read with ABIreader.Trace
		  (trimming=True); sequence, quality values and the four raw channels
		  are stored via setDNA, setQualVal and setTrace.
		* 'FASTA': the file is parsed with fasta.parseFile and the sequence is
		  passed to setDNA.
		* 'FASTQ': the file is parsed with fastq.parse; sequence and quality
		  values are stored via setDNA and setQualVal.

		Any other type (including .ztr and .scf, which are not implemented)
		only prints a message naming the file.
		'''
		input_type = self.input_type

		if input_type in ('TXT', 'SEQ', 'SEQ.CLIPPED', None) and self.filename != 'allseqs.txt':
			# 'with' closes the handle even if read() raises.
			with open(self.filepath, 'r') as handle:
				text = handle.read()
			self.setDNA(text.replace('\n', ''))

		elif input_type in ('AB1', 'ABI', 'ABIF'):
			ab1 = ABIreader.Trace(self.filepath, trimming=True)
			self.setDNA(ab1.seq)
			self.setQualVal(ab1.qual_val)
			# Four raw channels, in the order raw1..raw4.
			self.setTrace([ab1.data[key] for key in ('raw1', 'raw2', 'raw3', 'raw4')])

		# String types are compared by value. The previous `is 'FASTA'` test
		# compared object identity and therefore depended on interning.
		elif input_type == 'FASTA':
			# NOTE(review): assumes fasta.parseFile returns a single
			# (id, sequence) pair for a one-entry file - confirm against the
			# fasta module.
			_record_id, seq = fasta.parseFile(self.filepath)
			self.setDNA(seq)

		elif input_type == 'FASTQ':
			# The file should contain ONE entry.
			_record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
			self.setDNA(seq)
			self.setQualVal(qual_val)

		else:
			print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % self.filename)
Exemple #3
0
    def getInput(self):
        """
        Read the file at self.filepath and store its contents on this object.

        The branch taken is selected by self.input_type:

        * "TXT" or None: the file text, with newlines removed, is passed to
          setDNA. A file named "allseqs.txt" is not read this way and ends up
          in the error message below.
        * "SEQ", "SEQ.CLIPPED", "FASTA": the file is parsed with
          fasta.parseFile and the sequence of the last record is passed to
          setDNA (the file is expected to hold ONE entry).
        * "AB1", "ABI", "ABIF": the file is read with ABIreader.Trace
          (trimming=True); sequence, quality values and the four raw channels
          are stored via setDNA, setQualVal and setTrace.
        * "FASTQ": the file is parsed with fastq.parse; sequence and quality
          values are stored via setDNA and setQualVal.

        Any other type (including .ztr and .scf, which are not implemented)
        only prints a message naming the file.

        Raises:
            ValueError: if a SEQ/FASTA file yields no records.
        """
        input_type = self.input_type

        if input_type in ("TXT", None) and self.filename != "allseqs.txt":
            # "with" closes the handle even if read() raises.
            with open(self.filepath, "r") as handle:
                text = handle.read()
            self.setDNA(text.replace("\n", ""))

        # String types are compared by value. The previous `is "FASTA"` test
        # compared object identity and therefore depended on interning.
        # FASTA goes through the same record loop as SEQ because both are
        # read with fasta.parseFile.
        elif input_type in ("SEQ", "SEQ.CLIPPED", "FASTA"):
            seq = None
            for _record_id, seq in fasta.parseFile(self.filepath):
                pass  # keep the last record; the file should contain ONE entry
            if seq is None:
                raise ValueError('No sequence record found in "%s"' % self.filename)
            self.setDNA(seq)

        elif input_type in ("AB1", "ABI", "ABIF"):
            ab1 = ABIreader.Trace(self.filepath, trimming=True)
            self.setDNA(ab1.seq)
            self.setQualVal(ab1.qual_val)
            # Four raw channels, in the order raw1..raw4.
            self.setTrace([ab1.data[key] for key in ("raw1", "raw2", "raw3", "raw4")])

        elif input_type == "FASTQ":
            # The file should contain ONE entry.
            _record_id, seq, _record_id2, qual_val = fastq.parse(self.filepath)
            self.setDNA(seq)
            self.setQualVal(qual_val)

        else:
            print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % self.filename)
Exemple #4
0
def make_codon_freq_table(file):
	'''
	Build a codon usage table from a FASTA file of coding sequences.

	Input is a file path. Every record returned by fasta.parseFile is passed
	(sequence only) to count_codons, and the per-codon counts are accumulated
	over all records. The counts are then expressed per 1000 codons.

	Output is a dictionary mapping each of the 64 RNA codons to a string of the
	form 'freq/thousand(number)', e.g. '12.5(3)'. If no codons were counted at
	all, every frequency is reported as 0.0 instead of dividing by zero.

	NOTE(review): count_codons is assumed to return a mapping keyed by the same
	RNA codons used here; any other key raises KeyError - confirm.
	'''
	# All 64 codons, in the table's established order: grouped by second base
	# (U, A, C, G), then first base, then third base (both U, C, A, G).
	bases = 'UCAG'
	num_table = dict.fromkeys(
		(first + second + third
			for second in 'UACG'
			for first in bases
			for third in bases),
		0)

	for record in fasta.parseFile(file):
		cds = record[1]  # record[1] is the sequence of the record
		for codon, count in count_codons(cds).items():
			num_table[codon] += count

	# Float total so the division below is a true division on any Python version.
	total = float(sum(num_table.values()))

	# Divide each count by the total and multiply by 1000.
	freq_table = {}
	for codon, count in num_table.items():
		per_thousand = 1000 * (count / total) if total else 0.0
		freq_table[codon] = '%s(%s)' % (per_thousand, count)  # freq/thousand(number)
	return freq_table
Exemple #5
0
def make_codon_freq_table(file):
	'''
	Build a codon usage table from a FASTA file of coding sequences.

	Input is a file path. Every record returned by fasta.parseFile is passed
	(sequence only) to count_codons, and the per-codon counts are accumulated
	over all records. The counts are then expressed per 1000 codons.

	Output is a dictionary mapping each of the 64 RNA codons to a string of the
	form 'freq/thousand(number)', e.g. '12.5(3)'. If no codons were counted at
	all, every frequency is reported as 0.0 instead of dividing by zero.

	NOTE(review): count_codons is assumed to return a mapping keyed by the same
	RNA codons used here; any other key raises KeyError - confirm.
	'''
	# All 64 codons, in the table's established order: grouped by second base
	# (U, A, C, G), then first base, then third base (both U, C, A, G).
	bases = 'UCAG'
	num_table = dict.fromkeys(
		(first + second + third
			for second in 'UACG'
			for first in bases
			for third in bases),
		0)

	for record in fasta.parseFile(file):
		cds = record[1]  # record[1] is the sequence of the record
		for codon, count in count_codons(cds).items():
			num_table[codon] += count

	# Float total so the division below is a true division on any Python version.
	total = float(sum(num_table.values()))

	# Divide each count by the total and multiply by 1000.
	freq_table = {}
	for codon, count in num_table.items():
		per_thousand = 1000 * (count / total) if total else 0.0
		freq_table[codon] = '%s(%s)' % (per_thousand, count)  # freq/thousand(number)
	return freq_table
Exemple #6
0
	def getInput(self):
		'''
		Open the single sequence file at self.filepath and set variables accordingly.

		The file name is the part of self.filepath after the last '/'. Its type is
		the upper-cased text after the last '.' and is stored in self.input_type
		(None when the name contains no '.').

		* 'TXT', 'SEQ' or no extension (except a file named 'allseqs.txt'): the
		  file text with newlines removed is stored via setDNA; for 'rv' files
		  it is first passed through DNA.RC and setRC(True) is called.
		* 'AB1', 'ABI', 'ABIF': read with ABIreader.Trace (trimming=True);
		  sequence, quality values and the four raw channels are stored. For
		  'rv' files only the sequence is passed through DNA.RC.
		* '*.fasta' / '*.fastq' (matched with fnmatch): parsed with
		  fasta.parseFile / fastq.parse and stored as parsed.

		For every supported type the name is stored via setName and the
		orientation via setOrientation. Any other file only prints a message.

		Raises:
			TypeError: if the part of a supported file's name before the first
				'.' does not end in 'fw' or 'rv' (case-insensitive).
		'''
		filename = self.filepath.split('/')[-1]

		# Establish the type of input file.
		if '.' in filename:
			self.input_type = filename.split('.')[-1].upper()
			print('type', self.input_type)
		else:
			self.input_type = None

		def set_orientation_from_filename():
			# The last two characters before the first '.' encode the read direction.
			tag = filename.split('.')[0][-2:].upper()
			if tag == 'FW':
				self.setOrientation('fw')
			elif tag == 'RV':
				self.setOrientation('rv')
			else:
				# Call form of raise: valid on both Python 2 and Python 3.
				raise TypeError('The last two characters of the filename (before the .) must specify whether the sequence is fw or rv. Pleace rename file %s accordingly' % filename)

		# Read the input.
		if self.input_type in ('TXT', 'SEQ', None) and filename != 'allseqs.txt':
			set_orientation_from_filename()
			self.setName(filename)
			# 'with' closes the handle even if read() raises.
			with open(self.filepath, 'r') as handle:
				text = handle.read()
			sequence = text.replace('\n', '')
			if self.getOrientation() == 'fw':
				self.setDNA(sequence)
			elif self.getOrientation() == 'rv':
				self.setDNA(DNA.RC(sequence))
				self.setRC(True)

		elif self.input_type in ('AB1', 'ABI', 'ABIF'):
			set_orientation_from_filename()
			self.setName(filename)
			ab1 = ABIreader.Trace(self.filepath, trimming=True)
			# Four raw channels, in the order raw1..raw4.
			trace = [ab1.data[key] for key in ('raw1', 'raw2', 'raw3', 'raw4')]
			if self.getOrientation() == 'fw':
				self.setDNA(ab1.seq)
				self.setQualVal(ab1.qual_val)
				self.setTrace(trace)
			elif self.getOrientation() == 'rv':
				self.setDNA(DNA.RC(ab1.seq))
				# TODO: quality values and trace still need to be reversed too.
				self.setQualVal(ab1.qual_val)
				self.setTrace(trace)
				self.setRC(True)

		# .ztr and .scf files are not implemented and fall through to the message below.

		elif fnmatch.fnmatch(filename, '*.fasta'):
			self.setName(filename)
			# The file should contain ONE entry.
			_record_id, dna = fasta.parseFile(self.filepath)
			self.setDNA(dna)
			# NOTE(review): unlike the text and AB1 branches, 'rv' sequences are
			# stored as parsed here - confirm whether that is intended.
			set_orientation_from_filename()

		elif fnmatch.fnmatch(filename, '*.fastq'):
			self.setName(filename)
			# The file should contain ONE entry.
			_record_id, dna, _record_id2, qual_val = fastq.parse(self.filepath)
			self.setDNA(dna)
			self.setQualVal(qual_val)
			# NOTE(review): unlike the text and AB1 branches, 'rv' sequences are
			# stored as parsed here - confirm whether that is intended.
			set_orientation_from_filename()

		else:
			print('"%s" is not a .txt, .seq, .scf, .fasta, .fastq, .abif, .ab1, .abi or .ztr file' % filename)