Esempio n. 1
0
	def __init__(self, seq, seqdb, verbose=False, **kwargs):
		"""Validate a single protein query, prepare the targets and run the search

			seq: the sequence to search with
				a single SeqRecord for which is_protein() is true
				(anything else, including a list, is rejected)

			seqdb: the sequence database to search
				a single or a list of SeqRecords; targets for which is_dna()
				is true are expanded with tools.getSixFrameTranslation()

			verbose: accepted but not used by this method

			keyword arguments: other arguments to jackhmmer - see HMMER docs

			Raises ValueError if seq is not a single protein SeqRecord, or if
			a target is neither protein nor DNA
		"""
		#Can only have one query
		if not isinstance(seq, SeqRecord):
			raise ValueError("seq must be a SeqRecord, not \'{}\'".format(type(seq)))
		if not is_protein(seq):
			#report the query's own alphabet; getattr keeps the error path from
			#failing itself if the Seq object carries no alphabet attribute
			raise ValueError("seq must have a ProteinAlphabet, not \'{}\'"
					.format(getattr(seq.seq, "alphabet", None)))

		#allow a single target to be passed without wrapping it in a list
		if isinstance(seqdb, SeqRecord):
			seqdb = [seqdb,]

		self.args = self.getArgs(**kwargs)
		self.matches = []
		self.hmms = []

		#apply unique ids to the targets
		self.seq = wrap_seqrecords([seq,])
		self.seqdb = wrap_seqrecords(seqdb)

		#build the list that is actually searched: proteins as they are,
		#DNA as its six-frame translations
		targets = []
		for t in self.seqdb:
			if is_protein(t):
				targets.append(t)
			elif is_dna(t):
				targets.extend(tools.getSixFrameTranslation(t))
			else:
				raise ValueError(
					"Targets must have a DNAAlphabet or a ProteinAlphabet, not \'{}\'"
						.format(t.seq.alphabet))

		self._do_search(self.seq, targets, self.args)
Esempio n. 2
0
	def search(self, hmm, targets, **kwargs):
		"""Perform the search; the results are accumulated in self.matches
				hmm: a file name or an HMM object which has been loaded from a file,
					or an iterable of either
				targets: the sequences to search - one or more Bio.SeqRecord
					(anything that is not a SeqRecord raises ValueError)
				keyword arguments: passed to self.getArgs() to build the HMMER
					arguments

				If the hmm performs searches on Amino Acids and any of the inputs are
				DNA sequences, 6-frame translations will be produced automatically
				Reverse translations (from Amino Acid to DNA) are not supported

				Raises ValueError if no HMM is supplied, if a target is not a
				SeqRecord, if the HMMs do not share one alphabet, or if a
				target's alphabet cannot be searched with the HMM alphabet
		"""

		# Load the HMM(s)
		#a file name is itself iterable (one character at a time) on Python 3, so
		#single file names and HMM objects are wrapped explicitly instead of
		#relying on the __iter__ probe alone
		if isinstance(hmm, (str, bytes, hmmfile.HMM)) or not hasattr(hmm, "__iter__"):
			hmm = [hmm,]
		#load the file if h is not an HMM object
		self.hmm = list()
		for h in hmm:
			if isinstance(h, hmmfile.HMM):
				self.hmm.append(h)
			else:
				self.hmm.extend(hmmfile.read(h))
		#fail with a clear message instead of an IndexError further down
		if not self.hmm:
			raise ValueError("At least one HMM is required to search")

		#make sure targets is iterable
		if not hasattr(targets, '__iter__') or isinstance(targets, SeqRecord):
			targets = [targets,]
		self.targets = list(targets)
		for t in self.targets:
			if not isinstance(t, SeqRecord):
				raise ValueError("Search Targets must be SeqRecords")

		#apply unique ids
		self.targets = wrap_seqrecords(self.targets) 
		self.hmm = wrap_hmms(self.hmm)

		#every model has to use the alphabet of the first one
		hmm_alpha = self.hmm[0].alph.upper()
		for h in self.hmm:
			if h.alph.upper() != hmm_alpha:
				raise ValueError("The HMMs don't all have the same alphabet")

		#get the arguments for HMMER
		args = self.getArgs(**kwargs)
		#clear the matches
		self.matches = []

		#Translate targets if necessary
		#NOTE: an HMM alphabet other than 'DNA' or 'AMINO' matches no branch
		#below, so such models are not searched and no error is raised
		for t in self.targets:
			t_alpha = t.alphabet()
			if hmm_alpha == 'DNA':
				if t_alpha != 'DNA':
					raise ValueError("Cannot search DNA model against non-DNA target")
				self.matches += self._do_search(self.hmm, t, args)
			elif hmm_alpha == 'AMINO':
				if t_alpha == 'AMINO':
					self.matches += self._do_search(self.hmm, t, args)
				elif t_alpha == 'DNA':
					#looks like we have to convert
					for tt in tools.getSixFrameTranslation(t):
						self.matches += self._do_search(self.hmm, tt, args)
				else:
					raise ValueError("Cannot search Protein model against {} target"
							.format(t.seq.alphabet))