def __init__(self, seq, seqdb, verbose=False, **kwargs):
    """Validate the query and targets, then hand them to ``_do_search``.

    seq: the query sequence; must be a single protein SeqRecord
    seqdb: the sequence database to search, a single SeqRecord or an
        iterable of SeqRecords. DNA targets are replaced by their
        six-frame translations before the search is started.
    verbose: accepted but not used by this method
    keyword arguments: other arguments to jackhmmer - see HMMER docs

    Raises ValueError if seq is not a SeqRecord, if seq is not a protein
    sequence, or if a target is neither DNA nor protein.
    """
    # Can only have one query
    if not isinstance(seq, SeqRecord):
        raise ValueError(
            "seq must be a SeqRecord, not '{}'".format(type(seq)))
    if not is_protein(seq):
        # Report the offending alphabet the same way the target check
        # below does (the original formatted an undefined name here).
        raise ValueError(
            "seq must have a ProteinAlphabet, not '{}'"
            .format(seq.seq.alphabet))

    # Allow a single record to be passed as the database
    if isinstance(seqdb, SeqRecord):
        seqdb = [seqdb]

    self.args = self.getArgs(**kwargs)
    self.matches = []
    self.hmms = []

    # apply unique ids to the query and the targets
    self.seq = wrap_seqrecords([seq])
    self.seqdb = wrap_seqrecords(seqdb)

    # Protein targets are searched as-is; DNA targets are expanded into
    # their six-frame translations.
    targets = []
    for t in self.seqdb:
        if is_protein(t):
            targets.append(t)
        elif is_dna(t):
            targets.extend(tools.getSixFrameTranslation(t))
        else:
            raise ValueError(
                "Targets must have a DNAAlphabet or a ProteinAlphabet, "
                "not '{}'".format(t.seq.alphabet))

    self._do_search(self.seq, targets, self.args)
def search(self, hmm, targets, **kwargs):
    """Perform the search

    hmm: a file name, an HMM object which has been loaded from a file, or
        an iterable of either
    targets: the sequences to search - one or more Bio.SeqRecord
    keyword arguments: passed to ``getArgs`` to build the HMMER arguments

    If the hmm performs searches on Amino Acids and any of the inputs are
    DNA sequences, 6-frame translations will be produced automatically.
    Reverse translations (from Amino Acid to DNA) are not supported.

    The loaded models are stored in ``self.hmm``, the wrapped targets in
    ``self.targets`` and the hits in ``self.matches`` (reset on every call).

    Raises ValueError if a target is not a SeqRecord, if the HMMs do not
    all share one alphabet, or if a target's alphabet cannot be searched
    with the model's alphabet.
    """
    # Load the HMM(s).  A bare file name must be wrapped explicitly: on
    # Python 3 a str has __iter__, so relying on hasattr alone would
    # iterate over the individual characters of the name.  A single HMM
    # object is wrapped for the same reason targets special-cases SeqRecord.
    if isinstance(hmm, (str, hmmfile.HMM)) or not hasattr(hmm, "__iter__"):
        hmm = [hmm]

    # load the file if h is not an HMM object
    self.hmm = []
    for h in hmm:
        if isinstance(h, hmmfile.HMM):
            self.hmm.append(h)
        else:
            self.hmm.extend(hmmfile.read(h))

    # make sure targets is iterable
    if not hasattr(targets, '__iter__') or isinstance(targets, SeqRecord):
        targets = [targets]
    self.targets = list(targets)
    for t in self.targets:
        if not isinstance(t, SeqRecord):
            raise ValueError("Search Targets must be SeqRecords")

    # apply unique ids
    self.targets = wrap_seqrecords(self.targets)
    self.hmm = wrap_hmms(self.hmm)

    # All models must share the alphabet of the first one
    hmm_alpha = self.hmm[0].alph.upper()
    for h in self.hmm:
        if h.alph.upper() != hmm_alpha:
            raise ValueError("The HMMs don't all have the same alphabet")

    # get the arguments for HMMER
    args = self.getArgs(**kwargs)

    # clear the matches
    self.matches = []

    # Translate targets if necessary.
    # NOTE(review): a model alphabet other than 'DNA' or 'AMINO' matches
    # neither branch below, so such a search silently yields no matches.
    for t in self.targets:
        t_alpha = t.alphabet()
        if hmm_alpha == 'DNA':
            if t_alpha == 'DNA':
                self.matches += self._do_search(self.hmm, t, args)
            else:
                raise ValueError(
                    "Cannot search DNA model against non-DNA target")
        elif hmm_alpha == 'AMINO':
            if t_alpha == 'AMINO':
                self.matches += self._do_search(self.hmm, t, args)
            elif t_alpha == 'DNA':
                # looks like we have to convert
                for tt in tools.getSixFrameTranslation(t):
                    self.matches += self._do_search(self.hmm, tt, args)
            else:
                raise ValueError(
                    "Cannot search Protein model against {} target"
                    .format(t.seq.alphabet))