def hmmsearch_genome(self, hmms, genome, table='Standard', decorate=False, **kwargs):
    """Search a six-frame translation of ``genome`` with one or more HMM profiles.

    Args:
        hmms: a single profile path (``str``) or an iterable of profile
            paths; each one is passed to ``self.hmmsearch_recs``.
        genome: the record to translate and search. It must support
            ``len()`` and, when ``decorate`` is true, expose a ``features``
            list that new annotations are appended to.
        table: translation table passed to
            ``Translator.translate_six_frames``.
        decorate: when true, every HSP of every hit is appended to
            ``genome.features`` as an ``'HMM_annotations'`` feature.
        **kwargs: forwarded unchanged to ``self.hmmsearch_recs``.

    Returns:
        The concatenated ``hmmsearch_recs`` results of all profiles that
        produced at least one hit, or ``None`` if the translation is empty
        or no profile produced a hit.
    """
    # Translate the genome in all six reading frames.
    with user_message('Translating whole genome in 6 reading frames', '\n'):
        translator = Translator(self._abort_event)
        translation = translator.translate_six_frames(genome, table)
    if not translation:
        return None
    if isinstance(hmms, str):
        hmms = [hmms]
    # Lookup of translated frames by record id. It is built lazily and kept
    # under its own name: rebinding ``translation`` itself would hand a dict
    # to ``hmmsearch_recs`` (and to this lookup construction) when the next
    # profile is processed.
    frames_by_id = None
    results = []
    for hmm in hmms:
        with user_message('Performing hmm search.'):
            hmm_results = self.hmmsearch_recs(hmm, translation, **kwargs)
        # Skip profiles that did not hit anything in any frame.
        if not any(len(r) for r in hmm_results):
            continue
        results += hmm_results
        if not decorate:
            continue
        # Decorate the genome with the hits of this profile.
        if frames_by_id is None:
            frames_by_id = {t.id: t for t in translation}
        with user_message('Adding results as annotations...'):
            hmm_name = os.path.basename(hmm)
            glen = len(genome)
            for frame in hmm_results:
                for hit in frame:
                    frec = frames_by_id[hit.id]
                    # NOTE(review): 'start' is presumably the nucleotide
                    # offset of the frame on its own strand -- confirm
                    # against Translator.translate_six_frames.
                    start = frec.annotations['start']
                    strand = frec.annotations['strand']
                    for hsp in hit:
                        # Protein coordinates are scaled by 3 to nucleotides;
                        # for the other strand they are mirrored relative to
                        # the genome length.
                        if strand == 1:
                            hmm_location = FeatureLocation(
                                start + hsp.hit_start * 3,
                                start + hsp.hit_end * 3,
                                strand)
                        else:
                            hmm_location = FeatureLocation(
                                glen - start - hsp.hit_end * 3,
                                glen - start - hsp.hit_start * 3,
                                strand)
                        hmm_feature = self.hsp2feature(
                            hmm_name, 'HMM_annotations', hmm_location, hsp)
                        genome.features.append(hmm_feature)
    return results if results else None
def blastp_annotate(self, tag_sequences, subject_record, min_identity, evalue=0.001, table=11, **kwargs):
    """Annotate ``subject_record`` with blastp hits of ``tag_sequences``.

    The subject is translated in six reading frames, the tag sequences are
    searched against the translated frames with
    ``self.s2s_blast_batch(..., command='blastp')``, and every HSP that
    passes the identity filter is appended to ``subject_record.features``
    as a ``'blastp_annotations'`` feature.

    Args:
        tag_sequences: query records; each must have an ``id`` and be
            accepted by ``pretty_rec_name``.
        subject_record: the record to translate and annotate; it must
            support ``len()`` and expose a ``features`` list.
        min_identity: minimum ``identities / align_length`` ratio an HSP
            needs in order to be annotated.
        evalue: e-value passed to ``self.s2s_blast_batch``.
        table: translation table passed to
            ``Translator.translate_six_frames``.
        **kwargs: forwarded unchanged to ``self.s2s_blast_batch``.

    Returns:
        ``True`` if at least one feature was added, ``False`` otherwise
        (including when the translation is empty or the search returned
        ``None``).
    """
    # Translate the subject in six reading frames.
    with user_message('Translating whole genome in 6 reading frames', '\n'):
        translator = Translator(self._abort_event)
        translation = translator.translate_six_frames(subject_record, table)
    if not translation:
        return False
    results = self.s2s_blast_batch(tag_sequences, translation,
                                   evalue=evalue, command='blastp', **kwargs)
    if results is None:
        return False
    with user_message('Adding results as annotations...'):
        annotated = False
        subj_len = len(subject_record)
        for i, tag in enumerate(tag_sequences):
            if not results[i]:
                continue
            # Keep the record id visible when the pretty name differs from it.
            tag_name = pretty_rec_name(tag)
            if tag_name != tag.id:
                tag_name += ' (%s)' % tag.id
            # results[i] is indexed in parallel with the translated frames.
            for frame, record in enumerate(results[i]):
                if not record:
                    continue
                frec = translation[frame]
                # NOTE(review): 'start' is presumably the nucleotide offset
                # of the frame on its own strand -- confirm against
                # Translator.translate_six_frames.
                start = frec.annotations['start']
                strand = frec.annotations['strand']
                for hit in record:
                    for ali in hit.alignments:
                        for hsp in ali.hsps:
                            if hsp.identities / float(hsp.align_length) < min_identity:
                                continue
                            # BLAST subject coordinates are 1-based and
                            # inclusive, so the half-open protein span is
                            # [sbjct_start - 1, sbjct_end); scale it by 3 to
                            # get nucleotides.
                            nuc_start = start + (hsp.sbjct_start - 1) * 3
                            nuc_end = start + hsp.sbjct_end * 3
                            if strand == 1:
                                location = FeatureLocation(nuc_start, nuc_end, strand)
                            else:
                                # Mirror the same span onto the forward
                                # coordinates of the record. Using the same
                                # 1-based correction as the forward branch
                                # keeps the feature length identical on both
                                # strands (previously the reverse feature
                                # was one codon short).
                                location = FeatureLocation(subj_len - nuc_end,
                                                           subj_len - nuc_start,
                                                           strand)
                            feature = self.hsp2feature(tag_name, 'blastp_annotations',
                                                       location, hsp)
                            self.add_program(feature, 'blastp')
                            subject_record.features.append(feature)
                            annotated = True
    return annotated