def trimming(self, raw_file, outname):
    """Stream the records of ``raw_file`` through two blaster passes and emit the survivors.

    The input is read as a FASTA stream, searched with a blaster built from
    ``self.ltr_fasta``, filtered by ``similarity_filter``, mapped with
    ``cut_ltr_mapper_output_fasta()``, searched again with a blaster built
    from ``self.linker_fasta`` and mapped with
    ``cut_linker_mapper_output_fasta()``. Each resulting item whose second
    field is longer than 19 characters is formatted as a two-line FASTA
    record and handed to ``self._setOut``.

    Args:
        raw_file: Path of the input file; stored on ``self.raw_file`` and
            opened for reading.
        outname: Stored on ``self.outname``; not otherwise used here.

    Returns:
        None.
    """
    self.raw_file = raw_file
    self.outname = outname

    seq_factory = seq_factory_from_fasta(STRAND)
    # Both blasters are configured identically, so one options dict suffices.
    blast_kwds = {"Program": ncbi_toolkit.EProgram.eBlastn}

    ltr_seq = seq_factory.make(self.ltr_fasta)
    ltr_blaster = blaster(ltr_seq, **blast_kwds)
    linker_seq = seq_factory.make(self.linker_fasta)
    linker_blaster = blaster(linker_seq, **blast_kwds)

    # The pipeline is built and consumed inside the ``with`` block so the
    # input file is always closed, even if a stage raises part-way through.
    with open(self.raw_file) as fin:
        fasta_records = fasta_stream_from_stream(fin)
        query_seqs = blast_seq_stream(seq_factory, fasta_records)
        ltr_results = blast_result_stream(ltr_blaster, query_seqs)
        # NOTE(review): 0.89 is presumably a minimum similarity fraction
        # relative to the LTR sequence length -- confirm against
        # similarity_filter.
        ltr_filtered = similarity_filter(
            0.89, len(ltr_seq.get_sequence()), ltr_results
        )
        ltr_strs = mapped_stream(cut_ltr_mapper_output_fasta(), ltr_filtered)
        ltr_seqs = blast_seq_stream(seq_factory, ltr_strs)
        lnk_results = blast_result_stream(linker_blaster, ltr_seqs)
        lnk_strs = mapped_stream(cut_linker_mapper_output_fasta(), lnk_results)

        for item in lnk_strs:
            # Only items whose second field exceeds 19 characters are kept.
            if len(item[1]) > 19:
                header = ">" + item[0].replace("|", "_").replace("lcl_", "")
                self._setOut("\n".join([header, item[1]]))
def compute_results(self):
    """
    Compute results dict.

    Builds one blaster over ``self.sequences``, runs it against the same
    sequences in a single call, and stores in ``self.res_dict`` a list of
    per-HSP dicts keyed by ``(query.id, subject.id)``. Each dict holds
    ``bit_score``, ``e_value`` and the four endpoints returned by
    ``get_match_endpoints``.
    """
    self.res_dict = {}
    searcher = blaster(self.sequences, **self.blast_options)
    subjects, all_hits = searcher.blast(self.sequences)
    self.assertEqual(len(all_hits), len(self.sequences))
    self.assertEqual(subjects, self.sequences)

    # Stable sorts applied from the least to the most significant key, so
    # the final order is by query_start, then query_end, then
    # subject_start, then subject_end.
    sort_fields = ("subject_end", "subject_start", "query_end", "query_start")

    for hits in all_hits:
        if hits is None:
            continue
        for hsps in hits:
            query = self.sequences[hsps.query_index]
            subject = self.sequences[hsps.ordinal_id_subject_sequence]
            records = []
            for hsp in hsps:
                record = {"bit_score": hsp.bit_score, "e_value": hsp.evalue}
                q_start, q_end, s_start, s_end = get_match_endpoints(
                    hsp.query,
                    hsp.subject,
                    query.length,
                    subject.length,
                    self.progname,
                )
                record["query_start"] = q_start
                record["query_end"] = q_end
                record["subject_start"] = s_start
                record["subject_end"] = s_end
                records.append(record)
            for field in sort_fields:
                records.sort(key=itemgetter(field))
            self.res_dict[(query.id, subject.id)] = records
def compute_results(self):
    """
    Compute results dict.

    For every ordered pair of entries in ``self.sequences`` a blaster is
    built from the first and run against the second. When the first hit
    list is not ``None``, its first HSP list is converted to a list of
    dicts (``bit_score``, ``e_value`` and the four endpoints returned by
    ``get_match_endpoints``) and stored in ``self.res_dict`` under
    ``(first.id, second.id)``.
    """
    self.res_dict = {}

    # Stable sorts applied from the least to the most significant key, so
    # the final order is by query_start, then query_end, then
    # subject_start, then subject_end.
    sort_fields = ("subject_end", "subject_start", "query_end", "query_start")

    for query_seq in self.sequences:
        searcher = blaster(query_seq, **self.blast_options)
        for subject_seq in self.sequences:
            subject, hits_per_query = searcher.blast(subject_seq)
            self.assertEqual(subject, subject_seq)
            hits = hits_per_query[0]
            if hits is None:
                continue
            records = []
            for hsp in hits[0]:
                record = {"bit_score": hsp.bit_score, "e_value": hsp.evalue}
                q_start, q_end, s_start, s_end = get_match_endpoints(
                    hsp.query,
                    hsp.subject,
                    query_seq.length,
                    subject_seq.length,
                    self.progname,
                )
                record["query_start"] = q_start
                record["query_end"] = q_end
                record["subject_start"] = s_start
                record["subject_end"] = s_end
                records.append(record)
            for field in sort_fields:
                records.sort(key=itemgetter(field))
            self.res_dict[(query_seq.id, subject_seq.id)] = records
def compute_results(self):
    """
    Compute results dict.

    A single blaster is created from ``self.sequences`` and run against
    those same sequences. For each HSP list in each non-``None`` hit list,
    ``self.res_dict[(query.id, subject.id)]`` is set to a sorted list of
    dicts carrying ``bit_score``, ``e_value``, ``query_start``,
    ``query_end``, ``subject_start`` and ``subject_end``.
    """
    self.res_dict = {}
    engine = blaster(self.sequences, **self.blast_options)
    subjects, hit_lists = engine.blast(self.sequences)
    self.assertEqual(len(hit_lists), len(self.sequences))
    self.assertEqual(subjects, self.sequences)

    for hit_list in hit_lists:
        if hit_list is None:
            continue
        for hsp_group in hit_list:
            query = self.sequences[hsp_group.query_index]
            subject = self.sequences[hsp_group.ordinal_id_subject_sequence]
            entries = []
            for hsp in hsp_group:
                entry = {"bit_score": hsp.bit_score, "e_value": hsp.evalue}
                q_start, q_end, s_start, s_end = get_match_endpoints(
                    hsp.query,
                    hsp.subject,
                    query.length,
                    subject.length,
                    self.progname,
                )
                entry.update(
                    query_start=q_start,
                    query_end=q_end,
                    subject_start=s_start,
                    subject_end=s_end,
                )
                entries.append(entry)
            # list.sort is stable: sorting by the least significant key
            # first leaves the list ordered by query_start, query_end,
            # subject_start, subject_end.
            for sort_key in (
                "subject_end",
                "subject_start",
                "query_end",
                "query_start",
            ):
                entries.sort(key=itemgetter(sort_key))
            self.res_dict[(query.id, subject.id)] = entries
def compute_results(self):
    """
    Compute results dict.

    Iterates over every ordered pair from ``self.sequences``: a blaster is
    created from the first member and run against the second. If the
    first hit list is ``None`` the pair is skipped; otherwise the HSPs of
    its first HSP list are turned into dicts (``bit_score``, ``e_value``
    and the endpoints from ``get_match_endpoints``), sorted, and stored in
    ``self.res_dict`` under ``(first.id, second.id)``.
    """
    self.res_dict = {}
    for first in self.sequences:
        engine = blaster(first, **self.blast_options)
        for second in self.sequences:
            subject, hit_lists = engine.blast(second)
            self.assertEqual(subject, second)
            hit_list = hit_lists[0]
            if hit_list is None:
                continue
            hsp_group = hit_list[0]
            entries = []
            for hsp in hsp_group:
                entry = {"bit_score": hsp.bit_score, "e_value": hsp.evalue}
                q_start, q_end, s_start, s_end = get_match_endpoints(
                    hsp.query,
                    hsp.subject,
                    first.length,
                    second.length,
                    self.progname,
                )
                entry.update(
                    query_start=q_start,
                    query_end=q_end,
                    subject_start=s_start,
                    subject_end=s_end,
                )
                entries.append(entry)
            # list.sort is stable: sorting by the least significant key
            # first leaves the list ordered by query_start, query_end,
            # subject_start, subject_end.
            for sort_key in (
                "subject_end",
                "subject_start",
                "query_end",
                "query_start",
            ):
                entries.sort(key=itemgetter(sort_key))
            self.res_dict[(first.id, second.id)] = entries
def test_set_options(self):
    """Check that values written via ``set_options()`` are read back via ``get_options()``.

    For each program name ("blastn", "tblastx") and each entry of
    ``self.sequences`` a blaster is created and stored on ``self.blaster``.
    Every key exposed by its ``get_options()`` object is then assigned a
    value produced by ``get_modified_value`` through the ``set_options()``
    object, and the getter is asserted to return that new value.
    """
    for program_name in ("blastn", "tblastx"):
        for sequence in self.sequences:
            program = get_program(program_name)
            self.blaster = blaster(sequence, Program=program)
            current_options = self.blaster.get_options()
            writable_options = self.blaster.set_options()
            for option in current_options.keys():
                new_value = get_modified_value(current_options[option])
                writable_options[option] = new_value
                # Re-read through the getter so the test observes the write.
                self.assertEqual(new_value, current_options[option])