def blastn_annotate(self, tag_sequences, subject_record, min_identity, evalue=0.001, **kwargs):
    """Annotate subject_record with blastn hits of the tag sequences.

    Every tag in tag_sequences is blasted against subject_record with
    s2s_blast_batch. Each HSP whose identities/align_length ratio is at
    least min_identity is converted to a feature with hsp2feature and
    appended to subject_record.features.

    Returns False if the batch blast returned None; otherwise True if at
    least one feature was added, False if none.
    """
    results = self.s2s_blast_batch(tag_sequences, [subject_record],
                                   evalue=evalue, command='blastn', **kwargs)
    if results is None:
        return False
    with user_message('Adding results as annotations...'):
        annotated = False
        for tag_index, tag in enumerate(tag_sequences):
            tag_results = results[tag_index]
            if not tag_results:
                continue
            # there is a single subject, so only the first record is relevant
            record = tag_results[0]
            if not record:
                continue
            tag_name = pretty_rec_name(tag)
            if tag_name != tag.id:
                tag_name = tag_name + ' (%s)' % tag.id
            # walk all HSPs of all alignments of all hits, in order
            all_hsps = (hsp
                        for hit in record
                        for ali in hit.alignments
                        for hsp in ali.hsps)
            for hsp in all_hsps:
                if hsp.identities / float(hsp.align_length) < min_identity:
                    continue
                if hsp.sbjct_start < hsp.sbjct_end:
                    strand = 1
                    location = FeatureLocation(hsp.sbjct_start-1, hsp.sbjct_end, strand)
                else:
                    # subject coordinates are reversed for the minus strand
                    strand = -1
                    location = FeatureLocation(hsp.sbjct_end-1, hsp.sbjct_start, strand)
                feature = self.hsp2feature(tag_name, 'blastn_annotations', location, hsp)
                self.add_program(feature, 'blastn')
                subject_record.features.append(feature)
                annotated = True
    return annotated
def _find_matches(self, seq_files, seq_id):
    """Load the sequence database and search the template seq_id for matches.

    Stores the pretty name of the template in self._seq_name and the
    search result in self._matches_list.

    Returns False if nothing was loaded or the job was aborted;
    otherwise True when the searcher returned something other than None.
    """
    loaded = self._load_db(seq_files)
    if not loaded:
        print('No templates were loaded from: %s' % str(seq_files))
        return False
    if self.aborted():
        return False
    template = self._seq_db[seq_id]
    self._seq_name = pretty_rec_name(template)
    matches = self._searcher.find_matches(WorkCounter(),
                                          template,
                                          self._max_mismatches,
                                          self._PCR_ProductsFinder)
    self._matches_list = matches
    return self._matches_list is not None
def _find_matches(self, seq_files, seq_id):
    """Search the template identified by seq_id for matches.

    The database is (re)loaded from seq_files first. On success the
    template's pretty name is kept in self._seq_name and the searcher
    output in self._matches_list.

    Returns True only when the searcher produced a result (not None);
    False when loading failed, the job was aborted, or nothing was found.
    """
    if self._load_db(seq_files):
        if self.aborted():
            return False
    else:
        print('No templates were loaded from: %s' % str(seq_files))
        return False
    template = self._seq_db[seq_id]
    self._seq_name = pretty_rec_name(template)
    self._matches_list = self._searcher.find_matches(
        WorkCounter(), template,
        self._max_mismatches, self._PCR_ProductsFinder)
    if self._matches_list is None:
        return False
    return True
def worker(template):
    """Search a single template with every primer and build a PCR mixture.

    Uses `self` and `mismatches` from the enclosing scope.

    Returns a (template name, saved mixture) tuple, or None if no
    duplexes were found or the mixture could not be created.
    """
    search_results = []
    for primer in self._primers:
        matches = self._searcher.find_matches(WorkCounter(), template,
                                              primer, mismatches)
        if matches:
            duplexes = self._searcher.compile_duplexes(WorkCounter(), *matches)
            if duplexes:
                search_results.append(duplexes)
    if not search_results:
        return None
    combined = self._combine_annealings(*search_results)
    tname = pretty_rec_name(template)
    mixture = self.create_PCR_mixture(WorkCounter(), tname,
                                      combined[0], combined[1])
    if mixture:
        return tname, mixture.save()
    return None
def fetchMore(self, index=QModelIndex()):
    """Load the next batch of at most self.rows_to_load rows from self._db.

    Appends (id, pretty name) pairs to self._rows between
    beginInsertRows/endInsertRows, then emits select_row for every
    pending row in self._to_select that is below the new end and drops
    those rows from the pending list.
    """
    start = len(self._rows)
    remaining = len(self._db) - start
    end = start + min(remaining, self.rows_to_load)
    self.beginInsertRows(QModelIndex(), start, end - 1)
    batch_ids = self._db.keys()[start:end]
    for sid in batch_ids:
        self._rows.append((sid, pretty_rec_name(self._db[sid])))
    self.endInsertRows()
    pending = self._to_select
    if pending and start <= pending[0]:
        emitted = 0
        # rows are consumed front to back; stop at the first one not loaded yet
        while emitted < len(pending) and pending[emitted] < end:
            self.select_row.emit(pending[emitted])
            emitted += 1
        self._to_select = self._to_select[emitted:]
def _blast_feature(self, f, c1, c2, features1, features2, evalue, max_rlen):
    """Blast the translated feature f of c1 against the six-frame translation of c2.

    For every HSP returned by BlastCLI.all_hsps a triple is produced:
    a SeqFeature on c1 (offset by f.location.start), a SeqFeature on the
    subject, and a color interpolated by the identity fraction.

    features1 and features2 are not used by this method.

    Returns a list of (feature1, feature2, color) tuples, or
    [(None, None, None)] when there is no translation or no HSPs.
    """
    trans = Translator(self._abort_event)
    cds = trans.translate(f.extract(c1), 11)
    sixframes = trans.translate_six_frames_single(c2, 11)
    if not sixframes:
        return [(None, None, None)]
    results = []
    for frame in sixframes:
        frame_results = BlastCLI.s2s_blast(cds, frame, evalue,
                                           command='blastp', task='blastp')
        if frame_results:
            results.extend(frame_results)
    hsps = BlastCLI.all_hsps(results, max_rlen)
    if not hsps:
        return [(None, None, None)]
    c1_name = pretty_rec_name(c1)
    fname = f.qualifiers['locus_tag'][0] if 'locus_tag' in f.qualifiers else 'CDS'
    cds_len = len(cds)
    low_color = None
    triples = []
    for hsp in hsps:
        # fraction of identical positions within the alignment
        identity = float(hsp.identities) / hsp.align_length
        print('%s %s: %5.1f%% (%5.1f%%)' % (c1_name, fname,
                                           identity * 100,
                                           float(hsp.identities) / cds_len * 100))
        color = colors.linearlyInterpolatedColor(colors.Color(0, 0, 1, 0.2),
                                                 colors.Color(0, 1, 0, 0.2),
                                                 0.2, 1, identity)
        # protein coordinates (1-based) -> nucleotide offsets (0-based)
        span = hsp.align_length * 3
        qstart = (hsp.query_start - 1) * 3
        qend = qstart + span
        sstart = (hsp.sbjct_start - 1) * 3
        send = sstart + span
        query_feature = SeqFeature(
            FeatureLocation(f.location.start + qstart,
                            f.location.start + qend,
                            strand=hsp.strand[0]))
        subject_feature = SeqFeature(
            FeatureLocation(sstart, send, strand=hsp.strand[1]))
        triples.append((query_feature, subject_feature, color))
    return triples
def find(self, counter, template, mismatches):
    """Search the template with every primer and simulate a PCR mixture.

    counter is split into one sub-counter per primer plus a final one
    that is passed to create_PCR_mixture.

    Returns {template name: saved mixture}, or None if the search was
    aborted, nothing annealed, or no mixture was created.
    """
    counter.set_subwork(self._num_p+1, self._p_weights+[0.01*self._pw_sum])
    found = []
    for idx, primer in enumerate(self._primers):
        if self.aborted():
            return None
        annealings = self._searcher.find(counter[idx], template, primer, mismatches)
        if annealings is not None:
            found.append(annealings)
    if not found:
        return None
    tname = pretty_rec_name(template)
    mixture_counter = counter[-1]
    extracted = self._extract_annealings(*found)
    mixture = self.create_PCR_mixture(mixture_counter, tname, *extracted)
    cleanup_files(found)
    counter.done()
    if mixture:
        return {tname: mixture.save()}
    return None
def worker(template):
    """Run the primer search on one template and assemble its PCR mixture.

    Relies on `self` and `mismatches` captured from the enclosing scope.

    Returns (template name, saved mixture), or None when no primer
    produced duplexes or create_PCR_mixture returned nothing.
    """
    def duplexes_of(primer):
        # matches -> duplexes for one primer; a falsy result means "nothing found"
        matches = self._searcher.find_matches(WorkCounter(), template,
                                              primer, mismatches)
        if not matches:
            return None
        return self._searcher.compile_duplexes(WorkCounter(), *matches)

    search_results = []
    for primer in self._primers:
        duplexes = duplexes_of(primer)
        if duplexes:
            search_results.append(duplexes)
    if not search_results:
        return None
    all_annealings = self._combine_annealings(*search_results)
    tname = pretty_rec_name(template)
    first, second = all_annealings[0], all_annealings[1]
    mixture = self.create_PCR_mixture(WorkCounter(), tname, first, second)
    return (tname, mixture.save()) if mixture else None
def find(self, counter, template, mismatches):
    """Find annealing sites of all primers on the template and build a PCR mixture.

    The counter gets one unit of sub-work per primer and one more for
    the mixture creation.

    Returns a single-item dict mapping the template's pretty name to the
    saved mixture; None on abort, when no primer annealed, or when the
    mixture was not created.
    """
    weights = self._p_weights + [0.01 * self._pw_sum]
    counter.set_subwork(self._num_p + 1, weights)
    all_annealings = []
    primer_index = 0
    for primer in self._primers:
        if self.aborted():
            return None
        result = self._searcher.find(counter[primer_index], template,
                                     primer, mismatches)
        primer_index += 1
        if result is None:
            continue
        all_annealings.append(result)
    if len(all_annealings) == 0:
        return None
    tname = pretty_rec_name(template)
    mixture = self.create_PCR_mixture(
        counter[-1], tname, *self._extract_annealings(*all_annealings))
    cleanup_files(all_annealings)
    counter.done()
    if not mixture:
        return None
    saved = mixture.save()
    return {tname: saved}
def blastp_annotate(self, tag_sequences, subject_record, min_identity, evalue=0.001, table=11, **kwargs):
    """Annotate subject_record with blastp hits of the tag sequences.

    The subject is translated in six reading frames with the given
    translation table, the tags are blasted against every frame with
    s2s_blast_batch, and each HSP whose identities/align_length ratio is
    at least min_identity is mapped back to nucleotide coordinates and
    appended to subject_record.features.

    Returns False if the translation is empty or the batch blast
    returned None; otherwise True if at least one feature was added,
    False if none.
    """
    # translate subject in six frames
    with user_message('Translating whole genome in 6 reading frames', '\n'):
        translator = Translator(self._abort_event)
        translation = translator.translate_six_frames(subject_record, table)
    if not translation:
        return False
    results = self.s2s_blast_batch(tag_sequences, translation,
                                   evalue=evalue, command='blastp', **kwargs)
    if results is None:
        return False
    with user_message('Adding results as annotations...'):
        annotated = False
        subj_len = len(subject_record)
        for i, tag in enumerate(tag_sequences):
            if not results[i]:
                continue
            tag_name = pretty_rec_name(tag)
            if tag_name != tag.id:
                tag_name += ' (%s)' % tag.id
            # results[i] holds one record per translated frame, in frame order
            for frame, record in enumerate(results[i]):
                if not record:
                    continue
                frec = translation[frame]
                start = frec.annotations['start']
                strand = frec.annotations['strand']
                for hit in record:
                    for ali in hit.alignments:
                        for hsp in ali.hsps:
                            if hsp.identities / float(hsp.align_length) < min_identity:
                                continue
                            # HSP residues sbjct_start..sbjct_end (1-based, inclusive)
                            # cover frame nucleotides
                            # [start+(sbjct_start-1)*3, start+sbjct_end*3)
                            nt_start = start + (hsp.sbjct_start - 1) * 3
                            nt_end = start + hsp.sbjct_end * 3
                            if strand == 1:
                                location = FeatureLocation(nt_start, nt_end, strand)
                            else:
                                # Mirror the half-open interval onto the forward
                                # strand. The upper bound must use (sbjct_start-1),
                                # otherwise the feature is one codon shorter than
                                # the HSP.
                                # NOTE(review): assumes 'start' is the frame offset
                                # on the reverse complement -- confirm against
                                # Translator.translate_six_frames.
                                location = FeatureLocation(subj_len - nt_end,
                                                           subj_len - nt_start,
                                                           strand)
                            feature = self.hsp2feature(tag_name, 'blastp_annotations', location, hsp)
                            self.add_program(feature, 'blastp')
                            subject_record.features.append(feature)
                            annotated = True
    return annotated
def _main(self):
    """Fetch homologous genome segments around a gene and design primers on them.

    First run (no 'CO-clusters.gb' in the working directory): the gene is
    extracted from the local genome and blasted against the remote
    database (or cached results are read from 'blast.results.xml'). The
    HSPs are filtered by alignment length, the surrounding segments are
    fetched and written to 'CO-clusters.gb', and the method returns 0.

    Later runs: the saved segments plus several local genomes are loaded,
    forward primers are searched in the 'transF' alignments and reverse
    primers in the 'cooS' alignments. The primers are printed, added to
    the alignments, and the alignments are saved to 'transF.aln' and
    'cooS.aln'.

    Returns 0 after fetching segments, 1 if no forward or no reverse
    primers were found, None otherwise.
    Raises RuntimeError if the genome, the gene or blast results are missing.
    """
    email = '*****@*****.**'
    # NOTE: machine-specific absolute path
    genome_dir = '/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcales/Thermococcus/'
    genome = 'Thermococcus_barophilus_Ch5.gb'
    gene = 'TBCH5v1_1369'  # cooS
    database = 'nr'
    segment = [3200, 12000]
    seq = SeqLoader.load_file(os.path.join(genome_dir, genome))
    if not seq:
        raise RuntimeError('No genome loaded')
    seq = seq[0]
    index = get_indexes_of_genes(seq, gene)
    if not index:
        raise RuntimeError('No gene found')
    feature = seq.features[index[0]]
    query = feature.extract(seq)
    segments_file = 'CO-clusters.gb'
    # get cluster variants if needed
    if not os.path.isfile(segments_file):
        blast_file = 'blast.results.xml'
        if os.path.isfile(blast_file):
            # parse the cached results; the handle is closed once they are read
            with open(blast_file) as blast_handle:
                blast = list(parse(blast_handle))
        else:
            blast = BlastCLI.blast_seq(query, database, 100,
                                       remote=True, task='blastn',
                                       parse_results=True,
                                       save_results_to=blast_file)
        if not blast:
            raise RuntimeError('Blast returned no results')
        flt = BlastFilter(lambda hsp, r: hsp.align_length > 700, filter_hsps=True)
        flt(blast)
        queries = []
        for ali in BlastCLI.iter_alignments(blast):
            q = BlastCLI.Query(ali, 'hsp',
                               start_offset=segment[0],
                               end_offset=segment[1])
            if q:
                queries.append(q)
                print(queries[-1])
        segments = BlastWWW.fetch_queries(email, queries)
        safe_write(segments, segments_file)
        for r in segments:
            print('[%s] %s: %dbp' % (r.id, pretty_rec_name(r), len(r)))
        return 0
    # find primers in alignments of the selected features
    local_files = [
        os.path.join(genome_dir, f)
        for f in ('Thermococcus_barophilus_DT4-complete-genome.gb',
                  'Thermococcus_ST-423.gb',
                  'Thermococcus_CH1-complete.gb')
    ]
    loader = SeqLoader(self.abort_event)
    segments = loader.load_files([segments_file] + local_files)
    fprimers, transF_ali = find_primers(
        segments, 'transF',
        dict(plen=(20, 30), max_mismatches=5, min_first_matches=3, AT_first=True))
    rprimers, cooS_ali = find_primers(
        segments, 'cooS',
        dict(plen=(20, 30), max_mismatches=4, min_first_matches=3, AT_first=True),
        reverse=True)
    if not fprimers:
        print('\nNo forward primers found')
        return 1
    if not rprimers:
        print('\nNo reverse primers found')
        return 1
    print('\nForward primers:')
    for p in fprimers:
        print('%s: %s' % (p.id, p))
    print('\nReverse primers:')
    for p in rprimers:
        print('%s: %s' % (p.id, p))
    print()
    # add primers to alignments and save them
    transF_ali = PrimerFinder.add_primers_to_alignment(fprimers, transF_ali)
    cooS_ali = PrimerFinder.add_primers_to_alignment(rprimers, cooS_ali, reverse=True)
    AlignmentUtils.save(transF_ali, 'transF.aln')
    AlignmentUtils.save(cooS_ali, 'cooS.aln')