def _main(self): from BioUtils.SeqUtils import SeqView from BioUtils.Tools.Multiprocessing import parallelize_work with simple_timeit('load'): sv = SeqView() sv.load([self.large_seqdb]) ssv = sv.subview(sv.keys()[:5]) print ssv.keys() print ssv[3] print import cPickle as pickle ssv1 = pickle.loads(pickle.dumps(ssv, protocol=-1)) print ssv1.keys() print ssv1[3] print def worker(id, db): return len(db[id]) for numrecs in xrange(1000, len(sv), 1000): svs = sv[0:numrecs] with simple_timeit('sequential %d' % numrecs): res1 = [len(svs[k]) for k in svs.keys()] with simple_timeit('parallel %d' % numrecs): res2 = parallelize_work(self.abort_event, 1, 1, worker, svs.keys(), svs, init_args=lambda db: (db.clone(),)) assert res1 == res2 print '-'*80 print 'Done'
class _Loader(QThread):
    """Background thread that loads sequence files into a SeqView.

    Emits ``loaded`` once loading finishes; the result is then
    available as ``self.db`` (``None`` until the thread has run).
    """
    loaded = pyqtSignal()

    def __init__(self, filenames):
        QThread.__init__(self)
        self.filenames = filenames
        self.db = None

    def __del__(self):
        # make sure the thread has finished before the object goes away
        self.wait()

    def run(self):
        view = SeqView()
        view.load(self.filenames)
        self.db = view
        self.loaded.emit()
def _extract_clusters(self, tag, qual='ugene_name'):
    """Collect features whose *qual* qualifier matches the *tag* regex.

    Scans every record of the genome files, extracts each matching
    feature as a standalone record named after its qualifier value.

    :param tag:  regular expression matched against the qualifier value
    :param qual: name of the feature qualifier holding the cluster name
    :return: dict mapping cluster id -> extracted sequence record
    """
    tagre = re.compile(tag)
    clusters = {}
    records = SeqView()
    records.load(self.genomes_files)
    for record in records:
        for f in record.features:
            if qual not in f.qualifiers: continue
            q = ' '.join(f.qualifiers[qual])
            if not tagre.match(q): continue
            c = f.extract(record)
            c.id = c.name = q
            c.description = record.description
            # BUGFIX: the original used "or", which is always true (an
            # alphabet cannot be two different things at once), so every
            # record was forced to IUPACAmbiguousDNA. Only substitute the
            # default alphabet when it is neither nucleotide nor protein.
            if c.seq.alphabet is not NucleotideAlphabet \
            and c.seq.alphabet is not ProteinAlphabet:
                c.seq.alphabet = IUPAC.IUPACAmbiguousDNA()
            self._process_features(c)
            clusters[c.id] = c
    return clusters
class iPCR_Base(iPCR_Interface): ''' Using PCR_Simulation and SeqDB classes runs PCR simulation with given primers and report results in human readable form in a text file. ''' def __init__(self, abort_event, max_mismatches, *args, **kwargs): iPCR_Interface.__init__(self, abort_event, *args, **kwargs) self._max_mismatches = max_mismatches self._seq_db = None self._PCR_Simulation = None #end def def __del__(self): try: self._searcher.shutdown() except: pass #end def def _load_db(self, filenames): self._seq_db = SeqView(upper=True) if not self._seq_db.load(filenames): self._seq_db = None return False return True def _format_header(self): header = iPCR_Interface._format_header(self) if self._max_mismatches != None: header += 'Number of mismatches allowed: %d\n\n' % self._max_mismatches return header #end def def write_products_report(self): if not self._have_results: return #open report file ipcr_products = self._open_report('iPCR products', self._PCR_products_filename) ipcr_products.write(time_hr()) if self._PCR_Simulation: ipcr_products.write(self._PCR_Simulation.format_products_report()) else: ipcr_products.write( hr(' No PCR products have been found ', symbol='!')) ipcr_products.close() print '\nThe list of PCR products was written to:\n %s' % self._PCR_products_filename self._add_report('iPCR products', self._PCR_products_filename) #end def #end class
def _main(self): from BioUtils.SeqUtils import SeqView from BioUtils.Tools.Multiprocessing import parallelize_work with simple_timeit('load'): sv = SeqView() sv.load([self.large_seqdb]) ssv = sv.subview(sv.keys()[:5]) print ssv.keys() print ssv[3] print import cPickle as pickle ssv1 = pickle.loads(pickle.dumps(ssv, protocol=-1)) print ssv1.keys() print ssv1[3] print def worker(id, db): return len(db[id]) for numrecs in xrange(1000, len(sv), 1000): svs = sv[0:numrecs] with simple_timeit('sequential %d' % numrecs): res1 = [len(svs[k]) for k in svs.keys()] with simple_timeit('parallel %d' % numrecs): res2 = parallelize_work(self.abort_event, 1, 1, worker, svs.keys(), svs, init_args=lambda db: (db.clone(), )) assert res1 == res2 print '-' * 80 print 'Done'
class iPCR_Base(iPCR_Interface): ''' Using PCR_Simulation and SeqDB classes runs PCR simulation with given primers and report results in human readable form in a text file. ''' def __init__(self, abort_event, max_mismatches, *args, **kwargs): iPCR_Interface.__init__(self, abort_event, *args, **kwargs) self._max_mismatches = max_mismatches self._seq_db = None self._PCR_Simulation = None #end def def __del__(self): try: self._searcher.shutdown() except: pass #end def def _load_db(self, filenames): self._seq_db = SeqView(upper=True) if not self._seq_db.load(filenames): self._seq_db = None return False return True def _format_header(self): header = iPCR_Interface._format_header(self) if self._max_mismatches != None: header += 'Number of mismatches allowed: %d\n\n' % self._max_mismatches return header #end def def write_products_report(self): if not self._have_results: return #open report file ipcr_products = self._open_report('iPCR products', self._PCR_products_filename) ipcr_products.write(time_hr()) if self._PCR_Simulation: ipcr_products.write(self._PCR_Simulation.format_products_report()) else: ipcr_products.write(hr(' No PCR products have been found ', symbol='!')) ipcr_products.close() print '\nThe list of PCR products was written to:\n %s' % self._PCR_products_filename self._add_report('iPCR products', self._PCR_products_filename) #end def #end class
def _main(self):
    """Design discriminating 16S primer pairs for the Bathy/SunS dataset.

    Loads (and, when needed, augments with SILVA homologs and realigns)
    a 16S alignment, trims it, searches both strands for candidate
    primers, pairs them by product length, and writes the chosen primers
    back into the alignment for visual inspection.
    Returns a non-zero int on failure (shell-style exit code).
    """
    min_prod = 400  # minimal acceptable PCR product length (alignment columns)
    # NOTE(review): hard-coded local paths — this is a one-off analysis script
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
    # records that must end up at the bottom of the alignment (outgroup)
    outgroups = ['Thermococcus_chitonophagus', 'SMTZ1-55', 'contig72135_1581_sunspring_meta']
    # SILVA accessions to pull in as additional homologs
    add = ['KF836721.1.1270','EU635905.1.1323']
    exclude = []#['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta']
    #load alignment; reuse the augmented file from a previous run if present
    if os.path.isfile(add_filename):
        alifile = add_filename
        add_filename = ''  # empty string marks "no additions needed"
    with user_message('Loadding initial alignment...', '\n'):
        orig_ali = AlignmentUtils.load_first(alifile)
        if not orig_ali: return 1
    #load homologs
    if add_filename:
        with user_message('Loadding additional sequences...', '\n'):
            add_seqs = []
            db = SeqView()
            if db.load(silva_db):
                for sid in add:
                    seq = db.get(sid)
                    if seq: add_seqs.append(seq)
                    else: print '%s not found in %s' % (sid, silva_db)
        #realign data if needed
        if add_seqs:
            with user_message('Realigning data...', '\n'):
                add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
                AlignmentUtils.align(list(orig_ali)+add_seqs, add_filename)
                orig_ali = AlignmentUtils.load_first(add_filename)
                if not orig_ali: return 2
    #process the alignment: drop excluded records and trim ragged edges
    ali = orig_ali.remove(*exclude).trim()
    for out in outgroups:
        if not ali.index(out):
            print '%s not found in the alignment' % out
            return 3
    # push outgroup records to the end ('zzzzzzzz' sorts after normal ids)
    ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id)
    ali_len = ali.get_alignment_length()
    AlignmentUtils.save(ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta')
    # primer-search parameters; outgroup=N — presumably how many trailing
    # records must be discriminated against (TODO confirm in _find_primers)
    args = dict(plen = (20,40), max_mismatches = 8,
                min_match_mismatches = 1, first_match_mismatches = 1,
                first_may_match = 1, AT_first=True,
                outgroup=len(outgroups))
    fprimers = self._find_primers(ali, **args)
    # reverse primers are searched on the reverse-complemented alignment
    rprimers = self._find_primers(ali.reverse_complement(), **args)
    # pair every forward primer with every reverse primer whose product
    # exceeds min_prod; positions are alignment columns
    pairs = []
    for i, (fs, fp) in enumerate(fprimers):
        start = fs
        fprimer = Primer.from_sequences(fp[:-1], 1, 'SSBaF%d' % fs)
        for _j, (rs, rp) in enumerate(rprimers):
            # rs counts from the reverse-complement end, so map it back
            end = ali_len-rs
            if end-start <= min_prod: continue
            pairs.append((fprimer, Primer.from_sequences(rp[:-1], 1, 'SSBaR%d' % (ali_len-rs+1))))
    if not pairs:
        print '\nNo suitable primer pairs found'
        return 3
    # report each pair and append every unique primer, gap-padded to the
    # full alignment width, to the original alignment
    added = set()
    for i, (fp, rp) in enumerate(pairs):
        print '\npair %d' % (i+1)
        print '%s: %s' % (fp.id, fp)
        print '%s: %s' % (rp.id, rp)
        if fp.id not in added:
            orig_ali.append(fp.master_sequence+'-'*(orig_ali.get_alignment_length()-len(fp)))
            added.add(fp.id)
        if rp.id not in added:
            # flip the reverse primer back onto the alignment strand,
            # keeping its id/annotations via copy_attrs
            orig_ali.append(copy_attrs(rp.master_sequence,
                                       rp.master_sequence.reverse_complement())+
                            '-'*(orig_ali.get_alignment_length()-len(rp)))
            added.add(rp.id)
        print
    orig_ali = AlignmentUtils.align(orig_ali)
    AlignmentUtils.save(orig_ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta')
    print 'Done'
def _main(self): min_prod = 400 silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta' alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta' add_filename = FilenameParser.strip_ext( alifile) + '.with_additions.fasta' outgroups = [ 'Thermococcus_chitonophagus', 'SMTZ1-55', 'contig72135_1581_sunspring_meta' ] add = ['KF836721.1.1270', 'EU635905.1.1323'] exclude = [ ] #['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta'] #load alignment if os.path.isfile(add_filename): alifile = add_filename add_filename = '' with user_message('Loadding initial alignment...', '\n'): orig_ali = AlignmentUtils.load_first(alifile) if not orig_ali: return 1 #load homologs if add_filename: with user_message('Loadding additional sequences...', '\n'): add_seqs = [] db = SeqView() if db.load(silva_db): for sid in add: seq = db.get(sid) if seq: add_seqs.append(seq) else: print '%s not found in %s' % (sid, silva_db) #realign data if needed if add_seqs: with user_message('Realigning data...', '\n'): add_filename = FilenameParser.strip_ext( alifile) + '.with_additions.fasta' AlignmentUtils.align( list(orig_ali) + add_seqs, add_filename) orig_ali = AlignmentUtils.load_first(add_filename) if not orig_ali: return 2 #process the alignment ali = orig_ali.remove(*exclude).trim() for out in outgroups: if not ali.index(out): print '%s not found in the alignment' % out return 3 ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id) AlignmentUtils.save( ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta' ) args = dict(plen=(20, 40), max_mismatches=8, min_match_mismatches=1, first_match_mismatches=1, first_may_match=1, AT_first=True, outgroup=len(outgroups)) fprimers = PrimerFinder.find_discriminating_primers(ali, **args) rprimers = PrimerFinder.find_discriminating_primers(ali, reverse=True, **args) pairs = PrimerFinder.compile_pairs(fprimers, rprimers, min_prod, 
'SSBa') if not pairs: print '\nNo suitable primer pairs found' return 3 PrimerFinder.print_pairs(pairs) orig_ali = PrimerFinder.add_pairs_to_alignment(pairs, orig_ali) AlignmentUtils.save( orig_ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta' ) print 'Done'