コード例 #1
0
ファイル: Test_SeqUtils.py プロジェクト: allista/BioUtils
 def _main(self):
     """Exercise SeqView loading, subviews, pickling and parallel access.

     Loads ``self.large_seqdb`` into a SeqView, prints a subview built from
     its first five keys before and after a pickle round-trip, and then,
     for growing slices of the view, asserts that record lengths collected
     sequentially equal those returned by ``parallelize_work``.
     """
     import cPickle as pickle
     from BioUtils.SeqUtils import SeqView
     from BioUtils.Tools.Multiprocessing import parallelize_work

     def show(view):
         # A parenthesised single-argument print acts like the print statement.
         print(view.keys())
         print(view[3])
         print('')

     def worker(rec_id, db):
         return len(db[rec_id])

     def clone_for_worker(db):
         # init_args callback: hands parallelize_work a clone of the view
         return (db.clone(),)

     with simple_timeit('load'):
         seq_view = SeqView()
         seq_view.load([self.large_seqdb])

     head = seq_view.subview(seq_view.keys()[:5])
     show(head)
     # protocol=-1 selects the highest pickle protocol available
     show(pickle.loads(pickle.dumps(head, protocol=-1)))

     for count in xrange(1000, len(seq_view), 1000):
         chunk = seq_view[0:count]
         with simple_timeit('sequential %d' % count):
             expected = []
             for key in chunk.keys():
                 expected.append(len(chunk[key]))
         with simple_timeit('parallel %d' % count):
             actual = parallelize_work(self.abort_event, 1, 1, worker,
                                       chunk.keys(), chunk,
                                       init_args=clone_for_worker)
         assert expected == actual
         print('-' * 80)
     print('Done')
コード例 #2
0
ファイル: iPCR.py プロジェクト: npilshchikova/DegenPrimer
 def _find_products_in_db(self, counter, seq_ids, PCR_Sim, P_Finder):
     """Search sequences of the loaded database for PCR products.

     Each id in ``seq_ids`` is looked up in ``self._seq_db``; ids whose
     lookup yields ``None`` are reported and skipped. The rest are split by
     ``mp_better(length)`` into "long" templates, searched one at a time,
     and "short" templates, searched together as a single subview. Every
     mixture found is passed to ``PCR_Sim.add_mixture``.

     Returns ``False`` if the run was aborted or the batch search of short
     templates returned nothing; otherwise ``PCR_Sim.not_empty()``.
     """
     # A parenthesised single-argument print acts like the print statement.
     lengths = {}
     short_ids, long_ids = [], []
     with simple_timeit('PCR Simulation: sorting templates'):
         for seq_id in seq_ids:
             record = self._seq_db[seq_id]
             if record is None:
                 print('Sequence %s not found. Something is not right with the sequence database.' % seq_id)
                 continue
             size = lengths[seq_id] = len(record)
             bucket = long_ids if mp_better(size) else short_ids
             bucket.append(seq_id)
         if self.aborted():
             return False
     # a lone short template is handled together with the long ones
     if len(short_ids) == 1:
         long_ids.extend(short_ids)
         short_ids = []
     # distribute the work counter between the two groups
     with simple_timeit('PCR Simulation: counting work to be done'):
         long_lengths = [lengths[sid] for sid in long_ids]
         if short_ids and long_ids:
             short_total = sum(lengths[sid] for sid in short_ids)
             counter.set_subwork(2, (short_total, sum(long_lengths)))
             short_counter, long_counter = counter[0], counter[1]
         else:
             short_counter = long_counter = counter
         if long_ids:
             long_counter.set_subwork(len(long_ids), long_lengths)
         if short_ids:
             short_counter.set_subwork(len(short_ids))
     print('\nPCR Simulation: searching for annealing sites in provided sequences...')
     # short templates: one batch search over a subview
     if short_ids:
         short_view = self._seq_db.subview(short_ids)
         found = self._find_products_in_templates(short_counter,
                                                  short_view,
                                                  P_Finder)
         if not found or self.aborted():
             return False
         for item in found.iteritems():
             PCR_Sim.add_mixture(*item)
     # long templates: one search per template, each with its own sub-counter
     for idx, seq_id in enumerate(long_ids):
         found = self._find_products_in_templates(long_counter[idx],
                                                  [self._seq_db[seq_id]],
                                                  P_Finder)
         if found is not None:
             PCR_Sim.add_mixture(*found.items()[0])
         elif self.aborted():
             return False
     return PCR_Sim.not_empty()
コード例 #3
0
ファイル: iPCR.py プロジェクト: allista/DegenPrimer
 def _find_products_in_db(self, counter, seq_ids, PCR_Sim, P_Finder):
     """Find PCR products in the templates named by ``seq_ids``.

     Templates are read from ``self._seq_db``; an id whose lookup gives
     ``None`` is reported and ignored. ``mp_better`` applied to a template's
     length decides whether it is searched individually or as part of one
     batch (a subview of the database). Found mixtures are registered with
     ``PCR_Sim.add_mixture``.

     Returns ``False`` on abort or when the batch search yields nothing,
     and ``PCR_Sim.not_empty()`` otherwise.
     """
     # A parenthesised single-argument print acts like the print statement.
     sizes = dict()
     batch = []    # ids searched together through a subview
     singles = []  # ids searched one template at a time
     with simple_timeit('PCR Simulation: sorting templates'):
         for name in seq_ids:
             seq = self._seq_db[name]
             if seq is not None:
                 sizes[name] = len(seq)
                 if mp_better(sizes[name]):
                     singles.append(name)
                 else:
                     batch.append(name)
             else:
                 print('Sequence %s not found. Something is not right with the sequence database.' % name)
         if self.aborted():
             return False
     # a batch of one is moved to the individual searches
     if len(batch) == 1:
         singles += batch
         batch = []
     # set up work counters
     with simple_timeit('PCR Simulation: counting work to be done'):
         short_counter = long_counter = counter
         if batch and singles:
             totals = (sum(sizes[n] for n in batch),
                       sum(sizes[n] for n in singles))
             counter.set_subwork(2, totals)
             short_counter = counter[0]
             long_counter = counter[1]
         if singles:
             long_counter.set_subwork(len(singles),
                                      [sizes[n] for n in singles])
         if batch:
             short_counter.set_subwork(len(batch))
     print('\nPCR Simulation: searching for annealing sites in provided sequences...')
     # batch search
     if batch:
         mixtures = self._find_products_in_templates(
             short_counter, self._seq_db.subview(batch), P_Finder)
         if not mixtures:
             return False
         if self.aborted():
             return False
         for name, mixture in mixtures.iteritems():
             PCR_Sim.add_mixture(name, mixture)
     # individual searches, each tracked by its own sub-counter
     for position, name in enumerate(singles):
         sub_counter = long_counter[position]
         template = self._seq_db[name]
         outcome = self._find_products_in_templates(sub_counter,
                                                    [template],
                                                    P_Finder)
         if outcome is None:
             if self.aborted():
                 return False
             continue
         PCR_Sim.add_mixture(*outcome.items()[0])
     return PCR_Sim.not_empty()
コード例 #4
0
ファイル: iPCR.py プロジェクト: npilshchikova/DegenPrimer
 def _find_products(self, counter, PCR_Sim, P_Finder, seq_files, seq_ids):
     """Load templates from ``seq_files`` and search them for PCR products.

     Returns ``False`` when nothing could be loaded or the run was aborted;
     otherwise the result of ``_find_products_in_db``. When ``seq_ids`` is
     empty, every key of the loaded database is processed.
     """
     # A parenthesised single-argument print acts like the print statement.
     with simple_timeit('PCR Simulation: loading templates'):
         if not (seq_files and self._load_db(seq_files)):
             print('No templates were loaded from: %s' % str(seq_files))
             return False
         if self.aborted():
             return False
     if seq_ids:
         # requested ids are converted to strings before the lookup
         seq_ids = [str(sid) for sid in seq_ids]
     else:
         seq_ids = self._seq_db.keys()
     print('Number of templates to process: %d' % len(seq_ids))
     return self._find_products_in_db(counter, seq_ids, PCR_Sim, P_Finder)
コード例 #5
0
ファイル: iPCR.py プロジェクト: allista/DegenPrimer
 def _find_products(self, counter, PCR_Sim, P_Finder, seq_files, seq_ids):
     """Populate the sequence database and run the product search on it.

     ``seq_files`` is passed to ``self._load_db``; if it is empty or loading
     fails, a message is printed and ``False`` is returned. ``False`` is also
     returned on abort. Otherwise the ids to process (``seq_ids`` converted
     to strings, or all database keys when ``seq_ids`` is empty) are handed
     to ``_find_products_in_db`` and its result is returned.
     """
     # A parenthesised single-argument print acts like the print statement.
     with simple_timeit('PCR Simulation: loading templates'):
         loaded = seq_files and self._load_db(seq_files)
         if not loaded:
             print('No templates were loaded from: %s' % str(seq_files))
             return False
         if self.aborted():
             return False
     seq_ids = ([str(sid) for sid in seq_ids] if seq_ids
                else self._seq_db.keys())
     print('Number of templates to process: %d' % len(seq_ids))
     return self._find_products_in_db(counter, seq_ids, PCR_Sim, P_Finder)
コード例 #6
0
    def _main(self):
        """Check SeqView against pickling and parallel record access.

        After loading ``self.large_seqdb``, a subview of the first five keys
        is printed, pickled, unpickled and printed again. Then, for slices
        of the view growing in steps of 1000 records, the record lengths
        computed in a plain loop are asserted equal to the ones computed
        through ``parallelize_work``.
        """
        from BioUtils.SeqUtils import SeqView
        from BioUtils.Tools.Multiprocessing import parallelize_work
        import cPickle as pickle

        def report(view):
            # A parenthesised single-argument print acts like the print
            # statement.
            print(view.keys())
            print(view[3])
            print('')

        def worker(key, db):
            return len(db[key])

        with simple_timeit('load'):
            full_view = SeqView()
            full_view.load([self.large_seqdb])

        first_keys = full_view.keys()[:5]
        sample = full_view.subview(first_keys)
        report(sample)

        # round-trip through pickle using the highest available protocol
        pickled = pickle.dumps(sample, protocol=-1)
        restored = pickle.loads(pickled)
        report(restored)

        total = len(full_view)
        for size in xrange(1000, total, 1000):
            part = full_view[0:size]
            with simple_timeit('sequential %d' % size):
                plain = [len(part[key]) for key in part.keys()]
            with simple_timeit('parallel %d' % size):
                # init_args supplies a clone of the view
                fanned_out = parallelize_work(
                    self.abort_event, 1, 1, worker, part.keys(), part,
                    init_args=lambda db: (db.clone(), ))
            assert plain == fanned_out
            print('-' * 80)
        print('Done')