Example #1
0
 def _main(self):
     """Smoke/benchmark test for SeqView: subviews, pickle round-trip,
     and sequential vs. parallel record-length computation.
     NOTE(review): Python 2 code (print statements, cPickle, xrange).
     """
     from BioUtils.SeqUtils import SeqView
     from BioUtils.Tools.Multiprocessing import parallelize_work
     
     # time the initial load of the large sequence database
     with simple_timeit('load'):
         sv = SeqView()
         sv.load([self.large_seqdb])
         
     # subview over the first 5 keys; print a sample record
     ssv = sv.subview(sv.keys()[:5])
     print ssv.keys()
     print ssv[3]
     print
     
     # verify the subview survives a pickle round-trip (protocol=-1 = highest)
     import cPickle as pickle
     ssv1 = pickle.loads(pickle.dumps(ssv, protocol=-1))
     print ssv1.keys()
     print ssv1[3]
     print
         
     def worker(id, db): 
         return len(db[id])    
     
     # compare sequential vs. parallel timings on growing slices of the db
     for numrecs in xrange(1000, len(sv), 1000):
         svs = sv[0:numrecs]
         with simple_timeit('sequential %d' % numrecs):
             res1 = [len(svs[k]) for k in svs.keys()]
         with simple_timeit('parallel %d' % numrecs):
             res2 = parallelize_work(self.abort_event, 1, 1, worker, svs.keys(), svs, init_args=lambda db: (db.clone(),))
         # parallel result must match the sequential reference
         assert res1 == res2
         print '-'*80
     print 'Done'
Example #2
0
    class _Loader(QThread):
        """Background loader: reads sequence files into a SeqView on a
        worker thread and emits `loaded` when finished."""
        # emitted from run() after the database has been loaded
        loaded = pyqtSignal()

        def __init__(self, filenames):
            # filenames: passed to SeqView.load() in run()
            QThread.__init__(self)
            self.db = None  # set on the worker thread in run()
            self.filenames = filenames

        def __del__(self):
            # block until the worker thread finishes before destruction
            self.wait()

        def run(self):
            # worker-thread entry point; NOTE(review): self.db is non-None
            # as soon as loading starts, fully loaded only after `loaded`
            self.db = SeqView()
            self.db.load(self.filenames)
            self.loaded.emit()
Example #3
0
 def find_clusters(self, files, output_dir=''):
     """Search genomes loaded from *files* for self.clusters; print hits.

     Genomes are processed in parallel via self.Work/self._process_genome,
     writing into output_dir (created if missing). Returns False on early
     failure (no clusters configured, load failure, or aborted work);
     True otherwise, even when no clusters were found.
     """
     if not self.clusters: return False
     # load genomes
     genomes = SeqView.safe_load(files)
     if not genomes: return False
     # create dest dir if needed
     if output_dir and not os.path.isdir(output_dir):
         os.mkdir(output_dir)
     # process files
     results = {}
     glen = len(genomes)
     with ProgressCounter('Searching for %d clusters in %d sequence%s:' %
                                  (len(self.clusters), glen, 's' if glen>1 else ''), glen) as prg:
         work = self.Work()
         work.start_work(self._process_genome, genomes.keys(), None, genomes, output_dir)
         # ordered assembler fills `results` keyed by genome index
         work.assemble(ordered_results_assembler, results, prg)
         if not work.wait(): return False
     # check results
     found = False
     for gi in results:
         res, gname = results[gi]
         if not res: continue
         found = True
         print('%s:\n\t%s\n' % (gname, '\n\t'.join(res)))
     if not found:
         print 'No putative clusters were found.'
     return True
Example #4
0
 def _extract_clusters(self, tag, qual='ugene_name'):
     """Extract annotated cluster features from self.genomes_files.

     Every feature of every record whose `qual` qualifier matches the
     `tag` regex is extracted as a separate record, named after the
     qualifier value, post-processed via self._process_features.

     Arguments:
     tag  -- regular expression matched against the joined qualifier value
     qual -- feature qualifier holding the cluster name

     Returns a dict mapping cluster id -> extracted record.
     """
     tagre = re.compile(tag)
     clusters = {}
     records = SeqView()
     records.load(self.genomes_files)
     for record in records:
         for f in record.features:
             if qual in f.qualifiers:
                 q = ' '.join(f.qualifiers[qual])
                 if not tagre.match(q): continue
                 c = f.extract(record)
                 c.id = c.name = q
                 c.description = record.description
                 # BUGFIX: was `or`, which is a tautology (an alphabet can
                 # never be both Nucleotide and Protein), so the alphabet
                 # was unconditionally overwritten. With `and` the DNA
                 # default is applied only when the alphabet is neither
                 # recognized type.
                 if c.seq.alphabet is not NucleotideAlphabet \
                 and c.seq.alphabet is not ProteinAlphabet:
                     c.seq.alphabet = IUPAC.IUPACAmbiguousDNA()
                 self._process_features(c)
                 clusters[c.id] = c
     return clusters
Example #5
0
class iPCR_Base(iPCR_Interface):
    '''
    Using PCR_Simulation and SeqDB classes runs PCR simulation with given 
    primers and report results in human readable form in a text file.
    '''
    def __init__(self, abort_event, max_mismatches, *args, **kwargs):
        iPCR_Interface.__init__(self, abort_event, *args, **kwargs)
        self._max_mismatches = max_mismatches
        self._seq_db = None
        self._PCR_Simulation = None

    #end def

    def __del__(self):
        try:
            self._searcher.shutdown()
        except:
            pass

    #end def

    def _load_db(self, filenames):
        self._seq_db = SeqView(upper=True)
        if not self._seq_db.load(filenames):
            self._seq_db = None
            return False
        return True

    def _format_header(self):
        header = iPCR_Interface._format_header(self)
        if self._max_mismatches != None:
            header += 'Number of mismatches allowed: %d\n\n' % self._max_mismatches
        return header

    #end def

    def write_products_report(self):
        if not self._have_results: return
        #open report file
        ipcr_products = self._open_report('iPCR products',
                                          self._PCR_products_filename)
        ipcr_products.write(time_hr())
        if self._PCR_Simulation:
            ipcr_products.write(self._PCR_Simulation.format_products_report())
        else:
            ipcr_products.write(
                hr(' No PCR products have been found ', symbol='!'))
        ipcr_products.close()
        print '\nThe list of PCR products was written to:\n   %s' % self._PCR_products_filename
        self._add_report('iPCR products', self._PCR_products_filename)

    #end def


#end class
Example #6
0
 def extract_clusters(self):
     """Extract gene clusters from self.files into self.clusters.

     Genomes are processed in parallel via self.Work/self._process_genome;
     when self.order is set, only those genome ids are processed, in that
     order. Returns True iff at least one cluster was extracted.
     """
     self.clusters = []
     genomes = SeqView.safe_load(self.files)
     if not genomes: return False
     glen = len(genomes)
     # pre-size so the ordered assembler can fill results by index
     self.clusters = [None]*glen
     # keep only requested genome ids that are actually present
     if self.order: self.order = [oid for oid in self.order if oid in genomes.keys()]
     with ProgressCounter('Extracting clusters from provided genomes:', glen) as prg:
         work = self.Work()
         work.start_work(self._process_genome, self.order or genomes.keys(), None, genomes)
         work.assemble(ordered_shelved_results_assembler, self.clusters, prg)
         if not work.wait(): return False
     # flatten per-genome cluster lists, dropping empty results
     self.clusters = list(chain.from_iterable(c for c in self.clusters if c))
     #generate gene colors if needed
     if not self.no_color and not self.colors:
         self._generate_gene_colors()
     return bool(self.clusters)
Example #7
0
class iPCR_Base(iPCR_Interface):
    '''
    Using PCR_Simulation and SeqDB classes runs PCR simulation with given 
    primers and report results in human readable form in a text file.
    '''
    
    def __init__(self, abort_event, max_mismatches, *args, **kwargs):
        iPCR_Interface.__init__(self, abort_event, *args, **kwargs)
        self._max_mismatches = max_mismatches
        self._seq_db         = None
        self._PCR_Simulation = None
    #end def
    
    def __del__(self):
        try: self._searcher.shutdown()
        except: pass
    #end def
    
    def _load_db(self, filenames):
        self._seq_db = SeqView(upper=True)
        if not self._seq_db.load(filenames):
            self._seq_db = None
            return False
        return True
    
    def _format_header(self):
        header = iPCR_Interface._format_header(self)
        if self._max_mismatches != None:
            header += 'Number of mismatches allowed: %d\n\n' % self._max_mismatches
        return header
    #end def
    
    def write_products_report(self):
        if not self._have_results: return
        #open report file
        ipcr_products = self._open_report('iPCR products', self._PCR_products_filename)
        ipcr_products.write(time_hr())
        if self._PCR_Simulation:
            ipcr_products.write(self._PCR_Simulation.format_products_report())
        else: ipcr_products.write(hr(' No PCR products have been found ', symbol='!')) 
        ipcr_products.close()
        print '\nThe list of PCR products was written to:\n   %s' % self._PCR_products_filename
        self._add_report('iPCR products', self._PCR_products_filename)
    #end def
#end class
Example #8
0
    def _main(self):
        """Smoke/benchmark test for SeqView: subviews, pickle round-trip,
        and sequential vs. parallel record-length computation.
        NOTE(review): Python 2 code (print statements, cPickle, xrange).
        """
        from BioUtils.SeqUtils import SeqView
        from BioUtils.Tools.Multiprocessing import parallelize_work

        # time the initial load of the large sequence database
        with simple_timeit('load'):
            sv = SeqView()
            sv.load([self.large_seqdb])

        # subview over the first 5 keys; print a sample record
        ssv = sv.subview(sv.keys()[:5])
        print ssv.keys()
        print ssv[3]
        print

        # verify the subview survives a pickle round-trip (protocol=-1 = highest)
        import cPickle as pickle
        ssv1 = pickle.loads(pickle.dumps(ssv, protocol=-1))
        print ssv1.keys()
        print ssv1[3]
        print

        def worker(id, db):
            return len(db[id])

        # compare sequential vs. parallel timings on growing slices of the db
        for numrecs in xrange(1000, len(sv), 1000):
            svs = sv[0:numrecs]
            with simple_timeit('sequential %d' % numrecs):
                res1 = [len(svs[k]) for k in svs.keys()]
            with simple_timeit('parallel %d' % numrecs):
                res2 = parallelize_work(self.abort_event,
                                        1,
                                        1,
                                        worker,
                                        svs.keys(),
                                        svs,
                                        init_args=lambda db: (db.clone(), ))
            # parallel result must match the sequential reference
            assert res1 == res2
            print '-' * 80
        print 'Done'
Example #9
0
 def _main(self):
     """Find discriminating 16S primer pairs for the Bathy alignment.

     NOTE(review): paths, sequence ids and outgroups are hard-coded for a
     specific analysis; return codes mark distinct failure points (1: no
     alignment, 2: realignment failed, 3: missing outgroup -- reused for
     "no primer pairs found").
     """
     min_prod = 400
     silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
     alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
     add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
     outgroups = ['Thermococcus_chitonophagus', 'SMTZ1-55', 'contig72135_1581_sunspring_meta']
     add = ['KF836721.1.1270','EU635905.1.1323']
     exclude = []#['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta']
     #load alignment
     # reuse a previously augmented alignment if it already exists
     if os.path.isfile(add_filename): 
         alifile = add_filename
         add_filename = ''
     with user_message('Loadding initial alignment...', '\n'):
         orig_ali = AlignmentUtils.load_first(alifile)
         if not orig_ali: return 1
     #load homologs
     if add_filename:
         with user_message('Loadding additional sequences...', '\n'):
             add_seqs = []
             db = SeqView()
             if db.load(silva_db):
                 for sid in add:
                     seq = db.get(sid)
                     if seq: add_seqs.append(seq)
                     else: print '%s not found in %s' % (sid, silva_db)
         #realign data if needed
         if add_seqs:
             with user_message('Realigning data...', '\n'):
                 add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
                 AlignmentUtils.align(list(orig_ali)+add_seqs, add_filename)
                 orig_ali = AlignmentUtils.load_first(add_filename)
                 if not orig_ali: return 2
     #process the alignment
     ali = orig_ali.remove(*exclude).trim()
     for out in outgroups:
         if not ali.index(out):
             print '%s not found in the alignment' % out
             return 3
     # sort outgroups to the end of the alignment
     ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id)
     ali_len = ali.get_alignment_length()
     AlignmentUtils.save(ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta')
     args = dict(plen = (20,40),
                 max_mismatches = 8,
                 min_match_mismatches = 1,
                 first_match_mismatches = 1,
                 first_may_match = 1,
                 AT_first=True,
                 outgroup=len(outgroups))
     fprimers = self._find_primers(ali, **args)
     # reverse primers are searched on the reverse-complemented alignment
     rprimers = self._find_primers(ali.reverse_complement(), **args)
     pairs = []
     for i, (fs, fp) in enumerate(fprimers):
         start = fs
         fprimer = Primer.from_sequences(fp[:-1], 1, 'SSBaF%d' % fs)
         for _j, (rs, rp) in enumerate(rprimers):
             end = ali_len-rs
             # keep only pairs yielding a product longer than min_prod
             if end-start <= min_prod: continue
             pairs.append((fprimer, Primer.from_sequences(rp[:-1], 1, 'SSBaR%d' % (ali_len-rs+1))))
     if not pairs:
         print '\nNo suitable primer pairs found'
         return 3
     added = set()
     # print each pair; append each distinct primer (gap-padded to the
     # alignment length) to the original alignment exactly once
     for i, (fp, rp) in enumerate(pairs):
         print '\npair %d' % (i+1)
         print '%s: %s' % (fp.id, fp)
         print '%s: %s' % (rp.id, rp)
         if fp.id not in added:
             orig_ali.append(fp.master_sequence+'-'*(orig_ali.get_alignment_length()-len(fp)))
             added.add(fp.id)
         if rp.id not in added:
             orig_ali.append(copy_attrs(rp.master_sequence,
                                        rp.master_sequence.reverse_complement())+
                             '-'*(orig_ali.get_alignment_length()-len(rp)))
             added.add(rp.id)
     print
     orig_ali = AlignmentUtils.align(orig_ali)
     AlignmentUtils.save(orig_ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta')
     print 'Done'
Example #10
0
 def _load_db(self, filenames):
     """Load target sequences into a fresh SeqView.

     On success keeps the view in self._seq_db and returns True; on
     failure resets self._seq_db to None and returns False.
     """
     view = SeqView(upper=True)
     self._seq_db = view
     if view.load(filenames):
         return True
     self._seq_db = None
     return False
Example #11
0
 def _load_db(self, filenames):
     """Populate self._seq_db from *filenames*; return True on success,
     resetting self._seq_db to None on failure."""
     self._seq_db = SeqView(upper=True)
     loaded = self._seq_db.load(filenames)
     if not loaded:
         self._seq_db = None
         return False
     return True
Example #12
0
 def run(self):
     # Worker entry point: load the sequence files into a fresh SeqView,
     # then emit `loaded`. NOTE(review): appears to run on a background
     # thread (cf. the emitted signal); self.db is non-None as soon as
     # loading starts and fully populated only after `loaded` is emitted,
     # so keep the assignment/load order as-is.
     self.db = SeqView()
     self.db.load(self.filenames)
     self.loaded.emit()
Example #13
0
 def _main(self):
     """Find discriminating 16S primer pairs for the Bathy alignment.

     Refactored variant that delegates primer search, pair compilation and
     reporting to PrimerFinder. NOTE(review): paths, sequence ids and
     outgroups are hard-coded; return code 3 is reused for both a missing
     outgroup and "no primer pairs found".
     """
     min_prod = 400
     silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
     alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
     add_filename = FilenameParser.strip_ext(
         alifile) + '.with_additions.fasta'
     outgroups = [
         'Thermococcus_chitonophagus', 'SMTZ1-55',
         'contig72135_1581_sunspring_meta'
     ]
     add = ['KF836721.1.1270', 'EU635905.1.1323']
     exclude = [
     ]  #['Thermococcus_chitonophagus', 'SMTZ1-55', 'BA1-16S', 'contig72135_1581_sunspring_meta']
     #load alignment
     # reuse a previously augmented alignment if it already exists
     if os.path.isfile(add_filename):
         alifile = add_filename
         add_filename = ''
     with user_message('Loadding initial alignment...', '\n'):
         orig_ali = AlignmentUtils.load_first(alifile)
         if not orig_ali: return 1
     #load homologs
     if add_filename:
         with user_message('Loadding additional sequences...', '\n'):
             add_seqs = []
             db = SeqView()
             if db.load(silva_db):
                 for sid in add:
                     seq = db.get(sid)
                     if seq: add_seqs.append(seq)
                     else: print '%s not found in %s' % (sid, silva_db)
         #realign data if needed
         if add_seqs:
             with user_message('Realigning data...', '\n'):
                 add_filename = FilenameParser.strip_ext(
                     alifile) + '.with_additions.fasta'
                 AlignmentUtils.align(
                     list(orig_ali) + add_seqs, add_filename)
                 orig_ali = AlignmentUtils.load_first(add_filename)
                 if not orig_ali: return 2
     #process the alignment
     ali = orig_ali.remove(*exclude).trim()
     for out in outgroups:
         if not ali.index(out):
             print '%s not found in the alignment' % out
             return 3
     # sort outgroups to the end of the alignment
     ali.sort(key=lambda r: 'zzzzzzzz' if r.id in outgroups else r.id)
     AlignmentUtils.save(
         ali,
         '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta'
     )
     args = dict(plen=(20, 40),
                 max_mismatches=8,
                 min_match_mismatches=1,
                 first_match_mismatches=1,
                 first_may_match=1,
                 AT_first=True,
                 outgroup=len(outgroups))
     fprimers = PrimerFinder.find_discriminating_primers(ali, **args)
     rprimers = PrimerFinder.find_discriminating_primers(ali,
                                                         reverse=True,
                                                         **args)
     # combine forward/reverse candidates into pairs with product length
     # above min_prod, named with the 'SSBa' prefix
     pairs = PrimerFinder.compile_pairs(fprimers, rprimers, min_prod,
                                        'SSBa')
     if not pairs:
         print '\nNo suitable primer pairs found'
         return 3
     PrimerFinder.print_pairs(pairs)
     orig_ali = PrimerFinder.add_pairs_to_alignment(pairs, orig_ali)
     AlignmentUtils.save(
         orig_ali,
         '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta'
     )
     print 'Done'