def test():
    """Smoke-test Primer on a short sequence and print what it computed.

    Builds a Primer from a SeqRecord at a concentration of 0.9e-6, runs
    generate_components() and calculate_Tms(), then prints the primer's
    str_sequences, its concentration and its repr().
    """
    # A longer alternative sequence that was tried here before:
    #   'ATARTCTYCGAMGGCTATKCAGNCTGGGANGGNTACGNGGGTAAANAAACG'
    record = SeqRecord(Seq('ATARTCTYCGAMGGCNATKCAGGNCTGRGGA'), id='primer1')
    primer = Primer(record, 0.9e-6)

    primer.generate_components()
    primer.calculate_Tms()

    print(primer.str_sequences)
    print(primer.concentration)
    print(repr(primer))

    # Disabled benchmarking that used to live here: timeit runs of
    # primer.generate_components() vs primer.generate_components_mp()
    # drawn with tests.violin_plot/matplotlib, and a cProfile.run() of
    # 100 calls of each, written to 'gen_components.profile'.
    print('Done.')
def compile_primers(primer_alis, name_base, reverse=False):
    """Build one Primer per (position, sequences) item of primer_alis.

    Every primer is created with Primer.from_sequences(sequences, 1, id),
    where id is name_base, then 'R' if reverse is true or 'F' otherwise,
    then the position formatted with %d.

    Returns the list of created primers in input order.
    """
    id_template = '%sR%d' if reverse else '%sF%d'
    return [Primer.from_sequences(ali, 1, id_template % (name_base, pos))
            for pos, ali in primer_alis]
def compile_pairs(falis, ralis, min_len, name_base):
    """Combine forward and reverse primer hits into PrimerPair objects.

    falis and ralis are iterables of (position, sequences) items. A pair
    is produced for every forward/reverse combination in which the
    reverse position minus the forward position is greater than min_len.
    Forward ids are name_base + 'F' + position, reverse ids are
    name_base + 'R' + position.

    Returns the list of pairs, ordered by forward item, then reverse item.
    """
    pairs = []
    for fwd_pos, fwd_ali in falis:
        # One forward primer object is shared by all of its pairs.
        forward = Primer.from_sequences(
            fwd_ali, 1, '%sF%d' % (name_base, fwd_pos))
        for rev_pos, rev_ali in ralis:
            if rev_pos - fwd_pos > min_len:
                # The reverse primer is built anew for each pair.
                reverse = Primer.from_sequences(
                    rev_ali, 1, '%sR%d' % (name_base, rev_pos))
                pairs.append(PrimerPair(forward, reverse))
    return pairs
def test():
    """Exercise Primer on one short sequence and print the outcome.

    A Primer is created from a SeqRecord with id 'primer1' at a
    concentration of 0.9e-6; generate_components() and calculate_Tms()
    are called on it, and its str_sequences, concentration and repr()
    are printed, followed by 'Done.'.
    """
    # Previously tested with a longer sequence:
    #   'ATARTCTYCGAMGGCTATKCAGNCTGGGANGGNTACGNGGGTAAANAAACG'
    sequence = Seq('ATARTCTYCGAMGGCNATKCAGGNCTGRGGA')
    primer = Primer(SeqRecord(sequence, id='primer1'), 0.9e-6)

    primer.generate_components()
    primer.calculate_Tms()

    print(primer.str_sequences)
    print(primer.concentration)
    print(repr(primer))

    # Benchmark code that was disabled here compared
    # primer.generate_components() with primer.generate_components_mp()
    # using timeit (plotted via tests.violin_plot) and cProfile.run()
    # with output to 'gen_components.profile'.
    print('Done.')
def _main(self):
    """Find primer pairs in a fixed alignment and save them aligned to it.

    Steps, in order:
      1. Load the initial alignment; when the '.with_additions.fasta'
         file already exists it is loaded instead and step 2 is skipped.
      2. Fetch the sequences listed in ``add`` from the ``silva_db`` file,
         realign them together with the alignment and reload the result.
      3. Remove ``exclude`` records, trim, check that every outgroup is
         present, sort (outgroups get the key 'zzzzzzzz') and save the
         trimmed alignment.
      4. Call self._find_primers on the alignment and on its reverse
         complement and combine the hits into (forward, reverse) pairs.
      5. Print the pairs, append their master sequences to the original
         alignment, realign it and save it.

    Returns:
        1 if the initial alignment could not be loaded, 2 if the realigned
        alignment could not be loaded, 3 if an outgroup is missing or no
        pair was found; None after a complete run.
    """
    min_prod = 400
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    trimmed_file = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta'
    with_primers_file = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta'
    add_filename = FilenameParser.strip_ext(alifile) + '.with_additions.fasta'
    outgroups = ['Thermococcus_chitonophagus',
                 'SMTZ1-55',
                 'contig72135_1581_sunspring_meta']
    add = ['KF836721.1.1270', 'EU635905.1.1323']
    # An earlier exclusion list: ['Thermococcus_chitonophagus', 'SMTZ1-55',
    # 'BA1-16S', 'contig72135_1581_sunspring_meta']
    exclude = []

    # Load the alignment; an existing additions file replaces the input
    # and clears add_filename, which disables the homolog step below.
    if os.path.isfile(add_filename):
        alifile = add_filename
        add_filename = ''
    with user_message('Loadding initial alignment...', '\n'):
        orig_ali = AlignmentUtils.load_first(alifile)
        if not orig_ali:
            return 1

    # Load homologs and realign.
    # NOTE(review): the source's indentation was lost; the realignment is
    # placed inside this branch because add_seqs is only defined here.
    if add_filename:
        with user_message('Loadding additional sequences...', '\n'):
            add_seqs = []
            db = SeqView()
            if db.load(silva_db):
                for sid in add:
                    seq = db.get(sid)
                    if not seq:
                        print('%s not found in %s' % (sid, silva_db))
                        continue
                    add_seqs.append(seq)
        if add_seqs:
            with user_message('Realigning data...', '\n'):
                add_filename = (FilenameParser.strip_ext(alifile)
                                + '.with_additions.fasta')
                AlignmentUtils.align(list(orig_ali) + add_seqs, add_filename)
                orig_ali = AlignmentUtils.load_first(add_filename)
                if not orig_ali:
                    return 2

    # Process the alignment.
    ali = orig_ali.remove(*exclude).trim()
    for out in outgroups:
        # NOTE(review): any falsy result counts as "not found"; if index()
        # returns a position, a record at position 0 would be rejected too
        # -- confirm against the alignment class.
        if not ali.index(out):
            print('%s not found in the alignment' % out)
            return 3

    def _sort_key(record):
        # Outgroups share one fixed key instead of their own id.
        if record.id in outgroups:
            return 'zzzzzzzz'
        return record.id

    ali.sort(key=_sort_key)
    ali_len = ali.get_alignment_length()
    AlignmentUtils.save(ali, trimmed_file)

    # Search both strands with the same settings.
    search_args = {
        'plen': (20, 40),
        'max_mismatches': 8,
        'min_match_mismatches': 1,
        'first_match_mismatches': 1,
        'first_may_match': 1,
        'AT_first': True,
        'outgroup': len(outgroups),
    }
    fprimers = self._find_primers(ali, **search_args)
    rprimers = self._find_primers(ali.reverse_complement(), **search_args)

    # Pair every forward hit with each reverse hit that is far enough away.
    pairs = []
    for fwd_start, fwd_seqs in fprimers:
        fprimer = Primer.from_sequences(
            fwd_seqs[:-1], 1, 'SSBaF%d' % fwd_start)
        for rev_start, rev_seqs in rprimers:
            # Reverse hits are positioned on the reverse complement, so
            # the position is subtracted from the alignment length.
            rev_end = ali_len - rev_start
            if rev_end - fwd_start > min_prod:
                rprimer = Primer.from_sequences(
                    rev_seqs[:-1], 1, 'SSBaR%d' % (rev_end + 1))
                pairs.append((fprimer, rprimer))
    if not pairs:
        print('\nNo suitable primer pairs found')
        return 3

    # Report the pairs and add each distinct primer to the alignment once.
    added = set()
    for number, (fwd, rev) in enumerate(pairs, 1):
        print('\npair %d' % number)
        print('%s: %s' % (fwd.id, fwd))
        print('%s: %s' % (rev.id, rev))
        if fwd.id not in added:
            fwd_row = fwd.master_sequence + '-' * (
                orig_ali.get_alignment_length() - len(fwd))
            orig_ali.append(fwd_row)
            added.add(fwd.id)
        if rev.id not in added:
            rev_row = copy_attrs(
                rev.master_sequence,
                rev.master_sequence.reverse_complement()) + '-' * (
                    orig_ali.get_alignment_length() - len(rev))
            orig_ali.append(rev_row)
            added.add(rev.id)
    # NOTE(review): the blank line is assumed to be printed once, after
    # the loop -- the original indentation was lost.
    print('')
    orig_ali = AlignmentUtils.align(orig_ali)
    AlignmentUtils.save(orig_ali, with_primers_file)
    print('Done')
def _main(self):
    """Run PCR_Optimizer on a test sequence file and write its report.

    Sets the shared PCR conditions on TD_Functions.PCR_P, builds a
    forward and a reverse test Primer, creates a PCR_Optimizer with
    self.abort_event and runs its optimize_PCR_parameters method in a
    WaitingThread. After the thread finishes, optimizer.write_report()
    is called.
    """
    from threading import Lock
    from BioUtils.Tools import WaitingThread
    from DegenPrimer.Primer import Primer
    from DegenPrimer.SeqUtils import load_sequence
    from DegenPrimer.PCR_Optimizer import PCR_Optimizer
    from DegenPrimer import TD_Functions as tdf

    # Reaction conditions.
    conditions = tdf.PCR_P
    conditions.PCR_T = 53
    conditions.Mg = 3e-3
    conditions.dNTP = 300e-6
    conditions.DNA = 1e-10

    # Test primers.
    fwd_primer = Primer(
        load_sequence('ATATTCTACRACGGCTATCC', 'fwd_test', 'fwd_test'),
        0.43e-6, True)
    rev_primer = Primer(
        load_sequence('GAASGCRAAKATYGGGAAC', 'rev_test', 'rev_test'),
        0.43e-6, True)

    optimizer_options = {
        'max_mismatches': 5,
        'job_id': 'test-job',
        'primers': [fwd_primer, rev_primer],
        'min_amplicon': 50,
        'max_amplicon': 2000,
        'polymerase': 40000,
        'with_exonuclease': False,
        'num_cycles': 30,
        'side_reactions': None,
        'side_concentrations': None,
        'include_side_annealings': False,
    }
    optimizer = PCR_Optimizer(self.abort_event, 100, 5, **optimizer_options)

    # Positional arguments passed on to optimize_PCR_parameters.
    sequence_file = '../data/ThGa.fa'
    regions = ({'start': 47920, 'end': 49321},)
    parameters = (
        {'name': 'PCR_T', 'min': 50, 'ini': 60, 'max': 72},
        # {'name': 'dNTP', 'min': 100e-6, 'ini': 200e-6, 'max': 900e-6},
    )

    plock = Lock()
    job = WaitingThread(plock, 1,
                        target=optimizer.optimize_PCR_parameters,
                        name='optimize PCR',
                        args=(sequence_file, regions, parameters))
    job.start()
    print('')
    job.join()
    optimizer.write_report()
def _main(self):
    """Run an iPCR simulation on test files, then profile report writing.

    Sets the shared PCR conditions on TD_Functions.PCR_P, builds a
    forward and a reverse test Primer and an iPCR object using
    self.abort_event, and runs ipcr.simulate_PCR in a WaitingThread with
    a WorkCounter. While the thread is alive the counter is printed each
    time counter.changed_percent() is true, polling once per second.
    Afterwards the report is written and 100 calls of
    ipcr.write_reports() are profiled into 'iPCR.write_reports.profile'.
    """
    import time
    import DegenPrimer.TD_Functions as tdf
    from DegenPrimer.Primer import Primer
    from DegenPrimer.SeqUtils import load_sequence
    from DegenPrimer.WorkCounter import WorkCounterManager
    from BioUtils.Tools import WaitingThread
    from DegenPrimer.iPCR import iPCR
    from threading import Lock
    import cProfile

    # Reaction conditions.
    conditions = tdf.PCR_P
    conditions.PCR_T = 53
    conditions.Mg = 3e-3
    conditions.dNTP = 300e-6
    conditions.DNA = 1e-10

    # Test primers.
    fwd_primer = Primer(
        load_sequence('ATATTCTACRACGGCTATCC', 'fwd_test', 'fwd_test'),
        0.43e-6, True)
    rev_primer = Primer(
        load_sequence('GAASGCRAAKATYGGGAAC', 'rev_test', 'rev_test'),
        0.43e-6, True)

    simulation_options = {
        'max_mismatches': 6,
        'job_id': 'test-job',
        'primers': [fwd_primer, rev_primer],
        'min_amplicon': 50,
        'max_amplicon': 2000,
        'polymerase': 40000,
        'with_exonuclease': False,
        'num_cycles': 30,
        'side_reactions': None,
        'side_concentrations': None,
        'include_side_annealings': True,
    }
    # The name 'ipcr' must stay: the profiling statement below refers to
    # it through locals().
    ipcr = iPCR(self.abort_event, **simulation_options)

    manager = WorkCounterManager()
    manager.start()
    counter = manager.WorkCounter()

    # NOTE(review): the source's line breaks were lost; the whole
    # "long sequences" line is assumed to have been commented out.
    sequence_files = (
        '../data/ThGa.fa',  # single sequence
        # '../data/Ch5_gnm.fa', '../data/ThDS1.fa', '../data/ThES1.fa',  # long sequences
        '../data/ThDS1-FC.fa', '../data/ThDS1-850b-product.fa',  # short sequences
    )

    plock = Lock()
    job = WaitingThread(plock, 1,
                        target=ipcr.simulate_PCR,
                        name='simulate_PCR',
                        args=(counter, sequence_files))
    job.start()
    print('')

    # Progress is printed under the same lock that the job thread was given.
    while job.is_alive():
        if counter.changed_percent():
            with plock:
                print(counter)
        time.sleep(1)
    job.join()
    with plock:
        print(counter)

    ipcr.write_report()
    # NOTE(review): write_report() is called above but the profiled
    # statement calls write_reports() -- confirm both methods exist.
    cProfile.runctx('for i in xrange(100): ipcr.write_reports()',
                    globals(), locals(), 'iPCR.write_reports.profile')
# Read the template sequence; exit with status 1 if the file can't be opened.
seq_file = '../data/ThGa.fa'
try:
    record_file = open(seq_file, 'r')
except IOError as e:
    print('Unable to open %s' % seq_file)
    print(e)
    sys.exit(1)
# The with-block closes the handle even when SeqIO.read() raises.
with record_file:
    template = SeqIO.read(record_file, 'fasta', IUPAC.unambiguous_dna)

# Test primer sequences; only the 'rtgam' one is turned into a Primer.
ftgam = Seq('ATATTCTACRACGGCTATCC', IUPAC.ambiguous_dna)
rtgam = Seq('GAASGCRAAKATYGGGAAC', IUPAC.ambiguous_dna)
# primer = Primer(SeqRecord(ftgam, id='ftgam'), 0.43e-6, True)
primer = Primer(SeqRecord(rtgam, id='rtgam'), 0.43e-6, True)

#48C, 9mism, results: 564.85Mb, 410.37Mb, 240.07Mb


def print_out(out, name):
    """Print search results under the heading ``name``.

    ``out`` is iterated with enumerate(); a falsy entry prints
    'No results.'. Otherwise 'Results <index>' is printed and, for every
    item of the entry, item[0] followed by the second and then the first
    element of each pair found in item[1].
    """
    print(name)
    for i, results in enumerate(out):
        if not results:
            print('No results.')
        else:
            print('Results %d' % i)
            for res in results:
                print(res[0])
                for dup, _id in res[1]:
                    print(_id)
                    print(dup)
                    #print 'mismatches:', dup.mismatches
        # NOTE(review): the source's indentation was lost; the separator is
        # assumed to follow each entry of ``out`` -- confirm.
        print('\n')
from DegenPrimer.TD_Functions import PCR_P from DegenPrimer.BlastPrimers import BlastPrimers from threading import Lock os.chdir('../') mgr = Manager() abort_event = mgr.Event() PCR_P.PCR_T = 53 PCR_P.Mg = 3e-3 PCR_P.dNTP = 300e-6 PCR_P.DNA = 1e-10 fwd_primer = Primer( load_sequence('ATATTCTACRACGGCTATCC', 'F-TGAM_0057-268_d1', 'F-TGAM_0057-268_d1'), 0.43e-6, True) rev_primer = Primer( load_sequence('GAASGCRAAKATYGGGAAC', 'R-TGAM_0055-624-d4', 'R-TGAM_0055-624-d4'), 0.43e-6, True) blastp = BlastPrimers(abort_event, job_id='F-TGAM_0057-268_d1-R-TGAM_0055-624-d4', primers=[fwd_primer, rev_primer], min_amplicon=50, max_amplicon=2000, polymerase=40000, with_exonuclease=False, num_cycles=30, side_reactions=None, side_concentrations=None,