def run(self):
    """Load primers, apply the configured filters, and activate the result.

    Primers are read from the file named by ``self.input`` when it is set;
    otherwise ``Primers()`` is created with no arguments and
    ``self.skip_filtering`` is forced to False. Every existing ``active``
    mark is cleared before filtering, and ``activate`` is always called on
    the primer collection at the end.
    """
    if not self.input:
        # No input file: filtering can never be skipped in this mode.
        self.skip_filtering = False
        primers = Primers()
    else:
        with open(self.input, 'rb') as infile:
            primers = Primers(infile)

    assert isinstance(primers, Primers)

    # Undo all active marks, if any
    Primer.update(active=False).execute()

    if not self.skip_filtering:
        # One filter stage per statement, in the same order as the
        # original fluent chain.
        stage = primers.filter_min_fg_rate(self.min_fg_bind)
        stage = stage.filter_max_bg_rate(self.max_bg_bind)
        stage = stage.summarize()
        stage = stage.filter_tm_range(self.min_tm, self.max_tm)
        stage = stage.limit_to(self.max_primers)
        stage.filter_max_gini(self.max_gini, self.fg_genome_fp)

    primers.activate(self.max_primers)
def primers():
    """Create three active primers and return them as a list.

    The rows are created in the order listed, each with ``active=True``.
    """
    # (seq, fg_freq, bg_freq, ratio)
    rows = (
        ("ATGC", 1, 2, 1.0),
        ("GGCC", 1, 3, 0.5),
        ("CCTA", 2, 0, float('inf')),
    )
    created = []
    for seq, fg_freq, bg_freq, ratio in rows:
        created.append(
            Primer.create(seq=seq, fg_freq=fg_freq, bg_freq=bg_freq,
                          ratio=ratio, active=True))
    return created
def run(self):
    """Select the primers to use and flag them as active.

    Reads primers from ``self.input`` if one was given, otherwise builds
    ``Primers()`` with no arguments (and resets ``self.skip_filtering`` to
    False). Clears all ``active`` marks, runs the filter stages unless
    ``self.skip_filtering`` is set, then calls ``activate``.
    """
    # If we have an input file, use that. Otherwise pull from db
    if self.input:
        with open(self.input, 'rb') as infile:
            primers = Primers(infile)
    else:
        self.skip_filtering = False
        primers = Primers()

    assert isinstance(primers, Primers)

    # Undo all active marks, if any
    Primer.update(active=False).execute()

    if not self.skip_filtering:
        # Each name holds the return value of the previous stage, so the
        # calls happen in exactly the order of the original chain.
        by_fg = primers.filter_min_fg_rate(self.min_fg_bind)
        by_bg = by_fg.filter_max_bg_rate(self.max_bg_bind)
        summarized = by_bg.summarize()
        by_tm = summarized.filter_tm_range(self.min_tm, self.max_tm)
        capped = by_tm.limit_to(self.max_primers)
        capped.filter_max_gini(self.max_gini, self.fg_genome_fp)

    primers.activate(self.max_primers)
def test_linearize_binding_sites(kmer, ws, fastafile):
    """Linearized sites must include every chromosome end and binding site."""
    primer = Primer.create(seq=kmer)
    primer._update_locations(fastafile)

    ends_by_record = swga.locate.chromosome_ends(fastafile)
    linear_sites = swga.locate.linearize_binding_sites(
        [primer], ends_by_record)

    # (number of sites + (2*number of chromosomes) - (any overlaps))
    assert len(linear_sites) == 10

    for record, (start, end) in ends_by_record.iteritems():
        assert start in linear_sites
        assert end in linear_sites
        for site in primer.locations[record]:
            assert site in linear_sites
def count_kmers(self):
    """Count k-mers for each size in the configured range and store them.

    If any primers already exist they are removed first; unless
    ``self.force`` is set the user is asked to confirm (``abort=True`` is
    passed to ``click.confirm``). For every ``k`` from ``self.min_size`` to
    ``self.max_size`` inclusive, k-mers are counted in the foreground and
    background genomes, sequences present in the optional exclusion file
    are dropped, and the remaining non-empty ``primer_dict`` results are
    added with ``add_revcomp=True``.
    """
    # We need to clear all previous primers each time due to uniqueness
    # constraints
    if Primer.select().count() > 0:
        if not self.force:
            click.confirm(
                "Remove all previously-found primers and re-count?",
                abort=True)
        self.workspace.reset_primers()

    mkdirp(output_dir)

    # Only reported in the progress message; it is not passed to Primers.add.
    chunk_size = 199

    for k in xrange(self.min_size, self.max_size + 1):
        fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir)
        bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir)

        if self.exclude_fp:
            assert os.path.isfile(self.exclude_fp)
            ex = swga.kmers.count_kmers(
                k, self.exclude_fp, output_dir, self.exclude_threshold)
        else:
            ex = {}

        # Keep kmers found in foreground, merging bg binding values, and
        # excluding those found in the excluded fasta
        candidates = (
            primer_dict(seq, fg, bg, self.min_fg_bind, self.max_bg_bind,
                        self.max_dimer_bp)
            for seq in fg if seq not in ex)
        # primer_dict results equal to {} are discarded.
        kmers = [entry for entry in candidates if entry != {}]

        # The count is doubled in the message because reverse complements
        # are requested via add_revcomp=True.
        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(kmers) * 2, k=k, cs=chunk_size))
        Primers.add(kmers, add_revcomp=True)

    message("Counted kmers in range %d-%d" % (self.min_size, self.max_size))
def count_kmers(self):
    """Count and store k-mers of every size from min_size to max_size.

    Existing primers are cleared first (the user is prompted unless
    ``self.force`` is set; ``click.confirm`` is called with ``abort=True``).
    For each ``k`` the foreground and background genomes are counted,
    sequences found in the optional exclusion file are skipped, and the
    non-empty ``primer_dict`` results are passed to ``Primers.add`` with
    ``add_revcomp=True``.
    """
    # We need to clear all previous primers each time due to uniqueness
    # constraints
    if Primer.select().count() > 0:
        if not self.force:
            click.confirm(
                "Remove all previously-found primers and re-count?",
                abort=True)
        self.workspace.reset_primers()

    mkdirp(output_dir)

    # Used for the progress message only; Primers.add does not receive it.
    chunk_size = 199

    for k in xrange(self.min_size, self.max_size + 1):
        fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir)
        bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir)

        if self.exclude_fp:
            assert os.path.isfile(self.exclude_fp)
            ex = swga.kmers.count_kmers(
                k, self.exclude_fp, output_dir, self.exclude_threshold)
        else:
            ex = {}

        # Keep kmers found in foreground, merging bg binding values, and
        # excluding those found in the excluded fasta
        kmers = []
        for seq in fg:
            if seq in ex:
                continue
            entry = primer_dict(seq, fg, bg, self.min_fg_bind,
                                self.max_bg_bind, self.max_dimer_bp)
            # An empty dict from primer_dict is dropped.
            if entry != {}:
                kmers.append(entry)

        # n is doubled because reverse complements are added as well.
        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(kmers) * 2, k=k, cs=chunk_size))
        Primers.add(kmers, add_revcomp=True)

    message("Counted kmers in range %d-%d" % (self.min_size, self.max_size))
def primers(self):
    """Create six primers with ids 0-5 and return them in creation order."""
    # (_id, seq)
    specs = (
        # reference primer
        (0, "ATGCTC"),
        # rev. complement has 4 bases overlapping
        (1, "CAGCAT"),
        # rev. complement has 3 bases overlapping
        (2, "GAGGTA"),
        (3, "ATCGAG"),
        # rev. complement has one base overlapping
        (4, "TTCCAC"),
        # substring of reference primer
        (5, "ATGC"),
    )
    return [Primer.create(_id=pid, seq=seq) for pid, seq in specs]
def primers(ws, seqs):
    """Create one Primer per sequence in ``seqs`` and return them in order.

    ``ws`` is accepted but not used in the body.
    """
    created = []
    for seq in seqs:
        created.append(Primer.create(seq=seq))
    return created
def run(self):
    """Build the workspace summary message and echo it.

    Queries aggregate statistics for primers and sets, fills
    ``self.best_set_desc`` and ``self.summary_msg`` from their templates,
    sets ``self.header``, and prints the quoted summary with ``click.echo``.

    NOTE: both templates are filled from ``locals()``, so the local
    variable names in this method are template field names. Renaming or
    removing a local can break formatting.
    """
    self.summary_msg = summary_template
    self.best_set_desc = best_set_desc

    # Over all primers: average fg/bg frequency and the number of rows.
    avg_fg_bind, avg_bg_bind, nprimers = (
        Primer
        .select(fn.Avg(Primer.fg_freq),
                fn.Avg(Primer.bg_freq),
                fn.Count(Primer.seq))
        .scalar(as_tuple=True))

    # Either average may come back as None (presumably when there are no
    # primers yet -- confirm); fall back to 0 so the divisions below work.
    if (avg_fg_bind is None) or (avg_bg_bind is None):
        (avg_fg_bind, avg_bg_bind) = (0, 0)

    # float() forces true division of the averages by the genome lengths.
    fg_bind_ratio = avg_fg_bind / float(self.fg_length)
    bg_bind_ratio = avg_bg_bind / float(self.bg_length)
    nactive = Primer.select().where(Primer.active == True).count()

    # Min/max/average tm, restricted to active primers.
    min_tm, max_tm, avg_tm = (
        Primer
        .select(fn.Min(Primer.tm),
                fn.Max(Primer.tm),
                fn.Avg(Primer.tm))
        .where(Primer.active == True)
        .scalar(as_tuple=True))

    nsets = Set.select(fn.Count(Set._id)).scalar()

    if nsets > 0:
        # The first set when ordered by score is reported as the best set.
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(bs.primer_seqs()).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # One formatted line per exported field, skipping the fields that
        # are listed explicitly in the exclusion list.
        bs_stats = "- " + "\n - ".join(
            fmtkv(k, bs.__dict__['_data'][k])
            for k in bs.exported_fields()
            if k not in ["_id", "pids", "score", "primers"]
        )
        # The bs_* locals above are consumed here by name.
        self.best_set_desc = self.best_set_desc.format(**locals())

    # Hints are empty strings unless the corresponding count is zero.
    if_no_primers_msg = click.style(
        "Run `swga count` to find possible primers."
        if nprimers == 0 else "",
        fg='green')
    if_no_active_primers_msg = click.style(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "",
        fg='green')
    # The tm sentence is used only when there are active primers and both
    # min_tm and max_tm are truthy; otherwise the fallback text is used.
    melting_tmp_msg = (
        "The melting temp of the primers ranges between {min_tm:.2f}C and "
        "{max_tm:.2f}C with an average of {avg_tm:.2f}C."
        if nactive > 0 and min_tm and max_tm else
        "No melting temps have been calculated yet.").format(**locals())
    ifzero_sets_msg = click.style(
        "Run `swga find_sets` after identifying valid primers to begin "
        "collecting sets.\n", fg='green')

    set_msg = (self.best_set_desc if nsets > 0 else ifzero_sets_msg)
    primer_db = os.path.abspath(self.primer_db)
    # The counts are re-bound to styled strings only here, after every
    # numeric comparison above has been made.
    nprimers = click.style(str(nprimers), bold=True, fg='blue')
    nactive = click.style(str(nactive), bold=True, fg='blue')
    nsets = click.style(str(nsets), bold=True, fg='blue')
    self.header = click.style("swga v{}".format(__version__), fg='green')

    # Copy all the relevant values into one dict
    # (locals are applied second, so they override same-named attributes).
    values = self.__dict__.copy()
    values.update(locals())

    # Format the summary message with all the calculated values
    self.summary_msg = self.summary_msg.format(**values)
    click.echo(quote(self.summary_msg, quote=" ", width=200))
def validate_order_field(field, model):
    '''Ensures the given field exists in the model.

    A falsy ``field`` (None or an empty string) is accepted without
    consulting ``model``. Otherwise ``field`` must be one of
    ``model.fields()``; if it is not, ``swga.error`` is called with a
    message listing the valid choices for that same model.
    '''
    if not field:
        return

    valid_fields = model.fields()
    if field not in valid_fields:
        # List the fields of the model being validated, not Primer's.
        swga.error(
            "Cannot order by '{}'. Valid choices are {}"
            .format(field, ", ".join(valid_fields)))
def test_add_primers(self):
    '''Adding "AAAA" with add_revcomp=True must also store "TTTT" once.'''
    Primers.add([{'seq': "AAAA"}], add_revcomp=True)

    revcomp_rows = Primer.select().where(Primer.seq == "TTTT")
    assert revcomp_rows.count() == 1
def test_bad_set_add(self):
    """Set.add must raise ValueError for None and for the "XX" primer query."""
    with pytest.raises(ValueError):
        Set.add(0, None, score=1)

    # Build the query outside the raises block so that only Set.add itself
    # can satisfy the expected ValueError.
    invalid_primers = Primer.select().where(Primer.seq == "XX")
    with pytest.raises(ValueError):
        Set.add(0, primers=invalid_primers, score=1)
def run(self):
    """Assemble and print the summary of primers and sets.

    Fills ``self.best_set_desc`` and ``self.summary_msg`` from their
    templates, sets ``self.header``, and echoes the quoted summary.

    NOTE: the templates are formatted with ``**locals()``; the local names
    below therefore act as template field names and should not be renamed
    without updating the templates.
    """
    self.summary_msg = summary_template
    self.best_set_desc = best_set_desc

    # Average foreground/background frequency and row count over all primers.
    avg_fg_bind, avg_bg_bind, nprimers = (Primer.select(
        fn.Avg(Primer.fg_freq), fn.Avg(Primer.bg_freq),
        fn.Count(Primer.seq)).scalar(as_tuple=True))

    # A None average is replaced by 0 (presumably the empty-table case --
    # confirm) so the ratio computations below do not fail.
    if (avg_fg_bind is None) or (avg_bg_bind is None):
        (avg_fg_bind, avg_bg_bind) = (0, 0)

    # Averages divided by the genome lengths, using float division.
    fg_bind_ratio = avg_fg_bind / float(self.fg_length)
    bg_bind_ratio = avg_bg_bind / float(self.bg_length)
    nactive = Primer.select().where(Primer.active == True).count()

    # tm statistics are computed over active primers only.
    min_tm, max_tm, avg_tm = (Primer.select(fn.Min(
        Primer.tm), fn.Max(Primer.tm), fn.Avg(
            Primer.tm)).where(Primer.active == True).scalar(as_tuple=True))

    nsets = Set.select(fn.Count(Set._id)).scalar()

    if nsets > 0:
        # Best set = first row when sets are ordered by score.
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(bs.primer_seqs()).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # Exported fields are rendered one per line, except those named
        # in the exclusion list.
        bs_stats = "- " + "\n - ".join(
            fmtkv(k, bs.__dict__['_data'][k]) for k in bs.exported_fields()
            if k not in ["_id", "pids", "score", "primers"])
        # Reads the bs_* locals defined just above by name.
        self.best_set_desc = self.best_set_desc.format(**locals())

    # Each hint is the empty string unless its count is zero.
    if_no_primers_msg = click.style(
        "Run `swga count` to find possible primers."
        if nprimers == 0 else "",
        fg='green')
    if_no_active_primers_msg = click.style(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "",
        fg='green')
    # Falls back to the "not calculated" text when there are no active
    # primers or when min_tm/max_tm are falsy.
    melting_tmp_msg = (
        "The melting temp of the primers ranges between {min_tm:.2f}C and "
        "{max_tm:.2f}C with an average of {avg_tm:.2f}C."
        if nactive > 0 and min_tm and max_tm else
        "No melting temps have been calculated yet.").format(**locals())
    ifzero_sets_msg = click.style(
        "Run `swga find_sets` after identifying valid primers to begin "
        "collecting sets.\n",
        fg='green')

    set_msg = (self.best_set_desc if nsets > 0 else ifzero_sets_msg)
    primer_db = os.path.abspath(self.primer_db)
    # From here on the counts are styled strings, not numbers; all numeric
    # tests on them happen above this point.
    nprimers = click.style(str(nprimers), bold=True, fg='blue')
    nactive = click.style(str(nactive), bold=True, fg='blue')
    nsets = click.style(str(nsets), bold=True, fg='blue')
    self.header = click.style("swga v{}".format(__version__), fg='green')

    # Copy all the relevant values into one dict
    # (instance attributes first, then locals, which win on a name clash).
    values = self.__dict__.copy()
    values.update(locals())

    # Format the summary message with all the calculated values
    self.summary_msg = self.summary_msg.format(**values)
    click.echo(quote(self.summary_msg, quote=" ", width=200))