def run(self):
    """Select candidate primers, optionally filter them, and mark the
    best ones as active."""
    if self.input:
        # An explicit input file takes precedence over the database
        with open(self.input, 'rb') as infile:
            primers = Primers(infile)
    else:
        # When pulling from the db, filtering is always performed
        self.skip_filtering = False
        primers = Primers()
    assert isinstance(primers, Primers)
    # Undo all active marks, if any
    Primer.update(active=False).execute()
    if not self.skip_filtering:
        # Same call sequence as chaining each filter off the previous
        # one; the final result is discarded just as in a bare chain
        result = primers.filter_min_fg_rate(self.min_fg_bind)
        result = result.filter_max_bg_rate(self.max_bg_bind)
        result = result.summarize()
        result = result.filter_tm_range(self.min_tm, self.max_tm)
        result = result.limit_to(self.max_primers)
        result.filter_max_gini(self.max_gini, self.fg_genome_fp)
    primers.activate(self.max_primers)
def run(self):
    """Refresh melting temperatures and binding locations for the
    primers named in self.input, then activate them.

    Raises:
        AttributeError: re-raised (with original traceback) after
            logging a warning if the database update fails.
    """
    primers = Primers(self.input)
    try:
        (primers
         .update_melt_temps()
         .update_locations(self.fg_genome_fp)
         .activate())
    except AttributeError as e:
        # str(e) replaces the deprecated `e.message` attribute (removed
        # entirely in Python 3)
        warn("Error updating database: '{}'".format(str(e)))
        # Bare `raise` re-raises without resetting the traceback,
        # unlike `raise e` in Python 2
        raise
def run(self): self.chr_ends = locate.chromosome_ends(self.fg_genome_fp) # Evaluate the scoring expression from a string and return it as a # callable function self.score_fun = functools.partial(score.default_score_set, expression=self.score_expression) primers = Primers(self.input) if len(primers) == 0: error("No primers specified exist in database, aborting.", exception=False) bg_dist_mean = score.calculate_bg_dist_mean(primers, self.bg_length) set_score, variables, _ = score.score_set(primers=primers, max_fg_bind_dist=0, bg_dist_mean=bg_dist_mean, chr_ends=self.chr_ends, score_fun=self.score_fun, interactive=True) do_add_set, set_id = self.user_add_set(set_score, variables) if do_add_set: s = workspace.Set.add(_id=set_id, primers=primers, score=set_score, scoring_fn=self.score_expression, **variables) set_added = s is not None if set_added: message("Set {} added successfully.".format(set_id)) else: message("That primer set already exists.")
def count_kmers(self):
    """Count kmers of every size in the configured range in the fg/bg
    genomes and load them (plus reverse complements) into the db.

    Existing primers are removed first (after confirmation, unless
    self.force is set) because the table enforces uniqueness.

    Raises:
        ValueError: if self.exclude_fp is set but is not a file.
    """
    # We need to clear all previous primers each time due to uniqueness
    # constraints
    if Primer.select().count() > 0:
        if not self.force:
            click.confirm(
                "Remove all previously-found primers and re-count?",
                abort=True)
        self.workspace.reset_primers()
    # NOTE(review): output_dir is not defined in this method; presumably
    # a module-level constant — verify
    mkdirp(output_dir)
    for k in xrange(self.min_size, self.max_size + 1):
        fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir)
        bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir)
        if self.exclude_fp:
            # Explicit check instead of `assert`, which is silently
            # stripped when Python runs with -O
            if not os.path.isfile(self.exclude_fp):
                raise ValueError(
                    "Excluded fasta not found: {}".format(self.exclude_fp))
            ex = swga.kmers.count_kmers(
                k, self.exclude_fp, output_dir, self.exclude_threshold)
        else:
            ex = {}
        # Keep kmers found in foreground, merging bg binding values, and
        # excluding those found in the excluded fasta
        kmers = [
            primer_dict(seq, fg, bg, self.min_fg_bind, self.max_bg_bind,
                        self.max_dimer_bp)
            for seq in fg.viewkeys() if seq not in ex.viewkeys()
        ]
        # Drop entries primer_dict returned empty
        kmers = filter(lambda x: x != {}, kmers)
        nkmers = len(kmers)
        chunk_size = 199
        message("Writing {n} {k}-mers into db in blocks of {cs}...".format(
            n=nkmers * 2, k=k, cs=chunk_size))
        Primers.add(kmers, add_revcomp=True)
    message("Counted kmers in range %d-%d" % (self.min_size, self.max_size))
def count_kmers(self):
    """Enumerate kmers across the configured size range in the fg/bg
    genomes and write them (with reverse complements) to the db."""
    # Previously-found primers must be removed first: the table
    # enforces uniqueness on sequences
    if Primer.select().count() > 0:
        if not self.force:
            click.confirm(
                "Remove all previously-found primers and re-count?",
                abort=True)
        self.workspace.reset_primers()
    mkdirp(output_dir)
    kmers = []
    for size in xrange(self.min_size, self.max_size + 1):
        fg_counts = swga.kmers.count_kmers(
            size, self.fg_genome_fp, output_dir)
        bg_counts = swga.kmers.count_kmers(
            size, self.bg_genome_fp, output_dir)
        if not self.exclude_fp:
            excluded = {}
        else:
            assert os.path.isfile(self.exclude_fp)
            excluded = swga.kmers.count_kmers(
                size, self.exclude_fp, output_dir, self.exclude_threshold)
        # Foreground kmers only, skipping anything present in the
        # excluded fasta; bg binding values are merged in by primer_dict
        candidates = [
            primer_dict(seq, fg_counts, bg_counts, self.min_fg_bind,
                        self.max_bg_bind, self.max_dimer_bp)
            for seq in fg_counts.viewkeys()
            if seq not in excluded.viewkeys()
        ]
        kmers = filter(lambda d: d != {}, candidates)
        nkmers = len(kmers)
        chunk_size = 199
        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=nkmers * 2, k=size, cs=chunk_size))
        Primers.add(kmers, add_revcomp=True)
    message("Counted kmers in range %d-%d" % (self.min_size, self.max_size))
def count_specific_kmers(self, kmers):
    """Count and store only the specific kmer sequences given.

    Sequences already present in the db are skipped with a warning;
    sequences absent from the foreground genome are skipped too.

    Args:
        kmers: iterable of primer sequence strings.
    """
    try:
        # Skip primers that already exist and warn users
        existing = Primers.select_by_seqs(kmers)
        for p in existing:
            message("{} already exists in db, skipping...".format(p))
        kmers = [p for p in kmers if p not in existing]
    except OperationalError:
        # If this fails due to an OperationalError, it probably means the
        # database tables haven't been created yet.
        error(
            "It doesn't appear that the workspace has been initialized: "
            "run `swga init' first.")
    mkdirp(output_dir)
    # Group the kmers by length to avoid repeatedly counting kmers of the
    # same size
    kmers_by_length = defaultdict(list)
    for kmer in kmers:
        kmers_by_length[len(kmer)].append(kmer)
    for k, mers in kmers_by_length.items():
        fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir, 1)
        bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir, 1)
        primers = []
        for mer in mers:
            try:
                primers.append(primer_dict(mer, fg, bg, 0, INF, INF))
            except KeyError:
                # primer_dict raises KeyError when `mer` is absent from
                # the foreground counts
                message(
                    "{} does not exist in foreground genome, skipping..."
                    .format(mer))
        chunk_size = 199
        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(primers), k=k, cs=chunk_size))
        Primers.add(primers, add_revcomp=False)
def process_lines(self, setfinder_lines):
    """Score candidate sets streamed from the set finder, saving those
    that pass until self.max_sets sets are accepted.

    Args:
        setfinder_lines: generator yielding raw set-finder output lines.
    """
    passed = processed = 0
    smallest_max_dist = float('inf')
    try:
        for line in setfinder_lines:
            try:
                primer_ids, bg_dist_mean = score.read_set_finder_line(line)
            except ValueError:
                # Malformed lines are reported but do not abort the run
                warn("Could not parse line:\n\t" + line)
                continue
            primers = Primers.select_by_ids(primer_ids)
            processed += 1
            set_score, variables, max_dist = score.score_set(
                primers=primers,
                max_fg_bind_dist=self.max_fg_bind_dist,
                bg_dist_mean=bg_dist_mean,
                chr_ends=self.chr_ends,
                score_fun=self.score_fun,
                interactive=False
            )
            # Track the smallest max-distance seen so far for the
            # in-place status display
            if max_dist < smallest_max_dist:
                smallest_max_dist = max_dist
            message(
                STATUS_LINE.format(processed, passed, smallest_max_dist),
                newline=False)
            # Return early if the set doesn't pass
            if set_score is False:
                continue
            else:
                passed += 1
                # Sets are stored under a sequential id equal to the
                # running count of passing sets
                Set.add(
                    _id=passed,
                    primers=primers,
                    score=set_score,
                    scoring_fn=self.score_expression,
                    **variables)
            if passed >= self.max_sets:
                message("\nDone (scored %i sets)" % passed)
                break
    finally:
        # Raises a GeneratorExit inside the find_sets command, prompting it
        # to quit the subprocess
        setfinder_lines.close()
def count_specific_kmers(self, kmers):
    """Count only the given kmer sequences and write them to the db."""
    try:
        # Warn about (and drop) sequences already present in the db
        existing = Primers.select_by_seqs(kmers)
        for p in existing:
            message("{} already exists in db, skipping...".format(p))
        kmers = [p for p in kmers if p not in existing]
    except OperationalError:
        # Missing tables: the workspace was never initialized
        error(
            "It doesn't appear that the workspace has been initialized: "
            "run `swga init' first.")
    mkdirp(output_dir)
    # Bucket sequences by length so each size is counted only once
    by_length = defaultdict(list)
    for seq in kmers:
        by_length[len(seq)].append(seq)
    for length, seqs in by_length.items():
        fg = swga.kmers.count_kmers(
            length, self.fg_genome_fp, output_dir, 1)
        bg = swga.kmers.count_kmers(
            length, self.bg_genome_fp, output_dir, 1)
        primers = []
        for seq in seqs:
            try:
                primers.append(primer_dict(seq, fg, bg, 0, INF, INF))
            except KeyError:
                # Sequence not present in the foreground counts
                message(
                    "{} does not exist in foreground genome, skipping..."
                    .format(seq))
        chunk_size = 199
        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(primers), k=length, cs=chunk_size))
        Primers.add(primers, add_revcomp=False)
def process_lines(self, setfinder_lines):
    """Consume set-finder output line by line, scoring each candidate
    set and storing every passing set until self.max_sets accepted."""
    n_passed = 0
    n_seen = 0
    best_max_dist = float('inf')
    try:
        for raw_line in setfinder_lines:
            try:
                primer_ids, bg_dist_mean = \
                    score.read_set_finder_line(raw_line)
            except ValueError:
                warn("Could not parse line:\n\t" + raw_line)
                continue
            primer_group = Primers.select_by_ids(primer_ids)
            n_seen += 1
            set_score, variables, max_dist = score.score_set(
                primers=primer_group,
                max_fg_bind_dist=self.max_fg_bind_dist,
                bg_dist_mean=bg_dist_mean,
                chr_ends=self.chr_ends,
                score_fun=self.score_fun,
                interactive=False)
            # Keep the tightest max-distance seen so far for the status
            # line (same as updating only on strict improvement)
            best_max_dist = min(best_max_dist, max_dist)
            message(
                STATUS_LINE.format(n_seen, n_passed, best_max_dist),
                newline=False)
            if set_score is False:
                # Failed scoring; move on to the next candidate
                continue
            n_passed += 1
            Set.add(
                _id=n_passed,
                primers=primer_group,
                score=set_score,
                scoring_fn=self.score_expression,
                **variables)
            if n_passed >= self.max_sets:
                message("\nDone (scored %i sets)" % n_passed)
                break
    finally:
        # Closing the generator raises GeneratorExit inside the
        # find_sets command, prompting it to quit the subprocess
        setfinder_lines.close()
def build_graph(max_hetdimer_bind, outfile):
    '''Selects all active primers and outputs a primer compatibility
    graph to `outfile`.'''
    # Primer ids only matter to set_finder, so they are reassigned here
    active = Primers.select_active().assign_ids()
    message("Composing primer compatibility graph...")
    compatible_edges = build_edges(active, max_hetdimer_bind)
    if not compatible_edges:
        error(
            "No compatible primers. Try relaxing your parameters.",
            exception=False)
    with open(outfile, 'wb') as out:
        write_graph(active, compatible_edges, out)
def build_graph(max_hetdimer_bind, outfile): '''Selects all active primers and outputs a primer compatibility graph.''' # Reset all the primer IDs (as ids are only used for set_finder) primers = Primers.select_active().assign_ids() # print [(p._id, p.ratio) for p in primers] message("Composing primer compatibility graph...") edges = build_edges(primers, max_hetdimer_bind) if len(edges) == 0: error( "No compatible primers. Try relaxing your parameters.", exception=False) with open(outfile, 'wb') as out: write_graph(primers, edges, out)
def run(self):
    """Select candidate primers, optionally filter them, and mark the
    best ones as active."""
    # If we have an input file, use that. Otherwise pull from db
    if self.input:
        with open(self.input, 'rb') as infile:
            primers = Primers(infile)
    else:
        # Filtering cannot be skipped when reading from the database
        self.skip_filtering = False
        primers = Primers()
    assert isinstance(primers, Primers)
    # Undo all active marks, if any
    Primer.update(active=False).execute()
    if not self.skip_filtering:
        # Fluent chain: each filter narrows the candidate pool in turn
        (primers.filter_min_fg_rate(self.min_fg_bind).filter_max_bg_rate(
            self.max_bg_bind).summarize().filter_tm_range(
                self.min_tm,
                self.max_tm).limit_to(self.max_primers).filter_max_gini(
                    self.max_gini, self.fg_genome_fp))
    primers.activate(self.max_primers)
def test_add_primers(self):
    '''Must add the reverse complement of a primer if requested.'''
    Primers.add([{'seq': "AAAA"}], add_revcomp=True)
    revcomp_hits = Primer.select().where(Primer.seq == "TTTT").count()
    assert revcomp_hits == 1