def run(self):
    """Refresh the primers named by ``self.input`` and activate them.

    Builds a ``Primers`` collection from ``self.input``, then chains
    ``update_melt_temps()``, ``update_locations(self.fg_genome_fp)`` and
    ``activate()`` on it.

    :raises AttributeError: re-raised unchanged (after a warning) if any
        step of the chain raises it.
    """
    primers = Primers(self.input)
    try:
        (primers
         .update_melt_temps()
         .update_locations(self.fg_genome_fp)
         .activate())
    except AttributeError as e:
        # Format the exception itself: ``e.message`` is deprecated since
        # Python 2.6 and missing on Python 3, where reading it would raise
        # a second AttributeError from inside this handler.
        warn("Error updating database: '{}'".format(e))
        # A bare ``raise`` re-raises the active exception with its
        # original traceback intact.
        raise
def run(self):
    """Update and activate the primers listed in ``self.input``.

    The ``Primers`` collection is created from ``self.input`` and then
    passed through ``update_melt_temps()``,
    ``update_locations(self.fg_genome_fp)`` and ``activate()`` in that order.

    :raises AttributeError: propagated to the caller after a warning has
        been emitted.
    """
    primers = Primers(self.input)
    try:
        with_melt_temps = primers.update_melt_temps()
        with_locations = with_melt_temps.update_locations(self.fg_genome_fp)
        with_locations.activate()
    except AttributeError as e:
        # ``e.message`` does not exist on Python 3 (and is deprecated on
        # Python 2.6+), so format the exception object directly.
        warn("Error updating database: '{}'".format(e))
        # Re-raise the exception currently being handled without touching
        # its traceback.
        raise
def parse_args(self, argv, quiet=False):
    """Parse ``argv`` with ``self.parser`` and mirror the results onto ``self``.

    Recognised options are stored as the dict ``self.args`` and also set as
    individual attributes; unrecognised arguments are kept in
    ``self.unknown``.

    :param argv: the argument list handed to ``parse_known_args``.
    :param quiet: when true, skip the call to ``self._pprint_args()``.
    """
    namespace, leftovers = self.parser.parse_known_args(argv)
    self.args = vars(namespace)
    self.unknown = leftovers

    # Expose every parsed option directly as an attribute of this object.
    for name, value in self.args.items():
        setattr(self, name, value)

    if not quiet:
        self._pprint_args()

    # Options exist but every single one is None: warn the user.
    if self.args and all(value is None for value in self.args.values()):
        template = (
            u'[swga {0:s}]: All parameters are missing- this may indicate '
            u'a corrupt or missing parameters file.')
        warn(template.format(self.name))
def parse_args(self, argv, quiet=False):
    """Parse ``argv`` with ``self.parser`` and record the results on ``self``.

    Unrecognised arguments are stored in ``self.unknown``; recognised ones
    are stored as the dict ``self.args`` and forwarded as keyword arguments
    to ``self.kwargs_as_args``.

    :param argv: the argument list handed to ``parse_known_args``.
    :param quiet: when true, skip the call to ``self.pprint_args()``.
    """
    args, unknown = self.parser.parse_known_args(argv)
    self.unknown = unknown
    self.args = vars(args)
    self.kwargs_as_args(**self.args)
    if not quiet:
        self.pprint_args()
    if self.args and all(value is None for value in self.args.values()):
        # The two literals are joined into ONE message before ``%`` is
        # applied. Previously a comma separated them, so ``%`` was applied
        # to a literal without a placeholder (TypeError) and the command
        # name was never interpolated.
        swga.warn(
            "[swga %s]: All parameters are missing- "
            "this may indicate a corrupt or missing parameters file."
            % (self.name,))
def process_lines(self, setfinder_lines):
    """Score each set read from ``setfinder_lines`` and store those that pass.

    Every line is parsed with ``score.read_set_finder_line``; unparsable
    lines produce a warning and are skipped. Parsed sets are scored with
    ``score.score_set`` and, unless the score is ``False``, saved through
    ``Set.add``. Processing stops once ``self.max_sets`` sets have been
    saved.

    :param setfinder_lines: iterable of set finder output lines; it must
        provide ``close()``, which is always called on exit.
    """
    n_passed = 0
    n_processed = 0
    smallest_max_dist = float('inf')
    try:
        for line in setfinder_lines:
            try:
                primer_ids, bg_dist_mean = score.read_set_finder_line(line)
            except ValueError:
                warn("Could not parse line:\n\t" + line)
                continue

            primers = Primers.select_by_ids(primer_ids)
            n_processed += 1

            set_score, variables, max_dist = score.score_set(
                primers=primers,
                max_fg_bind_dist=self.max_fg_bind_dist,
                bg_dist_mean=bg_dist_mean,
                chr_ends=self.chr_ends,
                score_fun=self.score_fun,
                interactive=False)

            # Track the smallest maximum distance seen so far.
            smallest_max_dist = min(smallest_max_dist, max_dist)

            status = STATUS_LINE.format(
                n_processed, n_passed, smallest_max_dist)
            message(status, newline=False)

            # A score of exactly False marks a set that did not pass.
            if set_score is False:
                continue

            n_passed += 1
            Set.add(
                _id=n_passed,
                primers=primers,
                score=set_score,
                scoring_fn=self.score_expression,
                **variables)

            if n_passed >= self.max_sets:
                message("\nDone (scored %i sets)" % n_passed)
                break
    finally:
        # Raises a GeneratorExit inside the find_sets command, prompting it
        # to quit the subprocess
        setfinder_lines.close()
def process_lines(self, setfinder_lines):
    """Read set finder output, score every set and keep the passing ones.

    Lines that ``score.read_set_finder_line`` rejects with ``ValueError``
    are reported and skipped. Each remaining set is scored by
    ``score.score_set``; sets whose score is not ``False`` are stored via
    ``Set.add``, and the loop ends after ``self.max_sets`` stored sets.

    :param setfinder_lines: iterable of lines that also exposes
        ``close()``; it is closed when this method finishes, however it
        finishes.
    """
    stored = 0
    seen = 0
    smallest_max_dist = float('inf')
    try:
        for raw_line in setfinder_lines:
            try:
                parsed = score.read_set_finder_line(raw_line)
                primer_ids, bg_dist_mean = parsed
            except ValueError:
                warn("Could not parse line:\n\t" + raw_line)
                continue

            primers = Primers.select_by_ids(primer_ids)
            seen += 1

            scored = score.score_set(
                primers=primers,
                max_fg_bind_dist=self.max_fg_bind_dist,
                bg_dist_mean=bg_dist_mean,
                chr_ends=self.chr_ends,
                score_fun=self.score_fun,
                interactive=False)
            set_score, variables, max_dist = scored

            if max_dist < smallest_max_dist:
                smallest_max_dist = max_dist

            message(
                STATUS_LINE.format(seen, stored, smallest_max_dist),
                newline=False)

            # Only sets whose score is not literally False are stored.
            if set_score is not False:
                stored += 1
                Set.add(
                    _id=stored,
                    primers=primers,
                    score=set_score,
                    scoring_fn=self.score_expression,
                    **variables)
                if stored >= self.max_sets:
                    message("\nDone (scored %i sets)" % stored)
                    break
    finally:
        # Raises a GeneratorExit inside the find_sets command, prompting it
        # to quit the subprocess
        setfinder_lines.close()
def calculate_bg_dist_mean(primers, bg_length):
    """Calculate the mean distance between binding sites on the bg genome.

    :param primers: iterable of objects with a ``bg_freq`` attribute; it is
        traversed exactly once, so a generator is acceptable.
    :param bg_length: the total length of the background genome.
    :returns: ``bg_length`` divided by the summed ``bg_freq`` values as a
        float, or infinity (with a warning) when that sum is zero.
    """
    total_bg_freq = sum(p.bg_freq for p in primers)
    if total_bg_freq == 0:
        warn(
            "No primers appear in the background genome: "
            "bg_dist_mean set as infinite")
        return float('Inf')
    # Reuse the total computed above. Summing ``primers`` a second time
    # yields 0 for a one-shot iterator and triggers ZeroDivisionError.
    return float(bg_length) / total_bg_freq
def read_primer_list(lines):
    """Read in a list of primers and return their records from the db.

    :param lines: a list of primer sequences, one per line; anything after
        the first whitespace is ignored.
    :returns: a list of the ``Primer`` records whose ``seq`` is among the
        requested sequences. A warning is issued for each requested
        sequence that has no record.
    """
    # Keep only the first space/tab-delimited field of every line.
    seqs = [re.split(r'[ \t]+', line.strip('\n'))[0] for line in lines]
    primers = list(Primer.select().where(Primer.seq << seqs).execute())
    if len(primers) < len(seqs):
        # A set makes each membership test O(1) instead of scanning a list
        # once per requested sequence.
        found_seqs = {primer.seq for primer in primers}
        for seq in seqs:
            if seq not in found_seqs:
                warn(
                    seq + ' not in the database; skipping. Add it manually with '
                    '`swga count --input <file>` ')
    return primers
def init_db(db_fname, create_if_missing=False):
    """Initialise the primer database at ``db_fname`` and return it.

    :param db_fname: path of the database file. ``None`` is reported via
        ``swga.error``; ``":memory:"`` only triggers a warning.
    :param create_if_missing: when false, a path that is not an existing
        file is reported via ``swga.error`` (the original notes say this
        exits the program).
    :returns: the module-level ``db`` object after ``db.init(db_fname)``.
    """
    if db_fname is None:
        swga.error("Primer db name cannot be `None`: corrupt preferences.cfg?")
    elif db_fname == ":memory:":
        swga.warn("Creating in-memory primer database; this may not work.")
    else:
        usable = os.path.isfile(db_fname) or create_if_missing
        if not usable:
            # Exits here
            not_found = (
                "Primer db not found at '%s': specify different filename or "
                "re-run `swga count`" % db_fname)
            swga.error(not_found, exception=False)

    db.init(db_fname)
    return db
def _hits_per_record(self):
    """Yield primer coverage per sliding window for every record.

    For each record returned by ``swga.locate.chromosome_ends``, counts how
    many primers in ``self.set.primers`` cover each base, then slides a
    window of ``self.window_size`` bases in steps of ``self.step_size``
    across the record.

    The window size is clamped to the record length and the step size to
    the window size; a warning is issued whenever a value is clamped.

    :returns: a generator of ``(record_name, midpoint, hits)`` tuples, where
        ``midpoint`` is the integer midpoint of the window and ``hits`` is
        the sum of per-base primer counts inside it.
    """
    record_ends = swga.locate.chromosome_ends(self.fg_genome_fp)
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for record_name, ends in record_ends.items():
        record_length = ends[1] + 1

        # Check window size <= record_length and fix if not
        this_window_size = self.window_size
        if this_window_size > record_length:
            this_window_size = record_length
            swga.warn(
                "In [{}]: window size larger than record; set to {}"
                .format(record_name, this_window_size))

        # Check step size is compatible with window size and fix if not
        this_step_size = self.step_size
        if this_step_size > this_window_size:
            this_step_size = this_window_size
            swga.warn(
                "In [{}]: step size larger than window size ({}), set to {}"
                .format(record_name, this_window_size, this_step_size))

        # Count the number of primers that bind to any given nucleotide
        # in the current record
        counter = Counter()
        for primer in self.set.primers:
            k = len(primer.seq)
            locations = primer.locations()
            for site in locations[record_name]:
                # ``Counter.update`` counts the elements of an iterable
                # directly; no intermediate Counter is needed.
                counter.update(range(site, site + k))

        # The last valid window starts at ``record_length - window_size``.
        # The ``+ 1`` keeps that start inside the half-open range, so the
        # final window is not dropped and a record whose window was clamped
        # to its own length still yields one window.
        last_start = int(record_length - this_window_size)
        for start in range(0, last_start + 1, this_step_size):
            end = start + this_window_size
            # Floor division keeps the midpoint an integer on Python 3 too.
            midpoint = (end + start) // 2
            # Add each base's count to get the number of bases covered
            hits = sum(counter[i] for i in range(start, end))
            yield record_name, midpoint, hits
def check_create_tables(primer_db):
    """Create the database tables, asking first if a database file exists.

    When ``primer_db`` is an existing file, two warnings are issued and the
    user is asked to confirm; ``click.confirm`` is called with
    ``abort=True``. ``database.create_tables()`` is then called.

    :param primer_db: path of the primer database file.
    """
    already_present = os.path.isfile(primer_db)
    if already_present:
        full_path = os.path.abspath(primer_db)
        swga.warn("Existing database found at %s" % full_path)
        swga.warn("This will reset the entire database!")
        click.confirm("Are you sure you want to proceed?", abort=True)
    database.create_tables()