def main():
    """Parse the sub-command from the command line and dispatch to it.

    Only the positional ``command`` and the optional ``-c/--config`` flag are
    parsed here. All remaining arguments are handed, unparsed, to the selected
    sub-command's ``main`` together with the chosen config file path.
    A ``KeyboardInterrupt`` raised while the sub-command runs is reported via
    ``swga.error(..., exception=False)``.
    """
    # Sub-command name -> entry point implementing it.
    dispatch = {
        'init': commands.init.main,
        'summary': commands.summary.main,
        'count': commands.count.main,
        'filter': commands.filter.main,
        'find_sets': commands.find_sets.main,
        'score': commands.score.main,
        'activate': commands.activate.main,
        'export': commands.export.main,
    }
    default_cfg = swga.utils.configure.default_config_file

    # The usage template is interpolated with the default config path.
    cli = argparse.ArgumentParser(
        usage=usage % default_cfg,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    cli.add_argument('command', type=str, choices=dispatch.keys())
    cli.add_argument(
        '-c', '--config',
        metavar="CFG_FILE",
        help='path to config file (default: %(default)s)',
        default=default_cfg)

    # parse_known_args leaves the sub-command's own options in `leftover`.
    known, leftover = cli.parse_known_args()
    try:
        handler = dispatch[known.command]
        handler(leftover, known.config)
    except KeyboardInterrupt:
        swga.error("\n-- Stopped by user --", exception=False)
def run(self):
    """Score the primers named in ``self.input`` as one set and offer to save it.

    Side effects: sets ``self.chr_ends`` and ``self.score_fun``. The set is
    scored interactively; if ``self.user_add_set`` approves, the set is passed
    to ``workspace.Set.add`` and a message reports whether it was stored or
    was already present.
    """
    self.chr_ends = locate.chromosome_ends(self.fg_genome_fp)

    # Pre-bind the scoring expression string so that score_fun can be used
    # as a plain callable by the scoring code.
    self.score_fun = functools.partial(
        score.default_score_set,
        expression=self.score_expression)

    candidate = Primers(self.input)
    if len(candidate) == 0:
        error("No primers specified exist in database, aborting.",
              exception=False)

    mean_bg_dist = score.calculate_bg_dist_mean(candidate, self.bg_length)
    set_score, variables, _ = score.score_set(
        primers=candidate,
        max_fg_bind_dist=0,
        bg_dist_mean=mean_bg_dist,
        chr_ends=self.chr_ends,
        score_fun=self.score_fun,
        interactive=True)

    do_add_set, set_id = self.user_add_set(set_score, variables)
    if not do_add_set:
        return

    stored = workspace.Set.add(
        _id=set_id,
        primers=candidate,
        score=set_score,
        scoring_fn=self.score_expression,
        **variables)
    # A None result is treated as "this set is already in the workspace".
    if stored is None:
        message("That primer set already exists.")
    else:
        message("Set {} added successfully.".format(set_id))
def main():
    """Parse the sub-command name and run the matching command.

    ``init`` is a plain function called with the unparsed arguments; every
    other entry is a command class that is handed to ``setup_and_run``
    together with the command name and the unparsed arguments. A
    ``KeyboardInterrupt`` is reported via ``error(..., exception=False)``.
    """
    # Sub-command name -> implementation (function for init, class otherwise).
    command_opts = {
        'init': init.main,
        'summary': Summary,
        'count': Count,
        'filter': Filter,
        'find_sets': FindSets,
        'score': Score,
        'activate': Activate,
        'export': Export,
    }

    cli = argparse.ArgumentParser(
        usage=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    cli.add_argument('command', type=str, choices=command_opts.keys())
    # Anything other than the command name stays in `leftover`.
    known, leftover = cli.parse_known_args()

    try:
        target = command_opts[known.command]
        if known.command == 'init':
            target(leftover)
        else:
            setup_and_run(target, known.command, leftover)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
def main():
    """Entry point: select a sub-command from argv and execute it.

    The first positional argument chooses the command; all other arguments
    are forwarded unparsed. ``init`` is invoked directly, the remaining
    commands are classes run through ``setup_and_run``. Ctrl-C is reported
    via ``error(..., exception=False)``.
    """
    command_opts = {
        'init': init.main,
        'summary': Summary,
        'count': Count,
        'filter': Filter,
        'find_sets': FindSets,
        'score': Score,
        'activate': Activate,
        'export': Export,
    }

    arg_parser = argparse.ArgumentParser(
        usage=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    arg_parser.add_argument(
        'command',
        type=str,
        choices=command_opts.keys())
    parsed, passthrough = arg_parser.parse_known_args()

    try:
        if parsed.command != 'init':
            # Class-based commands share a common setup/run helper.
            setup_and_run(
                command_opts[parsed.command], parsed.command, passthrough)
        else:
            command_opts[parsed.command](passthrough)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
def select_active():
    """Return the primers flagged as active, wrapped in ``Primers``.

    If the query matches no rows, the problem is reported through
    ``error(..., exception=False)`` before the (empty) result is wrapped.
    """
    # NOTE: the comparison is handed to where() as a query condition, so it
    # must remain an explicit `== True` rather than `is True` or a bare
    # truth test.
    active_query = Primer.select().where(Primer.active == True)
    if not active_query.count():
        error(
            'No active primers found. Run `swga filter` or `swga activate` '
            'first.',
            exception=False)
    return Primers(active_query)
def _get_resource_file(rs):
    """Locate the helper file `rs` and return its absolute path.

    Search order: ``<sys.prefix>/bin``, then ``<sys.exec_prefix>/bin``, then
    the ``bin`` directory of the ``swga`` package data. If none of them has
    the file, ``swga.error`` is called.
    """
    import sys

    # Try the interpreter's install prefixes first.
    for prefix in (sys.prefix, sys.exec_prefix):
        candidate = os.path.join(prefix, 'bin', rs)
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)

    # Fall back to the copy shipped as package data.
    packaged = os.path.join('bin', rs)
    if resource_exists('swga', packaged):
        candidate = resource_filename('swga', packaged)
    else:
        swga.error("Could not find `{}': try re-installing swga.".format(rs))
    return os.path.abspath(candidate)
def _get_resource_file(rs):
    """Return the absolute path of the bundled file `rs`.

    Looks in ``<sys.prefix>/bin`` first, then ``<sys.exec_prefix>/bin``, and
    finally in the ``bin`` directory of the ``swga`` package data. When the
    file is found nowhere, ``swga.error`` is called.
    """
    import sys

    in_prefix = os.path.join(sys.prefix, 'bin', rs)
    if os.path.isfile(in_prefix):
        return os.path.abspath(in_prefix)

    in_exec_prefix = os.path.join(sys.exec_prefix, 'bin', rs)
    if os.path.isfile(in_exec_prefix):
        return os.path.abspath(in_exec_prefix)

    # Neither prefix has it: check the package data.
    pkg_relative = os.path.join('bin', rs)
    if resource_exists('swga', pkg_relative):
        return os.path.abspath(resource_filename('swga', pkg_relative))

    swga.error(
        "Could not find `{}': try re-installing swga.".format(rs))
    # Only reached if swga.error returns instead of raising.
    return os.path.abspath(in_exec_prefix)
def add_set(_id, primers, **kwargs):
    """Create a ``Set`` row for `primers` unless an identical set exists.

    Sets are identified by the hash of the frozenset of their primer
    sequences. Extra keyword arguments are forwarded to ``Set.create``.

    Returns the new ``Set``, or ``None`` when a set with the same hash is
    already stored. Falsy or empty `primers` are reported via ``swga.error``.
    """
    if not primers:
        swga.error("Invalid primers for set")

    # Queries are counted in the database; other containers with len().
    nprimers = (primers.count() if isinstance(primers, pw.SelectQuery)
                else len(primers))
    if nprimers == 0:
        swga.error("Cannot have an empty set")

    sequences = frozenset(p.seq for p in primers)
    _hash = hash(sequences)

    already_stored = (
        Set.select(pw.fn.Count(Set._id))
        .where(Set._hash == _hash)
        .scalar())
    if already_stored > 0:
        return None

    new_set = Set.create(_id=_id, _hash=_hash, **kwargs)
    new_set.primers.add(primers)
    return new_set
def build_graph(max_hetdimer_bind, outfile):
    '''Write the compatibility graph of all active primers to `outfile`.

    Edges come from ``build_edges(primers, max_hetdimer_bind)``; an empty
    edge list is reported via ``error(..., exception=False)``. The output
    file is opened in binary write mode.
    '''
    # Primer IDs are reassigned first; they are only used by set_finder.
    active_primers = Primers.select_active().assign_ids()

    message("Composing primer compatibility graph...")
    compat_edges = build_edges(active_primers, max_hetdimer_bind)
    if not len(compat_edges):
        error("No compatible primers. Try relaxing your parameters.",
              exception=False)

    with open(outfile, 'wb') as graph_fp:
        write_graph(active_primers, compat_edges, graph_fp)
def build_graph(max_hetdimer_bind, outfile):
    '''Select all active primers and dump their compatibility graph.

    The graph's edges are computed by ``build_edges`` using
    `max_hetdimer_bind`. If no edges result, ``error`` is called with
    ``exception=False``. The graph is written to `outfile`, opened as 'wb'.
    '''
    # Fresh IDs are assigned because they only matter to set_finder.
    primers = Primers.select_active().assign_ids()
    message("Composing primer compatibility graph...")

    edges = build_edges(primers, max_hetdimer_bind)
    no_edges = len(edges) == 0
    if no_edges:
        error(
            "No compatible primers. Try relaxing your parameters.",
            exception=False)

    with open(outfile, 'wb') as handle:
        write_graph(primers, edges, handle)
def init_db(db_fname, create_if_missing=False):
    '''
    Point the module-level database at `db_fname` and return it.

    A `None` name is reported as an error and ":memory:" produces a warning.
    If the file does not exist and `create_if_missing` is False, the function
    exits with an error (SystemExit); with `create_if_missing` True the
    database is initialized at that path anyway.
    '''
    if db_fname is None:
        swga.error("Primer db name cannot be `None`: corrupt preferences.cfg?")
    elif db_fname == ":memory:":
        swga.warn("Creating in-memory primer database; this may not work.")
    elif not (os.path.isfile(db_fname) or create_if_missing):
        not_found_msg = (
            "Primer db not found at '%s': specify different filename or "
            "re-run `swga count`" % db_fname)
        # Exits here
        swga.error(not_found_msg, exception=False)

    db.init(db_fname)
    return db
def _parse_meta(opts, section):
    """Build the header text for `section` of a generated config file.

    Reads ``opts[section]["META"]``. The result is a blank line, the section
    description formatted as a ``##`` comment, and the ``[section]`` line.

    Raises ``ExcludeOptionFlag`` when the metadata sets ``incfg`` to a falsy
    value. A ``KeyError`` anywhere in the lookup is reported through
    ``swga.error`` as a malformed options file.
    """
    try:
        meta = opts[section]["META"]
        # Sections flagged incfg=False are left out of the config file.
        if not meta.get('incfg', True):
            raise ExcludeOptionFlag
        description = "\n" + _format_comment(meta.get('desc', ''), quote='##')
        header = "[{section}]\n".format(section=section)
        return description + header
    except KeyError:
        # NOTE(review): swga.error presumably raises on its own, which would
        # make the outer `raise` unreachable -- confirm before simplifying.
        raise swga.error("Malformed options file: try re-installing SWGA")
def run(self):
    """Score the input primers as a single set and optionally store it.

    Sets ``self.chr_ends`` and ``self.score_fun`` as a side effect. After
    interactive scoring, ``self.user_add_set`` decides whether the set is
    handed to ``workspace.Set.add``; the outcome is reported with
    ``message``.
    """
    self.chr_ends = locate.chromosome_ends(self.fg_genome_fp)
    # Bind the scoring expression string up front so score_fun is a
    # ready-to-call scoring function.
    self.score_fun = functools.partial(
        score.default_score_set, expression=self.score_expression)

    primers = Primers(self.input)
    if len(primers) == 0:
        error("No primers specified exist in database, aborting.",
              exception=False)

    scoring_kwargs = dict(
        primers=primers,
        max_fg_bind_dist=0,
        bg_dist_mean=score.calculate_bg_dist_mean(primers, self.bg_length),
        chr_ends=self.chr_ends,
        score_fun=self.score_fun,
        interactive=True,
    )
    set_score, variables, _ = score.score_set(**scoring_kwargs)

    do_add_set, set_id = self.user_add_set(set_score, variables)
    if do_add_set:
        new_set = workspace.Set.add(
            _id=set_id,
            primers=primers,
            score=set_score,
            scoring_fn=self.score_expression,
            **variables)
        # Set.add handing back None is reported as a duplicate set.
        outcome = ("Set {} added successfully.".format(set_id)
                   if new_set is not None
                   else "That primer set already exists.")
        message(outcome)
def count_specific_kmers(self, kmers):
    """Count the given k-mers in both genomes and add them to the database.

    K-mers already present in the database are skipped with a message. The
    rest are grouped by length so each length is counted only once per
    genome, and k-mers for which ``primer_dict`` raises ``KeyError`` are
    skipped with a message. An ``OperationalError`` during the initial
    lookup is reported as an uninitialized workspace.
    """
    try:
        # Skip primers that already exist and warn users
        existing = Primers.select_by_seqs(kmers)
        for known in existing:
            message("{} already exists in db, skipping...".format(known))
        kmers = [seq for seq in kmers if seq not in existing]
    except OperationalError:
        # An OperationalError here probably means the database tables
        # haven't been created yet.
        error(
            "It doesn't appear that the workspace has been initialized: "
            "run `swga init' first.")

    # NOTE(review): output_dir is not defined in this function; presumably a
    # module-level name -- confirm.
    mkdirp(output_dir)

    # Group the kmers by length to avoid repeatedly counting kmers of the
    # same size
    by_length = defaultdict(list)
    for seq in kmers:
        by_length[len(seq)].append(seq)

    # Only used in the progress message below.
    chunk_size = 199
    for k, same_length in by_length.items():
        fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir, 1)
        bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir, 1)

        records = []
        for seq in same_length:
            try:
                record = primer_dict(seq, fg, bg, 0, INF, INF)
            except KeyError:
                message(
                    "{} does not exist in foreground genome, skipping..."
                    .format(seq))
            else:
                records.append(record)

        message(
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(records), k=k, cs=chunk_size))
        Primers.add(records, add_revcomp=False)
def parse_config(cfg_file, section):
    '''
    Read the options of one section from a config file.

    A missing section yields an empty dict instead of an error. A missing
    file is reported through ``swga.error(..., exception=False)``.

    Returns:
    - defaults: a dict of the option/value pairs found in `section`
    '''
    parser = ConfigParser.SafeConfigParser()

    if not os.path.isfile(cfg_file):
        swga.error(
            "Cannot find parameters file. Run `swga init` or specify options "
            "manually.",
            exception=False)
        return {}

    with open(cfg_file) as handle:
        parser.readfp(handle)

    try:
        return dict(parser.items(section))
    except ConfigParser.NoSectionError:
        # Unknown section: behave as if it were empty.
        return {}
def count_specific_kmers(self, kmers):
    """Add the specified k-mers to the primer database with genome counts.

    Sequences that already exist in the database are reported and dropped.
    The remaining ones are bucketed by length, counted once per length in
    the foreground and background genomes, converted with ``primer_dict``
    (sequences raising ``KeyError`` are reported and skipped), and stored
    with ``Primers.add``.
    """
    try:
        # Skip primers that already exist and warn users
        already_in_db = Primers.select_by_seqs(kmers)
        for primer in already_in_db:
            message("{} already exists in db, skipping...".format(primer))
        kmers = [kmer for kmer in kmers if kmer not in already_in_db]
    except OperationalError:
        # The most likely cause is that the database tables haven't been
        # created yet.
        error("It doesn't appear that the workspace has been initialized: "
              "run `swga init' first.")

    # NOTE(review): output_dir is not assigned anywhere in this function;
    # presumably defined at module level -- confirm.
    mkdirp(output_dir)

    # Bucket the kmers by length so each size is counted only once.
    kmers_by_length = defaultdict(list)
    for kmer in kmers:
        bucket = kmers_by_length[len(kmer)]
        bucket.append(kmer)

    for length, bucket in kmers_by_length.items():
        fg_counts = swga.kmers.count_kmers(
            length, self.fg_genome_fp, output_dir, 1)
        bg_counts = swga.kmers.count_kmers(
            length, self.bg_genome_fp, output_dir, 1)

        to_add = []
        for kmer in bucket:
            try:
                to_add.append(
                    primer_dict(kmer, fg_counts, bg_counts, 0, INF, INF))
            except KeyError:
                skipped_msg = (
                    "{} does not exist in foreground genome, skipping..."
                    .format(kmer))
                message(skipped_msg)

        # The block size is only reported in the message below.
        chunk_size = 199
        progress_msg = (
            "Writing {n} {k}-mers into db in blocks of {cs}..."
            .format(n=len(to_add), k=length, cs=chunk_size))
        message(progress_msg)
        Primers.add(to_add, add_revcomp=False)
def check_version(self, version):
    """Check the version of the database and compare it to the swga version.

    The workspace version is read from ``self.metadata.version`` and tested
    against the spec ``==<major>.<minor>`` built from `version`. If the two
    versions are incompatible, raise a SystemExit.

    Args:
        version: the swga version string to compare the workspace against.
    """
    try:
        # Read and parse the stored version in one step; the metadata was
        # previously fetched twice, with the first result discarded.
        db_ver = semver.Version(self.metadata.version)
    except pw.OperationalError:
        # Metadata could not be read. NOTE(review): the placeholder string
        # presumably never satisfies the spec, so the mismatch error below
        # is reported -- confirm against the semver library in use.
        db_ver = "<NA>"

    ver = semver.Version(version)
    spec = semver.Spec('=={}.{}'.format(ver.major, ver.minor))
    if db_ver not in spec:
        error(
            "This workspace was created with a different version of swga.\n"
            "  Workspace version: {}\n"
            "  swga version:      {}\n"
            "Please re-initialize the workspace with `swga init` or use a "
            "different version of swga."
            .format(db_ver, ver),
            exception=False,
            wrap=False
        )
def summary(primer_db, fg_length, bg_length):
    """Print a report about the primers and sets stored in `primer_db`.

    Opens the database, ensures the tables exist (``drop=False``), gathers
    aggregate statistics about primers and sets, fills in the report
    template, and prints it with ``puts`` inside ``indent(2)``.

    Args:
        primer_db: path of the primer database file.
        fg_length: foreground genome length, used as the divisor for the
            average foreground binding ratio.
        bg_length: background genome length, used as the divisor for the
            average background binding ratio.

    NOTE: the templates below are filled with ``.format(**locals())``, so
    their placeholders are the names of local variables. Renaming a local,
    or reordering the reassignments near the end, changes the output.
    """
    db = swga.database.init_db(primer_db)
    db.connect()
    swga.database.create_tables(drop=False)
    # Averages of the binding frequencies and the primer count, fetched in a
    # single query.
    avg_fg_bind, avg_bg_bind, nprimers = (
        Primer
        .select(fn.Avg(Primer.fg_freq), fn.Avg(Primer.bg_freq), fn.Count(Primer.seq))
        .scalar(as_tuple=True))
    if (avg_fg_bind is None) or (avg_bg_bind is None):
        # NOTE(review): swga.error presumably raises by itself, making the
        # outer `raise` unreachable -- confirm.
        raise swga.error(
            "Could not calculate summary statistics; database may be corrupt")
    fg_bind_ratio = avg_fg_bind / float(fg_length)
    bg_bind_ratio = avg_bg_bind / float(bg_length)
    nactive = Primer.select().where(Primer.active==True).count()
    # Melting temperature statistics over the active primers only.
    min_tm, max_tm, avg_tm = (
        Primer
        .select(fn.Min(Primer.tm), fn.Max(Primer.tm), fn.Avg(Primer.tm))
        .where(Primer.active==True)
        .scalar(as_tuple=True))
    nsets = Set.select(fn.Count(Set._id)).scalar()
    if nsets > 0:
        # The "best" set is the first row when ordering by score.
        # NOTE(review): presumably ascending order, i.e. lowest score is
        # best -- confirm.
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(swga.database.get_primers_for_set(bs._id)).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # Every stored field of the set except the ones already reported
        # individually.
        bs_stats = "- "+"\n - ".join(
            fmtkv(k, v)
            for k, v in bs.__dict__['_data'].items()
            if k not in ["_id", "pids", "score"]
        )
    version_header = (
        "---------------------\n"
        "==== SWGA v{version} ====\n"
        "---------------------\n"
        .format(version=swga.__version__)
    )
    # Main report template; placeholders are resolved from locals() below.
    summary_msg = """ {version_header} PRIMER SUMMARY --------------- There are {nprimers} primers in the database. {nactive} are marked as active (i.e., they passed filter steps and will be used to find sets of compatible primers.) {ifzero_primers_msg} The average number of foreground genome binding sites is {avg_fg_bind:.0f}. (avg binding / genome_length = {fg_bind_ratio:05f}) The average number of background genome binding sites is {avg_bg_bind:.0f}. (avg binding / genome_length = {bg_bind_ratio:05f}) {melting_tmp_msg} SETS SUMMARY --------------- There are {nsets} sets in the database. {set_msg}--------------- Report generated from {primer_db} """
    # Hint shown only when no primer is active yet.
    ifzero_primers_msg = colored.green(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "")
    # This condition must be evaluated while nactive is still the raw count,
    # i.e. before it is replaced by its colored form below.
    melting_tmp_msg = (
        """The melting temp of the primers ranges between {min_tm:.2f}C and {max_tm:.2f}C with an average of {avg_tm:.2f}C."""
        if nactive > 0 and min_tm and max_tm
        else "No melting temps have been calculated yet.")
    ifzero_sets_msg = colored.green(
        "Run `swga find_sets` after identifying valid primers to begin collecting sets.\n")
    # Likewise, nsets is compared here before being replaced further down.
    set_msg = (""" The best scoring set is #{best_set}, with {bs_size} primers and a score of {bs_score:03f}.\nVarious statistics: {bs_stats} The primers in Set {best_set} are: {bs_primers} """ if nsets > 0 else ifzero_sets_msg)
    # From here on the locals are rebound to their display forms; the
    # format(**locals()) calls pick up whichever value is current.
    primer_db = os.path.abspath(primer_db)
    nprimers = colored.blue(nprimers, bold=True)
    nactive = colored.blue(nactive, bold=True)
    nsets = colored.blue(nsets, bold=True)
    # The sub-messages are formatted first because summary_msg embeds them.
    set_msg = set_msg.format(**locals())
    melting_tmp_msg = melting_tmp_msg.format(**locals())
    version_header = colored.green(version_header)
    summary_msg = summary_msg.format(**locals())
    with indent(2):
        puts(max_width(summary_msg, 80))
def _get_resource_file(fp):
    """Return the filesystem path of the ``swga`` package resource `fp`.

    The path is resolved first so that it can be included in the message
    passed to ``swga.error`` when the resource does not exist.
    """
    path = resource_filename("swga", fp)
    missing = not resource_exists("swga", fp)
    if missing:
        swga.error("Resource does not exist: {}".format(path))
    return path
def locations(self):
    """Return the locations stored on this object, decoded from JSON.

    ``self._locations`` holds the JSON text. When it is empty or unset,
    ``error`` is called instead and nothing is returned.
    """
    encoded = self._locations
    if not encoded:
        error("No locations stored for " + str(self))
        return None
    return json.loads(encoded)
def locations(self, fg_genome_fp=None):
    """Return the stored locations of this object, decoded from JSON.

    `fg_genome_fp` is accepted but not used by this implementation. When
    ``self._locations`` is empty or unset, ``swga.error`` is called and
    nothing is returned.
    """
    stored = self._locations
    if not stored:
        swga.error("No locations stored for " + str(self))
        return None
    return json.loads(stored)
def _update_n(self):
    """Refresh ``self.n`` from the current number of primers.

    When the count is zero, ``error`` is called with ``exception=False`` and
    ``self.n`` is left untouched.
    """
    remaining = self.primers.count()
    if remaining == 0:
        error('No primers left.', exception=False)
        return
    self.n = remaining