Beispiel #1
0
def main():
    """Parse the sub-command name and hand control to its implementation.

    Only the positional command and the optional ``-c/--config`` path are
    parsed here; every other argument is forwarded untouched to the selected
    command together with the config path. Automatic ``-h`` handling is
    disabled on this parser (``add_help=False``). A ``KeyboardInterrupt``
    raised while the command runs is reported through ``swga.error``.
    """
    # Dispatch table: sub-command name -> callable implementing it.
    dispatch = {
        'init': commands.init.main,
        'summary': commands.summary.main,
        'count': commands.count.main,
        'filter': commands.filter.main,
        'find_sets': commands.find_sets.main,
        'score': commands.score.main,
        'activate': commands.activate.main,
        'export': commands.export.main}

    default_cfg = swga.utils.configure.default_config_file

    arg_parser = argparse.ArgumentParser(
        usage=usage % default_cfg,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    arg_parser.add_argument('command', type=str, choices=dispatch.keys())
    arg_parser.add_argument(
        '-c', '--config',
        metavar="CFG_FILE",
        default=default_cfg,
        help='path to config file (default: %(default)s)')

    # Whatever this parser does not recognise belongs to the sub-command.
    known, passthrough = arg_parser.parse_known_args()

    try:
        handler = dispatch[known.command]
        handler(passthrough, known.config)
    except KeyboardInterrupt:
        swga.error("\n-- Stopped by user --", exception=False)
Beispiel #2
0
    def run(self):
        """Score the primers named by ``self.input`` and optionally store them.

        The set is scored with ``score.score_set`` using the expression in
        ``self.score_expression``; ``self.user_add_set`` then decides whether
        the set is saved through ``workspace.Set.add``. A message reports
        whether the set was added or already existed.
        """
        self.chr_ends = locate.chromosome_ends(self.fg_genome_fp)
        # Pre-bind the scoring expression so the scorer can be invoked
        # without having to pass it again.
        self.score_fun = functools.partial(
            score.default_score_set, expression=self.score_expression)

        primer_set = Primers(self.input)
        if len(primer_set) == 0:
            error(
                "No primers specified exist in database, aborting.",
                exception=False)

        mean_bg_dist = score.calculate_bg_dist_mean(
            primer_set, self.bg_length)

        scoring_args = {
            'primers': primer_set,
            'max_fg_bind_dist': 0,
            'bg_dist_mean': mean_bg_dist,
            'chr_ends': self.chr_ends,
            'score_fun': self.score_fun,
            'interactive': True,
        }
        set_score, set_stats, _ = score.score_set(**scoring_args)

        do_add_set, set_id = self.user_add_set(set_score, set_stats)
        if not do_add_set:
            return

        stored = workspace.Set.add(
            _id=set_id,
            primers=primer_set,
            score=set_score,
            scoring_fn=self.score_expression,
            **set_stats)

        # `Set.add` yielding None is treated as "an identical set exists".
        if stored is None:
            message("That primer set already exists.")
        else:
            message("Set {} added successfully.".format(set_id))
Beispiel #3
0
def main():
    """Parse the sub-command name and run the matching command.

    ``init`` is a plain function called with the leftover arguments; every
    other command is a class handed to ``setup_and_run`` together with its
    name and the leftover arguments. A ``KeyboardInterrupt`` raised while the
    command runs is reported through ``error``.
    """
    # Sub-command name -> implementation (a function for 'init', otherwise a
    # command class).
    dispatch = {
        'init': init.main,
        'summary': Summary,
        'count': Count,
        'filter': Filter,
        'find_sets': FindSets,
        'score': Score,
        'activate': Activate,
        'export': Export
    }

    arg_parser = argparse.ArgumentParser(
        usage=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    arg_parser.add_argument('command', type=str, choices=dispatch.keys())

    # Unrecognised arguments are passed on to the sub-command.
    selected, extra = arg_parser.parse_known_args()

    try:
        target = dispatch[selected.command]
        if selected.command != 'init':
            setup_and_run(target, selected.command, extra)
        else:
            target(extra)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
Beispiel #4
0
def main():
    """Command-line entry point: pick a sub-command and execute it.

    The first positional argument selects the command; all other arguments
    are left unparsed and forwarded. ``init`` is invoked directly, the
    remaining commands go through ``setup_and_run``.
    """
    commands_by_name = {
        'init': init.main,
        'summary': Summary,
        'count': Count,
        'filter': Filter,
        'find_sets': FindSets,
        'score': Score,
        'activate': Activate,
        'export': Export
    }

    cli = argparse.ArgumentParser(
        usage=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    cli.add_argument(
        'command',
        type=str,
        choices=commands_by_name.keys())

    parsed, leftovers = cli.parse_known_args()
    name = parsed.command

    try:
        if name == 'init':
            # 'init' is a plain function taking only the leftover arguments.
            commands_by_name[name](leftovers)
        else:
            setup_and_run(commands_by_name[name], name, leftovers)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
Beispiel #5
0
 def select_active():
     """Return the primers currently flagged as active, wrapped in ``Primers``.

     When no primer is active, ``error`` is called with ``exception=False``.
     """
     # NOTE(review): `== True` looks deliberate -- presumably it builds an
     # ORM query expression rather than a Python boolean; do not "simplify".
     active_query = Primer.select().where(Primer.active == True)
     n_active = active_query.count()
     if n_active == 0:
         error(
             'No active primers found. '
             'Run `swga filter` or `swga activate` first.',
             exception=False)
     return Primers(active_query)
Beispiel #6
0
def _get_resource_file(rs):
    """Locate the resource file `rs` and return its absolute path.

    Search order: ``<sys.prefix>/bin``, then ``<sys.exec_prefix>/bin``, then
    the ``bin`` directory of the ``swga`` package data. ``swga.error`` is
    called when none of them contains the file.
    """
    import sys

    # First try the interpreter's installation prefixes.
    for prefix in (sys.prefix, sys.exec_prefix):
        candidate = os.path.join(prefix, 'bin', rs)
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)

    # Fall back to the data shipped inside the package.
    packaged = os.path.join('bin', rs)
    if resource_exists('swga', packaged):
        candidate = resource_filename('swga', packaged)
    else:
        swga.error("Could not find `{}': try re-installing swga.".format(rs))
    return os.path.abspath(candidate)
Beispiel #7
0
def _get_resource_file(rs):
    """Return the absolute path of the resource file named `rs`.

    The file is looked up in ``<sys.prefix>/bin`` and then in
    ``<sys.exec_prefix>/bin``; if neither has it, the ``swga`` package data
    (``bin/<rs>``) is consulted. ``swga.error`` is called if it is not found
    anywhere.
    """
    import sys

    path = os.path.join(sys.prefix, 'bin', rs)
    if os.path.isfile(path):
        return os.path.abspath(path)

    path = os.path.join(sys.exec_prefix, 'bin', rs)
    if os.path.isfile(path):
        return os.path.abspath(path)

    # Neither prefix has it: check the data bundled with the package.
    relative = os.path.join('bin', rs)
    if not resource_exists('swga', relative):
        swga.error(
            "Could not find `{}': try re-installing swga.".format(rs))
    else:
        path = resource_filename('swga', relative)
    return os.path.abspath(path)
Beispiel #8
0
def add_set(_id, primers, **kwargs):
    """Create a ``Set`` row for `primers` unless an identical set exists.

    Sets are identified by the hash of the frozenset of their primers'
    ``seq`` values. Returns the newly created ``Set``, or ``None`` when a set
    with the same hash is already stored. Extra keyword arguments are passed
    to ``Set.create``. ``swga.error`` is called for falsy or empty `primers`.
    """
    if not primers:
        swga.error("Invalid primers for set")

    # Queries know their own size; anything else is measured with len().
    is_query = isinstance(primers, pw.SelectQuery)
    nprimers = primers.count() if is_query else len(primers)
    if nprimers == 0:
        swga.error("Cannot have an empty set")

    # NOTE(review): hash() of strings is salted per process on Python 3
    # unless PYTHONHASHSEED is fixed -- confirm this value is stable enough
    # to be persisted and compared across runs.
    sequences = frozenset(p.seq for p in primers)
    _hash = hash(sequences)

    n_same = (
        Set
        .select(pw.fn.Count(Set._id))
        .where(Set._hash == _hash)
        .scalar())
    if n_same > 0:
        return None

    new_set = Set.create(_id=_id, _hash=_hash, **kwargs)
    new_set.primers.add(primers)
    return new_set
Beispiel #9
0
def build_graph(max_hetdimer_bind, outfile):
    '''Write the compatibility graph of all active primers to `outfile`.

    Edges come from `build_edges(primers, max_hetdimer_bind)`; `error` is
    called (with `exception=False`) when there are none.
    '''
    # IDs are reassigned first, since they are only used for set_finder.
    active = Primers.select_active()
    primers = active.assign_ids()

    message("Composing primer compatibility graph...")
    edges = build_edges(primers, max_hetdimer_bind)

    n_edges = len(edges)
    if n_edges == 0:
        error(
            "No compatible primers. Try relaxing your parameters.",
            exception=False)

    with open(outfile, 'wb') as graph_fp:
        write_graph(primers, edges, graph_fp)
Beispiel #10
0
def build_graph(max_hetdimer_bind, outfile):
    '''Build the primer compatibility graph and write it to `outfile`.

    All active primers are selected and given fresh ids, the edges between
    them are computed with `build_edges`, and the result is written in
    binary mode by `write_graph`. An empty edge list is reported via `error`.
    '''
    # Fresh ids: they only matter to set_finder.
    primers = Primers.select_active().assign_ids()

    message("Composing primer compatibility graph...")
    compatible = build_edges(primers, max_hetdimer_bind)

    if not len(compatible):
        error("No compatible primers. Try relaxing your parameters.",
              exception=False)

    with open(outfile, 'wb') as sink:
        write_graph(primers, compatible, sink)
Beispiel #11
0
def init_db(db_fname, create_if_missing=False):
    '''
    Point the module-level `db` at the database file `db_fname` and return it.

    - `None` as a file name is reported through `swga.error`.
    - ":memory:" only produces a warning.
    - A file that does not exist is an error (reported with
      `exception=False`, i.e. the process exits) unless `create_if_missing`
      is True.
    '''
    if db_fname is None:
        swga.error("Primer db name cannot be `None`: corrupt preferences.cfg?")
    elif db_fname == ":memory:":
        swga.warn("Creating in-memory primer database; this may not work.")
    elif not (os.path.isfile(db_fname) or create_if_missing):
        not_found = (
            "Primer db not found at '%s': specify different filename or "
            "re-run `swga count`")
        # Exits here
        swga.error(not_found % db_fname, exception=False)

    db.init(db_fname)
    return db
Beispiel #12
0
def _parse_meta(opts, section):
    """Render the header of `section` for the generated config file.

    The result is a newline, the section's ``desc`` formatted as a
    ``##``-quoted comment, and the ``[section]`` line. Raises
    ``ExcludeOptionFlag`` when the section's META marks it with a falsy
    ``incfg``. Any ``KeyError`` (e.g. missing section or META entry) is
    reported through ``swga.error``.
    """
    try:
        meta = opts[section]["META"]
        include_in_cfg = meta.get('incfg', True)
        if not include_in_cfg:
            raise ExcludeOptionFlag
        comment = _format_comment(meta.get('desc', ''), quote='##')
        header = "[{section}]\n".format(section=section)
        return "\n" + comment + header
    except KeyError:
        raise swga.error("Malformed options file: try re-installing SWGA")
Beispiel #13
0
    def run(self):
        """Score a user-specified primer set and offer to save it.

        Primers are loaded from ``self.input`` and scored by
        ``score.score_set`` with the expression in ``self.score_expression``.
        If ``self.user_add_set`` agrees, the set is stored via
        ``workspace.Set.add`` and the outcome is reported with ``message``.
        """
        self.chr_ends = locate.chromosome_ends(self.fg_genome_fp)
        # Bind the scoring expression now so the scorer can later be called
        # without it.
        self.score_fun = functools.partial(score.default_score_set,
                                           expression=self.score_expression)

        candidates = Primers(self.input)
        if len(candidates) == 0:
            error("No primers specified exist in database, aborting.",
                  exception=False)

        mean_bg_dist = score.calculate_bg_dist_mean(candidates,
                                                    self.bg_length)

        scored = score.score_set(primers=candidates,
                                 max_fg_bind_dist=0,
                                 bg_dist_mean=mean_bg_dist,
                                 chr_ends=self.chr_ends,
                                 score_fun=self.score_fun,
                                 interactive=True)
        set_score, variables, _ = scored

        do_add_set, set_id = self.user_add_set(set_score, variables)

        if do_add_set:
            created = workspace.Set.add(_id=set_id,
                                        primers=candidates,
                                        score=set_score,
                                        scoring_fn=self.score_expression,
                                        **variables)
            # A None result is treated as "this set is already stored".
            if created is None:
                message("That primer set already exists.")
            else:
                message("Set {} added successfully.".format(set_id))
Beispiel #14
0
    def count_specific_kmers(self, kmers):
        """Count the given k-mers in both genomes and add them to the db.

        K-mers already present in the database are skipped with a message.
        The rest are grouped by length, counted in the foreground and
        background genomes, and passed to ``Primers.add``. K-mers for which
        ``primer_dict`` raises ``KeyError`` are skipped with a message.
        """
        try:
            # Report and drop everything that is already stored.
            already_stored = Primers.select_by_seqs(kmers)
            for stored in already_stored:
                message("{} already exists in db, skipping...".format(stored))
            kmers = [seq for seq in kmers if seq not in already_stored]
        except OperationalError:
            # An OperationalError here most likely means the database tables
            # have not been created yet.
            error(
                "It doesn't appear that the workspace has been initialized: "
                "run `swga init' first.")
        # NOTE(review): `output_dir` is not defined in this method;
        # presumably a module-level name -- confirm.
        mkdirp(output_dir)

        # One counting pass per k-mer length instead of one per k-mer.
        by_length = {}
        for seq in kmers:
            by_length.setdefault(len(seq), []).append(seq)

        # Only used in the progress message below.
        chunk_size = 199

        for k, same_length in by_length.items():
            fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir, 1)
            bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir, 1)

            primers = []
            for seq in same_length:
                try:
                    entry = primer_dict(seq, fg, bg, 0, INF, INF)
                except KeyError:
                    message(
                        "{} does not exist in foreground genome, skipping..."
                        .format(seq))
                else:
                    primers.append(entry)

            message(
                "Writing {n} {k}-mers into db in blocks of {cs}..."
                .format(n=len(primers), k=k, cs=chunk_size))
            Primers.add(primers, add_revcomp=False)
Beispiel #15
0
def parse_config(cfg_file, section):
    '''
    Read one section of a config file into a dict.

    A missing config file is reported through `swga.error` (with
    `exception=False`); a missing section is not an error and simply
    yields an empty dict.

    Returns:
    - a dict of the option names and values found in `section`
    '''
    parser = ConfigParser.SafeConfigParser()

    if not os.path.isfile(cfg_file):
        swga.error(
            "Cannot find parameters file. Run `swga init` or specify options "
            "manually.", exception=False)
        return {}

    with open(cfg_file) as handle:
        parser.readfp(handle)

    try:
        return dict(parser.items(section))
    except ConfigParser.NoSectionError:
        return {}
Beispiel #16
0
    def count_specific_kmers(self, kmers):
        """Add specific k-mers to the primer db with their genome counts.

        K-mers already in the database are reported and skipped. The others
        are counted per length in the foreground and background genomes and
        written with ``Primers.add``; a ``KeyError`` from ``primer_dict``
        makes that k-mer be skipped with a message.
        """
        try:
            # Warn about, then drop, the k-mers that are already stored.
            known = Primers.select_by_seqs(kmers)
            for primer in known:
                message("{} already exists in db, skipping...".format(primer))
            kmers = [kmer for kmer in kmers if kmer not in known]
        except OperationalError:
            # Most likely the database tables do not exist yet.
            error("It doesn't appear that the workspace has been initialized: "
                  "run `swga init' first.")
        # NOTE(review): `output_dir` is not defined in this method;
        # presumably a module-level name -- confirm.
        mkdirp(output_dir)

        # Bucket by length so each k is counted once per genome.
        buckets = defaultdict(list)
        for kmer in kmers:
            buckets[len(kmer)].append(kmer)

        skip_msg = "{} does not exist in foreground genome, skipping..."
        write_msg = "Writing {n} {k}-mers into db in blocks of {cs}..."

        for k, bucket in buckets.items():
            fg = swga.kmers.count_kmers(k, self.fg_genome_fp, output_dir, 1)
            bg = swga.kmers.count_kmers(k, self.bg_genome_fp, output_dir, 1)

            primers = []
            for kmer in bucket:
                try:
                    primers.append(primer_dict(kmer, fg, bg, 0, INF, INF))
                except KeyError:
                    message(skip_msg.format(kmer))

            # This value only appears in the message below.
            chunk_size = 199
            message(write_msg.format(n=len(primers), k=k, cs=chunk_size))
            Primers.add(primers, add_revcomp=False)
Beispiel #17
0
    def check_version(self, version):
        """Check the version of the database and compare it to the swga version.

        The workspace is considered compatible when its stored version
        satisfies ``==<major>.<minor>`` of `version`. Otherwise ``error`` is
        called with ``exception=False`` (per the original documentation this
        ends in a SystemExit).

        Args:
            version: the swga version string to compare against.
        """
        try:
            # Read and parse the stored version in a single step; the
            # previous extra read was a dead store.
            db_ver = semver.Version(self.metadata.version)
        except pw.OperationalError:
            # The version could not be read from the database. The
            # placeholder is expected not to satisfy the spec below, so the
            # mismatch error is reported -- NOTE(review): confirm against
            # the semver library's `in` semantics for non-Version values.
            db_ver = "<NA>"
        ver = semver.Version(version)
        # Compatibility is decided on major.minor only.
        spec = semver.Spec('=={}.{}'.format(ver.major, ver.minor))
        if db_ver not in spec:
            error(
                "This workspace was created with a different version of swga.\n"
                "  Workspace version: {}\n"
                "  swga version:      {}\n"
                "Please re-initialize the workspace with `swga init` or use a "
                "different version of swga."
                .format(db_ver, ver),
                exception=False,
                wrap=False
            )
Beispiel #18
0
def summary(primer_db, fg_length, bg_length):
    """Print a human-readable report about the primer database.

    The report covers the number of primers (total and active), the average
    number of foreground/background binding sites and their ratio to the
    given genome lengths, the melting-temperature range of the active
    primers, and the number of stored sets together with details of the set
    selected as "best".

    Args:
        primer_db: path of the primer database; passed to
            ``swga.database.init_db`` and shown (as an absolute path) in the
            report.
        fg_length: foreground genome length, used as divisor for the average
            foreground binding count.
        bg_length: background genome length, used as divisor for the average
            background binding count.

    NOTE: the message templates are filled with ``.format(**locals())``, so
    the names of the local variables in this function are part of the
    templates. Renaming a local silently breaks the report.
    """

    db = swga.database.init_db(primer_db)
    db.connect()
    # drop=False: existing tables are not dropped.
    swga.database.create_tables(drop=False)

    # Aggregate statistics over all primers, fetched as one tuple.
    avg_fg_bind, avg_bg_bind, nprimers = (
        Primer
        .select(fn.Avg(Primer.fg_freq),
                fn.Avg(Primer.bg_freq),
                fn.Count(Primer.seq))
        .scalar(as_tuple=True))

    if (avg_fg_bind is None) or (avg_bg_bind is None):
        raise swga.error(
            "Could not calculate summary statistics; database may be corrupt")

    fg_bind_ratio = avg_fg_bind / float(fg_length)
    bg_bind_ratio = avg_bg_bind / float(bg_length)
    nactive = Primer.select().where(Primer.active==True).count()

    # Melting-temperature statistics, restricted to active primers.
    min_tm, max_tm, avg_tm = (
        Primer
        .select(fn.Min(Primer.tm),
                fn.Max(Primer.tm),
                fn.Avg(Primer.tm))
        .where(Primer.active==True)
        .scalar(as_tuple=True))

    nsets = Set.select(fn.Count(Set._id)).scalar()

    # The bs_* / best_set names below exist only when there is at least one
    # set; the template that uses them is likewise only selected in that case.
    if nsets > 0:
        # NOTE(review): takes the first set when ordering by score --
        # presumably ascending, i.e. the lowest score is "best"; confirm.
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(swga.database.get_primers_for_set(bs._id)).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # Every stored field of the set except the id, pids and score.
        bs_stats = "- "+"\n - ".join(
            fmtkv(k, v)
            for k, v in bs.__dict__['_data'].items()
            if k not in ["_id", "pids", "score"]
        )

    version_header = (
        "---------------------\n"
        "==== SWGA v{version} ====\n"
        "---------------------\n"
        .format(version=swga.__version__)
    )

    # Main report template; placeholders are resolved from locals() below.
    summary_msg = """
    {version_header}

    PRIMER SUMMARY
    ---------------
    There are {nprimers} primers in the database.

    {nactive} are marked as active (i.e., they passed filter steps and will be used to find sets of compatible primers.) {ifzero_primers_msg}

    The average number of foreground genome binding sites is {avg_fg_bind:.0f}.
       (avg binding / genome_length = {fg_bind_ratio:05f})
    The average number of background genome binding sites is {avg_bg_bind:.0f}.
       (avg binding / genome_length = {bg_bind_ratio:05f})

    {melting_tmp_msg}


    SETS SUMMARY
    ---------------
    There are {nsets} sets in the database.
    {set_msg}---------------

    Report generated from {primer_db}
"""

    # Hint shown only when no primer is active (empty string otherwise).
    ifzero_primers_msg = colored.green(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "")
    melting_tmp_msg = (
        """The melting temp of the primers ranges between {min_tm:.2f}C and {max_tm:.2f}C with an average of {avg_tm:.2f}C."""
        if nactive > 0 and min_tm and max_tm else
        "No melting temps have been calculated yet.")
    ifzero_sets_msg = colored.green(
        "Run `swga find_sets` after identifying valid primers to begin collecting sets.\n")

    set_msg = ("""
    The best scoring set is #{best_set}, with {bs_size} primers and a score of {bs_score:03f}.\nVarious statistics:
    {bs_stats}
The primers in Set {best_set} are:
    {bs_primers}
    """ if nsets > 0 else ifzero_sets_msg)



    # From here on the counters are re-bound to colored values for display;
    # all numeric comparisons on them have to happen before this point.
    primer_db = os.path.abspath(primer_db)
    nprimers = colored.blue(nprimers, bold=True)
    nactive = colored.blue(nactive, bold=True)
    nsets = colored.blue(nsets, bold=True)
    # Inner templates are resolved first so that the outer template receives
    # their final text.
    set_msg = set_msg.format(**locals())
    melting_tmp_msg = melting_tmp_msg.format(**locals())
    version_header = colored.green(version_header)
    summary_msg = summary_msg.format(**locals())

    # Print the report indented by 2 and limited to a width of 80.
    with indent(2):
        puts(max_width(summary_msg, 80))
Beispiel #19
0
def _get_resource_file(fp):
    """Return the file-system path of the ``swga`` package resource `fp`.

    ``swga.error`` is called when the resource does not exist.
    """
    path = resource_filename("swga", fp)
    if resource_exists("swga", fp):
        return path
    swga.error("Resource does not exist: {}".format(path))
    return path
Beispiel #20
0
 def locations(self):
     """Return the stored locations, decoded from the JSON in ``_locations``.

     ``error`` is called when nothing is stored.
     """
     if not self._locations:
         error("No locations stored for " + str(self))
         return None
     return json.loads(self._locations)
Beispiel #21
0
 def locations(self, fg_genome_fp=None):
     """Decode and return the locations stored as JSON in ``_locations``.

     `fg_genome_fp` is accepted but not used here. ``swga.error`` is called
     when no locations are stored.
     """
     has_locations = bool(self._locations)
     if has_locations:
         return json.loads(self._locations)
     swga.error("No locations stored for " + str(self))
Beispiel #22
0
 def _update_n(self):
     n = self.primers.count()
     if n == 0:
         error('No primers left.', exception=False)
     else:
         self.n = n