Esempio n. 1
0
    def run(self):
        '''Load primers, optionally filter them, and mark the result active.

        Primers are read from ``self.input`` when it is set; otherwise a
        default ``Primers()`` collection is built (per the original note,
        this pulls from the db) and filtering is forced on.
        '''
        if not self.input:
            # Without an input file, filtering can never be skipped
            self.skip_filtering = False
            primers = Primers()
        else:
            with open(self.input, 'rb') as infile:
                primers = Primers(infile)

        assert isinstance(primers, Primers)

        # Undo all active marks, if any
        Primer.update(active=False).execute()

        if not self.skip_filtering:
            # Each step is invoked on the value returned by the previous one,
            # exactly as a fluent chain would do.
            stage = primers.filter_min_fg_rate(self.min_fg_bind)
            stage = stage.filter_max_bg_rate(self.max_bg_bind)
            stage = stage.summarize()
            stage = stage.filter_tm_range(self.min_tm, self.max_tm)
            stage = stage.limit_to(self.max_primers)
            stage.filter_max_gini(self.max_gini, self.fg_genome_fp)

        primers.activate(self.max_primers)
Esempio n. 2
0
def primers():
    '''Create three active primers and return them in creation order.'''
    # (seq, fg_freq, bg_freq, ratio)
    specs = (
        ("ATGC", 1, 2, 1.0),
        ("GGCC", 1, 3, 0.5),
        ("CCTA", 2, 0, float('inf')),
    )
    created = []
    for seq, fg_freq, bg_freq, ratio in specs:
        created.append(
            Primer.create(seq=seq, fg_freq=fg_freq, bg_freq=bg_freq,
                          ratio=ratio, active=True))
    return created
Esempio n. 3
0
    def run(self):
        '''Load primers, optionally filter them, and mark the result active.

        Primers are read from ``self.input`` when it is set; otherwise a
        default ``Primers()`` collection is built (per the original note,
        this pulls from the db) and filtering is forced on.
        '''
        if not self.input:
            # Without an input file, filtering can never be skipped
            self.skip_filtering = False
            primers = Primers()
        else:
            with open(self.input, 'rb') as infile:
                primers = Primers(infile)

        assert isinstance(primers, Primers)

        # Undo all active marks, if any
        Primer.update(active=False).execute()

        if not self.skip_filtering:
            # Each step is invoked on the value returned by the previous one,
            # exactly as a fluent chain would do.
            stage = primers.filter_min_fg_rate(self.min_fg_bind)
            stage = stage.filter_max_bg_rate(self.max_bg_bind)
            stage = stage.summarize()
            stage = stage.filter_tm_range(self.min_tm, self.max_tm)
            stage = stage.limit_to(self.max_primers)
            stage.filter_max_gini(self.max_gini, self.fg_genome_fp)

        primers.activate(self.max_primers)
Esempio n. 4
0
def test_linearize_binding_sites(kmer, ws, fastafile):
    '''Linearized sites must contain every chromosome end and binding site.'''
    primer = Primer.create(seq=kmer)
    primer._update_locations(fastafile)
    chromosome_ends = swga.locate.chromosome_ends(fastafile)
    linearized = swga.locate.linearize_binding_sites(
        [primer], chromosome_ends)

    # (number of sites + (2*number of chromosomes) - (any overlaps))
    assert len(linearized) == 10

    for record, (start, end) in chromosome_ends.iteritems():
        assert start in linearized
        assert end in linearized
        for site in primer.locations[record]:
            assert site in linearized
Esempio n. 5
0
    def count_kmers(self):
        '''Count k-mers of every size in the configured range and store them.

        For each size from ``self.min_size`` to ``self.max_size`` inclusive,
        k-mers are counted in the foreground and background genomes (and in
        the exclusion file, if one is configured), turned into primer dicts
        and handed to ``Primers.add`` together with a request for their
        reverse complements.
        '''
        # Previously stored primers have to go first because of uniqueness
        # constraints; ask before wiping them unless forced.
        already_counted = Primer.select().count() > 0
        if already_counted:
            if not self.force:
                click.confirm(
                    "Remove all previously-found primers and re-count?",
                    abort=True)
            self.workspace.reset_primers()

        mkdirp(output_dir)

        for size in xrange(self.min_size, self.max_size + 1):
            fg_counts = swga.kmers.count_kmers(
                size, self.fg_genome_fp, output_dir)
            bg_counts = swga.kmers.count_kmers(
                size, self.bg_genome_fp, output_dir)

            if not self.exclude_fp:
                excluded = {}
            else:
                assert os.path.isfile(self.exclude_fp)
                excluded = swga.kmers.count_kmers(
                    size, self.exclude_fp, output_dir,
                    self.exclude_threshold)

            # Keep kmers found in foreground, merging bg binding values, and
            # excluding those found in the excluded fasta. Empty dicts coming
            # back from primer_dict are dropped.
            excluded_seqs = excluded.viewkeys()
            candidates = []
            for seq in fg_counts.viewkeys():
                if seq in excluded_seqs:
                    continue
                entry = primer_dict(
                    seq, fg_counts, bg_counts, self.min_fg_bind,
                    self.max_bg_bind, self.max_dimer_bp)
                if entry != {}:
                    candidates.append(entry)

            n_candidates = len(candidates)
            chunk_size = 199
            progress = "Writing {n} {k}-mers into db in blocks of {cs}..."
            # The reported count is doubled, presumably to account for the
            # reverse complements added below.
            message(progress.format(
                n=n_candidates * 2, k=size, cs=chunk_size))
            Primers.add(candidates, add_revcomp=True)

        size_range = (self.min_size, self.max_size)
        message("Counted kmers in range %d-%d" % size_range)
Esempio n. 6
0
    def count_kmers(self):
        '''Count k-mers of every size in the configured range and store them.

        For each size from ``self.min_size`` to ``self.max_size`` inclusive,
        k-mers are counted in the foreground and background genomes (and in
        the exclusion file, if one is configured), turned into primer dicts
        and handed to ``Primers.add`` together with a request for their
        reverse complements.
        '''
        # Previously stored primers have to go first because of uniqueness
        # constraints; ask before wiping them unless forced.
        already_counted = Primer.select().count() > 0
        if already_counted:
            if not self.force:
                click.confirm(
                    "Remove all previously-found primers and re-count?",
                    abort=True)
            self.workspace.reset_primers()

        mkdirp(output_dir)

        for size in xrange(self.min_size, self.max_size + 1):
            fg_counts = swga.kmers.count_kmers(
                size, self.fg_genome_fp, output_dir)
            bg_counts = swga.kmers.count_kmers(
                size, self.bg_genome_fp, output_dir)

            if not self.exclude_fp:
                excluded = {}
            else:
                assert os.path.isfile(self.exclude_fp)
                excluded = swga.kmers.count_kmers(
                    size, self.exclude_fp, output_dir,
                    self.exclude_threshold)

            # Keep kmers found in foreground, merging bg binding values, and
            # excluding those found in the excluded fasta. Empty dicts coming
            # back from primer_dict are dropped.
            excluded_seqs = excluded.viewkeys()
            candidates = []
            for seq in fg_counts.viewkeys():
                if seq in excluded_seqs:
                    continue
                entry = primer_dict(
                    seq, fg_counts, bg_counts, self.min_fg_bind,
                    self.max_bg_bind, self.max_dimer_bp)
                if entry != {}:
                    candidates.append(entry)

            n_candidates = len(candidates)
            chunk_size = 199
            progress = "Writing {n} {k}-mers into db in blocks of {cs}..."
            # The reported count is doubled, presumably to account for the
            # reverse complements added below.
            message(progress.format(
                n=n_candidates * 2, k=size, cs=chunk_size))
            Primers.add(candidates, add_revcomp=True)

        size_range = (self.min_size, self.max_size)
        message("Counted kmers in range %d-%d" % size_range)
Esempio n. 7
0
 def primers(self):
     '''Create six primers with ids 0-5 and return them in id order.'''
     sequences = [
         # reference primer
         "ATGCTC",
         # rev. complement has 4 bases overlapping
         "CAGCAT",
         # rev. complement has 3 bases overlapping
         "GAGGTA",
         "ATCGAG",
         # rev. complement has one base overlapping
         "TTCCAC",
         # substring of reference primer
         "ATGC",
     ]
     created = []
     for primer_id, seq in enumerate(sequences):
         created.append(Primer.create(_id=primer_id, seq=seq))
     return created
Esempio n. 8
0
 def primers(self):
     '''Create six primers with ids 0-5 and return them in id order.'''
     sequences = [
         # reference primer
         "ATGCTC",
         # rev. complement has 4 bases overlapping
         "CAGCAT",
         # rev. complement has 3 bases overlapping
         "GAGGTA",
         "ATCGAG",
         # rev. complement has one base overlapping
         "TTCCAC",
         # substring of reference primer
         "ATGC",
     ]
     created = []
     for primer_id, seq in enumerate(sequences):
         created.append(Primer.create(_id=primer_id, seq=seq))
     return created
Esempio n. 9
0
def primers(ws, seqs):
    '''Create one Primer per sequence in ``seqs`` and return them as a list.

    ``ws`` is not used in the body; presumably it is requested only so the
    workspace is set up before primers are created -- TODO confirm.
    '''
    created = []
    for sequence in seqs:
        created.append(Primer.create(seq=sequence))
    return created
Esempio n. 10
0
def primers(ws, seqs):
    '''Create one Primer per sequence in ``seqs`` and return them as a list.

    ``ws`` is not used in the body; presumably it is requested only so the
    workspace is set up before primers are created -- TODO confirm.
    '''
    created = []
    for sequence in seqs:
        created.append(Primer.create(seq=sequence))
    return created
Esempio n. 11
0
    def run(self):
        '''Gather primer and set statistics and echo a formatted summary.

        NOTE: this method passes ``locals()`` to ``str.format`` twice, so the
        names of the local variables below are part of its behavior. The
        templates (``summary_template`` and ``best_set_desc``, defined outside
        this method) presumably refer to these locals by name; do not rename
        or remove a local without checking the templates.
        '''
        self.summary_msg = summary_template
        self.best_set_desc = best_set_desc
        # Average foreground/background frequencies and the primer count,
        # fetched in a single query
        avg_fg_bind, avg_bg_bind, nprimers = (
            Primer
            .select(fn.Avg(Primer.fg_freq),
                    fn.Avg(Primer.bg_freq),
                    fn.Count(Primer.seq))
            .scalar(as_tuple=True))

        # The averages can come back as None (presumably when there are no
        # primers yet); fall back to 0 so the divisions below still work
        if (avg_fg_bind is None) or (avg_bg_bind is None):
            (avg_fg_bind, avg_bg_bind) = (0, 0)

        fg_bind_ratio = avg_fg_bind / float(self.fg_length)
        bg_bind_ratio = avg_bg_bind / float(self.bg_length)
        nactive = Primer.select().where(Primer.active == True).count()

        # Melting temperature statistics over the active primers only
        min_tm, max_tm, avg_tm = (
            Primer
            .select(fn.Min(Primer.tm),
                    fn.Max(Primer.tm),
                    fn.Avg(Primer.tm))
            .where(Primer.active == True)
            .scalar(as_tuple=True))

        nsets = Set.select(fn.Count(Set._id)).scalar()

        if nsets > 0:
            # The "best" set is the first one when ordered by score
            # (NOTE(review): assumes a lower score is better -- confirm)
            bs = Set.select().order_by(Set.score).limit(1).get()
            bs_primers = ", ".join(bs.primer_seqs()).strip()
            best_set = bs._id
            bs_size = bs.set_size
            bs_score = bs.score
            # One "key: value" style entry per exported field, skipping the
            # fields listed below
            bs_stats = "- " + "\n - ".join(
                fmtkv(k, bs.__dict__['_data'][k])
                for k in bs.exported_fields()
                if k not in ["_id", "pids", "score", "primers"]
            )
            # Fill the best-set template from the locals computed so far
            self.best_set_desc = self.best_set_desc.format(**locals())

        # Hints are empty strings unless the corresponding count is zero
        if_no_primers_msg = click.style(
            "Run `swga count` to find possible primers."
            if nprimers == 0 else "", fg='green')
        if_no_active_primers_msg = click.style(
            "Run `swga filter` to identify primers to use."
            if nactive == 0 else "", fg='green')
        # min_tm and max_tm are tested for truthiness, so a value of 0 is
        # treated the same as a missing value here
        melting_tmp_msg = (
            "The melting temp of the primers ranges between {min_tm:.2f}C and "
            "{max_tm:.2f}C with an average of {avg_tm:.2f}C."
            if nactive > 0 and min_tm and max_tm else
            "No melting temps have been calculated yet.").format(**locals())
        ifzero_sets_msg = click.style(
            "Run `swga find_sets` after identifying valid primers to begin "
            "collecting sets.\n", fg='green')

        set_msg = (self.best_set_desc if nsets > 0 else ifzero_sets_msg)

        primer_db = os.path.abspath(self.primer_db)
        # From here on the counts are styled strings, not numbers; all
        # numeric comparisons on them have to happen above this point
        nprimers = click.style(str(nprimers), bold=True, fg='blue')
        nactive = click.style(str(nactive), bold=True, fg='blue')
        nsets = click.style(str(nsets), bold=True, fg='blue')

        self.header = click.style("swga v{}".format(__version__), fg='green')

        # Copy all the relevant values into one dict; locals are applied last
        # and therefore win over instance attributes of the same name
        values = self.__dict__.copy()
        values.update(locals())

        # Format the summary message with all the calculated values

        self.summary_msg = self.summary_msg.format(**values)
        click.echo(quote(self.summary_msg, quote="  ", width=200))
Esempio n. 12
0
def validate_order_field(field, model):
    '''Ensure the given field exists in the model.

    A falsy ``field`` (``None`` or an empty string) is accepted without
    consulting the model. Otherwise, if ``field`` is not among
    ``model.fields()``, the problem is reported through ``swga.error``
    together with the valid choices for that same model.

    :param field: name of the field to order by, or a falsy value for none
    :param model: object exposing a ``fields()`` callable that returns the
        valid field names
    '''
    if field and field not in model.fields():
        # List the fields of the model that was actually checked, rather
        # than always listing Primer's fields.
        swga.error(
            "Cannot order by '{}'. Valid choices are {}"
            .format(field, ", ".join(model.fields())))
Esempio n. 13
0
 def test_add_primers(self):
     '''Adding with add_revcomp=True must also store the reverse complement.'''
     forward_only = [{'seq': "AAAA"}]
     Primers.add(forward_only, add_revcomp=True)
     revcomp_rows = Primer.select().where(Primer.seq == "TTTT")
     assert revcomp_rows.count() == 1
Esempio n. 14
0
 def test_bad_set_add(self):
     '''Set.add must raise ValueError for None or for the "XX" primer query.'''
     with pytest.raises(ValueError):
         Set.add(0, None, score=1)

     # Build the query outside the raises-block so that only Set.add itself
     # can satisfy the expected ValueError.
     invalid_primers = Primer.select().where(Primer.seq == "XX")
     with pytest.raises(ValueError):
         Set.add(0, primers=invalid_primers, score=1)
Esempio n. 15
0
    def run(self):
        '''Gather primer and set statistics and echo a formatted summary.

        NOTE: this method passes ``locals()`` to ``str.format`` twice, so the
        names of the local variables below are part of its behavior. The
        templates (``summary_template`` and ``best_set_desc``, defined outside
        this method) presumably refer to these locals by name; do not rename
        or remove a local without checking the templates.
        '''
        self.summary_msg = summary_template
        self.best_set_desc = best_set_desc
        # Average foreground/background frequencies and the primer count,
        # fetched in a single query
        avg_fg_bind, avg_bg_bind, nprimers = (Primer.select(
            fn.Avg(Primer.fg_freq), fn.Avg(Primer.bg_freq),
            fn.Count(Primer.seq)).scalar(as_tuple=True))

        # The averages can come back as None (presumably when there are no
        # primers yet); fall back to 0 so the divisions below still work
        if (avg_fg_bind is None) or (avg_bg_bind is None):
            (avg_fg_bind, avg_bg_bind) = (0, 0)

        fg_bind_ratio = avg_fg_bind / float(self.fg_length)
        bg_bind_ratio = avg_bg_bind / float(self.bg_length)
        nactive = Primer.select().where(Primer.active == True).count()

        # Melting temperature statistics over the active primers only
        min_tm, max_tm, avg_tm = (Primer.select(fn.Min(
            Primer.tm), fn.Max(Primer.tm), fn.Avg(
                Primer.tm)).where(Primer.active == True).scalar(as_tuple=True))

        nsets = Set.select(fn.Count(Set._id)).scalar()

        if nsets > 0:
            # The "best" set is the first one when ordered by score
            # (NOTE(review): assumes a lower score is better -- confirm)
            bs = Set.select().order_by(Set.score).limit(1).get()
            bs_primers = ", ".join(bs.primer_seqs()).strip()
            best_set = bs._id
            bs_size = bs.set_size
            bs_score = bs.score
            # One "key: value" style entry per exported field, skipping the
            # fields listed below
            bs_stats = "- " + "\n - ".join(
                fmtkv(k, bs.__dict__['_data'][k])
                for k in bs.exported_fields()
                if k not in ["_id", "pids", "score", "primers"])
            # Fill the best-set template from the locals computed so far
            self.best_set_desc = self.best_set_desc.format(**locals())

        # Hints are empty strings unless the corresponding count is zero
        if_no_primers_msg = click.style(
            "Run `swga count` to find possible primers."
            if nprimers == 0 else "",
            fg='green')
        if_no_active_primers_msg = click.style(
            "Run `swga filter` to identify primers to use."
            if nactive == 0 else "",
            fg='green')
        # min_tm and max_tm are tested for truthiness, so a value of 0 is
        # treated the same as a missing value here
        melting_tmp_msg = (
            "The melting temp of the primers ranges between {min_tm:.2f}C and "
            "{max_tm:.2f}C with an average of {avg_tm:.2f}C."
            if nactive > 0 and min_tm and max_tm else
            "No melting temps have been calculated yet.").format(**locals())
        ifzero_sets_msg = click.style(
            "Run `swga find_sets` after identifying valid primers to begin "
            "collecting sets.\n",
            fg='green')

        set_msg = (self.best_set_desc if nsets > 0 else ifzero_sets_msg)

        primer_db = os.path.abspath(self.primer_db)
        # From here on the counts are styled strings, not numbers; all
        # numeric comparisons on them have to happen above this point
        nprimers = click.style(str(nprimers), bold=True, fg='blue')
        nactive = click.style(str(nactive), bold=True, fg='blue')
        nsets = click.style(str(nsets), bold=True, fg='blue')

        self.header = click.style("swga v{}".format(__version__), fg='green')

        # Copy all the relevant values into one dict; locals are applied last
        # and therefore win over instance attributes of the same name
        values = self.__dict__.copy()
        values.update(locals())

        # Format the summary message with all the calculated values

        self.summary_msg = self.summary_msg.format(**values)
        click.echo(quote(self.summary_msg, quote="  ", width=200))