예제 #1
0
def make_graph(max_hetdimer_bind, outfile):
    '''Selects all active primers and outputs a primer compatibility graph.

    Every primer's `_id` is first reset to -1. The active primers are then
    numbered 1..n in order of descending `ratio`, saved back to the database,
    and passed to `graph.test_pairs` to obtain the graph edges. The primers
    and edges are finally written to `outfile`.

    Arguments:
    max_hetdimer_bind -- passed through unchanged to `graph.test_pairs`
    outfile -- path of the file the graph is written to (opened as 'wb')
    '''

    # Reset all the primer IDs (as ids are only used for set_finder)
    Primer.update(_id=-1).execute()

    primers = list(
        Primer.select()
        .where(Primer.active == True)
        .order_by(Primer.ratio.desc())
        .execute())

    if not primers:
        # This function works on active *primers*, which is what the
        # suggested `swga filter` step produces.
        swga.error("No active primers found. Run `swga filter` first.")

    # Ids are 1-based and follow the descending-ratio order of the query
    for i, p in enumerate(primers, start=1):
        p._id = i

    update_in_chunks(primers, show_progress=False)

    swga.message("Composing primer compatibility graph...")
    edges = graph.test_pairs(primers, max_hetdimer_bind)

    if not edges:
        # NOTE(review): called with exception=False, so presumably this only
        # reports the problem and execution continues -- confirm.
        swga.error("No compatible primers. Try relaxing your parameters.", exception=False)

    with open(outfile, 'wb') as out:
        graph.write_graph(primers, edges, out)
예제 #2
0
def primers():
    '''Create three active primers and return them as a list.'''
    # (seq, fg_freq, bg_freq, ratio) for each primer, in creation order
    rows = (
        ("ATGC", 1, 2, 1.0),
        ("GGCC", 1, 3, 0.5),
        ("CCTA", 2, 0, float('inf')),
    )
    return [
        Primer.create(seq=seq, fg_freq=fg, bg_freq=bg, ratio=ratio, active=True)
        for seq, fg, bg, ratio in rows
    ]
예제 #3
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_bad_add_set_function(self, initdb, tprimers):
     '''Should raise errors if invalid primers supplied.

     Two cases are checked: `primers=None`, and a query for the sequence
     "XX" (presumably matching no primer in the test database).
     '''
     with pytest.raises(swga.SWGAError):
         database.add_set(_id=2, primers=None, score=100)

     # Build the query outside the `raises` block so that only the
     # add_set call itself can satisfy the expectation.
     invalid_primers = Primer.select().where(Primer.seq == "XX")
     with pytest.raises(swga.SWGAError):
         database.add_set(_id=3, primers=invalid_primers, score=100)
예제 #4
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_create_tables(self):
     '''Tables can be created in an in-memory db and a primer added to a set.'''
     db = database.db
     db.init(":memory:")
     database.create_tables()

     primer = Primer.create(seq="ATGC")
     primer_set = Set.create(_id=1, score=1)
     primer_set.primers.add(primer)

     db.close()
예제 #5
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_update_in_chunks(self, initdb, tprimers, seqs):
     '''Every modified primer must be persisted by update_in_chunks.'''
     new_freq = 100
     for p in tprimers:
         p.fg_freq = new_freq
     database.update_in_chunks(tprimers)

     # Re-read from the database rather than trusting the in-memory objects
     stored = Primer.select().where(Primer.seq << seqs)
     assert all(row.fg_freq == new_freq for row in stored)
예제 #6
0
파일: primers.py 프로젝트: pombredanne/swga
def update_Tms(primers):
    '''
    Calls `update_tm()` on each of the given primers whose `tm` is NULL in
    the database, then saves those primers back to the primer db.
    '''
    lacks_tm = (Primer.seq << primers) & (Primer.tm >> None)
    targets = list(Primer.select().where(lacks_tm))
    for target in targets:
        target.update_tm()
    swga.database.update_in_chunks(targets, label="Updating primer db... ")
예제 #7
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_destroy_primer_sets(self, initdb, tprimers, tset):
     '''
     Deleting a set must keep its primers but remove the set from each
     primer's `sets` relation.
     '''
     five_primers = Primer.select().limit(5)
     tset.primers.add(five_primers)
     tset.delete_instance()

     # The primers themselves survive the deletion...
     assert five_primers.count() == 5
     # ...but none of them still refers to the deleted set
     assert all(tset not in primer.sets for primer in five_primers)
예제 #8
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_create_primers_sets(self, initdb, tprimers, tset):
     '''
     Primers added to a set must be reachable from both sides of the
     primer/set relationship.
     '''
     linked = Primer.select().limit(5)
     tset.primers.add(linked)

     assert linked.count() == 5
     for primer in linked:
         assert tset in primer.sets
         assert primer in tset.primers
예제 #9
0
파일: filter.py 프로젝트: pombredanne/swga
def filter_primers(
        primers,
        min_fg_bind,
        max_bg_bind,
        fg_length,
        bg_length,
        min_tm,
        max_tm,
        max_primers):
    """
    Looks up the given sequences in the primer database and returns a query
    for those that pass the binding-frequency and melting-temp filters.

    The result is limited to the `max_primers` primers with the lowest
    background frequency, ordered by descending ratio. A progress message is
    emitted after each filtering stage. `fg_length` and `bg_length` are
    accepted but not used in this function.
    """
    known = Primer.select().where(Primer.seq << primers)

    # Stage 1: binding-frequency thresholds, reported separately per genome
    fg_pass = Primer.select().where(
        (Primer.seq << known) & (Primer.fg_freq >= min_fg_bind))
    swga.message("{} primers bind foreground genome with freq >= {} sites"
                 .format(fg_pass.count(), min_fg_bind))

    bg_pass = Primer.select().where(
        (Primer.seq << known) & (Primer.bg_freq <= max_bg_bind))
    swga.message("{} primers bind background genome with freq <= {} sites"
                 .format(bg_pass.count(), max_bg_bind))

    both_pass = Primer.select().where(
        (Primer.seq << known) &
        (Primer.seq << fg_pass) &
        (Primer.seq << bg_pass))
    swga.message(
        "{} primers pass both fg and bg binding freq filters"
        .format(both_pass.count()))

    # Stage 2: melting temp; fill it in first for primers that lack one
    swga.primers.update_Tms(both_pass)

    in_tm_range = Primer.select().where(
        (Primer.seq << both_pass) &
        (Primer.tm <= max_tm) &
        (Primer.tm >= min_tm))
    swga.message("{} of those primers have a melting temp within given range"
                 .format(in_tm_range.count()))

    # Stage 3: keep the `max_primers` lowest background binders, then
    # present those ordered from highest to lowest ratio
    lowest_bg = (
        Primer.select()
        .where(Primer.seq << in_tm_range)
        .order_by(Primer.bg_freq)
        .limit(max_primers))

    return (
        Primer.select()
        .where(Primer.seq << lowest_bg)
        .order_by(Primer.ratio.desc()))
예제 #10
0
파일: primers.py 프로젝트: pombredanne/swga
def update_locations(primers, fg_genome_fp):
    '''
    Fills in location data for those of the given primers whose
    `_locations` is NULL in the database, then saves them to the primer db.
    '''
    lacks_locations = (Primer.seq << primers) & (Primer._locations >> None)
    targets = list(Primer.select().where(lacks_locations))
    for target in targets:
        target._update_locations(fg_genome_fp)
    swga.database.update_in_chunks(targets, label="Updating primer db... ")
예제 #11
0
def test_linearize_binding_sites(kmer, initdb, fastafile):
    '''Linearized sites must contain every chromosome end and binding site.'''
    p = Primer.create(seq=kmer)
    p._update_locations(fastafile)
    chr_ends = swga.locate.chromosome_ends(fastafile)
    linear_bind_sites = swga.locate.linearize_binding_sites([p], chr_ends)
    # (number of sites + (2*number of chromosomes) - (any overlaps))
    assert len(linear_bind_sites) == 10

    # Look the locations up once instead of once per record
    locations = p.locations()
    # .items() rather than .iteritems() so the test runs on Python 2 and 3
    for record, (start, end) in chr_ends.items():
        assert start in linear_bind_sites
        assert end in linear_bind_sites
        for site in locations[record]:
            assert site in linear_bind_sites
예제 #12
0
파일: primers.py 프로젝트: pombredanne/swga
def read_primer_list(lines, fg_genome_fp, bg_genome_fp):
    '''
    Reads in a list of primers, one per line, and returns the corresponding
    records from the primer database.

    Only the first whitespace-delimited field of each line is used, and
    blank lines are ignored. Sequences that are not in the database are
    skipped with a message; they are not created here.

    `fg_genome_fp` and `bg_genome_fp` are accepted but not used in this
    function.

    Returns a list of Primer records (empty if no sequences were given).
    '''
    seqs = []
    for line in lines:
        # split() with no argument also discards '\r', trailing newlines and
        # leading whitespace, so such lines no longer yield bogus sequences
        fields = line.split()
        if fields:
            seqs.append(fields[0])
    if not seqs:
        return []

    primers = list(Primer.select().where(Primer.seq << seqs).execute())
    if len(primers) < len(seqs):
        # Set membership keeps the missing-sequence scan linear
        found = set(p.seq for p in primers)
        for seq in seqs:
            if seq not in found:
                swga.message(seq + " not in the database; skipping. Add it "
                             "manually with `swga count --input <file>` ")
    return primers
예제 #13
0
 def primers(self, initdb):
     '''Create six primers with ids 0-5 and return them in id order.'''
     seqs = (
         # reference primer
         "ATGCTC",
         # rev. complement has 4 bases overlapping
         "CAGCAT",
         # rev. complement has 3 bases overlapping
         "GAGGTA",
         "ATCGAG",
         # rev. complement has one base overlapping
         "TTCCAC",
         # substring of reference primer
         "ATGC",
     )
     return [Primer.create(_id=i, seq=seq) for i, seq in enumerate(seqs)]
예제 #14
0
파일: filter.py 프로젝트: pombredanne/swga
def main(argv, cfg_file):
    '''
    Runs the `filter` command: chooses the primers to work on, clears all
    existing active marks, optionally filters the primers, updates their
    location data and marks the result as active. Warns if fewer than
    `cmd.max_primers` primers were activated.
    '''
    cmd = Command('filter', cfg_file=cfg_file)
    cmd.parse_args(argv)

    swga.database.init_db(cmd.primer_db)

    if not cmd.input:
        # No input file: work on every primer in the db, and always filter
        cmd.skip_filtering = False
        primers = Primer.select()
    else:
        with open(cmd.input, 'rb') as handle:
            primers = swga.primers.read_primer_list(
                handle, cmd.fg_genome_fp, cmd.bg_genome_fp)

    # Start from a clean slate before marking the new selection
    deactivate_all_primers()

    if not cmd.skip_filtering:
        primers = filter_primers(
            primers,
            cmd.min_fg_bind,
            cmd.max_bg_bind,
            cmd.fg_length,
            cmd.bg_length,
            cmd.min_tm,
            cmd.max_tm,
            cmd.max_primers)

    swga.primers.update_locations(primers, cmd.fg_genome_fp)

    activated = activate_primers(primers)
    if activated < cmd.max_primers:
        swga.warn(
            "Fewer than {} primers were selected ({} passed all the filters). "
            "You may want to try less restrictive filtering parameters."
            .format(cmd.max_primers, activated))
예제 #15
0
파일: summary.py 프로젝트: pombredanne/swga
def summary(primer_db, fg_length, bg_length):
    '''
    Prints a report about the primers and sets stored in `primer_db`.

    The report covers the number of primers, how many are active, average
    foreground/background binding frequencies (also divided by `fg_length`
    and `bg_length`), the melting temp range of active primers, and the
    number of sets together with details of one selected set.

    Arguments:
    primer_db -- database path; handed to `swga.database.init_db` and shown
                 in the report as an absolute path
    fg_length -- divisor for the average foreground binding frequency
    bg_length -- divisor for the average background binding frequency

    NOTE: the message templates below are filled in with `**locals()`, so
    the local variable names in this function are part of the templates.
    '''

    db = swga.database.init_db(primer_db)
    db.connect()
    # drop=False is passed, presumably so existing tables are kept -- confirm
    swga.database.create_tables(drop=False)

    # Aggregates over all primers, active or not
    avg_fg_bind, avg_bg_bind, nprimers = (
        Primer
        .select(fn.Avg(Primer.fg_freq),
                fn.Avg(Primer.bg_freq),
                fn.Count(Primer.seq))
        .scalar(as_tuple=True))

    # NOTE(review): `make_graph` in this project calls swga.error without
    # `raise`; confirm whether it raises itself or returns an exception.
    if (avg_fg_bind is None) or (avg_bg_bind is None):
        raise swga.error(
            "Could not calculate summary statistics; database may be corrupt")

    fg_bind_ratio = avg_fg_bind / float(fg_length)
    bg_bind_ratio = avg_bg_bind / float(bg_length)
    nactive = Primer.select().where(Primer.active==True).count()

    # Melting temp statistics are restricted to active primers
    min_tm, max_tm, avg_tm = (
        Primer
        .select(fn.Min(Primer.tm),
                fn.Max(Primer.tm),
                fn.Avg(Primer.tm))
        .where(Primer.active==True)
        .scalar(as_tuple=True))

    nsets = Set.select(fn.Count(Set._id)).scalar()

    # The bs_* names are only defined when at least one set exists; they are
    # only referenced by the set template that is chosen in that same case.
    if nsets > 0:
        # First set when ordered by score with no explicit direction
        # (presumably ascending, i.e. the lowest score -- TODO confirm)
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(swga.database.get_primers_for_set(bs._id)).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # Every stored field of the set except its id, pids and score
        bs_stats = "- "+"\n - ".join(
            fmtkv(k, v)
            for k, v in bs.__dict__['_data'].items()
            if k not in ["_id", "pids", "score"]
        )

    version_header = (
        "---------------------\n"
        "==== SWGA v{version} ====\n"
        "---------------------\n"
        .format(version=swga.__version__)
    )

    # Main report template; placeholders are resolved from locals() below
    summary_msg = """
    {version_header}

    PRIMER SUMMARY
    ---------------
    There are {nprimers} primers in the database.

    {nactive} are marked as active (i.e., they passed filter steps and will be used to find sets of compatible primers.) {ifzero_primers_msg}

    The average number of foreground genome binding sites is {avg_fg_bind:.0f}.
       (avg binding / genome_length = {fg_bind_ratio:05f})
    The average number of background genome binding sites is {avg_bg_bind:.0f}.
       (avg binding / genome_length = {bg_bind_ratio:05f})

    {melting_tmp_msg}


    SETS SUMMARY
    ---------------
    There are {nsets} sets in the database.
    {set_msg}---------------

    Report generated from {primer_db}
"""

    # These conditionals compare the raw counts, so they must run before
    # nactive and nsets are rebound to colored strings further down.
    ifzero_primers_msg = colored.green(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "")
    melting_tmp_msg = (
        """The melting temp of the primers ranges between {min_tm:.2f}C and {max_tm:.2f}C with an average of {avg_tm:.2f}C."""
        if nactive > 0 and min_tm and max_tm else
        "No melting temps have been calculated yet.")
    ifzero_sets_msg = colored.green(
        "Run `swga find_sets` after identifying valid primers to begin collecting sets.\n")

    set_msg = ("""
    The best scoring set is #{best_set}, with {bs_size} primers and a score of {bs_score:03f}.\nVarious statistics:
    {bs_stats}
The primers in Set {best_set} are:
    {bs_primers}
    """ if nsets > 0 else ifzero_sets_msg)



    # Rebind the display values, then fill the templates from locals().
    # set_msg and melting_tmp_msg are formatted before summary_msg because
    # summary_msg embeds their formatted text.
    primer_db = os.path.abspath(primer_db)
    nprimers = colored.blue(nprimers, bold=True)
    nactive = colored.blue(nactive, bold=True)
    nsets = colored.blue(nsets, bold=True)
    set_msg = set_msg.format(**locals())
    melting_tmp_msg = melting_tmp_msg.format(**locals())
    version_header = colored.green(version_header)
    summary_msg = summary_msg.format(**locals())

    with indent(2):
        puts(max_width(summary_msg, 80))
예제 #16
0
파일: test_db.py 프로젝트: pombredanne/swga
 def test_add_primers(self, initdb):
     '''With add_revcomp=True the reverse complement must be stored too.'''
     database.add_primers([{'seq': "AAAA"}], add_revcomp=True)

     # "TTTT" is the reverse complement of "AAAA"
     revcomp_rows = Primer.select().where(Primer.seq == "TTTT")
     assert revcomp_rows.count() == 1
예제 #17
0
def tprimers(seqs):
    '''Create one Primer per sequence in `seqs` and return them as a list.'''
    created = []
    for sequence in seqs:
        created.append(Primer.create(seq=sequence))
    return created
예제 #18
0
파일: export.py 프로젝트: pombredanne/swga
def validate_order_field(field, model):
    '''Ensures the given field exists in the model.

    A falsy `field` (e.g. None or "") is accepted without any check.
    Otherwise, if `field` is not among `model.fields()`, the problem is
    reported through `swga.error` together with the valid choices for that
    same model.

    Arguments:
    field -- name of the field to order by, or a falsy value for "none"
    model -- object providing a `fields()` method listing valid names
    '''
    if not field:
        return
    if field not in model.fields():
        # List the fields of the model being validated rather than always
        # those of Primer, so the hint is correct for other models too.
        swga.error(
            "Cannot order by '{}'. Valid choices are {}"
            .format(field, ", ".join(model.fields())))
예제 #19
0
파일: primers.py 프로젝트: pombredanne/swga
def activate(primers):
    '''
    Sets `active` to True for every primer whose seq is in `primers` and
    returns the result of executing that update.
    '''
    in_given = Primer.seq << primers
    query = Primer.update(active=True).where(in_given)
    return query.execute()
예제 #20
0
파일: filter.py 프로젝트: pombredanne/swga
def deactivate_all_primers():
    """Sets `active` to False on every primer in the database."""
    # No where-clause: the update applies to all rows
    reset_query = Primer.update(active=False)
    reset_query.execute()