Example #1
0
def addv102():
    """Run ``PairDB.add_v102_comparison`` with paths taken from the command line.

    Positional arguments read from ``sys.argv``:
      * ``argv[2]`` -- path of the pair database to open
      * ``argv[3]`` -- targets path passed to the comparison
      * ``argv[4]`` -- output path passed to the comparison
    """
    pair_db_file, targets_file, output_file = sys.argv[2], sys.argv[3], sys.argv[4]
    # Project import is deferred until the arguments have been read.
    from spats_shape_seq.db import PairDB
    PairDB(pair_db_file).add_v102_comparison(targets_file, output_file)
Example #2
0
def addv102():
    """Run ``PairDB.add_v102_comparison`` with paths taken from the command line.

    Positional arguments read from ``sys.argv``:
      * ``argv[2]`` -- path of the pair database to open
      * ``argv[3]`` -- targets path passed to the comparison
      * ``argv[4]`` -- output path passed to the comparison
    """
    pair_db_file, targets_file, output_file = sys.argv[2], sys.argv[3], sys.argv[4]
    # Project import is deferred until the arguments have been read.
    from spats_shape_seq.db import PairDB
    PairDB(pair_db_file).add_v102_comparison(targets_file, output_file)
Example #3
0
 def run_dataset(self, case, algorithm):
     """Process one dataset with ``algorithm`` and assert there are no diffs.

     A scratch copy of ``test/<case>/ds.spats`` is processed into the
     "test" result set, which is then compared against the
     "test_validation" result set via ``PairDB.differing_results``.
     The scratch copy is removed afterwards, pass or fail.
     """
     case_dir = "test/{}/".format(case)
     scratch_db = case_dir + "test.spats.tmp"
     try:
         shutil.copyfile(case_dir + "ds.spats", scratch_db)
         pair_db = PairDB(scratch_db)
         spats = Spats()
         pair_db.load_run(spats.run)
         if algorithm == "native" and not spats.run.cotrans:
             # The native algorithm is not exercised for non-cotrans runs.
             return
         spats.run.writeback_results = True
         spats.run.result_set_name = "test"
         spats.run.algorithm = algorithm
         spats.run.quiet = True
         spats.loadTargets(pair_db)
         if not spats._processor.exists():
             # A missing processor is only tolerated for the native
             # algorithm; for anything else this assertion fails the test.
             self.assertEqual("native", algorithm)
             return
         # Small batch_size just to exercise the multiprocessing code.
         spats.process_pair_db(pair_db, batch_size=1024)
         last_diff = None
         num_diffs = 0
         for num_diffs, row in enumerate(pair_db.differing_results("test", "test_validation"), 1):
             # Only the most recent differing row is kept for the message.
             last_diff = str([str(field) for field in row])
         self.assertEqual(0, num_diffs, "{} differing results: {} / {} \n{}".format(num_diffs, case, algorithm, last_diff))
     finally:
         if os.path.exists(scratch_db):
             os.remove(scratch_db)
Example #4
0
def makedb():
    """Populate a pair database via ``PairDB.load_and_index``.

    Positional arguments read from ``sys.argv``:
      * ``argv[2]`` -- path of the pair database
      * ``argv[3]`` -- targets path
      * ``argv[4]``, ``argv[5]`` -- R1 and R2 input paths
    """
    db_file, targets_file = sys.argv[2], sys.argv[3]
    r1_file, r2_file = sys.argv[4], sys.argv[5]
    # Project import is deferred until the arguments have been read.
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(db_file)
    pair_db.show_progress_every = 200000
    pair_db.load_and_index(targets_file, r1_file, r2_file)
Example #5
0
def makedb():
    """Populate a pair database via ``PairDB.load_and_index``.

    Positional arguments read from ``sys.argv``:
      * ``argv[2]`` -- path of the pair database
      * ``argv[3]`` -- targets path
      * ``argv[4]``, ``argv[5]`` -- R1 and R2 input paths
    """
    db_file, targets_file = sys.argv[2], sys.argv[3]
    r1_file, r2_file = sys.argv[4], sys.argv[5]
    # Project import is deferred until the arguments have been read.
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(db_file)
    pair_db.show_progress_every = 200000
    pair_db.load_and_index(targets_file, r1_file, r2_file)
Example #6
0
def d5s_writeback_run():
    """Process the 5S dev pair DB and write results back as "pure_python".

    All paths are hard-coded under a developer-local scratch directory.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    targets_fa = base + "5s/5S.fa"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "dev_out/pairs.db")
    db.add_targets_table(targets_fa)

    from spats_shape_seq import Spats
    spats = Spats()
    spats.addTargets(targets_fa)
    # NOTE(review): other helpers in this file set these two options on
    # `s.run` rather than on the Spats object itself -- confirm which one
    # the installed spats_shape_seq version expects.
    spats.writeback_results = True
    spats.result_set_name = "pure_python"
    spats.process_pair_db(db)
Example #7
0
def d5s_writeback_run():
    """Process the 5S dev pair DB and write results back as "pure_python".

    All paths are hard-coded under a developer-local scratch directory.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    targets_fa = base + "5s/5S.fa"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "dev_out/pairs.db")
    db.add_targets_table(targets_fa)

    from spats_shape_seq import Spats
    spats = Spats()
    spats.addTargets(targets_fa)
    # NOTE(review): other helpers in this file set these two options on
    # `s.run` rather than on the Spats object itself -- confirm which one
    # the installed spats_shape_seq version expects.
    spats.writeback_results = True
    spats.result_set_name = "pure_python"
    spats.process_pair_db(db)
Example #8
0
def rdiff_func(db_path, rs1_name, rs2_name, diag_spats=None):
    """Print a report of the rows that differ between two result sets.

    Rows come from ``PairDB.differing_results(rs1_name, rs2_name)`` and are
    split into three buckets, then grouped by a "reason" string within each
    bucket.  At most ten rows are printed per reason, followed by the total
    for that reason.

    Args:
        db_path: path of the pair database to open.
        rs1_name: name of the first result set.
        rs2_name: name of the second result set.
        diag_spats: optional Spats-like object.  When truthy, the last row
            printed for each reason is re-processed with
            ``diag_spats.process_pair`` and its diagram is printed.

    Exits the process with status 1 (via ``SystemExit``) when either result
    set reports zero results.
    """
    # sys.exit instead of the bare `exit()` helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    import sys

    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    from spats_shape_seq.pair import Pair

    max_rows_per_reason = 10
    row_format = "  {}:{} s{}m{} ({}) -- {}:{} s{}m{} ({})   ([ '{}', '{}', '{}', {}, {}, [ {} ] ])"

    db = PairDB(db_path)
    n1 = db.num_results(rs1_name)
    n2 = db.num_results(rs2_name)
    print("{}: {} results  /  {}: {} results".format(rs1_name, n1, rs2_name, n2))
    if not n1 or not n2:
        print("** Abort.")
        sys.exit(1)
    print("Diffs:")

    # Column 4 / column 9 hold -1 when that side has no value for the row
    # (presumably "no result in rs1" / "no result in rs2" -- TODO confirm
    # against PairDB.differing_results).
    ours_only = []
    theirs_only = []
    differences = []
    for r in db.differing_results(rs1_name, rs2_name):
        if r[4] == -1:
            assert r[9] != -1
            theirs_only.append(r)
        elif r[9] == -1:
            ours_only.append(r)
        else:
            differences.append(r)

    all_lists = [ours_only, theirs_only, differences]
    for rows in all_lists:
        # Group by the first non-empty of columns 7 and 12, falling back to
        # a fixed label when both are empty.
        reasons = {}
        for r in rows:
            key = r[7] or r[12] or "different values"
            reasons.setdefault(key, []).append(r)

        for rlist in reasons.values():
            # Every group holds at least one row, so `shown` is never empty.
            shown = rlist[:max_rows_per_reason]
            for r in shown:
                print(row_format.format(
                    r[3] or 'x', r[4], r[5], r[6], r[7] or "OK",
                    r[8] or 'x', r[9], r[10], r[11], r[12] or "OK",
                    r[0], r[1], r[2], r[4], r[5], "" if -1 == r[6] else r[6]))
            print("... {} total.".format(len(rlist)))
            if diag_spats:
                # Diagram the last row that was printed for this reason.
                sample = shown[-1]
                pair = Pair()
                pair.set_from_data(str(sample[0]), str(sample[1]), str(sample[2]))
                diag_spats.process_pair(pair)
                print(diagram(pair, diag_spats.run))

    print("{} total diffs.".format(sum(map(len, all_lists))))
Example #9
0
def rdiff_func(db_path, rs1_name, rs2_name, diag_spats = None):
    """Print a report of the rows that differ between two result sets.

    Rows come from ``PairDB.differing_results(rs1_name, rs2_name)`` and are
    split into three buckets, then grouped by a "reason" string within each
    bucket.  At most ten rows are printed per reason, followed by the total
    for that reason.

    Args:
        db_path: path of the pair database to open.
        rs1_name: name of the first result set.
        rs2_name: name of the second result set.
        diag_spats: optional Spats-like object.  When truthy, the last row
            printed for each reason is re-processed with
            ``diag_spats.process_pair`` and its diagram is printed.

    Exits the process with status 1 (via ``SystemExit``) when either result
    set reports zero results.
    """
    # sys.exit instead of the bare `exit()` helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    import sys

    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    from spats_shape_seq.pair import Pair

    max_rows_per_reason = 10
    row_format = "  {}:{} s{}m{} ({}) -- {}:{} s{}m{} ({})   ([ '{}', '{}', '{}', {}, {}, [ {} ] ])"

    db = PairDB(db_path)
    n1 = db.num_results(rs1_name)
    n2 = db.num_results(rs2_name)
    print("{}: {} results  /  {}: {} results".format(rs1_name, n1, rs2_name, n2))
    if not n1 or not n2:
        print("** Abort.")
        sys.exit(1)
    print("Diffs:")

    # Column 4 / column 9 hold -1 when that side has no value for the row
    # (presumably "no result in rs1" / "no result in rs2" -- TODO confirm
    # against PairDB.differing_results).
    ours_only = []
    theirs_only = []
    differences = []
    for r in db.differing_results(rs1_name, rs2_name):
        if r[4] == -1:
            assert r[9] != -1
            theirs_only.append(r)
        elif r[9] == -1:
            ours_only.append(r)
        else:
            differences.append(r)

    all_lists = [ours_only, theirs_only, differences]
    for rows in all_lists:
        # Group by the first non-empty of columns 7 and 12, falling back to
        # a fixed label when both are empty.
        reasons = {}
        for r in rows:
            key = r[7] or r[12] or "different values"
            reasons.setdefault(key, []).append(r)

        for rlist in reasons.values():
            # Every group holds at least one row, so `shown` is never empty.
            shown = rlist[:max_rows_per_reason]
            for r in shown:
                print(row_format.format(
                    r[3] or 'x', r[4], r[5], r[6], r[7] or "OK",
                    r[8] or 'x', r[9], r[10], r[11], r[12] or "OK",
                    r[0], r[1], r[2], r[4], r[5], "" if -1 == r[6] else r[6]))
            print("... {} total.".format(len(rlist)))
            if diag_spats:
                # Diagram the last row that was printed for this reason.
                sample = shown[-1]
                pair = Pair()
                pair.set_from_data(str(sample[0]), str(sample[1]), str(sample[2]))
                diag_spats.process_pair(pair)
                print(diagram(pair, diag_spats.run))

    print("{} total diffs.".format(sum(map(len, all_lists))))
Example #10
0
def make_test_dataset():
    """Build ``ds.spats`` with a "test_validation" result set.

    Parses the ``med_R1.fq`` / ``med_R2.fq`` reads into a pair DB, processes
    them with the ``find_partial`` algorithm in cotrans mode, and stores the
    run settings and counters in the same DB.  Paths are hard-coded under a
    developer-local directory.
    """
    data_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/data/"
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB

    db = PairDB(data_dir + "ds.spats")
    db.add_targets_table(data_dir + "../cotrans_single.fa")
    db.parse(data_dir + "med_R1.fq", data_dir + "med_R2.fq")

    spats = Spats(cotrans=True)
    # Run options, applied in this order.
    run_options = (
        ("num_workers", 1),
        ("writeback_results", True),
        ("_process_all_pairs", True),
        ("algorithm", "find_partial"),
        ("result_set_name", "test_validation"),
    )
    for option, value in run_options:
        setattr(spats.run, option, value)

    spats.process_pair_db(db)
    db.store_run(spats.run)
    db.store_counters('spats', spats.counters)
Example #11
0
def make_test_dataset():
    """Build ``ds.spats`` with a "test_validation" result set.

    Parses the ``med_R1.fq`` / ``med_R2.fq`` reads into a pair DB, processes
    them with the ``find_partial`` algorithm in cotrans mode, and stores the
    run settings and counters in the same DB.  Paths are hard-coded under a
    developer-local directory.
    """
    data_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/data/"
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB

    db = PairDB(data_dir + "ds.spats")
    db.add_targets_table(data_dir + "../cotrans_single.fa")
    db.parse(data_dir + "med_R1.fq", data_dir + "med_R2.fq")

    spats = Spats(cotrans=True)
    # Run options, applied in this order.
    run_options = (
        ("num_workers", 1),
        ("writeback_results", True),
        ("_process_all_pairs", True),
        ("algorithm", "find_partial"),
        ("result_set_name", "test_validation"),
    )
    for option, value in run_options:
        setattr(spats.run, option, value)

    spats.process_pair_db(db)
    db.store_run(spats.run)
    db.store_counters('spats', spats.counters)
Example #12
0
def dbrun():
    """Process an existing pair DB and write results back under a given name.

    Positional arguments read from ``sys.argv``:
      * ``argv[2]`` -- path of the pair database
      * ``argv[3]`` -- name of the result set to write
    """
    db_file, result_set = sys.argv[2], sys.argv[3]
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(db_file)
    spats = Spats()
    spats.run.writeback_results = True
    spats.run.result_set_name = result_set
    # `run.resume_processing` is deliberately left at its default here
    # (the assignment to True was disabled in the original code).
    spats.process_pair_db(pair_db)
Example #13
0
class TestDatasets(unittest.TestCase):
    """Regression tests that replay stored datasets through each algorithm."""

    def test_datasets(self):
        """Run every (case, algorithm) combination through ``run_dataset``.

        When the ``SKIP_SLOW_TESTS`` environment variable is set, reaching
        the 'native' algorithm raises ``nose.SkipTest`` for the whole test.
        """
        for case in cases:
            for alg in algorithms:
                is_slow = alg == 'native'
                if is_slow and os.environ.get('SKIP_SLOW_TESTS'):
                    raise nose.SkipTest('skipping slow tests')
                self.run_dataset(case, alg)
        print("Ran {} datasets.".format(len(cases)))

    def run_dataset(self, case, algorithm):
        """Process one dataset with ``algorithm`` and assert there are no diffs.

        A scratch copy of ``test/<case>/ds.spats`` is processed into the
        "test" result set, which is then compared against the
        "test_validation" result set via ``PairDB.differing_results``.
        The scratch copy is removed afterwards, pass or fail.
        """
        case_dir = "test/{}/".format(case)
        scratch_db = case_dir + "test.spats.tmp"
        try:
            shutil.copyfile(case_dir + "ds.spats", scratch_db)
            pair_db = PairDB(scratch_db)
            spats = Spats()
            pair_db.load_run(spats.run)
            if algorithm == "native" and not spats.run.cotrans:
                # The native algorithm is not exercised for non-cotrans runs.
                return
            spats.run.writeback_results = True
            spats.run.result_set_name = "test"
            spats.run.algorithm = algorithm
            spats.run.quiet = True
            spats.loadTargets(pair_db)
            if not spats._processor.exists():
                # A missing processor is only tolerated for the native
                # algorithm; for anything else this assertion fails the test.
                self.assertEqual("native", algorithm)
                return
            # Small batch_size just to exercise the multiprocessing code.
            spats.process_pair_db(pair_db, batch_size=1024)
            last_diff = None
            num_diffs = 0
            for num_diffs, row in enumerate(
                    pair_db.differing_results("test", "test_validation"), 1):
                # Only the most recent differing row is kept for the message.
                last_diff = str([str(field) for field in row])
            failure_text = "{} differing results: {} / {} \n{}".format(
                num_diffs, case, algorithm, last_diff)
            self.assertEqual(0, num_diffs, failure_text)
        finally:
            if os.path.exists(scratch_db):
                os.remove(scratch_db)
Example #14
0
def tquery():
    """Print the result of one fixed ``PairDB.results_matching`` query.

    The query runs against the hard-coded, developer-local cotrans pair DB.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "db/pairs.db")
    matches = db.results_matching(1, ["linker_cotrans", "adapter"], ["match"])
    print(matches)
Example #15
0
def tags():
    """Tag the cotrans reads with ``TagProcessor`` and print the tag counts.

    Re-parses the hard-coded R1/R2 fastq files into the pair DB (passing
    ``sample_size=100000``), registers every target, the adapters and the
    cotrans linker as tag targets, processes the DB into the "tags" result
    set, and prints ``PairDB.tag_counts`` for it.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "db/pairs.db")
    reparse = True  # flip to False to reuse what is already in the DB
    if reparse:
        print("Parsing to db...")
        db.wipe()
        db.add_targets_table(base + "cotrans_single.fa")
        db.parse(base + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq",
                 base + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq",
                 sample_size=100000)

    from spats_shape_seq import Spats
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement
    spats = Spats()
    spats.run._processor_class = TagProcessor
    spats.run.writeback_results = True
    spats.run.result_set_name = "tags"
    spats.run.num_workers = 1
    spats.run.cotrans = True
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.loadTargets(db)

    spats.run.allow_indeterminate = True
    spats.run.allowed_target_errors = 2
    spats.run.allowed_adapter_errors = 2

    processor = spats._processor

    def add_with_reverse(name, seq):
        # Register a sequence and, under "<name>_rc", its reverse complement.
        processor.addTagTarget(name, seq)
        processor.addTagTarget(name + "_rc", reverse_complement(seq))

    for target in db.targets():
        add_with_reverse(target[0], target[1])
    processor.addTagTarget("adapter_t_rc", reverse_complement(spats.run.adapter_t))
    processor.addTagTarget("adapter_b", spats.run.adapter_b)
    if spats.run.cotrans:
        add_with_reverse("linker_cotrans", spats.run.cotrans_linker)

    spats.process_pair_db(db)
    result_set_id = db.result_set_id_for_name(spats.run.result_set_name)
    db.count_tags(result_set_id)
    print(db.tag_counts(result_set_id))
Example #16
0
def tags():
    """Tag the cotrans reads with ``TagProcessor`` and print the tag counts.

    Re-parses the hard-coded R1/R2 fastq files into the pair DB (passing
    ``sample_size=100000``), registers every target, the adapters and the
    cotrans linker as tag targets, processes the DB into the "tags" result
    set, and prints ``PairDB.tag_counts`` for it.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "db/pairs.db")
    reparse = True  # flip to False to reuse what is already in the DB
    if reparse:
        print("Parsing to db...")
        db.wipe()
        db.add_targets_table(base + "cotrans_single.fa")
        db.parse(base + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq",
                 base + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq",
                 sample_size=100000)

    from spats_shape_seq import Spats
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement
    spats = Spats()
    spats.run._processor_class = TagProcessor
    spats.run.writeback_results = True
    spats.run.result_set_name = "tags"
    spats.run.num_workers = 1
    spats.run.cotrans = True
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.loadTargets(db)

    spats.run.allow_indeterminate = True
    spats.run.allowed_target_errors = 2
    spats.run.allowed_adapter_errors = 2

    processor = spats._processor

    def add_with_reverse(name, seq):
        # Register a sequence and, under "<name>_rc", its reverse complement.
        processor.addTagTarget(name, seq)
        processor.addTagTarget(name + "_rc", reverse_complement(seq))

    for target in db.targets():
        add_with_reverse(target[0], target[1])
    processor.addTagTarget("adapter_t_rc", reverse_complement(spats.run.adapter_t))
    processor.addTagTarget("adapter_b", spats.run.adapter_b)
    if spats.run.cotrans:
        add_with_reverse("linker_cotrans", spats.run.cotrans_linker)

    spats.process_pair_db(db)
    result_set_id = db.result_set_id_for_name(spats.run.result_set_name)
    db.count_tags(result_set_id)
    print(db.tag_counts(result_set_id))
Example #17
0
 def pair_db(self):
     """Access the underlying :class:`.db.PairDB`.

     The instance is created from ``self.db_path`` when ``self._pair_db``
     is still falsy, and is kept on ``self._pair_db`` for later calls.
     """
     cached = self._pair_db
     if cached:
         return cached
     self._pair_db = PairDB(self.db_path)
     return self._pair_db
Example #18
0
def tquery():
    """Print the result of one fixed ``PairDB.results_matching`` query.

    The query runs against the hard-coded, developer-local cotrans pair DB.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    db = PairDB(base + "db/pairs.db")
    matches = db.results_matching(1, ["linker_cotrans", "adapter"], ["match"])
    print(matches)
Example #19
0
def tmut():
    """Compare the ``find_partial`` and ``lookup`` algorithms on the mut2 reads.

    Re-parses the mut2 fastq pair into ``ds_cmp.spats``, processes it once per
    algorithm into result sets ``mut_find_partial`` and ``mut_lookup``
    (storing run settings and counters for each), then prints their
    differences with ``rdiff_func``, using the first Spats instance for the
    diagrams.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    # NOTE(review): `diagram` is imported here but not used in this function.
    from spats_shape_seq.diagram import diagram

    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    db_file = base + "ds_cmp.spats"

    db = PairDB(db_file)
    reparse = True  # flip to False to reuse what is already in the DB
    if reparse:
        print("Parsing to db...")
        db.wipe()
        db.add_targets_table(base + "mut_single.fa")
        fq_name = "mut2"
        db.parse(base + fq_name + "_R1.fastq", base + fq_name + "_R2.fastq")

    runners = []
    for alg in ("find_partial", "lookup"):
        runner = Spats(cotrans=False)
        runner.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        runner.run.count_mutations = True
        runner.run.algorithm = alg
        runner.run.allowed_target_errors = 1
        runner.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        runner.run._process_all_pairs = True
        runner.run.writeback_results = True
        runner.run.num_workers = 1
        runner.run.result_set_name = "mut_" + alg

        runner.process_pair_db(db)
        db.store_run(runner.run)
        db.store_counters(runner.run.result_set_name, runner.counters)
        runners.append(runner)

    # Diagrams are drawn with the first runner (the find_partial one).
    rdiff_func(db_file, "mut_find_partial", "mut_lookup", diag_spats=runners[0])
Example #20
0
def tmut():
    """Compare the ``find_partial`` and ``lookup`` algorithms on the mut2 reads.

    Re-parses the mut2 fastq pair into ``ds_cmp.spats``, processes it once per
    algorithm into result sets ``mut_find_partial`` and ``mut_lookup``
    (storing run settings and counters for each), then prints their
    differences with ``rdiff_func``, using the first Spats instance for the
    diagrams.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    # NOTE(review): `diagram` is imported here but not used in this function.
    from spats_shape_seq.diagram import diagram

    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    db_file = base + "ds_cmp.spats"

    db = PairDB(db_file)
    reparse = True  # flip to False to reuse what is already in the DB
    if reparse:
        print("Parsing to db...")
        db.wipe()
        db.add_targets_table(base + "mut_single.fa")
        fq_name = "mut2"
        db.parse(base + fq_name + "_R1.fastq", base + fq_name + "_R2.fastq")

    runners = []
    for alg in ("find_partial", "lookup"):
        runner = Spats(cotrans=False)
        runner.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        runner.run.count_mutations = True
        runner.run.algorithm = alg
        runner.run.allowed_target_errors = 1
        runner.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        runner.run._process_all_pairs = True
        runner.run.writeback_results = True
        runner.run.num_workers = 1
        runner.run.result_set_name = "mut_" + alg

        runner.process_pair_db(db)
        db.store_run(runner.run)
        db.store_counters(runner.run.result_set_name, runner.counters)
        runners.append(runner)

    # Diagrams are drawn with the first runner (the find_partial one).
    rdiff_func(db_file, "mut_find_partial", "mut_lookup", diag_spats=runners[0])
Example #21
0
 def open_spats(self, path):
     """Open the pair DB at ``path`` and record it as the last-opened file.

     Sets ``self._db`` and ``self.db_name`` (the base name of ``path``),
     calls ``self._loadDBAndModel()``, and writes ``{"last": path}`` as JSON
     to ``self.last_path``.
     """
     self._db = PairDB(path)
     self._loadDBAndModel()
     self.db_name = os.path.basename(path)
     last_opened = {"last": path}
     cjb.util.writeJsonToPath(last_opened, self.last_path)