Example #1
0
def show_failure_types():
    """Print a one-line outcome summary for each read pair of two FASTQ files.

    Reads R1/R2 records in lockstep from two hard-coded filtered FASTQ files,
    runs every pair through ``Spats.process_pair`` and prints the pair
    identifier followed by its site (when ``pair.has_site``) or its failure,
    plus any match/adapter errors recorded on either read.
    """
    from spats_clean import Spats, Pair, FastqRecord
    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    # One `with` statement keeps both handles open for exactly the same scope.
    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in, \
            open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
        # The record and pair objects are reused across iterations.
        r1_record = FastqRecord()
        r2_record = FastqRecord()
        pair = Pair()
        while True:
            r1_record.read(r1_in)
            if not r1_record.identifier:
                # No identifier after a read: stop (presumably end of the R1 file).
                break
            r2_record.read(r2_in)
            pair.set_from_records(r1_record, r2_record)

            spats.process_pair(pair)

            summary = "{} :: {}".format(pair.identifier, pair.site if pair.has_site else pair.failure)
            if pair.r1.match_errors:
                summary += " R1!: {}".format(pair.r1.match_errors)
            if pair.r1.adapter_errors:
                summary += " R1A!: {}, adapter_len={}".format(pair.r1.adapter_errors, pair.r1._rtrim)
            if pair.r2.match_errors:
                summary += " R2!: {}".format(pair.r2.match_errors)
            if pair.r2.adapter_errors:
                # NOTE(review): R2 reports `_rtrim - 4` while R1 reports `_rtrim`;
                # the reason for the offset is not visible here -- confirm.
                summary += " R2A!: {}, adapter_len={}".format(pair.r2.adapter_errors, pair.r2._rtrim - 4)
            # Single-argument call form prints identically on Python 2 and 3.
            print(summary)
Example #2
0
def d5s_run():
    """Process the 5S R1/R2 FASTQ data without a database and write reactivities.

    Adds the 5S targets, processes the pair data, computes profiles and writes
    the reactivities to ``dev_out/rx2.out`` under the hard-coded base path.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    fastq_stem = base + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_"
    r1_path = fastq_stem + "R1_001.fastq"
    r2_path = fastq_stem + "R2_001.fastq"

    # Alternative (disabled): parse into a PairDB first and process from there.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(base + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(base + "5s/5S.fa")
    #    pair_db.parse(r1_path, r2_path)

    from spats_shape_seq import Spats
    from spats_shape_seq.partial import PartialFindProcessor
    spats = Spats()
    run = spats.run
    #run._processor_class = PartialFindProcessor
    run.skip_database = True
    #run.writeback_results = True
    #run.resume_processing = True
    #run.result_set_name = "lookup"
    spats.addTargets(base + "5s/5S.fa")
    #spats.process_pair_db(pair_db)
    spats.process_pair_data(r1_path, r2_path)
    spats.compute_profiles()
    spats.write_reactivities(base + "dev_out/rx2.out")
Example #3
0
def tmut():
    """Run the mutation data set through two algorithms and diff their results.

    Parses the ``mut2`` FASTQ pair into a fresh pair DB, processes it once with
    ``find_partial`` and once with ``lookup`` (storing run and counters under
    ``mut_<algorithm>``), then hands both result-set names to ``rdiff_func``.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    pair_db = PairDB(bp + "ds_cmp.spats")
    # Development toggle: set to False to reuse an already-parsed database.
    reparse = True
    if reparse:
        # Single-argument call form prints identically on Python 2 and 3.
        print("Parsing to db...")
        pair_db.wipe()
        pair_db.add_targets_table(bp + "mut_single.fa")
        fq_name = "mut2"
        pair_db.parse(bp + fq_name + "_R1.fastq", bp + fq_name + "_R2.fastq")

    spatss = []
    for alg in [ "find_partial", "lookup" ]:
        spats = Spats(cotrans = False)
        spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        spats.run.count_mutations = True
        spats.run.algorithm = alg
        spats.run.allowed_target_errors = 1
        spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        spats.run._process_all_pairs = True
        spats.run.writeback_results = True
        spats.run.num_workers = 1
        spats.run.result_set_name = "mut_" + alg

        spats.process_pair_db(pair_db)
        pair_db.store_run(spats.run)
        pair_db.store_counters(spats.run.result_set_name, spats.counters)
        spatss.append(spats)

    # The find_partial instance (first in the list) is passed for diagramming.
    rdiff_func(bp + "ds_cmp.spats", "mut_find_partial", "mut_lookup", diag_spats = spatss[0])
Example #4
0
def diag_case():
    """Diagram the starred mutation test cases and fail if any site mismatches.

    Only cases whose identifier (``case[0]``) starts with ``"*"`` are run. Each
    is processed with ``find_partial`` in debug mode and its diagram printed;
    ``case[3]`` is compared against the resulting ``pair.site``.

    Raises:
        Exception: "Case failed" when at least one processed case mismatched.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    #from spats_shape_seq.tests.test_pairs import prefix_cases as cases
    from spats_shape_seq.diagram import diagram
    #spats_config.minimum_target_match_length = 8
    spats = Spats()
    #spats.addTargets("test/5s/5s.fa")
    spats.addTargets("test/mut/mut_single.fa")
    spats.run.debug = True
    spats.run.algorithm = "find_partial"
    spats.run.count_mutations = True
    #spats.run.mutations_require_quality_score = ord('.') - ord('!')
    spats.run.allowed_target_errors = 1
    spats.run.ignore_stops_with_mismatched_overlap = True
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"

    #spats.run.collapse_left_prefixes = True
    # The failure flag lives on the spats object so the nested function can
    # set it without `nonlocal` (unavailable on Python 2).
    spats._case_errors = False
    def run_case(case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        # Single-argument call form prints identically on Python 2 and 3.
        print(diagram(pair, spats.run))
        if case[3] != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(case[3], pair.site))
    for case in cases:
        if case[0].startswith("*"):
            run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
Example #5
0
 def setUp(self):
     """Build a cotrans Spats with left-prefix collapsing and the cotrans_single target."""
     from spats_shape_seq import Spats
     self.spats = Spats()
     run = self.spats.run
     run.cotrans = True
     run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
     run.collapse_left_prefixes = True
     self.spats.addTargets("test/cotrans/cotrans_single.fa")
Example #6
0
class TestShortAdapterB(unittest.TestCase):
    """Runs ``short_adapter_b_cases`` against every algorithm except 'native'."""

    def tearDown(self):
        # Drop the per-algorithm Spats instance between tests.
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        ident, r1_seq, r2_seq = case[0], case[1], case[2]
        pair = Pair()
        pair.set_from_data(ident, r1_seq, r2_seq)
        return pair

    def run_case(self, case):
        """Process one case and assert the resulting site equals ``case[3]``."""
        pair = self.pair_for_case(case)
        algorithm = self.spats.run.algorithm
        print('running: {} / {}'.format(case[0], algorithm))
        self.spats.process_pair(pair)
        failure_msg = "site res={} != {} ({}, {}, {}, {})".format(
            pair.site, case[3], self.__class__.__name__, case[0],
            self.spats.run.algorithm, pair.failure)
        self.assertEqual(case[3], pair.site, failure_msg)

    def test_pairs(self):
        """Exercise every listed algorithm other than 'native'."""
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh Spats for ``alg`` on the 5SrRNA target and run all cases."""
        from spats_shape_seq import Spats
        spats = Spats()
        self.spats = spats
        spats.run.algorithm = alg
        spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run each short-adapter-b case, then report how many were run."""
        for case in short_adapter_b_cases:
            self.run_case(case)
        total = len(short_adapter_b_cases)
        print("Ran {} adapter_b test cases.".format(total))
Example #7
0
class TestOnlyPrefixes(unittest.TestCase):
    """Checks that only the prefixes in ``collapse_only_prefixes`` are collapsed."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.cotrans = True
        self.spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats.run.collapse_left_prefixes = True
        self.spats.run.collapse_only_prefixes = "T,ACGT,CCA"
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; when ``case[5]`` is set, assert the resulting site.

        The expected site is ``case[4]`` if ``case[5]`` is one of the configured
        ``collapse_only_prefixes`` entries, otherwise ``None``.
        """
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        if case[5]:
            allowed = self.spats.run.collapse_only_prefixes.split(',')
            expect = case[4] if case[5] in allowed else None
            # Report the value actually compared against (may be None), not
            # unconditionally case[4].
            self.assertEqual(expect, pair.site, "PREF res={} != {} ({}, {})".format(pair.site, expect, self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Example #8
0
 def run_dataset(self, case, algorithm):
     """Re-run a stored data set with ``algorithm`` and assert no results differ.

     Works on a temporary copy of ``test/<case>/ds.spats``, writes results
     under the set name "test" and compares them to "test_validation". The
     temporary copy is removed on every exit path.
     """
     base = "test/{}/".format(case)
     test_file = base + "test.spats.tmp"
     try:
         shutil.copyfile(base + "ds.spats", test_file)
         db = PairDB(test_file)
         spats = Spats()
         run = spats.run
         db.load_run(run)
         if algorithm == "native" and not run.cotrans:
             return
         run.writeback_results = True
         run.result_set_name = "test"
         run.algorithm = algorithm
         run.quiet = True
         spats.loadTargets(db)
         if not spats._processor.exists():
             # just ignore the native test if it's not available
             self.assertEqual("native", algorithm)
             return
         # small batch_size just to exercise multiprocessing code
         spats.process_pair_db(db, batch_size = 1024)
         count = 0
         msg = None
         # Only the last differing row is kept for the failure message.
         for count, res in enumerate(db.differing_results("test", "test_validation"), 1):
             msg = str([str(x) for x in res])
         failure_text = "{} differing results: {} / {} \n{}".format(count, case, algorithm, msg)
         self.assertEqual(0, count, failure_text)
     finally:
         if os.path.exists(test_file):
             os.remove(test_file)
Example #9
0
class TestPrefixPairs(unittest.TestCase):
    """Runs ``prefix_cases`` against the 5s target with left-prefix collapsing on."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; check the site and, if ``case[4]`` is set, its prefix counter."""
        pair = self.pair_for_case(case)
        # Reset so the prefix counter checked below reflects this pair only.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]), "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Example #10
0
class TestPrefixPairs(unittest.TestCase):
    """Runs ``prefix_cases`` against the 5s target with left-prefix collapsing on."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; check the site and, if ``case[4]`` is set, its prefix counter."""
        pair = self.pair_for_case(case)
        # Reset so the prefix counter checked below reflects this pair only.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(
                1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]),
                "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Example #11
0
 def setUp(self):
     """Create a fresh Spats and let the test set configure it.

     Any exception raised during setup is reported together with the test
     set's name and then re-raised unchanged.
     """
     try:
         self.spats = Spats()
         self.test_set.spats_setUp(self.spats)
     except Exception as e:
         print("exception caught on testset '{}' setup : {}".format(
             self.test_set.name, e))
         # Bare `raise` re-raises the active exception with its original
         # traceback (on Python 2, `raise e` would replace the traceback).
         raise
Example #12
0
 def setUp(self):
     """Configure a mutation-counting Spats, then defer to ``setup_processor``."""
     self.spats = Spats()
     run = self.spats.run
     run.count_mutations = True
     # Offset of the '.' quality character from '!'.
     run.mutations_require_quality_score = ord('.') - ord('!')
     run.allowed_target_errors = 1
     run.ignore_stops_with_mismatched_overlap = True
     run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
     self.setup_processor()
Example #13
0
 def run_algorithm(self, alg):
     """Create a mutation-counting Spats for ``alg`` on the SRP target and run the pairs."""
     from spats_shape_seq import Spats
     self.spats = Spats()
     self.spats.run.algorithm = alg
     self.spats.run.count_mutations = True
     # Set as the integer 1 (not the boolean True), matching how the other
     # test setups assign this setting.
     self.spats.run.allowed_target_errors = 1
     self.spats.run.ignore_stops_with_mismatched_overlap = True
     self.spats.addTargets("test/SRP/SRP.fa")
     self.run_pairs()
Example #14
0
class TestPairs(unittest.TestCase):
    """Pair -> site tests against the 5s target."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert the resulting site equals ``case[3]``."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion here: the pair is only processed and its outcome printed.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG',
                           'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair yields no site at minimum length 11 and site 135 at 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [
            '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
            'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None
        ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # Configure the *new* instance: previously the length was assigned to
        # the old Spats right before it was replaced, so it never took effect.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.run.algorithm = "lookup"
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Example #15
0
def dbrun():
    """Process the pair DB named on the command line, writing results back.

    Reads the database path from ``sys.argv[2]`` and the result-set name from
    ``sys.argv[3]``.
    """
    db_path, run_name = sys.argv[2], sys.argv[3]
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(db_path)
    spats = Spats()
    run = spats.run
    run.writeback_results = True
    run.result_set_name = run_name
    #run.resume_processing = True
    spats.process_pair_db(pair_db)
Example #16
0
def dbrun():
    """Run Spats over an existing pair database and store the results in it.

    Command-line inputs: ``sys.argv[2]`` is the database path and
    ``sys.argv[3]`` is the name of the result set to write.
    """
    args = sys.argv
    db_path = args[2]
    run_name = args[3]
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    database = PairDB(db_path)
    processor = Spats()
    processor.run.writeback_results = True
    processor.run.result_set_name = run_name
    #processor.run.resume_processing = True
    processor.process_pair_db(database)
Example #17
0
 def _dump_indel_lens(self):
     """Write ``mapped_indel_len_counts.csv`` from the counters of a completed run.

     Rows are (indel length, count) for every ``mapped_indel_len_<n>`` counter,
     sorted by length.

     Raises:
         Exception: if the run file does not exist yet.
     """
     run_name = self._run_file()
     if not os.path.exists(run_name):
         raise Exception("Run must be run before attempting dump")
     spats = Spats()
     spats.load(run_name)
     countinfo = spats.counters.counts_dict()
     prefix = 'mapped_indel_len_'
     # The trailing "_<n>" of each matching counter name is the length.
     lengths = sorted(int(key.split('_')[-1]) for key in countinfo.keys() if key.startswith(prefix))
     ilen_cnt = [(length, countinfo["mapped_indel_len_{}".format(length)]) for length in lengths]
     output_path = os.path.join(self.path, 'mapped_indel_len_counts.csv')
     self._write_csv(output_path, [ "Indel Length", "Reads" ], ilen_cnt)
Example #18
0
def cotrans_test():
    """Process one hard-coded read pair against the F_wt target and print the outcome.

    Prints ``<target name>: <site>`` when the pair has a site, otherwise the
    failure reason.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT', 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Only dereference pair.target when a site was found, reporting the
    # failure otherwise -- the same guard the other debug helpers here use.
    if pair.has_site:
        print("{}: {}".format(pair.target.name, pair.site))
    else:
        print("FAIL: {}".format(pair.failure))
Example #19
0
def ligation_run():
    """Process the Shape_Seq_ligation KEW1 FASTQ pair against the panel targets, skipping the database."""
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"
    from spats_shape_seq import Spats
    spats = Spats()
    # Optional development toggles, disabled by default:
    #spats.config.debug = True
    #spats.run.minimum_target_match_length = 10
    #spats.run.num_workers = 1
    #from spats_shape_seq.partial import PartialFindProcessor
    #spats.run._processor_class = PartialFindProcessor
    spats.run.skip_database = True
    spats.addTargets(base + "panel_RNAs_complete.fa")
    r1_path = base + "data/KEW1_S1_L001_R1_001.fastq"
    r2_path = base + "data/KEW1_S1_L001_R2_001.fastq"
    spats.process_pair_data(r1_path, r2_path)
Example #20
0
    def validate(self):
        """Validate the results of a previous 'process' run against a second (slower) algorithm.

        Loads the saved run, validates it against the R1/R2 inputs and records
        the outcome as a note.

        Raises:
            Exception: if the run file does not exist yet.
        """
        run_name = self._run_file()
        if not os.path.exists(run_name):
            raise Exception("Run must be performed before validating")

        spats = Spats()
        spats.load(run_name)
        passed = spats.validate_results(self.r1, self.r2)
        self._add_note("Validation pass" if passed else "Validation FAILURE")
Example #21
0
def cotrans_run():
    """Process the EJS_6 cotrans FASTQ pair without a database and write reactivities.

    Output goes to ``dev_out/rx.out`` under the hard-coded base path.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    spats = Spats()
    run = spats.run
    #from spats_shape_seq.partial import PartialFindProcessor
    #run._processor_class = PartialFindProcessor
    run.skip_database = True
    #run.writeback_results = True
    #run.resume_processing = True
    #run.result_set_name = "lookup"
    spats.addTargets(base + "F_wt.fa")
    fastq_stem = base + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_"
    spats.process_pair_data(fastq_stem + "R1.fastq", fastq_stem + "R2.fastq")
    spats.compute_profiles()
    spats.write_reactivities(base + "dev_out/rx.out")
Example #22
0
class TestPairs(unittest.TestCase):
    """Pair -> site tests against the 5s target."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's first three fields."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert the resulting site equals ``case[3]``."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion here: the pair is only processed and its outcome printed.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG', 'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair yields no site at minimum length 11 and site 135 at 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # Configure the *new* instance: previously the length was assigned to
        # the old Spats right before it was replaced, so it never took effect.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Example #23
0
def tmut_case():
    """Process one hard-coded mutation read pair with find_partial and print its diagram.

    After the diagram, prints target name, site, end and mutations when the
    pair has a site, otherwise the failure reason. The commented-out ``c``
    assignments are alternative read pairs kept for manual debugging.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans = False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument call form prints identically on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Example #24
0
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print its reads and outcome.

    Prints the identifier and both original sequences, then either
    ``<target name>: <site> / <end>`` or the failure reason. The commented-out
    ``c`` assignments are alternative cases kept for manual debugging.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # NOTE(review): the pair loaded from JSON here is overwritten by the
    # hard-coded case `c` below; the JSON file is still read.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    c = ['jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument call form prints identically on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Example #25
0
def make_test_dataset():
    """Build ``ds.spats`` from the med FASTQ pair and store find_partial results in it.

    Results are written under the set name "test_validation"; the run and its
    counters (under the name 'spats') are then stored in the database.
    """
    base = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/data/"
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    database = PairDB(base + "ds.spats")
    database.add_targets_table(base + "../cotrans_single.fa")
    database.parse(base + "med_R1.fq", base + "med_R2.fq")
    spats = Spats(cotrans = True)
    run = spats.run
    run.num_workers = 1
    run.writeback_results = True
    run._process_all_pairs = True
    run.algorithm = "find_partial"
    run.result_set_name = "test_validation"
    spats.process_pair_db(database)
    database.store_run(run)
    database.store_counters('spats', spats.counters)
Example #26
0
def make_test_dataset():
    """Create the cotrans ``ds.spats`` database and populate its validation results.

    Parses the med R1/R2 FASTQ files into the database, processes every pair
    with ``find_partial`` under the result set "test_validation", then stores
    the run settings and the counters (named 'spats').
    """
    data_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/data/"
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    pairs = PairDB(data_dir + "ds.spats")
    pairs.add_targets_table(data_dir + "../cotrans_single.fa")
    r1_path = data_dir + "med_R1.fq"
    r2_path = data_dir + "med_R2.fq"
    pairs.parse(r1_path, r2_path)
    processor = Spats(cotrans=True)
    settings = processor.run
    settings.num_workers = 1
    settings.writeback_results = True
    settings._process_all_pairs = True
    settings.algorithm = "find_partial"
    settings.result_set_name = "test_validation"
    processor.process_pair_db(pairs)
    pairs.store_run(settings)
    pairs.store_counters('spats', processor.counters)
Example #27
0
    def test_minimum_length(self):
        """The same pair yields no site at minimum length 11 and site 135 at 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # Configure the *new* instance: previously the length was assigned to
        # the old Spats right before it was replaced, so it never took effect.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Example #28
0
def test_refactor():
    """Regenerate 5S reactivities into ``t3/`` and diff them against ``t2/rx.out``.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits non-zero, i.e. the
            two reactivity files differ.
    """
    from spats_clean import Spats
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    out = bp + "t3/"
    s = Spats(bp + "5S.fa", out)
    s.setup()
    s.process_pair_data(bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq",
                        bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq")
    s.compute_profiles()
    s.write_reactivities()
    import subprocess
    # `out` already ends with "/", so no extra separator is needed.
    subprocess.check_call(["diff", bp + "t2/rx.out", out + "rx.out"])
    # Single-argument call form prints identically on Python 2 and 3.
    print("Diff OK")
Example #29
0
def prof_run():
    """Process the 2k PDC mutation FASTQ pair with the lookup algorithm, then exit.

    Runs single-worker with the database skipped and terminates the
    interpreter with status 0 once processing finishes.
    """
    import sys
    from spats_shape_seq import Spats
    spats = Spats()
    #spats.run.cotrans = True
    #spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #spats.run.writeback_results = False
    spats.run._process_all_pairs = True
    spats.run.skip_database = True
    spats.run.algorithm = "lookup"
    spats.run.count_mutations = True
    spats.run.num_workers = 1

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/pdc_muts/PDC_tweaked/PDC_09_001_6/"
    spats.addTargets(bp + "target.fa")
    spats.process_pair_data(bp + "2k_R1.fastq",
                            bp + "2k_R2.fastq")
    # sys.exit is the programmatic way to exit; the bare `exit` builtin is
    # installed by the `site` module for interactive use and may be absent.
    sys.exit(0)
Example #30
0
def test_refactor():
    """Regenerate 5S reactivities into ``t3/`` and diff them against ``t2/rx.out``.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits non-zero, i.e. the
            two reactivity files differ.
    """
    from spats_clean import Spats
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    out = bp + "t3/"
    s = Spats(bp + "5S.fa", out)
    s.setup()
    s.process_pair_data(
        bp +
        "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq",
        bp +
        "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"
    )
    s.compute_profiles()
    s.write_reactivities()
    import subprocess
    # `out` already ends with "/", so no extra separator is needed.
    subprocess.check_call(["diff", bp + "t2/rx.out", out + "rx.out"])
    # Single-argument call form prints identically on Python 2 and 3.
    print("Diff OK")
Example #31
0
 def _dump_mut_counts(self):
     """Write ``mut_counts.csv`` and ``mapped_mut_counts.csv`` from a completed run.

     Each file lists (mutation count, reads) rows taken from the counters whose
     names start with ``mut_count_`` and ``mapped_mut_count_`` respectively,
     sorted by mutation count.

     Raises:
         Exception: if the run file does not exist yet.
     """
     run_name = self._run_file()
     if not os.path.exists(run_name):
         raise Exception("Run must be run before attempting dump")
     spats = Spats()
     spats.load(run_name)
     countinfo = spats.counters.counts_dict()

     def rows_for(prefix):
         # The trailing "_<n>" of each matching counter name is the mutation count.
         found = sorted(int(key.split('_')[-1]) for key in countinfo.keys() if key.startswith(prefix))
         return [(n, countinfo["{}{}".format(prefix, n)]) for n in found]

     outputs = (
         ('mut_count_', 'mut_counts.csv'),
         ('mapped_mut_count_', 'mapped_mut_counts.csv'),
     )
     for prefix, filename in outputs:
         mut_cnts = rows_for(prefix)
         output_path = os.path.join(self.path, filename)
         self._write_csv(output_path, [ "Mutation Count", "Reads" ], mut_cnts)
Example #32
0
 def setUp(self):
     """Prepare a Spats that counts mutations, then call ``setup_processor``."""
     spats = Spats()
     self.spats = spats
     settings = spats.run
     settings.count_mutations = True
     # Offset of the '.' quality character from '!'.
     settings.mutations_require_quality_score = ord('.') - ord('!')
     settings.allowed_target_errors = 1
     settings.ignore_stops_with_mismatched_overlap = True
     settings.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
     self.setup_processor()
Example #33
0
class TestMutPairs(unittest.TestCase):
    """Checks pair -> (site, end, mutations) results with mutation counting enabled.

    Each case is indexed as: [0] identifier, [1] R1 sequence, [2] R2 sequence,
    [3] expected end, [4] expected site, [5] expected mutations, and
    optionally [6]/[7] quality strings for R1/R2.
    """

    def setUp(self):
        """Create a ``Spats`` configured for mutation counting and set up the processor."""
        self.spats = Spats()
        run = self.spats.run
        run.count_mutations = True
        # ord('.') - ord('!') == 13 (presumably a Phred+33 quality threshold).
        run.mutations_require_quality_score = ord('.') - ord('!')
        run.allowed_target_errors = 1
        run.ignore_stops_with_mismatched_overlap = True
        run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        """Select the algorithm and load the mutation test targets."""
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from a case, using its quality strings when present.

        Cases with more than six entries carry explicit qualities at indices
        6 and 7; otherwise both reads get an all-'K' quality string.
        """
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
            pair.r1.quality = 'K' * len(case[1])
            pair.r2.quality = 'K' * len(case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site, and (when a site is found) end and mutations."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[4], pair.site, "res={} != {} ({}, {}, {})".format(pair.site, case[4], self.__class__.__name__, case[0], pair.failure))
        if pair.site is not None:
            self.assertEqual(case[3], pair.end, "end={} != {} ({}, {}, {})".format(pair.end, case[3], self.__class__.__name__, case[0], pair.failure))
            # Mutations are compared in sorted order; a falsy value is compared as-is.
            self.assertEqual(case[5], sorted(pair.mutations) if pair.mutations else pair.mutations, "muts={} != {} ({}, {}, {})".format(pair.mutations, case[5], self.__class__.__name__, case[0], pair.failure))
        return pair

    def cases(self):
        """Return the case list matching the current cotrans setting."""
        return cotrans_cases if self.spats.run.cotrans else cases

    def test_pairs(self):
        # Derive the pair length and the reported count from the case list
        # that is actually executed, so both stay correct when the cotrans
        # list is selected instead of the default one.
        active_cases = self.cases()
        self.spats.run.pair_length = len(active_cases[0][1])
        for case in active_cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(active_cases)))
Example #34
0
 def run_algorithm(self, alg):
     """Create a fresh ``Spats`` for algorithm ``alg`` on the SRP targets and run all pairs.

     Args:
         alg: algorithm name assigned to ``run.algorithm``.
     """
     from spats_shape_seq import Spats
     self.spats = Spats()
     run = self.spats.run
     run.algorithm = alg
     run.count_mutations = True
     # An error count, not a flag: use the integer 1 (was the bool True,
     # which only worked because bool is an int subclass).
     run.allowed_target_errors = 1
     run.ignore_stops_with_mismatched_overlap = True
     self.spats.addTargets("test/SRP/SRP.fa")
     self.run_pairs()
Example #35
0
def test_tags():
    """Run a few hard-coded 5S read pairs through the ``TagProcessor`` and print the tags.

    Registers the 5S sequence, its reverse complement and the adapters as
    tag targets, processes each case, and prints both original read
    sequences with their tags followed by a separator line.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    s = Spats()
    s.run._processor_class = TagProcessor

    #from spats_shape_seq.target import Targets
    #s.addTargets(bp + "5S.fa")
    s.addTarget("5s", "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC")
    #s.addTarget("rc(5s)", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    #s.addTarget("adapter_t", s.run.adapter_t)
    #s.addTarget("adapter_b", s.run.adapter_b)
    #s._targets._index_word_length = 8
    #s._targets._minimum_length = 8
    #s.addTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    #s.addTarget("adapter_b_rc", reverse_complement(s.run.adapter_b))

    # Tag targets are registered on the processor, separately from the
    # regular target added above.
    p = s._processor
    p.addTagTarget("5s", "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC")
    p.addTagTarget("5s_rc", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)

    # Each case: [identifier, R1 sequence, R2 sequence]
    cases = [
        [ "1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA" ],
        [ "1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG" ],
        [ "21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA", "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA" ],
        [ "1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG", "GGCATCTGACTCGGGCACCAAGGACATACAGATCG" ],
        [ "18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG", "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG" ],
    ]

    # One Pair object is reused across all cases.
    pair = Pair()
    for case in cases:
        pair.set_from_data(case[0], case[1], case[2])
        s.process_pair(pair)

        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")
Example #36
0
def cotrans_run():
    """Process the cotrans F_wt dataset without a database and write reactivities.

    Loads targets from ``F_wt.fa``, processes the R1/R2 fastq files, computes
    profiles and writes the reactivities to ``dev_out/rx.out``.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    r1_fastq = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    r2_fastq = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"

    spats = Spats()
    #from spats_shape_seq.partial import PartialFindProcessor
    #spats.run._processor_class = PartialFindProcessor
    spats.run.skip_database = True
    #spats.run.writeback_results = True
    #spats.run.resume_processing = True
    #spats.run.result_set_name = "lookup"
    spats.addTargets(bp + "F_wt.fa")
    spats.process_pair_data(r1_fastq, r2_fastq)
    spats.compute_profiles()
    spats.write_reactivities(bp + "dev_out/rx.out")
Example #37
0
class TestPanelPairs(unittest.TestCase):
    """Pair-processing checks against the complete panel-RNA target set."""

    def setUp(self):
        """Create a ``Spats`` with a minimum match length of 10 and load the panel targets."""
        from spats_shape_seq import Spats
        spats = Spats()
        spats.run.minimum_target_match_length = 10
        spats.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = spats

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # The pair must end up with no target, and the multiple_R1_match
        # counter must be incremented exactly once.
        run = self.spats.run
        run.debug = True
        run.algorithm = "find_partial"

        read_pair = Pair()
        read_pair.set_from_data(
            'M02465:8:000000000-A5D',
            'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA',
            'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA',
        )
        self.spats.process_pair(read_pair)

        self.assertEqual(None, read_pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
Example #38
0
def tags():
    """Parse a cotrans sample into a pair DB, tag every pair, and print the tag counts.

    Steps: wipe and re-populate the pair database (100000-pair sample),
    configure a ``Spats`` using the ``TagProcessor`` with results written
    back under the result set "tags", register the tag targets (targets,
    their reverse complements, adapters, cotrans linker), process the
    database, then count and print the tags.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    pair_db = PairDB(bp + "db/pairs.db")
    # Developer toggle: flip to False to reuse an already-parsed database.
    if True:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("Parsing to db...")
        pair_db.wipe()
        pair_db.add_targets_table(bp + "cotrans_single.fa")
        pair_db.parse(bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq",
                      bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq",
                      sample_size = 100000)

    s = Spats()
    s.run._processor_class = TagProcessor
    s.run.writeback_results = True
    s.run.result_set_name = "tags"
    s.run.num_workers = 1
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.loadTargets(pair_db)

    s.run.allow_indeterminate = True
    s.run.allowed_target_errors = 2
    s.run.allowed_adapter_errors = 2

    p = s._processor
    # Every target is tagged in both orientations.
    for target in pair_db.targets():
        p.addTagTarget(target[0], target[1])
        p.addTagTarget(target[0] + "_rc", reverse_complement(target[1]))
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)
    if s.run.cotrans:
        p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
        p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))

    s.process_pair_db(pair_db)
    rsid = pair_db.result_set_id_for_name(s.run.result_set_name)
    pair_db.count_tags(rsid)
    print(pair_db.tag_counts(rsid))
Example #39
0
class TestOverlap(unittest.TestCase):
    """Runs the overlap cases through every non-native algorithm.

    Each case is indexed as: [0] identifier, [1] R1 sequence, [2] R2 sequence,
    [3] expected site, [4] expected failure.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from a case's identifier and two read sequences."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert both the resulting site and failure."""
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site, "site res={} != {} ({}, {}, {}, {})".format(
                pair.site, case[3], self.__class__.__name__, case[0],
                self.spats.run.algorithm, pair.failure))
        self.assertEqual(
            case[4], pair.failure, "failure res={} != {} ({}, {}, {})".format(
                pair.failure, case[4], self.__class__.__name__, case[0],
                self.spats.run.algorithm))

    def test_pairs(self):
        # The 'native' algorithm is skipped here.
        for alg in algorithms:
            if alg == 'native':
                continue
            self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh ``Spats`` for ``alg`` on the SRP targets and run all cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        run = self.spats.run
        run.algorithm = alg
        run.count_mutations = True
        # An error count, not a flag: use the integer 1 (was the bool True,
        # which only worked because bool is an int subclass).
        run.allowed_target_errors = 1
        run.ignore_stops_with_mismatched_overlap = True
        self.spats.addTargets("test/SRP/SRP.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run every entry of ``overlap_cases`` against the current ``Spats``."""
        for case in overlap_cases:
            self.run_case(case)
        print("Ran {} overlap test cases.".format(len(overlap_cases)))
Example #40
0
    def test_minimum_length(self):
        # With a minimum match length of 11 the case must not yield a site
        # (expected value None); with a minimum of 8 it must map to site 135.
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [
            '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
            'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None
        ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting must be applied to the NEW Spats instance; it was
        # previously assigned to the old one just before it was replaced,
        # so it never took effect.
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Example #41
0
def d5s_writeback_run():
    """Process the 5S pair database with the result set name "pure_python".

    Opens ``dev_out/pairs.db``, adds the 5S targets table to it, loads the
    same targets into a ``Spats`` instance and processes the database.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    targets_fa = bp + "5s/5S.fa"

    pair_db = PairDB(bp + "dev_out/pairs.db")
    pair_db.add_targets_table(targets_fa)

    spats = Spats()
    spats.addTargets(targets_fa)
    # NOTE(review): these two are set on the Spats object itself, whereas
    # other scripts set them on `spats.run` -- confirm which is intended.
    spats.writeback_results = True
    spats.result_set_name = "pure_python"
    spats.process_pair_db(pair_db)
Example #42
0
class TestPanelPairs(unittest.TestCase):
    """Pair-processing checks against the complete panel-RNA target set."""

    def setUp(self):
        """Create a ``Spats`` with a minimum match length of 10 and load the panel targets."""
        from spats_shape_seq import Spats
        instance = Spats()
        instance.run.minimum_target_match_length = 10
        instance.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = instance

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # Expect no target on the pair and exactly one multiple_R1_match count.
        identifier = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'

        pair = Pair()
        pair.set_from_data(identifier, r1_seq, r2_seq)

        settings = self.spats.run
        settings.debug = True
        settings.algorithm = "find_partial"
        self.spats.process_pair(pair)

        self.assertEqual(None, pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
Example #43
0
def tag_test():
    """Process the single pair described in ``/tmp/spats_test.json`` in cotrans mode.

    The JSON file must provide the keys ``id``, ``r1`` and ``r2``. Prints the
    pair, then either the matched target with its site or the failure reason.
    """
    import cjb.util
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair

    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    pair = Pair()
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # Values are passed through str() before being handed to the pair.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
Example #44
0
def show_failure_types():
    """Print a one-line result summary for every pair in the filtered fastq files.

    Each line holds the pair identifier and either its site or its failure,
    followed by any match/adapter error details for R1 and R2.
    """
    from spats_clean import Spats, Pair, FastqRecord
    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in:
        with open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
            # The record and pair objects are reused for every read.
            r1_record = FastqRecord()
            r2_record = FastqRecord()
            pair = Pair()
            while True:
                r1_record.read(r1_in)
                if not r1_record.identifier:
                    # No identifier after a read: stop (presumably end of file).
                    break
                r2_record.read(r2_in)
                pair.set_from_records(r1_record, r2_record)

                spats.process_pair(pair)

                parts = ["{} :: {}".format(
                    pair.identifier,
                    pair.site if pair.has_site else pair.failure)]
                if pair.r1.match_errors:
                    parts.append(" R1!: {}".format(pair.r1.match_errors))
                if pair.r1.adapter_errors:
                    parts.append(" R1A!: {}, adapter_len={}".format(
                        pair.r1.adapter_errors, pair.r1._rtrim))
                if pair.r2.match_errors:
                    parts.append(" R2!: {}".format(pair.r2.match_errors))
                if pair.r2.adapter_errors:
                    # The "- 4" offset for R2 is carried over unchanged.
                    parts.append(" R2A!: {}, adapter_len={}".format(
                        pair.r2.adapter_errors, pair.r2._rtrim - 4))
                # Single-argument print() behaves the same on Python 2 and 3.
                print("".join(parts))
Example #45
0
    def _loadDBAndModel(self):
        """Load result-set info, run configuration, targets and counters from ``self._db``.

        Sets ``self.result_set_id``, ``self.has_tags``, ``self.has_counters``
        and ``self._spats``. When tags are available, the processor is given
        the tag targets (targets and their reverse complements, adapters,
        cotrans linker, and any extra tags from the run).
        """
        rsnames = self._db.result_sets()
        if rsnames:
            # Only the first result set is used.
            self.result_set_id = self._db.result_set_id_for_name(rsnames[0])
            self._db.index_results()
            self.has_tags = bool(self.result_set_id)
        else:
            self.result_set_id = -1
            self.has_tags = False

        self.has_counters = self._db.has_counters()

        s = Spats()
        self._db.load_run(s.run)
        s.run._p_use_tag_processor = True
        s.loadTargets(self._db)
        if self.has_counters:
            self._db.load_counters("spats", s.counters)

        if self.has_tags:
            p = s._processor
            for t in s._targets.targets:
                p.addTagTarget(t.name, t.seq)
                p.addTagTarget(t.name + "_rc", reverse_complement(t.seq))
                # Every target name gets the shared "target" color.
                self.colors._colors[t.name.lower()] = self.colors.color("target")
            p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
            p.addTagTarget("adapter_b", s.run.adapter_b)
            if s.run.cotrans:
                p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
                p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))
            if s.run._p_extra_tags:
                # .items() works on both Python 2 and 3 (.iteritems() is
                # Python 2 only).
                for tag, seq in s.run._p_extra_tags.items():
                    p.addTagTarget(tag, seq)
            if not self.has_counters:
                # No stored counters: rebuild them from the result data.
                p.counters.load_from_db_data(self._db.counter_data_for_results(self.result_set_id))

        self._spats = s
Example #46
0
def tmut():
    """Process the mutation dataset with two algorithms and diff their result sets.

    Re-parses the ``mut2`` fastq pair into ``ds_cmp.spats``, runs
    "find_partial" and "lookup" over it (result sets ``mut_find_partial`` and
    ``mut_lookup``), stores each run and its counters in the database, and
    finally calls ``rdiff_func`` on the two result sets.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    pair_db = PairDB(bp + "ds_cmp.spats")
    # Developer toggle: flip to False to reuse an already-parsed database.
    if True:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("Parsing to db...")
        pair_db.wipe()
        pair_db.add_targets_table(bp + "mut_single.fa")
        fq_name = "mut2"
        pair_db.parse(bp + fq_name + "_R1.fastq", bp + fq_name + "_R2.fastq")

    spatss = []
    for alg in ["find_partial", "lookup"]:
        spats = Spats(cotrans=False)
        spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        spats.run.count_mutations = True
        spats.run.algorithm = alg
        spats.run.allowed_target_errors = 1
        spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        spats.run._process_all_pairs = True
        spats.run.writeback_results = True
        spats.run.num_workers = 1
        spats.run.result_set_name = "mut_" + alg

        spats.process_pair_db(pair_db)
        pair_db.store_run(spats.run)
        pair_db.store_counters(spats.run.result_set_name, spats.counters)
        spatss.append(spats)

    # The first ("find_partial") instance is passed as diag_spats.
    rdiff_func(bp + "ds_cmp.spats",
               "mut_find_partial",
               "mut_lookup",
               diag_spats=spatss[0])
Example #47
0
class TestPairsPartial(unittest.TestCase):
    """Cotrans pair -> site checks using the "find_partial" algorithm.

    Each case is indexed as: [0] identifier, [1] R1 sequence, [2] R2 sequence,
    [3] expected end, [4] expected site.
    """

    def setUp(self):
        """Create a cotrans ``Spats``, choose the processor, then load the targets."""
        self.spats = Spats()
        run = self.spats.run
        run.cotrans = True
        run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.setup_processor()
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def setup_processor(self):
        """Select the algorithm used by this test class."""
        self.spats.run.algorithm = "find_partial"

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from a case's identifier and two read sequences."""
        identifier, r1_seq, r2_seq = case[0], case[1], case[2]
        result = Pair()
        result.set_from_data(identifier, r1_seq, r2_seq)
        return result

    def run_case(self, case):
        """Process one case; assert its site and, when a site was found, its end."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        failure_message = "res={} != {} ({}, {})".format(pair.site, case[4], self.__class__.__name__, case[0])
        self.assertEqual(case[4], pair.site, failure_message)
        if pair.site is not None:
            self.assertEqual(case[3], pair.end)
        return pair

    def test_pairs(self):
        self.spats.run.pair_length = len(cases[0][1])
        if self.spats._processor.exists():
            for case in cases:
                self.run_case(case)
            print("Ran {} pair->site cases.".format(len(cases)))
        else:
            # just ignore the native test if it's not available
            self.assertEqual("native", self.spats.run.algorithm)
Example #48
0
class TestMutPairs(unittest.TestCase):
    def setUp(self):
        self.spats = Spats()
        self.spats.run.count_mutations = True
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
Example #49
0
class TestShortAdapterB(unittest.TestCase):
    """Runs the short adapter_b cases through every non-native algorithm.

    Each case is indexed as: [0] identifier, [1] R1 sequence, [2] R2 sequence,
    [3] expected site.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from a case's identifier and two read sequences."""
        identifier, r1_seq, r2_seq = case[0], case[1], case[2]
        result = Pair()
        result.set_from_data(identifier, r1_seq, r2_seq)
        return result

    def run_case(self, case):
        """Process one case and assert the resulting site."""
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(pair)
        failure_message = "site res={} != {} ({}, {}, {}, {})".format(
            pair.site, case[3], self.__class__.__name__, case[0],
            self.spats.run.algorithm, pair.failure)
        self.assertEqual(case[3], pair.site, failure_message)

    def test_pairs(self):
        # The 'native' algorithm is skipped here.
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh ``Spats`` for ``alg`` on the 5SrRNA targets and run all cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run every entry of ``short_adapter_b_cases`` against the current ``Spats``."""
        for case in short_adapter_b_cases:
            self.run_case(case)
        total = len(short_adapter_b_cases)
        print("Ran {} adapter_b test cases.".format(total))
Example #50
0
def diag_case():
    """Print diagrams for the starred mutation test cases and fail on any mismatch.

    Only cases whose identifier starts with "*" are run. For each one the
    pair is processed with debugging on and its diagram printed.

    Raises:
        Exception: if any processed case's site differs from ``case[3]``.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    #from spats_shape_seq.tests.test_pairs import prefix_cases as cases
    from spats_shape_seq.diagram import diagram
    #spats_config.minimum_target_match_length = 8
    spats = Spats()
    #spats.addTargets("test/5s/5s.fa")
    spats.addTargets("test/mut/mut_single.fa")
    spats.run.debug = True
    spats.run.algorithm = "find_partial"
    spats.run.count_mutations = True
    #spats.run.mutations_require_quality_score = ord('.') - ord('!')
    spats.run.allowed_target_errors = 1
    spats.run.ignore_stops_with_mismatched_overlap = True
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"

    #spats.run.collapse_left_prefixes = True
    # Error flag stashed on the Spats object so the nested function can set it.
    spats._case_errors = False

    def run_case(case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(diagram(pair, spats.run))
        # NOTE(review): the site is compared with case[3]; confirm that index 3
        # is the expected site for the test_mut case layout.
        if case[3] != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(case[3], pair.site))

    for case in cases:
        if case[0].startswith("*"):
            run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
Example #51
0
def indels_run():
    """Process the hairpin match-failure reads with indel and mutation counting, then exit.

    Loads targets from ``hairpinA_circ.fa``, processes the R1/R2
    match-failure fastq files and terminates the interpreter with status 0.
    """
    from spats_shape_seq import Spats
    s = Spats()
    # NOTE(review): `algorithm` is given a bool here, while other scripts
    # assign a name such as "find_partial" or "lookup" -- confirm the intent.
    s.run.algorithm = True
    s.run.count_indels = True
    s.run.count_mutations = True
    s.run.allowed_target_errors = 8
    # Run options belong on `s.run`; these two were previously assigned to
    # the Spats object itself, unlike every other option in this function.
    s.run.collapse_left_prefixes = True
    s.run.ignore_stops_with_mismatched_overlap = True
    s.run.allow_negative_values = True
    s.run.mutations_require_quality_score = 30
    bp = "/Users/steve/mos/tasks/oughxX/code"
    s.addTargets(bp + "/test/hairpin/hairpinA_circ.fa")
    rp = bp + "/TESTING/cmp_muts_favored/steve_test"
    s.process_pair_data(rp + "/R1_match_failures.fastq",
                        rp + "/R2_match_failures.fastq")
    exit(0)
Example #52
0
 def __init__(self, reads_data, cotrans = False):
     """Store the reads data and prepare a tag-processing ``Spats`` instance.

     Args:
         reads_data: object exposing a ``pair_db`` attribute.
         cotrans: value assigned to the run's ``cotrans`` option.
     """
     self._reads_data = reads_data
     self._pair_db = reads_data.pair_db

     spats = Spats()
     run = spats.run
     run._p_use_tag_processor = True
     run.cotrans = cotrans
     # Results are written back under the result set name "tags".
     run.writeback_results = True
     run.result_set_name = "tags"
     run.allow_indeterminate = True
     run.allowed_target_errors = 2
     run.allowed_adapter_errors = 2
     run.num_workers = 8

     self._spats = spats
     self._extra_tag_targets = []
     self._plugins = {}
Example #53
0
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print the outcome.

    The pair is first populated from ``/tmp/spats_test.json`` (keys ``id``,
    ``r1``, ``r2``) and then overwritten with the last uncommented ``c``
    case below, which is the one actually processed. Prints the pair, then
    either the matched target with site/end or the failure reason.
    """
    import cjb.util
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair

    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    pair = Pair()

    # The JSON file is still required to exist even though its data is
    # replaced by the hard-coded case further down.
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    # This assignment is superseded by the 'jjbn' case below.
    c = [
        "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA",
        "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68
    ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    c = [
        'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA',
        'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71
    ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Example #54
0
def tmut_case():
    """Process one hard-coded mutation read pair and print its diagram and result.

    Uses the "find_partial" algorithm with mutation counting on the
    ``mut_single.fa`` targets. Prints the diagram, then either the matched
    target with site, end and mutations, or the failure reason.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    from spats_shape_seq.pair import Pair

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans=False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    pair = Pair()

    # Alternative [R1, R2] cases kept for manual debugging; the uncommented
    # `c` below is the one that runs.
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [
        'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA',
        'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA'
    ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end,
                                      pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Example #55
0
def cotrans_test():
    """Process one hard-coded read pair against the ``F_wt.fa`` targets.

    Prints the matched target name and the site. ``pair.target.name`` is
    read unconditionally, so the pair is expected to match a target.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Imported only for the commented-out processor override below.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT',
                       'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{}: {}".format(pair.target.name, pair.site))
Example #56
0
def ligation_run():
    """Process the Shape_Seq_ligation KEW1 dataset against the panel RNAs, skipping the database."""
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"
    r1_fastq = bp + "data/KEW1_S1_L001_R1_001.fastq"
    r2_fastq = bp + "data/KEW1_S1_L001_R2_001.fastq"

    spats = Spats()
    #spats.config.debug = True
    #spats.run.minimum_target_match_length = 10
    #spats.run.num_workers = 1
    #from spats_shape_seq.partial import PartialFindProcessor
    #spats.run._processor_class = PartialFindProcessor
    spats.run.skip_database = True
    spats.addTargets(bp + "panel_RNAs_complete.fa")
    spats.process_pair_data(r1_fastq, r2_fastq)
Example #57
0
class TestDatasets(unittest.TestCase):
    """Re-processes stored datasets and compares against their validation result sets."""

    def test_datasets(self):
        # Every dataset is run with every algorithm; when SKIP_SLOW_TESTS is
        # set, reaching the 'native' algorithm skips the test.
        for case in cases:
            for alg in algorithms:
                if os.environ.get('SKIP_SLOW_TESTS') and alg == 'native':
                    raise nose.SkipTest('skipping slow tests')
                self.run_dataset(case, alg)
        print("Ran {} datasets.".format(len(cases)))

    def run_dataset(self, case, algorithm):
        """Process a temporary copy of one dataset and assert it matches its validation set.

        The dataset's ``ds.spats`` file is copied to a temporary file, which
        is removed again in all cases. Results are written to the result set
        "test" and compared with "test_validation".

        Args:
            case: dataset directory name under ``test/``.
            algorithm: algorithm name assigned to the run.
        """
        base_dir = "test/{}/".format(case)
        test_file = base_dir + "test.spats.tmp"
        try:
            shutil.copyfile(base_dir + "ds.spats", test_file)
            db = PairDB(test_file)
            s = Spats()
            db.load_run(s.run)
            if algorithm == "native" and not s.run.cotrans:
                return
            s.run.writeback_results = True
            s.run.result_set_name = "test"
            s.run.algorithm = algorithm
            s.run.quiet = True
            s.loadTargets(db)
            if not s._processor.exists():
                # just ignore the native test if it's not available
                self.assertEqual("native", algorithm)
                return
            # small batch_size just to exercise multiprocessing code
            s.process_pair_db(db, batch_size=1024)
            differences = [
                str([str(x) for x in res])
                for res in db.differing_results("test", "test_validation")
            ]
            count = len(differences)
            # Only the last differing result is included in the message.
            msg = differences[-1] if differences else None
            self.assertEqual(
                0, count, "{} differing results: {} / {} \n{}".format(
                    count, case, algorithm, msg))
        finally:
            if os.path.exists(test_file):
                os.remove(test_file)