Ejemplo n.º 1
0
def show_failure_types():
    """Print one diagnostic line per read pair of the filtered t11 FASTQ files.

    Each pair read from ``filtered_R1.fq`` / ``filtered_R2.fq`` is passed to
    ``Spats.process_pair``.  The printed line holds the pair identifier and
    either its site (when ``pair.has_site``) or its failure, followed by any
    match/adapter errors recorded on R1 and R2.

    Reading stops at the first R1 record whose identifier is empty.
    """
    from spats_clean import Spats, Pair, FastqRecord
    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in:
        with open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
            # The same record and pair objects are refilled on every iteration.
            r1_record = FastqRecord()
            r2_record = FastqRecord()
            pair = Pair()
            while True:
                r1_record.read(r1_in)
                if not r1_record.identifier:
                    break
                r2_record.read(r2_in)
                pair.set_from_records(r1_record, r2_record)

                spats.process_pair(pair)

                parts = ["{} :: {}".format(pair.identifier, pair.site if pair.has_site else pair.failure)]
                if pair.r1.match_errors:
                    parts.append(" R1!: {}".format(pair.r1.match_errors))
                if pair.r1.adapter_errors:
                    parts.append(" R1A!: {}, adapter_len={}".format(pair.r1.adapter_errors, pair.r1._rtrim))
                if pair.r2.match_errors:
                    parts.append(" R2!: {}".format(pair.r2.match_errors))
                if pair.r2.adapter_errors:
                    # NOTE(review): the R2 adapter length is reported as _rtrim - 4,
                    # unlike R1; the reason for the 4 is not visible here.
                    parts.append(" R2A!: {}, adapter_len={}".format(pair.r2.adapter_errors, pair.r2._rtrim - 4))
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("".join(parts))
Ejemplo n.º 2
0
class TestOnlyPrefixes(unittest.TestCase):
    """Run ``prefix_cases`` through a cotrans Spats with ``collapse_only_prefixes`` set.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data``, item 4 is the site expected when the prefix is
    allowed, and item 5 is the prefix itself (falsy when the case has none).
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.cotrans = True
        self.spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats.run.collapse_left_prefixes = True
        self.spats.run.collapse_only_prefixes = "T,ACGT,CCA"
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three items of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case* and, if it carries a prefix, check the resulting site.

        The site must equal ``case[4]`` when the prefix is one of the
        configured ``collapse_only_prefixes``, and ``None`` otherwise.
        Returns the processed pair.
        """
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        if case[5]:
            allowed = self.spats.run.collapse_only_prefixes.split(',')
            expect = case[4] if case[5] in allowed else None
            # Report the value actually compared against (may be None), not case[4].
            self.assertEqual(expect, pair.site, "PREF res={} != {} ({}, {})".format(pair.site, expect, self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Ejemplo n.º 3
0
class TestPrefixPairs(unittest.TestCase):
    """Run ``prefix_cases`` against the 5s target with ``collapse_left_prefixes`` enabled.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data``, item 3 is the expected site, and item 4 (when
    truthy) is the suffix of the ``prefix_RRRY_*`` counter expected to be 1.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three items of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case*, assert its site and prefix counter, and return the pair."""
        pair = self.pair_for_case(case)
        # Reset so the counter check below sees only this pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(
                1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]),
                "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Ejemplo n.º 4
0
class TestPrefixPairs(unittest.TestCase):
    """Run ``prefix_cases`` against the 5s target with ``collapse_left_prefixes`` enabled.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data``, item 3 is the expected site, and item 4 (when
    truthy) is the suffix of the ``prefix_RRRY_*`` counter expected to be 1.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three items of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case*, assert its site and prefix counter, and return the pair."""
        pair = self.pair_for_case(case)
        # Reset so the counter check below sees only this pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]), "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was actually iterated.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Ejemplo n.º 5
0
def show_failure_types():
    """Print one diagnostic line per read pair of the filtered t11 FASTQ files.

    Each pair read from ``filtered_R1.fq`` / ``filtered_R2.fq`` is passed to
    ``Spats.process_pair``.  The printed line holds the pair identifier and
    either its site (when ``pair.has_site``) or its failure, followed by any
    match/adapter errors recorded on R1 and R2.

    Reading stops at the first R1 record whose identifier is empty.
    """
    from spats_clean import Spats, Pair, FastqRecord
    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in:
        with open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
            # The same record and pair objects are refilled on every iteration.
            r1_record = FastqRecord()
            r2_record = FastqRecord()
            pair = Pair()
            while True:
                r1_record.read(r1_in)
                if not r1_record.identifier:
                    break
                r2_record.read(r2_in)
                pair.set_from_records(r1_record, r2_record)

                spats.process_pair(pair)

                parts = [
                    "{} :: {}".format(
                        pair.identifier,
                        pair.site if pair.has_site else pair.failure)
                ]
                if pair.r1.match_errors:
                    parts.append(" R1!: {}".format(pair.r1.match_errors))
                if pair.r1.adapter_errors:
                    parts.append(" R1A!: {}, adapter_len={}".format(
                        pair.r1.adapter_errors, pair.r1._rtrim))
                if pair.r2.match_errors:
                    parts.append(" R2!: {}".format(pair.r2.match_errors))
                if pair.r2.adapter_errors:
                    # NOTE(review): the R2 adapter length is reported as
                    # _rtrim - 4, unlike R1; the reason for the 4 is not
                    # visible here.
                    parts.append(" R2A!: {}, adapter_len={}".format(
                        pair.r2.adapter_errors, pair.r2._rtrim - 4))
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("".join(parts))
Ejemplo n.º 6
0
    def compare(self):
        """Check that the 'find_partial' and 'lookup' algorithms agree on every read pair.

        Two Spats instances are configured for the same target, one per
        algorithm, and each pair from ``self.r1`` / ``self.r2`` is processed by
        both.  On the first disagreement the pair is printed together with a
        JSON description of it and the method returns.  If every pair agrees,
        the totals and both instances' count reports are printed.
        """

        from spats_shape_seq import Spats
        from spats_shape_seq.pair import Pair

        target_path = self.config['target']
        json_base = {
            'target' : target_path,
            'config' : { 'algorithm' : 'find_partial', 'debug' : True },
            'expect' : {},
        }

        spats_fp = Spats(cotrans = self.cotrans)
        spats_lookup = Spats(cotrans = self.cotrans)
        self._update_run_config(spats_fp.run)
        self._update_run_config(spats_lookup.run, json_base['config'])
        spats_fp.run.algorithm = 'find_partial'
        spats_lookup.run.algorithm = 'lookup'

        spats_fp.addTargets(target_path)
        spats_lookup.addTargets(target_path)

        compared = 0   # pairs on which both algorithms agreed (site or no site)
        matched = 0    # subset of those where both found the same site
        with FastFastqParser(self.r1, self.r2) as parser:
            total = parser.appx_number_of_pairs()
            for chunk in parser.iterator(5000):
                for record in chunk:
                    pair_fp = Pair()
                    pair_lookup = Pair()
                    pair_fp.set_from_data(str(record[0]), record[1], record[2])
                    pair_lookup.set_from_data(str(record[0]), record[1], record[2])
                    try:
                        spats_fp.process_pair(pair_fp)
                        spats_lookup.process_pair(pair_lookup)
                    except:
                        # Report progress so far, then let the error propagate.
                        print('Error after {}/{}'.format(matched, compared))
                        raise

                    same_outcome = (pair_fp.has_site == pair_lookup.has_site)
                    if same_outcome and not pair_fp.has_site:
                        # Both failed to find a site: agreement, but not a match.
                        compared += 1
                        continue
                    if (same_outcome and
                        pair_fp.target.name == pair_lookup.target.name and
                        pair_fp.end == pair_lookup.end and
                        pair_fp.site == pair_lookup.site and
                        pair_fp.mutations == pair_lookup.mutations):
                        compared += 1
                        matched += 1
                        continue

                    # First disagreement: dump the pair and stop.
                    json_base["id"] = str(record[0])
                    json_base["R1"] = str(record[1])
                    json_base["R2"] = str(record[2])
                    dumped = json.dumps(json_base, sort_keys = True, indent = 4, separators = (',', ': '))
                    print('After {}/{} matches; mismatched pair: {} != {}\n{}'.format(matched, compared, pair_fp, pair_lookup, dumped))
                    return
                print('{}/{}-{}...'.format(matched, compared, total))
        spats_fp.counters.total_pairs = compared
        spats_lookup.counters.total_pairs = compared
        print('All match {}/{}.'.format(matched, compared))
        print(spats_fp._report_counts())
        print(spats_lookup._report_counts())
Ejemplo n.º 7
0
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print its site/end or failure.

    The commented ``c = [...]`` lines are alternative cases kept for manual
    debugging; only the last, uncommented assignment is used.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    # NOTE(review): the pair loaded from JSON here is replaced by the
    # hard-coded case below before it is processed.
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    c = [
        'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA',
        'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71
    ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 8
0
class TestPairs(unittest.TestCase):
    """Pair -> site checks against the 5s target.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data`` and item 3 is the expected ``pair.site``.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three items of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case*, assert its site equals ``case[3]``, and return the pair."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion: this only checks that processing completes, and prints
        # the outcome for inspection.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG',
                           'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [
            '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
            'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None
        ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting must go on the fresh instance; assigning it before
        # re-creating Spats() would be discarded.
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Ejemplo n.º 9
0
def cotrans_test():
    """Process one hard-coded read pair against the F_wt target and print the result.

    Prints ``<target name>: <site>`` when the pair has a site, otherwise
    ``FAIL: <failure>``.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Kept for the commented-out processor override on the next line.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT', 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Only read pair.target when a site was found; report the failure otherwise.
    if pair.has_site:
        print("{}: {}".format(pair.target.name, pair.site))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 10
0
def tmut_case():
    """Process one hard-coded read pair with mutation counting and print a diagram of it.

    After the diagram, prints the target name, site, end and mutations when
    the pair has a site, otherwise ``FAIL: <failure>``.  The commented
    ``c = [...]`` lines are alternative cases kept for manual debugging.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans=False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [
        'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA',
        'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA'
    ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end,
                                      pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 11
0
def cotrans_test():
    """Process one hard-coded read pair against the F_wt target and print the result.

    Prints ``<target name>: <site>`` when the pair has a site, otherwise
    ``FAIL: <failure>``.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Kept for the commented-out processor override on the next line.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT',
                       'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Only read pair.target when a site was found; report the failure otherwise.
    if pair.has_site:
        print("{}: {}".format(pair.target.name, pair.site))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 12
0
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print its site/end or failure.

    The commented ``c = [...]`` lines are alternative cases kept for manual
    debugging; only the last, uncommented assignment is used.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    # NOTE(review): the pair loaded from JSON here is replaced by the
    # hard-coded case below before it is processed.
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    c = ['jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 13
0
class TestPairs(unittest.TestCase):
    """Pair -> site checks against the 5s target.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data`` and item 3 is the expected ``pair.site``.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three items of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case*, assert its site equals ``case[3]``, and return the pair."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion: this only checks that processing completes, and prints
        # the outcome for inspection.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG', 'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting must go on the fresh instance; assigning it before
        # re-creating Spats() would be discarded.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Ejemplo n.º 14
0
def tmut_case():
    """Process one hard-coded read pair with mutation counting and print a diagram of it.

    After the diagram, prints the target name, site, end and mutations when
    the pair has a site, otherwise ``FAIL: <failure>``.  The commented
    ``c = [...]`` lines are alternative cases kept for manual debugging.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans = False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 15
0
class TestMutPairs(unittest.TestCase):
    """Pair -> (site, end, mutations) checks with mutation counting enabled.

    Each case is indexed positionally: items 0-2 are handed to
    ``Pair.set_from_data``, item 3 is the expected end, item 4 the expected
    site, item 5 the expected mutations, and optional items 6 and 7 are the
    R1 and R2 quality strings.
    """

    def setUp(self):
        self.spats = Spats()
        self.spats.run.count_mutations = True
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        """Select the algorithm and load the targets; separate from setUp so it can be overridden."""
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from *case*, using 'K' qualities when the case supplies none."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
            pair.r1.quality = 'K' * len(case[1])
            pair.r2.quality = 'K' * len(case[2])
        return pair

    def run_case(self, case):
        """Process *case* and assert its site; when a site is found, also its end and mutations."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[4], pair.site, "res={} != {} ({}, {}, {})".format(pair.site, case[4], self.__class__.__name__, case[0], pair.failure))
        if pair.site is not None:
            self.assertEqual(case[3], pair.end, "end={} != {} ({}, {}, {})".format(pair.end, case[3], self.__class__.__name__, case[0], pair.failure))
            # Mutations are compared in sorted order; a falsy value is compared as-is.
            self.assertEqual(case[5], sorted(pair.mutations) if pair.mutations else pair.mutations, "muts={} != {} ({}, {}, {})".format(pair.mutations, case[5], self.__class__.__name__, case[0], pair.failure))
        return pair

    def cases(self):
        """Return the case list matching the run's cotrans setting."""
        return cotrans_cases if self.spats.run.cotrans else cases

    def test_pairs(self):
        # NOTE(review): pair_length is taken from the non-cotrans list even when
        # self.cases() selects cotrans_cases -- confirm this is intended.
        self.spats.run.pair_length = len(cases[0][1])
        selected = self.cases()
        for case in selected:
            self.run_case(case)
        # Count the list that was actually run, not always the non-cotrans one.
        print("Ran {} pair->site cases.".format(len(selected)))
Ejemplo n.º 16
0
def test_tags():
    """Tag a few hand-picked read pairs against 5S and print the results.

    A Spats instance is switched to TagProcessor, the "5s" target is added,
    and four tag targets ("5s", "5s_rc", "adapter_t_rc", "adapter_b") are
    registered on the processor. For each sample pair the original R1 / R2
    sequences and the tags attached to them are printed, followed by a
    separator line.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    # The same 5S sequence serves as the Spats target and as a tag target.
    five_s = "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC"
    five_s_rc = "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC"

    s = Spats()
    # The processor class must be chosen before the processor is first used.
    s.run._processor_class = TagProcessor
    s.addTarget("5s", five_s)

    p = s._processor
    p.addTagTarget("5s", five_s)
    p.addTagTarget("5s_rc", five_s_rc)
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)

    # (identifier, R1 read, R2 read)
    cases = [
        [ "1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA" ],
        [ "1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG" ],
        [ "21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA", "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA" ],
        [ "1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG", "GGCATCTGACTCGGGCACCAAGGACATACAGATCG" ],
        [ "18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG", "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG" ],
    ]

    # One Pair object is reused; set_from_data reloads it for every case.
    pair = Pair()
    for identifier, r1_seq, r2_seq in cases:
        pair.set_from_data(identifier, r1_seq, r2_seq)
        s.process_pair(pair)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")
Ejemplo n.º 17
0
class TestPanelPairs(unittest.TestCase):
    """Pair-processing checks against the complete panel-RNA target file."""

    def setUp(self):
        """Create a Spats instance with minimum_target_match_length 10 and the panel targets."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        run_options = self.spats.run
        run_options.minimum_target_match_length = 10
        self.spats.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        """The pair must end with no target and exactly one multiple_R1_match count."""
        identifier = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'

        pair = Pair()
        pair.set_from_data(identifier, r1_seq, r2_seq)

        run_options = self.spats.run
        run_options.debug = True
        run_options.algorithm = "find_partial"
        self.spats.process_pair(pair)

        self.assertEqual(None, pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
Ejemplo n.º 18
0
class TestPanelPairs(unittest.TestCase):
    """Checks how a single read pair resolves against the panel-RNA targets."""

    def setUp(self):
        """Build the Spats instance used by the test (match length 10, panel targets)."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        setattr(self.spats.run, 'minimum_target_match_length', 10)
        self.spats.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")

    def tearDown(self):
        """Forget the Spats instance built in setUp."""
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        """Process the pair with find_partial; expect no target and one multiple_R1_match."""
        # (identifier, R1 read, R2 read)
        pair_data = (
            'M02465:8:000000000-A5D',
            'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA',
            'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA',
        )
        pair = Pair()
        pair.set_from_data(*pair_data)

        self.spats.run.debug = True
        self.spats.run.algorithm = "find_partial"
        self.spats.process_pair(pair)

        matched_target = pair.target
        self.assertEqual(None, matched_target)
        multiple_r1 = self.spats.counters.multiple_R1_match
        self.assertEqual(1, multiple_r1)
Ejemplo n.º 19
0
def tag_test():
    """Process the single read pair described in /tmp/spats_test.json and print the outcome.

    A cotrans run using the find_partial algorithm is configured, targets are
    loaded from cotrans_single.fa, and the pair is built from the 'id', 'r1'
    and 'r2' entries of the JSON file. The pair's identifier and original
    reads are printed first; after processing, either the target name with
    site / right is printed, or "FAIL:" followed by the failure.
    """
    import cjb.util
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair

    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    pair = Pair()
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # str() is applied to each value before it is handed to the pair.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
Ejemplo n.º 20
0
def tag_test():
    """Run one read pair from /tmp/spats_test.json through a cotrans Spats run.

    The run uses the find_partial algorithm with targets from
    cotrans_single.fa. The JSON file supplies the 'id', 'r1' and 'r2' values
    of the pair. Output is the pair's identifier and original reads, then
    either "<target name>: <site> / <right>" or "FAIL: <failure>".
    """
    import cjb.util
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair

    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair = Pair()
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # print is called with one pre-formatted string so the statement parses
    # and prints identically under Python 2 and Python 3.
    header = "{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                                  pair.r2.original_seq)
    print(header)

    s.process_pair(pair)
    if pair.has_site:
        outcome = "{}: {} / {}".format(pair.target.name, pair.site, pair.right)
    else:
        outcome = "FAIL: {}".format(pair.failure)
    print(outcome)
Ejemplo n.º 21
0
class TestPairsPartial(unittest.TestCase):
    """Runs the module-level pair -> site cases on a cotrans run using find_partial."""

    def setUp(self):
        """Configure a cotrans Spats instance, pick the processor, then load targets."""
        spats = Spats()
        self.spats = spats
        spats.run.cotrans = True
        spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.setup_processor()
        spats.addTargets("test/cotrans/cotrans_single.fa")

    def setup_processor(self):
        """Select the algorithm under test."""
        self.spats.run.algorithm = "find_partial"

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair loaded from the case's identifier and two reads."""
        identifier, r1_seq, r2_seq = case[0], case[1], case[2]
        pair = Pair()
        pair.set_from_data(identifier, r1_seq, r2_seq)
        return pair

    def run_case(self, case):
        """Process one case, assert its site (and end when a site exists), return the pair."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        expected_site = case[4]
        failure_note = "res={} != {} ({}, {})".format(pair.site, expected_site, self.__class__.__name__, case[0])
        self.assertEqual(expected_site, pair.site, failure_note)
        if pair.site is None:
            return pair
        self.assertEqual(case[3], pair.end)
        return pair

    def test_pairs(self):
        """Run all cases, or only check the algorithm name when the processor is unavailable."""
        self.spats.run.pair_length = len(cases[0][1])
        if self.spats._processor.exists():
            for case in cases:
                self.run_case(case)
            print("Ran {} pair->site cases.".format(len(cases)))
        else:
            # just ignore the native test if it's not available
            self.assertEqual("native", self.spats.run.algorithm)
Ejemplo n.º 22
0
    def _show_case(self, test_case):
        """Run one test case under each requested algorithm and print its diagram.

        The algorithms are ``run_opts['algorithm']`` when set, otherwise
        ``run_opts['algorithms']`` (default: find_partial and lookup). For
        each one a fresh Spats instance is configured from the case's run
        options and targets, the case's pair is processed and its diagram is
        printed. When the case carries expectations they are verified: the
        first mismatch prints ``FAIL: ...`` and exits the process with status
        1; otherwise ``PASS`` is printed.

        Raises:
            Exception: if a run option names an attribute the Spats run
                object does not have.
        """

        from spats_shape_seq import Spats
        from spats_shape_seq.diagram import diagram

        alg = test_case.run_opts.get('algorithm')
        algs = [ alg ] if alg else test_case.run_opts.get('algorithms', [ 'find_partial', 'lookup' ])
        for algorithm in algs:
            spats = Spats()
            spats.run.algorithm = algorithm

            for key, value in test_case.run_opts.items():
                if str(key) == 'algorithms':
                    continue
                if isinstance(value, unicode):
                    value = str(value)
                if not hasattr(spats.run, key):
                    raise Exception('Invalid run_opt: {}'.format(key))
                setattr(spats.run, key, value)

            for name, seq in test_case.targets.iteritems():
                spats.addTarget(name, seq)

            pair = test_case.pair()
            if len(algs) > 1:
                print('\n[[ ALGORITHM: {} ]]'.format(algorithm))
            spats.process_pair(pair)
            if test_case.comment:
                print('Comment: {}'.format(test_case.comment))
            print(diagram(pair, spats.run))

            expects = test_case.expect
            if not expects:
                continue

            # should mirror the expectation checks in test_harness.py...
            try:

                if expects['site'] is None:
                    if pair.site is not None:
                        raise Exception("pair.site={} when expecting none.".format(pair.site))
                else:
                    if pair.site is None:
                        raise Exception("pair.site is none when expecting {}.".format(expects['site']))
                    if pair.site != expects['site']:
                        raise Exception("pair.site={} != expect.site={}".format(pair.site, expects['site']))
                    if 'end' in expects and pair.end != expects['end']:
                        raise Exception("pair.end={} != expect.end={}".format(pair.end, expects['end']))
                    if 'muts' in expects:
                        if expects['muts'] is not None  and  len(expects['muts']) > 0:
                            if not sorted(expects['muts']) == (sorted(pair.mutations) if pair.mutations else pair.mutations):
                                raise Exception("mismatching mutations:  expected={}, pair.mutations={}".format(expects['muts'], pair.mutations))
                        else:
                            if not (pair.mutations is None  or len(pair.mutations) == 0):
                                raise Exception("unexpected mutations: {}".format(pair.mutations))
                    if 'r1_indels' in expects:
                        r1inds = objdict_to_dict(pair.r1.indels)
                        if expects['r1_indels']:
                            if expects['r1_indels'] != r1inds:
                                raise Exception("mismatching R1 indels:  expected={}, pair.r1.indels={}".format(expects['r1_indels'], r1inds))
                        elif pair.r1.indels:
                            raise Exception("unexpected R1 indels:  pair.r1.indels={}".format(pair.r1.indels))
                    if 'r2_indels' in expects:
                        r2inds = objdict_to_dict(pair.r2.indels)
                        if expects['r2_indels']:
                            if expects['r2_indels'] != r2inds:
                                raise Exception("mismatching R2 indels:  expected={}, pair.r2.indels={}".format(expects['r2_indels'], r2inds))
                        elif pair.r2.indels:
                            raise Exception("unexpected R2 indels:  pair.r2.indels={}".format(r2inds))

                # Counter and target expectations apply whether or not a site
                # was expected, so they are checked outside the site branch;
                # otherwise cases expecting no site would skip them and
                # report PASS unconditionally.
                if 'counters' in expects:
                    for counter, value in expects['counters'].iteritems():
                        got = getattr(spats.counters, str(counter))
                        if got != value:
                            raise Exception("counter '{}' value off: expected={} != got={}".format(counter, value, got))
                if 'pair.target' in expects:
                    tname = pair.target.name if pair.target else None
                    if tname != expects['pair.target']:
                        raise Exception("pair.target={} != expect.pair.target={}".format(tname, expects['pair.target']))

            except Exception as e:
                print('FAIL: {}'.format(e))
                sys.exit(1)

            print('PASS')
Ejemplo n.º 23
0
    class SpatsTestCase(unittest.TestCase):
        """A single harness case, run under every algorithm of its test set."""

        def __init__(self, test_set, case):
            """Remember the owning test set and the case to run."""
            super(TestHarness.SpatsTestCase, self).__init__()
            self.test_set = test_set
            self.case = case

        def setUp(self):
            """Create a Spats instance and let the test set configure it."""
            try:
                self.spats = Spats()
                self.test_set.spats_setUp(self.spats)
            except Exception as e:
                print("exception caught on testset '{}' setup : {}".format(
                    self.test_set.name, e))
                # Bare raise re-raises with the original traceback intact.
                raise

        def tearDown(self):
            """Release the Spats instance."""
            self.spats = None

        def runTest(self):
            """Run the case once per algorithm listed by the test set."""
            for algorithm in self.test_set.algorithms:
                # Necessary to change algorithm (which determines the processor)
                self.spats.reset_processor()
                self.spats.run.algorithm = algorithm
                self._run_case(self.case)

        def _run_case(self, case):
            """Reset the counters, process the case's pair and check expectations."""
            pair = case.pair()
            self.spats.counters.reset()
            self.spats.process_pair(pair)
            self._check_expects(case, pair)

        @staticmethod
        def _indels_as_dict(indels):
            """Return indels as a plain dict: str(key) -> vars(value)."""
            return {str(key): vars(value) for key, value in indels.items()}

        def _check_expects(self, case, pair):
            """Assert that the processed pair and the counters match case.expect.

            Site, end, mutation and indel expectations are checked first
            (the latter three only when a site is expected); counter and
            pair.target expectations are always checked. Every failure
            message is prefixed with the test set name, case id and
            algorithm.
            """
            expects = case.expect
            msg = "testset='{}', test id='{}', algorithm='{}' failed: ".format(
                self.test_set.name, case.id, self.spats.run.algorithm)

            if expects['site'] is None:
                self.assertIs(
                    pair.site, None,
                    msg + "pair.site={} when expecting none.".format(pair.site))
            else:
                self.assertIsNot(
                    pair.site, None,
                    msg + "pair.site is none when expecting {}.".format(
                        expects['site']))
                self.assertEqual(
                    expects['site'], pair.site,
                    msg + "pair.site={} != expect.site={}".format(
                        pair.site, expects['site']))
                if 'end' in expects:
                    self.assertEqual(
                        expects['end'], pair.end,
                        msg + "pair.end={} != expect.end={}".format(
                            pair.end, expects['end']))

                if 'muts' in expects:
                    if expects['muts'] is not None and len(expects['muts']) > 0:
                        # Mutations are compared sorted; a falsy value is
                        # compared as-is.
                        self.assertEqual(
                            sorted(expects['muts']),
                            sorted(pair.mutations) if pair.mutations else pair.mutations,
                            msg + "mismatching mutations:  expected={}, pair.mutations={}".format(
                                expects['muts'], pair.mutations))
                    else:
                        self.assertTrue(
                            pair.mutations is None or len(pair.mutations) == 0,
                            msg + "unexpected mutations: {}".format(pair.mutations))

                if 'r1_indels' in expects:
                    r1inds = self._indels_as_dict(pair.r1.indels)
                    if expects['r1_indels']:
                        # Values are spelled out in the message because a
                        # custom msg may replace unittest's default diff.
                        self.assertEqual(
                            expects['r1_indels'], r1inds,
                            msg + "mismatching R1 indels:  expected={}, pair.r1.indels={}".format(
                                expects['r1_indels'], r1inds))
                    else:
                        self.assertTrue(
                            not pair.r1.indels,
                            msg + "unexpected R1 indels: {}".format(r1inds))
                if 'r2_indels' in expects:
                    r2inds = self._indels_as_dict(pair.r2.indels)
                    if expects['r2_indels']:
                        self.assertEqual(
                            expects['r2_indels'], r2inds,
                            msg + "mismatching R2 indels:  expected={}, pair.r2.indels={}".format(
                                expects['r2_indels'], r2inds))
                    else:
                        self.assertTrue(
                            not pair.r2.indels,
                            msg + "unexpected R2 indels: {}".format(r2inds))

            if 'counters' in expects:
                for counter, value in expects['counters'].items():
                    got = getattr(self.spats.counters, str(counter))
                    self.assertEqual(
                        got, value,
                        msg + "counter '{}' value off: expected={} != got={}".format(
                            counter, value, got))

            if 'pair.target' in expects:
                tname = pair.target.name if pair.target else None
                self.assertEqual(
                    tname, expects['pair.target'],
                    msg + "pair.target={} != expect.pair.target={}".format(
                        tname, expects['pair.target']))
Ejemplo n.º 24
0
def test_tags():
    """Print the tags TagProcessor assigns to five sample read pairs.

    The Spats instance is configured to use TagProcessor and given the "5s"
    target; the processor then receives the tag targets "5s", "5s_rc",
    "adapter_t_rc" and "adapter_b". Each sample pair is processed in turn and
    its original R1 / R2 sequences and their tags are printed, followed by a
    separator line.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    # Named once: the same sequence is both the Spats target and a tag target.
    seq_5s = "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC"
    seq_5s_rc = "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC"

    s = Spats()
    # The processor class must be chosen before the processor is first used.
    s.run._processor_class = TagProcessor
    s.addTarget("5s", seq_5s)

    p = s._processor
    p.addTagTarget("5s", seq_5s)
    p.addTagTarget("5s_rc", seq_5s_rc)
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)

    # (identifier, R1 read, R2 read)
    cases = [
        [
            "1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG",
            "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA"
        ],
        [
            "1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG",
            "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG"
        ],
        [
            "21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA",
            "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA"
        ],
        [
            "1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG",
            "GGCATCTGACTCGGGCACCAAGGACATACAGATCG"
        ],
        [
            "18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG",
            "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG"
        ],
    ]

    # A single Pair is reloaded with set_from_data for every case.
    pair = Pair()
    for case in cases:
        pair.set_from_data(case[0], case[1], case[2])
        s.process_pair(pair)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")