Esempio n. 1
0
class TestPrefixPairs(unittest.TestCase):
    """Run every entry of ``prefix_cases`` through ``Spats.process_pair``.

    Each case is indexed positionally: fields 0-2 are passed to
    ``Pair.set_from_data``, field 3 is the expected ``pair.site`` and
    field 4 (when truthy) names the prefix whose ``prefix_RRRY_<prefix>``
    counter must equal 1 after processing.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` populated from the first three fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site and (optional) prefix counter."""
        pair = self.pair_for_case(case)
        # Counters are reset so the prefix counter check below sees only
        # the contribution of this one pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(
                1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]),
                "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list that was actually iterated
        # (previously this printed the length of an unrelated `cases` name).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Esempio n. 2
0
class TestShortAdapterB(unittest.TestCase):
    """Run ``short_adapter_b_cases`` once per non-native algorithm.

    For every case, fields 0-2 feed ``Pair.set_from_data`` and field 3 is
    the expected ``pair.site``.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` populated from the first three fields of *case*."""
        new_pair = Pair()
        new_pair.set_from_data(case[0], case[1], case[2])
        return new_pair

    def run_case(self, case):
        """Process one case with the current ``self.spats`` and check its site."""
        candidate = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(candidate)
        failure_text = "site res={} != {} ({}, {}, {}, {})".format(
            candidate.site,
            case[3],
            self.__class__.__name__,
            case[0],
            self.spats.run.algorithm,
            candidate.failure,
        )
        self.assertEqual(case[3], candidate.site, failure_text)

    def test_pairs(self):
        # The 'native' algorithm is excluded from this test.
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Build a fresh ``Spats`` configured for *alg* and run all cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        for adapter_case in short_adapter_b_cases:
            self.run_case(adapter_case)
        print("Ran {} adapter_b test cases.".format(len(short_adapter_b_cases)))
Esempio n. 3
0
def d5s_run():
    """Developer harness: process the 5S FASTQ pair and write reactivities.

    All paths are rooted at a hard-coded local directory; output goes to
    ``dev_out/rx2.out`` beneath it.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.partial import PartialFindProcessor

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    # Disabled alternative: load the reads through a PairDB instead of
    # parsing the FASTQ files directly.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(bp + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(bp + "5s/5S.fa")
    #    pair_db.parse(r1_fastq, r2_fastq)

    s = Spats()
    # Disabled run options kept for quick toggling:
    #s.run._processor_class = PartialFindProcessor
    #s.run.writeback_results = True
    #s.run.resume_processing = True
    #s.run.result_set_name = "lookup"
    s.run.skip_database = True
    s.addTargets(bp + "5s/5S.fa")
    #s.process_pair_db(pair_db)
    s.process_pair_data(r1_fastq, r2_fastq)
    s.compute_profiles()
    s.write_reactivities(bp + "dev_out/rx2.out")
Esempio n. 4
0
class TestOnlyPrefixes(unittest.TestCase):
    """Check that only the prefixes in ``collapse_only_prefixes`` are collapsed.

    Each entry of ``prefix_cases`` is indexed positionally: fields 0-2 feed
    ``Pair.set_from_data``, field 4 is the site expected when the prefix is
    allowed, and field 5 is the prefix itself (cases with a falsy prefix
    are processed but not asserted).
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.cotrans = True
        self.spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats.run.collapse_left_prefixes = True
        self.spats.run.collapse_only_prefixes = "T,ACGT,CCA"
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` populated from the first three fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; assert its site only when it carries a prefix."""
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        if case[5]:
            allowed_prefixes = self.spats.run.collapse_only_prefixes.split(',')
            # A prefix outside the allowed list must leave the pair without a site.
            expect = case[4] if case[5] in allowed_prefixes else None
            # The message reports the value actually asserted (`expect`),
            # which may be None rather than case[4].
            self.assertEqual(
                expect, pair.site,
                "PREF res={} != {} ({}, {})".format(
                    pair.site, expect, self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list that was actually iterated
        # (previously this printed the length of an unrelated `cases` name).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Esempio n. 5
0
def diag_case():
    """Diagram the starred mutation test cases and fail if any site mismatches.

    Only cases whose identifier (field 0) starts with ``"*"`` are run. For
    each one the pair diagram is printed, and field 3 is compared with the
    resulting ``pair.site``.

    Raises:
        Exception: if at least one executed case produced a different site.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    #from spats_shape_seq.tests.test_pairs import prefix_cases as cases
    from spats_shape_seq.diagram import diagram
    #spats_config.minimum_target_match_length = 8
    spats = Spats()
    #spats.addTargets("test/5s/5s.fa")
    spats.addTargets("test/mut/mut_single.fa")
    spats.run.debug = True
    spats.run.algorithm = "find_partial"
    spats.run.count_mutations = True
    #spats.run.mutations_require_quality_score = ord('.') - ord('!')
    spats.run.allowed_target_errors = 1
    spats.run.ignore_stops_with_mismatched_overlap = True
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"

    #spats.run.collapse_left_prefixes = True
    # The failure flag lives on the Spats object so the nested function can
    # set it without `nonlocal` (keeps the code valid on Python 2 as well).
    spats._case_errors = False

    def run_case(case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        # Single-argument print() calls behave the same on Python 2 and 3.
        print(diagram(pair, spats.run))
        if case[3] != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(case[3], pair.site))

    for case in cases:
        if case[0].startswith("*"):
            run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
Esempio n. 6
0
    def run(self):
        """Process the SPATS data for the configured target(s) and r1/r2 fragment pairs.

        A previous run file is removed first. The native cotrans tool is used
        only when it is available, the run is cotrans, and
        ``_update_run_config`` reports no custom configuration; otherwise the
        python processor is used and its results are stored to the run file.
        """
        run_name = self._run_file()
        if os.path.exists(run_name):
            self._add_note("** removing previous run file")
            os.remove(run_name)

        native_tool = self._native_tool('cotrans')
        if native_tool and not self.cotrans:
            self._add_note("skipping native tool due to non-cotrans run")
            native_tool = None

        # The run config is always applied, even when the native tool ends
        # up doing the work, because the notebook entry below reads it.
        spats = Spats(cotrans = self.cotrans)
        has_custom_config = self._update_run_config(spats.run)
        if has_custom_config and native_tool:
            self._add_note("skipping native tool due to custom config")
            native_tool = None

        if not native_tool:
            self._add_note("using python processor")
            spats.addTargets(self.config['target'])
            if not self.using_separate_channel_files:
                spats.process_pair_data(self.r1, self.r2)
            else:
                # One input pair per channel, each forced to its own mask.
                spats.process_pair_data(self.r1_plus, self.r2_plus, force_mask = spats.run.masks[0])
                spats.process_pair_data(self.r1_minus, self.r2_minus, force_mask = spats.run.masks[1])
            spats.store(run_name)
        else:
            self._add_note("using native cotrans processor")
            native_command = [native_tool, self.config['target'], self.r1, self.r2, run_name]
            subprocess.check_call(native_command, cwd = self.path)

        self._add_note("wrote output to {}".format(os.path.basename(run_name)))
        notebook = self._notebook()
        if notebook:
            notebook.add_spats_run(self.cotrans, spats.run.count_mutations).save()
Esempio n. 7
0
class TestPrefixPairs(unittest.TestCase):
    """Run every entry of ``prefix_cases`` through ``Spats.process_pair``.

    Each case is indexed positionally: fields 0-2 are passed to
    ``Pair.set_from_data``, field 3 is the expected ``pair.site`` and
    field 4 (when truthy) names the prefix whose ``prefix_RRRY_<prefix>``
    counter must equal 1 after processing.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` populated from the first three fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site and (optional) prefix counter."""
        pair = self.pair_for_case(case)
        # Counters are reset so the prefix counter check below sees only
        # the contribution of this one pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]), "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list that was actually iterated
        # (previously this printed the length of an unrelated `cases` name).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
Esempio n. 8
0
    def compare(self):
        """Process every read pair with both algorithms and stop at the first disagreement.

        Two ``Spats`` instances are configured identically except for the
        algorithm (``find_partial`` vs ``lookup``). Pairs agree when neither
        has a site, or when target name, end, site and mutations are all
        equal. On the first disagreement a JSON description of the pair is
        printed and the method returns; otherwise both count reports are
        printed at the end.
        """
        from spats_shape_seq import Spats
        from spats_shape_seq.pair import Pair

        # Template for the JSON blob printed when a mismatch is found.
        json_base = { 'target' : self.config['target'], 'config' : { 'algorithm' : 'find_partial', 'debug' : True }, 'expect' : {}}

        spats_fp = Spats(cotrans = self.cotrans)
        spats_lookup = Spats(cotrans = self.cotrans)
        self._update_run_config(spats_fp.run)
        self._update_run_config(spats_lookup.run, json_base['config'])
        spats_fp.run.algorithm = 'find_partial'
        spats_lookup.run.algorithm = 'lookup'

        spats_fp.addTargets(self.config['target'])
        spats_lookup.addTargets(self.config['target'])

        count = 0   # pairs on which both algorithms agreed
        match = 0   # agreeing pairs that actually have a site
        with FastFastqParser(self.r1, self.r2) as parser:
            total = parser.appx_number_of_pairs()
            for batch in parser.iterator(5000):
                for item in batch:
                    identifier = str(item[0])
                    pair_fp = Pair()
                    pair_lookup = Pair()
                    pair_fp.set_from_data(identifier, item[1], item[2])
                    pair_lookup.set_from_data(identifier, item[1], item[2])
                    try:
                        spats_fp.process_pair(pair_fp)
                        spats_lookup.process_pair(pair_lookup)
                    except:
                        # Report progress before letting the error propagate.
                        print('Error after {}/{}'.format(match, count))
                        raise

                    if pair_fp.has_site == pair_lookup.has_site:
                        if not pair_fp.has_site:
                            count += 1
                            continue
                        same_result = (pair_fp.target.name == pair_lookup.target.name and
                                       pair_fp.end == pair_lookup.end and
                                       pair_fp.site == pair_lookup.site and
                                       pair_fp.mutations == pair_lookup.mutations)
                        if same_result:
                            count += 1
                            match += 1
                            continue

                    # Reaching this point means the two algorithms disagree.
                    json_base["id"] = str(item[0])
                    json_base["R1"] = str(item[1])
                    json_base["R2"] = str(item[2])
                    case_json = json.dumps(json_base, sort_keys = True,indent = 4, separators = (',', ': '))
                    print('After {}/{} matches; mismatched pair: {} != {}\n{}'.format(match, count, pair_fp, pair_lookup, case_json))
                    return
                print('{}/{}-{}...'.format(match, count, total))

        spats_fp.counters.total_pairs = count
        spats_lookup.counters.total_pairs = count
        print('All match {}/{}.'.format(match, count))
        print(spats_fp._report_counts())
        print(spats_lookup._report_counts())
Esempio n. 9
0
def cotrans_debug():
    """Developer harness: process one hand-picked cotrans read pair and print the result.

    Prints the pair's identifier and original sequences, then either
    ``target: site / end`` or the failure reason. The active case is the
    uncommented ``c = [...]`` below; the commented lines are alternative
    cases to swap in.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # NOTE(review): this data is replaced by the set_from_data(c...) call
    # further down; kept so the JSON file is still required and read as before.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # Active case (the earlier, immediately overwritten `c` assignment was
    # a dead store and has been removed; its data is still listed above).
    c = [
        'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA',
        'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71
    ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 10
0
def spats(target, r1, r2, out, show_sites=True):
    """Process an r1/r2 pair against *target* using the RRRY/YYYR masks.

    With *show_sites* truthy, ``spats_config.show_id_to_site`` is switched on
    and nothing is written. Otherwise profiles are computed and
    reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    processor = Spats()
    processor.addTargets(target)
    processor.addMasks("RRRY", "YYYR")
    if show_sites:
        spats_config.show_id_to_site = True
    processor.process_pair_data(r1, r2)

    # Site-listing mode produces no reactivities file.
    if show_sites:
        return
    processor.compute_profiles()
    processor.write_reactivities(out + "/rx.out")
Esempio n. 11
0
def spats(target, r1, r2, out, show_sites = True):
    """Process an r1/r2 pair against *target* using the RRRY/YYYR masks.

    With *show_sites* truthy, ``spats_config.show_id_to_site`` is switched on
    and nothing is written. Otherwise profiles are computed and
    reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    engine = Spats()
    engine.addTargets(target)
    engine.addMasks("RRRY", "YYYR")
    if show_sites:
        spats_config.show_id_to_site = True
    engine.process_pair_data(r1, r2)

    # Site-listing mode produces no reactivities file.
    if show_sites:
        return
    engine.compute_profiles()
    engine.write_reactivities(out + "/rx.out")
Esempio n. 12
0
def cotrans_test():
    """Developer harness: process one fixed read pair against ``F_wt.fa``.

    Prints ``target name: site`` for the processed pair.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT', 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Single-argument print() behaves the same on Python 2 and 3.
    print("{}: {}".format(pair.target.name, pair.site))
Esempio n. 13
0
def tmut_case():
    """Developer harness: process one mutation read pair and print its diagram.

    After the diagram, prints ``target: site / end mutations`` when the pair
    has a site, otherwise the failure reason. The active case is the
    uncommented ``c = [...]`` below; the commented lines are alternative
    R1/R2 pairs to swap in.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans=False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [
        'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA',
        'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA'
    ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end,
                                      pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 14
0
def ligation_run():
    """Developer harness: process the Shape_Seq_ligation FASTQ pair.

    Runs with ``skip_database`` enabled against ``panel_RNAs_complete.fa``;
    all paths are rooted at a hard-coded local directory.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"
    r1_fastq = bp + "data/KEW1_S1_L001_R1_001.fastq"
    r2_fastq = bp + "data/KEW1_S1_L001_R2_001.fastq"

    s = Spats()
    # Disabled options kept for quick toggling:
    #s.config.debug = True
    #s.run.minimum_target_match_length = 10
    #s.run.num_workers = 1
    #from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.run.skip_database = True
    s.addTargets(bp + "panel_RNAs_complete.fa")
    s.process_pair_data(r1_fastq, r2_fastq)
Esempio n. 15
0
def d5s_writeback_run():
    """Developer harness: process the 5S pair database under the "pure_python" result set.

    Opens ``dev_out/pairs.db``, registers the 5S targets table in it, then
    runs ``Spats.process_pair_db`` over that database.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    targets_path = bp + "5s/5S.fa"

    from spats_shape_seq.db import PairDB
    pair_db = PairDB(bp + "dev_out/pairs.db")
    pair_db.add_targets_table(targets_path)

    from spats_shape_seq import Spats
    s = Spats()
    s.addTargets(targets_path)
    # NOTE(review): these two options are set on the Spats object itself,
    # whereas other code in this file sets them on `s.run` -- confirm which
    # one the targeted library version actually reads.
    s.writeback_results = True
    s.result_set_name = "pure_python"
    s.process_pair_db(pair_db)
Esempio n. 16
0
def cotrans_test():
    """Developer harness: process one fixed read pair against ``F_wt.fa``.

    Prints ``target name: site`` for the processed pair.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT',
                       'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # Single-argument print() behaves the same on Python 2 and 3.
    print("{}: {}".format(pair.target.name, pair.site))
Esempio n. 17
0
def ligation_run():
    """Developer harness: process the Shape_Seq_ligation FASTQ pair.

    Runs with ``skip_database`` enabled against ``panel_RNAs_complete.fa``;
    all paths are rooted at a hard-coded local directory.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"
    reads_r1 = bp + "data/KEW1_S1_L001_R1_001.fastq"
    reads_r2 = bp + "data/KEW1_S1_L001_R2_001.fastq"

    s = Spats()
    # Disabled options kept for quick toggling:
    #s.config.debug = True
    #s.run.minimum_target_match_length = 10
    #s.run.num_workers = 1
    #from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.run.skip_database = True
    s.addTargets(bp + "panel_RNAs_complete.fa")
    s.process_pair_data(reads_r1, reads_r2)
Esempio n. 18
0
def d5s_writeback_run():
    """Developer harness: process the 5S pair database under the "pure_python" result set.

    Opens ``dev_out/pairs.db``, registers the 5S targets table in it, then
    runs ``Spats.process_pair_db`` over that database.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    fasta_path = bp + "5s/5S.fa"

    from spats_shape_seq.db import PairDB
    pair_db = PairDB(bp + "dev_out/pairs.db")
    pair_db.add_targets_table(fasta_path)

    from spats_shape_seq import Spats
    s = Spats()
    s.addTargets(fasta_path)
    # NOTE(review): these two options are set on the Spats object itself,
    # whereas other code in this file sets them on `s.run` -- confirm which
    # one the targeted library version actually reads.
    s.writeback_results = True
    s.result_set_name = "pure_python"
    s.process_pair_db(pair_db)
Esempio n. 19
0
def cotrans_run():
    """Developer harness: process the cotrans FASTQ pair and write reactivities.

    Runs with ``skip_database`` enabled against ``F_wt.fa`` and writes the
    result to ``dev_out/rx.out``; all paths are rooted at a hard-coded local
    directory.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    r1_fastq = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    r2_fastq = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"

    s = Spats()
    # Disabled options kept for quick toggling:
    #from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    #s.run.writeback_results = True
    #s.run.resume_processing = True
    #s.run.result_set_name = "lookup"
    s.run.skip_database = True
    s.addTargets(bp + "F_wt.fa")
    s.process_pair_data(r1_fastq, r2_fastq)
    s.compute_profiles()
    s.write_reactivities(bp + "dev_out/rx.out")
Esempio n. 20
0
def cotrans_run():
    """Developer harness: process the cotrans FASTQ pair and write reactivities.

    Runs with ``skip_database`` enabled against ``F_wt.fa`` and writes the
    result to ``dev_out/rx.out``; all paths are rooted at a hard-coded local
    directory.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    reads_r1 = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    reads_r2 = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"

    s = Spats()
    # Disabled options kept for quick toggling:
    #from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    #s.run.writeback_results = True
    #s.run.resume_processing = True
    #s.run.result_set_name = "lookup"
    s.run.skip_database = True
    s.addTargets(bp + "F_wt.fa")
    s.process_pair_data(reads_r1, reads_r2)
    s.compute_profiles()
    s.write_reactivities(bp + "dev_out/rx.out")
Esempio n. 21
0
def cotrans_debug():
    """Developer harness: process one hand-picked cotrans read pair and print the result.

    Prints the pair's identifier and original sequences, then either
    ``target: site / end`` or the failure reason. The active case is the
    uncommented ``c = [...]`` below; the commented lines are alternative
    cases to swap in.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # NOTE(review): this data is replaced by the set_from_data(c...) call
    # further down; kept so the JSON file is still required and read as before.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c =     [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # Active case (the earlier, immediately overwritten `c` assignment was
    # a dead store and has been removed; its data is still listed above).
    c = ['jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 22
0
def tmut_case():
    """Developer harness: process one mutation read pair and print its diagram.

    After the diagram, prints ``target: site / end mutations`` when the pair
    has a site, otherwise the failure reason. The active case is the
    uncommented ``c = [...]`` below; the commented lines are alternative
    R1/R2 pairs to swap in.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans = False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 23
0
def prof_run():
    """Process a fixed pair of FASTQ files with the "lookup" algorithm, then exit.

    Configures a fresh ``Spats`` instance (all pairs processed, database
    skipped, mutation counting on, a single worker), loads ``target.fa`` from
    the hard-coded dataset directory and feeds it the ``2k_R1``/``2k_R2``
    FASTQ files.

    Raises:
        SystemExit: always, with status 0, once processing has finished.
    """
    from spats_shape_seq import Spats
    spats = Spats()
    #spats.run.cotrans = True
    #spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #spats.run.writeback_results = False
    spats.run._process_all_pairs = True
    spats.run.skip_database = True
    spats.run.algorithm = "lookup"
    spats.run.count_mutations = True
    spats.run.num_workers = 1

    # NOTE(review): developer-local absolute path; only valid on the author's machine.
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/pdc_muts/PDC_tweaked/PDC_09_001_6/"
    spats.addTargets(bp + "target.fa")
    spats.process_pair_data(bp + "2k_R1.fastq", bp + "2k_R2.fastq")
    # exit() is injected by the site module for interactive use and may be
    # missing (e.g. under `python -S`); raising SystemExit is the portable form.
    raise SystemExit(0)
Esempio n. 24
0
def indels_run():
    """Process the hairpin "match failures" FASTQ pair with indel counting, then exit.

    Builds a ``Spats`` instance with indel and mutation counting enabled,
    loads the circular hairpin target file and processes the R1/R2
    match-failure FASTQ files from the hard-coded test directory.

    Raises:
        SystemExit: always, with status 0, once processing has finished.
    """
    from spats_shape_seq import Spats
    s = Spats()
    # NOTE(review): every other caller assigns a string name here
    # ("find_partial", "lookup"); True looks unintended -- confirm.
    s.run.algorithm = True
    s.run.count_indels = True
    s.run.count_mutations = True
    s.run.allowed_target_errors = 8
    # These two options were previously assigned on the Spats object itself,
    # whereas the rest of the code configures them through `run`.
    s.run.collapse_left_prefixes = True
    s.run.ignore_stops_with_mismatched_overlap = True
    s.run.allow_negative_values = True
    s.run.mutations_require_quality_score = 30
    # NOTE(review): developer-local absolute path; only valid on the author's machine.
    bp = "/Users/steve/mos/tasks/oughxX/code"
    s.addTargets(bp + "/test/hairpin/hairpinA_circ.fa")
    rp = bp + "/TESTING/cmp_muts_favored/steve_test"
    s.process_pair_data(rp + "/R1_match_failures.fastq",
                        rp + "/R2_match_failures.fastq")
    # exit() is a site-module helper meant for the interactive shell;
    # raising SystemExit works regardless of how the interpreter was started.
    raise SystemExit(0)
Esempio n. 25
0
def indels_run():
    """Run indel/mutation counting over the hairpin match-failure reads, then exit.

    A ``Spats`` instance is configured for indel and mutation counting, given
    the circular hairpin targets, and pointed at the R1/R2 match-failure
    FASTQ files under the hard-coded test directory.

    Raises:
        SystemExit: always, with status 0, after the pair data is processed.
    """
    from spats_shape_seq import Spats
    s = Spats()
    run = s.run
    # NOTE(review): other code sets `algorithm` to a string such as
    # "find_partial" or "lookup"; a boolean here looks accidental -- confirm.
    run.algorithm = True
    run.count_indels = True
    run.count_mutations = True
    run.allowed_target_errors = 8
    # Previously written to `s` directly; the other blocks in this file set
    # collapse_left_prefixes and mutations_require_quality_score on `run`.
    run.collapse_left_prefixes = True
    run.ignore_stops_with_mismatched_overlap = True
    run.allow_negative_values = True
    run.mutations_require_quality_score = 30
    # NOTE(review): developer-local absolute path.
    bp = "/Users/steve/mos/tasks/oughxX/code"
    s.addTargets(bp + "/test/hairpin/hairpinA_circ.fa")
    rp = bp + "/TESTING/cmp_muts_favored/steve_test"
    s.process_pair_data(rp + "/R1_match_failures.fastq",
                        rp + "/R2_match_failures.fastq")
    # Portable replacement for the interactive-only exit() helper.
    raise SystemExit(0)
Esempio n. 26
0
class TestMutPairs(unittest.TestCase):
    """Checks site, end and mutation results of ``process_pair`` on mutation cases.

    Each case is a sequence read as
    ``(id, r1, r2, expected_end, expected_site, expected_mutations[, r1_quality, r2_quality])``.
    """

    def setUp(self):
        """Create a mutation-counting Spats instance and let setup_processor finish it."""
        self.spats = Spats()
        self.spats.run.count_mutations = True
        # Phred+33 offset between '.' and '!' gives the numeric quality threshold.
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        """Select the algorithm and load targets; a separate hook so it can be overridden."""
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        """Drop the Spats instance created in setUp."""
        self.spats = None

    def pair_for_case(self, case):
        """Build a Pair from a case, using the case's quality strings when present.

        Cases longer than six entries carry the R1/R2 quality strings at
        indices 6 and 7; otherwise every base gets quality 'K'.
        """
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
            pair.r1.quality = 'K' * len(case[1])
            pair.r2.quality = 'K' * len(case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site, and (when a site is found) end and mutations.

        Returns:
            The processed Pair.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[4], pair.site, "res={} != {} ({}, {}, {})".format(pair.site, case[4], self.__class__.__name__, case[0], pair.failure))
        if pair.site is not None:
            self.assertEqual(case[3], pair.end, "end={} != {} ({}, {}, {})".format(pair.end, case[3], self.__class__.__name__, case[0], pair.failure))
            # Mutations are compared order-independently; a falsy value is compared as-is.
            self.assertEqual(case[5], sorted(pair.mutations) if pair.mutations else pair.mutations, "muts={} != {} ({}, {}, {})".format(pair.mutations, case[5], self.__class__.__name__, case[0], pair.failure))
        return pair

    def cases(self):
        """Return the case list matching the current run mode (cotrans or not)."""
        return cotrans_cases if self.spats.run.cotrans else cases

    def test_pairs(self):
        """Run every active case, deriving the pair length from the first one."""
        # Use the same list for the pair length, the loop and the summary so a
        # cotrans run is not sized or reported from the non-cotrans cases.
        active_cases = self.cases()
        self.spats.run.pair_length = len(active_cases[0][1])
        for case in active_cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(active_cases)))
Esempio n. 27
0
def prof_run():
    """Run the "lookup" algorithm over a fixed 2k-pair dataset and exit.

    Sets up ``Spats`` with all pairs processed, no database, mutation
    counting enabled and one worker; then loads ``target.fa`` and processes
    the ``2k_R1.fastq`` / ``2k_R2.fastq`` files from the hard-coded directory.

    Raises:
        SystemExit: always, with status 0, after processing completes.
    """
    from spats_shape_seq import Spats
    spats = Spats()
    #spats.run.cotrans = True
    #spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #spats.run.writeback_results = False
    spats.run._process_all_pairs = True
    spats.run.skip_database = True
    spats.run.algorithm = "lookup"
    spats.run.count_mutations = True
    spats.run.num_workers = 1

    # NOTE(review): developer-local absolute path.
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/pdc_muts/PDC_tweaked/PDC_09_001_6/"
    spats.addTargets(bp + "target.fa")
    spats.process_pair_data(bp + "2k_R1.fastq",
                            bp + "2k_R2.fastq")
    # The exit() builtin comes from the site module and is intended for the
    # interactive shell; SystemExit is the dependable way to stop a program.
    raise SystemExit(0)
Esempio n. 28
0
class TestPanelPairs(unittest.TestCase):
    """Pair processing against the complete panel-RNA target set."""

    def setUp(self):
        """Build a Spats instance with a minimum match length of 10 and the panel targets."""
        from spats_shape_seq import Spats
        spats = Spats()
        spats.run.minimum_target_match_length = 10
        spats.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = spats

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        """The pair must end with no target and one multiple_R1_match count."""
        read_id = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'
        pair = Pair()
        pair.set_from_data(read_id, r1_seq, r2_seq)

        self.spats.run.debug = True
        self.spats.run.algorithm = "find_partial"
        self.spats.process_pair(pair)

        self.assertEqual(None, pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
Esempio n. 29
0
class TestOverlap(unittest.TestCase):
    """Runs the overlap cases under every non-native algorithm.

    Each case is read as ``(id, r1, r2, expected_site, expected_failure)``.
    """

    def tearDown(self):
        """Release the Spats instance created by run_algorithm."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair populated from the case's id, R1 and R2 entries."""
        result = Pair()
        result.set_from_data(case[0], case[1], case[2])
        return result

    def run_case(self, case):
        """Process one case and assert both its site and its failure value."""
        algorithm = self.spats.run.algorithm
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], algorithm))
        self.spats.process_pair(pair)

        test_name = self.__class__.__name__
        site_msg = "site res={} != {} ({}, {}, {}, {})".format(
            pair.site, case[3], test_name, case[0],
            self.spats.run.algorithm, pair.failure)
        self.assertEqual(case[3], pair.site, site_msg)

        failure_msg = "failure res={} != {} ({}, {}, {})".format(
            pair.failure, case[4], test_name, case[0],
            self.spats.run.algorithm)
        self.assertEqual(case[4], pair.failure, failure_msg)

    def test_pairs(self):
        """Exercise every algorithm except 'native'."""
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh Spats configured for `alg` and run all overlap cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        run = self.spats.run
        run.algorithm = alg
        run.count_mutations = True
        # NOTE(review): a boolean is assigned where other tests use an integer
        # count; kept as-is to preserve behavior -- confirm intent.
        run.allowed_target_errors = True
        run.ignore_stops_with_mismatched_overlap = True
        self.spats.addTargets("test/SRP/SRP.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run each entry of overlap_cases and report how many were run."""
        for case in overlap_cases:
            self.run_case(case)
        total = len(overlap_cases)
        print("Ran {} overlap test cases.".format(total))
Esempio n. 30
0
class TestPanelPairs(unittest.TestCase):
    """Exercises pair processing with the full panel-RNA targets loaded."""

    def setUp(self):
        """Prepare Spats: minimum target match length 10, panel targets added."""
        from spats_shape_seq import Spats
        instance = Spats()
        instance.run.minimum_target_match_length = 10
        instance.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = instance

    def tearDown(self):
        """Discard the Spats instance."""
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        """Expect no target on the pair and a single multiple_R1_match count."""
        case_data = ('M02465:8:000000000-A5D',
                     'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA',
                     'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA')
        pair = Pair()
        pair.set_from_data(*case_data)

        self.spats.run.debug = True
        self.spats.run.algorithm = "find_partial"
        self.spats.process_pair(pair)

        counters = self.spats.counters
        self.assertEqual(None, pair.target)
        self.assertEqual(1, counters.multiple_R1_match)
Esempio n. 31
0
def tag_test():
    """Process the single pair stored in /tmp/spats_test.json and print the outcome.

    Configures a cotrans ``Spats`` run using the "find_partial" algorithm,
    loads the cotrans target file, reads the ``id``/``r1``/``r2`` fields from
    the JSON file and processes them as one pair. Prints the target name,
    site and ``pair.right`` when a site was found, otherwise the failure.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"

    # NOTE(review): developer-local absolute path.
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # str() normalizes the JSON values before they are handed to the Pair.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    # print(...) with one pre-formatted argument gives the same output on
    # Python 2 and is valid syntax on Python 3, unlike the print statement.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 32
0
def tag_test():
    """Run one JSON-described pair through a cotrans find_partial run and report it.

    The pair's ``id``, ``r1`` and ``r2`` come from ``/tmp/spats_test.json``.
    After processing, either the matched target name with the site and
    ``pair.right`` is printed, or the failure recorded on the pair.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"

    # NOTE(review): developer-local absolute path.
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    # Function-call form of print: identical output on Python 2 for a single
    # argument, and required syntax on Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
Esempio n. 33
0
class TestOverlap(unittest.TestCase):
    """Overlap cases, checked for site and failure under each non-native algorithm.

    Case layout as read here: ``(id, r1, r2, expected_site, expected_failure)``.
    """

    def tearDown(self):
        """Drop the Spats instance that run_algorithm created."""
        self.spats = None

    def pair_for_case(self, case):
        """Create a Pair from the first three entries of `case`."""
        identifier, r1, r2 = case[0], case[1], case[2]
        pair = Pair()
        pair.set_from_data(identifier, r1, r2)
        return pair

    def run_case(self, case):
        """Process `case` and assert the resulting site and failure."""
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(pair)

        cls_name = self.__class__.__name__
        expected_site, expected_failure = case[3], case[4]
        self.assertEqual(
            expected_site, pair.site,
            "site res={} != {} ({}, {}, {}, {})".format(
                pair.site, expected_site, cls_name, case[0],
                self.spats.run.algorithm, pair.failure))
        self.assertEqual(
            expected_failure, pair.failure,
            "failure res={} != {} ({}, {}, {})".format(
                pair.failure, expected_failure, cls_name, case[0],
                self.spats.run.algorithm))

    def test_pairs(self):
        """Run the overlap cases once per algorithm, skipping 'native'."""
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Configure a new Spats for `alg`, load the SRP targets and run the cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        settings = self.spats.run
        settings.algorithm = alg
        settings.count_mutations = True
        # NOTE(review): boolean where an integer error count is used elsewhere;
        # left unchanged to keep behavior identical -- confirm intent.
        settings.allowed_target_errors = True
        settings.ignore_stops_with_mismatched_overlap = True
        self.spats.addTargets("test/SRP/SRP.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run all overlap_cases and print the number executed."""
        for case in overlap_cases:
            self.run_case(case)
        summary = "Ran {} overlap test cases.".format(len(overlap_cases))
        print(summary)
Esempio n. 34
0
class TestPairsPartial(unittest.TestCase):
    """Cotrans pair->site cases, run with the algorithm chosen by setup_processor.

    Cases are read as ``(id, r1, r2, expected_end, expected_site)``.
    """

    def setUp(self):
        """Create a cotrans Spats, pick the algorithm, then load the cotrans target."""
        spats = Spats()
        spats.run.cotrans = True
        spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats = spats
        self.setup_processor()
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def setup_processor(self):
        """Hook that selects the algorithm under test (find_partial here)."""
        self.spats.run.algorithm = "find_partial"

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's id, R1 and R2."""
        new_pair = Pair()
        new_pair.set_from_data(case[0], case[1], case[2])
        return new_pair

    def run_case(self, case):
        """Process a case; assert the site, and the end whenever a site was found.

        Returns:
            The processed Pair.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        mismatch = "res={} != {} ({}, {})".format(
            pair.site, case[4], self.__class__.__name__, case[0])
        self.assertEqual(case[4], pair.site, mismatch)
        if pair.site is not None:
            self.assertEqual(case[3], pair.end)
        return pair

    def test_pairs(self):
        """Run all cases, unless the processor reports that it does not exist."""
        self.spats.run.pair_length = len(cases[0][1])
        if self.spats._processor.exists():
            for case in cases:
                self.run_case(case)
            print("Ran {} pair->site cases.".format(len(cases)))
        else:
            # An unavailable processor is only tolerated for the native algorithm.
            self.assertEqual("native", self.spats.run.algorithm)
Esempio n. 35
0
class TestMutPairs(unittest.TestCase):
    def setUp(self):
        self.spats = Spats()
        self.spats.run.count_mutations = True
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
Esempio n. 36
0
class TestShortAdapterB(unittest.TestCase):
    """Runs short_adapter_b_cases under every non-native algorithm.

    Cases are read as ``(id, r1, r2, expected_site)``.
    """

    def tearDown(self):
        """Release the Spats instance created by run_algorithm."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair populated from the case's id, R1 and R2."""
        built = Pair()
        built.set_from_data(case[0], case[1], case[2])
        return built

    def run_case(self, case):
        """Process one case and assert the site it resolves to."""
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(pair)
        expected_site = case[3]
        detail = "site res={} != {} ({}, {}, {}, {})".format(
            pair.site, expected_site, self.__class__.__name__, case[0],
            self.spats.run.algorithm, pair.failure)
        self.assertEqual(expected_site, pair.site, detail)

    def test_pairs(self):
        """Exercise every algorithm except 'native'."""
        for alg in algorithms:
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a Spats for `alg`, load the 5SrRNA target and run the cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run each short_adapter_b case and print how many were run."""
        for case in short_adapter_b_cases:
            self.run_case(case)
        count = len(short_adapter_b_cases)
        print("Ran {} adapter_b test cases.".format(count))
Esempio n. 37
0
def diag_case():
    """Diagram the mutation test cases whose id starts with "*" and check their sites.

    A debug-enabled ``Spats`` run (find_partial, mutation counting, one
    allowed target error) processes each flagged case from
    ``spats_shape_seq.tests.test_mut`` and prints its diagram. Any case whose
    resulting site differs from the expected one is reported.

    Raises:
        Exception: "Case failed" if at least one flagged case mismatched.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    #from spats_shape_seq.tests.test_pairs import prefix_cases as cases
    from spats_shape_seq.diagram import diagram
    #spats_config.minimum_target_match_length = 8
    spats = Spats()
    #spats.addTargets("test/5s/5s.fa")
    spats.addTargets("test/mut/mut_single.fa")
    spats.run.debug = True
    spats.run.algorithm = "find_partial"
    spats.run.count_mutations = True
    #spats.run.mutations_require_quality_score = ord('.') - ord('!')
    spats.run.allowed_target_errors = 1
    spats.run.ignore_stops_with_mismatched_overlap = True
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"

    #spats.run.collapse_left_prefixes = True
    # The error flag lives on the spats object so the nested function can set
    # it without `nonlocal` (unavailable on Python 2).
    spats._case_errors = False

    # The mutation cases store the expected end at index 3 and the expected
    # site at index 4, so the site must be compared against index 4.
    # NOTE(review): switch back to index 3 if the prefix_cases import above is
    # re-enabled -- those cases keep the site at index 3.
    expected_site_index = 4

    def run_case(case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(diagram(pair, spats.run))
        expected_site = case[expected_site_index]
        if expected_site != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(expected_site, pair.site))

    for case in cases:
        if case[0].startswith("*"):
            run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
Esempio n. 38
0
def d5s_run():
    """Process the 5S FASTQ pair directly from disk and write reactivities.

    Loads the 5S target file, processes the R1/R2 FASTQ files with the
    database skipped, computes profiles and writes the reactivities to
    ``dev_out/rx2.out`` under the hard-coded base directory.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.partial import PartialFindProcessor

    # NOTE(review): developer-local absolute path.
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    # Disabled alternative kept for reference: parse the same FASTQ files into
    # a PairDB first and process from the database instead.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(bp + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(bp + "5s/5S.fa")
    #    pair_db.parse(r1_fastq, r2_fastq)

    s = Spats()
    # Further disabled toggles:
    #s.run._processor_class = PartialFindProcessor
    #s.run.writeback_results = True
    #s.run.resume_processing = True
    #s.run.result_set_name = "lookup"
    s.run.skip_database = True
    s.addTargets(bp + "5s/5S.fa")
    #s.process_pair_db(pair_db)
    s.process_pair_data(r1_fastq, r2_fastq)
    s.compute_profiles()
    s.write_reactivities(bp + "dev_out/rx2.out")
Esempio n. 39
0
class TestPairs(unittest.TestCase):
    """Pair->site checks against the 5s target file.

    Cases are read as ``(id, r1, r2, expected_site)``.
    """

    def setUp(self):
        """Create a default Spats instance with the 5s targets loaded."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's id, R1 and R2."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert the site it resolves to.

        Returns:
            The processed Pair.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3],
                                           self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        """Run every entry of the module-level `cases` list."""
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        """Process one unusual pair and print its site/failure (no assertions)."""
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG',
                           'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair fails with minimum match length 11 and matches with 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [
            '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
            'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None
        ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The length must be configured on the NEW instance (and before the
        # targets are added); setting it first and then replacing self.spats
        # threw the setting away.
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Esempio n. 40
0
class TestPairs(unittest.TestCase):
    """Pair->site checks against the 5s target file.

    Cases are read as ``(id, r1, r2, expected_site)``.
    """

    def setUp(self):
        """Create a default Spats instance with the 5s targets loaded."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the case's id, R1 and R2."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert the site it resolves to.

        Returns:
            The processed Pair.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        """Run every entry of the module-level `cases` list."""
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        """Process one unusual pair and print its site/failure (no assertions)."""
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG', 'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair fails with minimum match length 11 and matches with 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # Configure the fresh instance rather than the one about to be
        # discarded, and do so before its targets are added.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Esempio n. 41
0
class TestPairsWithU(TestPairs):
    """Repeats the inherited TestPairs tests with the 5su.fa target file."""

    def setUp(self):
        """Create a Spats instance loaded with test/5s/5su.fa instead of 5s.fa."""
        from spats_shape_seq import Spats
        spats = Spats()
        spats.addTargets("test/5s/5su.fa")
        self.spats = spats
Esempio n. 42
0
class TestPairsWithU(TestPairs):
    """Inherits every TestPairs test; only the target file differs (5su.fa)."""

    def setUp(self):
        """Replace the base fixture with a Spats instance using test/5s/5su.fa."""
        from spats_shape_seq import Spats
        instance = Spats()
        instance.addTargets("test/5s/5su.fa")
        self.spats = instance