def show_failure_types():
    """Print a one-line outcome summary for every pair in a filtered FASTQ set.

    Reads R1/R2 records in lockstep from two hard-coded FASTQ files, runs each
    pair through a ``spats_clean.Spats`` instance and prints the pair
    identifier followed by its site (when ``pair.has_site``) or its failure,
    plus any match/adapter errors recorded on either read.

    Debug helper: the input paths are developer-local and must exist.
    """
    from spats_clean import Spats, Pair, FastqRecord

    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    # A single `with` owns both handles, so both are closed on any exit path.
    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in, \
            open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
        # The record and pair objects are reused across iterations.
        r1_record = FastqRecord()
        r2_record = FastqRecord()
        pair = Pair()
        while True:
            r1_record.read(r1_in)
            if not r1_record.identifier:
                # An empty identifier after a read is treated as end of input.
                break
            r2_record.read(r2_in)
            pair.set_from_records(r1_record, r2_record)
            spats.process_pair(pair)

            summary = "{} :: {}".format(pair.identifier, pair.site if pair.has_site else pair.failure)
            if pair.r1.match_errors:
                summary += " R1!: {}".format(pair.r1.match_errors)
            if pair.r1.adapter_errors:
                summary += " R1A!: {}, adapter_len={}".format(pair.r1.adapter_errors, pair.r1._rtrim)
            if pair.r2.match_errors:
                summary += " R2!: {}".format(pair.r2.match_errors)
            if pair.r2.adapter_errors:
                # NOTE(review): R2's trim is reported minus 4; the reason for
                # the offset is not visible here -- confirm.
                summary += " R2A!: {}, adapter_len={}".format(pair.r2.adapter_errors, pair.r2._rtrim - 4)
            # Call form: identical output on Python 2, valid syntax on Python 3.
            print(summary)
class TestOnlyPrefixes(unittest.TestCase):
    """Check site results when only selected left prefixes are collapsed.

    The run is configured for cotrans with ``collapse_left_prefixes`` enabled
    and ``collapse_only_prefixes`` limited to ``"T,ACGT,CCA"``.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.cotrans = True
        self.spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats.run.collapse_left_prefixes = True
        self.spats.run.collapse_only_prefixes = "T,ACGT,CCA"
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three fields of ``case``."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; when ``case[5]`` names a prefix, check the site.

        The expected site is ``case[4]`` if the prefix is one of the configured
        ``collapse_only_prefixes``, otherwise ``None``.
        """
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        if case[5]:
            expect = case[4] if case[5] in self.spats.run.collapse_only_prefixes.split(',') else None
            # Report the value actually compared against (`expect`), not the
            # raw case[4], so a failure message is not misleading.
            self.assertEqual(expect, pair.site, "PREF res={} != {} ({}, {})".format(pair.site, expect, self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was iterated, not the unrelated `cases` list.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
class TestPrefixPairs(unittest.TestCase):
    """Check sites and prefix counters with ``collapse_left_prefixes`` enabled."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three fields of ``case``."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and check its site and, if given, its prefix counter.

        ``case[3]`` is the expected site. When ``case[4]`` is truthy, the
        counter named ``'prefix_RRRY_' + case[4]`` must equal 1; counters are
        reset before each case so the count reflects this pair only.
        """
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]), "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was iterated, not the unrelated `cases` list.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
class TestPrefixPairs(unittest.TestCase):
    """Check sites and prefix counters with ``collapse_left_prefixes`` enabled."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three fields of ``case``."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and check its site and, if given, its prefix counter.

        ``case[3]`` is the expected site. When ``case[4]`` is truthy, the
        counter named ``'prefix_RRRY_' + case[4]`` must equal 1; counters are
        reset before each case so the count reflects this pair only.
        """
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]), "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Count the list that was iterated, not the unrelated `cases` list.
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
def show_failure_types():
    """Print a one-line outcome summary for every pair in a filtered FASTQ set.

    Reads R1/R2 records in lockstep from two hard-coded FASTQ files, runs each
    pair through a ``spats_clean.Spats`` instance and prints the pair
    identifier followed by its site (when ``pair.has_site``) or its failure,
    plus any match/adapter errors recorded on either read.

    Debug helper: the input paths are developer-local and must exist.
    """
    from spats_clean import Spats, Pair, FastqRecord

    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    # A single `with` owns both handles, so both are closed on any exit path.
    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in, \
            open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
        # The record and pair objects are reused across iterations.
        r1_record = FastqRecord()
        r2_record = FastqRecord()
        pair = Pair()
        while True:
            r1_record.read(r1_in)
            if not r1_record.identifier:
                # An empty identifier after a read is treated as end of input.
                break
            r2_record.read(r2_in)
            pair.set_from_records(r1_record, r2_record)
            spats.process_pair(pair)

            summary = "{} :: {}".format(pair.identifier, pair.site if pair.has_site else pair.failure)
            if pair.r1.match_errors:
                summary += " R1!: {}".format(pair.r1.match_errors)
            if pair.r1.adapter_errors:
                summary += " R1A!: {}, adapter_len={}".format(pair.r1.adapter_errors, pair.r1._rtrim)
            if pair.r2.match_errors:
                summary += " R2!: {}".format(pair.r2.match_errors)
            if pair.r2.adapter_errors:
                # NOTE(review): R2's trim is reported minus 4; the reason for
                # the offset is not visible here -- confirm.
                summary += " R2A!: {}, adapter_len={}".format(pair.r2.adapter_errors, pair.r2._rtrim - 4)
            # Call form: identical output on Python 2, valid syntax on Python 3.
            print(summary)
def compare(self):
    """Process every read pair with both algorithms and stop at the first disagreement.

    Two ``Spats`` instances are built on the same target, one running
    ``find_partial`` and one running ``lookup``. Pairs are read from
    ``self.r1`` / ``self.r2`` in batches of 5000. On the first pair whose
    results differ, a JSON case description is printed and the method returns.
    If no pair differs, the total and both instances' count reports are printed.

    NOTE(review): the source of this method was whitespace-collapsed; the
    nesting below is reconstructed from the control flow -- confirm against
    the original layout.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    # Skeleton of the case printed on a mismatch; id/R1/R2 are filled in then.
    json_base = { 'target' : self.config['target'], 'config' : { 'algorithm' : 'find_partial', 'debug' : True }, 'expect' : {}}
    spats_fp = Spats(cotrans = self.cotrans)
    spats_lookup = Spats(cotrans = self.cotrans)
    self._update_run_config(spats_fp.run)
    # The second call is also handed json_base['config'].
    self._update_run_config(spats_lookup.run, json_base['config'])
    # Set the algorithms explicitly after the shared configuration is applied.
    spats_fp.run.algorithm = 'find_partial'
    spats_lookup.run.algorithm = 'lookup'
    spats_fp.addTargets(self.config['target'])
    spats_lookup.addTargets(self.config['target'])
    count = 0  # pairs on which both algorithms agreed (with or without a site)
    match = 0  # agreed pairs for which both algorithms found a site
    with FastFastqParser(self.r1, self.r2) as parser:
        total = parser.appx_number_of_pairs()
        for batch in parser.iterator(5000):
            for item in batch:
                # Each algorithm gets its own Pair built from the same data.
                pair_fp = Pair()
                pair_lookup = Pair()
                pair_fp.set_from_data(str(item[0]), item[1], item[2])
                pair_lookup.set_from_data(str(item[0]), item[1], item[2])
                try:
                    spats_fp.process_pair(pair_fp)
                    spats_lookup.process_pair(pair_lookup)
                except:
                    # Report progress so far, then let the error propagate.
                    print('Error after {}/{}'.format(match, count))
                    raise
                if (pair_fp.has_site == pair_lookup.has_site):
                    if not pair_fp.has_site:
                        # Neither found a site: counted, but not as a match.
                        count += 1
                        continue
                    elif (pair_fp.target.name == pair_lookup.target.name and pair_fp.end == pair_lookup.end and pair_fp.site == pair_lookup.site and pair_fp.mutations == pair_lookup.mutations):
                        count += 1
                        match += 1
                        continue
                # Reached only on disagreement: dump the pair as JSON and stop.
                json_base["id"] = str(item[0])
                json_base["R1"] = str(item[1])
                json_base["R2"] = str(item[2])
                print('After {}/{} matches; mismatched pair: {} != {}\n{}'.format(match, count, pair_fp, pair_lookup, json.dumps(json_base, sort_keys = True,indent = 4, separators = (',', ': '))))
                return
            # Progress line after each batch.
            print('{}/{}-{}...'.format(match, count, total))
    # total_pairs is set by hand on both instances before reporting.
    spats_fp.counters.total_pairs = count
    spats_lookup.counters.total_pairs = count
    print('All match {}/{}.'.format(match, count))
    print(spats_fp._report_counts())
    print(spats_lookup._report_counts())
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print the outcome.

    Prints the pair identifier and both original sequences, then either
    ``target: site / end`` or ``FAIL: <failure>``. The commented-out ``c``
    lists are alternative cases kept for manual switching.

    Debug helper: depends on developer-local paths and on ``cjb.util``.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    # NOTE(review): this JSON-loaded pair data is overwritten by the
    # set_from_data(c...) call below; kept because the read itself fails
    # loudly when /tmp/spats_test.json is missing.
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c = [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # The active case. An earlier live assignment of the "1116:19486:8968"
    # case was dead (overwritten here before use) and is kept only in its
    # commented-out form above.
    c = [ 'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])

    # Call form of print: identical output on Python 2, valid on Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
class TestPairs(unittest.TestCase):
    """Check pair -> site results against the 5S target."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three fields of ``case``."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert that its site equals ``case[3]``."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # Smoke test only: the result is printed, nothing is asserted.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG', 'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair must fail at minimum length 11 and match at 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)
        # this case only matches if the minimum length is set to 8
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        # Set the length on the new instance, before targets are added. It used
        # to be set on the old instance just before that was replaced, so the
        # assignment had no effect.
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
def cotrans_test():
    """Process one hard-coded read pair against ``F_wt.fa`` and print target and site.

    Debug helper: the target path is developer-local.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT', 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)

    # Guard against an unmatched pair so the helper prints "None" instead of
    # raising AttributeError on `pair.target.name`.
    target_name = pair.target.name if pair.target else None
    # Call form of print: identical output on Python 2, valid on Python 3.
    print("{}: {}".format(target_name, pair.site))
def tmut_case():
    """Process one hard-coded read pair with mutation counting and print a diagram.

    Configures a non-cotrans ``find_partial`` run with mutation counting and
    one allowed target error, processes the active ``c`` pair, prints its
    diagram, then either ``target: site / end mutations`` or ``FAIL: <failure>``.
    The commented-out ``c`` lists are alternative cases kept for manual
    switching.

    Debug helper: the target path is developer-local.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    spats = Spats(cotrans=False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    # The active case.
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)

    # Call form of print: identical output on Python 2, valid on Python 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
def cotrans_debug():
    """Process one hard-coded cotrans read pair and print the outcome.

    Prints the pair identifier and both original sequences, then either
    ``target: site / end`` or ``FAIL: <failure>``. The commented-out ``c``
    lists are alternative cases kept for manual switching.

    Debug helper: depends on developer-local paths and on ``cjb.util``.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    # NOTE(review): this JSON-loaded pair data is overwritten by the
    # set_from_data(c...) call below; kept because the read itself fails
    # loudly when /tmp/spats_test.json is missing.
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c = [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # The active case. An earlier live assignment of the "1116:19486:8968"
    # case was dead (overwritten here before use) and is kept only in its
    # commented-out form above.
    c = ['jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])

    # Call form of print: identical output on Python 2, valid on Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
class TestPairs(unittest.TestCase):
    """Check pair -> site results against the 5S target."""

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from the first three fields of ``case``."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert that its site equals ``case[3]``."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[3], pair.site, "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # Smoke test only: the result is printed, nothing is asserted.
        pair = Pair()
        pair.set_from_data("x", 'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG', 'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        """The same pair must fail at minimum length 11 and match at 8."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [ '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA', 'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None ]
        self.run_case(case)
        # this case only matches if the minimum length is set to 8
        self.spats = Spats()
        # Set the length on the new instance, before targets are added. It used
        # to be set on the old instance just before that was replaced, so the
        # assignment had no effect.
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
def tmut_case():
    """Process one hard-coded read pair with mutation counting and print a diagram.

    Configures a non-cotrans ``find_partial`` run with mutation counting and
    one allowed target error, processes the active ``c`` pair, prints its
    diagram, then either ``target: site / end mutations`` or ``FAIL: <failure>``.
    The commented-out ``c`` lists are alternative cases kept for manual
    switching.

    Debug helper: the target path is developer-local.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    spats = Spats(cotrans = False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    # The active case.
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]
    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)

    # Call form of print: identical output on Python 2, valid on Python 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
class TestMutPairs(unittest.TestCase):
    """Check site, end and mutation results with mutation counting enabled.

    ``setup_processor`` and ``cases`` are separate methods, so a subclass can
    override the algorithm/targets or the case list.
    """

    def setUp(self):
        self.spats = Spats()
        self.spats.run.count_mutations = True
        # Quality threshold expressed as the offset of '.' from '!'.
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build a ``Pair`` from ``case``, with explicit or default qualities.

        Cases with more than six fields carry R1/R2 quality strings in
        ``case[6]`` and ``case[7]``; otherwise every base gets quality ``'K'``.
        """
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
            pair.r1.quality = 'K' * len(case[1])
            pair.r2.quality = 'K' * len(case[2])
        return pair

    def run_case(self, case):
        """Process one case; check site (``case[4]``), end (``case[3]``), mutations (``case[5]``).

        End and mutations are only checked when a site was found. Mutations
        are compared in sorted order when non-empty.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(case[4], pair.site, "res={} != {} ({}, {}, {})".format(pair.site, case[4], self.__class__.__name__, case[0], pair.failure))
        if pair.site is not None:
            self.assertEqual(case[3], pair.end, "end={} != {} ({}, {}, {})".format(pair.end, case[3], self.__class__.__name__, case[0], pair.failure))
            self.assertEqual(case[5], sorted(pair.mutations) if pair.mutations else pair.mutations, "muts={} != {} ({}, {}, {})".format(pair.mutations, case[5], self.__class__.__name__, case[0], pair.failure))
        return pair

    def cases(self):
        """Return the case list matching the run's cotrans setting."""
        return cotrans_cases if self.spats.run.cotrans else cases

    def test_pairs(self):
        # Take the pair length and the reported count from the list that is
        # actually iterated; the module-level `cases` is the wrong list when
        # the run is cotrans.
        selected = self.cases()
        self.spats.run.pair_length = len(selected[0][1])
        for case in selected:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(selected)))
def test_tags():
    """Run a few hard-coded read pairs through ``TagProcessor`` and print their tags.

    For each pair, prints R1's original sequence and tags, R2's original
    sequence and tags, and a separator line. Nothing is asserted.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"

    from spats_shape_seq import Spats
    s = Spats()

    from spats_shape_seq.tag import TagProcessor
    s.run._processor_class = TagProcessor

    #from spats_shape_seq.target import Targets
    #s.addTargets(bp + "5S.fa")
    s.addTarget("5s", "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC")
    #s.addTarget("rc(5s)", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    #s.addTarget("adapter_t", s.run.adapter_t)
    #s.addTarget("adapter_b", s.run.adapter_b)
    #s._targets._index_word_length = 8
    #s._targets._minimum_length = 8
    #s.addTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    #s.addTarget("adapter_b_rc", reverse_complement(s.run.adapter_b))

    # Register the tag targets directly on the processor.
    p = s._processor
    p.addTagTarget("5s", "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC")
    p.addTagTarget("5s_rc", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    from spats_shape_seq.util import reverse_complement
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)

    from spats_shape_seq.pair import Pair
    cases = [
        [ "1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA" ],
        [ "1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG" ],
        [ "21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA", "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA" ],
        [ "1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG", "GGCATCTGACTCGGGCACCAAGGACATACAGATCG" ],
        [ "18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG", "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG" ],
    ]

    # A single Pair object is reused for every case.
    pair = Pair()
    for case in cases:
        pair.set_from_data(case[0], case[1], case[2])
        s.process_pair(pair)
        # Call form of print: identical output on Python 2, valid on Python 3.
        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")
class TestPanelPairs(unittest.TestCase):
    """Pair tests against the complete panel-RNA target set (minimum match length 10)."""

    def setUp(self):
        from spats_shape_seq import Spats
        panel = Spats()
        panel.run.minimum_target_match_length = 10
        panel.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = panel

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # Expect no target assignment and exactly one multiple_R1_match count.
        run_config = self.spats.run
        run_config.debug = True
        run_config.algorithm = "find_partial"

        identifier = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'
        read_pair = Pair()
        read_pair.set_from_data(identifier, r1_seq, r2_seq)

        self.spats.process_pair(read_pair)

        self.assertEqual(None, read_pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
class TestPanelPairs(unittest.TestCase):
    """Pair tests against the complete panel-RNA target set (minimum match length 10)."""

    def setUp(self):
        from spats_shape_seq import Spats
        panel = Spats()
        panel.run.minimum_target_match_length = 10
        panel.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = panel

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # Expect no target assignment and exactly one multiple_R1_match count.
        run_config = self.spats.run
        run_config.debug = True
        run_config.algorithm = "find_partial"

        identifier = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'
        read_pair = Pair()
        read_pair.set_from_data(identifier, r1_seq, r2_seq)

        self.spats.process_pair(read_pair)

        self.assertEqual(None, read_pair.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
def tag_test():
    """Process the read pair stored in ``/tmp/spats_test.json`` and print the outcome.

    Configures a cotrans ``find_partial`` run, loads ``id``/``r1``/``r2``
    from the JSON file, prints the pair, then either
    ``target: site / right`` or ``FAIL: <failure>``.

    Debug helper: depends on developer-local paths and on ``cjb.util``.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    # str() is applied to each loaded field before it is handed to the pair.
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # Call form of print: identical output on Python 2, valid on Python 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
class TestPairsPartial(unittest.TestCase):
    """Cotrans pair -> site/end checks; the algorithm is chosen in ``setup_processor``."""

    def setUp(self):
        self.spats = Spats()
        run_config = self.spats.run
        run_config.cotrans = True
        run_config.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        # The algorithm is selected before targets are added.
        self.setup_processor()
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def setup_processor(self):
        self.spats.run.algorithm = "find_partial"

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a new ``Pair`` filled from fields 0-2 of ``case``."""
        built = Pair()
        built.set_from_data(case[0], case[1], case[2])
        return built

    def run_case(self, case):
        """Process ``case``; check the site against ``case[4]`` and, if found, the end against ``case[3]``."""
        processed = self.pair_for_case(case)
        self.spats.process_pair(processed)
        site_message = "res={} != {} ({}, {})".format(processed.site, case[4], self.__class__.__name__, case[0])
        self.assertEqual(case[4], processed.site, site_message)
        if processed.site is None:
            return processed
        self.assertEqual(case[3], processed.end)
        return processed

    def test_pairs(self):
        self.spats.run.pair_length = len(cases[0][1])
        if self.spats._processor.exists():
            for each_case in cases:
                self.run_case(each_case)
            print("Ran {} pair->site cases.".format(len(cases)))
        else:
            # just ignore the native test if it's not available
            self.assertEqual("native", self.spats.run.algorithm)
def _show_case(self, test_case):
    """Process a test case's pair under each requested algorithm and print the outcome.

    The algorithms come from `run_opts['algorithm']` if set, otherwise from
    `run_opts['algorithms']`, defaulting to `find_partial` and `lookup`.
    For each one a fresh `Spats` is configured from `test_case.run_opts`
    and `test_case.targets`, the pair is processed, and its diagram is
    printed.  If the case has expectations they are checked: the first
    mismatch prints `FAIL: ...` and exits the process with status 1,
    otherwise `PASS` is printed.

    Raises:
        Exception: if `run_opts` names an attribute that `spats.run` lacks.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.diagram import diagram

    run_opts = test_case.run_opts
    alg = run_opts.get('algorithm')
    algs = [alg] if alg else run_opts.get('algorithms', ['find_partial', 'lookup'])

    for algorithm in algs:
        spats = Spats()
        spats.run.algorithm = algorithm
        for key, value in run_opts.items():
            if str(key) == 'algorithms':
                # The list of algorithms drives this loop; it is not a run attribute.
                continue
            if isinstance(value, unicode):
                value = str(value)
            if not hasattr(spats.run, key):
                raise Exception('Invalid run_opt: {}'.format(key))
            setattr(spats.run, key, value)
        for name, seq in test_case.targets.items():
            spats.addTarget(name, seq)

        pair = test_case.pair()
        if len(algs) > 1:
            print('\n[[ ALGORITHM: {} ]]'.format(algorithm))
        spats.process_pair(pair)
        if test_case.comment:
            print('Comment: {}'.format(test_case.comment))
        print(diagram(pair, spats.run))

        expects = test_case.expect
        if not expects:
            continue

        # should mirror `_check_expect` in test_harness.py...
        # NOTE(review): the end/mutation/indel checks are assumed to apply
        # only when a site is expected, and the counter/target checks to
        # apply always -- confirm against test_harness.py.
        try:
            if expects['site'] is None:
                if pair.site is not None:
                    raise Exception("pair.site={} when expecting none.".format(pair.site))
            else:
                if pair.site is None:
                    raise Exception("pair.site is none when expecting {}.".format(expects['site']))
                if pair.site != expects['site']:
                    raise Exception("pair.site={} != expect.site={}".format(pair.site, expects['site']))
                if 'end' in expects and pair.end != expects['end']:
                    raise Exception("pair.end={} != expect.end={}".format(pair.end, expects['end']))
                if 'muts' in expects:
                    if expects['muts'] is not None and len(expects['muts']) > 0:
                        # Order-insensitive comparison; an empty/None
                        # pair.mutations is compared as-is.
                        actual_muts = sorted(pair.mutations) if pair.mutations else pair.mutations
                        if sorted(expects['muts']) != actual_muts:
                            raise Exception("mismatching mutations: expected={}, pair.mutations={}".format(expects['muts'], pair.mutations))
                    elif not (pair.mutations is None or len(pair.mutations) == 0):
                        raise Exception("unexpected mutations: {}".format(pair.mutations))
                if 'r1_indels' in expects:
                    r1inds = objdict_to_dict(pair.r1.indels)
                    if expects['r1_indels']:
                        if expects['r1_indels'] != r1inds:
                            raise Exception("mismatching R1 indels: expected={}, pair.r1.indels={}".format(expects['r1_indels'], r1inds))
                    elif pair.r1.indels:
                        # Report the converted dict, as the R2 branch does.
                        raise Exception("unexpected R1 indels: pair.r1.indels={}".format(r1inds))
                if 'r2_indels' in expects:
                    r2inds = objdict_to_dict(pair.r2.indels)
                    if expects['r2_indels']:
                        if expects['r2_indels'] != r2inds:
                            raise Exception("mismatching R2 indels: expected={}, pair.r2.indels={}".format(expects['r2_indels'], r2inds))
                    elif pair.r2.indels:
                        raise Exception("unexpected R2 indels: pair.r2.indels={}".format(r2inds))
            if 'counters' in expects:
                for counter, value in expects['counters'].items():
                    # Look the counter up once, by its str() name, so the
                    # comparison and the message use the same value.
                    actual = getattr(spats.counters, str(counter))
                    if actual != value:
                        raise Exception("counter '{}' value off: expected={} != got={}".format(counter, value, actual))
            if 'pair.target' in expects:
                tname = pair.target.name if pair.target else None
                if tname != expects['pair.target']:
                    raise Exception("pair.target={} != expect.pair.target={}".format(tname, expects['pair.target']))
        except Exception as e:
            print('FAIL: {}'.format(e))
            sys.exit(1)
        print('PASS')
class SpatsTestCase(unittest.TestCase):
    """Runs one harness case under every algorithm listed by its test set.

    `test_set` supplies `name`, `algorithms` and `spats_setUp(spats)`;
    `case` supplies `id`, `expect` and `pair()`.
    """

    def __init__(self, test_set, case):
        super(TestHarness.SpatsTestCase, self).__init__()
        self.test_set = test_set
        self.case = case

    def setUp(self):
        """Create a Spats and let the test set configure it; log and re-raise on failure."""
        try:
            self.spats = Spats()
            self.test_set.spats_setUp(self.spats)
        except Exception as e:
            print("exception caught on testset '{}' setup : {}".format(
                self.test_set.name, e))
            # Bare `raise` re-raises the active exception with its original
            # traceback intact.
            raise

    def tearDown(self):
        """Release the Spats instance."""
        self.spats = None

    def runTest(self):
        """Process and check the case once per algorithm of the test set."""
        for algorithm in self.test_set.algorithms:
            # Necessary to change algorithm (which determines the processor)
            self.spats.reset_processor()
            self.spats.run.algorithm = algorithm
            self._run_case(self.case)

    def _run_case(self, case):
        """Reset the counters, process the case's pair and verify its expectations."""
        pair = case.pair()
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self._check_expects(case, pair)

    @staticmethod
    def _indels_as_dict(indels):
        """Return `indels` as a plain dict: str(key) -> vars(value)."""
        return {str(key): vars(indel) for key, indel in indels.items()}

    def _check_expects(self, case, pair):
        """Assert that the processed `pair` matches `case.expect`.

        `expect` must contain `site`; `end`, `muts`, `r1_indels`,
        `r2_indels`, `counters` and `pair.target` are checked only when
        present.  Every failure message is prefixed with the test set name,
        case id and current algorithm.
        """
        expects = case.expect
        msg = "testset='{}', test id='{}', algorithm='{}' failed: ".format(
            self.test_set.name, case.id, self.spats.run.algorithm)

        # NOTE(review): the end/mutation/indel checks are assumed to apply
        # only when a site is expected, and the counter/target checks to
        # apply always -- confirm the intended nesting.
        if expects['site'] is None:
            self.assertIs(
                pair.site, None,
                msg + "pair.site={} when expecting none.".format(pair.site))
        else:
            self.assertIsNot(
                pair.site, None,
                msg + "pair.site is none when expecting {}.".format(expects['site']))
            self.assertEqual(
                expects['site'], pair.site,
                msg + "pair.site={} != expect.site={}".format(pair.site, expects['site']))
            if 'end' in expects:
                self.assertEqual(
                    expects['end'], pair.end,
                    msg + "pair.end={} != expect.end={}".format(pair.end, expects['end']))
            if 'muts' in expects:
                if expects['muts'] is not None and len(expects['muts']) > 0:
                    # Order-insensitive comparison; an empty/None
                    # pair.mutations is compared as-is.
                    self.assertEqual(
                        sorted(expects['muts']),
                        sorted(pair.mutations) if pair.mutations else pair.mutations,
                        msg + "mismatching mutations: expected={}, pair.mutations={}".format(
                            expects['muts'], pair.mutations))
                else:
                    self.assertTrue(
                        pair.mutations is None or len(pair.mutations) == 0,
                        msg + "unexpected mutations: {}".format(pair.mutations))
            if 'r1_indels' in expects:
                r1inds = self._indels_as_dict(pair.r1.indels)
                if expects['r1_indels']:
                    self.assertEqual(expects['r1_indels'], r1inds, msg)
                else:
                    self.assertFalse(
                        pair.r1.indels,
                        msg + "unexpected R1 indels: {}".format(r1inds))
            if 'r2_indels' in expects:
                r2inds = self._indels_as_dict(pair.r2.indels)
                if expects['r2_indels']:
                    self.assertEqual(expects['r2_indels'], r2inds, msg)
                else:
                    self.assertFalse(
                        pair.r2.indels,
                        msg + "unexpected R2 indels: {}".format(r2inds))

        if 'counters' in expects:
            # .items() works on both Python 2 and Python 3 dicts.
            for counter, value in expects['counters'].items():
                actual = getattr(self.spats.counters, str(counter))
                self.assertEqual(
                    actual, value,
                    msg + "counter '{}' value off: expected={} != got={}".format(
                        counter, value, actual))
        if 'pair.target' in expects:
            tname = pair.target.name if pair.target else None
            self.assertEqual(
                tname, expects['pair.target'],
                msg + "pair.target={} != expect.pair.target={}".format(
                    tname, expects['pair.target']))
def test_tags():
    """Tag a handful of fixed read pairs with TagProcessor and print the tags.

    A `Spats` is switched to `TagProcessor`, given the 5S target, and its
    processor is given four tag targets (5S, its reverse complement literal,
    the reverse complement of `adapter_t`, and `adapter_b`).  Each case is
    processed in turn and the original R1/R2 sequences are printed together
    with their tags, followed by a separator line.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    # Used both as the Spats target and as a tag target.
    seq_5s = "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC"
    seq_5s_rc = "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC"

    s = Spats()
    s.run._processor_class = TagProcessor
    #bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    #from spats_shape_seq.target import Targets
    #s.addTargets(bp + "5S.fa")
    s.addTarget("5s", seq_5s)
    #s.addTarget("rc(5s)", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    #s.addTarget("adapter_t", s.run.adapter_t)
    #s.addTarget("adapter_b", s.run.adapter_b)
    #s._targets._index_word_length = 8
    #s._targets._minimum_length = 8
    #s.addTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    #s.addTarget("adapter_b_rc", reverse_complement(s.run.adapter_b))

    p = s._processor
    p.addTagTarget("5s", seq_5s)
    p.addTagTarget("5s_rc", seq_5s_rc)
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)

    # Each case is [identifier, R1 sequence, R2 sequence].
    cases = [
        ["1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA"],
        ["1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG"],
        ["21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA", "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA"],
        ["1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG", "GGCATCTGACTCGGGCACCAAGGACATACAGATCG"],
        ["18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG", "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG"],
    ]

    # A single Pair object is reused for every case.
    pair = Pair()
    for case in cases:
        pair.set_from_data(case[0], case[1], case[2])
        s.process_pair(pair)
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid on Python 3.
        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")