class TestPrefixPairs(unittest.TestCase):
    """Check pair sites and prefix counters with ``collapse_left_prefixes`` enabled.

    Each entry of ``prefix_cases`` is read positionally: [0] identifier,
    [1] R1, [2] R2, [3] expected site, [4] prefix whose
    ``prefix_RRRY_<prefix>`` counter must be 1 (check skipped when falsy).
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` built from the identifier/R1/R2 fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site (and prefix counter, if any)."""
        pair = self.pair_for_case(case)
        # Reset first so the counter assertion below only reflects this pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(
                1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]),
                "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list actually iterated (was ``len(cases)``).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
class TestShortAdapterB(unittest.TestCase):
    """Run ``short_adapter_b_cases`` through every algorithm except 'native'.

    Case layout used here: [0] identifier, [1] R1, [2] R2, [3] expected site.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Build the ``Pair`` for a single case entry."""
        built = Pair()
        built.set_from_data(case[0], case[1], case[2])
        return built

    def run_case(self, case):
        """Process one case with the current algorithm and check its site."""
        candidate = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(candidate)
        failure_text = "site res={} != {} ({}, {}, {}, {})".format(
            candidate.site,
            case[3],
            self.__class__.__name__,
            case[0],
            self.spats.run.algorithm,
            candidate.failure)
        self.assertEqual(case[3], candidate.site, failure_text)

    def test_pairs(self):
        for alg in algorithms:
            # The native algorithm is deliberately not exercised here.
            if alg != 'native':
                self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh ``Spats`` configured for *alg* and run all cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        for entry in short_adapter_b_cases:
            self.run_case(entry)
        print("Ran {} adapter_b test cases.".format(len(short_adapter_b_cases)))
def d5s_run():
    """Process the hard-coded 5S fastq pair and write reactivities to dev_out/rx2.out.

    All paths hang off a developer-specific base directory. The run is
    configured with ``skip_database = True``.
    """
    base_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    # Disabled alternative: load the reads into a PairDB first.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(base_dir + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(base_dir + "5s/5S.fa")
    #    pair_db.parse(base_dir + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq",
    #                  base_dir + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq")

    from spats_shape_seq import Spats
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor

    runner = Spats()
    #runner.run._processor_class = PartialFindProcessor
    runner.run.skip_database = True
    #runner.run.writeback_results = True
    #runner.run.resume_processing = True
    #runner.run.result_set_name = "lookup"
    runner.addTargets(base_dir + "5s/5S.fa")
    #runner.process_pair_db(pair_db)

    r1_path = base_dir + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_path = base_dir + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"
    runner.process_pair_data(r1_path, r2_path)

    runner.compute_profiles()
    runner.write_reactivities(base_dir + "dev_out/rx2.out")
class TestOnlyPrefixes(unittest.TestCase):
    """Check that only the prefixes listed in ``collapse_only_prefixes`` yield a site.

    Case layout used here: [0] identifier, [1] R1, [2] R2, [4] site
    expected when the prefix is allowed, [5] prefix (case skipped when falsy).
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.cotrans = True
        self.spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        self.spats.run.collapse_left_prefixes = True
        self.spats.run.collapse_only_prefixes = "T,ACGT,CCA"
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` built from the identifier/R1/R2 fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case; a prefix outside the allowed list must give no site."""
        pair = self.pair_for_case(case)
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        if case[5]:
            allowed = self.spats.run.collapse_only_prefixes.split(',')
            expect = case[4] if case[5] in allowed else None
            # Report the value actually compared against (``expect``); the
            # old message printed case[4] even when None was expected.
            self.assertEqual(
                expect, pair.site,
                "PREF res={} != {} ({}, {})".format(pair.site, expect, self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list actually iterated (was ``len(cases)``).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
def diag_case():
    """Print diagrams for the starred mutation test cases and fail on any mismatch.

    Only cases whose identifier starts with ``*`` are run. For each one the
    pair diagram is printed, and a mismatch between the expected site
    (``case[3]``) and ``pair.site`` is reported.

    Raises:
        Exception: if at least one selected case produced a mismatched site.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    #from spats_shape_seq.tests.test_pairs import prefix_cases as cases
    from spats_shape_seq.diagram import diagram
    #spats_config.minimum_target_match_length = 8
    spats = Spats()
    #spats.addTargets("test/5s/5s.fa")
    spats.addTargets("test/mut/mut_single.fa")
    spats.run.debug = True
    spats.run.algorithm = "find_partial"
    spats.run.count_mutations = True
    #spats.run.mutations_require_quality_score = ord('.') - ord('!')
    spats.run.allowed_target_errors = 1
    spats.run.ignore_stops_with_mismatched_overlap = True
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    #spats.run.collapse_left_prefixes = True
    # Mismatch flag is stored on the Spats object so the nested helper can set it.
    spats._case_errors = False

    def run_case(case):
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(diagram(pair, spats.run))
        if case[3] != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(case[3], pair.site))

    for case in cases:
        if case[0].startswith("*"):
            run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
def run(self):
    """Process the SPATS data for the configured target(s) and r1/r2 fragment pairs.

    Any existing run file is removed first. The native cotrans tool is used
    only for cotrans runs where ``_update_run_config`` returns a falsy
    value; otherwise the python processor runs and stores the result.
    Afterwards the run is added to the notebook, if there is one.
    """
    output_path = self._run_file()
    if os.path.exists(output_path):
        self._add_note("** removing previous run file")
        os.remove(output_path)

    native_tool = self._native_tool('cotrans')
    if native_tool and not self.cotrans:
        self._add_note("skipping native tool due to non-cotrans run")
        native_tool = None

    spats = Spats(cotrans = self.cotrans)
    # Always applied; a truthy return is treated as a custom configuration.
    custom_config = self._update_run_config(spats.run)
    if custom_config and native_tool:
        self._add_note("skipping native tool due to custom config")
        native_tool = None

    if not native_tool:
        self._add_note("using python processor")
        spats.addTargets(self.config['target'])
        if self.using_separate_channel_files:
            # One pass per channel file pair, each forced to its own mask.
            spats.process_pair_data(self.r1_plus, self.r2_plus, force_mask = spats.run.masks[0])
            spats.process_pair_data(self.r1_minus, self.r2_minus, force_mask = spats.run.masks[1])
        else:
            spats.process_pair_data(self.r1, self.r2)
        spats.store(output_path)
    else:
        self._add_note("using native cotrans processor")
        native_command = [native_tool, self.config['target'], self.r1, self.r2, output_path]
        subprocess.check_call(native_command, cwd = self.path)

    self._add_note("wrote output to {}".format(os.path.basename(output_path)))
    notebook = self._notebook()
    if notebook:
        notebook.add_spats_run(self.cotrans, spats.run.count_mutations).save()
class TestPrefixPairs(unittest.TestCase):
    """Check pair sites and prefix counters with ``collapse_left_prefixes`` enabled.

    Each entry of ``prefix_cases`` is read positionally: [0] identifier,
    [1] R1, [2] R2, [3] expected site, [4] prefix whose
    ``prefix_RRRY_<prefix>`` counter must be 1 (check skipped when falsy).
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.collapse_left_prefixes = True
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` built from the identifier/R1/R2 fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert its site (and prefix counter, if any)."""
        pair = self.pair_for_case(case)
        # Reset first so the counter assertion below only reflects this pair.
        self.spats.counters.reset()
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(pair.site, case[3], self.__class__.__name__, case[0]))
        if case[4]:
            self.assertEqual(
                1, getattr(self.spats.counters, 'prefix_RRRY_' + case[4]),
                "prefix {} not counted ({})".format(case[4], case[0]))
        return pair

    def test_pairs(self):
        for case in prefix_cases:
            self.run_case(case)
        # Report the size of the list actually iterated (was ``len(cases)``).
        print("Ran {} prefix test cases.".format(len(prefix_cases)))
def compare(self):
    """Process every r1/r2 pair with both 'find_partial' and 'lookup' and compare.

    Stops at the first pair whose results differ, printing both pairs and a
    JSON description of the case. If no pair differs, prints the totals and
    both count reports.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    # Template for the JSON dump printed when a mismatching pair is found.
    json_base = { 'target' : self.config['target'], 'config' : { 'algorithm' : 'find_partial', 'debug' : True }, 'expect' : {}}
    spats_fp = Spats(cotrans = self.cotrans)
    spats_lookup = Spats(cotrans = self.cotrans)
    self._update_run_config(spats_fp.run)
    # NOTE(review): the second argument's role is defined by _update_run_config,
    # which is not visible here -- confirm what it does with json_base['config'].
    self._update_run_config(spats_lookup.run, json_base['config'])
    # Algorithms are set after the config update, so they override it.
    spats_fp.run.algorithm = 'find_partial'
    spats_lookup.run.algorithm = 'lookup'
    spats_fp.addTargets(self.config['target'])
    spats_lookup.addTargets(self.config['target'])
    count = 0   # pairs on which both algorithms agreed (with or without a site)
    match = 0   # agreeing pairs that did have a site
    with FastFastqParser(self.r1, self.r2) as parser:
        total = parser.appx_number_of_pairs()
        for batch in parser.iterator(5000):
            for item in batch:
                # Separate Pair objects so each processor works on its own instance.
                pair_fp = Pair()
                pair_lookup = Pair()
                pair_fp.set_from_data(str(item[0]), item[1], item[2])
                pair_lookup.set_from_data(str(item[0]), item[1], item[2])
                try:
                    spats_fp.process_pair(pair_fp)
                    spats_lookup.process_pair(pair_lookup)
                except:
                    # Print progress for context, then re-raise the original error.
                    print('Error after {}/{}'.format(match, count))
                    raise
                if (pair_fp.has_site == pair_lookup.has_site):
                    if not pair_fp.has_site:
                        # Neither algorithm found a site: counted, but not as a match.
                        count += 1
                        continue
                    elif (pair_fp.target.name == pair_lookup.target.name and
                          pair_fp.end == pair_lookup.end and
                          pair_fp.site == pair_lookup.site and
                          pair_fp.mutations == pair_lookup.mutations):
                        count += 1
                        match += 1
                        continue
                # Reached only when the two algorithms disagree on this pair.
                json_base["id"] = str(item[0])
                json_base["R1"] = str(item[1])
                json_base["R2"] = str(item[2])
                print('After {}/{} matches; mismatched pair: {} != {}\n{}'.format(match, count, pair_fp, pair_lookup,
                                                                                 json.dumps(json_base, sort_keys = True,indent = 4, separators = (',', ': '))))
                return
            # Progress line after each batch.
            print('{}/{}-{}...'.format(match, count, total))
    spats_fp.counters.total_pairs = count
    spats_lookup.counters.total_pairs = count
    print('All match {}/{}.'.format(match, count))
    print(spats_fp._report_counts())
    print(spats_lookup._report_counts())
def cotrans_debug():
    """Process one hard-coded cotrans pair and print its result.

    Prints the pair identifier and original sequences, then either
    ``target: site / end`` or the failure reason. Swap the active ``c``
    line with one of the commented cases to debug a different pair.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # NOTE(review): this JSON-loaded data is followed by a second
    # set_from_data() call with the hard-coded case below, which presumably
    # replaces it -- confirm whether the JSON load is still wanted.
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # Alternate cases (uncomment one and comment out the active line to use it).
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c = [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # Active case. An earlier, immediately-overwritten assignment of the
    # "1116:19486:8968" case was removed; it remains in the list above.
    c = [ 'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])

    # print(...) with one argument behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
def spats(target, r1, r2, out, show_sites=True):
    """Process the r1/r2 data against *target* with the RRRY/YYYR masks.

    With *show_sites* truthy, ``spats_config.show_id_to_site`` is turned on
    before processing and nothing is written. Otherwise profiles are
    computed and reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    engine = Spats()
    engine.addTargets(target)
    engine.addMasks("RRRY", "YYYR")

    if show_sites:
        # Site display mode: flag must be set before the data is processed.
        spats_config.show_id_to_site = True
        engine.process_pair_data(r1, r2)
        return

    engine.process_pair_data(r1, r2)
    engine.compute_profiles()
    engine.write_reactivities(out + "/rx.out")
def spats(target, r1, r2, out, show_sites = True):
    """Process the r1/r2 data against *target* with the RRRY/YYYR masks.

    With *show_sites* truthy, ``spats_config.show_id_to_site`` is turned on
    before processing and nothing is written. Otherwise profiles are
    computed and reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    engine = Spats()
    engine.addTargets(target)
    engine.addMasks("RRRY", "YYYR")

    if show_sites:
        # Site display mode: flag must be set before the data is processed.
        spats_config.show_id_to_site = True
        engine.process_pair_data(r1, r2)
        return

    engine.process_pair_data(r1, r2)
    engine.compute_profiles()
    engine.write_reactivities(out + "/rx.out")
def cotrans_test():
    """Process one hard-coded pair against F_wt.fa and print its target and site.

    Prints ``FAIL: <failure>`` instead when the pair has no site, matching
    the other debug helpers, rather than failing on ``pair.target.name``.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    from spats_shape_seq import Spats
    s = Spats()
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor
    #s.run._processor_class = PartialFindProcessor
    s.addTargets(bp + "F_wt.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    pair.set_from_data('x', 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT', 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC')
    s.process_pair(pair)
    # print(...) with one argument behaves the same on Python 2 and 3.
    if pair.has_site:
        print("{}: {}".format(pair.target.name, pair.site))
    else:
        print("FAIL: {}".format(pair.failure))
def tmut_case():
    """Process one hard-coded pair with mutation counting and print the outcome.

    Prints the pair diagram, then either ``target: site / end mutations``
    or the failure reason. Swap the active ``c`` line with one of the
    commented cases to debug a different pair.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    spats = Spats(cotrans=False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # Alternate cases (uncomment one and comment out the active line to use it).
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]

    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
def ligation_run():
    """Process the hard-coded KEW1 R1/R2 fastq pair against panel_RNAs_complete.fa.

    Paths are relative to a developer-specific dataset directory; the run
    is configured with ``skip_database = True``. Nothing is written here.
    """
    dataset_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"

    from spats_shape_seq import Spats
    runner = Spats()

    # Disabled debugging knobs.
    #runner.config.debug = True
    #runner.run.minimum_target_match_length = 10
    #runner.run.num_workers = 1
    #from spats_shape_seq.partial import PartialFindProcessor
    #runner.run._processor_class = PartialFindProcessor

    runner.run.skip_database = True
    runner.addTargets(dataset_dir + "panel_RNAs_complete.fa")

    r1_path = dataset_dir + "data/KEW1_S1_L001_R1_001.fastq"
    r2_path = dataset_dir + "data/KEW1_S1_L001_R2_001.fastq"
    runner.process_pair_data(r1_path, r2_path)
def d5s_writeback_run():
    """Process the pairs stored in dev_out/pairs.db with result writeback enabled.

    The targets table is added to the pair database, then the database is
    processed with results tagged under the "pure_python" result set name.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(bp + "dev_out/pairs.db")
    pair_db.add_targets_table(bp + "5s/5S.fa")
    from spats_shape_seq import Spats
    s = Spats()
    # These options were previously assigned on the Spats object itself;
    # every other helper in this file configures them on ``s.run``.
    # NOTE(review): confirm that the run object is where these are read.
    s.run.writeback_results = True
    s.run.result_set_name = "pure_python"
    s.addTargets(bp + "5s/5S.fa")
    s.process_pair_db(pair_db)
def cotrans_run():
    """Process the hard-coded EJS_6 R1/R2 fastq pair and write reactivities to dev_out/rx.out.

    Paths are relative to a developer-specific dataset directory; the run
    is configured with ``skip_database = True``.
    """
    dataset_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq import Spats
    runner = Spats()

    # Disabled knobs kept for quick experiments.
    #from spats_shape_seq.partial import PartialFindProcessor
    #runner.run._processor_class = PartialFindProcessor
    runner.run.skip_database = True
    #runner.run.writeback_results = True
    #runner.run.resume_processing = True
    #runner.run.result_set_name = "lookup"

    runner.addTargets(dataset_dir + "F_wt.fa")

    r1_path = dataset_dir + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    r2_path = dataset_dir + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"
    runner.process_pair_data(r1_path, r2_path)

    runner.compute_profiles()
    runner.write_reactivities(dataset_dir + "dev_out/rx.out")
def cotrans_debug():
    """Process one hard-coded cotrans pair and print its result.

    Prints the pair identifier and original sequences, then either
    ``target: site / end`` or the failure reason. Swap the active ``c``
    line with one of the commented cases to debug a different pair.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    #s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #s.run.algorithm = "find_partial"
    #s.run._p_v102_compat = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # NOTE(review): this JSON-loaded data is followed by a second
    # set_from_data() call with the hard-coded case below, which presumably
    # replaces it -- confirm whether the JSON load is still wanted.
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # Alternate cases (uncomment one and comment out the active line to use it).
    #c = ['683779', 'TCCGGTCCTTGGTGCCCGAGTCAGAAAAAAATAGAA', 'TCTATTTTTTTCTGACTCGGGCACCAAGGACCGGAA', 82, 71]
    #c = [ "1116:19486:8968", "TCCGGTCCTTGGTGCCCGAGTCAGTCCTTCCTCCTA", "GAGTCTATTTTTTTAGGAGGAAGGACTGACTCGGGC", 93, 68 ]
    #c = [ "301028", "AAGTGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAAGGACACTTAGATCGGA", 96, 92 ]
    #c = [ "31631284", "TTCAGTCCTTGGTGCCCGAGTCAGAGATAGATCGGA", "ATCTCTGACTCGGGCACCAATGACCGGAAGATCGGA", 96, 92 ]
    #c = [ "7232", "AGGTGTCCTTGGTGCCCGAGTCAGTAGCTAAGAAAT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", -1, -1 ]
    #c = [ "16845404", "AAATGTCCTTGGTGCCCGAGTCAGACTGGTAGGAGT", "TCTTATAGGCGATGGAGTTCGCCATAAACGCTGCTT", -1, -1 ]
    #c = [ "24102328", "AAGCGTCCTTGGTGCCCGAGTCAGGAGTCATAGATC", "ATGACTCCTGACTCGGGCACCAAGGACGCTTAGATC", 46, 39 ]
    #c = [ "51216106", "GGGTGTCCTTGGTGCCCGAGTCAGATTAGCTAAGCA", "AGCTAATCTGACTCGGGCACCAAGGACGCTGCTTAG", 41, 34 ]
    #c = [ "41823514", "GAATGTCCTTGGTGCCCGAGTCAGAACTCCAAGATC", "TGGAGTTCTGACTCGGGCACCAAGGACATTCAGATC", -1, -1 ]
    #c = [ "180", "AAGCTGTCCTTGGTGCCCGAGTCAGGAAAAGTTCTT", "TTTTTTTAGGAGGAAGGATCTATGAGCAAAGGAGAA", 120, 75 ]
    #c = [ "67219", "GAGTGTCCTTGGTGCCCGAGTCAGTCGACAACTCCA", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 134, 0 ]
    #c = [ "58726", "GGATGTCCTTGGTGCCCGAGTCAGCCTTAGATCGGA", "AAGGCTGACTCGGGCACCAAGGACATCCAGATCGGA", None, None ]
    #c = [ "188425", "GGACGTCCTTGGTGCCCGAGTCAGTATAGATCGGAA", "ATACTGACTCGGGCACCAAGGACTTCCAGATCGGAA", 24, 21 ]
    #c = [ "jjb_L21", "GGACGTCCTTGGTGCCCGAGTCAGGGCGAACTAGAT", "AGTTCGCCCTGACTCGGGCACCAAGGACGTCCAGAT", 21, 13 ]
    #c = [ "jjb_L20", "GGACGTCCTTGGTGCCCGAGTCAGGCGAACTCAGAT", "GAGTTCGCCTGACTCGGGCACCAAGGACGTCCAGAT", 20, 12 ]
    #c = [ "jjb_L19", "GGACGTCCTTGGTGCCCGAGTCAGCGAACTCCAGAT", "GGAGTTCGCTGACTCGGGCACCAAGGACGTCCAGAT", None, None ]
    #c = [ "406149", "AGGTGTCCTTGGTGCCCGAGTCAGGACAACTCCAGT", "TTATAGGCGATGGAGTTCGCCATAAACGCTGCTTAG", 132, 0 ]
    #c = [ "89185", "TCCAGTCCTTGGTGCCCGAGTCAGCTAAGCAGCGTT", "AATGACTCCTACCAGTATCACTACTGGTAGGAGTCT", 36, 38 ]
    #c = [ "3185000", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCC", -1, -1 ]
    #c = [ "jjb_3185000'", "GAACGTCCTTGGTGCCCGAGTCAGGTTTATGGCGAA", "TCGCCATAAACCTGACTCGGGCACCAAGGACGTTCA", None, None ]
    #c = ['1', 'TCTGAGATCGGAAGAGCACACGTCTGAACTCCAGT', 'CAGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGT', None, None]
    #c = ['24941', 'TCCAGTCCTTGGTGCCCGAGTCAGAGACTCCTACCA', 'TATAGGCGATGGAGTTCGCCATAAACGCTGCTTAGC', -1, -1]
    # Active case. An earlier, immediately-overwritten assignment of the
    # "1116:19486:8968" case was removed; it remains in the list above.
    c = ['jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA', 'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71 ]
    pair.set_from_data(c[0], c[1], c[2])

    # print(...) with one argument behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
    else:
        print("FAIL: {}".format(pair.failure))
def tmut_case():
    """Process one hard-coded pair with mutation counting and print the outcome.

    Prints the pair diagram, then either ``target: site / end mutations``
    or the failure reason. Swap the active ``c`` line with one of the
    commented cases to debug a different pair.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"
    spats = Spats(cotrans = False)
    spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.run.count_mutations = True
    spats.run.algorithm = "find_partial"
    spats.run.allowed_target_errors = 1
    spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    spats.run._process_all_pairs = True
    spats.run.writeback_results = True
    spats.run.num_workers = 1
    spats.run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # Alternate cases (uncomment one and comment out the active line to use it).
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGTCCTTGGTGCCCGAGTCAGTCCTTGGTTCCCGAGTCACTCCTTTGTTCCCC', 'AGGACTGACTCGGGCACCAAGGACTTTCTCGTTCACCTATTTCTTTCTCTTCCCCCTTTTTCTTTCTCTTTCTCC' ]
    #c = [ 'GAGCGTCCTTGGTGCCCGAGTCAGATGCCGACCCGGGTGGGGGCCCTGCCAGCTACATCCCGGCACACGCGTCAT', 'TAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCAGGGCCCCCACCCG' ]
    #c = [ 'GAATGTCCTTGGTGCCCGAGTCAGGACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAAC', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCA' ]
    #c = [ 'AGGCGTCCTTGGTGCCCGAGTCAGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTAGCGTTGCGG', 'ATCGGGGGCTCTGTTGGTTCCCCCGCAACGCTACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGTCT' ]
    #c = [ 'TTCAGTCCTTGGTGCCCGAGTCAGCCAGCTACATCCCGGCACACGCGTCATCTGCCTTGGCTGCTTCCTTCCGGA', 'AGGTCAGATCCGGAAGGAAGCAGCCAAGGCAGATGACGCGTGTGCCGGGATGTAGCTGGCTGACTCGGGCACCAA' ]
    #c = [ 'AAATGTCCTTGGTGCCCGAGTCAGATCTGCCTTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGA', 'TAAGGCAGATCTGACTCGGGCACCAAGGACATTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCG' ]
    #c = [ 'CTCAGTCCTTGGTGCCCGAGTCAGTGAGCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTC', 'AGCTCACTGACTCGGGCACCAAGGACTGAGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG' ]
    #c = [ 'AAGCGTCCTTGGTGCCCGAGTCAGTGGAGGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCT', 'ACCTCCACTGACTCGGGCACCAAGGACGCTTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG' ]
    #c = [ 'TCCGGTCCTTGGTGCCCGAGTCAGATGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGT', 'ACATCTGACTCGGGCACCAAGGACCGGAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC' ]
    #c = [ 'TTTAAGTCCTTGGTGCCCGAGTCAGGTCATCTGCCTTGGCTGCTTCCTTCCGGACCTGACCTGGTAAACAGAGTA', 'TACTCTGTTTACCAGGTCAGGTCCGGAAGGAAGCAGCCAAGGCAGATGACCTGACTCGGGCACCAAGGACTTAAA' ]
    #c = [ 'TTCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTACCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'AGATCAACAAGAATTAGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAAC' ]
    #c = [ 'AAATCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'AATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGATT' ]
    #c = [ 'TCCGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAATCACTAT', 'ATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCGGA' ]
    #c = [ 'TCCACAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCATTTGCTCATCATTAACCTCCTGAATCACTAT', 'GGACAAGCAATGCTTGCCTTGATGTTGAACTTTTGAATAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGA' ]
    #c = [ 'GGGTCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCATTTAGATCGGAAGAGCACAC', 'AAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGACCCAGATCGGAAGAGCGTCG' ]
    c = [ 'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA', 'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA' ]
    #c = [ 'CCTACAACAAGAATTGGGACAACTCCAGTGAGAAGTTCTTCTCCTTTGCTCATCATTAAGATCGGAAGAGCACAC', 'TAATGATGAGCAAAGGAGAAGAACTTCTCACTGGAGTTGTCCCAATTCTTGTTGTAGGAGATCGGAAGAGCGTCG' ]
    #c = [ 'CTTGCAACAAGAATTGGGACAACTCCAGTGAAAAGTTCTTCTCCTTTGCTCATCTTTAACCTCCTGAATCACTAA', 'TAGTGATTCAGGAGGTTAATGATGAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGCAAGA' ]

    pair.set_from_data('x', c[0], c[1])
    spats.process_pair(pair)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(diagram(pair, spats.run))
    if pair.has_site:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end, pair.mutations))
    else:
        print("FAIL: {}".format(pair.failure))
def prof_run():
    """Process the hard-coded 2k R1/R2 fastq pair with 'lookup', then exit with status 0.

    Configured for a single worker, with mutation counting on and the
    database skipped. The process is terminated via ``SystemExit`` once
    processing finishes.
    """
    import sys
    from spats_shape_seq import Spats
    spats = Spats()
    #spats.run.cotrans = True
    #spats.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    #spats.run.writeback_results = False
    spats.run._process_all_pairs = True
    spats.run.skip_database = True
    spats.run.algorithm = "lookup"
    spats.run.count_mutations = True
    spats.run.num_workers = 1
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/pdc_muts/PDC_tweaked/PDC_09_001_6/"
    spats.addTargets(bp + "target.fa")
    spats.process_pair_data(bp + "2k_R1.fastq", bp + "2k_R2.fastq")
    # sys.exit() instead of the site-provided exit(), which is meant for
    # interactive use and is absent when the site module is not loaded.
    sys.exit(0)
def indels_run():
    """Process the hard-coded match-failure fastq pair with indel counting, then exit 0.

    All options are applied to ``s.run`` before targets are added. The
    process is terminated via ``SystemExit`` once processing finishes.
    """
    import sys
    from spats_shape_seq import Spats
    s = Spats()
    # NOTE(review): the original assigned ``True`` here, while every other
    # block in this file assigns an algorithm *name*. "indels" is assumed
    # from this function's purpose -- confirm it is a registered algorithm.
    s.run.algorithm = "indels"
    s.run.count_indels = True
    s.run.count_mutations = True
    s.run.allowed_target_errors = 8
    # The next option and mutations_require_quality_score were previously
    # assigned on ``s`` itself; the rest of this file sets them on ``s.run``.
    s.run.collapse_left_prefixes = True
    s.run.ignore_stops_with_mismatched_overlap = True
    s.run.allow_negative_values = True
    s.run.mutations_require_quality_score = 30
    bp = "/Users/steve/mos/tasks/oughxX/code"
    s.addTargets(bp + "/test/hairpin/hairpinA_circ.fa")
    rp = bp + "/TESTING/cmp_muts_favored/steve_test"
    s.process_pair_data(rp + "/R1_match_failures.fastq", rp + "/R2_match_failures.fastq")
    # sys.exit() instead of the site-provided exit(), which is meant for
    # interactive use and is absent when the site module is not loaded.
    sys.exit(0)
class TestMutPairs(unittest.TestCase):
    """Check site, end and mutations for each mutation test case.

    Case layout used here: [0] identifier, [1] R1, [2] R2, [3] expected
    end, [4] expected site, [5] expected mutations, and optionally
    [6]/[7] R1/R2 quality strings.
    """

    def setUp(self):
        self.spats = Spats()
        self.spats.run.count_mutations = True
        # ord('.') - ord('!') == 13
        self.spats.run.mutations_require_quality_score = ord('.') - ord('!')
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        self.setup_processor()

    def setup_processor(self):
        """Select the algorithm and targets; kept separate from setUp as a hook."""
        self.spats.run.algorithm = "find_partial"
        self.spats.addTargets("test/mut/mut_single.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` for *case*, with explicit or all-'K' quality strings."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        if len(case) > 6:
            pair.r1.quality = case[6]
            pair.r2.quality = case[7]
        else:
            # No qualities supplied: use 'K' for every base of each read.
            pair.r1.quality = 'K' * len(case[1])
            pair.r2.quality = 'K' * len(case[2])
        return pair

    def run_case(self, case):
        """Process one case; end and mutations are only checked when a site was found."""
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[4], pair.site,
            "res={} != {} ({}, {}, {})".format(pair.site, case[4], self.__class__.__name__, case[0], pair.failure))
        if pair.site is not None:
            self.assertEqual(
                case[3], pair.end,
                "end={} != {} ({}, {}, {})".format(pair.end, case[3], self.__class__.__name__, case[0], pair.failure))
            # Mutations are sorted so the comparison is order-independent;
            # a falsy value (None / empty) is compared as-is.
            self.assertEqual(
                case[5], sorted(pair.mutations) if pair.mutations else pair.mutations,
                "muts={} != {} ({}, {}, {})".format(pair.mutations, case[5], self.__class__.__name__, case[0], pair.failure))
        return pair

    def cases(self):
        """Return ``cotrans_cases`` for cotrans runs, otherwise the module-level ``cases``."""
        return cotrans_cases if self.spats.run.cotrans else cases

    def test_pairs(self):
        # NOTE(review): pair_length is still derived from the module-level
        # ``cases`` even when cotrans_cases are run -- confirm that is intended.
        self.spats.run.pair_length = len(cases[0][1])
        selected = self.cases()
        for case in selected:
            self.run_case(case)
        # Count the list that was actually run (was always ``len(cases)``).
        print("Ran {} pair->site cases.".format(len(selected)))
class TestPanelPairs(unittest.TestCase):
    """Single-pair check against the panel_RNAs_complete.fa targets."""

    def setUp(self):
        from spats_shape_seq import Spats
        engine = Spats()
        engine.run.minimum_target_match_length = 10
        engine.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = engine

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # The pair must end up with no target and be counted exactly once
        # under ``multiple_R1_match``.
        read_id = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'
        candidate = Pair()
        candidate.set_from_data(read_id, r1_seq, r2_seq)

        run_config = self.spats.run
        run_config.debug = True
        run_config.algorithm = "find_partial"

        self.spats.process_pair(candidate)
        self.assertEqual(None, candidate.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
class TestOverlap(unittest.TestCase):
    """Run ``overlap_cases`` through every algorithm except 'native'.

    Case layout used here: [0] identifier, [1] R1, [2] R2, [3] expected
    site, [4] expected failure.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a ``Pair`` built from the identifier/R1/R2 fields of *case*."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process one case and assert both its site and its failure value."""
        pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "site res={} != {} ({}, {}, {}, {})".format(
                pair.site, case[3], self.__class__.__name__, case[0], self.spats.run.algorithm, pair.failure))
        self.assertEqual(
            case[4], pair.failure,
            "failure res={} != {} ({}, {}, {})".format(
                pair.failure, case[4], self.__class__.__name__, case[0], self.spats.run.algorithm))

    def test_pairs(self):
        for alg in algorithms:
            if alg == 'native':
                continue
            self.run_algorithm(alg)

    def run_algorithm(self, alg):
        """Create a fresh ``Spats`` configured for *alg* and run all cases."""
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.run.count_mutations = True
        # An error count, written as the integer 1 as elsewhere in this file
        # (was the boolean True, which only worked because True == 1).
        self.spats.run.allowed_target_errors = 1
        self.spats.run.ignore_stops_with_mismatched_overlap = True
        self.spats.addTargets("test/SRP/SRP.fa")
        self.run_pairs()

    def run_pairs(self):
        for case in overlap_cases:
            self.run_case(case)
        print("Ran {} overlap test cases.".format(len(overlap_cases)))
class TestPanelPairs(unittest.TestCase):
    """Single-pair check against the panel_RNAs_complete.fa targets."""

    def setUp(self):
        from spats_shape_seq import Spats
        engine = Spats()
        engine.run.minimum_target_match_length = 10
        engine.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
        self.spats = engine

    def tearDown(self):
        self.spats = None

    def test_single_R1_match_with_adapter_multiple_without(self):
        # The pair must end up with no target and be counted exactly once
        # under ``multiple_R1_match``.
        read_id = 'M02465:8:000000000-A5D'
        r1_seq = 'CCCGCCGTCCTTGGTGCCCGAGTGAGATCGGAAGA'
        r2_seq = 'CACTCGGGCACCAAGGACGGCGGGAGATCGGAAGA'
        candidate = Pair()
        candidate.set_from_data(read_id, r1_seq, r2_seq)

        run_config = self.spats.run
        run_config.debug = True
        run_config.algorithm = "find_partial"

        self.spats.process_pair(candidate)
        self.assertEqual(None, candidate.target)
        self.assertEqual(1, self.spats.counters.multiple_R1_match)
def tag_test():
    """Process the pair described by /tmp/spats_test.json and print its result.

    The JSON file must provide ``id``, ``r1`` and ``r2``. Prints the pair
    identifier and original sequences, then either ``target: site / right``
    or the failure reason.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.run.algorithm = "find_partial"
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")
    from spats_shape_seq.pair import Pair
    pair = Pair()
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))
    # print(...) with one argument behaves the same on Python 2 and 3.
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq, pair.r2.original_seq))
    s.process_pair(pair)
    if pair.has_site:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
    else:
        print("FAIL: {}".format(pair.failure))
class TestOverlap(unittest.TestCase):
    """Check expected site and failure for ``overlap_cases`` under each algorithm.

    A case is a sequence: identifier, R1, R2, expected site, expected failure.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Construct the Pair described by case[0..2]."""
        built = Pair()
        built.set_from_data(case[0], case[1], case[2])
        return built

    def run_case(self, case):
        """Process *case* and assert on both the site and the failure value."""
        built = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(built)

        expected_site = case[3]
        self.assertEqual(
            expected_site,
            built.site,
            "site res={} != {} ({}, {}, {}, {})".format(
                built.site, expected_site, self.__class__.__name__,
                case[0], self.spats.run.algorithm, built.failure))

        expected_failure = case[4]
        self.assertEqual(
            expected_failure,
            built.failure,
            "failure res={} != {} ({}, {}, {})".format(
                built.failure, expected_failure, self.__class__.__name__,
                case[0], self.spats.run.algorithm))

    def test_pairs(self):
        # Every algorithm except 'native' is exercised.
        for candidate in (a for a in algorithms if a != 'native'):
            self.run_algorithm(candidate)

    def run_algorithm(self, alg):
        """Build a Spats for *alg* with mutation counting enabled, then run the cases."""
        from spats_shape_seq import Spats

        self.spats = Spats()
        run_config = self.spats.run
        run_config.algorithm = alg
        run_config.count_mutations = True
        run_config.allowed_target_errors = True
        run_config.ignore_stops_with_mismatched_overlap = True
        self.spats.addTargets("test/SRP/SRP.fa")
        self.run_pairs()

    def run_pairs(self):
        """Feed each overlap case to run_case and report how many were run."""
        for entry in overlap_cases:
            self.run_case(entry)
        print("Ran {} overlap test cases.".format(len(overlap_cases)))
class TestPairsPartial(unittest.TestCase):
    """Run the shared ``cases`` through a cotrans run using ``find_partial``.

    Subclasses can override setup_processor() to select another algorithm.
    Case layout used here: [0] identifier, [1] R1, [2] R2,
    [3] expected ``pair.end``, [4] expected ``pair.site``.
    """

    def setUp(self):
        self.spats = Spats()
        settings = self.spats.run
        settings.cotrans = True
        settings.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        # The algorithm is selected before the targets are added.
        self.setup_processor()
        self.spats.addTargets("test/cotrans/cotrans_single.fa")

    def setup_processor(self):
        """Select the processing algorithm for this test class."""
        self.spats.run.algorithm = "find_partial"

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair built from the identifier and two reads of *case*."""
        read_pair = Pair()
        read_pair.set_from_data(case[0], case[1], case[2])
        return read_pair

    def run_case(self, case):
        """Process *case*, check the site (and the end when a site was found)."""
        read_pair = self.pair_for_case(case)
        self.spats.process_pair(read_pair)

        message = "res={} != {} ({}, {})".format(
            read_pair.site, case[4], self.__class__.__name__, case[0])
        self.assertEqual(case[4], read_pair.site, message)

        # The end position is only compared for pairs that produced a site.
        if read_pair.site is not None:
            self.assertEqual(case[3], read_pair.end)
        return read_pair

    def test_pairs(self):
        self.spats.run.pair_length = len(cases[0][1])
        if self.spats._processor.exists():
            for entry in cases:
                self.run_case(entry)
            print("Ran {} pair->site cases.".format(len(cases)))
        else:
            # just ignore the native test if it's not available
            self.assertEqual("native", self.spats.run.algorithm)
class TestMutPairs(unittest.TestCase): def setUp(self): self.spats = Spats() self.spats.run.count_mutations = True self.spats.run.mutations_require_quality_score = ord('.') - ord('!') self.spats.run.allowed_target_errors = 1 self.spats.run.ignore_stops_with_mismatched_overlap = True self.spats.run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG" self.setup_processor() def setup_processor(self): self.spats.run.algorithm = "find_partial" self.spats.addTargets("test/mut/mut_single.fa") def tearDown(self): self.spats = None def pair_for_case(self, case): pair = Pair() pair.set_from_data(case[0], case[1], case[2]) if len(case) > 6: pair.r1.quality = case[6] pair.r2.quality = case[7] else:
class TestShortAdapterB(unittest.TestCase):
    """Check expected sites for ``short_adapter_b_cases`` under each non-native algorithm.

    Case layout: [0] identifier, [1] R1, [2] R2, [3] expected ``pair.site``.
    """

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair populated from case[0], case[1] and case[2]."""
        read_pair = Pair()
        read_pair.set_from_data(case[0], case[1], case[2])
        return read_pair

    def run_case(self, case):
        """Process one case and assert that the resulting site matches case[3]."""
        read_pair = self.pair_for_case(case)
        print('running: {} / {}'.format(case[0], self.spats.run.algorithm))
        self.spats.process_pair(read_pair)

        message = "site res={} != {} ({}, {}, {}, {})".format(
            read_pair.site,
            case[3],
            self.__class__.__name__,
            case[0],
            self.spats.run.algorithm,
            read_pair.failure,
        )
        self.assertEqual(case[3], read_pair.site, message)

    def test_pairs(self):
        # 'native' is skipped; all other algorithms are run in listed order.
        for name in algorithms:
            if name != 'native':
                self.run_algorithm(name)

    def run_algorithm(self, alg):
        """Create a fresh Spats using *alg* on the 5SrRNA target and run all cases."""
        from spats_shape_seq import Spats

        self.spats = Spats()
        self.spats.run.algorithm = alg
        self.spats.addTargets("test/5SrRNA/5SrRNA.fa")
        self.run_pairs()

    def run_pairs(self):
        """Run each short adapter_b case and print the number of cases run."""
        for entry in short_adapter_b_cases:
            self.run_case(entry)
        total = len(short_adapter_b_cases)
        print("Ran {} adapter_b test cases.".format(total))
def d5s_run():
    """Developer scratch run over the 5S FASTQ pair, writing reactivities to disk.

    Reads the target FASTA and both FASTQ files from a hard-coded local base
    directory, processes the pairs with the database skipped, computes
    profiles, and writes the reactivities to ``dev_out/rx2.out`` under the
    same base directory.
    """
    from spats_shape_seq import Spats
    # Only referenced by the disabled processor override noted below.
    from spats_shape_seq.partial import PartialFindProcessor

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_fastq = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    # Disabled alternatives kept for reference:
    #   - load the reads through a PairDB at bp + "dev_out/pairs.db"
    #     (add_targets_table + parse) and call s.process_pair_db(pair_db)
    #   - s.run._processor_class = PartialFindProcessor
    #   - s.run.writeback_results = True
    #   - s.run.resume_processing = True
    #   - s.run.result_set_name = "lookup"
    s = Spats()
    s.run.skip_database = True
    s.addTargets(bp + "5s/5S.fa")
    s.process_pair_data(r1_fastq, r2_fastq)
    s.compute_profiles()
    s.write_reactivities(bp + "dev_out/rx2.out")
class TestPairs(unittest.TestCase):
    """Pair -> site checks against the 5s.fa target.

    Case layout: [0] identifier, [1] R1, [2] R2, [3] expected ``pair.site``.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair populated from case[0], case[1] and case[2]."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case* on the current Spats and assert the site equals case[3].

        Returns the processed Pair so callers can inspect it further.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(
                pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion here: the result is only printed for inspection.
        pair = Pair()
        pair.set_from_data("x",
                           'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG',
                           'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        from spats_shape_seq import Spats

        # With a minimum match length of 11 the case must not yield a site.
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = ['1109:22737:14675',
                'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
                'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG',
                None]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting is applied to the NEW instance, before its targets are
        # added; assigning it to the previous instance just before replacing
        # that instance had no effect on the run under test.
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
class TestPairs(unittest.TestCase):
    """Pair -> site checks against the 5s.fa target.

    Case layout: [0] identifier, [1] R1, [2] R2, [3] expected ``pair.site``.
    """

    def setUp(self):
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.addTargets("test/5s/5s.fa")

    def tearDown(self):
        self.spats = None

    def pair_for_case(self, case):
        """Return a Pair populated from case[0], case[1] and case[2]."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        return pair

    def run_case(self, case):
        """Process *case* on the current Spats and assert the site equals case[3].

        Returns the processed Pair so callers can inspect it further.
        """
        pair = self.pair_for_case(case)
        self.spats.process_pair(pair)
        self.assertEqual(
            case[3], pair.site,
            "res={} != {} ({}, {})".format(
                pair.site, case[3], self.__class__.__name__, case[0]))
        return pair

    def test_pairs(self):
        for case in cases:
            self.run_case(case)
        print("Ran {} pair->site cases.".format(len(cases)))

    def test_find_partial_weird_case(self):
        # No assertion here: the result is only printed for inspection.
        pair = Pair()
        pair.set_from_data("x",
                           'CTCAGTCCTTGGTGCCCGAGTCAGGATCGGAAGAG',
                           'TGACTCGGGCACCAAAGACTGAGAGATCGGAAGAG')
        self.spats.process_pair(pair)
        print("{} / {}".format(pair.site, pair.failure))

    def test_minimum_length(self):
        from spats_shape_seq import Spats

        # With a minimum match length of 11 the case must not yield a site.
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = ['1109:22737:14675',
                'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
                'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG',
                None]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting is applied to the NEW instance, before its targets are
        # added; assigning it to the previous instance just before replacing
        # that instance had no effect on the run under test.
        self.spats = Spats()
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
class TestPairsWithU(TestPairs):
    """Repeat the inherited TestPairs tests with targets loaded from 5su.fa."""

    def setUp(self):
        from spats_shape_seq import Spats

        fresh = self.spats = Spats()
        fresh.addTargets("test/5s/5su.fa")