class ProjectConfigurationTest(unittest.TestCase):
    """Check ProjectConfig loading, seed FASTA output and lookups."""

    def setUp(self):
        """Create an empty config and the default one-project JSON stream."""
        self.config = ProjectConfig()
        self.defaultJsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 5,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"],
          "id": 10042
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"],
      "seed_group": "R1-seeds"
    },
    "R1": {
      "is_nucleotide": false,
      "reference": ["RWN", "NWR"],
      "seed_group": null
    }
  }
}
""")

    def _write_seed_fasta(self, json_source, **options):
        """Load json_source, write the seed FASTA and return its text.

        Any keyword options are forwarded to writeSeedFasta().
        """
        buffer = StringIO()
        self.config.load(json_source)
        self.config.writeSeedFasta(buffer, **options)
        return buffer.getvalue()

    def testConvert(self):
        """The default config writes its single seed as one FASTA record."""
        wanted = """\
>R1-seed
ACTGAAAGGG
"""

        written = self._write_seed_fasta(self.defaultJsonIO)

        self.assertMultiLineEqual(wanted, written)

    def testSharedRegions(self):
        """A seed named by two projects is written only once."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1 and R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": ["TTT"]
    }
  }
}
""")
        wanted = """\
>R1-seed
ACTGAAAGGG
>R2-seed
TTT
"""

        written = self._write_seed_fasta(source)

        self.assertMultiLineEqual(wanted, written)

    def testUnusedRegion(self):
        """A region that no project refers to is left out of the FASTA."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": ["TTT"]
    }
  }
}
""")
        wanted = """\
>R1-seed
ACTGAAAGGG
"""

        written = self._write_seed_fasta(source)

        self.assertMultiLineEqual(wanted, written)

    def testExcludeSeeds(self):
        """Seeds listed in excluded_seeds are omitted from the FASTA."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    },
    "R3": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R3-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": ["TTT"]
    },
    "R3-seed": {
      "is_nucleotide": true,
      "reference": ["TAG"]
    }
  }
}
""")
        wanted = """\
>R2-seed
TTT
"""

        written = self._write_seed_fasta(
            source,
            excluded_seeds=['R1-seed', 'R3-seed'])

        self.assertMultiLineEqual(wanted, written)

    def testExcludeUnknownSeed(self):
        """Excluding a seed name that is not configured changes nothing."""
        wanted = """\
>R1-seed
ACTGAAAGGG
"""

        written = self._write_seed_fasta(self.defaultJsonIO,
                                         excluded_seeds=['R99-seed'])

        self.assertMultiLineEqual(wanted, written)

    def testDuplicateReference(self):
        """Two seeds with the same reference raise a RuntimeError."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1a-seed", "R1b-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1a-seed": {
      "is_nucleotide": true,
      "reference": ["ACTAAAGGG"]
    },
    "R1b-seed": {
      "is_nucleotide": true,
      "reference": ["ACTAAAGGG"]
    }
  }
}
""")
        fasta = StringIO()
        self.config.load(source)

        with self.assertRaisesRegex(
                RuntimeError,
                "Duplicate references: R1a-seed and R1b-seed."):
            self.config.writeSeedFasta(fasta)

    def testGetReference(self):
        """getReference() joins the reference fragments of a seed."""
        self.config.load(self.defaultJsonIO)

        found = self.config.getReference('R1-seed')

        self.assertSequenceEqual('ACTGAAAGGG', found)

    def testGetCoordinateReferences(self):
        """getCoordinateReferences() maps coordinate region to reference."""
        self.config.load(self.defaultJsonIO)

        found = self.config.getCoordinateReferences('R1-seed')

        self.assertDictEqual({'R1': 'RWNNWR'}, found)

    def testGetAllReferences(self):
        """getAllReferences() returns seed and coordinate references."""
        wanted = {'R1-seed': 'ACTGAAAGGG', 'R1': 'RWNNWR'}
        self.config.load(self.defaultJsonIO)

        found = self.config.getAllReferences()

        self.assertEqual(wanted, found)

    def testUnknownReference(self):
        """Asking for an unconfigured seed raises KeyError."""
        self.config.load(self.defaultJsonIO)

        self.assertRaises(KeyError, self.config.getReference, 'R-unknown')

    def testMaxVariants(self):
        """getMaxVariants() reports the project's max_variants value."""
        self.config.load(self.defaultJsonIO)

        self.assertEqual(5, self.config.getMaxVariants('R1'))

    def testMaxVariantsUnusedRegion(self):
        """A coordinate region used by no project reports zero variants."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": ["NSFW"]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": ["RSW"]
    }
  }
}
""")
        self.config.load(source)

        self.assertEqual(0, self.config.getMaxVariants('R2'))

    def testMaxVariantsTwoProjects(self):
        """When two projects set a maximum for one region, the larger wins."""
        source = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 9,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": ["NSFW"]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": ["RSW"]
    }
  }
}
""")
        self.config.load(source)

        self.assertEqual(9, self.config.getMaxVariants('R1'))

    def testReload(self):
        """A second load() replaces everything from the first one."""
        first = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": ["ACTGAAA", "GGG"]
    }
  }
}
""")
        second = StringIO("""\
{
  "projects": {
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R2-seed": {
      "is_nucleotide": true,
      "reference": ["GACCTA"]
    }
  }
}
""")

        for stream in (first, second):
            self.config.load(stream)

        self.assertRaises(KeyError, self.config.getReference, "R1-seed")
        self.assertSequenceEqual("GACCTA",
                                 self.config.getReference("R2-seed"))

    def testProjectSeeds(self):
        """getProjectSeeds() returns the set of seed names for a project."""
        self.config.load(self.defaultJsonIO)

        found = self.config.getProjectSeeds('R1')

        self.assertSetEqual({'R1-seed'}, found)

    def testSeedGroup(self):
        """getSeedGroup() returns the seed_group configured for a seed."""
        self.config.load(self.defaultJsonIO)

        found = self.config.getSeedGroup('R1-seed')

        self.assertEqual("R1-seeds", found)
class CoveragePlotsTest(TestCase):
    """Check coverage_plot() file names and score rows for a small config."""

    def setUp(self):
        """Load a two-project configuration into self.config."""
        # Compare strings line by line so failures show a readable diff.
        self.addTypeEqualityFunc(str, self.assertMultiLineEqual)
        project_json = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [
            {"end_pos": null, "start_pos": 1},
            {"end_pos": null, "start_pos": 3}
          ],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  }
}
""")
        self.config = ProjectConfig()
        self.config.load(project_json)

    # Decorators apply bottom-up: loadScoring is the first mock argument,
    # savefig the second.
    @patch('matplotlib.pyplot.savefig')
    @patch('micall.core.project_config.ProjectConfig.loadScoring')
    def test_simple(self, config_mock, savefig_mock):
        """One plot is saved per project and one score row is written each."""
        config_mock.return_value = self.config
        amino_csv = StringIO("""\
seed,region,q-cutoff,query.aa.pos,refseq.aa.pos,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
R1-seed,R1,15,100,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
R1-seed,R1,15,101,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0
R1-seed,R1,15,102,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0
""")
        amino_csv.name = 'E1234.amino.csv'
        scores_csv = StringIO()
        wanted_plots = [call('E1234.R1.R1.png'),
                        call('E1234.R1-and-R2.R1.png')]
        wanted_scores = """\
project,region,seed,q.cut,min.coverage,which.key.pos,off.score,on.score
R1,R1,R1-seed,15,5,1,-1,1
R1-and-R2,R1,R1-seed,15,5,1,-1,1
"""

        coverage_plot(amino_csv, coverage_scores_csv=scores_csv)

        self.assertEqual(wanted_plots, savefig_mock.mock_calls)
        self.assertEqual(wanted_scores, scores_csv.getvalue())
class ProjectConfigurationProjectRegionsTest(unittest.TestCase):
    """Check ProjectConfig.getProjectRegions() for a seed shared by projects."""

    def setUp(self):
        """Prepare an empty config and a two-project JSON stream."""
        self.defaultJsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1 and R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [1, 3],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  }
}
""")
        self.config = ProjectConfig()

    @staticmethod
    def _r1_region(project_name, key_positions):
        """Return the expected entry for coordinate region R1 in a project."""
        return {"project_name": project_name,
                "coordinate_region_length": 3,
                "key_positions": key_positions,
                "min_coverage1": 10,
                "min_coverage2": 50,
                "min_coverage3": 100}

    def testProjectRegions(self):
        """Both projects that use R1-seed/R1 are reported, in order."""
        wanted = [self._r1_region("R1", []),
                  self._r1_region("R1 and R2", [1, 3])]
        self.config.load(self.defaultJsonIO)

        found = list(self.config.getProjectRegions('R1-seed', 'R1'))

        self.assertEqual(wanted, found)

    def testProjectExcluded(self):
        """A project passed as excluded is dropped from the result."""
        skipped = ['R1']
        wanted = [self._r1_region("R1 and R2", [1, 3])]
        self.config.load(self.defaultJsonIO)

        found = list(self.config.getProjectRegions('R1-seed', 'R1', skipped))

        self.assertEqual(wanted, found)
class ConvertPrelimTest(unittest.TestCase):
    """Check convert_prelim() SAM output, remap counts and seed counts."""

    # Header expected at the top of every SAM file in these tests.
    # NOTE(review): fields are separated by single spaces here, as they
    # appear in the source; SAM headers are normally tab-separated, so
    # confirm the separators were not altered by whitespace normalisation.
    _SAM_HEADER = """\
@HD VN:1.0 SO:unsorted
@SQ SN:R1-seed LN:9
@SQ SN:R2-seed LN:12
@PG ID:bowtie2 PN:bowtie2 VN:2.2.3 CL:""
"""

    def setUp(self):
        """Load two seeds in one seed group and open the output buffers."""
        regions_json = StringIO("""\
{
  "regions": {
    "R1-seed": {
      "seed_group": "main",
      "reference": ["ACTAAAGGG"]
    },
    "R2-seed": {
      "seed_group": "main",
      "reference": ["ACTAAAGGGAAA"]
    }
  }
}
""")
        self.projects = ProjectConfig()
        self.projects.load(regions_json)
        self.sam_file = StringIO()
        self.remap_counts = StringIO()
        self.remap_counts_writer = DictWriter(
            self.remap_counts,
            ['type', 'filtered_count', 'count'],
            lineterminator=os.linesep)
        self.remap_counts_writer.writeheader()

    def _check_conversion(self,
                          prelim_csv,
                          expected_sam_file,
                          expected_remap_counts,
                          expected_seed_counts):
        """Run convert_prelim() with threshold 2 and compare all outputs."""
        count_threshold = 2

        seed_counts = convert_prelim(prelim_csv,
                                     self.sam_file,
                                     self.remap_counts_writer,
                                     count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_simple(self):
        """One 9-base read: filtered_count 0 and no seed counts expected."""
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,0,1
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})

    def test_two_regions(self):
        """9-base reads on two seeds: both counted, neither reported."""
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
example2,89,R2-seed,1,0,9M,=,1,0,AAAACCTTT,BBBBBBBBB
example3,89,R2-seed,1,0,9M,=,1,0,AAAAACTTT,BBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAACCTTT\tBBBBBBBBB
example3\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAAACTTT\tBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,0,1
prelim R2-seed,0,2
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})

    def test_long_reads(self):
        """Two 54-base reads on R1-seed: filtered_count 2, seed count 2."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R1-seed': 2})

    def test_star_region(self):
        """A read with rname '*' is counted but never reported as a seed."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,93,*,*,*,*,*,*,*,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t93\t*\t*\t*\t*\t*\t*\t*\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim *,0,1
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R1-seed': 2})

    def test_best_in_group(self):
        """Both seeds share group 'main'; only R2-seed (3 vs 2) is reported."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R2-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,89,R1-seed,1,0,54M,=,1,0,\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,2,2
prelim R2-seed,3,3
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R2-seed': 3})

    def test_unmapped_read(self):
        """A flag-93 read is excluded from filtered_count (1 of 2 remain)."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,93,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t93\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,1,2
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})
class ConvertPrelimTest(unittest.TestCase):
    """Check convert_prelim() SAM output, remap counts and seed counts."""

    # Header expected at the top of every SAM file in these tests.
    # NOTE(review): fields are separated by single spaces here, as they
    # appear in the source; SAM headers are normally tab-separated, so
    # confirm the separators were not altered by whitespace normalisation.
    _SAM_HEADER = """\
@HD VN:1.0 SO:unsorted
@SQ SN:R1-seed LN:9
@SQ SN:R2-seed LN:12
@PG ID:bowtie2 PN:bowtie2 VN:2.2.3 CL:""
"""

    def setUp(self):
        """Load two seeds in one seed group and open the output buffers."""
        regions_json = StringIO("""\
{
  "regions": {
    "R1-seed": {
      "seed_group": "main",
      "reference": ["ACTAAAGGG"]
    },
    "R2-seed": {
      "seed_group": "main",
      "reference": ["ACTAAAGGGAAA"]
    }
  }
}
""")
        self.projects = ProjectConfig()
        self.projects.load(regions_json)
        self.sam_file = StringIO()
        self.remap_counts = StringIO()
        self.remap_counts_writer = DictWriter(
            self.remap_counts,
            ['type', 'filtered_count', 'count'],
            lineterminator=os.linesep)
        self.remap_counts_writer.writeheader()

    def _check_conversion(self,
                          prelim_csv,
                          expected_sam_file,
                          expected_remap_counts,
                          expected_seed_counts):
        """Run convert_prelim() with threshold 2 and compare all outputs."""
        count_threshold = 2

        seed_counts = convert_prelim(prelim_csv,
                                     self.sam_file,
                                     self.remap_counts_writer,
                                     count_threshold,
                                     self.projects)

        self.assertEqual(expected_sam_file, self.sam_file.getvalue())
        self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
        self.assertEqual(expected_seed_counts, seed_counts)

    def test_simple(self):
        """One 9-base read: filtered_count 0 and no seed counts expected."""
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,0,1
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})

    def test_two_regions(self):
        """9-base reads on two seeds: both counted, neither reported."""
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,9M,=,1,0,AAACCCTTT,BBBBBBBBB
example2,89,R2-seed,1,0,9M,=,1,0,AAAACCTTT,BBBBBBBBB
example3,89,R2-seed,1,0,9M,=,1,0,AAAAACTTT,BBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t9M\t=\t1\t0\tAAACCCTTT\tBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAACCTTT\tBBBBBBBBB
example3\t89\tR2-seed\t1\t0\t9M\t=\t1\t0\tAAAAACTTT\tBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,0,1
prelim R2-seed,0,2
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})

    def test_long_reads(self):
        """Two 54-base reads on R1-seed: filtered_count 2, seed count 2."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R1-seed': 2})

    def test_star_region(self):
        """A read with rname '*' is counted but never reported as a seed."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,93,*,*,*,*,*,*,*,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t93\t*\t*\t*\t*\t*\t*\t*\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim *,0,1
prelim R1-seed,2,2
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R1-seed': 2})

    def test_best_in_group(self):
        """Both seeds share group 'main'; only R2-seed (3 vs 2) is reported."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R2-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,89,R1-seed,1,0,54M,=,1,0,\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5,89,R2-seed,1,0,54M,=,1,0,\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAATTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example4\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAATAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example5\t89\tR2-seed\t1\t0\t54M\t=\t1\t0\t\
AAAAAAAAAAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,2,2
prelim R2-seed,3,3
"""

        self._check_conversion(prelim_csv,
                               sam_text,
                               counts_text,
                               {'R2-seed': 3})

    def test_unmapped_read(self):
        """A flag-93 read is excluded from filtered_count (1 of 2 remain)."""
        self.maxDiff = None
        prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,93,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
        sam_text = self._SAM_HEADER + """\
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t93\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
        counts_text = """\
type,filtered_count,count
prelim R1-seed,1,2
"""

        self._check_conversion(prelim_csv, sam_text, counts_text, {})
class ProjectConfigurationTest(unittest.TestCase):
    """Tests for ProjectConfig, driven by small in-memory JSON documents.

    Each test loads a JSON configuration from a StringIO and then checks
    one of the ProjectConfig accessors or the seed FASTA it writes.
    """
    # NOTE(review): a class with this same name also appears earlier in this
    # file; the later definition replaces the earlier one at import time.
    # Confirm which copy is meant to be kept.

    def setUp(self):
        # Default configuration: one project (R1) with one coordinate region
        # (R1) that is mapped from one nucleotide seed region (R1-seed).
        self.defaultJsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 5,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"],
          "id": 10042
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ],
      "seed_group": "R1-seeds"
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "RWN",
        "NWR"
      ],
      "seed_group": null
    }
  }
}
""")
        self.config = ProjectConfig()

    def testConvert(self):
        # The reference fragments are expected to be joined into one line.
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        fasta = StringIO()

        self.config.load(self.defaultJsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testSharedRegions(self):
        # R1-seed is named by two projects but expected only once in the FASTA.
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1 and R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
>R2-seed
TTT
"""
        fasta = StringIO()

        self.config.load(jsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testUnusedRegion(self):
        # R2-seed is defined but no project names it, so it is not expected
        # in the FASTA output.
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "TTT"
      ]
    }
  }
}
""")
        expected_fasta = """\
>R1-seed
ACTGAAAGGG
"""
        fasta = StringIO()

        self.config.load(jsonIO)
        self.config.writeSeedFasta(fasta)

        self.assertMultiLineEqual(expected_fasta, fasta.getvalue())

    def testDuplicateReference(self):
        # Two seeds with the same sequence are expected to be rejected.
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1a-seed", "R1b-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1a-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    },
    "R1b-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTAAAGGG"
      ]
    }
  }
}
""")
        fasta = StringIO()
        self.config.load(jsonIO)

        # assertRaisesRegex is the supported name; the assertRaisesRegexp
        # alias no longer exists in current Python releases.
        self.assertRaisesRegex(RuntimeError,
                               "Duplicate references: R1a-seed and R1b-seed.",
                               self.config.writeSeedFasta,
                               fasta)

    def testGetReference(self):
        self.config.load(self.defaultJsonIO)
        seed_name = 'R1-seed'
        expected_ref = 'ACTGAAAGGG'

        seed_ref = self.config.getReference(seed_name)

        self.assertSequenceEqual(expected_ref, seed_ref)

    def testGetCoordinateReferences(self):
        self.config.load(self.defaultJsonIO)
        seed_name = 'R1-seed'
        expected_refs = {'R1': 'RWNNWR'}

        coordinate_refs = self.config.getCoordinateReferences(seed_name)

        self.assertDictEqual(expected_refs, coordinate_refs)

    def testUnknownReference(self):
        self.config.load(self.defaultJsonIO)
        seed_name = 'R-unknown'

        self.assertRaises(KeyError, self.config.getReference, seed_name)

    def testMaxVariants(self):
        self.config.load(self.defaultJsonIO)
        coordinate_region_name = 'R1'

        self.assertEqual(5, self.config.getMaxVariants(coordinate_region_name))

    def testMaxVariantsUnusedRegion(self):
        # R2 is defined as a region but no project uses it, so its expected
        # maximum is zero.
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(jsonIO)
        coordinate_region_name = 'R2'

        self.assertEqual(0, self.config.getMaxVariants(coordinate_region_name))

    def testMaxVariantsTwoProjects(self):
        """ If two projects specify a maximum for the same coordinate region,
        use the bigger of the two.
        """
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 9,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 2,
      "regions": [
        {
          "coordinate_region": "R1",
          "seed_region_names": ["R1-seed"]
        },
        {
          "coordinate_region": "R2",
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    },
    "R1": {
      "is_nucleotide": false,
      "reference": [
        "NSFW"
      ]
    },
    "R2": {
      "is_nucleotide": false,
      "reference": [
        "RSW"
      ]
    }
  }
}
""")
        self.config.load(jsonIO)
        coordinate_region_name = 'R1'

        self.assertEqual(9, self.config.getMaxVariants(coordinate_region_name))

    def testReload(self):
        # Loading a second configuration is expected to discard the first.
        jsonIO1 = StringIO("""\
{
  "projects": {
    "R1": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R1-seed"]
        }
      ]
    }
  },
  "regions": {
    "R1-seed": {
      "is_nucleotide": true,
      "reference": [
        "ACTGAAA",
        "GGG"
      ]
    }
  }
}
""")
        jsonIO2 = StringIO("""\
{
  "projects": {
    "R2": {
      "regions": [
        {
          "coordinate_region": null,
          "seed_region_names": ["R2-seed"]
        }
      ]
    }
  },
  "regions": {
    "R2-seed": {
      "is_nucleotide": true,
      "reference": [
        "GACCTA"
      ]
    }
  }
}
""")

        self.config.load(jsonIO1)
        self.config.load(jsonIO2)

        self.assertRaises(KeyError, self.config.getReference, "R1-seed")
        self.assertSequenceEqual("GACCTA", self.config.getReference("R2-seed"))

    def testProjectSeeds(self):
        expected_seeds = {'R1-seed'}

        self.config.load(self.defaultJsonIO)
        seeds = self.config.getProjectSeeds('R1')

        self.assertSetEqual(expected_seeds, seeds)

    def testSeedGroup(self):
        expected_group = "R1-seeds"

        self.config.load(self.defaultJsonIO)
        group = self.config.getSeedGroup('R1-seed')

        self.assertEqual(expected_group, group)

    def testProjectRegions(self):
        # Both projects map coordinate region R1 from R1-seed, so one entry
        # per project is expected; the R2/R2-seed entry is not.
        jsonIO = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1 and R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [1, 3],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        expected_project_regions = [{"project_name": "R1",
                                     "coordinate_region_length": 3,
                                     "key_positions": [],
                                     "min_coverage1": 10,
                                     "min_coverage2": 50,
                                     "min_coverage3": 100},
                                    {"project_name": "R1 and R2",
                                     "coordinate_region_length": 3,
                                     "key_positions": [1, 3],
                                     "min_coverage1": 10,
                                     "min_coverage2": 50,
                                     "min_coverage3": 100}]

        self.config.load(jsonIO)
        project_regions = list(self.config.getProjectRegions('R1-seed', 'R1'))

        self.assertEqual(expected_project_regions, project_regions)
class CoveragePlotsTest(TestCase):
    """Checks the score rows and plot file names produced by coverage_plot."""

    def setUp(self):
        # Compare str values with the multi-line diff assertion.
        self.addTypeEqualityFunc(str, self.assertMultiLineEqual)

        # Two projects: both use coordinate region R1 from R1-seed, and the
        # second also uses R2 from R2-seed. Only the second project's R1
        # entry lists key positions.
        scoring_json = StringIO("""\
{
  "projects": {
    "R1": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        }
      ]
    },
    "R1-and-R2": {
      "max_variants": 0,
      "regions": [
        {
          "coordinate_region": "R1",
          "coordinate_region_length": 3,
          "key_positions": [
            {
              "end_pos": null,
              "start_pos": 1
            },
            {
              "end_pos": null,
              "start_pos": 3
            }
          ],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R1-seed"
          ]
        },
        {
          "coordinate_region": "R2",
          "coordinate_region_length": 1,
          "key_positions": [],
          "min_coverage1": 10,
          "min_coverage2": 50,
          "min_coverage3": 100,
          "seed_region_names": [
            "R2-seed"
          ]
        }
      ]
    }
  }
}
""")
        project_config = ProjectConfig()
        project_config.load(scoring_json)
        self.config = project_config

    @patch('matplotlib.pyplot.savefig')
    @patch('micall.core.project_config.ProjectConfig.loadScoring')
    def test_simple(self, config_mock, savefig_mock):
        # The patched loadScoring hands back the configuration from setUp.
        config_mock.return_value = self.config

        # Input: three amino acid positions of R1, each with a count of 5.
        aminos = StringIO("""\
seed,region,q-cutoff,query.aa.pos,refseq.aa.pos,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*
R1-seed,R1,15,100,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
R1-seed,R1,15,101,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0
R1-seed,R1,15,102,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0
""")
        aminos.name = 'E1234.amino.csv'
        scores = StringIO()

        # Expected: one saved figure and one score row per project for R1.
        expected_plot_calls = [call('E1234.R1.R1.png'),
                               call('E1234.R1-and-R2.R1.png')]
        expected_score_rows = """\
project,region,seed,q.cut,min.coverage,which.key.pos,off.score,on.score
R1,R1,R1-seed,15,5,1,-1,1
R1-and-R2,R1,R1-seed,15,5,1,-1,1
"""

        coverage_plot(aminos, coverage_scores_csv=scores)

        self.assertEqual(expected_plot_calls, savefig_mock.mock_calls)
        self.assertEqual(expected_score_rows, scores.getvalue())