def test_movie_split(self):
    """Check that split_movies() partitions a two-movie SubreadSet.

    Asking for a single chunk must return one dataset holding every
    record, while leaving the source dataset's length unchanged and its
    filters falsy. Asking for 12 chunks is expected to return only two
    datasets, each of which has a single distinct qId that maps to the
    expected movie name and carries the expected record count.
    """
    total_records = 1745161
    first_movie_records = 959539
    second_movie_records = 785622
    test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2372215/0007/Analysis_Results/m150404_101626_42"
                   "267_c100807920800000001823174110291514_s1_p0.al"
                   "l.subreadset.xml")
    test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2590980/0008/Analysis_Results/m141115_075238_et"
                   "han_c100699872550000001823139203261572_s1_p0.al"
                   "l.subreadset.xml")
    combined = SubreadSet(test_file_1, test_file_2)

    # The expected total was originally obtained by iterating over the
    # records (sum(1 for _ in combined)); len() is what is checked here.
    self.assertEqual(len(combined), total_records)

    # One requested chunk: everything stays together and the source
    # dataset is left as it was.
    single = combined.split_movies(1)
    self.assertEqual(len(single), 1)
    self.assertEqual(sum(len(part) for part in single), total_records)
    self.assertEqual(len(combined), total_records)
    self.assertFalse(combined.filters)

    # Twelve requested chunks: only two datasets are expected back.
    by_movie = combined.split_movies(12)
    self.assertEqual(len(by_movie), 2)
    self.assertEqual(sum(len(part) for part in by_movie), total_records)

    # (chunk, expected movie name, expected record count)
    expectations = (
        (by_movie[0],
         'm150404_101626_42267_c100807920800000001823174110291514_s1_p0',
         first_movie_records),
        (by_movie[-1],
         'm141115_075238_ethan_c100699872550000001823139203261572_s1_p0',
         second_movie_records),
    )

    # Each chunk must contain exactly one distinct qId.
    for part, _, _ in expectations:
        self.assertEqual(len(set(part.index.qId)), 1)

    # That single qId must resolve to the expected movie, and the chunk
    # must hold that movie's record count.
    for part, movie_name, record_count in expectations:
        only_qid = list(set(part.index.qId))[0]
        self.assertEqual(part.qid2mov[only_qid], movie_name)
        self.assertEqual(len(part), record_count)
def test_multi_movie_split_zmws_with_existing_movie_filter(self):
    """Split one movie of a two-movie SubreadSet into ZMW chunks.

    The combined dataset is first split by movie; the first resulting
    dataset is then split with zmws=True into 1 and into 12 chunks. In
    both cases the chunk lengths must add up to the record count of
    that first dataset, and every one of the 12 chunks must report the
    first movie's name in its first zmwRanges entry.
    """
    # NOTE(review): another definition with this exact name follows
    # this one in the file; if both live in the same class, the later
    # one replaces this one and this body never runs -- confirm.
    # TODO: test with three movies and two chunks
    expected_records = 959539
    test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2372215/0007/Analysis_Results/m150404_101626_42"
                   "267_c100807920800000001823174110291514_s1_p0.al"
                   "l.subreadset.xml")
    test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2590980/0008/Analysis_Results/m141115_075238_et"
                   "han_c100699872550000001823139203261572_s1_p0.al"
                   "l.subreadset.xml")

    # Split by movie first and keep only the first resulting dataset.
    combined = SubreadSet(test_file_1, test_file_2)
    per_movie = combined.split_movies(2)
    self.assertEqual(len(per_movie), 2)
    first_movie = per_movie[0]

    # The expected total was originally obtained by iterating over the
    # records (sum(1 for _ in first_movie)); len() is checked here.
    self.assertEqual(len(first_movie), expected_records)

    # A single ZMW chunk must hold every record.
    one_chunk = first_movie.split(chunks=1, zmws=True)
    self.assertEqual(len(one_chunk), 1)
    self.assertEqual(sum(len(part) for part in one_chunk),
                     expected_records)

    # Twelve ZMW chunks must still account for every record.
    zmw_chunks = first_movie.split(chunks=12, zmws=True)
    self.assertEqual(len(zmw_chunks), 12)
    self.assertEqual(sum(len(part) for part in zmw_chunks),
                     expected_records)

    # Every chunk's first ZMW range must name the first movie.
    for part in zmw_chunks:
        self.assertEqual(
            part.zmwRanges[0][0],
            'm150404_101626_42267_c100807920800000001823174110291514_s1_p0'
        )
def test_multi_movie_split_zmws_with_existing_movie_filter(self):
    """Verify ZMW splitting of a dataset produced by split_movies().

    Builds a SubreadSet from two subreadset XML files, splits it into
    two per-movie datasets, and takes the first. That dataset is split
    with zmws=True into 1 chunk and then 12 chunks; each time the
    summed chunk lengths must equal the dataset's record count. For the
    12-chunk split, element [0][0] of each chunk's zmwRanges must be
    the first movie's name.
    """
    # NOTE(review): a definition with this exact name also appears
    # immediately before this one in the file; if both live in the same
    # class, this later one is the only one that runs -- confirm.
    # TODO: test with three movies and two chunks
    n_expected = 959539
    movie_name = (
        'm150404_101626_42267_c100807920800000001823174110291514_s1_p0')
    test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2372215/0007/Analysis_Results/m150404_101626_42"
                   "267_c100807920800000001823174110291514_s1_p0.al"
                   "l.subreadset.xml")
    test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2590980/0008/Analysis_Results/m141115_075238_et"
                   "han_c100699872550000001823139203261572_s1_p0.al"
                   "l.subreadset.xml")

    both_movies = SubreadSet(test_file_1, test_file_2)
    movie_datasets = both_movies.split_movies(2)
    self.assertEqual(len(movie_datasets), 2)

    # Work only with the first per-movie dataset from here on.
    subject = movie_datasets[0]

    # The expected total was originally obtained by iterating over the
    # records (sum(1 for _ in subject)); len() is what is checked here.
    self.assertEqual(len(subject), n_expected)

    # chunks=1: one chunk containing all records.
    pieces = subject.split(chunks=1, zmws=True)
    self.assertEqual(len(pieces), 1)
    self.assertEqual(sum(len(piece) for piece in pieces), n_expected)

    # chunks=12: twelve chunks whose lengths add up to all records.
    pieces = subject.split(chunks=12, zmws=True)
    self.assertEqual(len(pieces), 12)
    self.assertEqual(sum(len(piece) for piece in pieces), n_expected)

    # Each chunk's first ZMW range must carry the first movie's name.
    for piece in pieces:
        self.assertEqual(piece.zmwRanges[0][0], movie_name)