def test_fastq_screen_finished(self):
    """Check detection of a completed fastq_screen run.

    The test expects ``sq.fastq_screen_finished`` to report True only when
    a non-empty ``*_fastq_screen.txt`` file exists together with its
    matching ``.png`` file.
    """
    # Nothing has been produced yet, so the step cannot be finished
    self.assertFalse(
        sq.fastq_screen_finished(self.rootdir),
        "Fastq screen should not be considered finished without output files")

    # An empty result file plus its png must still count as unfinished
    result_name = "{}_fastq_screen.txt".format(td.generate_sample())
    sample_file = os.path.join(self.rootdir, result_name)
    png_file = "{}.png".format(os.path.splitext(sample_file)[0])
    for path in (sample_file, png_file):
        utils.touch_file(path)
    self.assertFalse(
        sq.fastq_screen_finished(self.rootdir),
        "Fastq screen should not be considered finished with empty output file")

    # Once the result file has rows, the step counts as finished
    with open(sample_file, "w") as handle:
        handle.writelines("{}\n".format(row) for row in range(5))
    self.assertTrue(
        sq.fastq_screen_finished(self.rootdir),
        "Fastq screen should be considered finished with non-empty output file and corresponding png")

    # Without the png the step is unfinished again
    os.unlink(png_file)
    self.assertFalse(
        sq.fastq_screen_finished(self.rootdir),
        "Fastq screen should not be considered finished with non-empty output file but without corresponding png")
def test_get_samplesheet(self):
    """Verify samplesheet lookup through ``sq.get_samplesheet``.

    Unrelated files and folders must yield None; inside a run folder the
    test expects ``SampleSheet.csv`` to be returned until a ``[FCID].csv``
    exists, after which the latter is returned.
    """
    # Fill the root folder with decoys: randomly named folders and
    # temporary files (one of them carrying a .csv suffix)
    for suffix in (".csv", "", ""):
        decoy_name = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(5))
        os.mkdir(os.path.join(self.rootdir, decoy_name))
        handle, _unused_path = tempfile.mkstemp(dir=self.rootdir,
                                                suffix=suffix)
        os.close(handle)
    self.assertIsNone(
        sq.get_samplesheet(self.rootdir),
        "Getting non-existing samplesheet did not return None")

    # Build a run folder and add the samplesheets one at a time; each
    # newly created sheet is expected to be the one returned
    fcid = td.generate_fc_barcode()
    fcdir = os.path.join(self.rootdir, td.generate_run_id(fc_barcode=fcid))
    os.mkdir(fcdir)
    for sheet_name in ("SampleSheet.csv", "{}.csv".format(fcid)):
        sheet = os.path.join(fcdir, sheet_name)
        utils.touch_file(sheet)
        self.assertEqual(
            sheet,
            sq.get_samplesheet(fcdir),
            "Did not get existing {}".format(os.path.basename(sheet)))
def test_get_pipeline_indicator(self):
    """Verify retrieval of pipeline indicator files.

    Covers an empty folder, a folder holding only unrelated files, a
    folder holding five ``NN_<action>.txt`` indicators, and queries
    restricted to specific step numbers.
    """
    def found(*steps):
        # Thin wrapper so each query below reads as a single call
        return sq.get_pipeline_indicator(self.rootdir, *steps)

    self.assertListEqual(
        [], found(),
        "Empty directory did not return an empty list")

    # Unrelated temporary files must be ignored
    for _ in range(5):
        handle, _unused_path = tempfile.mkstemp(dir=self.rootdir)
        os.close(handle)
    self.assertListEqual(
        [], found(),
        "Non-existing indicator files did not return an empty list")

    # Create indicator files for steps 1..5 with random action names
    ifiles = []
    for step in range(1, 6):
        action = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(5))
        indicator = os.path.join(
            self.rootdir, "{s:02d}_{act}.txt".format(s=step, act=action))
        ifiles.append(indicator)
        utils.touch_file(indicator)
    self.assertListEqual(
        sorted(ifiles), sorted(found()),
        "Existing indicator files did not return the expected output")

    # Restricting the query to steps 1 and 2 returns just those files
    self.assertListEqual(
        sorted(ifiles[0:2]), sorted(found(range(1, 3))),
        "Specific indicator files did not return the expected output")

    # A range that overlaps only the last existing step returns that
    # single file
    last_step = len(ifiles)
    self.assertListEqual(
        [ifiles[-1]], sorted(found(range(last_step, last_step + 2))),
        "Specific non-existing indicator files did not return the expected output")
def test__get_project_analysis_dir(self):
    """Verify lookup of a project's analysis folder.

    The lookup is expected to return None when the folder is missing,
    when only other project folders exist, and when the matching name
    belongs to a plain file; it must return the path once a folder with
    the project name exists.
    """
    proj = td.generate_project()
    projdir = os.path.join(self.rootdir, proj)

    def lookup():
        return sq.get_project_analysis_dir(self.rootdir, proj)

    # Nothing exists yet
    self.assertIsNone(
        lookup(),
        "Did not return empty result for non-existing folders")

    # Folders belonging to other projects must not match
    for _ in range(5):
        os.mkdir(os.path.join(self.rootdir, td.generate_project()))
    self.assertIsNone(
        lookup(),
        "Did not return empty result for mismatching folders")

    # A regular file carrying the project name must be ignored
    utils.touch_file(projdir)
    self.assertIsNone(
        lookup(),
        "Returned a file with matching name. Should only return folders")
    os.unlink(projdir)

    # The correct folder is returned once it exists
    os.mkdir(projdir)
    self.assertEqual(
        projdir, lookup(),
        "The expected project folder was not returned")
def test__do_first_read_processing(self):
    """Exercise the decision logic for first read processing.

    Indicator files are added to the run folder one at a time and the
    answer of ``_do_first_read_processing`` is checked after each change.
    """
    def in_run(name):
        # Path of a file located directly in the run folder
        return os.path.join(self.rootdir, name)

    # self._runinfo is a fixture helper defined outside this block;
    # presumably it writes the RunInfo.xml used by the function under test
    self._runinfo(in_run("RunInfo.xml"))
    self.assertFalse(
        _do_first_read_processing(self.rootdir),
        "Processing should not be run before first read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read1.txt"))
    self.assertFalse(
        _do_first_read_processing(self.rootdir),
        "Processing should not be run before last index read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read2.txt"))
    utils.touch_indicator_file(in_run("initial_processing_started.txt"))
    self.assertFalse(
        _do_first_read_processing(self.rootdir),
        "Processing should not be run when previous processing step is in progress")

    utils.touch_indicator_file(in_run("initial_processing_completed.txt"))
    self.assertTrue(
        _do_first_read_processing(self.rootdir),
        "Processing should be run when last index read is finished")

    utils.touch_indicator_file(in_run("first_read_processing_started.txt"))
    self.assertFalse(
        _do_first_read_processing(self.rootdir),
        "Processing should not be run when processing has started")
def test_get_sample_pipeline_log(self):
    """Verify lookup of a sample's ``-bcbb.log`` pipeline logfile.

    None is expected while the folder is empty or holds only logs of
    other samples; the log path is expected once the sample's own log
    exists.
    """
    sample = td.generate_sample()

    def log_path(name):
        return os.path.join(self.rootdir, "{}-bcbb.log".format(name))

    def lookup():
        return sq.get_sample_pipeline_log(self.rootdir, sample)

    # Empty folder
    self.assertIsNone(
        lookup(),
        "Getting a sample log from an empty directory did not return None")

    # Logs belonging to other samples must not be picked up
    for _ in range(5):
        utils.touch_file(log_path(td.generate_sample()))
    self.assertIsNone(
        lookup(),
        "Getting a non-existing sample log did not return None")

    # The sample's own log is returned once it exists
    utils.touch_file(log_path(sample))
    self.assertEqual(
        log_path(sample), lookup(),
        "Getting an existing sample log file did not return the expected file")
def test_get_samplesheet(self):
    """Locate the samplesheet in a folder

    Works inside a dedicated subdirectory of the test root. Unrelated
    files and folders must yield None from ``IlluminaRun.get_samplesheet``;
    inside a run folder the test expects ``SampleSheet.csv`` to be returned
    until a ``[FCID].csv`` exists, after which the latter is returned.

    The scratch subdirectory is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind.
    """
    # Work in a separate subdirectory
    subdir = os.path.join(self.rootdir, "test_get_samplesheet")
    os.mkdir(subdir)
    try:
        # Create a few random files and folders and assert that they are
        # not returned
        for suffix in (".csv", "", ""):
            decoy_name = ''.join(
                random.choice(string.ascii_uppercase) for _ in range(5))
            os.mkdir(os.path.join(subdir, decoy_name))
            handle, _unused_path = tempfile.mkstemp(dir=subdir,
                                                    suffix=suffix)
            os.close(handle)
        self.assertIsNone(
            IlluminaRun.get_samplesheet(subdir),
            "Getting non-existing samplesheet did not return None")

        # Create a SampleSheet.csv and a [FCID].csv file and assert that
        # they are returned with a preference for the [FCID].csv file
        fcid = td.generate_fc_barcode()
        fcdir = os.path.join(subdir, td.generate_run_id(fc_barcode=fcid))
        os.mkdir(fcdir)
        for sheet_name in ("SampleSheet.csv", "{}.csv".format(fcid)):
            sheet = os.path.join(fcdir, sheet_name)
            utils.touch_file(sheet)
            self.assertEqual(
                sheet,
                IlluminaRun.get_samplesheet(fcdir),
                "Did not get existing {}".format(os.path.basename(sheet)))
    finally:
        # Always clean up, also when an assertion above fails
        shutil.rmtree(subdir)
def test__get_directories(self):
    """Verify discovery of run output directories.

    ``ifm._get_directories`` is pointed at the test root through the
    ``dump_directories`` config entry. The test expects it to ignore plain
    files and to yield the run folders created below.
    """
    config = {"dump_directories": [self.rootdir]}

    def observed():
        # Materialise whatever iterable the function under test returns
        return list(ifm._get_directories(config))

    def in_root(name):
        return os.path.join(self.rootdir, name)

    self.assertListEqual(
        [], observed(),
        "Expected empty list for getting non-existing run directories")

    # A plain file with a run-like name must be ignored
    utils.touch_file(in_root("111111_SN111_1111_A11111111"))
    self.assertListEqual(
        [], observed(),
        "Should not pick up files, only directories")

    # Two run folders, created last-first as in the original set-up
    exp_dirs = [
        in_root("222222_SN222_2222_A2222222"),
        in_root("333333_D0023_3333_B33333XX"),
    ]
    for run_dir in reversed(exp_dirs):
        os.mkdir(run_dir)
    self.assertListEqual(
        sorted(exp_dirs), sorted(observed()),
        "Should pick up matching directory - hiseq-style")

    # One more run folder with a different naming pattern
    miseq_dir = in_root("333333_M33333_3333_A000000000-A3333")
    exp_dirs.append(miseq_dir)
    os.mkdir(miseq_dir)
    self.assertListEqual(
        sorted(exp_dirs), sorted(observed()),
        "Should pick up matching directory - miseq-style")
def test_get_unmatched_reads(self):
    """Get the undetermined indexes reads

    For each of the lanes 1, 3, 5 and 7 a ``Sample_lane<N>`` folder with
    two read files is created below ``self.exp_unmatched_directory[0]``,
    and ``self.run.get_unmatched_reads`` is queried for that lane.

    The check is performed inside the loop, so every lane is verified.
    Previously the assertion used the loop variable after the loop and
    therefore only covered the last lane.
    """
    lanes = [1, 3, 5, 7]
    for lane in lanes:
        # Create the files representing this lane's undetermined reads
        fdir = os.path.join(self.exp_unmatched_directory[0],
                            "Sample_lane{:d}".format(lane))
        lane_reads = [
            os.path.join(
                fdir,
                "lane{l:d}_Undetermined_L00{l:d}_R{r:d}_*.fastq.gz".format(
                    l=lane, r=read))
            for read in [1, 2]
        ]
        os.makedirs(fdir)
        for readfile in lane_reads:
            utils.touch_file(readfile)

        # Assert that the correct files are returned for this lane
        self.assertListEqual(
            sorted(lane_reads),
            sorted(self.run.get_unmatched_reads(lanes=[lane])[0]),
            "Did not get expected undetermined indexes reads")
def test__is_finished_basecalling_read(self):
    """Verify detection of finished basecalling for individual reads.

    With a two-read run set up through ``self._runinfo``, the read numbers
    0 and 3 must raise ValueError. For reads 1 and 2 the result must flip
    from False to True once the matching
    ``Basecalling_Netcopy_complete_Read<N>.txt`` file exists.
    """
    # Custom RunInfo.xml in the run folder; self._runinfo is a fixture
    # helper defined outside this block
    self._runinfo(os.path.join(self.rootdir, "RunInfo.xml"), "Y101,Y101")

    # Read numbers outside 1..2 are rejected
    for invalid_read in (0, 3):
        with self.assertRaises(ValueError):
            ifm._is_finished_basecalling_read(self.rootdir, invalid_read)

    for read in (1, 2):
        indicator = os.path.join(
            self.rootdir,
            "Basecalling_Netcopy_complete_Read{:d}.txt".format(read))
        self.assertFalse(
            ifm._is_finished_basecalling_read(self.rootdir, read),
            "Should not return true with missing indicator file")
        utils.touch_file(indicator)
        self.assertTrue(
            ifm._is_finished_basecalling_read(self.rootdir, read),
            "Should return true with existing indicator file")
def test__do_first_read_processing(self):
    """Exercise the decision logic for first read processing.

    Indicator files are added to the run folder one at a time and the
    answer of ``ifm._do_first_read_processing`` is checked after each
    change.
    """
    def in_run(name):
        # Path of a file located directly in the run folder
        return os.path.join(self.rootdir, name)

    def should_process():
        return ifm._do_first_read_processing(self.rootdir)

    # self._runinfo is a fixture helper defined outside this block;
    # presumably it writes the RunInfo.xml used by the function under test
    self._runinfo(in_run("RunInfo.xml"))
    self.assertFalse(
        should_process(),
        "Processing should not be run before first read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read1.txt"))
    self.assertFalse(
        should_process(),
        "Processing should not be run before last index read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read2.txt"))
    utils.touch_indicator_file(in_run("initial_processing_started.txt"))
    self.assertFalse(
        should_process(),
        "Processing should not be run when previous processing step is in progress")

    utils.touch_indicator_file(in_run("initial_processing_completed.txt"))
    self.assertTrue(
        should_process(),
        "Processing should be run when last index read is finished")

    utils.touch_indicator_file(in_run("first_read_processing_started.txt"))
    self.assertFalse(
        should_process(),
        "Processing should not be run when processing has started")
def test__do_initial_processing(self):
    """Exercise the decision logic for initial processing.

    The test expects ``_do_initial_processing`` to return True only while
    ``First_Base_Report.htm`` exists and no
    ``initial_processing_started.txt`` indicator has been written.
    """
    first_base_report = os.path.join(self.rootdir, "First_Base_Report.htm")
    started_flag = os.path.join(self.rootdir,
                                "initial_processing_started.txt")

    self.assertFalse(
        _do_initial_processing(self.rootdir),
        "Initial processing should not be run with missing indicator flags")

    utils.touch_file(first_base_report)
    self.assertTrue(
        _do_initial_processing(self.rootdir),
        "Initial processing should be run after first base report creation")

    utils.touch_indicator_file(started_flag)
    self.assertFalse(
        _do_initial_processing(self.rootdir),
        "Initial processing should not be run when processing has been started")

    # Removing the report must not re-enable processing
    os.unlink(first_base_report)
    self.assertFalse(
        _do_initial_processing(self.rootdir),
        "Initial processing should not be run when processing has been started "
        "and missing first base report")
def test_get_sample_pipeline_log(self):
    """Verify lookup of a sample's ``-bcbb.log`` pipeline logfile.

    None is expected while the folder is empty or holds only logs of
    other samples; the log path is expected once the sample's own log
    exists.
    """
    sample = td.generate_sample()
    expected_log = os.path.join(self.rootdir, "{}-bcbb.log".format(sample))

    # Empty folder
    self.assertIsNone(
        sq.get_sample_pipeline_log(self.rootdir, sample),
        "Getting a sample log from an empty directory did not return None")

    # Logs belonging to other samples must not be picked up
    for _ in range(5):
        other_log = "{}-bcbb.log".format(td.generate_sample())
        utils.touch_file(os.path.join(self.rootdir, other_log))
    self.assertIsNone(
        sq.get_sample_pipeline_log(self.rootdir, sample),
        "Getting a non-existing sample log did not return None")

    # The sample's own log is returned once it exists
    utils.touch_file(expected_log)
    self.assertEqual(
        expected_log,
        sq.get_sample_pipeline_log(self.rootdir, sample),
        "Getting an existing sample log file did not return the expected file")
def test_get_unmatched_reads(self):
    """Get the undetermined indexes reads

    For each of the lanes 1, 3, 5 and 7 a ``Sample_lane<N>`` folder with
    two read files is created below ``self.exp_unmatched_directory[0]``,
    and ``self.run.get_unmatched_reads`` is queried for that lane.

    The check is performed inside the loop, so every lane is verified.
    Previously the assertion used the loop variable after the loop and
    therefore only covered the last lane.
    """
    lanes = [1, 3, 5, 7]
    for lane in lanes:
        # Create the files representing this lane's undetermined reads
        fdir = os.path.join(self.exp_unmatched_directory[0],
                            "Sample_lane{:d}".format(lane))
        name_template = "lane{l:d}_Undetermined_L00{l:d}_R{r:d}_*.fastq.gz"
        lane_reads = [
            os.path.join(fdir, name_template.format(l=lane, r=read))
            for read in [1, 2]
        ]
        os.makedirs(fdir)
        for readfile in lane_reads:
            utils.touch_file(readfile)

        # Assert that the correct files are returned for this lane
        self.assertListEqual(
            sorted(lane_reads),
            sorted(self.run.get_unmatched_reads(lanes=[lane])[0]),
            "Did not get expected undetermined indexes reads")
def test__get_directories(self):
    """Verify discovery of run output directories.

    ``_get_directories`` is pointed at the test root through the
    ``dump_directories`` config entry. The test expects it to ignore plain
    files and to yield the run folders created below.
    """
    config = {"dump_directories": [self.rootdir]}

    def observed():
        # Materialise whatever iterable the function under test returns
        return list(_get_directories(config))

    self.assertListEqual(
        [], observed(),
        "Expected empty list for getting non-existing run directories")

    # A plain file with a run-like name must be ignored
    utils.touch_file(os.path.join(self.rootdir,
                                  "111111_SN111_1111_A11111111"))
    self.assertListEqual(
        [], observed(),
        "Should not pick up files, only directories")

    # First run folder
    hiseq_dir = os.path.join(self.rootdir, "222222_SN222_2222_A2222222")
    exp_dirs = [hiseq_dir]
    os.mkdir(hiseq_dir)
    self.assertListEqual(
        sorted(exp_dirs), sorted(observed()),
        "Should pick up matching directory - hiseq-style")

    # Second run folder with a different naming pattern
    miseq_dir = os.path.join(self.rootdir,
                             "333333_M33333_3333_A000000000-A3333")
    exp_dirs.append(miseq_dir)
    os.mkdir(miseq_dir)
    self.assertListEqual(
        sorted(exp_dirs), sorted(observed()),
        "Should pick up matching directory - miseq-style")
def test__is_finished_basecalling_read(self):
    """Verify detection of finished basecalling for individual reads.

    With a two-read run set up through ``self._runinfo``, the read numbers
    0 and 3 must raise ValueError. For reads 1 and 2 the result must flip
    from False to True once the matching
    ``Basecalling_Netcopy_complete_Read<N>.txt`` file exists.
    """
    # Custom RunInfo.xml in the run folder; self._runinfo is a fixture
    # helper defined outside this block
    self._runinfo(os.path.join(self.rootdir, "RunInfo.xml"), "Y101,Y101")

    # Read numbers outside 1..2 are rejected
    for invalid_read in (0, 3):
        with self.assertRaises(ValueError):
            _is_finished_basecalling_read(self.rootdir, invalid_read)

    for read in (1, 2):
        indicator_name = "Basecalling_Netcopy_complete_Read{:d}.txt".format(
            read)
        self.assertFalse(
            _is_finished_basecalling_read(self.rootdir, read),
            "Should not return true with missing indicator file")
        utils.touch_file(os.path.join(self.rootdir, indicator_name))
        self.assertTrue(
            _is_finished_basecalling_read(self.rootdir, read),
            "Should return true with existing indicator file")
def test__do_second_read_processing(self):
    """Exercise the decision logic for second read processing.

    Checkpoint and indicator files are added to (and removed from) the run
    folder one at a time and the answer of
    ``ifm._do_second_read_processing`` is checked after each change.
    """
    def in_run(name):
        # Path of a file located directly in the run folder
        return os.path.join(self.rootdir, name)

    def should_process():
        return ifm._do_second_read_processing(self.rootdir)

    # self._runinfo is a fixture helper defined outside this block
    self._runinfo(in_run("RunInfo.xml"))

    # The upper-case READ2 checkpoint alone must trigger processing
    gaii_checkpoint = in_run("Basecalling_Netcopy_complete_READ2.txt")
    utils.touch_file(gaii_checkpoint)
    self.assertTrue(
        should_process(),
        "Processing should be run when last read GAII checkpoint exists")

    os.unlink(gaii_checkpoint)
    self.assertFalse(
        should_process(),
        "Processing should not be run before any reads are finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read2.txt"))
    self.assertFalse(
        should_process(),
        "Processing should not be run before last read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read3.txt"))
    self.assertTrue(
        should_process(),
        "Processing should be run when last read is finished")

    utils.touch_indicator_file(in_run("second_read_processing_started.txt"))
    self.assertFalse(
        should_process(),
        "Processing should not be run when processing has started")
def test__do_second_read_processing(self):
    """Exercise the decision logic for second read processing.

    Checkpoint and indicator files are added to (and removed from) the run
    folder one at a time and the answer of ``_do_second_read_processing``
    is checked after each change.
    """
    def in_run(name):
        # Path of a file located directly in the run folder
        return os.path.join(self.rootdir, name)

    # self._runinfo is a fixture helper defined outside this block
    self._runinfo(in_run("RunInfo.xml"))

    # The upper-case READ2 checkpoint alone must trigger processing
    gaii_checkpoint = in_run("Basecalling_Netcopy_complete_READ2.txt")
    utils.touch_file(gaii_checkpoint)
    self.assertTrue(
        _do_second_read_processing(self.rootdir),
        "Processing should be run when last read GAII checkpoint exists")

    os.unlink(gaii_checkpoint)
    self.assertFalse(
        _do_second_read_processing(self.rootdir),
        "Processing should not be run before any reads are finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read2.txt"))
    self.assertFalse(
        _do_second_read_processing(self.rootdir),
        "Processing should not be run before last read is finished")

    utils.touch_file(in_run("Basecalling_Netcopy_complete_Read3.txt"))
    self.assertTrue(
        _do_second_read_processing(self.rootdir),
        "Processing should be run when last read is finished")

    utils.touch_indicator_file(in_run("second_read_processing_started.txt"))
    self.assertFalse(
        _do_second_read_processing(self.rootdir),
        "Processing should not be run when processing has started")
def test__do_initial_processing(self):
    """Exercise the decision logic for initial processing.

    The test expects ``ifm._do_initial_processing`` to return True only
    while ``First_Base_Report.htm`` exists and no
    ``initial_processing_started.txt`` indicator has been written.
    """
    first_base_report = os.path.join(self.rootdir, "First_Base_Report.htm")
    started_flag = os.path.join(self.rootdir,
                                "initial_processing_started.txt")

    def should_process():
        return ifm._do_initial_processing(self.rootdir)

    self.assertFalse(
        should_process(),
        "Initial processing should not be run with missing indicator flags")

    utils.touch_file(first_base_report)
    self.assertTrue(
        should_process(),
        "Initial processing should be run after first base report creation")

    utils.touch_indicator_file(started_flag)
    self.assertFalse(
        should_process(),
        "Initial processing should not be run when processing has been started")

    # Removing the report must not re-enable processing
    os.unlink(first_base_report)
    self.assertFalse(
        should_process(),
        "Initial processing should not be run when processing has been started "
        "and missing first base report")
def test__is_finished_first_base_report(self):
    """Verify detection of the first base report.

    ``ifm._is_finished_first_base_report`` must return a false value
    before ``First_Base_Report.htm`` is created in the run folder and a
    true value afterwards.
    """
    report = os.path.join(self.rootdir, "First_Base_Report.htm")

    # Before the report has been created
    self.assertFalse(ifm._is_finished_first_base_report(self.rootdir))

    # After the report has been created
    utils.touch_file(report)
    self.assertTrue(ifm._is_finished_first_base_report(self.rootdir))
def test__is_finished_first_base_report(self):
    """Verify detection of the first base report.

    ``_is_finished_first_base_report`` must return a false value before
    ``First_Base_Report.htm`` is created in the run folder and a true
    value afterwards.
    """
    report = os.path.join(self.rootdir, "First_Base_Report.htm")

    # Before the report has been created
    self.assertFalse(_is_finished_first_base_report(self.rootdir))

    # After the report has been created
    utils.touch_file(report)
    self.assertTrue(_is_finished_first_base_report(self.rootdir))