def test_find_groups_checks_overrides(tmpdir):
    """Expect a project override from SampleSheetOverrides.csv to be applied.

    The sample sheet assigns Sample1 to Proj1, but the overrides file written
    next to it maps ``Sample1-Proj1_S1`` to ``AltProjA``. Sample2 has no
    override, so it is expected to keep Proj2.
    """
    sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv'
    # The backslash directly after the opening quotes is a line continuation:
    # it keeps the column header as the first line after BASIC_HEADER, and
    # every CSV row must stay on its own line.
    sample_sheet_path.write_text(BASIC_HEADER + """\
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder
CFE_SomeId_10-Jul-2014_N501-N701_Sample1_Proj1,Sample1_Proj1,,,ACGTACGT,TGCATGCA,,,
CFE_SomeId_10-Jul-2014_N501-N702_Sample2_Proj2,Sample2_Proj2,,,AAAAGGGG,CCCCTTTT,,,
""")
    # The overrides file is never passed to find_groups(); the test only
    # places it in the same folder as the sample sheet.
    sample_sheet_overrides_path = (
        sample_sheet_path.parent / 'SampleSheetOverrides.csv')
    sample_sheet_overrides_path.write_text("""\
sample,project
Sample1-Proj1_S1,AltProjA
""")
    expected_groups = [
        SampleGroup('Sample1',
                    ('Sample1-Proj1_S1_L001_R1_001.fastq.gz', None),
                    ('AltProjA', None)),
        SampleGroup('Sample2',
                    ('Sample2-Proj2_S2_L001_R1_001.fastq.gz', None),
                    ('Proj2', None)),
    ]

    groups = list(
        find_groups(['Sample1-Proj1_S1_L001_R1_001.fastq.gz',
                     'Sample2-Proj2_S2_L001_R1_001.fastq.gz'],
                    sample_sheet_path))

    assert expected_groups == groups
def test_hcv_mixed_hcv_running_on_singleton():
    """A lone HCV sample stays active while its two runs are unfinished.

    The sample group has no MIDI partner, and the expected active runs are
    a mixed-HCV main run plus a main run.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    singleton = SampleGroup('NEG1',
                            ('NEG1-HCV_S15_L001_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(singleton)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches the sample's runs
    folder_watcher.poll_runs()  # nothing finished, so nothing changes

    expected_active_runs = {
        102: {'id': 102,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MIXED_HCV_MAIN},
        103: {'id': 103,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MAIN},
    }
    assert expected_active_runs == session.active_runs
    expected_active_samples = {'NEG1-HCV_S15_L001_R1_001.fastq.gz'}
    assert expected_active_samples == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
def test_mixed_hcv_skipped_and_complete():
    """An HCV pair completes when mixed-HCV and denovo types are skipped.

    After filter_quality, main/midi, and resistance have each been finished
    in turn, nothing should be left active and the folder is complete.
    """
    skipped_types = {
        PipelineType.MIXED_HCV_MAIN,
        PipelineType.MIXED_HCV_MIDI,
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped_types)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    hcv_pair = SampleGroup('2130A',
                           ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                            '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'),
                           ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(hcv_pair)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main and midi
    session.finish_all_runs()  # main and midi are done
    folder_watcher.poll_runs()  # launches resistance
    session.finish_all_runs()  # resistance is done
    folder_watcher.poll_runs()  # nothing left to launch

    assert not session.active_runs
    assert not folder_watcher.active_runs
    assert not folder_watcher.active_samples
    assert folder_watcher.is_complete
def test_mid_hcv_complete():
    """Finishing only the midi run leaves the main run and sample active."""
    skipped_types = {PipelineType.MIXED_HCV_MAIN,
                     PipelineType.MIXED_HCV_MIDI}
    session = DummySession(skipped_types=skipped_types)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    hcv_pair = SampleGroup('2130A',
                           ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                            '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'))
    sample_watcher = SampleWatcher(hcv_pair)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main and midi
    session.finish_run({'id': 103})  # only the midi run is done
    folder_watcher.poll_runs()

    expected_active_runs = {
        102: {'id': 102,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MAIN},
    }
    assert expected_active_runs == session.active_runs
    assert 1 == len(folder_watcher.active_runs)
    expected_active_samples = {'2130A-HCV_S15_L001_R1_001.fastq.gz'}
    assert expected_active_samples == folder_watcher.active_samples
def test_resistance_running():
    """The folder is not complete while the resistance run is still active.

    The denovo pipeline types are passed to the session as skipped types.
    """
    skipped_types = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped_types)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main
    session.finish_all_runs()  # main is done
    folder_watcher.poll_runs()  # launches resistance
    folder_watcher.poll_runs()  # resistance has not finished yet

    expected_active_runs = {
        103: {'id': 103,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.RESISTANCE},
    }
    assert expected_active_runs == session.active_runs
    assert {'1234A-V3LOOP_R1_001.fastq.gz'} == folder_watcher.active_samples
    assert not folder_watcher.is_complete
def test_denovo_main_finished():
    """Finishing the main runs starts both kinds of resistance run.

    No pipeline types are skipped, so a denovo resistance run and a regular
    resistance run are both expected to be active.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches the main runs
    session.finish_all_runs()  # the main runs are done
    folder_watcher.poll_runs()  # launches the resistance runs

    expected_active_runs = {
        104: {'id': 104,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.DENOVO_RESISTANCE},
        105: {'id': 105,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.RESISTANCE},
    }
    assert expected_active_runs == session.active_runs
    assert 2 == len(folder_watcher.active_runs)
def test_main_failed():
    """A failed main run does not complete the folder by itself.

    The folder should report complete only after the remaining runs have
    been finished and polled again.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches the main runs
    # Exactly two runs are expected here; the second one is the run to fail.
    _denovo_main, mapping_main = session.active_runs.values()
    session.fail_run(mapping_main)

    folder_watcher.poll_runs()  # picks up the failure
    is_complete_after_failure = folder_watcher.is_complete

    session.finish_all_runs()
    folder_watcher.poll_runs()
    is_complete_at_end = folder_watcher.is_complete

    assert not is_complete_after_failure
    assert is_complete_at_end
def test_filter_quality_finished():
    """Once filter_quality finishes, the next poll starts the main run.

    The denovo pipeline types are passed to the session as skipped types,
    so the main run is the only one expected.
    """
    skipped_types = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped_types)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main

    expected_active_runs = {
        102: {'id': 102,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MAIN},
    }
    assert expected_active_runs == session.active_runs
def find_full_groups(fastq_files, sandbox_path):
    """Find sample groups and rewrite their file names under sandbox_path.

    :param fastq_files: objects with a ``name`` attribute, one per FASTQ file
    :param sandbox_path: folder holding SampleSheet.csv; it is joined with
        ``/`` onto each file name, so it must support that operator
    :return: a list of SampleGroup objects whose names are joined onto
        sandbox_path, with falsy names (such as None) passed through
    """
    file_names = [fastq_file.name for fastq_file in fastq_files]
    sample_sheet_path = sandbox_path / 'SampleSheet.csv'
    # Collect every group before building any of the new ones.
    found_groups = list(find_groups(file_names, sample_sheet_path))
    return [
        SampleGroup(found.enum,
                    tuple((sandbox_path / name) if name else name
                          for name in found.names),
                    found.project_codes)
        for found in found_groups]
def test_hcv_filter_quality_finished():
    """An HCV pair gets six runs once filter_quality has finished.

    No pipeline types are skipped, so mixed-HCV, denovo, and regular runs
    are expected for both the main and the midi sample.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    hcv_pair = SampleGroup('2130A',
                           ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                            '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'),
                           ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(hcv_pair)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main, midi, and mixed HCV

    # Expected run ids count up from 102 in this order.
    expected_types = [
        PipelineType.MIXED_HCV_MAIN,
        PipelineType.MIXED_HCV_MIDI,
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.MAIN,
        PipelineType.MIDI,
    ]
    expected_active_runs = {
        run_id: {'id': run_id,
                 'folder_watcher': folder_watcher,
                 'sample_watcher': sample_watcher,
                 'pipeline_type': pipeline_type}
        for run_id, pipeline_type in enumerate(expected_types, start=102)}
    assert expected_active_runs == session.active_runs
    expected_active_samples = {
        '2130A-HCV_S15_L001_R1_001.fastq.gz',
        '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz',
    }
    assert expected_active_samples == folder_watcher.active_samples
    assert 6 == len(folder_watcher.active_runs)
def test_filter_quality_failed():
    """A failed filter_quality run leaves no active runs and ends the folder.

    After the failure is polled, the session should have no active runs and
    the folder watcher should report that it is complete.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    # Unpacking into a one-item list checks that it is the only active run.
    [filter_quality] = session.active_runs.values()
    session.fail_run(filter_quality)
    folder_watcher.poll_runs()  # picks up the failure

    assert {} == session.active_runs
    assert folder_watcher.is_complete
def test_unmatched_midi_file_not_found(tmpdir):
    """Expect no group for a sample sheet row whose FASTQ file is missing.

    The sheet lists a MIDI sample and Sample2, but only Sample2's FASTQ file
    name is passed to find_groups(), so only Sample2 is expected back.
    """
    sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv'
    # The backslash directly after the opening quotes is a line continuation:
    # it keeps the column header as the first line after BASIC_HEADER, and
    # every CSV row must stay on its own line.
    sample_sheet_path.write_text(BASIC_HEADER + """\
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder
CFE_SomeId_10-Jul-2014_N501-N701_Sample1MIDI_MidHCV,Sample1MIDI_MidHCV,,,ACGTACGT,TGCATGCA,,,
CFE_SomeId_10-Jul-2014_N501-N702_Sample2_Proj2,Sample2_Proj2,,,AAAAGGGG,CCCCTTTT,,,
""")
    expected_groups = [
        SampleGroup('Sample2',
                    ('Sample2-Proj2_S2_L001_R1_001.fastq.gz', None),
                    ('Proj2', None)),
    ]

    groups = list(
        find_groups(['Sample2-Proj2_S2_L001_R1_001.fastq.gz'],
                    sample_sheet_path))

    assert expected_groups == groups
def test_filter_quality_running():
    """Polling again while filter_quality runs does not start anything new.

    The filter_quality run is expected to carry no sample watcher.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    folder_watcher.poll_runs()  # filter_quality has not finished yet

    expected_active_runs = {
        101: {'id': 101,
              'folder_watcher': folder_watcher,
              'sample_watcher': None,
              'pipeline_type': PipelineType.FILTER_QUALITY},
    }
    assert expected_active_runs == session.active_runs
def test_hcv_mixed_hcv_not_finished():
    """Unfinished mixed-HCV runs keep the folder from being complete.

    Main, midi, and resistance are each finished, but the two mixed-HCV
    runs are left active, so both samples stay active as well.
    """
    skipped_types = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped_types)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    hcv_pair = SampleGroup('2130A',
                           ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                            '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'),
                           ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(hcv_pair)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main, midi, and mixed HCV
    session.finish_run({'id': 104})  # main is done
    session.finish_run({'id': 105})  # midi is done
    folder_watcher.poll_runs()  # launches resistance; mixed HCV goes on
    session.finish_run({'id': 106})  # resistance is done
    folder_watcher.poll_runs()  # mixed HCV is still going

    expected_active_runs = {
        102: {'id': 102,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MIXED_HCV_MAIN},
        103: {'id': 103,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MIXED_HCV_MIDI},
    }
    assert expected_active_runs == session.active_runs
    expected_active_samples = {
        '2130A-HCV_S15_L001_R1_001.fastq.gz',
        '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz',
    }
    assert expected_active_samples == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
    assert not folder_watcher.is_complete
def test_combine_midi(tmpdir):
    """Expect an HCV sample and its MIDI partner to share one group.

    Sample1 and Sample1MIDI are separate rows in the sample sheet, and the
    single expected group holds both file names and both project codes.
    """
    sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv'
    # The backslash directly after the opening quotes is a line continuation:
    # it keeps the column header as the first line after BASIC_HEADER, and
    # every CSV row must stay on its own line.
    sample_sheet_path.write_text(BASIC_HEADER + """\
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder
CFE_SomeId_10-Jul-2014_N501-N701_Sample1_HCV,Sample1_HCV,,,ACGTACGT,TGCATGCA,,,
CFE_SomeId_10-Jul-2014_N501-N702_Sample1MIDI_MidHCV,Sample1MIDI_MidHCV,,,AAAAGGGG,CCCCTTTT,,,
""")
    expected_groups = [
        SampleGroup('Sample1',
                    ('Sample1-HCV_S1_L001_R1_001.fastq.gz',
                     'Sample1MIDI-MidHCV_S2_L001_R1_001.fastq.gz'),
                    ('HCV', 'MidHCV')),
    ]

    groups = list(
        find_groups(['Sample1-HCV_S1_L001_R1_001.fastq.gz',
                     'Sample1MIDI-MidHCV_S2_L001_R1_001.fastq.gz'],
                    sample_sheet_path))

    assert expected_groups == groups
def test_resistance_finished():
    """The folder is complete once every stage has finished.

    After filter_quality, main, and resistance are each finished and polled,
    no runs or samples should remain active.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main
    session.finish_all_runs()  # main is done
    folder_watcher.poll_runs()  # launches resistance
    session.finish_all_runs()  # resistance is done
    folder_watcher.poll_runs()  # the sample is finished

    assert not session.active_runs
    assert not folder_watcher.active_samples
    assert not folder_watcher.active_runs
    assert folder_watcher.is_complete
def test_main_running():
    """Polling again while main runs leaves it as the only active run."""
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches main
    folder_watcher.poll_runs()  # main has not finished yet

    expected_active_runs = {
        102: {'id': 102,
              'folder_watcher': folder_watcher,
              'sample_watcher': sample_watcher,
              'pipeline_type': PipelineType.MAIN},
    }
    assert expected_active_runs == session.active_runs
def test_denovo_resistance_complete():
    """Finishing only the denovo resistance run does not complete the folder.

    The sample's other resistance run is never finished, so the folder
    watcher should still report that it is not complete.
    """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    v3loop_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(v3loop_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # launches filter_quality
    session.finish_all_runs()  # filter_quality is done
    folder_watcher.poll_runs()  # launches the main runs
    session.finish_all_runs()  # the main runs are done
    folder_watcher.poll_runs()  # launches the resistance runs
    session.finish_run(sample_watcher.runs[PipelineType.DENOVO_RESISTANCE])
    folder_watcher.poll_runs()  # picks up the finished denovo resistance
    folder_watcher.poll_runs()  # the other resistance run is still going

    assert not folder_watcher.is_complete
def test_sample_watcher_repr_single():
    """The repr of a single-file watcher shows '...' for the file name."""
    single_group = SampleGroup('1234A', ('foo', None))

    actual_repr = repr(SampleWatcher(single_group))

    expected_repr = "SampleWatcher(SampleGroup('1234A', ('...', None)))"
    assert expected_repr == actual_repr
def test_sample_watcher_repr_pair():
    """The repr of a two-file watcher shows '...' for both file names."""
    pair_group = SampleGroup('1234A', ('foo', 'bar'))

    actual_repr = repr(SampleWatcher(pair_group))

    expected_repr = "SampleWatcher(SampleGroup('1234A', ('...', '...')))"
    assert expected_repr == actual_repr