Ejemplo n.º 1
0
def test_mixed_hcv_skipped():
    """ With both mixed-HCV types skipped, only MAIN and MIDI are active. """
    skipped = {PipelineType.MIXED_HCV_MAIN, PipelineType.MIXED_HCV_MIDI}
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(
        SampleGroup('2130A', (main_fastq, midi_fastq)))
    folder_watcher.sample_watchers.append(sample_watcher)

    # First poll launches filter_quality, which then finishes.
    folder_watcher.poll_runs()
    session.finish_all_runs()

    # Second poll launches the main and midi runs.
    folder_watcher.poll_runs()

    expected_types = (PipelineType.MAIN, PipelineType.MIDI)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=102)
    }
    assert expected_runs == session.active_runs
    assert {main_fastq, midi_fastq} == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
Ejemplo n.º 2
0
def test_hcv_mixed_hcv_running_on_singleton():
    """ A lone HCV sample keeps MIXED_HCV_MAIN and MAIN active while polled. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_name = 'NEG1-HCV_S15_L001_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(SampleGroup('NEG1', (fastq_name, None)))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # sample runs launched
    folder_watcher.poll_runs()  # sample runs are still active

    expected_types = (PipelineType.MIXED_HCV_MAIN, PipelineType.MAIN)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=102)
    }
    assert expected_runs == session.active_runs
    assert {fastq_name} == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
Ejemplo n.º 3
0
def test_mixed_hcv_skipped_and_complete():
    """ Folder is complete once every non-skipped stage has finished. """
    skipped = {
        PipelineType.MIXED_HCV_MAIN,
        PipelineType.MIXED_HCV_MIDI,
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                   '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz')
    sample_group = SampleGroup('2130A', fastq_names, ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    # Stage 1: filter_quality.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Stage 2: main and midi.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Stage 3: resistance.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Final poll notices that everything is done.
    folder_watcher.poll_runs()

    assert not session.active_runs
    assert not folder_watcher.active_runs
    assert not folder_watcher.active_samples
    assert folder_watcher.is_complete
Ejemplo n.º 4
0
def test_main_failed():
    """ Folder is incomplete right after a main run fails, complete at end. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    folder_watcher.sample_watchers.append(SampleWatcher(sample_group))

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done
    folder_watcher.poll_runs()  # main runs launched

    # Exactly two runs are expected to be active; fail the second one.
    _denovo_run, mapping_run = session.active_runs.values()
    session.fail_run(mapping_run)

    folder_watcher.poll_runs()  # failure is noticed
    complete_after_failure = folder_watcher.is_complete

    session.finish_all_runs()
    folder_watcher.poll_runs()
    complete_at_end = folder_watcher.is_complete

    assert not complete_after_failure
    assert complete_at_end
Ejemplo n.º 5
0
def test_hcv_mixed_hcv_not_finished():
    """ Folder stays incomplete while both mixed-HCV runs are still active. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(
        SampleGroup('2130A', (main_fastq, midi_fastq)))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main, midi, and mixed HCV launched
    session.finish_run({'id': 104})  # main done
    session.finish_run({'id': 105})  # midi done
    folder_watcher.poll_runs()  # resistance launched, mixed HCV still active
    session.finish_run({'id': 106})  # resistance done
    folder_watcher.poll_runs()  # only mixed HCV is left

    expected_types = (PipelineType.MIXED_HCV_MAIN,
                      PipelineType.MIXED_HCV_MIDI)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=102)
    }
    assert expected_runs == session.active_runs
    assert {main_fastq, midi_fastq} == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
    assert not folder_watcher.is_complete
Ejemplo n.º 6
0
def test_denovo_main_finished():
    """ After the main runs finish, both resistance runs become active. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done
    folder_watcher.poll_runs()  # main runs launched
    session.finish_all_runs()  # main runs done
    folder_watcher.poll_runs()  # resistance runs launched

    expected_types = (PipelineType.DENOVO_RESISTANCE,
                      PipelineType.RESISTANCE)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=104)
    }
    assert expected_runs == session.active_runs
    assert 2 == len(folder_watcher.active_runs)
Ejemplo n.º 7
0
def test_resistance_running():
    """ With denovo skipped, an unfinished resistance run stays active. """
    skipped = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_name = '1234A-V3LOOP_R1_001.fastq.gz'
    sample_group = SampleGroup('1234A', (fastq_name, None), ('V3LOOP', None))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done
    folder_watcher.poll_runs()  # main launched
    session.finish_all_runs()  # main done
    folder_watcher.poll_runs()  # resistance launched
    folder_watcher.poll_runs()  # resistance is still active

    expected_runs = {
        103: {
            'id': 103,
            'folder_watcher': folder_watcher,
            'sample_watcher': sample_watcher,
            'pipeline_type': PipelineType.RESISTANCE,
        },
    }
    assert expected_runs == session.active_runs
    assert {fastq_name} == folder_watcher.active_samples
    assert not folder_watcher.is_complete
Ejemplo n.º 8
0
def test_mid_hcv_complete():
    """ Once the midi run finishes, only the main run and sample are active. """
    skipped = {PipelineType.MIXED_HCV_MAIN, PipelineType.MIXED_HCV_MIDI}
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(
        SampleGroup('2130A', (main_fastq, midi_fastq)))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main and midi launched
    session.finish_run({'id': 103})  # midi done
    folder_watcher.poll_runs()

    expected_runs = {
        102: {
            'id': 102,
            'folder_watcher': folder_watcher,
            'sample_watcher': sample_watcher,
            'pipeline_type': PipelineType.MAIN,
        },
    }
    assert expected_runs == session.active_runs
    assert 1 == len(folder_watcher.active_runs)
    assert {main_fastq} == folder_watcher.active_samples
Ejemplo n.º 9
0
def test_filter_quality_failed():
    """ A failed filter_quality run leaves no active runs; folder is complete. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    folder_watcher.sample_watchers.append(SampleWatcher(sample_group))

    folder_watcher.poll_runs()  # filter_quality launched
    # Exactly one run is expected to be active at this point.
    [filter_quality] = session.active_runs.values()
    session.fail_run(filter_quality)

    folder_watcher.poll_runs()  # poll again after the failure

    assert {} == session.active_runs
    assert folder_watcher.is_complete
Ejemplo n.º 10
0
def test_filter_quality_running():
    """ An unfinished filter_quality run is the only active run. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    folder_watcher.sample_watchers.append(SampleWatcher(sample_group))

    folder_watcher.poll_runs()  # filter_quality launched
    folder_watcher.poll_runs()  # filter_quality is still active

    # The filter_quality run is recorded without a sample watcher.
    expected_runs = {
        101: {
            'id': 101,
            'folder_watcher': folder_watcher,
            'sample_watcher': None,
            'pipeline_type': PipelineType.FILTER_QUALITY,
        },
    }
    assert expected_runs == session.active_runs
Ejemplo n.º 11
0
def test_folder_watcher_run_details():
    """ run_folder and run_name are derived from the BaseCalls path. """
    watcher = FolderWatcher(
        '/path/140101_M01234_JUNK/Data/Intensities/BaseCalls')

    assert Path('/path/140101_M01234_JUNK') == watcher.run_folder
    assert '140101_M01234' == watcher.run_name
Ejemplo n.º 12
0
def test_filter_quality_finished():
    """ After filter_quality finishes, the next poll launches the main run. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main launched

    expected_runs = {
        102: {
            'id': 102,
            'folder_watcher': folder_watcher,
            'sample_watcher': sample_watcher,
            'pipeline_type': PipelineType.MAIN,
        },
    }
    assert expected_runs == session.active_runs
Ejemplo n.º 13
0
def test_filter_quality_running():
    """ Polling twice without finishing leaves filter_quality as run 101. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('1234A-V3LOOP_R1_001.fastq.gz', None)
    folder_watcher.sample_watchers.append(
        SampleWatcher(SampleGroup('1234A', fastq_names)))

    # Launch filter_quality, then poll again while it is still active.
    for _ in range(2):
        folder_watcher.poll_runs()

    filter_quality_run = dict(id=101,
                              folder_watcher=folder_watcher,
                              sample_watcher=None,
                              pipeline_type=PipelineType.FILTER_QUALITY)
    assert {101: filter_quality_run} == session.active_runs
Ejemplo n.º 14
0
def test_hcv_mixed_hcv_not_finished():
    """ With denovo skipped, unfinished mixed-HCV runs keep folder incomplete. """
    skipped = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    sample_group = SampleGroup('2130A',
                               (main_fastq, midi_fastq),
                               ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main, midi, and mixed HCV launched
    session.finish_run({'id': 104})  # main done
    session.finish_run({'id': 105})  # midi done
    folder_watcher.poll_runs()  # resistance launched, mixed HCV still active
    session.finish_run({'id': 106})  # resistance done
    folder_watcher.poll_runs()  # only mixed HCV is left

    expected_types = (PipelineType.MIXED_HCV_MAIN,
                      PipelineType.MIXED_HCV_MIDI)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=102)
    }
    assert expected_runs == session.active_runs
    assert {main_fastq, midi_fastq} == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
    assert not folder_watcher.is_complete
Ejemplo n.º 15
0
def test_resistance_finished():
    """ Finishing every stage leaves nothing active and the folder complete. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('1234A-V3LOOP_R1_001.fastq.gz', None)
    folder_watcher.sample_watchers.append(
        SampleWatcher(SampleGroup('1234A', fastq_names)))

    # Each round launches one stage and finishes it:
    # filter_quality, then main, then resistance.
    for _ in range(3):
        folder_watcher.poll_runs()
        session.finish_all_runs()
    folder_watcher.poll_runs()  # sample is finished

    assert not session.active_runs
    assert not folder_watcher.active_samples
    assert not folder_watcher.active_runs
    assert folder_watcher.is_complete
Ejemplo n.º 16
0
    def add_sample_group(self, base_calls, sample_group):
        """ Register a sample group (main plus optional midi) for processing.

        A folder watcher is created the first time a BaseCalls folder is seen.
        At that point the folder is rechecked for "doneprocessing" and
        "errorprocessing" marker files, in case it finished after the last
        folder scan. The whole operation is repeated after a wait whenever it
        raises and self.retry is set; otherwise the exception propagates.
        :param base_calls: path to the BaseCalls folder with FASTQ files in it
        :param SampleGroup sample_group: the sample(s) to add
        :return: SampleWatcher for the sample group, or None if that folder has
            already finished processing
        """
        for attempt_count in count(1):
            # noinspection PyBroadException
            try:
                self.check_session()
                folder_watcher = self.folder_watchers.get(base_calls)
                if folder_watcher is None:
                    folder_watcher = FolderWatcher(base_calls, self)

                    # Either marker file means there is nothing left to do.
                    results_path = self.get_results_path(folder_watcher)
                    if ((results_path / "doneprocessing").exists() or
                            (folder_watcher.run_folder /
                             "errorprocessing").exists()):
                        return None

                    self.create_batch(folder_watcher)
                    self.upload_filter_quality(folder_watcher)
                    shutil.rmtree(results_path, ignore_errors=True)
                    self.folder_watchers[base_calls] = folder_watcher

                # Reuse the watcher if this group was already added.
                existing_watcher = next(
                    (watcher
                     for watcher in folder_watcher.sample_watchers
                     if watcher.sample_group == sample_group),
                    None)
                if existing_watcher is not None:
                    return existing_watcher

                new_watcher = SampleWatcher(sample_group)
                forward_names = [name
                                 for name in sample_group.names
                                 if name]
                for forward_name in forward_names:
                    reverse_name = forward_name.replace('_R1_', '_R2_')
                    read_files = ((forward_name, 'forward'),
                                  (reverse_name, 'reverse'))
                    for fastq_name, direction in read_files:
                        fastq_path = base_calls / fastq_name
                        with fastq_path.open('rb') as fastq_file:
                            description = (direction +
                                           ' read from MiSeq run ' +
                                           folder_watcher.run_name)
                            new_watcher.fastq_datasets.append(
                                self.find_or_upload_dataset(fastq_file,
                                                            fastq_name,
                                                            description))
                folder_watcher.sample_watchers.append(new_watcher)
                return new_watcher
            except Exception:
                if not self.retry:
                    raise
                wait_for_retry(attempt_count)
Ejemplo n.º 17
0
def test_hcv_mixed_hcv_running_on_singleton():
    """ A single HCV sample has MIXED_HCV_MAIN and MAIN active after polls. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_name = 'NEG1-HCV_S15_L001_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(SampleGroup('NEG1', (fastq_name, None)))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # sample runs launched
    folder_watcher.poll_runs()  # sample runs are still active

    def expected_run(run_id, pipeline_type):
        return dict(id=run_id,
                    folder_watcher=folder_watcher,
                    sample_watcher=sample_watcher,
                    pipeline_type=pipeline_type)

    expected_runs = {
        102: expected_run(102, PipelineType.MIXED_HCV_MAIN),
        103: expected_run(103, PipelineType.MAIN),
    }
    assert expected_runs == session.active_runs
    assert {fastq_name} == folder_watcher.active_samples
    assert 2 == len(folder_watcher.active_runs)
Ejemplo n.º 18
0
def test_filter_quality_finished():
    """ With denovo skipped, only MAIN is launched after filter_quality. """
    skipped = {
        PipelineType.DENOVO_MAIN,
        PipelineType.DENOVO_MIDI,
        PipelineType.DENOVO_RESISTANCE,
    }
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main launched

    main_run = dict(id=102,
                    folder_watcher=folder_watcher,
                    sample_watcher=sample_watcher,
                    pipeline_type=PipelineType.MAIN)
    assert {102: main_run} == session.active_runs
Ejemplo n.º 19
0
def test_resistance_finished():
    """ After resistance finishes, no runs or samples remain active. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None))
    folder_watcher.sample_watchers.append(SampleWatcher(sample_group))

    # Stage 1: filter_quality.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Stage 2: main.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Stage 3: resistance.
    folder_watcher.poll_runs()
    session.finish_all_runs()
    # Final poll finishes the sample.
    folder_watcher.poll_runs()

    assert not session.active_runs
    assert not folder_watcher.active_samples
    assert not folder_watcher.active_runs
    assert folder_watcher.is_complete
Ejemplo n.º 20
0
def test_denovo_resistance_complete():
    """ Finishing only denovo resistance does not complete the folder. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    sample_group = SampleGroup('1234A',
                               ('1234A-V3LOOP_R1_001.fastq.gz', None),
                               ('V3LOOP', None))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done
    folder_watcher.poll_runs()  # main runs launched
    session.finish_all_runs()  # main runs done
    folder_watcher.poll_runs()  # resistance runs launched

    # Finish just the denovo resistance run.
    session.finish_run(sample_watcher.runs[PipelineType.DENOVO_RESISTANCE])

    folder_watcher.poll_runs()  # denovo resistance is seen as finished
    folder_watcher.poll_runs()  # main resistance is still active

    assert not folder_watcher.is_complete
Ejemplo n.º 21
0
def test_hcv_filter_quality_finished():
    """ An HCV main/midi pair launches six runs after filter_quality. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    main_fastq = '2130A-HCV_S15_L001_R1_001.fastq.gz'
    midi_fastq = '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz'
    sample_group = SampleGroup('2130A',
                               (main_fastq, midi_fastq),
                               ('HCV', 'MidHCV'))
    sample_watcher = SampleWatcher(sample_group)
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # all sample runs launched

    # Run ids are expected to be assigned in this order, starting at 102.
    expected_types = (PipelineType.MIXED_HCV_MAIN,
                      PipelineType.MIXED_HCV_MIDI,
                      PipelineType.DENOVO_MAIN,
                      PipelineType.DENOVO_MIDI,
                      PipelineType.MAIN,
                      PipelineType.MIDI)
    expected_runs = {
        run_id: dict(id=run_id,
                     folder_watcher=folder_watcher,
                     sample_watcher=sample_watcher,
                     pipeline_type=pipeline_type)
        for run_id, pipeline_type in enumerate(expected_types, start=102)
    }
    assert expected_runs == session.active_runs
    assert {main_fastq, midi_fastq} == folder_watcher.active_samples
    assert 6 == len(folder_watcher.active_runs)
Ejemplo n.º 22
0
def test_mixed_hcv_skipped_and_complete():
    """ With mixed HCV skipped, finishing all stages completes the folder. """
    skipped = {PipelineType.MIXED_HCV_MAIN, PipelineType.MIXED_HCV_MIDI}
    session = DummySession(skipped_types=skipped)
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('2130A-HCV_S15_L001_R1_001.fastq.gz',
                   '2130AMIDI-MidHCV_S16_L001_R1_001.fastq.gz')
    folder_watcher.sample_watchers.append(
        SampleWatcher(SampleGroup('2130A', fastq_names)))

    # Each round launches one stage and finishes it:
    # filter_quality, then main and midi, then resistance.
    for _ in range(3):
        folder_watcher.poll_runs()
        session.finish_all_runs()
    folder_watcher.poll_runs()  # done

    assert not session.active_runs
    assert not folder_watcher.active_runs
    assert not folder_watcher.active_samples
    assert folder_watcher.is_complete
Ejemplo n.º 23
0
def test_filter_quality_failed():
    """ Failing filter_quality leaves no runs active and completes the folder. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('1234A-V3LOOP_R1_001.fastq.gz', None)
    folder_watcher.sample_watchers.append(
        SampleWatcher(SampleGroup('1234A', fastq_names)))

    folder_watcher.poll_runs()  # filter_quality launched
    # Unpacking requires that exactly one run is active.
    (failed_run,) = session.active_runs.values()
    session.fail_run(failed_run)

    folder_watcher.poll_runs()  # poll again after the failure

    assert {} == session.active_runs
    assert folder_watcher.is_complete
Ejemplo n.º 24
0
def test_resistance_running():
    """ An unfinished resistance run keeps the sample active and folder open. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_name = '1234A-V3LOOP_R1_001.fastq.gz'
    sample_watcher = SampleWatcher(SampleGroup('1234A', (fastq_name, None)))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done
    folder_watcher.poll_runs()  # main launched
    session.finish_all_runs()  # main done
    folder_watcher.poll_runs()  # resistance launched
    folder_watcher.poll_runs()  # resistance is still active

    resistance_run = dict(id=103,
                          folder_watcher=folder_watcher,
                          sample_watcher=sample_watcher,
                          pipeline_type=PipelineType.RESISTANCE)
    assert {103: resistance_run} == session.active_runs
    assert {fastq_name} == folder_watcher.active_samples
    assert not folder_watcher.is_complete
Ejemplo n.º 25
0
def test_main_running():
    """ Polling again while main is unfinished leaves it as the active run. """
    session = DummySession()
    folder_watcher = FolderWatcher('/path/Data/Intensities/BaseCalls',
                                   runner=session)
    fastq_names = ('1234A-V3LOOP_R1_001.fastq.gz', None)
    sample_watcher = SampleWatcher(SampleGroup('1234A', fastq_names))
    folder_watcher.sample_watchers.append(sample_watcher)

    folder_watcher.poll_runs()  # filter_quality launched
    session.finish_all_runs()  # filter_quality done

    folder_watcher.poll_runs()  # main launched
    folder_watcher.poll_runs()  # main is still active

    main_run = {
        'id': 102,
        'folder_watcher': folder_watcher,
        'sample_watcher': sample_watcher,
        'pipeline_type': PipelineType.MAIN,
    }
    assert {102: main_run} == session.active_runs
Ejemplo n.º 26
0
 def add_folder(self, base_calls):
     """ Create a FolderWatcher for base_calls, register it, and return it. """
     # Chained assignment: one new watcher is bound locally and stored in
     # self.folder_watchers under base_calls.
     watcher = self.folder_watchers[base_calls] = FolderWatcher(base_calls,
                                                                self)
     return watcher
Ejemplo n.º 27
0
def test_folder_watcher_repr_with_pathlib():
    """ repr() shows the BaseCalls folder as a plain string for a Path. """
    watcher = FolderWatcher(Path('/path/Data/Intensities/BaseCalls'))

    expected = "FolderWatcher('/path/Data/Intensities/BaseCalls')"
    assert expected == repr(watcher)
Ejemplo n.º 28
0
    def run_pipeline(self, folder_watcher: FolderWatcher,
                     pipeline_type: PipelineType,
                     sample_watcher: SampleWatcher):
        """ Find or launch the run for one pipeline type.

        :param folder_watcher: supplies the run name, batch, quality dataset,
            filter quality run, and cached bad cycles dataset
        :param pipeline_type: selects which pipeline to launch and which
            inputs to give it
        :param sample_watcher: supplies the FASTQ datasets and sample names;
            not read for FILTER_QUALITY
        :return: the result of find_or_launch_run() or
            run_resistance_pipeline(), or None when the relevant pipeline id
            in self.config is None
        """
        if pipeline_type == PipelineType.FILTER_QUALITY:
            # Folder-level run: its only input is the folder's quality data.
            return self.find_or_launch_run(
                self.config.micall_filter_quality_pipeline_id,
                dict(quality_csv=folder_watcher.quality_dataset),
                'MiCall filter quality on ' + folder_watcher.run_name,
                folder_watcher.batch)
        if pipeline_type == PipelineType.RESISTANCE:
            # Resistance is given the main and midi pipeline types as inputs.
            run = self.run_resistance_pipeline(
                sample_watcher, folder_watcher,
                (PipelineType.MAIN, PipelineType.MIDI), 'MiCall resistance')
            return run
        if pipeline_type == PipelineType.DENOVO_RESISTANCE:
            # Same helper, but with the denovo main and midi pipeline types.
            run = self.run_resistance_pipeline(
                sample_watcher, folder_watcher,
                (PipelineType.DENOVO_MAIN, PipelineType.DENOVO_MIDI),
                'MiCall denovo resistance')
            return run
        if pipeline_type in (PipelineType.MIXED_HCV_MAIN,
                             PipelineType.MIXED_HCV_MIDI):
            # Mixed HCV is optional: no configured pipeline id means no run.
            if self.config.mixed_hcv_pipeline_id is None:
                return None
            # NOTE(review): the indexes assume fastq_datasets is laid out as
            # [main R1, main R2, midi R1, midi R2] - confirm where it's built.
            if pipeline_type == PipelineType.MIXED_HCV_MAIN:
                input_datasets = dict(fastq1=sample_watcher.fastq_datasets[0],
                                      fastq2=sample_watcher.fastq_datasets[1])
                sample_name = sample_watcher.sample_group.names[0]
            else:
                input_datasets = dict(fastq1=sample_watcher.fastq_datasets[2],
                                      fastq2=sample_watcher.fastq_datasets[3])
                sample_name = sample_watcher.sample_group.names[1]
            return self.find_or_launch_run(
                self.config.mixed_hcv_pipeline_id, input_datasets,
                'Mixed HCV on ' + trim_name(sample_name), folder_watcher.batch)
        # Remaining types share one launch path. group_position picks the
        # sample within the group: 0 for main, 1 for midi.
        if pipeline_type == PipelineType.MAIN:
            group_position = 0
            run_name = 'MiCall main'
            pipeline_id = self.config.micall_main_pipeline_id
        elif pipeline_type == PipelineType.MIDI:
            group_position = 1
            run_name = 'MiCall main'
            pipeline_id = self.config.micall_main_pipeline_id
        elif pipeline_type == PipelineType.DENOVO_MAIN:
            group_position = 0
            run_name = 'MiCall denovo main'
            pipeline_id = self.config.denovo_main_pipeline_id
        else:
            # Every other pipeline type was handled above.
            assert pipeline_type == PipelineType.DENOVO_MIDI
            group_position = 1
            run_name = 'MiCall denovo main'
            pipeline_id = self.config.denovo_main_pipeline_id
        if pipeline_id is None:
            # The selected pipeline is not configured, so nothing to launch.
            return None
        # Take the pair of datasets at [2 * position, 2 * position + 2).
        fastq1, fastq2 = sample_watcher.fastq_datasets[group_position *
                                                       2:(group_position + 1) *
                                                       2]
        sample_name = sample_watcher.sample_group.names[group_position]
        run_name += ' on ' + trim_name(sample_name)
        sample_info = self.get_sample_info(pipeline_id, sample_watcher,
                                           group_position)
        if folder_watcher.bad_cycles_dataset is None:
            # First use for this folder: look up the bad_cycles_csv entry in
            # the filter quality run's dataset list, then store the fetched
            # dataset on the folder watcher so it is not fetched again.
            filter_run_id = folder_watcher.filter_quality_run['id']
            run_datasets = self.kive_retry(
                lambda: self.session.endpoints.containerruns.get(
                    f'{filter_run_id}/dataset_list/'))
            # Unpacking requires exactly one matching entry.
            bad_cycles_run_dataset, = [
                run_dataset for run_dataset in run_datasets
                if run_dataset['argument_name'] == 'bad_cycles_csv'
            ]
            folder_watcher.bad_cycles_dataset = self.kive_retry(
                lambda: self.session.get(bad_cycles_run_dataset['dataset']
                                         ).json())

        inputs = dict(fastq1=fastq1,
                      fastq2=fastq2,
                      bad_cycles_csv=folder_watcher.bad_cycles_dataset)
        if sample_info is not None:
            # The sample info input is only passed when one was returned.
            inputs['sample_info_csv'] = sample_info
        return self.find_or_launch_run(pipeline_id, inputs, run_name,
                                       folder_watcher.batch)