Beispiel #1
0
    def setUp(self):
        """Build a KiveLoader whose external calls are replaced by fakes.

        Each fake either returns canned data held on the test case or records
        its arguments in one of the lists created here, so tests can adjust
        the inputs and inspect what the loader did.
        """
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        # Undo the process-wide switch after each test so that it does not
        # silence logging in tests that run later.
        self.addCleanup(logging.disable, logging.NOTSET)
        self.loader = KiveLoader()
        self.loader.add_pipeline(pipeline_id=DEFAULT_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 name_format='Default - {sample} ({folder})')
        self.existing_datasets = []
        self.existing_runs = {}
        self.uploaded = []
        self.launched = []
        self.batches = []
        self.cancelled = []
        self.completed = []
        self.failed = []
        self.purged = []
        self.downloaded = []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'
        self.disabled_folders = []
        self.folder_retries = []
        self.run_info_path = os.path.join(os.path.dirname(__file__),
                                          'kive_loader_run_info.xml')

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = lambda: []
        self.loader.find_files = lambda folder: []
        self.loader.find_preexisting_runs = lambda: self.existing_runs
        # list.append() returns None, so "append(...) or value" records the
        # call and then evaluates to the value.
        self.loader.upload_kive_dataset = lambda filename, description, cdt: (
            self.uploaded.append((filename, description, cdt)) or filename)
        self.loader.download_quality = lambda folder: folder + '/quality.csv'
        self.loader.find_kive_dataset = lambda filename, cdt: (
            filename if filename in self.existing_datasets else None)
        self.loader.get_sample_name = lambda fastq1: os.path.basename(
            fastq1).split('_')[0]
        # Status fakes look at run[1], checked against the names recorded by
        # the launch_run fake.
        self.loader.fetch_run_status = lambda run: (
            RUN_COMPLETED if run[1] in self.completed else RUN_CANCELLED
            if run[1] in self.cancelled else RUN_ACTIVE)
        self.loader.fetch_output_status = lambda run: (
            RUN_PURGED if run[1] in self.purged else RUN_FAILED
            if run[1] in self.failed else RUN_COMPLETED)
        self.loader.get_run_id = lambda run: run
        self.loader.download_results = lambda folder, runs: (
            self.downloaded.append((folder, runs)))
        self.loader.get_time = lambda: self.now
        self.loader.mark_folder_disabled = lambda folder, message, exc_info: (
            self.disabled_folders.append(folder))
        self.loader.log_retry = lambda folder: self.folder_retries.append(
            folder)

        # Note: get_run_key must match return value of launch_run
        self.loader.get_run_key = lambda pipeline_id, quality, fastq1, fastq2: (
            self.loader.get_run_name(pipeline_id,
                                     self.loader.get_sample_name(fastq1)))
        self.loader.launch_run = lambda pipeline_id, run_name, inputs, batch_id: (
            self.launched.append(run_name) or run_name)
        # The fake batch id is the number of batches created so far.
        self.loader.create_batch = lambda batch_name: (self.batches.append(
            batch_name) or len(self.batches))
Beispiel #2
0
    def test_write_quality(self):
        quality_csv = StringIO()
        metrics = [{
            "tile": 1101,
            "cycle": 1,
            "errorrate": "0.01"
        }, {
            "tile": 1101,
            "cycle": 2,
            "errorrate": "0.02"
        }, {
            "tile": 1101,
            "cycle": 12,
            "errorrate": "0.12"
        }]
        run_info = KiveLoader.parse_run_info(self.run_info_path)
        expected_quality = """\
tile,cycle,errorrate
1101,1,0.01
1101,2,0.02
1101,3,
1101,4,
1101,5,
1101,-1,0.12
1101,-2,
1101,-3,
1101,-4,
1101,-5,
"""

        KiveLoader.write_quality(quality_csv, metrics, run_info)

        self.assertEqual(expected_quality, quality_csv.getvalue())
Beispiel #3
0
def main():
    """Create a KiveLoader from the command-line options and poll it forever.

    Each call to poll() returns the delay to sleep before the next call.
    An ordinary exception is logged with its traceback and ends the loop.
    KeyboardInterrupt and SystemExit are left to propagate, so that stopping
    the process is not reported as a fatal error.
    """
    args = parse_args()
    logger.info('Starting up.')
    try:
        loader = KiveLoader(launch_limit=args.max,
                            status_delay=args.status_delay,
                            folder_delay=args.folder_delay,
                            retry_delay=args.retry_delay)
        while True:
            delay = loader.poll()
            sleep(delay)
    except Exception:
        # Catch Exception rather than everything: a bare except would also
        # trap KeyboardInterrupt and SystemExit.
        logger.error('Fatal error.', exc_info=True)
Beispiel #4
0
def main():
    """Create a KiveLoader, register the configured pipelines, poll forever.

    Pipelines come from settings.kive_pipelines, which maps a pipeline id to
    the keyword options passed to add_pipeline(). Each poll() returns the
    delay to sleep before polling again.
    """
    args = parse_args()
    logger.info('Starting up.')

    loader_options = dict(launch_limit=args.max,
                          status_delay=args.status_delay,
                          folder_delay=args.folder_delay,
                          retry_delay=args.retry_delay)
    loader = KiveLoader(**loader_options)

    for pipeline_id, pipeline_options in settings.kive_pipelines.items():
        loader.add_pipeline(pipeline_id, **pipeline_options)

    while True:
        sleep(loader.poll())
Beispiel #5
0
def main():
    """Create a KiveLoader from the command-line options and poll it forever.

    Each call to poll() returns the delay to sleep before the next call.
    An ordinary exception is logged with its traceback and ends the loop.
    KeyboardInterrupt and SystemExit are left to propagate, so that stopping
    the process is not reported as a fatal error.
    """
    args = parse_args()
    logger.info('Starting up.')
    try:
        loader = KiveLoader(launch_limit=args.max,
                            status_delay=args.status_delay,
                            folder_delay=args.folder_delay,
                            retry_delay=args.retry_delay)
        while True:
            delay = loader.poll()
            sleep(delay)
    except Exception:
        # Catch Exception rather than everything: a bare except would also
        # trap KeyboardInterrupt and SystemExit.
        logger.error('Fatal error.', exc_info=True)
Beispiel #6
0
    def setUp(self):
        """Create a KiveLoader with every external call swapped for a fake."""
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        self.loader = KiveLoader()

        # Canned data the fakes read from, and lists they record calls in.
        self.existing_datasets = []
        self.existing_runs = {}
        self.uploaded = []
        self.launched = []
        self.completed = []
        self.downloaded = []
        self.disabled_folders = []
        self.folder_retries = []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        def no_folders():
            return []

        def no_files(folder):
            return []

        def preexisting_runs():
            return self.existing_runs

        def run_key(quality, fastq1, fastq2):
            return quality, fastq1, fastq2

        def record_upload(filename, description, cdt):
            self.uploaded.append((filename, description, cdt))
            return filename

        def quality_path(folder):
            return folder + '/quality.csv'

        def lookup_dataset(filename, cdt):
            if filename in self.existing_datasets:
                return filename
            return None

        def record_launch(quality, fastq1, fastq2):
            # The returned run is the same tuple that run_key() produces.
            run = (quality, fastq1, fastq2)
            self.launched.append(run)
            return run

        def is_complete(run):
            return run in self.completed

        def record_download(folder, runs):
            self.downloaded.append((folder, runs))

        def current_time():
            return self.now

        def record_disabled(folder, message, exc_info):
            self.disabled_folders.append(folder)

        def record_retry(folder):
            self.folder_retries.append(folder)

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = no_folders
        self.loader.find_files = no_files
        self.loader.find_preexisting_runs = preexisting_runs
        self.loader.get_run_key = run_key
        self.loader.upload_kive_dataset = record_upload
        self.loader.download_quality = quality_path
        self.loader.find_kive_dataset = lookup_dataset
        self.loader.launch_run = record_launch
        self.loader.is_run_complete = is_complete
        self.loader.download_results = record_download
        self.loader.get_time = current_time
        self.loader.mark_folder_disabled = record_disabled
        self.loader.log_retry = record_retry
Beispiel #7
0
    def setUp(self):
        """Replace the loader's external calls with recording fakes."""
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        loader = self.loader = KiveLoader()
        self.existing_datasets, self.existing_runs = [], {}
        self.uploaded, self.launched = [], []
        self.completed, self.downloaded = [], []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'
        self.disabled_folders, self.folder_retries = [], []

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        def upload_kive_dataset(filename, description, cdt):
            self.uploaded.append((filename, description, cdt))
            return filename

        def find_kive_dataset(filename, cdt):
            return filename if filename in self.existing_datasets else None

        def launch_run(quality, fastq1, fastq2):
            launched_run = (quality, fastq1, fastq2)
            self.launched.append(launched_run)
            return launched_run

        def download_results(folder, runs):
            self.downloaded.append((folder, runs))

        def mark_folder_disabled(folder, message, exc_info):
            self.disabled_folders.append(folder)

        def log_retry(folder):
            self.folder_retries.append(folder)

        # Attribute name on the loader -> fake installed under that name.
        fakes = {
            'check_kive_connection': check_kive_connection,
            'find_folders': lambda: [],
            'find_files': lambda folder: [],
            'find_preexisting_runs': lambda: self.existing_runs,
            'get_run_key': lambda quality, fastq1, fastq2: (quality,
                                                            fastq1,
                                                            fastq2),
            'upload_kive_dataset': upload_kive_dataset,
            'download_quality': lambda folder: folder + '/quality.csv',
            'find_kive_dataset': find_kive_dataset,
            'launch_run': launch_run,
            'is_run_complete': lambda run: run in self.completed,
            'download_results': download_results,
            'get_time': lambda: self.now,
            'mark_folder_disabled': mark_folder_disabled,
            'log_retry': log_retry,
        }
        for attribute, fake in fakes.items():
            setattr(loader, attribute, fake)
Beispiel #8
0
class KiveLoaderTest(unittest.TestCase):
    """Exercise KiveLoader.poll() against fakes installed by setUp().

    The fakes record uploads, launches and downloads in lists on the test
    case. Tests replace find_folders / find_files to describe the available
    data, call poll() one step at a time, and compare the recorded calls.
    """

    def setUp(self):
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        # Undo the process-wide switch after each test so that it does not
        # silence logging in tests that run later.
        self.addCleanup(logging.disable, logging.NOTSET)
        self.loader = KiveLoader()
        self.loader.add_pipeline(id=DEFAULT_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 format='Default - {sample} ({folder})')
        self.existing_datasets = []
        self.existing_runs = {}
        self.uploaded = []
        self.launched = []
        self.cancelled = []
        self.completed = []
        self.failed = []
        self.purged = []
        self.downloaded = []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'
        self.disabled_folders = []
        self.folder_retries = []

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = lambda: []
        self.loader.find_files = lambda folder: []
        self.loader.find_preexisting_runs = lambda: self.existing_runs
        # list.append() returns None, so "append(...) or value" records the
        # call and then evaluates to the value.
        self.loader.upload_kive_dataset = lambda filename, description, cdt: (
            self.uploaded.append((filename, description, cdt)) or filename)
        self.loader.download_quality = lambda folder: folder + '/quality.csv'
        self.loader.find_kive_dataset = lambda filename, cdt: (
            filename if filename in self.existing_datasets else None)
        self.loader.get_sample_name = lambda fastq1: os.path.basename(
            fastq1).split('_')[0]
        # Status fakes look at run[1], checked against the names recorded by
        # the launch_run fake.
        self.loader.fetch_run_status = lambda run: (
            RUN_COMPLETED if run[1] in self.completed else RUN_CANCELLED
            if run[1] in self.cancelled else RUN_ACTIVE)
        self.loader.fetch_output_status = lambda run: (
            RUN_PURGED if run[1] in self.purged else RUN_FAILED
            if run[1] in self.failed else RUN_COMPLETED)
        self.loader.get_run_id = lambda run: run
        self.loader.download_results = lambda folder, runs: (
            self.downloaded.append((folder, runs)))
        self.loader.get_time = lambda: self.now
        self.loader.mark_folder_disabled = lambda folder, message, exc_info: (
            self.disabled_folders.append(folder))
        self.loader.log_retry = lambda folder: self.folder_retries.append(
            folder)

        # Note: get_run_key must match return value of launch_run
        self.loader.get_run_key = lambda pipeline_id, quality, fastq1, fastq2: (
            self.loader.get_run_name(pipeline_id,
                                     self.loader.get_sample_name(fastq1)))
        self.loader.launch_run = lambda pipeline_id, run_name, inputs: (
            self.launched.append(run_name) or run_name)

    def test_idle(self):
        delay = self.loader.poll()

        self.assertEqual(self.loader.folder_delay, delay)

    def test_upload_one_sample(self):
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_uploaded = [
            ('run2/quality.csv', 'phiX174 quality from MiSeq run run2',
             self.quality_cdt),
            ('run2/sample1_R1_x.fastq', 'forward read from MiSeq run run2',
             None),
            ('run2/sample1_R2_x.fastq', 'reverse read from MiSeq run run2',
             None)
        ]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_launch_one_sample(self):
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = ['Default - sample1 (run2)']

        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_launch_two_samples(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)'
        ]

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_launch_two_folders(self):
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)'
        ]

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_launch_two_pipelines(self):
        self.loader.add_pipeline(id=EXTRA_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Extra - sample1 (run2)',
            'Default - sample2 (run2)', 'Extra - sample2 (run2)'
        ]
        expected_downloaded = [('run2',
                                [('sample1', 'Default - sample1 (run2)'),
                                 ('sample1', 'Extra - sample1 (run2)'),
                                 ('sample2', 'Default - sample2 (run2)'),
                                 ('sample2', 'Extra - sample2 (run2)')])]

        self.loader.poll()
        self.loader.poll()

        self.completed = expected_launched[:]
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_downloaded, self.downloaded)

    def test_pipelines_diff_inputs(self):
        # Record the pipeline id and inputs instead of the run name.
        self.loader.launch_run = lambda pipeline_id, run_name, inputs: (
            self.launched.append((pipeline_id, inputs)))
        self.loader.add_pipeline(id=EXTRA_PIPELINE_ID,
                                 inputs=['fastq1'],
                                 format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [(DEFAULT_PIPELINE_ID, [
            'run2/quality.csv', 'run2/sample1_R1_x.fastq',
            'run2/sample1_R2_x.fastq'
        ]), (EXTRA_PIPELINE_ID, ['run2/sample1_R1_x.fastq'])]

        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_pipelines_diff_patterns(self):
        self.loader.add_pipeline(id=EXTRA_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 pattern='HCV',
                                 format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2']
        self.loader.find_files = lambda folder: [
            folder + '/sample1-HIV_R1_x.fastq', folder +
            '/sample2-HCV_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1-HIV (run2)', 'Default - sample2-HCV (run2)',
            'Extra - sample2-HCV (run2)'
        ]
        expected_downloaded = [
            ('run2', [('sample1-HIV', 'Default - sample1-HIV (run2)'),
                      ('sample2-HCV', 'Default - sample2-HCV (run2)'),
                      ('sample2-HCV', 'Extra - sample2-HCV (run2)')])
        ]

        self.loader.poll()
        self.loader.poll()

        self.completed = expected_launched[:]
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_downloaded, self.downloaded)

    def test_launch_finished(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = ['Default - sample1 (run1)']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_trimmed_folder(self):
        self.loader.find_folders = lambda: ['path/160214_M01234_AX23']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = [
            ('path/160214_M01234_AX23/quality.csv',
             'phiX174 quality from MiSeq run 160214_M01234', 'quality CDT'),
            ('path/160214_M01234_AX23/sample1_R1_x.fastq',
             'forward read from MiSeq run 160214_M01234', None),
            ('path/160214_M01234_AX23/sample1_R2_x.fastq',
             'reverse read from MiSeq run 160214_M01234', None)
        ]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_datasets_exist(self):
        self.existing_datasets = [
            'run1/quality.csv', 'run1/sample1_R1_x.fastq',
            'run1/sample1_R2_x.fastq'
        ]
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = []

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_some_datasets_exist(self):
        self.existing_datasets = [
            'run1/quality.csv', 'run1/sample1_R2_x.fastq'
        ]
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = [('run1/sample1_R1_x.fastq',
                              'forward read from MiSeq run run1', None)]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_download_one_sample(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded = [('run1', [('sample1',
                                          'Default - sample1 (run1)')])]

        self.loader.poll()  # launches
        self.loader.poll()  # checks status, not finished
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish run
        self.loader.poll()  # finished, so now download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)
        self.assertEqual({}, self.loader.batches)

    def test_download_two_samples(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_downloaded = [('run1',
                                [('sample1', 'Default - sample1 (run1)'),
                                 ('sample2', 'Default - sample2 (run1)')])]

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # finish sample 1
        self.loader.poll()  # check status, sample 2 not finished
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish sample 2
        self.loader.poll()  # finished, so now download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)

    def test_download_two_folders(self):
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded1 = [('run2', [('sample1',
                                           'Default - sample1 (run2)')])]
        expected_downloaded2 = [
            ('run2', [('sample1', 'Default - sample1 (run2)')]),
            ('run1', [('sample1', 'Default - sample1 (run1)')])
        ]

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # finish sample 1
        self.loader.poll()  # check status, download 1
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish sample 2
        self.loader.poll()  # check status, download 2
        downloaded2 = self.downloaded[:]

        self.assertEqual(expected_downloaded1, downloaded1)
        self.assertEqual(expected_downloaded2, downloaded2)

    def test_limit_launches(self):
        self.loader.launch_limit = 3
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched1 = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)',
            'Default - sample1 (run1)'
        ]
        expected_launched2 = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)',
            'Default - sample1 (run1)', 'Default - sample2 (run1)'
        ]

        self.loader.poll()  # launch run2, sample1
        self.loader.poll()  # launch run2, sample2
        self.loader.poll()  # launch run1, sample1
        self.loader.poll()  # don't launch anything, limit reached
        launched1 = self.launched[:]

        self.completed = self.launched[:1]
        self.loader.poll()  # launch run1, sample2
        launched2 = self.launched[:]

        self.assertEqual(expected_launched1, launched1)
        self.assertEqual(expected_launched2, launched2)

    def test_no_limit_on_latest_run(self):
        self.loader.launch_limit = 1
        self.loader.latest_folder = 'run2'
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)'
        ]

        self.loader.poll()  # launch run2, sample1
        self.loader.poll()  # launch run2, sample2, despite the limit of 1
        self.loader.poll()  # don't launch anything from run1, limit reached
        launched = self.launched[:]

        self.assertEqual(expected_launched, launched)

    def test_timing_wait(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]

        sleep1 = self.loader.poll()  # upload, no wait before next upload
        sleep2 = self.loader.poll()  # check status, wait before next check

        self.assertEqual(0, sleep1)
        self.assertEqual(self.loader.status_delay, sleep2)

    def test_new_folder(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched1 = ['Default - sample1 (run1)']
        expected_launched2 = [
            'Default - sample1 (run1)', 'Default - sample1 (run2)'
        ]

        self.loader.poll()  # launch run1, sample1
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.now += timedelta(seconds=self.loader.folder_delay - 1)

        self.loader.poll()  # not ready to scan for new folder

        launched1 = self.launched[:]
        self.now += timedelta(seconds=1)

        self.loader.poll()  # launch run2, sample1

        launched2 = self.launched[:]

        self.assertEqual(expected_launched1, launched1)
        self.assertEqual(expected_launched2, launched2)

    def test_completed_folder_no_reset(self):
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)'
        ]

        self.loader.poll()  # launch 2
        self.loader.poll()  # launch 1
        self.completed = self.launched[:1]  # finish run 2
        self.loader.poll()  # check status, download 2

        self.loader.find_folders = lambda: ['run1']
        self.now += timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # check status
        launched = self.launched[:]

        self.assertEqual(expected_launched, launched)

    def test_cancelled_run(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)',
            'Default - sample1 (run1)'
        ]
        expected_downloaded = []

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.cancelled = self.launched[:1]  # cancel run 1
        self.loader.poll()  # check status, rerun 1

        launched = self.launched[:]
        downloaded = self.downloaded[:]

        self.assertEqual(expected_launched, launched)
        self.assertEqual(expected_downloaded, downloaded)

    def test_purged_run(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)',
            'Default - sample1 (run1)'
        ]
        expected_downloaded = []

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # complete run 1
        self.loader.poll()
        self.purged = self.launched[:1]  # purge some results from run 1
        self.loader.poll()  # check status, rerun 1

        launched = self.launched[:]
        downloaded = self.downloaded[:]

        self.assertEqual(expected_launched, launched)
        self.assertEqual(expected_downloaded, downloaded)

    def test_preexisting_runs(self):
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        self.existing_runs = {'Default - sample1 (run1)': 'dummy run status'}
        expected_launched = ['Default - sample2 (run1)']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_unable_to_check_status(self):
        is_kive_running = False

        def fetch_run_status(run):
            # Reads the enclosing variable at call time, so flipping
            # is_kive_running below changes what this fake does.
            if not is_kive_running:
                raise StandardError('Kive connection failed.')
            return RUN_COMPLETED

        self.loader.fetch_run_status = fetch_run_status

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded = [('run1', [('sample1',
                                          'Default - sample1 (run1)')])]

        self.loader.poll()  # launch 1
        self.loader.poll()  # check status, catch exception
        downloaded1 = self.downloaded[:]

        is_kive_running = True
        self.loader.poll()  # check status successfully, and download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)

    def test_failed_quality_download(self):
        def download_quality(folder):
            raise StandardError('Mock quality failure.')

        self.loader.download_quality = download_quality

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        retry_delays = []

        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        final_delay = self.loader.poll()

        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)

    def test_failed_results_download(self):
        # Same parameters as the download_results fake in setUp(), so the
        # failure the loader sees is the mock error raised here and not a
        # TypeError from a mismatched call.
        def download_results(folder, runs):
            raise StandardError('Mock Kive failure.')

        self.loader.download_results = download_results

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        retry_delays = []

        first_delay = self.loader.poll()  # launch 1
        self.completed = self.launched[:]
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        final_delay = self.loader.poll()

        self.assertEqual(0, first_delay)
        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)
Beispiel #9
0
    def test_parse_run_info(self):
        # Parsing the fixture at self.run_info_path should give a
        # read_lengths value of [5, 5].
        parsed = KiveLoader.parse_run_info(self.run_info_path)

        self.assertEqual([5, 5], parsed.read_lengths)
Beispiel #10
0
class KiveLoaderTest(unittest.TestCase):
    """Drive KiveLoader.poll() against stubbed Kive and file-system hooks.

    setUp() swaps the loader's I/O methods for stand-ins that record their
    arguments on this test case; each test then calls poll() and checks the
    recorded uploads, launches, batches, downloads or returned delays.
    """

    def setUp(self):
        """Create a loader with the default pipeline and recording stubs."""
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        # Undo the global logging switch after each test so it cannot leak
        # into whatever runs later in the same process.
        self.addCleanup(logging.disable, logging.NOTSET)
        self.loader = KiveLoader()
        self.loader.add_pipeline(pipeline_id=DEFAULT_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 name_format='Default - {sample} ({folder})')
        # State that the stubs below read from or append to.
        self.existing_datasets = []
        self.existing_runs = {}
        self.uploaded = []
        self.launched = []
        self.batches = []
        self.cancelled = []
        self.completed = []
        self.failed = []
        self.purged = []
        self.downloaded = []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'
        self.disabled_folders = []
        self.folder_retries = []
        self.run_info_path = os.path.join(os.path.dirname(__file__),
                                          'kive_loader_run_info.xml')

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = lambda: []
        self.loader.find_files = lambda folder: []
        self.loader.find_preexisting_runs = lambda: self.existing_runs
        # list.append() returns None, so "append(...) or x" records the call
        # and still returns x.
        self.loader.upload_kive_dataset = lambda filename, description, cdt: (
            self.uploaded.append((filename, description, cdt)) or filename)
        self.loader.download_quality = lambda folder: folder + '/quality.csv'
        self.loader.find_kive_dataset = lambda filename, cdt: (
            filename if filename in self.existing_datasets else None)
        self.loader.get_sample_name = lambda fastq1: os.path.basename(
            fastq1).split('_')[0]
        # Status stubs look up run[1] in the lists that tests fill in.
        self.loader.fetch_run_status = lambda run: (
            RUN_COMPLETED if run[1] in self.completed else RUN_CANCELLED
            if run[1] in self.cancelled else RUN_ACTIVE)
        self.loader.fetch_output_status = lambda run: (
            RUN_PURGED if run[1] in self.purged else RUN_FAILED
            if run[1] in self.failed else RUN_COMPLETED)
        self.loader.get_run_id = lambda run: run
        self.loader.download_results = lambda folder, runs: (
            self.downloaded.append((folder, runs)))
        self.loader.get_time = lambda: self.now
        self.loader.mark_folder_disabled = lambda folder, message, exc_info: (
            self.disabled_folders.append(folder))
        self.loader.log_retry = lambda folder: self.folder_retries.append(
            folder)

        # Note: get_run_key must match return value of launch_run
        self.loader.get_run_key = lambda pipeline_id, quality, fastq1, fastq2: (
            self.loader.get_run_name(pipeline_id,
                                     self.loader.get_sample_name(fastq1)))
        self.loader.launch_run = lambda pipeline_id, run_name, inputs, batch_id: (
            self.launched.append(run_name) or run_name)
        # The batch count after appending doubles as the batch id.
        self.loader.create_batch = lambda batch_name: (self.batches.append(
            batch_name) or len(self.batches))

    def test_idle(self):
        """With no folders found, poll() returns the folder delay."""
        delay = self.loader.poll()

        self.assertEqual(self.loader.folder_delay, delay)

    def test_upload_one_sample(self):
        """One poll uploads quality and both reads of run2's first sample."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_uploaded = [
            ('run2/quality.csv', 'phiX174 quality from MiSeq run run2',
             self.quality_cdt),
            ('run2/sample1_R1_x.fastq', 'forward read from MiSeq run run2',
             None),
            ('run2/sample1_R2_x.fastq', 'reverse read from MiSeq run run2',
             None)
        ]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_launch_one_sample(self):
        """One poll launches only the first sample of the first folder."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = ['Default - sample1 (run2)']

        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_launch_two_samples(self):
        """Two polls launch both samples under a single batch."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)'
        ]
        expected_batches = ['run1']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_batches, self.batches)

    def test_launch_two_folders(self):
        """Two polls launch one sample per folder, one batch per folder."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)'
        ]
        expected_batches = ['run2', 'run1']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_batches, self.batches)

    def test_launch_two_pipelines(self):
        """Each sample runs on both pipelines and all runs download together."""
        self.loader.add_pipeline(pipeline_id=EXTRA_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 name_format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Extra - sample1 (run2)',
            'Default - sample2 (run2)', 'Extra - sample2 (run2)'
        ]
        expected_downloaded = [('run2',
                                [('sample1', 'Default - sample1 (run2)'),
                                 ('sample1', 'Extra - sample1 (run2)'),
                                 ('sample2', 'Default - sample2 (run2)'),
                                 ('sample2', 'Extra - sample2 (run2)')])]

        self.loader.poll()
        self.loader.poll()

        self.completed = expected_launched[:]
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_downloaded, self.downloaded)

    def test_pipelines_diff_inputs(self):
        """Each pipeline is launched with only the inputs it declared."""
        # Record the pipeline id and inputs instead of the run name.
        self.loader.launch_run = lambda pipeline_id, run_name, inputs, batch_id: (
            self.launched.append((pipeline_id, inputs)))
        self.loader.add_pipeline(pipeline_id=EXTRA_PIPELINE_ID,
                                 inputs=['fastq1'],
                                 name_format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [(DEFAULT_PIPELINE_ID, [
            'run2/quality.csv', 'run2/sample1_R1_x.fastq',
            'run2/sample1_R2_x.fastq'
        ]), (EXTRA_PIPELINE_ID, ['run2/sample1_R1_x.fastq'])]

        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_pipelines_diff_patterns(self):
        """A pipeline with a pattern only runs on samples that match it."""
        self.loader.add_pipeline(pipeline_id=EXTRA_PIPELINE_ID,
                                 inputs=['quality', 'fastq1', 'fastq2'],
                                 pattern='HCV',
                                 name_format='Extra - {sample} ({folder})')
        self.loader.find_folders = lambda: ['run2']
        self.loader.find_files = lambda folder: [
            folder + '/sample1-HIV_R1_x.fastq', folder +
            '/sample2-HCV_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1-HIV (run2)', 'Default - sample2-HCV (run2)',
            'Extra - sample2-HCV (run2)'
        ]
        expected_downloaded = [
            ('run2', [('sample1-HIV', 'Default - sample1-HIV (run2)'),
                      ('sample2-HCV', 'Default - sample2-HCV (run2)'),
                      ('sample2-HCV', 'Extra - sample2-HCV (run2)')])
        ]

        self.loader.poll()
        self.loader.poll()

        self.completed = expected_launched[:]
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)
        self.assertEqual(expected_downloaded, self.downloaded)

    def test_launch_finished(self):
        """Polling again after the only sample launched does not relaunch it."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = ['Default - sample1 (run1)']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_trimmed_folder(self):
        """Upload descriptions name the run as 160214_M01234, not the path."""
        self.loader.find_folders = lambda: ['path/160214_M01234_AX23']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = [
            ('path/160214_M01234_AX23/quality.csv',
             'phiX174 quality from MiSeq run 160214_M01234', 'quality CDT'),
            ('path/160214_M01234_AX23/sample1_R1_x.fastq',
             'forward read from MiSeq run 160214_M01234', None),
            ('path/160214_M01234_AX23/sample1_R2_x.fastq',
             'reverse read from MiSeq run 160214_M01234', None)
        ]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_datasets_exist(self):
        """Nothing is uploaded when every dataset is already found in Kive."""
        self.existing_datasets = [
            'run1/quality.csv', 'run1/sample1_R1_x.fastq',
            'run1/sample1_R2_x.fastq'
        ]
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = []

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_some_datasets_exist(self):
        """Only the dataset that is not already in Kive gets uploaded."""
        self.existing_datasets = [
            'run1/quality.csv', 'run1/sample1_R2_x.fastq'
        ]
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_uploaded = [('run1/sample1_R1_x.fastq',
                              'forward read from MiSeq run run1', None)]

        self.loader.poll()

        self.assertEqual(expected_uploaded, self.uploaded)

    def test_download_one_sample(self):
        """Results download once the run completes, leaving no batches."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded = [('run1', [('sample1',
                                          'Default - sample1 (run1)')])]

        self.loader.poll()  # launches
        self.loader.poll()  # checks status, not finished
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish run
        self.loader.poll()  # finished, so now download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)
        self.assertEqual({}, self.loader.batches)

    def test_download_two_samples(self):
        """Nothing downloads until both samples in the folder have finished."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_downloaded = [('run1',
                                [('sample1', 'Default - sample1 (run1)'),
                                 ('sample2', 'Default - sample2 (run1)')])]

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # finish sample 1
        self.loader.poll()  # check status, sample 2 not finished
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish sample 2
        self.loader.poll()  # finished, so now download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)

    def test_download_two_folders(self):
        """Each folder downloads as soon as its own run has finished."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded1 = [('run2', [('sample1',
                                           'Default - sample1 (run2)')])]
        expected_downloaded2 = [
            ('run2', [('sample1', 'Default - sample1 (run2)')]),
            ('run1', [('sample1', 'Default - sample1 (run1)')])
        ]

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # finish sample 1
        self.loader.poll()  # check status, download 1
        downloaded1 = self.downloaded[:]

        self.completed = self.launched[:]  # finish sample 2
        self.loader.poll()  # check status, download 2
        downloaded2 = self.downloaded[:]

        self.assertEqual(expected_downloaded1, downloaded1)
        self.assertEqual(expected_downloaded2, downloaded2)

    def test_limit_launches(self):
        """With launch_limit 3, the fourth run waits until one completes."""
        self.loader.launch_limit = 3
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched1 = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)',
            'Default - sample1 (run1)'
        ]
        expected_launched2 = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)',
            'Default - sample1 (run1)', 'Default - sample2 (run1)'
        ]

        self.loader.poll()  # launch run2, sample1
        self.loader.poll()  # launch run2, sample2
        self.loader.poll()  # launch run1, sample1
        self.loader.poll()  # don't launch anything, limit reached
        launched1 = self.launched[:]

        self.completed = self.launched[:1]
        self.loader.poll()  # launch run1, sample2
        launched2 = self.launched[:]

        self.assertEqual(expected_launched1, launched1)
        self.assertEqual(expected_launched2, launched2)

    def test_no_limit_on_latest_run(self):
        """Both samples of the latest folder launch despite launch_limit 1."""
        self.loader.launch_limit = 1
        self.loader.latest_folder = 'run2'
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample2 (run2)'
        ]

        self.loader.poll()  # launch run2, sample1
        self.loader.poll()  # launch run2, sample2 (latest folder, no limit)
        self.loader.poll()  # don't launch run1, limit reached
        launched = self.launched[:]

        self.assertEqual(expected_launched, launched)

    def test_timing_wait(self):
        """poll() returns 0 after launching and status_delay while waiting."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]

        sleep1 = self.loader.poll()  # upload, no wait before next upload
        sleep2 = self.loader.poll()  # check status, wait before next check

        self.assertEqual(0, sleep1)
        self.assertEqual(self.loader.status_delay, sleep2)

    def test_new_folder(self):
        """A new folder is only launched once folder_delay has elapsed."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched1 = ['Default - sample1 (run1)']
        expected_launched2 = [
            'Default - sample1 (run1)', 'Default - sample1 (run2)'
        ]

        self.loader.poll()  # launch run1, sample1
        self.loader.find_folders = lambda: ['run2', 'run1']
        # Stop the clock one second short of the folder delay.
        self.now += timedelta(seconds=self.loader.folder_delay - 1)

        self.loader.poll()  # not ready to scan for new folder

        launched1 = self.launched[:]
        self.now += timedelta(seconds=1)

        self.loader.poll()  # launch run2, sample1

        launched2 = self.launched[:]

        self.assertEqual(expected_launched1, launched1)
        self.assertEqual(expected_launched2, launched2)

    def test_completed_folder_no_reset(self):
        """Dropping a downloaded folder from the scan relaunches nothing."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)'
        ]

        self.loader.poll()  # launch 2
        self.loader.poll()  # launch 1
        self.completed = self.launched[:1]  # finish run 2
        self.loader.poll()  # check status, download 2

        self.loader.find_folders = lambda: ['run1']
        self.now += timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # check status
        launched = self.launched[:]

        self.assertEqual(expected_launched, launched)

    def test_cancelled_run(self):
        """A cancelled run is launched again and nothing is downloaded."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)',
            'Default - sample1 (run1)'
        ]
        expected_downloaded = []

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.cancelled = self.launched[:1]  # cancel run 1
        self.loader.poll()  # check status, rerun 1

        launched = self.launched[:]
        downloaded = self.downloaded[:]

        self.assertEqual(expected_launched, launched)
        self.assertEqual(expected_downloaded, downloaded)

    def test_purged_run(self):
        """A completed run with purged output is relaunched, not downloaded."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        expected_launched = [
            'Default - sample1 (run1)', 'Default - sample2 (run1)',
            'Default - sample1 (run1)'
        ]
        expected_downloaded = []

        self.loader.poll()  # launch 1
        self.loader.poll()  # launch 2
        self.completed = self.launched[:1]  # complete run 1
        self.loader.poll()
        self.purged = self.launched[:1]  # purge some results from run 1
        self.loader.poll()  # check status, rerun 1

        launched = self.launched[:]
        downloaded = self.downloaded[:]

        self.assertEqual(expected_launched, launched)
        self.assertEqual(expected_downloaded, downloaded)

    def test_preexisting_runs(self):
        """A sample whose run is already reported by Kive is not launched."""
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]
        self.existing_runs = {'Default - sample1 (run1)': 'dummy run status'}
        expected_launched = ['Default - sample2 (run1)']

        self.loader.poll()
        self.loader.poll()

        self.assertEqual(expected_launched, self.launched)

    def test_unable_to_check_status(self):
        """A failing status check downloads nothing; a later success does."""
        is_kive_running = False

        # noinspection PyUnusedLocal
        def fetch_run_status(run):
            # Reads the enclosing variable at call time, so flipping it below
            # changes what later polls see.
            if not is_kive_running:
                raise RuntimeError('Kive connection failed.')
            return RUN_COMPLETED

        self.loader.fetch_run_status = fetch_run_status

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_downloaded = [('run1', [('sample1',
                                          'Default - sample1 (run1)')])]

        self.loader.poll()  # launch 1
        self.loader.poll()  # check status, catch exception
        downloaded1 = self.downloaded[:]

        is_kive_running = True
        self.loader.poll()  # check status successfully, and download
        downloaded2 = self.downloaded[:]

        self.assertEqual([], downloaded1)
        self.assertEqual(expected_downloaded, downloaded2)

    def test_failed_quality_download(self):
        """Three failing polls report delays totalling retry_delay, then 0."""
        # noinspection PyUnusedLocal
        def download_quality(folder):
            raise RuntimeError('Mock quality failure.')

        self.loader.download_quality = download_quality

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq', folder + '/sample2_R1_x.fastq'
        ]

        retry_delays = [
            self.loader.poll(),
            self.loader.poll(),
            self.loader.poll()
        ]
        final_delay = self.loader.poll()

        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)

    def test_failed_results_download(self):
        """A results download that raises gets its folder marked disabled."""
        self.loader.download_results = lambda folder, runs: 1 / 0  # fails
        expected_disabled = ['run1']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]

        self.loader.poll()  # launch 1
        self.completed = self.launched[:]
        self.loader.poll()

        self.assertEqual(expected_disabled, self.disabled_folders)

    def test_removing_failure_will_retry_folder(self):
        """A failed folder that reappears in the scan is launched again."""
        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        expected_launched1 = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)'
        ]
        expected_launched2 = [
            'Default - sample1 (run2)', 'Default - sample1 (run1)',
            'Default - sample1 (run2)'
        ]

        self.loader.poll()  # launch 2
        self.loader.poll()  # launch 1
        self.completed = self.launched[:1]  # fail run 2
        self.loader.download_results = lambda folder, runs: 1 / 0  # fails
        self.loader.poll()  # check status, mark 2 as failed

        self.loader.find_folders = lambda: ['run1']  # run2 marked as failed
        self.now += timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # Nothing to launch
        launched1 = self.launched[:]

        self.loader.find_folders = lambda: ['run2', 'run1']  # run2 unmarked
        self.now += timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # Should relaunch run2
        launched2 = self.launched[:]

        self.assertEqual(expected_launched1, launched1)
        self.assertEqual(expected_launched2, launched2)

    def test_get_sample_number(self):
        """The sample number is read from the _S71_ part of the file name."""
        fastq_name = '1234A-DEL-DRT-PR-RT_S71_L001_R1_001.fastq.gz'
        expected_sample_number = 71

        sample_number = self.loader.get_sample_number(fastq_name)

        self.assertEqual(expected_sample_number, sample_number)

    def test_parse_run_info(self):
        """The sample run info file parses to two reads of length 5."""
        expected_read_lengths = [5, 5]

        run_info = KiveLoader.parse_run_info(self.run_info_path)

        self.assertEqual(expected_read_lengths, run_info.read_lengths)

    def test_write_quality(self):
        """write_quality emits a row per cycle, blank where no metric exists.

        The expected CSV lists cycles 1 to 5 followed by -1 to -5, with the
        cycle 12 metric appearing on the -1 row.
        """
        quality_csv = StringIO()
        metrics = [{
            "tile": 1101,
            "cycle": 1,
            "errorrate": "0.01"
        }, {
            "tile": 1101,
            "cycle": 2,
            "errorrate": "0.02"
        }, {
            "tile": 1101,
            "cycle": 12,
            "errorrate": "0.12"
        }]
        run_info = KiveLoader.parse_run_info(self.run_info_path)
        expected_quality = """\
tile,cycle,errorrate
1101,1,0.01
1101,2,0.02
1101,3,
1101,4,
1101,5,
1101,-1,0.12
1101,-2,
1101,-3,
1101,-4,
1101,-5,
"""

        KiveLoader.write_quality(quality_csv, metrics, run_info)

        self.assertEqual(expected_quality, quality_csv.getvalue())
Beispiel #11
0
class KiveLoaderTest(unittest.TestCase):
    def setUp(self):
        """Create a loader with all external hooks replaced by stubs.

        The stubs record their arguments in lists on this test case, so each
        test can call poll() and then inspect what was uploaded, launched or
        downloaded.
        """
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        # Undo the global logging switch after each test so it cannot leak
        # into whatever runs later in the same process.
        self.addCleanup(logging.disable, logging.NOTSET)
        self.loader = KiveLoader()
        # State that the stubs below read from or append to.
        self.existing_datasets = []
        self.existing_runs = {}
        self.uploaded = []
        self.launched = []
        self.completed = []
        self.downloaded = []
        self.now = datetime(2000, 1, 1)
        self.quality_cdt = 'quality CDT'
        self.disabled_folders = []
        self.folder_retries = []

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = lambda: []
        self.loader.find_files = lambda folder: []
        self.loader.find_preexisting_runs = lambda: self.existing_runs
        # The run key is the same tuple that the launch_run stub returns.
        self.loader.get_run_key = lambda quality, fastq1, fastq2: (
            quality, fastq1, fastq2)
        # list.append() returns None, so "append(...) or x" records the call
        # and still returns x.
        self.loader.upload_kive_dataset = lambda filename, description, cdt: (
            self.uploaded.append((filename, description, cdt)) or filename)
        self.loader.download_quality = lambda folder: folder + '/quality.csv'
        self.loader.find_kive_dataset = lambda filename, cdt: (
            filename if filename in self.existing_datasets else None)
        self.loader.launch_run = lambda quality, fastq1, fastq2: (
            self.launched.append(
                (quality, fastq1, fastq2)) or (quality, fastq1, fastq2))
        self.loader.is_run_complete = lambda run: run in self.completed
        self.loader.download_results = lambda folder, runs: (
            self.downloaded.append((folder, runs)))
        self.loader.get_time = lambda: self.now
        self.loader.mark_folder_disabled = lambda folder, message, exc_info: (
            self.disabled_folders.append(folder))
        self.loader.log_retry = lambda folder: self.folder_retries.append(
            folder)

    def test_idle(self):
        """With no folders found, poll() should return the folder delay."""
        reported_delay = self.loader.poll()
        wanted_delay = self.loader.folder_delay

        self.assertEqual(wanted_delay, reported_delay)

    def test_upload_one_sample(self):
        """One poll uploads quality and both reads of run2's first sample."""
        def list_fastqs(folder):
            # Two forward-read files per folder.
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = list_fastqs
        quality_upload = ('run2/quality.csv',
                          'phiX174 quality from MiSeq run run2',
                          self.quality_cdt)
        forward_upload = ('run2/sample1_R1_x.fastq',
                          'forward read from MiSeq run run2',
                          None)
        reverse_upload = ('run2/sample1_R2_x.fastq',
                          'reverse read from MiSeq run run2',
                          None)
        wanted = [quality_upload, forward_upload, reverse_upload]

        self.loader.poll()

        self.assertEqual(wanted, self.uploaded)

    def test_launch_one_sample(self):
        """One poll launches only the first sample of the first folder."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = list_fastqs
        first_launch = ('run2/quality.csv',
                        'run2/sample1_R1_x.fastq',
                        'run2/sample1_R2_x.fastq')

        self.loader.poll()

        self.assertEqual([first_launch], self.launched)

    def test_launch_two_samples(self):
        """Two polls launch both samples of the single folder, in order."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs
        sample1_launch = ('run1/quality.csv',
                          'run1/sample1_R1_x.fastq',
                          'run1/sample1_R2_x.fastq')
        sample2_launch = ('run1/quality.csv',
                          'run1/sample2_R1_x.fastq',
                          'run1/sample2_R2_x.fastq')

        for _ in range(2):
            self.loader.poll()

        self.assertEqual([sample1_launch, sample2_launch], self.launched)

    def test_launch_two_folders(self):
        """Two polls launch one sample from each folder, run2 first."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = list_fastqs
        run2_launch = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')
        run1_launch = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        for _ in range(2):
            self.loader.poll()

        self.assertEqual([run2_launch, run1_launch], self.launched)

    def test_launch_finished(self):
        """Polling again after the only sample launched does not relaunch it."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs
        only_launch = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        for _ in range(2):
            self.loader.poll()

        self.assertEqual([only_launch], self.launched)

    def test_trimmed_folder(self):
        """Upload descriptions name the run as 160214_M01234, not the path."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['path/160214_M01234_AX23']
        self.loader.find_files = list_fastqs
        quality_upload = ('path/160214_M01234_AX23/quality.csv',
                          'phiX174 quality from MiSeq run 160214_M01234',
                          'quality CDT')
        forward_upload = ('path/160214_M01234_AX23/sample1_R1_x.fastq',
                          'forward read from MiSeq run 160214_M01234',
                          None)
        reverse_upload = ('path/160214_M01234_AX23/sample1_R2_x.fastq',
                          'reverse read from MiSeq run 160214_M01234',
                          None)
        wanted = [quality_upload, forward_upload, reverse_upload]

        self.loader.poll()

        self.assertEqual(wanted, self.uploaded)

    def test_datasets_exist(self):
        """Nothing is uploaded when every dataset is already found in Kive."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        # All three datasets for the sample are reported as already present.
        self.existing_datasets = ['run1/quality.csv',
                                  'run1/sample1_R1_x.fastq',
                                  'run1/sample1_R2_x.fastq']
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs

        self.loader.poll()

        self.assertEqual([], self.uploaded)

    def test_some_datasets_exist(self):
        """Only the dataset that is not already in Kive gets uploaded."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        # The forward read is the one dataset left out of the existing list.
        self.existing_datasets = ['run1/quality.csv',
                                  'run1/sample1_R2_x.fastq']
        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs
        missing_upload = ('run1/sample1_R1_x.fastq',
                          'forward read from MiSeq run run1',
                          None)

        self.loader.poll()

        self.assertEqual([missing_upload], self.uploaded)

    def test_download_one_sample(self):
        """Results are downloaded only after the run is marked complete."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs
        finished_run = ('run1/quality.csv',
                        'run1/sample1_R1_x.fastq',
                        'run1/sample1_R2_x.fastq')
        wanted_after = [('run1', [finished_run])]

        self.loader.poll()  # launches the run
        self.loader.poll()  # status check while still running
        seen_before = list(self.downloaded)

        self.completed = self.launched[:]  # mark the run finished
        self.loader.poll()  # status check now triggers the download
        seen_after = list(self.downloaded)

        self.assertEqual([], seen_before)
        self.assertEqual(wanted_after, seen_after)

    def test_download_two_samples(self):
        """Nothing downloads until both samples in the folder have finished."""
        def list_fastqs(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_fastqs
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        sample2_run = ('run1/quality.csv',
                       'run1/sample2_R1_x.fastq',
                       'run1/sample2_R2_x.fastq')
        wanted_after = [('run1', [sample1_run, sample2_run])]

        self.loader.poll()  # launches sample 1
        self.loader.poll()  # launches sample 2
        self.completed = self.launched[:1]  # only sample 1 is finished
        self.loader.poll()  # status check, sample 2 still running
        seen_before = list(self.downloaded)

        self.completed = self.launched[:]  # sample 2 is finished as well
        self.loader.poll()  # everything finished, so download
        seen_after = list(self.downloaded)

        self.assertEqual([], seen_before)
        self.assertEqual(wanted_after, seen_after)

    def test_download_two_folders(self):
        # Each folder is downloaded when its own run finishes, without
        # waiting for the other folder.
        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_download = ('run2', [('run2/quality.csv',
                                   'run2/sample1_R1_x.fastq',
                                   'run2/sample1_R2_x.fastq')])
        run1_download = ('run1', [('run1/quality.csv',
                                   'run1/sample1_R1_x.fastq',
                                   'run1/sample1_R2_x.fastq')])

        self.loader.poll()  # first launch
        self.loader.poll()  # second launch
        self.completed = self.launched[:1]  # first launched run is done
        self.loader.poll()  # status check, first download
        first_downloads = list(self.downloaded)

        self.completed = list(self.launched)  # second run is done too
        self.loader.poll()  # status check, second download
        second_downloads = list(self.downloaded)

        self.assertEqual([run2_download], first_downloads)
        self.assertEqual([run2_download, run1_download], second_downloads)

    def test_limit_launches(self):
        # With launch_limit set to 3, the fourth sample is only launched
        # after one of the earlier runs has been marked completed.
        self.loader.launch_limit = 3

        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_sample1 = ('run2/quality.csv',
                        'run2/sample1_R1_x.fastq',
                        'run2/sample1_R2_x.fastq')
        run2_sample2 = ('run2/quality.csv',
                        'run2/sample2_R1_x.fastq',
                        'run2/sample2_R2_x.fastq')
        run1_sample1 = ('run1/quality.csv',
                        'run1/sample1_R1_x.fastq',
                        'run1/sample1_R2_x.fastq')
        run1_sample2 = ('run1/quality.csv',
                        'run1/sample2_R1_x.fastq',
                        'run1/sample2_R2_x.fastq')
        expected_at_limit = [run2_sample1, run2_sample2, run1_sample1]
        expected_after_finish = expected_at_limit + [run1_sample2]

        self.loader.poll()  # launches run2, sample1
        self.loader.poll()  # launches run2, sample2
        self.loader.poll()  # launches run1, sample1
        self.loader.poll()  # limit reached, nothing launched
        launched_at_limit = list(self.launched)

        self.completed = self.launched[:1]
        self.loader.poll()  # a slot is free again: launches run1, sample2
        launched_after_finish = list(self.launched)

        self.assertEqual(expected_at_limit, launched_at_limit)
        self.assertEqual(expected_after_finish, launched_after_finish)

    def test_no_limit_on_latest_run(self):
        # launch_limit is 1, yet both samples of the folder named as
        # latest_folder are expected to launch; run1 gets nothing.
        self.loader.launch_limit = 1
        self.loader.latest_folder = 'run2'

        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_sample1 = ('run2/quality.csv',
                        'run2/sample1_R1_x.fastq',
                        'run2/sample1_R2_x.fastq')
        run2_sample2 = ('run2/quality.csv',
                        'run2/sample2_R1_x.fastq',
                        'run2/sample2_R2_x.fastq')

        self.loader.poll()  # launches run2, sample1
        self.loader.poll()  # launches run2, sample2
        self.loader.poll()  # launches nothing more; limit reached
        launched_so_far = list(self.launched)

        self.assertEqual([run2_sample1, run2_sample2], launched_so_far)

    def test_timing_wait(self):
        # poll() returns how long to wait: 0 after the first poll, and
        # status_delay after the second one.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        delay_after_upload = self.loader.poll()  # upload, no wait needed
        delay_after_check = self.loader.poll()  # status check, then wait

        self.assertEqual(0, delay_after_upload)
        self.assertEqual(self.loader.status_delay, delay_after_check)

    def test_new_folder(self):
        # A folder that appears later is only picked up once folder_delay
        # seconds have passed on the loader's clock.
        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_files
        run1_sample = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        run2_sample = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')

        self.loader.poll()  # launches run1, sample1
        self.loader.find_folders = lambda: ['run2', 'run1']
        almost_due = timedelta(seconds=self.loader.folder_delay - 1)
        self.now = self.now + almost_due

        self.loader.poll()  # one second too early to rescan the folders

        launched_before_rescan = list(self.launched)
        self.now = self.now + timedelta(seconds=1)

        self.loader.poll()  # rescan is due: launches run2, sample1

        launched_after_rescan = list(self.launched)

        self.assertEqual([run1_sample], launched_before_rescan)
        self.assertEqual([run1_sample, run2_sample], launched_after_rescan)

    def test_completed_folder_no_reset(self):
        # After run2 finishes and drops out of the folder listing, a later
        # rescan must not cause any additional launches.
        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = list_files
        run2_sample = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')
        run1_sample = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()  # launches run2
        self.loader.poll()  # launches run1
        self.completed = self.launched[:1]  # run2 is finished
        self.loader.poll()  # status check, downloads run2

        self.loader.find_folders = lambda: ['run1']
        self.now = self.now + timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # status check only
        launched_total = list(self.launched)

        self.assertEqual([run2_sample, run1_sample], launched_total)

    def test_preexisting_runs(self):
        # Sample 1 is reported as a preexisting run, so only sample 2 should
        # be launched.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        known_run = ('run1/quality.csv',
                     'run1/sample1_R1_x.fastq',
                     'run1/sample1_R2_x.fastq')
        self.existing_runs = {known_run: 'dummy run status'}
        new_run = ('run1/quality.csv',
                   'run1/sample2_R1_x.fastq',
                   'run1/sample2_R2_x.fastq')

        self.loader.poll()
        self.loader.poll()

        self.assertEqual([new_run], self.launched)

    def test_failed_run(self):
        # The status check for sample 2 raises KiveRunFailedException; the
        # folder is still expected to be downloaded, listing both runs.
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        failing_run = ('run1/quality.csv',
                       'run1/sample2_R1_x.fastq',
                       'run1/sample2_R2_x.fastq')

        def is_run_complete(run):
            if run == failing_run:
                raise KiveRunFailedException('Mock failure.')
            return True

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.is_run_complete = is_run_complete
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        self.loader.poll()  # launches sample 1
        self.loader.poll()  # launches sample 2
        self.loader.poll()  # status check; the exception gets handled
        downloads = list(self.downloaded)

        self.assertEqual([('run1', [sample1_run, failing_run])], downloads)

    def test_unable_to_check_status(self):
        # While the status check raises, nothing is downloaded; once it
        # succeeds again the folder is downloaded as usual.
        kive_state = {'running': False}

        def is_run_complete(run):
            if kive_state['running']:
                return True
            raise StandardError('Kive connection failed.')

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.is_run_complete = is_run_complete
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()  # launches the run
        self.loader.poll()  # status check fails; exception is handled
        downloads_while_down = list(self.downloaded)

        kive_state['running'] = True
        self.loader.poll()  # status check works now, so download
        downloads_when_up = list(self.downloaded)

        self.assertEqual([], downloads_while_down)
        self.assertEqual([('run1', [sample1_run])], downloads_when_up)

    def test_failed_quality_download(self):
        # download_quality always fails: the delays returned by the first
        # three polls must add up to retry_delay, and the fourth poll must
        # return 0.
        def download_quality(folder):
            raise StandardError('Mock quality failure.')

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.download_quality = download_quality
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        retry_delays = [self.loader.poll() for _ in range(3)]
        final_delay = self.loader.poll()

        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)

    def test_failed_results_download(self):
        # download_results always fails: the launch poll returns 0, the
        # delays of the next three polls add up to retry_delay, and the
        # poll after that returns 0 again.
        def download_results(folder, runs=None):
            # Sibling tests record downloads as (folder, runs) pairs, so the
            # loader presumably passes both arguments. A one-parameter mock
            # would then fail with a TypeError from the call itself instead
            # of raising the mock error below; accepting an optional runs
            # argument works with either calling convention.
            raise StandardError('Mock Kive failure.')

        self.loader.download_results = download_results

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [
            folder + '/sample1_R1_x.fastq'
        ]
        retry_delays = []

        first_delay = self.loader.poll()  # launch 1
        self.completed = self.launched[:]  # run finished, download is due
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        final_delay = self.loader.poll()

        self.assertEqual(0, first_delay)
        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)
Beispiel #12
0
class KiveLoaderTest(unittest.TestCase):
    def setUp(self):
        # Create a KiveLoader and replace every method that would touch
        # Kive, the file system or the clock with an in-memory stand-in.
        # The stand-ins record what they were asked to do in lists on this
        # test case, and read their canned answers from attributes that
        # individual tests may reassign.
        logging.disable(logging.CRITICAL)  # avoid polluting test output
        # logging.disable() is process-wide, so restore normal logging when
        # the test ends instead of leaving it off for whatever runs next.
        self.addCleanup(logging.disable, logging.NOTSET)
        self.loader = KiveLoader()
        self.existing_datasets = []  # file names find_kive_dataset reports
        self.existing_runs = {}  # returned by find_preexisting_runs
        self.uploaded = []  # (filename, description, cdt) per upload
        self.launched = []  # (quality, fastq1, fastq2) per launch
        self.completed = []  # runs that is_run_complete reports as done
        self.downloaded = []  # (folder, runs) per download_results call
        self.now = datetime(2000, 1, 1)  # value returned by get_time
        self.quality_cdt = 'quality CDT'
        self.disabled_folders = []  # folders given to mark_folder_disabled
        self.folder_retries = []  # folders given to log_retry

        def check_kive_connection():
            self.loader.quality_cdt = self.quality_cdt

        def upload_kive_dataset(filename, description, cdt):
            self.uploaded.append((filename, description, cdt))
            return filename

        def find_kive_dataset(filename, cdt):
            if filename in self.existing_datasets:
                return filename
            return None

        def launch_run(quality, fastq1, fastq2):
            run = (quality, fastq1, fastq2)
            self.launched.append(run)
            return run

        def download_results(folder, runs):
            self.downloaded.append((folder, runs))

        def mark_folder_disabled(folder, message, exc_info):
            self.disabled_folders.append(folder)

        def log_retry(folder):
            self.folder_retries.append(folder)

        self.loader.check_kive_connection = check_kive_connection
        self.loader.find_folders = lambda: []
        self.loader.find_files = lambda folder: []
        self.loader.find_preexisting_runs = lambda: self.existing_runs
        self.loader.get_run_key = lambda quality, fastq1, fastq2: (quality,
                                                                   fastq1,
                                                                   fastq2)
        self.loader.upload_kive_dataset = upload_kive_dataset
        self.loader.download_quality = lambda folder: folder + '/quality.csv'
        self.loader.find_kive_dataset = find_kive_dataset
        self.loader.launch_run = launch_run
        self.loader.is_run_complete = lambda run: run in self.completed
        self.loader.download_results = download_results
        self.loader.get_time = lambda: self.now
        self.loader.mark_folder_disabled = mark_folder_disabled
        self.loader.log_retry = log_retry

    def test_idle(self):
        # With the setUp defaults there are no folders to process, and
        # poll() is expected to return folder_delay.
        idle_delay = self.loader.poll()

        expected_delay = self.loader.folder_delay
        self.assertEqual(expected_delay, idle_delay)

    def test_upload_one_sample(self):
        # One poll uploads the quality file and both reads of sample 1 from
        # the first folder listed, and nothing else.
        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        quality_upload = ('run2/quality.csv',
                          'phiX174 quality from MiSeq run run2',
                          self.quality_cdt)
        forward_upload = ('run2/sample1_R1_x.fastq',
                          'forward read from MiSeq run run2',
                          None)
        reverse_upload = ('run2/sample1_R2_x.fastq',
                          'reverse read from MiSeq run run2',
                          None)

        self.loader.poll()

        self.assertEqual([quality_upload, forward_upload, reverse_upload],
                         self.uploaded)

    def test_launch_one_sample(self):
        # A single poll launches exactly one run: sample 1 of the first
        # folder listed.
        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        first_run = ('run2/quality.csv',
                     'run2/sample1_R1_x.fastq',
                     'run2/sample1_R2_x.fastq')

        self.loader.poll()

        self.assertEqual([first_run], self.launched)

    def test_launch_two_samples(self):
        # Two polls launch the two samples of one folder, in file order.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        sample2_run = ('run1/quality.csv',
                       'run1/sample2_R1_x.fastq',
                       'run1/sample2_R2_x.fastq')

        self.loader.poll()
        self.loader.poll()

        self.assertEqual([sample1_run, sample2_run], self.launched)

    def test_launch_two_folders(self):
        # Two polls launch one sample from each folder, following the order
        # in which the folders are listed.
        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_sample = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')
        run1_sample = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()
        self.loader.poll()

        self.assertEqual([run2_sample, run1_sample], self.launched)

    def test_launch_finished(self):
        # With a single sample, a second poll must not launch it again.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        only_run = ('run1/quality.csv',
                    'run1/sample1_R1_x.fastq',
                    'run1/sample1_R2_x.fastq')

        self.loader.poll()
        self.loader.poll()

        self.assertEqual([only_run], self.launched)

    def test_trimmed_folder(self):
        # Upload descriptions are expected to name the run as
        # '160214_M01234', i.e. without the parent path and the trailing
        # '_AX23' part of the folder name.
        def list_folders():
            return ['path/160214_M01234_AX23']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        quality_upload = ('path/160214_M01234_AX23/quality.csv',
                          'phiX174 quality from MiSeq run 160214_M01234',
                          'quality CDT')
        forward_upload = ('path/160214_M01234_AX23/sample1_R1_x.fastq',
                          'forward read from MiSeq run 160214_M01234',
                          None)
        reverse_upload = ('path/160214_M01234_AX23/sample1_R2_x.fastq',
                          'reverse read from MiSeq run 160214_M01234',
                          None)

        self.loader.poll()

        self.assertEqual([quality_upload, forward_upload, reverse_upload],
                         self.uploaded)

    def test_datasets_exist(self):
        # All three files of the sample are registered as existing
        # datasets, so a poll should upload nothing.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        self.existing_datasets = [
            'run1/quality.csv',
            'run1/sample1_R1_x.fastq',
            'run1/sample1_R2_x.fastq',
        ]

        self.loader.poll()

        self.assertEqual([], self.uploaded)

    def test_some_datasets_exist(self):
        # The quality file and the reverse read are registered as existing
        # datasets, so one poll should upload only the forward read.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        self.existing_datasets = [
            'run1/quality.csv',
            'run1/sample1_R2_x.fastq',
        ]
        forward_upload = ('run1/sample1_R1_x.fastq',
                          'forward read from MiSeq run run1',
                          None)

        self.loader.poll()

        self.assertEqual([forward_upload], self.uploaded)

    def test_download_one_sample(self):
        # Nothing may be downloaded until the launched run is reported as
        # completed; afterwards the folder is downloaded with its one run.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()  # first poll launches the run
        self.loader.poll()  # second poll checks status; run not finished
        before_completion = list(self.downloaded)

        self.completed = list(self.launched)  # mark the run as finished
        self.loader.poll()  # run finished, so the download happens now
        after_completion = list(self.downloaded)

        self.assertEqual([], before_completion)
        self.assertEqual([('run1', [sample1_run])], after_completion)

    def test_download_two_samples(self):
        # The folder is downloaded once, and only after both of its samples
        # have finished.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        sample2_run = ('run1/quality.csv',
                       'run1/sample2_R1_x.fastq',
                       'run1/sample2_R2_x.fastq')

        self.loader.poll()  # launches sample 1
        self.loader.poll()  # launches sample 2
        self.completed = self.launched[:1]  # only sample 1 is done
        self.loader.poll()  # status check; sample 2 still unfinished
        partial_downloads = list(self.downloaded)

        self.completed = list(self.launched)  # sample 2 is done as well
        self.loader.poll()  # everything finished, so download
        final_downloads = list(self.downloaded)

        self.assertEqual([], partial_downloads)
        self.assertEqual([('run1', [sample1_run, sample2_run])],
                         final_downloads)

    def test_download_two_folders(self):
        # Each folder is downloaded when its own run finishes, without
        # waiting for the other folder.
        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_download = ('run2', [('run2/quality.csv',
                                   'run2/sample1_R1_x.fastq',
                                   'run2/sample1_R2_x.fastq')])
        run1_download = ('run1', [('run1/quality.csv',
                                   'run1/sample1_R1_x.fastq',
                                   'run1/sample1_R2_x.fastq')])

        self.loader.poll()  # first launch
        self.loader.poll()  # second launch
        self.completed = self.launched[:1]  # first launched run is done
        self.loader.poll()  # status check, first download
        first_downloads = list(self.downloaded)

        self.completed = list(self.launched)  # second run is done too
        self.loader.poll()  # status check, second download
        second_downloads = list(self.downloaded)

        self.assertEqual([run2_download], first_downloads)
        self.assertEqual([run2_download, run1_download], second_downloads)

    def test_limit_launches(self):
        # With launch_limit set to 3, the fourth sample is only launched
        # after one of the earlier runs has been marked completed.
        self.loader.launch_limit = 3

        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_sample1 = ('run2/quality.csv',
                        'run2/sample1_R1_x.fastq',
                        'run2/sample1_R2_x.fastq')
        run2_sample2 = ('run2/quality.csv',
                        'run2/sample2_R1_x.fastq',
                        'run2/sample2_R2_x.fastq')
        run1_sample1 = ('run1/quality.csv',
                        'run1/sample1_R1_x.fastq',
                        'run1/sample1_R2_x.fastq')
        run1_sample2 = ('run1/quality.csv',
                        'run1/sample2_R1_x.fastq',
                        'run1/sample2_R2_x.fastq')
        expected_at_limit = [run2_sample1, run2_sample2, run1_sample1]
        expected_after_finish = expected_at_limit + [run1_sample2]

        self.loader.poll()  # launches run2, sample1
        self.loader.poll()  # launches run2, sample2
        self.loader.poll()  # launches run1, sample1
        self.loader.poll()  # limit reached, nothing launched
        launched_at_limit = list(self.launched)

        self.completed = self.launched[:1]
        self.loader.poll()  # a slot is free again: launches run1, sample2
        launched_after_finish = list(self.launched)

        self.assertEqual(expected_at_limit, launched_at_limit)
        self.assertEqual(expected_after_finish, launched_after_finish)

    def test_no_limit_on_latest_run(self):
        # launch_limit is 1, yet both samples of the folder named as
        # latest_folder are expected to launch; run1 gets nothing.
        self.loader.launch_limit = 1
        self.loader.latest_folder = 'run2'

        def list_folders():
            return ['run2', 'run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        run2_sample1 = ('run2/quality.csv',
                        'run2/sample1_R1_x.fastq',
                        'run2/sample1_R2_x.fastq')
        run2_sample2 = ('run2/quality.csv',
                        'run2/sample2_R1_x.fastq',
                        'run2/sample2_R2_x.fastq')

        self.loader.poll()  # launches run2, sample1
        self.loader.poll()  # launches run2, sample2
        self.loader.poll()  # launches nothing more; limit reached
        launched_so_far = list(self.launched)

        self.assertEqual([run2_sample1, run2_sample2], launched_so_far)

    def test_timing_wait(self):
        # poll() returns how long to wait: 0 after the first poll, and
        # status_delay after the second one.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        delay_after_upload = self.loader.poll()  # upload, no wait needed
        delay_after_check = self.loader.poll()  # status check, then wait

        self.assertEqual(0, delay_after_upload)
        self.assertEqual(self.loader.status_delay, delay_after_check)

    def test_new_folder(self):
        # A folder that appears later is only picked up once folder_delay
        # seconds have passed on the loader's clock.
        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = list_files
        run1_sample = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        run2_sample = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')

        self.loader.poll()  # launches run1, sample1
        self.loader.find_folders = lambda: ['run2', 'run1']
        almost_due = timedelta(seconds=self.loader.folder_delay - 1)
        self.now = self.now + almost_due

        self.loader.poll()  # one second too early to rescan the folders

        launched_before_rescan = list(self.launched)
        self.now = self.now + timedelta(seconds=1)

        self.loader.poll()  # rescan is due: launches run2, sample1

        launched_after_rescan = list(self.launched)

        self.assertEqual([run1_sample], launched_before_rescan)
        self.assertEqual([run1_sample, run2_sample], launched_after_rescan)

    def test_completed_folder_no_reset(self):
        # After run2 finishes and drops out of the folder listing, a later
        # rescan must not cause any additional launches.
        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.find_folders = lambda: ['run2', 'run1']
        self.loader.find_files = list_files
        run2_sample = ('run2/quality.csv',
                       'run2/sample1_R1_x.fastq',
                       'run2/sample1_R2_x.fastq')
        run1_sample = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()  # launches run2
        self.loader.poll()  # launches run1
        self.completed = self.launched[:1]  # run2 is finished
        self.loader.poll()  # status check, downloads run2

        self.loader.find_folders = lambda: ['run1']
        self.now = self.now + timedelta(seconds=self.loader.folder_delay)

        self.loader.poll()  # status check only
        launched_total = list(self.launched)

        self.assertEqual([run2_sample, run1_sample], launched_total)

    def test_preexisting_runs(self):
        # Sample 1 is reported as a preexisting run, so only sample 2 should
        # be launched.
        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        known_run = ('run1/quality.csv',
                     'run1/sample1_R1_x.fastq',
                     'run1/sample1_R2_x.fastq')
        self.existing_runs = {known_run: 'dummy run status'}
        new_run = ('run1/quality.csv',
                   'run1/sample2_R1_x.fastq',
                   'run1/sample2_R2_x.fastq')

        self.loader.poll()
        self.loader.poll()

        self.assertEqual([new_run], self.launched)

    def test_failed_run(self):
        # The status check for sample 2 raises KiveRunFailedException; the
        # folder is still expected to be downloaded, listing both runs.
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')
        failing_run = ('run1/quality.csv',
                       'run1/sample2_R1_x.fastq',
                       'run1/sample2_R2_x.fastq')

        def is_run_complete(run):
            if run == failing_run:
                raise KiveRunFailedException('Mock failure.')
            return True

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.is_run_complete = is_run_complete
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        self.loader.poll()  # launches sample 1
        self.loader.poll()  # launches sample 2
        self.loader.poll()  # status check; the exception gets handled
        downloads = list(self.downloaded)

        self.assertEqual([('run1', [sample1_run, failing_run])], downloads)

    def test_unable_to_check_status(self):
        # While the status check raises, nothing is downloaded; once it
        # succeeds again the folder is downloaded as usual.
        kive_state = {'running': False}

        def is_run_complete(run):
            if kive_state['running']:
                return True
            raise StandardError('Kive connection failed.')

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq']

        self.loader.is_run_complete = is_run_complete
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files
        sample1_run = ('run1/quality.csv',
                       'run1/sample1_R1_x.fastq',
                       'run1/sample1_R2_x.fastq')

        self.loader.poll()  # launches the run
        self.loader.poll()  # status check fails; exception is handled
        downloads_while_down = list(self.downloaded)

        kive_state['running'] = True
        self.loader.poll()  # status check works now, so download
        downloads_when_up = list(self.downloaded)

        self.assertEqual([], downloads_while_down)
        self.assertEqual([('run1', [sample1_run])], downloads_when_up)

    def test_failed_quality_download(self):
        # download_quality always fails: the delays returned by the first
        # three polls must add up to retry_delay, and the fourth poll must
        # return 0.
        def download_quality(folder):
            raise StandardError('Mock quality failure.')

        def list_folders():
            return ['run1']

        def list_files(folder):
            return [folder + '/sample1_R1_x.fastq',
                    folder + '/sample2_R1_x.fastq']

        self.loader.download_quality = download_quality
        self.loader.find_folders = list_folders
        self.loader.find_files = list_files

        retry_delays = [self.loader.poll() for _ in range(3)]
        final_delay = self.loader.poll()

        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)

    def test_failed_results_download(self):
        # download_results always fails: the launch poll returns 0, the
        # delays of the next three polls add up to retry_delay, and the
        # poll after that returns 0 again.
        def download_results(folder, runs):
            # Takes the same (folder, runs) parameters as the stand-in that
            # setUp installs. With only a folder parameter, the loader's
            # call would fail with a TypeError from the call itself, and
            # the mock error below would never be raised.
            raise StandardError('Mock Kive failure.')

        self.loader.download_results = download_results

        self.loader.find_folders = lambda: ['run1']
        self.loader.find_files = lambda folder: [folder + '/sample1_R1_x.fastq']
        retry_delays = []

        first_delay = self.loader.poll()  # launch 1
        self.completed = self.launched[:]  # run finished, download is due
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        retry_delays.append(self.loader.poll())
        final_delay = self.loader.poll()

        self.assertEqual(0, first_delay)
        self.assertEqual(self.loader.retry_delay, sum(retry_delays))
        self.assertEqual(0, final_delay)