def handle(self, *args, **options):
    """Run the downloader matching the requested job name, then exit.

    Reads ``options["job_id"]`` and ``options["job_name"]``. An error is
    logged and ``sys.exit(1)`` is called when the job ID is ``None``, when
    the job name is not a key of ``Downloaders``, or when the resolved job
    type has no downloader function associated with it here. After a
    downloader function returns, ``sys.exit(0)`` is called.
    """
    job_id = options["job_id"]
    if job_id is None:
        logger.error("You must specify a job ID.")
        sys.exit(1)

    try:
        job_type = Downloaders[options["job_name"]]
    except KeyError:
        logger.error("You must specify a valid job name.")
        sys.exit(1)

    # Job types paired with the function that knows how to download them.
    dispatch = (
        (Downloaders.ARRAY_EXPRESS, download_array_express),
        (Downloaders.TRANSCRIPTOME_INDEX, download_transcriptome),
        (Downloaders.SRA, download_sra),
        (Downloaders.GEO, download_geo),
    )
    # Match on identity, first hit wins.
    runner = next((func for kind, func in dispatch if job_type is kind), None)

    if runner is not None:
        runner(job_id)
    else:
        logger.error(
            "A valid job name was specified for job %s with id %d but "
            "no downloader function is known to run it.",
            options["job_name"],
            job_id,
        )
        sys.exit(1)

    sys.exit(0)
def test_multiple_batches(self, mock_download_file):
    """An SRA downloader job tied to two batches must be recorded as failed.

    After ``sra.download_sra`` runs, the stored job is expected to have a
    falsy ``success`` and a ``failure_reason`` naming the multiple-batch
    problem.
    """
    # Stub the download so a regression in this test fails quickly instead
    # of attempting a real (slow) transfer.
    mock_download_file.return_value = True

    first_batch, _ = self.insert_objects()

    second_batch_fields = {
        "survey_job": self.survey_job,
        "source_type": "SRA",
        "pipeline_required": "SALMON",
        "platform_accession_code": "IlluminaHiSeq2000",
        "experiment_accession_code": "DRX001564",
        "experiment_title": "It doesn't really matter.",
        "organism_id": 9031,
        "organism_name": "GALLUS GALLUS",
        "release_date": "2013-07-19",
        "last_uploaded_date": "2017-09-11",
        "status": BatchStatuses.NEW.value,
    }
    second_batch = Batch(**second_batch_fields)
    second_batch.save()

    job = DownloaderJob.create_job_and_relationships(
        batches=[first_batch, second_batch],
        downloader_task="dummy",
    )
    job.save()

    sra.download_sra(job.id)

    # Re-read the job from the database to see what the downloader stored.
    stored_job = DownloaderJob.objects.get(id=job.id)
    expected_reason = (
        "More than one batch found for SRA downloader job. "
        "There should only be one."
    )
    self.assertFalse(stored_job.success)
    self.assertEqual(stored_job.failure_reason, expected_reason)
def test_upload_fails(self, mock_download_file, mock_upload_raw_file, mock_getsize):
    """A failing raw-file upload must mark the downloader job as failed.

    The upload mock raises on its first call; afterwards the job is expected
    to have a falsy ``success``, the failure reason
    "Exception caught while uploading file.", and exactly one recorded
    upload call.
    """
    # We don't actually want to download anything and we're
    # testing this function separately anyway.
    mock_download_file.return_value = True
    mock_getsize.return_value = 1337

    def raise_exception(job_dir):
        raise Exception("We're testing that this fails.")

    mock_upload_raw_file.side_effect = raise_exception

    # Only the batch is needed here; the inserted files are not inspected.
    batch, _ = self.insert_objects()
    downloader_job = DownloaderJob.create_job_and_relationships(
        batches=[batch], downloader_task="dummy")
    downloader_job.save()

    sra.download_sra(downloader_job.id)

    downloader_job.refresh_from_db()
    self.assertFalse(downloader_job.success)
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual(downloader_job.failure_reason,
                     "Exception caught while uploading file.")
    self.assertEqual(len(mock_upload_raw_file.mock_calls), 1)
def test_zero_batches(self, mock_download_file):
    """An SRA downloader job with no batches must be recorded as failed.

    After ``sra.download_sra`` runs, the stored job is expected to have a
    falsy ``success`` and the failure reason "No batches found.".
    """
    # Stub the download so a regression in this test fails quickly instead
    # of attempting a real (slow) transfer.
    mock_download_file.return_value = True

    job = DownloaderJob.create_job_and_relationships(
        batches=[],
        downloader_task="dummy",
    )
    job.save()

    sra.download_sra(job.id)

    # Re-read the job from the database to see what the downloader stored.
    stored_job = DownloaderJob.objects.get(id=job.id)
    self.assertFalse(stored_job.success)
    self.assertEqual(stored_job.failure_reason, "No batches found.")
def test_download_file_ncbi(self):
    """Download one ``.sra`` file through ``sra.download_sra`` and verify it.

    Builds a downloader job, an original file and a sample, links them with
    the two association models, runs the download, and then checks that the
    result is truthy, that the first downloaded file has the expected SHA-1,
    and that it exists on disk.
    """
    dlj = DownloaderJob()
    dlj.accession_code = "SRR9117853"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "SRR9117853.sra"
    # NOTE(review): this URL previously carried the literal placeholder
    # "[email protected]" (an email-obfuscation artifact) where a user@host
    # prefix belongs; it is restored here to the NCBI anonymous FTP account.
    # Confirm the host against the endpoint the downloader expects.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR9117/SRR9117853/SRR9117853.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = "SRR9117853"
    sample.save()

    # Link the original file to its sample and to the downloader job.
    sample_assoc = OriginalFileSampleAssociation()
    sample_assoc.sample = sample
    sample_assoc.original_file = og
    sample_assoc.save()

    job_assoc = DownloaderJobOriginalFileAssociation()
    job_assoc.downloader_job = dlj
    job_assoc.original_file = og
    job_assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1,
                     "e7ad484fe6f134ba7d1b2664e58cc15ae5a958cc")
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))
def test_download_file_ncbi(self, mock_send_job):
    """Download one ``.sra`` file through ``sra.download_sra`` and verify it.

    ``mock_send_job`` is stubbed to return ``None``. The test builds a
    downloader job, an original file and a sample, links them with the two
    association models, runs the download, and then checks that the result
    is truthy, that the first downloaded file has the expected SHA-1, and
    that it exists on disk.
    """
    mock_send_job.return_value = None

    dlj = DownloaderJob()
    dlj.accession_code = "DRR002116"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "DRR002116.sra"
    # NOTE(review): this URL previously carried the literal placeholder
    # "[email protected]" (an email-obfuscation artifact) where a user@host
    # prefix belongs; it is restored here to the NCBI anonymous FTP account.
    # Confirm the host against the endpoint the downloader expects.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = 'DRR002116'
    sample.save()

    # Link the original file to its sample and to the downloader job.
    sample_assoc = OriginalFileSampleAssociation()
    sample_assoc.sample = sample
    sample_assoc.original_file = og
    sample_assoc.save()

    job_assoc = DownloaderJobOriginalFileAssociation()
    job_assoc.downloader_job = dlj
    job_assoc.original_file = og
    job_assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1,
                     'd5374e7fe047d4f76b165c3f5148ab2df9d42cea')
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))
def test_download_file(self, mock_send_job):
    """Run ``sra.download_sra`` for a single FTP-hosted ``.fastq.gz`` file.

    ``mock_send_job`` is stubbed to return ``None``. The test builds a
    downloader job, an original file and a sample, links them with the two
    association models, and invokes the download.
    """
    mock_send_job.return_value = None

    job = DownloaderJob()
    job.accession_code = "ERR036"
    job.save()

    original_file = OriginalFile(
        source_filename="ERR036000.fastq.gz",
        source_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz",
        is_archive=True,
    )
    original_file.save()

    sample = Sample(accession_code='ERR036000')
    sample.save()

    # Link the original file to its sample and to the downloader job.
    sample_link = OriginalFileSampleAssociation(
        sample=sample,
        original_file=original_file,
    )
    sample_link.save()

    job_link = DownloaderJobOriginalFileAssociation(
        downloader_job=job,
        original_file=original_file,
    )
    job_link.save()

    # NOTE(review): the return value is captured but never asserted on, so
    # this test only fails if the call raises.
    success = sra.download_sra(job.pk)
def test_download_file(self):
    """Download one ``.fastq.gz`` file via ``sra.download_sra`` and verify it.

    Builds a downloader job, an original file and a sample, links them with
    the two association models, runs the download, and then checks that the
    result is truthy, that the first downloaded file has the expected SHA-1,
    and that it exists on disk.
    """
    job = DownloaderJob()
    job.accession_code = "ERR036"
    job.save()

    original_file = OriginalFile(
        source_filename="ERR036000.fastq.gz",
        source_url="ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz",
        is_archive=True,
    )
    original_file.save()

    sample = Sample(accession_code="ERR036000")
    sample.save()

    # Link the original file to its sample and to the downloader job.
    sample_link = OriginalFileSampleAssociation(
        sample=sample,
        original_file=original_file,
    )
    sample_link.save()

    job_link = DownloaderJobOriginalFileAssociation(
        downloader_job=job,
        original_file=original_file,
    )
    job_link.save()

    result, downloaded_files = sra.download_sra(job.pk)

    first_download = downloaded_files[0]
    self.assertTrue(result)
    self.assertEqual(first_download.sha1,
                     "1dfe5460a4101fe87feeffec0cb2e053f6695961")
    self.assertTrue(os.path.exists(first_download.absolute_file_path))
def test_happy_path(self, mock_download_file, mock_send_job, mock_upload_raw_file, mock_getsize):
    """A single-batch SRA downloader job succeeds end to end.

    With download, upload, size lookup and job dispatch all mocked, the test
    checks that the job is marked successful, that each inserted file gets
    the mocked size, that the download mock was called with the expected
    target paths, that one SALMON processor job was dispatched, and that the
    upload mock recorded two calls.
    """
    mock_send_job.return_value = None

    # We don't actually want to download anything and we're
    # testing this function separately anyway.
    mock_download_file.return_value = True
    mock_getsize.return_value = 1337

    batch, files = self.insert_objects()
    downloader_job = DownloaderJob.create_job_and_relationships(
        batches=[batch], downloader_task="dummy")
    downloader_job.save()

    sra.download_sra(downloader_job.id)

    downloader_job.refresh_from_db()
    self.assertTrue(downloader_job.success)
    for batch_file in files:
        batch_file.refresh_from_db()
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(batch_file.size_in_bytes, 1337)

    processor_job = ProcessorJob.objects.get()

    target_path_template = "/home/user/data_store/temp/IlluminaHiSeq2000/SALMON/downloader_job_{}/DRR002116_{}.fastq.gz"  # noqa
    target_path_1 = target_path_template.format(downloader_job.id, 1)
    target_path_2 = target_path_template.format(downloader_job.id, 2)

    # Impossible to match the exact File and DownloaderJob
    # objects, so rather than trying to do so, just pull them out
    # from the calls and test the path it was called with:
    first_call = mock_download_file.call_args_list[0][0]
    second_call = mock_download_file.call_args_list[1][0]
    mock_download_file.assert_has_calls([
        call(first_call[0], first_call[1], target_path_2),
        call(second_call[0], second_call[1], target_path_1)
    ])

    mock_send_job.assert_called_once_with(ProcessorPipeline.SALMON,
                                          processor_job.id)
    self.assertEqual(len(mock_upload_raw_file.mock_calls), 2)
def test_download_file_unmated_reads(self):
    """Download a two-file ``.fastq.gz`` run via ``sra.download_sra``.

    Builds a downloader job, two original files (each with an expected MD5
    and size) and one sample, links both files to the sample and to the job,
    runs the download, and then checks that the result is truthy, that the
    first downloaded file has the expected SHA-1, and that it exists on disk.
    """
    dlj = DownloaderJob()
    dlj.accession_code = "SRR1603661"
    dlj.save()

    og_1 = OriginalFile()
    og_1.source_filename = "SRR1603661_1.fastq.gz"
    og_1.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_1.fastq.gz"
    og_1.expected_md5 = "502a9a482bfa5aa75865ccc0105ad13c"
    og_1.expected_size_in_bytes = 6751980628
    og_1.is_archive = True
    og_1.save()

    og_2 = OriginalFile()
    og_2.source_filename = "SRR1603661_2.fastq.gz"
    og_2.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_2.fastq.gz"
    # These expected values describe the second file, so they are set on
    # og_2. Assigning them to og_1 (already saved at this point) would leave
    # og_2 without an expected checksum or size.
    og_2.expected_md5 = "fffd24457418d255991f54ec82a39d57"
    og_2.expected_size_in_bytes = 6949912932
    og_2.is_archive = True
    og_2.save()

    sample = Sample()
    sample.accession_code = "SRR1603661"
    sample.save()

    # Link each original file to the sample and to the downloader job.
    for original_file in (og_1, og_2):
        sample_assoc = OriginalFileSampleAssociation()
        sample_assoc.sample = sample
        sample_assoc.original_file = original_file
        sample_assoc.save()

        job_assoc = DownloaderJobOriginalFileAssociation()
        job_assoc.downloader_job = dlj
        job_assoc.original_file = original_file
        job_assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1,
                     "52bf22472069d04fa7767429f6ab78ebd10c0152")
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))