Ejemplo n.º 1
0
    def handle(self, *args, **options):
        """Run the download function that matches the requested job.

        Reads ``job_id`` and ``job_name`` from ``options``, resolves the
        name through ``Downloaders`` and calls the matching download
        function with the job ID.

        Unless the download function itself raises, this method always
        ends in ``sys.exit``: status 0 once the download function has
        returned, status 1 when the job ID is ``None``, when resolving the
        job name raises ``KeyError``, or when no download function is
        known for the resolved job type.
        """
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.")
            sys.exit(1)

        try:
            job_name = options["job_name"]
            job_type = Downloaders[job_name]
        except KeyError:
            logger.error("You must specify a valid job name.")
            sys.exit(1)

        # Only choose the function here; it is invoked in one place below.
        if job_type is Downloaders.ARRAY_EXPRESS:
            run_download = download_array_express
        elif job_type is Downloaders.TRANSCRIPTOME_INDEX:
            run_download = download_transcriptome
        elif job_type is Downloaders.SRA:
            run_download = download_sra
        elif job_type is Downloaders.GEO:
            run_download = download_geo
        else:
            run_download = None

        if run_download is None:
            # The name is a member of Downloaders, but nothing here runs it.
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no downloader function is known to run it."),
                job_name,
                job_id,
            )
            sys.exit(1)

        run_download(job_id)
        sys.exit(0)
Ejemplo n.º 2
0
    def test_verification_failure(self, _upload_files, _download_file,
                                  mock_send_job):
        """Check that a mismatched download URL fails the downloader job.

        After changing one file's download URL, running
        ``download_transcriptome`` must not call the download, upload or
        send-job mocks, and the stored job must be marked unsuccessful
        with start/end times and the expected failure reason.
        """
        mock_send_job.return_value = None

        # Give one file a different download URL to trigger a failure in
        # the _verify_batch_grouping function.
        inserted_batches = self.insert_objects()
        inserted_batches[0].files[0].download_url = "https://wompwomp.com"
        inserted_batches[0].files[0].save()
        created_job = DownloaderJob.create_job_and_relationships(
            batches=inserted_batches)

        # Run the downloader under test.
        transcriptome_index.download_transcriptome(created_job.id)

        # None of the mocked collaborators may have been used.
        _download_file.assert_not_called()
        _upload_files.assert_not_called()
        mock_send_job.assert_not_called()

        # The job row in the database must record the failure.
        stored_job = DownloaderJob.objects.get()
        self.assertFalse(stored_job.success)
        self.assertIsNotNone(stored_job.start_time)
        self.assertIsNotNone(stored_job.end_time)
        expected_reason = ("A Batch's file doesn't have the same download "
                           "URL as the other batch's file.")
        self.assertEqual(stored_job.failure_reason, expected_reason)
Ejemplo n.º 3
0
    def test_download(self, _upload_files, _download_file, _verify_files,
                      mock_send_job):
        """Check the successful path of ``download_transcriptome``.

        Asserts that the verify and download mocks are each called twice
        with the expected targets, that the upload mock receives the files
        of both batches and the same job, that every batch ends up
        DOWNLOADED, that the job is stored as successful, and that one
        TRANSCRIPTOME_INDEX processor job is sent for each of the two
        processor jobs found.
        """
        # Start from an empty temp directory.
        shutil.rmtree(
            "/home/user/data_store/temp/EnsemblPlants/TRANSCRIPTOME_INDEX",
            ignore_errors=True)

        mock_send_job.return_value = None

        inserted_batches = self.insert_objects()
        created_job = DownloaderJob.create_job_and_relationships(
            batches=inserted_batches)

        # Run the downloader under test.
        transcriptome_index.download_transcriptome(created_job.id)

        # Both target paths live in the directory named after the job id.
        job_id_text = str(created_job.id)
        expected_gtf_path = (
            "/home/user/data_store/temp/EnsemblPlants/TRANSCRIPTOME_INDEX/downloader_job_{}"
            "/Aegilops_tauschii_short.gtf.gz").format(job_id_text)
        expected_fasta_path = (
            "/home/user/data_store/temp/EnsemblPlants/TRANSCRIPTOME_INDEX/downloader_job_{}"
            "/Aegilops_tauschii_short.fa.gz").format(job_id_text)

        # Verify that all expected functionality is run:
        self.assertEqual(_verify_files.call_count, 2)
        self.assertEqual(_download_file.call_count, 2)
        _download_file.assert_any_call(
            self.gtf_download_url, expected_gtf_path, created_job)
        _download_file.assert_any_call(
            self.fasta_download_url, expected_fasta_path, created_job)

        # Only the positional arguments of the upload call are inspected.
        upload_args, _ = _upload_files.call_args
        job_dir, uploaded_files, uploaded_job = upload_args
        expected_files = inserted_batches[0].files + inserted_batches[1].files
        self.assertEqual(set(uploaded_files), set(expected_files))
        self.assertEqual(uploaded_job.id, created_job.id)

        # Verify that the database has been updated correctly:
        for stored_batch in Batch.objects.all():
            self.assertEqual(stored_batch.status,
                             BatchStatuses.DOWNLOADED.value)

        stored_job = DownloaderJob.objects.get()
        self.assertTrue(stored_job.success)
        self.assertIsNotNone(stored_job.start_time)
        self.assertIsNotNone(stored_job.end_time)

        processor_jobs = ProcessorJob.objects.all()
        self.assertEqual(len(processor_jobs), 2)

        expected_calls = [
            call(ProcessorPipeline.TRANSCRIPTOME_INDEX,
                 processor_jobs[index].id)
            for index in (0, 1)
        ]
        mock_send_job.assert_has_calls(expected_calls)
Ejemplo n.º 4
0
    def test_download_file(self, mock_send_job):
        """Run ``download_transcriptome`` for a job linked to one file.

        Saves a downloader job, an archive original file pointing at the
        GTF download URL, and the association between them, then runs the
        downloader. The test makes no assertions; it passes as long as no
        exception propagates.
        """
        mock_send_job.return_value = None

        downloader_job = DownloaderJob()
        downloader_job.save()

        original_file = OriginalFile()
        original_file.source_filename = "Aegilops_tauschii.ASM34733v1.37.gtf.gz"
        original_file.source_url = self.gtf_download_url
        original_file.is_archive = True
        original_file.save()

        # Link the file to the job before running the downloader.
        association = DownloaderJobOriginalFileAssociation()
        association.downloader_job = downloader_job
        association.original_file = original_file
        association.save()

        transcriptome_index.download_transcriptome(downloader_job.pk)
Ejemplo n.º 5
0
    def test_download_failure(self, _upload_files, _open, mock_send_job):
        """Check that an exception from the ``_open`` mock fails the job.

        With ``_open`` raising, running ``download_transcriptome`` must
        not call the upload or send-job mocks, and the stored job must be
        marked unsuccessful with start/end times and a failure reason
        naming the first file's download URL.
        """
        # Set up mocks: every call to the _open mock raises.
        mock_send_job.return_value = None
        _open.side_effect = Exception()

        inserted_batches = self.insert_objects()
        created_job = DownloaderJob.create_job_and_relationships(
            batches=inserted_batches)

        # Run the downloader under test.
        transcriptome_index.download_transcriptome(created_job.id)

        _upload_files.assert_not_called()
        mock_send_job.assert_not_called()

        # The job row in the database must record the failure.
        stored_job = DownloaderJob.objects.get()
        self.assertFalse(stored_job.success)
        self.assertIsNotNone(stored_job.start_time)
        self.assertIsNotNone(stored_job.end_time)
        failed_url = inserted_batches[0].files[0].download_url
        expected_reason = "Exception caught while downloading file from: {}".format(
            failed_url)
        self.assertEqual(stored_job.failure_reason, expected_reason)