Example 1
    def test_verification_failure(self, _extract_file, _download_file,
                                  mock_send_job):
        """A mismatched download URL within a batch group should fail the job."""
        mock_send_job.return_value = None

        # Give one file a download URL that differs from its siblings so
        # that _verify_batch_grouping rejects the grouping.
        batches, files = self.insert_objects()
        bad_file = files[1]
        bad_file.download_url = "https://wompwomp.com"
        bad_file.save()
        job = DownloaderJob.create_job_and_relationships(batches=batches)

        # Run the downloader under test.
        array_express.download_array_express(job.id)

        # Verification fails before any download, extraction, or dispatch.
        _download_file.assert_not_called()
        _extract_file.assert_not_called()
        mock_send_job.assert_not_called()

        # The persisted job record should reflect the failure.
        refreshed = DownloaderJob.objects.get()
        self.assertFalse(refreshed.success)
        self.assertIsNotNone(refreshed.start_time)
        self.assertIsNotNone(refreshed.end_time)
        expected_reason = ("A Batch's file doesn't have the same download "
                          "URL as the other batches' files.")
        self.assertEqual(refreshed.failure_reason, expected_reason)
    def handle(self, *args, **options):
        """Run the downloader function matching the requested job name.

        Exits with status 1 on a missing job ID, an unknown job name, or a
        job name with no registered downloader; exits 0 on success.
        """
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.")
            sys.exit(1)

        try:
            job_type = Downloaders[options["job_name"]]
        except KeyError:
            logger.error("You must specify a valid job name.")
            sys.exit(1)

        # Map each supported downloader type to its entry point.
        dispatch = {
            Downloaders.ARRAY_EXPRESS: download_array_express,
            Downloaders.TRANSCRIPTOME_INDEX: download_transcriptome,
            Downloaders.SRA: download_sra,
            Downloaders.GEO: download_geo,
        }
        downloader = dispatch.get(job_type)
        if downloader is None:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no downloader function is known to run it."),
                options["job_name"],
                options["job_id"],
            )
            sys.exit(1)

        downloader(job_id)
        sys.exit(0)
Example 3
    def test_download(self, _extract_file, _download_file,
                      _verify_batch_grouping, mock_send_job):
        """Happy path: verify, download, extract, then queue processor jobs."""
        mock_send_job.return_value = None

        batches, files = self.insert_objects()
        job = DownloaderJob.create_job_and_relationships(batches=batches)

        # Run the downloader under test.
        array_express.download_array_express(job.id)

        expected_path = (
            "/home/user/data_store/temp/A-AFFY-1/AFFY_TO_PCL/downloader_job_{}"
            "/E-GEOD-59071.raw.3.zip").format(str(job.id))

        # Every pipeline stage should have been exercised exactly once.
        self.assertEqual(_verify_batch_grouping.call_count, 1)
        expected_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
        _download_file.assert_called_with(expected_url, expected_path, job)
        positional, _kwargs = _extract_file.call_args
        extracted_files, extract_job = positional
        self.assertEqual(list(extracted_files), files)
        self.assertEqual(extract_job.id, job.id)

        # All batches should now be marked as downloaded.
        for batch in Batch.objects.all():
            self.assertEqual(batch.status, BatchStatuses.DOWNLOADED.value)

        refreshed = DownloaderJob.objects.get()
        self.assertTrue(refreshed.success)
        self.assertIsNotNone(refreshed.start_time)
        self.assertIsNotNone(refreshed.end_time)

        # One processor job per batch, each dispatched to AFFY_TO_PCL.
        processor_jobs = ProcessorJob.objects.all()
        self.assertEqual(len(processor_jobs), 2)
        mock_send_job.assert_has_calls([
            call(ProcessorPipeline.AFFY_TO_PCL, processor_jobs[0].id),
            call(ProcessorPipeline.AFFY_TO_PCL, processor_jobs[1].id),
        ])
Example 4
    def test_extraction_failure(self, _download_file, mock_send_job):
        """An exception raised while extracting should fail the job."""
        mock_send_job.return_value = None

        batches, files = self.insert_objects()
        job = DownloaderJob.create_job_and_relationships(batches=batches)

        # Run the downloader under test.
        array_express.download_array_express(job.id)

        # No processor job is dispatched when extraction fails.
        mock_send_job.assert_not_called()

        # The persisted job record should reflect the failure.
        refreshed = DownloaderJob.objects.get()
        self.assertFalse(refreshed.success)
        self.assertIsNotNone(refreshed.start_time)
        self.assertIsNotNone(refreshed.end_time)

        # The failure reason names the zip that could not be extracted.
        job_dir = utils.JOB_DIR_PREFIX + str(job.id)
        zip_path = files[0].get_temp_download_path(job_dir)
        self.assertEqual(refreshed.failure_reason,
                         "Exception caught while extracting " + zip_path)
Example 5
    def test_download_failure(self, _extract_file, _open, mock_send_job):
        """An exception raised while downloading should fail the job."""
        # Make the mocked open() blow up to simulate a download error.
        mock_send_job.return_value = None
        _open.side_effect = Exception()

        batches, _ = self.insert_objects()
        job = DownloaderJob.create_job_and_relationships(batches=batches)

        # Run the downloader under test.
        array_express.download_array_express(job.id)

        # Neither extraction nor processor dispatch should happen.
        _extract_file.assert_not_called()
        mock_send_job.assert_not_called()

        # The persisted job record should reflect the failure.
        refreshed = DownloaderJob.objects.get()
        self.assertFalse(refreshed.success)
        self.assertIsNotNone(refreshed.start_time)
        self.assertIsNotNone(refreshed.end_time)
        self.assertEqual(refreshed.failure_reason,
                         "Exception caught while downloading batch")
Example 6
    def _create_dlj_with_file(self, source_url, source_filename,
                              sample_accession_code):
        """Create a DownloaderJob wired to one OriginalFile and one Sample.

        Builds the full fixture chain used by the multiple-zips test:
        DownloaderJob -> DownloaderJobOriginalFileAssociation ->
        OriginalFile -> OriginalFileSampleAssociation -> Sample.

        Args:
            source_url: FTP URL the OriginalFile claims to come from.
            source_filename: CEL filename inside the zip.
            sample_accession_code: accession code for the created Sample.

        Returns:
            The saved DownloaderJob.
        """
        downloader_job = DownloaderJob()
        downloader_job.accession_code = 'E-MEXP-433'
        downloader_job.save()

        original_file = OriginalFile()
        original_file.source_url = source_url
        original_file.source_filename = source_filename
        original_file.save()

        assoc = DownloaderJobOriginalFileAssociation()
        assoc.original_file = original_file
        assoc.downloader_job = downloader_job
        assoc.save()

        sample = Sample()
        sample.accession_code = sample_accession_code
        sample.technology = "MICROARRAY"
        sample.manufacturer = "AFFYMETRIX"
        sample.has_raw = True
        # This is fake, but we don't currently support any agilent
        # platforms so we're using a platform that is supported.
        sample.platform_accession_code = "hgu133a"
        sample.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=sample, original_file=original_file)

        return downloader_job

    def test_download_multiple_zips(self, mock_send_job):
        """Tests that each sample gets one processor job no matter what.

        https://github.com/AlexsLemonade/refinebio/pull/351 deals with
        a bug where every file that was extracted to a directory got a
        processor job queued for it each time a downloader job ran
        which pointed to that directory. This test makes sure this bug
        stays squashed.

        It does so by running two downloader jobs for the same
        experiment which use two different zip files. Before this bug
        was squashed this would have resulted in the first sample
        getting a second processor job queued for it because the
        second downloader job would have found the file in the
        directory.
        """
        # Two downloader jobs for the same experiment, pointing at two
        # different zips, each with its own sample.
        dlj1 = self._create_dlj_with_file(
            "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.1.zip",
            "Waldhof_020604_R30_01-2753_U133A.CEL",
            'E-MEXP-433-Waldhof_020604_R30_01-2753_U133A')

        dlj2 = self._create_dlj_with_file(
            "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.2.zip",
            "N08_U133A.CEL",
            'E-MEXP-433-N08_U133A')

        array_express.download_array_express(dlj1.id)
        array_express.download_array_express(dlj2.id)

        # Exactly one processor job per sample — no duplicates from the
        # second run seeing the first run's extracted files.
        self.assertEqual(ProcessorJob.objects.all().count(), 2)