Beispiel #1
0
    def setUp(self):
        """Create an SRA survey job and the organism rows the tests rely on."""
        job = SurveyJob(source_type="SRA")
        job.save()
        self.survey_job = job

        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="DRR002116"
        ).save()

        # Pre-populate the organisms so the model doesn't call the
        # taxonomy API to populate them.
        known_organisms = (
            ("HOMO_SAPIENS", 9606),
            ("GALLUS_GALLUS", 9031),
            ("DANIO_RERIO", 7955),
        )
        for organism_name, taxonomy_id in known_organisms:
            Organism(
                name=organism_name, taxonomy_id=taxonomy_id, is_scientific_name=True
            ).save()
Beispiel #2
0
    def test_survey_unmated_reads(self, mock_send_job):
        """Survey an experiment with unmated reads.

        Besides the experiment and sample count, verify that the md5 and
        size reported by the file report endpoint were recorded for one
        file of one sample.
        """
        job = SurveyJob(source_type="SRA")
        job.save()
        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="SRP048683"
        ).save()

        experiment, samples = SraSurveyor(job).discover_experiment_and_samples()

        self.assertEqual(experiment.accession_code, "SRP048683")
        self.assertEqual(len(samples), 12)

        # Only one sample is inspected in detail.
        seen_file_names = set()
        for sample in samples:
            if sample.accession_code != "SRR1603661":
                continue
            for original_file in sample.original_files.all():
                file_name = original_file.source_filename
                seen_file_names.add(file_name)
                if file_name != "SRR1603661_1.fastq.gz":
                    continue
                # Only this one file has its md5 and size pinned.
                self.assertEqual(
                    original_file.expected_md5, "502a9a482bfa5aa75865ccc0105ad13c"
                )
                self.assertEqual(original_file.expected_size_in_bytes, 6751980628)

        self.assertEqual({"SRR1603661_1.fastq.gz", "SRR1603661_2.fastq.gz"}, seen_file_names)
Beispiel #3
0
    def test_calls_survey(self, mock_get):
        """If source_type is supported calls the appropriate survey method.

        Runs an ARRAY_EXPRESS survey job, then waits in turn for the
        downloader job and the processor job attached to the resulting
        batch, asserting that each one succeeds.
        """
        mock_get.side_effect = mocked_requests_get

        # Prevent a call being made to NCBI's API to determine
        # organism name/id.
        organism = Organism(name="HOMO SAPIENS", taxonomy_id=9606, is_scientific_name=True)
        organism.save()

        survey_job = SurveyJob(source_type="ARRAY_EXPRESS")
        survey_job.save()
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="experiment_accession_code",
                                           value="E-GEOD-22166")
        key_value_pair.save()

        surveyor.run_job(survey_job)
        logger.info("Started Survey Job %d, waiting for it to complete.", survey_job.id)
        survey_job = wait_for_job(survey_job, SurveyJob)
        self.assertTrue(survey_job.success)

        # .get() raises unless exactly one batch belongs to this survey job.
        batch = Batch.objects.filter(survey_job=survey_job).get()

        downloader_job = batch.downloaderjob_set.get()
        logger.info("Survey Job finished, waiting for Downloader Job %d to complete.",
                    downloader_job.id)
        downloader_job = wait_for_job(downloader_job, DownloaderJob)
        self.assertTrue(downloader_job.success)

        processor_job = batch.processorjob_set.get()
        logger.info("Downloader Job finished, waiting for processor Job %d to complete.",
                    processor_job.id)
        processor_job = wait_for_job(processor_job, ProcessorJob)
        self.assertTrue(processor_job.success)
Beispiel #4
0
def survey_ae_experiment(experiment_accession):
    """Create an ARRAY_EXPRESS survey job for the accession and run it."""
    job = SurveyJob(source_type="ARRAY_EXPRESS")
    job.save()
    SurveyJobKeyValue(
        survey_job=job, key="experiment_accession_code", value=experiment_accession
    ).save()
    run_job(job)
Beispiel #5
0
    def setUp(self):
        """Create a TRANSCRIPTOME_INDEX survey job for the EnsemblPlants division."""
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()
        self.survey_job = job

        SurveyJobKeyValue(
            survey_job=job, key="ensembl_division", value="EnsemblPlants"
        ).save()
Beispiel #6
0
def test():
    """Create an ARRAY_EXPRESS survey job for E-MTAB-3050 and run it."""
    job = SurveyJob(source_type="ARRAY_EXPRESS")
    job.save()
    SurveyJobKeyValue(
        survey_job=job, key="experiment_accession_code", value="E-MTAB-3050"
    ).save()
    run_job(job)
Beispiel #7
0
    def prep_test(self, experiment_accession):
        """Create a GEO survey job for the accession and keep it on ``self``."""
        job = SurveyJob(source_type="GEO")
        job.save()
        self.survey_job = job

        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value=experiment_accession
        ).save()
Beispiel #8
0
def queue_surveyor_for_accession(accession: str) -> None:
    """Create a surveyor job for the accession code and record the code on it."""
    # Surveyor jobs start at 256MB of RAM.
    job = SurveyJob(ram_amount=256)
    set_source_type_for_accession(job, accession)

    SurveyJobKeyValue(
        survey_job=job, key="experiment_accession_code", value=accession
    ).save()
    def create_job_for_accession(self, accession_code: str):
        """Create, save and return an ARRAY_EXPRESS survey job for the accession."""
        job = SurveyJob(source_type="ARRAY_EXPRESS")
        job.save()

        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value=accession_code
        ).save()

        return job
    def test_survey_bacteria(self, mock_send_job):
        """Survey one EnsemblBacteria organism and check the queued downloader job."""
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        job_settings = (
            ("ensembl_division", "EnsemblBacteria"),
            ("organism_name", "PSEUDOMONAS_AERUGINOSA"),
        )
        for key, value in job_settings:
            SurveyJobKeyValue(survey_job=job, key=key, value=value).save()

        index_surveyor = TranscriptomeIndexSurveyor(job)
        index_surveyor.survey(source_type="TRANSCRIPTOME_INDEX")

        queued_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertEqual(queued_jobs.count(), 1)
        expected_calls = [
            call(Downloaders.TRANSCRIPTOME_INDEX, queued_job) for queued_job in queued_jobs
        ]

        mock_send_job.assert_has_calls(expected_calls)

        # The lookup raises if the organism was not created with the
        # correct taxonomy id.
        Organism.objects.get(name="PSEUDOMONAS_AERUGINOSA", taxonomy_id=287)
    def test_single_plant(self):
        """ Tests that the files returned actually exist.

        Tests the Plants division instead of the main division.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblPlants")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Arabidopsis thaliana")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        for file in files:
            # Only reachability matters here: urlopen raises if the URL
            # cannot be opened. Close the response straight away so the
            # connection is not leaked.
            with urllib.request.urlopen(file.source_url):
                pass

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="ARABIDOPSIS_THALIANA")
    def test_correct_index_location_metazoa(self):
        """ Tests that the files returned actually exist.

        Tests the Metazoa division instead of the main division.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblMetazoa")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Octopus bimaculoides")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        for file in files:
            # Only reachability matters here: urlopen raises if the URL
            # cannot be opened. Close the response straight away so the
            # connection is not leaked.
            with urllib.request.urlopen(file.source_url):
                pass

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="OCTOPUS_BIMACULOIDES")
def queue_surveyor_for_accession(accession: str) -> "SurveyJob":
    """Create (but do not dispatch) a surveyor job for the accession code.

    The job's source type is set by ``set_source_type_for_accession`` and
    the accession is stored on the job as its
    ``experiment_accession_code`` key-value pair.

    Returns:
        The newly created survey job.
    """
    # Start at 1GB of RAM for surveyor jobs.
    survey_job = SurveyJob(ram_amount=1024)
    set_source_type_for_accession(survey_job, accession)

    key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                       key="experiment_accession_code",
                                       value=accession)
    key_value_pair.save()

    # We don't actually send the job here, we just create it.
    # The foreman will pick it up and dispatch it when the time is appropriate.
    return survey_job
    def test_correct_index_location(self):
        """ Tests that the files returned actually exist.

        Uses an organism in the main division.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="Ensembl")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Danio rerio")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="DANIO_RERIO")

        for file in files:
            # Only reachability matters here: urlopen raises if the URL
            # cannot be opened. Close the response straight away so the
            # connection is not leaked.
            with urllib.request.urlopen(file.source_url):
                pass
Beispiel #15
0
    def test_nonexistant_srp_survey(self):
        """Surveying an accession that does not exist marks the job as failed."""
        job = SurveyJob(source_type="SRA")
        job.save()
        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="ERP006216"
        ).save()

        run_job(job)

        # Reload the row so the assertions see what was persisted.
        job.refresh_from_db()
        self.assertFalse(job.success)
        self.assertEqual(job.failure_reason, "No experiment found.")
Beispiel #16
0
    def setUp(self):
        """Create an ARRAY_EXPRESS survey job for E-MTAB-3050 and its organism."""
        survey_job = SurveyJob(source_type="ARRAY_EXPRESS")
        survey_job.save()
        self.survey_job = survey_job

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="experiment_accession_code",
                                           value="E-MTAB-3050")
        key_value_pair.save()

        # Insert the organism into the database so the model doesn't call the
        # taxonomy API to populate it.
        organism = Organism(name="HOMO SAPIENS",
                            taxonomy_id=9606,
                            is_scientific_name=True)
        organism.save()
Beispiel #17
0
    def test_arrayexpress_alternate_accession(self):
        """ENA experiments should detect their ArrayExpress alternate accession."""
        job = SurveyJob(source_type="SRA")
        job.save()
        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="ERP108370"
        ).save()

        # Only the experiment is inspected; the samples are ignored.
        experiment, _ = SraSurveyor(job).discover_experiment_and_samples()

        self.assertEqual(experiment.accession_code, "ERP108370")
        self.assertEqual(experiment.alternate_accession_code, "E-MTAB-6681")
    def test_survey_fungi_none(self, mock_send_job):
        """Surveying fungi without an organism_name must queue nothing."""
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        SurveyJobKeyValue(
            survey_job=job, key="ensembl_division", value="EnsemblFungi"
        ).save()

        index_surveyor = TranscriptomeIndexSurveyor(job)
        index_surveyor.survey(source_type="TRANSCRIPTOME_INDEX")

        queued_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertEqual(queued_jobs.count(), 0)

        mock_send_job.assert_not_called()
Beispiel #19
0
def survey_experiment(experiment_accession: str, source_type: str):
    """Create and run a survey job for one experiment, then return the job.

    `source_type` names the external source the experiment comes from
    and must be one of:
      * SRA
      * GEO
      * ARRAY_EXPRESS
    """
    job = SurveyJob(source_type=source_type)
    job.save()
    SurveyJobKeyValue(
        survey_job=job, key="experiment_accession_code", value=experiment_accession
    ).save()
    run_job(job)

    return job
Beispiel #20
0
    def test_survey(self, mock_send_job):
        """Surveying all of EnsemblPlants queues more than 50 downloader jobs."""
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        SurveyJobKeyValue(
            survey_job=job, key="ensembl_division", value="EnsemblPlants"
        ).save()

        index_surveyor = TranscriptomeIndexSurveyor(job)
        index_surveyor.survey(source_type="TRANSCRIPTOME_INDEX")

        queued_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertGreater(queued_jobs.count(), 50)
        # Every downloader job should have been sent, in id order.
        expected_calls = [
            call(Downloaders.TRANSCRIPTOME_INDEX, queued_job) for queued_job in queued_jobs
        ]

        mock_send_job.assert_has_calls(expected_calls)
Beispiel #21
0
def survey_sra_experiments(start_accession, end_accession):
    """Create an SRA survey job for an accession range and run it."""
    job = SurveyJob(source_type="SRA")
    job.save()

    # The start of the range is saved before the end.
    accession_range = (
        ("start_accession", start_accession),
        ("end_accession", end_accession),
    )
    for key, value in accession_range:
        SurveyJobKeyValue(survey_job=job, key=key, value=value).save()

    run_job(job)
Beispiel #22
0
    def test_batch_created(self, mock_get):
        """A one-accession range yields a single batch with the expected fields."""
        mock_get.side_effect = mocked_requests_get

        # The same run accession is used for both ends of the range,
        # which gives a range of length 1.
        job = SurveyJob(source_type="SRA")
        job.save()
        for range_key in ("start_accession", "end_accession"):
            SurveyJobKeyValue(survey_job=job, key=range_key, value=RUN_ACCESSION).save()

        sra_surveyor = SraSurveyor(job)

        self.assertTrue(sra_surveyor.discover_batches())
        # A single run accession should produce exactly one batch.
        self.assertEqual(len(sra_surveyor.batches), 1)

        batch = sra_surveyor.batches[0]
        self.assertEqual(batch.survey_job.id, job.id)
        expected_batch_fields = (
            ("source_type", "SRA"),
            ("pipeline_required", "SALMON"),
            ("platform_accession_code", "IlluminaHiSeq2000"),
            ("experiment_accession_code", "DRX001563"),
            ("experiment_title", "Illumina HiSeq 2000 sequencing; Exp_Gg_HH16_1_embryo_mRNAseq"),
            ("status", "NEW"),
            ("release_date", "2013-07-19"),
            ("last_uploaded_date", "2017-08-11"),
            ("organism_id", 9031),
            ("organism_name", "GALLUS GALLUS"),
        )
        for field_name, expected_value in expected_batch_fields:
            self.assertEqual(getattr(batch, field_name), expected_value)

        first_file = batch.files[0]
        self.assertEqual(first_file.size_in_bytes, -1)
        expected_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116.fastq.gz"  # noqa
        self.assertEqual(first_file.download_url, expected_url)
        expected_file_fields = (
            ("raw_format", "fastq.gz"),
            ("processed_format", "tar.gz"),
            ("name", "DRR002116.fastq.gz"),
            ("internal_location", "IlluminaHiSeq2000/SALMON"),
        )
        for field_name, expected_value in expected_file_fields:
            self.assertEqual(getattr(first_file, field_name), expected_value)
Beispiel #23
0
    def create_survey_job(self):
        """Save and return an SRA survey job with one accession key-value pair."""
        survey_job = SurveyJob(
            source_type="SRA",
            nomad_job_id="SURVEYOR/dispatch-1528945054-e8eaf540",
            num_retries=0,
            success=None,
        )
        survey_job.save()

        accession_pair = SurveyJobKeyValue()
        accession_pair.survey_job = survey_job
        accession_pair.key = "experiment_accession_code"
        accession_pair.value = "RJ-1234-XYZ"
        accession_pair.save()

        return survey_job
Beispiel #24
0
    def test_discover_batches(self, mock_generate_batch):
        """discover_batches generates a batch for every accession in the range."""
        job = SurveyJob(source_type="SRA")
        job.save()

        accession_range = (
            ("start_accession", "DRR012345"),
            ("end_accession", "DRR012348"),
        )
        for key, value in accession_range:
            SurveyJobKeyValue(survey_job=job, key=key, value=value).save()

        SraSurveyor(job).discover_batches()

        expected_accessions = ("DRR012345", "DRR012346", "DRR012347", "DRR012348")
        mock_generate_batch.assert_has_calls(
            [call(accession) for accession in expected_accessions]
        )
Beispiel #25
0
def set_source_type_for_accession(survey_job, accession: str) -> None:
    """Set and save the job's source type based on the accession's structure.

    A "GSE" prefix means GEO and an "E-" prefix means ARRAY_EXPRESS. An
    accession containing a space is treated as a TRANSCRIPTOME_INDEX
    request of the form "<organism name>[, <ensembl division>]".
    Anything else is SRA.
    """
    if accession.startswith("GSE"):
        source_type = "GEO"
    elif accession.startswith("E-"):
        source_type = "ARRAY_EXPRESS"
    elif " " in accession:
        source_type = "TRANSCRIPTOME_INDEX"
    else:
        source_type = "SRA"

    survey_job.source_type = source_type
    survey_job.save()

    if source_type != "TRANSCRIPTOME_INDEX":
        return

    parts = accession.split(",")
    # Allow organism to be unspecified so we survey the entire division.
    organism_name = parts[0] or None
    ensembl_division = parts[1].strip() if len(parts) > 1 else "Ensembl"

    SurveyJobKeyValue(
        survey_job=survey_job, key="ensembl_division", value=ensembl_division
    ).save()
    if organism_name:
        SurveyJobKeyValue(
            survey_job=survey_job, key="organism_name", value=organism_name
        ).save()
    def test_correct_index_location_protist(self):
        """ Tests that the files returned actually exist.

        Tests the Protists division instead of the main division.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblProtists")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Leishmania major")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        for file in files:
            # Only reachability matters here: urlopen raises if the URL
            # cannot be opened. Close the response straight away so the
            # connection is not leaked.
            with urllib.request.urlopen(file.source_url):
                pass
Beispiel #27
0
def survey_transcriptome_index(organism_name=None, ensembl_division='Ensembl'):
    """Special one-off surveyor to build transcriptome indices.

    The external source is ensembl.org, which is split into several
    divisions; one call surveys exactly one division. When an
    `organism_name` is given only that organism is surveyed, otherwise
    the entire division is.

    Returns the survey job after it has been run.
    """
    job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
    job.save()

    job_settings = [("ensembl_division", ensembl_division)]
    if organism_name:
        job_settings.append(("organism_name", organism_name))
    for key, value in job_settings:
        SurveyJobKeyValue(survey_job=job, key=key, value=value).save()

    run_job(job)

    return job
    def test_correct_index_location(self):
        """ Tests that the files returned actually exist.

        Uses an organism in the main division.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()
        self.survey_job = survey_job

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="Ensembl")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Danio rerio")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(self.survey_job)
        files = surveyor.discover_species()[0]

        for file in files:
            # Only reachability matters here: urlopen raises if the URL
            # cannot be opened. Close the response straight away so the
            # connection is not leaked.
            with urllib.request.urlopen(file.source_url):
                pass
Beispiel #29
0
    def test_srp_survey(self, mock_send_job):
        """A slightly harder test of the SRA surveyor.

        Surveys three experiments in turn and checks each one's accession,
        alternate accession and number of discovered samples.
        """
        # (accession, expected alternate accession, expected sample count)
        cases = (
            ("SRP068364", "GSE76780", 4),
            ("SRP111553", "GSE101204", 16),  # 8 samples with 2 runs each
            ("DRP003977", None, 9),
        )
        for accession, alternate_accession, sample_count in cases:
            job = SurveyJob(source_type="SRA")
            job.save()
            SurveyJobKeyValue(
                survey_job=job, key="experiment_accession_code", value=accession
            ).save()

            experiment, samples = SraSurveyor(job).discover_experiment_and_samples()

            self.assertEqual(experiment.accession_code, accession)
            self.assertEqual(experiment.alternate_accession_code, alternate_accession)
            self.assertEqual(len(samples), sample_count)
Beispiel #30
0
    def test_geo_survey_microarray(self, mock_send_task):
        """Test that purging an experiment leaves shared samples alone.

        Samples that also belong to other experiments must not be
        deleted. So a superseries and one of its sub-experiments are both
        surveyed, the superseries is purged, and the sub-experiment is
        checked to be untouched.

        The send function is mocked out so nothing is actually processed.
        The purge code related to ComputedFile, ComputationalResult, and
        ProcessorJobs isn't covered by this test.
        """
        superseries_accession = "GSE59795"
        sub_experiment_accession = "GSE46580"

        def survey(accession):
            # Create a GEO survey job for the accession and survey it.
            job = SurveyJob(source_type="GEO")
            job.save()
            SurveyJobKeyValue(
                survey_job=job, key="experiment_accession_code", value=accession
            ).save()
            GeoSurveyor(job).survey()

        def samples_of(accession):
            # Samples associated with the experiment that has this accession.
            experiment = Experiment.objects.filter(accession_code=accession)[0]
            associations = ExperimentSampleAssociation.objects.filter(experiment=experiment)
            return Sample.objects.filter(id__in=associations.values('sample_id'))

        def original_files_of(samples):
            # Original files associated with any of the given samples.
            associations = OriginalFileSampleAssociation.objects.filter(
                sample_id__in=samples.values('id')
            )
            return OriginalFile.objects.filter(id__in=associations.values('original_file_id'))

        # Survey the superseries, then the sub-experiment.
        survey(superseries_accession)
        survey(sub_experiment_accession)

        # Establish baselines before purge
        sub_samples = samples_of(sub_experiment_accession)
        self.assertEqual(sub_samples.count(), 4)
        self.assertEqual(original_files_of(sub_samples).count(), 4)

        super_samples = samples_of(superseries_accession)
        self.assertEqual(super_samples.count(), 20)
        self.assertEqual(original_files_of(super_samples).count(), 20)

        # Purge the superseries
        purge_experiment(superseries_accession)

        # Make sure the subexperiment samples weren't affected.
        remaining_samples = samples_of(sub_experiment_accession)
        self.assertEqual(remaining_samples.count(), 4)

        # Make sure sub-experiment original files weren't affected.
        self.assertEqual(original_files_of(remaining_samples).count(), 4)

        # And that samples and files that remain are from the subseries.
        self.assertEqual(Sample.objects.count(), 4)
        self.assertEqual(OriginalFile.objects.count(), 4)