Beispiel #1
0
    def test_geo_survey_agilent(self, mock_send_task):
        """Survey the Agilent microarray series GSE35186 from GEO.

        The samples must be recorded together with their platform
        metadata, but no downloader jobs may be queued for them.
        """
        self.prep_test("GSE35186")
        GeoSurveyor(self.survey_job).survey()

        self.assertEqual(124, Sample.objects.count())

        first_sample = Sample.objects.first()
        expected_platform_name = (
            "Agilent-014850 Whole Human Genome Microarray 4x44K G4112F (Probe Name version)"
        )
        self.assertEqual(first_sample.platform_name, expected_platform_name)
        self.assertEqual(first_sample.platform_accession_code, "GPL6480")
        # Agilent platforms are not currently supported, so the accession
        # cannot be matched to a known one and the technology stays unknown.
        self.assertEqual(first_sample.technology, "UNKNOWN")

        # Supporting Agilent would mean 124 samples + 2 metadata files
        # to download; while it is unsupported nothing should be queued.
        self.assertEqual(0, DownloaderJob.objects.count())
Beispiel #2
0
    def test_geo_survey_microarray(self, mock_send_task):
        """Survey the microarray series GSE11915 from GEO.

        The platform asserted here is the Affymetrix HG-U133A array.
        Checks the sample count, the platform metadata of the first
        sample, the protocol info of sample GSM299800, and the number of
        downloader jobs and original files that get created.
        """
        self.prep_test("GSE11915")
        GeoSurveyor(self.survey_job).survey()

        self.assertEqual(34, Sample.objects.count())

        first_sample = Sample.objects.first()
        self.assertEqual(
            first_sample.platform_name,
            "[HG-U133A] Affymetrix Human Genome U133A Array",
        )
        self.assertEqual(first_sample.platform_accession_code, "hgu133a")
        self.assertEqual(first_sample.technology, "MICROARRAY")

        # Spot-check the protocol information stored for one known sample.
        protocol_info = Sample.objects.get(accession_code="GSM299800").protocol_info
        expected_extraction_protocol = [
            'Chromatin IP performed as described in Odom et al., Science 303, 1378 (Feb 27, 2004)'
        ]
        self.assertEqual(protocol_info['Extraction protocol'],
                         expected_extraction_protocol)
        self.assertEqual(protocol_info['Data processing'],
                         ['Z-score normalization'])

        self.assertEqual(45, DownloaderJob.objects.count())

        # There must be exactly as many OriginalFiles, and no extras.
        self.assertEqual(45, OriginalFile.objects.count())
Beispiel #3
0
    def test_geo_survey_rnaseq(self, mock_send_task):
        """Survey the Illumina RNA-Seq series GSE99264 from GEO.

        Checks the sample count, the platform metadata of the first
        sample, and that exactly one downloader job and one original
        file are created.
        """
        self.prep_test("GSE99264")
        GeoSurveyor(self.survey_job).survey()

        self.assertEqual(7, Sample.objects.count())

        first_sample = Sample.objects.first()
        platform = "Illumina Genome Analyzer II"
        # The platform name doubles as the accession code for this sample.
        self.assertEqual(first_sample.platform_name, platform)
        self.assertEqual(first_sample.platform_accession_code, platform)
        self.assertEqual(first_sample.technology, "RNA-SEQ")

        self.assertEqual(1, DownloaderJob.objects.count())

        # No OriginalFiles beyond the single expected one.
        self.assertEqual(1, OriginalFile.objects.count())
Beispiel #4
0
    def test_geo_survey_superseries(self, mock_send_task):
        """Survey the GEO SuperSeries GSE103217, which mixes technologies.

        All samples must be recorded, but downloader jobs and original
        files are only expected for the microarray samples; RNA-Seq
        samples coming from GEO must not get downloader jobs.
        """
        self.prep_test("GSE103217")
        GeoSurveyor(self.survey_job).survey()

        # The series holds 28 samples overall.
        self.assertEqual(28, Sample.objects.count())

        # 10 are microarray samples, each needing a downloader job.
        microarray_count = Sample.objects.filter(technology='MICROARRAY').count()
        self.assertEqual(10, microarray_count)
        self.assertEqual(10, DownloaderJob.objects.count())

        # The other 18 are RNA-Seq and get no downloader jobs.
        rna_seq_count = Sample.objects.filter(technology='RNA-SEQ').count()
        self.assertEqual(18, rna_seq_count)

        # OriginalFiles exist for the microarray samples only.
        self.assertEqual(10, OriginalFile.objects.count())
Beispiel #5
0
    def test_geo_survey_not_agilent(self, mock_send_task):
        """Check that the manufacturer is set correctly when surveying.

        After surveying GSE34198 the first sample must report
        "ILLUMINA" as its manufacturer.
        """
        self.prep_test("GSE34198")
        GeoSurveyor(self.survey_job).survey()

        first_sample = Sample.objects.first()
        self.assertEqual(first_sample.manufacturer, "ILLUMINA")
Beispiel #6
0
    def test_geo_survey_rnaseq(self, mock_send_task):
        """Survey the Illumina RNA-Seq series GSE99264 from GEO.

        The experiment's samples must be discovered, but because it's
        RNA-Seq data coming from GEO no downloader jobs may be queued.
        """
        self.prep_test("GSE99264")
        GeoSurveyor(self.survey_job).survey()

        self.assertEqual(7, Sample.objects.count())

        first_sample = Sample.objects.first()
        platform = "Illumina Genome Analyzer II"
        # The platform name doubles as the accession code for this sample.
        self.assertEqual(first_sample.platform_name, platform)
        self.assertEqual(first_sample.platform_accession_code, platform)
        self.assertEqual(first_sample.technology, "RNA-SEQ")

        self.assertEqual(0, DownloaderJob.objects.count())
Beispiel #7
0
    def test_geo_survey_microarray(self, mock_send_task):
        """Check that purging an experiment leaves shared samples intact.

        A superseries and one of its sub-experiments are both surveyed,
        then the superseries is purged. The samples and original files
        of the sub-experiment must survive, and nothing else may remain.

        Nothing surveyed here is actually processed, so the purge code
        dealing with ComputedFile, ComputationalResult and ProcessorJobs
        is not exercised by this test.

        NOTE(review): the method name says "microarray" although the
        body tests purging; `mock_send_task` is presumably injected by a
        patch decorator that is not visible here -- confirm.
        """
        superseries_accession = "GSE59795"
        sub_experiment_accession = "GSE46580"

        def survey_accession(accession_code):
            # Create a GEO survey job for the accession and run it.
            job = SurveyJob(source_type="GEO")
            job.save()

            accession_pair = SurveyJobKeyValue(survey_job=job,
                                               key="experiment_accession_code",
                                               value=accession_code)
            accession_pair.save()

            GeoSurveyor(job).survey()

        def samples_of(accession_code):
            # Samples linked to the first experiment with this accession.
            found_experiment = Experiment.objects.filter(accession_code=accession_code)[0]
            links = ExperimentSampleAssociation.objects.filter(experiment=found_experiment)
            return Sample.objects.filter(id__in=links.values('sample_id'))

        def original_files_of(sample_queryset):
            # Original files linked to any of the given samples.
            links = OriginalFileSampleAssociation.objects.filter(
                sample_id__in=sample_queryset.values('id'))
            return OriginalFile.objects.filter(id__in=links.values('original_file_id'))

        # Survey the superseries first, then the sub-experiment.
        survey_accession(superseries_accession)
        survey_accession(sub_experiment_accession)

        # Baselines before the purge: sub-experiment ...
        sub_samples = samples_of(sub_experiment_accession)
        self.assertEqual(sub_samples.count(), 4)
        self.assertEqual(original_files_of(sub_samples).count(), 4)

        # ... and superseries.
        super_samples = samples_of(superseries_accession)
        self.assertEqual(super_samples.count(), 20)
        self.assertEqual(original_files_of(super_samples).count(), 20)

        # Purge the superseries.
        purge_experiment(superseries_accession)

        # The sub-experiment's samples and original files are untouched.
        remaining_samples = samples_of(sub_experiment_accession)
        self.assertEqual(remaining_samples.count(), 4)
        self.assertEqual(original_files_of(remaining_samples).count(), 4)

        # Whatever remains overall belongs to the sub-experiment.
        self.assertEqual(Sample.objects.count(), 4)
        self.assertEqual(OriginalFile.objects.count(), 4)