def test_geo_survey_agilent(self, mock_send_task):
    """Survey GSE35186, whose samples are on an Agilent microarray platform.

    The samples should be created with their platform metadata, but no
    downloader jobs should be queued because Agilent platforms are not
    currently supported.
    """
    self.prep_test("GSE35186")
    GeoSurveyor(self.survey_job).survey()

    self.assertEqual(124, Sample.objects.count())

    first_sample = Sample.objects.first()
    expected_platform_name = "Agilent-014850 Whole Human Genome Microarray 4x44K G4112F (Probe Name version)"
    self.assertEqual(first_sample.platform_name, expected_platform_name)
    self.assertEqual(first_sample.platform_accession_code, "GPL6480")

    # Agilent platforms are not supported at the moment, so the accession
    # cannot be matched to a platform we know about.
    self.assertEqual(first_sample.technology, "UNKNOWN")

    # There would be 124 samples + 2 metadata files to fetch. However,
    # since Agilent is unsupported for now, nothing should be queued.
    self.assertEqual(0, DownloaderJob.objects.count())
def test_geo_survey_microarray(self, mock_send_task):
    """Survey GSE11915 and check samples, protocol info, and queued downloads.

    The first sample is expected to be on the Affymetrix HG-U133A
    microarray platform, and 45 downloader jobs / original files are
    expected in total.
    """
    # NOTE(review): a later definition in this file reuses this method
    # name (its body exercises purge_experiment). If both live in the same
    # class the later one replaces this one -- confirm.
    self.prep_test("GSE11915")
    GeoSurveyor(self.survey_job).survey()

    self.assertEqual(34, Sample.objects.count())

    first_sample = Sample.objects.first()
    self.assertEqual(first_sample.platform_name, "[HG-U133A] Affymetrix Human Genome U133A Array")
    self.assertEqual(first_sample.platform_accession_code, "hgu133a")
    self.assertEqual(first_sample.technology, "MICROARRAY")

    # Confirm the protocol_info recorded for one specific sample.
    protocol = Sample.objects.get(accession_code="GSM299800").protocol_info
    expected_extraction = [
        'Chromatin IP performed as described in Odom et al., Science 303, 1378 (Feb 27, 2004)'
    ]
    self.assertEqual(protocol['Extraction protocol'], expected_extraction)
    self.assertEqual(protocol['Data processing'], ['Z-score normalization'])

    self.assertEqual(45, DownloaderJob.objects.count())

    # Make sure there aren't extra OriginalFiles.
    self.assertEqual(45, OriginalFile.objects.count())
def test_geo_survey_rnaseq(self, mock_send_task):
    """Survey GSE99264 (Illumina RNA-Seq) and expect one file to download.

    Checks the sample count and platform metadata, and that exactly one
    downloader job and one original file were created.
    """
    # NOTE(review): another test with this same name is defined later in
    # this file and expects 0 downloader jobs. If both live in the same
    # class the later one replaces this one -- confirm which is current.
    self.prep_test("GSE99264")
    GeoSurveyor(self.survey_job).survey()

    self.assertEqual(7, Sample.objects.count())

    first_sample = Sample.objects.first()
    self.assertEqual(first_sample.platform_name, "Illumina Genome Analyzer II")
    self.assertEqual(first_sample.platform_accession_code, "Illumina Genome Analyzer II")
    self.assertEqual(first_sample.technology, "RNA-SEQ")

    self.assertEqual(1, DownloaderJob.objects.count())

    # Make sure there aren't extra OriginalFiles.
    self.assertEqual(1, OriginalFile.objects.count())
def test_geo_survey_superseries(self, mock_send_task):
    """Survey the super series GSE103217 and check what gets queued.

    Downloader jobs should only be queued for the microarray samples;
    RNA-Seq samples coming from GEO must not get downloader jobs.
    """
    self.prep_test("GSE103217")
    GeoSurveyor(self.survey_job).survey()

    # 28 samples in total.
    self.assertEqual(28, Sample.objects.count())

    # 10 of them are microarray, so they need downloader jobs.
    microarray_count = Sample.objects.filter(technology='MICROARRAY').count()
    self.assertEqual(10, microarray_count)
    self.assertEqual(10, DownloaderJob.objects.count())

    # The other 18 are RNA-Seq, so they won't have downloader jobs.
    rna_seq_count = Sample.objects.filter(technology='RNA-SEQ').count()
    self.assertEqual(18, rna_seq_count)

    # Make sure there aren't extra OriginalFiles.
    self.assertEqual(10, OriginalFile.objects.count())
def test_geo_survey_not_agilent(self, mock_send_task):
    """Check that the manufacturer is set correctly when surveying GSE34198.

    The first sample created by the survey is expected to have
    "ILLUMINA" as its manufacturer.
    """
    self.prep_test("GSE34198")
    GeoSurveyor(self.survey_job).survey()

    first_sample = Sample.objects.first()
    self.assertEqual(first_sample.manufacturer, "ILLUMINA")
def test_geo_survey_rnaseq(self, mock_send_task):
    """Survey GSE99264 (Illumina RNA-Seq) and expect discovery only.

    The experiment's samples should be discovered with their platform
    metadata, but no downloader jobs should be queued because this is
    RNA-Seq data coming from GEO.
    """
    # NOTE(review): another test with this same name appears earlier in
    # this file and expects 1 downloader job. If both live in the same
    # class this definition replaces the earlier one -- confirm.
    self.prep_test("GSE99264")
    GeoSurveyor(self.survey_job).survey()

    self.assertEqual(7, Sample.objects.count())

    first_sample = Sample.objects.first()
    self.assertEqual(first_sample.platform_name, "Illumina Genome Analyzer II")
    self.assertEqual(first_sample.platform_accession_code, "Illumina Genome Analyzer II")
    self.assertEqual(first_sample.technology, "RNA-SEQ")

    self.assertEqual(0, DownloaderJob.objects.count())
def test_geo_survey_microarray(self, mock_send_task):
    """Test that the unsurveyor (purge_experiment) works correctly.

    Purging must not delete samples that also belong to other
    experiments. To check this, a superseries and one of its
    sub-experiments are both surveyed, the superseries is purged, and the
    sub-experiment's samples and original files are verified to be intact.

    Job dispatch is mocked out so that nothing is actually processed.
    The unsurveyor code related to ComputedFile, ComputationalResult, and
    ProcessorJobs is not covered here, but it has been functionally
    tested.
    """
    # NOTE(review): the method name says "microarray" but the body tests
    # purge_experiment, and an earlier test in this file has the same
    # name. If both live in the same class this definition replaces the
    # earlier one -- confirm the intended name.
    superseries_accession = "GSE59795"
    sub_experiment_accession = "GSE46580"

    def run_survey(accession_code):
        # Create a GEO survey job for one experiment and run the surveyor.
        job = SurveyJob(source_type="GEO")
        job.save()
        SurveyJobKeyValue(
            survey_job=job,
            key="experiment_accession_code",
            value=accession_code,
        ).save()
        GeoSurveyor(job).survey()

    def samples_and_files_for(accession_code):
        # Build the (lazy) Sample and OriginalFile querysets reachable
        # from the experiment with the given accession code.
        experiment = Experiment.objects.filter(accession_code=accession_code)[0]
        sample_links = ExperimentSampleAssociation.objects.filter(experiment=experiment)
        linked_samples = Sample.objects.filter(id__in=sample_links.values('sample_id'))
        file_links = OriginalFileSampleAssociation.objects.filter(
            sample_id__in=linked_samples.values('id')
        )
        linked_files = OriginalFile.objects.filter(
            id__in=file_links.values('original_file_id')
        )
        return linked_samples, linked_files

    # Survey the superseries first, then the sub-experiment.
    run_survey(superseries_accession)
    run_survey(sub_experiment_accession)

    # Establish baselines before the purge.
    sub_samples, sub_files = samples_and_files_for(sub_experiment_accession)
    self.assertEqual(sub_samples.count(), 4)
    self.assertEqual(sub_files.count(), 4)

    super_samples, super_files = samples_and_files_for(superseries_accession)
    self.assertEqual(super_samples.count(), 20)
    self.assertEqual(super_files.count(), 20)

    # Purge the superseries.
    purge_experiment(superseries_accession)

    # The sub-experiment's samples and original files must be unaffected.
    sub_samples, sub_files = samples_and_files_for(sub_experiment_accession)
    self.assertEqual(sub_samples.count(), 4)
    self.assertEqual(sub_files.count(), 4)

    # And everything that remains must belong to the sub-experiment.
    self.assertEqual(Sample.objects.count(), 4)
    self.assertEqual(OriginalFile.objects.count(), 4)