def test_get_sample_keywords(self):
    """Check that an experiment reports the keywords attached to its samples.

    One experiment with a single sample is stored, the sample is tagged with
    one ontology-term keyword, and ``get_sample_keywords`` is expected to
    produce exactly that term's human readable name.
    """
    experiment = Experiment()
    experiment.save()

    sample = Sample(title="123", accession_code="123", age=23)
    sample.save()

    ExperimentSampleAssociation(sample=sample, experiment=experiment).save()

    term = OntologyTerm(
        ontology_term="EFO:0002939",
        human_readable_name="medulloblastoma",
    )
    term.save()

    # get_or_create returns (object, created); only the object is needed.
    contribution, _created = Contribution.objects.get_or_create(
        source_name="Refinebio Tests", methods_url="ccdatalab.org"
    )
    keyword = SampleKeyword(name=term, source=contribution, sample=sample)
    keyword.save()

    expected_keywords = {"medulloblastoma"}
    self.assertEqual(set(experiment.get_sample_keywords()), expected_keywords)
def make_test_data(organism):
    """Store one processed RNA-SEQ sample, with computed files, for ``organism``.

    Creates experiment ``GSE51088`` linked to ``organism``, sample
    ``GSM1237818`` linked to that experiment and to a new computational
    result, and two smashable computed files belonging to that result.
    Only the second file (``logquant.tsv``) is associated with the sample.
    """
    experiment = Experiment(accession_code="GSE51088", technology="RNA-SEQ")
    experiment.save()

    ExperimentOrganismAssociation(experiment=experiment, organism=organism).save()

    result = ComputationalResult()
    result.save()

    sample = Sample(
        accession_code="GSM1237818",
        title="GSM1237818",
        organism=organism,
        technology="RNA-SEQ",
        is_processed=True,
    )
    sample.save()

    SampleResultAssociation(sample=sample, result=result).save()
    ExperimentSampleAssociation(experiment=experiment, sample=sample).save()

    quant_file = ComputedFile(
        s3_key="smasher-test-quant.sf",
        s3_bucket="data-refinery-test-assets",
        filename="quant.sf",
        absolute_file_path="/home/user/data_store/QUANT/smasher-test-quant.sf",
        result=result,
        is_smashable=True,
        size_in_bytes=123123,
        # this matches with the downloaded file
        sha1="08c7ea90b66b52f7cd9d9a569717a1f5f3874967",
    )
    quant_file.save()

    logquant_file = ComputedFile(
        filename="logquant.tsv",
        is_smashable=True,
        size_in_bytes=123123,
        result=result,
    )
    logquant_file.save()

    # Only the logquant file is tied directly to the sample.
    SampleComputedFileAssociation(sample=sample, computed_file=logquant_file).save()
def test_qn_reference(self, mock_send_job):
    """Run the command for two organisms and count the resulting jobs.

    400 processed MICROARRAY samples are stored under one experiment: the
    first 200 for HOMO_SAPIENS and the remaining 200 for MUS_MUSCULUS. The
    command is then invoked for both organisms and the test expects two
    calls on ``mock_send_job`` and two ``ProcessorJob`` rows.

    NOTE(review): ``mock_send_job`` is presumably injected by a patch
    decorator outside this block — confirm.
    """
    organism = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
    organism.save()

    experiment = Experiment(accession_code="12345")
    experiment.save()

    for index in range(1, 401):
        code = str(index)

        sample = Sample(
            accession_code=code,
            title=code,
            platform_name=f"Affymetrix {organism.name}",
            platform_accession_code=f"A-MEXP-{organism.name}",
            manufacturer="AFFYMETRIX",
            organism=organism,
            technology="MICROARRAY",
            is_processed=True,
            has_raw=True,
        )
        sample.save()

        result = ComputationalResult()
        result.save()

        computed_file = ComputedFile(
            filename=code + ".tsv",
            absolute_file_path="/home/user/data_store/QN/" + code + ".tsv",
            size_in_bytes=index,
            result=result,
            is_smashable=True,
        )
        computed_file.save()

        SampleComputedFileAssociation(sample=sample, computed_file=computed_file).save()
        ExperimentSampleAssociation(experiment=experiment, sample=sample).save()

        # We need more than one organism for the tests, but can't
        # repeat accession codes, so halfway through just change the organism.
        if index == 200:
            organism = Organism(name="MUS_MUSCULUS", taxonomy_id=111)
            organism.save()

    # Setup is done, actually run the command.
    Command().handle(organisms="HOMO_SAPIENS,MUS_MUSCULUS")

    self.assertEqual(len(mock_send_job.mock_calls), 2)
    self.assertEqual(ProcessorJob.objects.count(), 2)
def prepare_experiment(ids: List[int]) -> Experiment:
    """Create experiment ``12345`` with one processed sample per entry of ``ids``.

    Each id is converted to a string and used as the sample's accession code
    and title. Every sample is assigned to HOMO_SAPIENS, gets its own
    computational result with one smashable ``<code>.tsv`` computed file
    (whose ``size_in_bytes`` is the numeric id), and is linked to the
    experiment.

    Args:
        ids: Numeric identifiers to create samples for.

    Returns:
        The saved experiment that all created samples are associated with.
    """
    homo_sapiens, _created = Organism.objects.get_or_create(
        name="HOMO_SAPIENS", taxonomy_id=9606
    )

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()

    for code in [str(i) for i in ids]:
        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_accession_code = "A-MEXP-1171"
        sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
        sample.organism = homo_sapiens
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.save()

        result = ComputationalResult()
        result.save()

        computed_file = ComputedFile()
        computed_file.filename = code + ".tsv"
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
        computed_file.size_in_bytes = int(code)
        computed_file.result = result
        computed_file.is_smashable = True
        computed_file.save()

        sample_file_link = SampleComputedFileAssociation()
        sample_file_link.sample = sample
        sample_file_link.computed_file = computed_file
        sample_file_link.save()

        experiment_sample_link = ExperimentSampleAssociation()
        experiment_sample_link.experiment = experiment
        experiment_sample_link.sample = sample
        experiment_sample_link.save()

    # The signature promises an Experiment; hand it back so callers can use it
    # instead of receiving None.
    return experiment
def create_sample_for_experiment(sample_info: Dict, experiment: Experiment) -> Sample:
    """Create a sample described by ``sample_info`` and attach it to ``experiment``.

    ``sample_info`` must provide ``accession_code``, ``organism`` and
    ``technology``. ``title`` is optional and falls back to the accession
    code when missing or falsy. When ``filename`` is present (not ``None``),
    ``data_dir`` is also required and a smashable computed file located at
    ``data_dir + filename`` is created and linked to the sample.

    Returns:
        The saved sample.
    """
    result = ComputationalResult()
    result.save()

    sample = Sample(
        accession_code=sample_info["accession_code"],
        title=sample_info.get("title", None) or sample_info["accession_code"],
        organism=sample_info["organism"],
        technology=sample_info["technology"],
    )
    sample.save()

    SampleResultAssociation(sample=sample, result=result).save()
    ExperimentSampleAssociation(experiment=experiment, sample=sample).save()

    # Without a filename there is no computed file to register.
    if sample_info.get("filename") is None:
        return sample

    computed_file = ComputedFile(
        filename=sample_info["filename"],
        absolute_file_path=sample_info["data_dir"] + sample_info["filename"],
        result=result,
        size_in_bytes=123,
        is_smashable=True,
    )
    computed_file.save()

    SampleComputedFileAssociation(sample=sample, computed_file=computed_file).save()

    return sample
def setUpClass(cls):
    """Set up class.

    Stores two experiments and two samples. The second sample (``789``) is
    given an organism, an annotation, an original file with one downloader
    job and one processor job, a link to the second experiment, and two
    computational results. The first result is annotated as a QN target and
    set as the organism's ``qn_target``. Finally the cache is cleared and
    the search index rebuilt.
    """
    super(ESTestCases, cls).setUpClass()  # ref https://stackoverflow.com/a/29655301/763705

    first_experiment = Experiment(
        accession_code="GSE000-X",
        title="NONONONO",
        description="Boooooourns. Wasabi.",
        technology="RNA-SEQ",
    )
    first_experiment.save()

    experiment = Experiment(
        accession_code="GSE123-X",
        title="Hey Ho Let's Go",
        description="This is a very exciting test experiment. Faygo soda. Blah blah blah.",
        technology="MICROARRAY",
        num_processed_samples=1,  # added below
        num_total_samples=1,
        num_downloadable_samples=1,
    )
    experiment.save()

    ExperimentAnnotation(
        data={"hello": "world", "123": 456}, experiment=experiment
    ).save()

    # This sample is saved but never linked to anything else in this method.
    unlinked_sample = Sample(title="123", accession_code="123")
    unlinked_sample.save()

    organism = Organism(
        name="AILUROPODA_MELANOLEUCA", taxonomy_id=9646, is_scientific_name=True
    )
    organism.save()

    sample = Sample(
        title="789", accession_code="789", is_processed=True, organism=organism
    )
    sample.save()

    SampleAnnotation(data={"goodbye": "world", "789": 123}, sample=sample).save()

    original_file = OriginalFile()
    original_file.save()

    OriginalFileSampleAssociation(sample=sample, original_file=original_file).save()

    downloader_job = DownloaderJob()
    downloader_job.save()

    DownloaderJobOriginalFileAssociation(
        original_file=original_file, downloader_job=downloader_job
    ).save()

    processor_job = ProcessorJob()
    processor_job.save()

    ProcessorJobOriginalFileAssociation(
        original_file=original_file, processor_job=processor_job
    ).save()

    # associate the experiment with the sample
    ExperimentSampleAssociation(sample=sample, experiment=experiment).save()

    qn_result = ComputationalResult()
    qn_result.save()

    # and create a qn target for the sample
    ComputationalResultAnnotation(
        result=qn_result,
        data={"is_qn": True, "organism_id": sample.organism.id},
    ).save()

    # and associate it with the sample organism
    sample.organism.qn_target = qn_result
    sample.organism.save()

    SampleResultAssociation(sample=sample, result=qn_result).save()

    second_result = ComputationalResult()
    second_result.save()

    SampleResultAssociation(sample=sample, result=second_result).save()

    # clear default cache and reindex
    # otherwise the organisms with qn_targets will be cached.
    cache.clear()
    call_command("search_index", "--rebuild", "-f")
def test_dataset_stats(self):
    """Create a dataset through the API and fetch its samples by dataset id.

    Two experiments (``XYZ123`` and ``ABC789``) and three samples are stored.
    The posted dataset selects sample ``1`` from the first experiment and
    sample ``2`` from the second, so the samples endpoint filtered by the new
    ``dataset_id`` is expected to report a count of 2.
    """
    gallus_gallus = Organism(name="GALLUS_GALLUS", taxonomy_id=9031, is_scientific_name=True)
    gallus_gallus.save()

    equus_ferus = Organism(name="EQUUS_FERUS", taxonomy_id=1114792, is_scientific_name=True)
    equus_ferus.save()

    microarray_experiment = Experiment(
        accession_code="XYZ123",
        title="XYZ123",
        description="XYZ123",
        technology="MICROARRAY",
        submitter_institution="XYZ123",
    )
    microarray_experiment.save()

    rnaseq_experiment = Experiment(
        accession_code="ABC789",
        title="ABC789",
        description="ABC789",
        technology="RNA-SEQ",
        submitter_institution="Funkytown",
    )
    rnaseq_experiment.save()

    # (accession code / title, platform name, organism), saved in this order.
    sample_specs = (
        ("1", "AFFY", self.homo_sapiens),
        ("2", "ILLUMINA", gallus_gallus),
        ("3", "ILLUMINA", gallus_gallus),
    )
    samples = []
    for code, platform_name, organism in sample_specs:
        sample = Sample(
            title=code,
            accession_code=code,
            platform_name=platform_name,
            organism=organism,
        )
        sample.save()
        samples.append(sample)

    experiment_organisms = (
        (microarray_experiment, self.homo_sapiens),
        (rnaseq_experiment, gallus_gallus),
        (rnaseq_experiment, equus_ferus),
    )
    for experiment, organism in experiment_organisms:
        ExperimentOrganismAssociation(experiment=experiment, organism=organism).save()

    owning_experiments = (microarray_experiment, rnaseq_experiment, rnaseq_experiment)
    for sample, experiment in zip(samples, owning_experiments):
        ExperimentSampleAssociation(sample=sample, experiment=experiment).save()

    jdata = json.dumps({"data": {"XYZ123": ["1"], "ABC789": ["2"]}})
    create_url = reverse("create_dataset", kwargs={"version": API_VERSION})
    response = self.client.post(create_url, jdata, content_type="application/json")

    self.assertEqual(response.status_code, 201)
    self.assertEqual(response.json()["data"], json.loads(jdata)["data"])
    good_id = response.json()["id"]

    # Check that we can fetch these sample details via samples API
    samples_url = reverse("samples", kwargs={"version": API_VERSION})
    response = self.client.get(samples_url, {"dataset_id": good_id})
    self.assertEqual(response.json()["count"], 2)
def test_qn_reference(self):
    """Create a QN reference, smash a dataset with it, then use the command.

    The test has three stages:

    1. Six processed MICROARRAY samples are stored for HOMO_SAPIENS and a
       ``QN_REFERENCE`` job is run over them; the resulting target file's
       existence, size and sha1 are checked.
    2. A ``SMASHER`` job is run over five of those samples with quantile
       normalization enabled and specific output values are checked.
    3. The ``create_qn_target`` management command is invoked and the path
       it prints is compared with the most recent QN target for the organism.
    """
    # ``django.utils.six`` was removed in Django 3.0; the standard library
    # StringIO is the same class on Python 3 and works on every Django version.
    from io import StringIO

    from django.core.management import call_command

    job = ProcessorJob()
    job.pipeline_applied = "QN_REFERENCE"
    job.save()

    homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()

    for code in ["1", "2", "3", "4", "5", "6"]:
        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_accession_code = "A-MEXP-1171"
        sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
        sample.organism = homo_sapiens
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.save()

        result = ComputationalResult()
        result.save()

        computed_file = ComputedFile()
        computed_file.filename = code + ".tsv"
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
        computed_file.size_in_bytes = int(code)
        computed_file.result = result
        computed_file.is_smashable = True
        computed_file.save()

        sample_file_link = SampleComputedFileAssociation()
        sample_file_link.sample = sample
        sample_file_link.computed_file = computed_file
        sample_file_link.save()

        experiment_sample_link = ExperimentSampleAssociation()
        experiment_sample_link.experiment = experiment
        experiment_sample_link.sample = sample
        experiment_sample_link.save()

    dataset = Dataset()
    dataset.data = {"12345": ["1", "2", "3", "4", "5", "6"]}
    dataset.aggregate_by = "ALL"
    dataset.scale_by = "NONE"
    # We don't QN because we're creating the target now
    dataset.quantile_normalize = False
    dataset.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = job
    pjda.dataset = dataset
    pjda.save()

    final_context = qn_reference.create_qn_reference(job.pk)
    self.assertTrue(final_context["success"])
    self.assertTrue(os.path.exists(final_context["target_file"]))
    self.assertEqual(os.path.getsize(final_context["target_file"]), 556)

    target = utils.get_most_recent_qn_target_for_organism(homo_sapiens)
    self.assertEqual(target.sha1, "636d72d5cbf4b9785b0bd271a1430b615feaa7ea")

    ###
    # Smasher with QN
    ###

    smasher_job = ProcessorJob()
    smasher_job.pipeline_applied = "SMASHER"
    smasher_job.save()

    smasher_dataset = Dataset()
    smasher_dataset.data = {"12345": ["1", "2", "3", "4", "5"]}
    smasher_dataset.aggregate_by = "SPECIES"
    smasher_dataset.scale_by = "STANDARD"
    smasher_dataset.email_address = "*****@*****.**"
    smasher_dataset.quantile_normalize = True
    smasher_dataset.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = smasher_job
    pjda.dataset = smasher_dataset
    pjda.save()

    final_context = smasher.smash(smasher_job.pk, upload=False)
    self.assertTrue(final_context["success"])

    # Exact float comparisons: these pin the numeric output for this fixture.
    self.assertEqual(final_context["merged_qn"]["1"][0], -0.4379488528812934)
    self.assertEqual(final_context["original_merged"]["1"][0], -0.576210936113982)

    ##
    # Test via management command
    ##

    out = StringIO()
    try:
        call_command("create_qn_target", organism="homo_sapiens", min=1, stdout=out)
    except SystemExit:
        # this is okay! Whatever the command wrote is still checked below.
        pass

    stdout = out.getvalue()
    self.assertIn("Target file", stdout)

    # The first output line is split on ":" and the second piece is the path.
    path = stdout.split("\n")[0].split(":")[1].strip()
    self.assertTrue(os.path.exists(path))
    self.assertEqual(
        path,
        utils.get_most_recent_qn_target_for_organism(homo_sapiens).absolute_file_path,
    )
def test_create_quantpendia(self):
    """Run the quantpendia job for one RNA-SEQ sample and inspect its output.

    Sets up experiment ``GSE51088`` with sample ``GSM1237818``, two computed
    files on one computational result, and a ``quant_sf_only`` dataset tied
    to the job. After ``create_quantpendia`` runs, the expected files must
    exist under the output directory, the metadata must describe one sample
    in one experiment, and the latest quant-only compendia ``ComputedFile``
    must exist on disk.
    """
    job = ProcessorJob(pipeline_applied=ProcessorPipeline.CREATE_QUANTPENDIA.value)
    job.save()

    experiment = Experiment(accession_code="GSE51088")
    experiment.save()

    result = ComputationalResult()
    result.save()

    homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS", taxonomy_id=9606)

    sample = Sample(
        accession_code="GSM1237818",
        title="GSM1237818",
        organism=homo_sapiens,
        technology="RNA-SEQ",
    )
    sample.save()

    SampleResultAssociation(sample=sample, result=result).save()
    ExperimentSampleAssociation(experiment=experiment, sample=sample).save()

    quant_file = ComputedFile(
        s3_key="smasher-test-quant.sf",
        s3_bucket="data-refinery-test-assets",
        filename="quant.sf",
        absolute_file_path="/home/user/data_store/QUANT/smasher-test-quant.sf",
        result=result,
        is_smashable=True,
        size_in_bytes=123123,
        # this matches with the downloaded file
        sha1="08c7ea90b66b52f7cd9d9a569717a1f5f3874967",
    )
    quant_file.save()

    logquant_file = ComputedFile(
        filename="logquant.tsv",
        is_smashable=True,
        size_in_bytes=123123,
        result=result,
    )
    logquant_file.save()

    # Only the logquant file is tied directly to the sample.
    SampleComputedFileAssociation(sample=sample, computed_file=logquant_file).save()

    dataset = Dataset(
        data={"GSE51088": ["GSM1237818"]},
        aggregate_by="EXPERIMENT",
        scale_by="STANDARD",
        email_address="*****@*****.**",
        quant_sf_only=True,  # Make the dataset include quant.sf files only
    )
    dataset.save()

    ProcessorJobDatasetAssociation(processor_job=job, dataset=dataset).save()

    final_context = create_quantpendia(job.id)

    expected_outputs = (
        "/GSE51088/GSM1237818_quant.sf",
        "/README.md",
        "/LICENSE.TXT",
        "/aggregated_metadata.json",
    )
    for relative_path in expected_outputs:
        self.assertTrue(os.path.exists(final_context["output_dir"] + relative_path))

    metadata = final_context["metadata"]
    self.assertTrue(metadata["quant_sf_only"])
    self.assertEqual(metadata["num_samples"], 1)
    self.assertEqual(metadata["num_experiments"], 1)

    # test that archive exists
    quantpendia_file = ComputedFile.objects.filter(
        is_compendia=True, quant_sf_only=True
    ).latest()
    self.assertTrue(os.path.exists(quantpendia_file.absolute_file_path))