def test_qn_management_command(self):
    """Check that `create_qn_target` makes no job when samples are too few.

    The command is run for HOMO_SAPIENS with ``min=10`` after
    ``prepare_experiment`` has been called with ids 1-6 (presumably one
    sample per id -- confirm against the helper). No target file may be
    reported on stdout and no ProcessorJob may exist afterwards.
    """
    homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
    homo_sapiens.save()

    # We don't have a 0.tsv, so the ids start at 1. The helper is called
    # only for its side effects; its return value is not needed here.
    prepare_experiment(range(1, 7))

    out = StringIO()
    try:
        call_command("create_qn_target", organism="homo_sapiens", min=10, stdout=out)
    except SystemExit:
        # The command exiting is acceptable for this scenario.
        pass

    # assertNotIn shows the captured output if the assertion fails.
    self.assertNotIn("Target file", out.getvalue())

    # There's not enough samples available in this scenario so we
    # shouldn't have even made a processor job.
    self.assertEqual(ProcessorJob.objects.count(), 0)
def test_uncached_other_names_are_found(self, mock_get):
    """Look up "Human" with nothing cached, then confirm the result is cached.

    Requests are routed through ``mocked_requests_get``. The first lookup
    is expected to issue two ESEARCH requests: one restricted to the
    "scin" field and one without a field restriction.
    """
    mock_get.side_effect = mocked_requests_get

    self.assertEqual(Organism.get_id_for_name("Human"), 9606)

    expected_requests = [
        call(ESEARCH_URL, {"db": "taxonomy", "field": "scin", "term": "HUMAN"}),
        call(ESEARCH_URL, {"db": "taxonomy", "term": "HUMAN"}),
    ]
    mock_get.assert_has_calls(expected_requests)

    # The first call should have stored the organism record in the
    # database so this call should not make a request.
    mock_get.reset_mock()
    self.assertEqual(Organism.get_id_for_name("Human"), 9606)
    mock_get.assert_not_called()
def test_unfound_names_return_0(self, mock_get):
    """An organism name with no NCBI taxonomy ID resolves to 0.

    If we can't find an NCBI taxonomy ID for an organism name we can
    keep things moving for a while without it. get_taxonomy_id will log
    an error message which will prompt a developer to investigate which
    organism name could not be found. Therefore setting the ID to 0 is
    the right thing to do in this case despite not seeming like it.
    """
    mock_get.return_value = Mock(ok=True, text=ESEARCH_NOT_FOUND_XML)

    self.assertEqual(Organism.get_id_for_name("blah"), 0)

    expected_requests = [
        call(ESEARCH_URL, {"db": "taxonomy", "field": "scin", "term": "BLAH"}),
        call(ESEARCH_URL, {"db": "taxonomy", "term": "BLAH"}),
    ]
    mock_get.assert_has_calls(expected_requests)

    # The first call should have stored the organism record in the
    # database so this call should not make a request.
    mock_get.reset_mock()
    self.assertEqual(Organism.get_id_for_name("BLAH"), 0)
    mock_get.assert_not_called()
def test_unfound_names_raise(self, mock_get):
    """A name with no taxonomy id raises UnknownOrganismId.

    If we can't find the taxonomy id, it's likely a bad organism name.
    Both ESEARCH requests (with and without the "scin" field) are
    expected to have been attempted before the error is raised.
    """
    # NOTE(review): hard-coded key literal; confirm it is a dummy value
    # that matches the test configuration.
    api_key = "3a1f8d818b0aa05d1aa3c334fa2cc9a17e09"

    mock_get.return_value = Mock(ok=True, text=ESEARCH_NOT_FOUND_XML)

    with self.assertRaises(UnknownOrganismId):
        Organism.get_id_for_name("blah")

    scin_params = {
        "db": "taxonomy",
        "field": "scin",
        "api_key": api_key,
        "term": "BLAH",
    }
    unrestricted_params = {
        "db": "taxonomy",
        "api_key": api_key,
        "term": "BLAH",
    }
    mock_get.assert_has_calls(
        [
            call(ESEARCH_URL, scin_params),
            call(ESEARCH_URL, unrestricted_params),
        ]
    )
def test_qn_management_command(self):
    """Check that `create_qn_target` makes no job for this fixture.

    Builds one experiment with six processed HOMO_SAPIENS microarray
    samples (codes "1".."6"), each linked to a smashable computed file,
    then runs the management command and verifies that no target file is
    reported and no ProcessorJob exists.

    NOTE(review): the command is invoked with ``min=1`` although the
    scenario is described as "not enough samples"; the reason no job is
    expected is not visible from this test -- confirm against the command.
    """
    homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
    homo_sapiens.save()

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()

    # We don't have a 0.tsv
    for code in ["1", "2", "3", "4", "5", "6"]:
        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_accession_code = "A-MEXP-1171"
        sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
        sample.organism = homo_sapiens
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.save()

        cr = ComputationalResult()
        cr.save()

        computed_file = ComputedFile()
        computed_file.filename = code + ".tsv"
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
        computed_file.size_in_bytes = int(code)
        computed_file.result = cr
        computed_file.is_smashable = True
        computed_file.save()

        # Link the sample to its computed file and to the experiment.
        scfa = SampleComputedFileAssociation()
        scfa.sample = sample
        scfa.computed_file = computed_file
        scfa.save()

        exsa = ExperimentSampleAssociation()
        exsa.experiment = experiment
        exsa.sample = sample
        exsa.save()

    out = StringIO()
    try:
        call_command("create_qn_target", organism="homo_sapiens", min=1, stdout=out)
    except SystemExit:
        # The command exiting is acceptable for this scenario.
        pass

    # assertNotIn shows the captured output if the assertion fails.
    self.assertNotIn("Target file", out.getvalue())

    # There's not enough samples available in this scenario so we
    # shouldn't have even made a processor job.
    self.assertEqual(ProcessorJob.objects.count(), 0)
def test_cached_ids_are_found(self, mock_get):
    """A taxonomy id already stored locally is returned without a request.

    A HOMO_SAPIENS record is created up front; looking up "Homo Sapiens"
    is expected to resolve to that record, so the mocked HTTP getter must
    never be called.
    """
    Organism.objects.create(name="HOMO_SAPIENS", taxonomy_id=9606, is_scientific_name=True)

    taxonomy_id = Organism.get_id_for_name("Homo Sapiens")

    self.assertEqual(taxonomy_id, 9606)
    mock_get.assert_not_called()
def test_cached_names_are_found(self, mock_get):
    """An organism name already stored locally is returned without a request."""
    Organism.objects.create(
        name="HOMO_SAPIENS",
        taxonomy_id=9606,
        is_scientific_name=True,
    )

    self.assertEqual(Organism.get_name_for_id(9606), "HOMO_SAPIENS")
    mock_get.assert_not_called()
def test_qn_reference(self, mock_send_job):
    """Run the command for two organisms and expect one job for each.

    400 processed Affymetrix microarray samples are created in a single
    experiment: codes 1-200 belong to HOMO_SAPIENS and codes 201-400 to
    MUS_MUSCULUS. After handling both organisms, exactly two jobs must
    have been sent and two ProcessorJob rows must exist.
    """
    organism = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
    organism.save()

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()

    for number in range(1, 401):
        code = str(number)
        tsv_name = code + ".tsv"

        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_name = f"Affymetrix {organism.name}"
        sample.platform_accession_code = f"A-MEXP-{organism.name}"
        sample.manufacturer = "AFFYMETRIX"
        sample.organism = organism
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.has_raw = True
        sample.save()

        result = ComputationalResult()
        result.save()

        computed_file = ComputedFile()
        computed_file.filename = tsv_name
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + tsv_name
        computed_file.size_in_bytes = number
        computed_file.result = result
        computed_file.is_smashable = True
        computed_file.save()

        file_link = SampleComputedFileAssociation()
        file_link.sample = sample
        file_link.computed_file = computed_file
        file_link.save()

        experiment_link = ExperimentSampleAssociation()
        experiment_link.experiment = experiment
        experiment_link.sample = sample
        experiment_link.save()

        # We need more than one organism for the tests, but can't
        # repeat accession codes, so halfway through just change the
        # organism.
        if number == 200:
            organism = Organism(name="MUS_MUSCULUS", taxonomy_id=111)
            organism.save()

    # Setup is done, actually run the command.
    Command().handle(organisms="HOMO_SAPIENS,MUS_MUSCULUS")

    self.assertEqual(len(mock_send_job.mock_calls), 2)
    self.assertEqual(ProcessorJob.objects.count(), 2)
def test_uncached_ids_are_found(self, mock_get):
    """Fetch the name for id 9606 once, then confirm it is served from cache."""
    mock_get.return_value = Mock(ok=True, text=EFETCH_RESPONSE_XML)

    self.assertEqual(Organism.get_name_for_id(9606), "HOMO_SAPIENS")

    efetch_params = {"db": "taxonomy", "id": "9606"}
    mock_get.assert_called_once_with(EFETCH_URL, efetch_params)

    # The first call should have stored the organism record in the
    # database so this call should not make a request.
    mock_get.reset_mock()
    self.assertEqual(Organism.get_name_for_id(9606), "HOMO_SAPIENS")
    mock_get.assert_not_called()
def test_uncached_scientific_names_are_found(self, mock_get):
    """Resolve a scientific name with one request, then hit the cache.

    The lookup for "Homo Sapiens" must issue exactly one ESEARCH request,
    restricted to the "scin" field, with the term "HOMO_SAPIENS". A
    second lookup of the same name must not issue any request.
    """
    mock_get.return_value = Mock(ok=True)
    mock_get.return_value.text = ESEARCH_RESPONSE_XML

    taxonomy_id = Organism.get_id_for_name("Homo Sapiens")

    self.assertEqual(taxonomy_id, 9606)
    mock_get.assert_called_once_with(
        ESEARCH_URL, {"db": "taxonomy", "field": "scin", "term": "HOMO_SAPIENS"}
    )

    # The first call should have stored the organism record in the
    # database so this call should not make a request.
    mock_get.reset_mock()
    new_id = Organism.get_id_for_name("Homo Sapiens")

    self.assertEqual(new_id, 9606)
    mock_get.assert_not_called()
def test_qn_reference(self):
    """Create a QN target for HOMO_SAPIENS, then smash a dataset using it.

    Phase one builds 200 processed microarray samples in one experiment,
    runs the QN_REFERENCE pipeline over a six-sample dataset, and checks
    the resulting target file's existence, size and sha1. Phase two runs
    a SMASHER job with quantile normalization enabled and checks one
    value from both the normalized and the original merged frames.
    """
    qn_job = ProcessorJob()
    qn_job.pipeline_applied = "QN_REFERENCE"
    qn_job.save()

    homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
    homo_sapiens.save()

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()

    # We don't have a 0.tsv
    for number in range(1, 201):
        code = str(number)
        tsv_name = code + ".tsv"

        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_accession_code = "A-MEXP-1171"
        sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
        sample.organism = homo_sapiens
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.save()

        result = ComputationalResult()
        result.save()

        computed_file = ComputedFile()
        computed_file.filename = tsv_name
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + tsv_name
        computed_file.size_in_bytes = number
        computed_file.result = result
        computed_file.is_smashable = True
        computed_file.save()

        file_link = SampleComputedFileAssociation()
        file_link.sample = sample
        file_link.computed_file = computed_file
        file_link.save()

        experiment_link = ExperimentSampleAssociation()
        experiment_link.experiment = experiment
        experiment_link.sample = sample
        experiment_link.save()

    qn_dataset = Dataset()
    qn_dataset.data = {"12345": ["1", "2", "3", "4", "5", "6"]}
    qn_dataset.aggregate_by = "ALL"
    qn_dataset.scale_by = "NONE"
    # We don't QN because we're creating the target now
    qn_dataset.quantile_normalize = False
    qn_dataset.save()

    qn_link = ProcessorJobDatasetAssociation()
    qn_link.processor_job = qn_job
    qn_link.dataset = qn_dataset
    qn_link.save()

    qn_context = qn_reference.create_qn_reference(qn_job.pk)
    self.assertTrue(qn_context["success"])
    self.assertTrue(os.path.exists(qn_context["target_file"]))
    self.assertEqual(os.path.getsize(qn_context["target_file"]), 562)

    # Reload the organism before reading its qn_target.
    homo_sapiens.refresh_from_db()
    target = homo_sapiens.qn_target.computedfile_set.latest()
    self.assertEqual(target.sha1, "de69d348f8b239479e2330d596c4013a7b0b2b6a")

    # Create and run a smasher job that will use the QN target we just made.
    smasher_job = ProcessorJob()
    smasher_job.pipeline_applied = "SMASHER"
    smasher_job.save()

    smash_dataset = Dataset()
    smash_dataset.data = {"12345": ["1", "2", "3", "4", "5"]}
    smash_dataset.aggregate_by = "SPECIES"
    smash_dataset.scale_by = "STANDARD"
    smash_dataset.email_address = "*****@*****.**"
    smash_dataset.quantile_normalize = True
    smash_dataset.save()

    smash_link = ProcessorJobDatasetAssociation()
    smash_link.processor_job = smasher_job
    smash_link.dataset = smash_dataset
    smash_link.save()

    smash_context = smasher.smash(smasher_job.pk, upload=False)
    self.assertTrue(smash_context["success"])

    np.testing.assert_almost_equal(smash_context["merged_qn"]["1"][0], -0.4379488527774811)
    np.testing.assert_almost_equal(smash_context["original_merged"]["1"][0], -0.5762109)
def test_invalid_ids_cause_exceptions(self, mock_get):
    """Looking up id 0 raises InvalidNCBITaxonomyId on a not-found response."""
    mock_get.return_value = Mock(ok=True, text=EFETCH_NOT_FOUND_XML)

    self.assertRaises(InvalidNCBITaxonomyId, Organism.get_name_for_id, 0)