예제 #1
0
    def test_qn_management_command(self):
        """Check that ``create_qn_target`` runs without creating a job.

        The command is invoked for an organism that does not have enough
        samples on the same platform, so no "Target file" message should be
        printed and no processor job should exist afterwards.
        """
        homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        homo_sapiens.save()

        # We don't have a 0.tsv
        prepare_experiment(range(1, 7))

        out = StringIO()
        try:
            call_command("create_qn_target", organism="homo_sapiens", min=10, stdout=out)
        except SystemExit:
            # The command exiting early is okay for this scenario.
            pass

        # assertNotIn shows the captured output if the check ever fails.
        self.assertNotIn("Target file", out.getvalue())

        # There are not enough samples available in this scenario so we
        # shouldn't have even made a processor job.
        self.assertEqual(ProcessorJob.objects.count(), 0)
예제 #2
0
    def test_uncached_other_names_are_found(self, mock_get):
        """Resolve an uncached name that needs the fallback ESEARCH query.

        Two ESEARCH requests are expected: one with the "scin" field and one
        without any field. A repeated lookup must not issue any request.
        """
        mock_get.side_effect = mocked_requests_get

        first_lookup = Organism.get_id_for_name("Human")
        self.assertEqual(first_lookup, 9606)

        scin_params = {"db": "taxonomy", "field": "scin", "term": "HUMAN"}
        plain_params = {"db": "taxonomy", "term": "HUMAN"}
        expected_calls = [
            call(ESEARCH_URL, scin_params),
            call(ESEARCH_URL, plain_params),
        ]
        mock_get.assert_has_calls(expected_calls)

        # The organism record should now be stored in the database, so the
        # second lookup is expected to be served without a request.
        mock_get.reset_mock()
        second_lookup = Organism.get_id_for_name("Human")

        self.assertEqual(second_lookup, 9606)
        mock_get.assert_not_called()
예제 #3
0
    def test_unfound_names_return_0(self, mock_get):
        """An organism name without an NCBI taxonomy ID resolves to 0.

        We can keep things moving for a while without the ID.
        get_taxonomy_id will log an error message, which prompts a developer
        to investigate which organism name could not be found. Setting the ID
        to 0 is therefore the right thing to do here, even though it may not
        seem like it.
        """
        not_found_response = Mock(ok=True)
        not_found_response.text = ESEARCH_NOT_FOUND_XML
        mock_get.return_value = not_found_response

        first_lookup = Organism.get_id_for_name("blah")
        self.assertEqual(first_lookup, 0)

        scin_params = {"db": "taxonomy", "field": "scin", "term": "BLAH"}
        plain_params = {"db": "taxonomy", "term": "BLAH"}
        expected_calls = [
            call(ESEARCH_URL, scin_params),
            call(ESEARCH_URL, plain_params),
        ]
        mock_get.assert_has_calls(expected_calls)

        # The organism record should now be stored in the database, so the
        # second lookup is expected to be served without a request.
        mock_get.reset_mock()
        second_lookup = Organism.get_id_for_name("BLAH")

        self.assertEqual(second_lookup, 0)
        mock_get.assert_not_called()
예제 #4
0
    def test_unfound_names_raise(self, mock_get):
        """A name with no taxonomy match raises ``UnknownOrganismId``.

        If we can't find the taxonomy id, it's likely a bad organism name.
        Both ESEARCH requests (with and without the "scin" field) are still
        expected to have been made.
        """
        not_found_response = Mock(ok=True)
        not_found_response.text = ESEARCH_NOT_FOUND_XML
        mock_get.return_value = not_found_response

        with self.assertRaises(UnknownOrganismId):
            Organism.get_id_for_name("blah")

        # NOTE(review): this looks like a real API key committed to the test;
        # confirm whether it should be read from settings instead.
        api_key = "3a1f8d818b0aa05d1aa3c334fa2cc9a17e09"
        scin_params = {
            "db": "taxonomy",
            "field": "scin",
            "api_key": api_key,
            "term": "BLAH",
        }
        plain_params = {
            "db": "taxonomy",
            "api_key": api_key,
            "term": "BLAH",
        }
        expected_calls = [
            call(ESEARCH_URL, scin_params),
            call(ESEARCH_URL, plain_params),
        ]
        mock_get.assert_has_calls(expected_calls)
예제 #5
0
    def test_qn_management_command(self):
        """Check that ``create_qn_target`` runs without creating a job.

        Six processed microarray samples, each with one smashable computed
        file, are attached to a single experiment. The command is then
        invoked for an organism that does not have enough samples on the same
        platform, so no "Target file" message should be printed and no
        processor job should exist afterwards.
        """
        homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        homo_sapiens.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()

        # We don't have a 0.tsv
        codes = ["1", "2", "3", "4", "5", "6"]

        for code in codes:
            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_accession_code = "A-MEXP-1171"
            sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
            sample.organism = homo_sapiens
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.save()

            cr = ComputationalResult()
            cr.save()

            # Build the file name once and reuse it for the path.
            filename = code + ".tsv"
            computed_file = ComputedFile()
            computed_file.filename = filename
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + filename
            computed_file.size_in_bytes = int(code)
            computed_file.result = cr
            computed_file.is_smashable = True
            computed_file.save()

            # Link the computed file to its sample.
            scfa = SampleComputedFileAssociation()
            scfa.sample = sample
            scfa.computed_file = computed_file
            scfa.save()

            # Link the sample to the experiment.
            exsa = ExperimentSampleAssociation()
            exsa.experiment = experiment
            exsa.sample = sample
            exsa.save()

        out = StringIO()
        try:
            call_command("create_qn_target", organism="homo_sapiens", min=1, stdout=out)
        except SystemExit:
            # The command exiting early is okay for this scenario.
            pass

        # assertNotIn shows the captured output if the check ever fails.
        self.assertNotIn("Target file", out.getvalue())

        # There are not enough samples available in this scenario so we
        # shouldn't have even made a processor job.
        self.assertEqual(ProcessorJob.objects.count(), 0)
예제 #6
0
    def test_cached_ids_are_found(self, mock_get):
        """An organism already in the database is resolved without a request.

        A HOMO_SAPIENS record is created up front; looking up its taxonomy id
        by name must return 9606 and must not call the mocked ``get``.
        """
        Organism.objects.create(name="HOMO_SAPIENS", taxonomy_id=9606, is_scientific_name=True)

        # Mixed-case, space-separated spelling of the cached record's name.
        taxonomy_id = Organism.get_id_for_name("Homo Sapiens")

        self.assertEqual(taxonomy_id, 9606)
        mock_get.assert_not_called()
예제 #7
0
    def test_cached_names_are_found(self, mock_get):
        """A name already in the database is returned without a request.

        A HOMO_SAPIENS record is created up front; looking up taxonomy id
        9606 must return that name and must not call the mocked ``get``.
        """
        Organism.objects.create(
            name="HOMO_SAPIENS",
            taxonomy_id=9606,
            is_scientific_name=True,
        )

        cached_name = Organism.get_name_for_id(9606)

        self.assertEqual(cached_name, "HOMO_SAPIENS")
        mock_get.assert_not_called()
예제 #8
0
    def test_qn_reference(self, mock_send_job):
        """Run the command for two organisms and count the resulting jobs.

        400 processed microarray samples are created under one experiment:
        codes 1-200 for HOMO_SAPIENS and codes 201-400 for MUS_MUSCULUS. The
        command is then expected to produce two calls on ``mock_send_job``
        and two ``ProcessorJob`` rows.
        """
        organism = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        organism.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()

        for number in range(1, 401):
            code = str(number)
            tsv_name = code + ".tsv"

            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_name = f"Affymetrix {organism.name}"
            sample.platform_accession_code = f"A-MEXP-{organism.name}"
            sample.manufacturer = "AFFYMETRIX"
            sample.organism = organism
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.has_raw = True
            sample.save()

            result = ComputationalResult()
            result.save()

            computed_file = ComputedFile()
            computed_file.filename = tsv_name
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + tsv_name
            computed_file.size_in_bytes = number
            computed_file.result = result
            computed_file.is_smashable = True
            computed_file.save()

            file_link = SampleComputedFileAssociation()
            file_link.sample = sample
            file_link.computed_file = computed_file
            file_link.save()

            experiment_link = ExperimentSampleAssociation()
            experiment_link.experiment = experiment
            experiment_link.sample = sample
            experiment_link.save()

            # The test needs more than one organism, but accession codes
            # cannot repeat, so switch organisms halfway through.
            if number == 200:
                organism = Organism(name="MUS_MUSCULUS", taxonomy_id=111)
                organism.save()

        # Setup is done, actually run the command.
        Command().handle(organisms="HOMO_SAPIENS,MUS_MUSCULUS")

        self.assertEqual(len(mock_send_job.mock_calls), 2)
        self.assertEqual(ProcessorJob.objects.count(), 2)
예제 #9
0
    def test_uncached_ids_are_found(self, mock_get):
        """Resolve an uncached taxonomy id through a single EFETCH request.

        The first lookup must call EFETCH exactly once with the id as a
        string. A repeated lookup must not issue any request.
        """
        efetch_response = Mock(ok=True)
        efetch_response.text = EFETCH_RESPONSE_XML
        mock_get.return_value = efetch_response

        first_lookup = Organism.get_name_for_id(9606)
        self.assertEqual(first_lookup, "HOMO_SAPIENS")

        expected_params = {"db": "taxonomy", "id": "9606"}
        mock_get.assert_called_once_with(EFETCH_URL, expected_params)

        # The organism record should now be stored in the database, so the
        # second lookup is expected to be served without a request.
        mock_get.reset_mock()
        second_lookup = Organism.get_name_for_id(9606)

        self.assertEqual(second_lookup, "HOMO_SAPIENS")
        mock_get.assert_not_called()
예제 #10
0
    def test_uncached_scientific_names_are_found(self, mock_get):
        """Resolve an uncached scientific name with a single ESEARCH request.

        The first lookup must call ESEARCH exactly once, using the "scin"
        field and the term "HOMO_SAPIENS". A repeated lookup must not issue
        any request.
        """
        mock_get.return_value = Mock(ok=True)
        mock_get.return_value.text = ESEARCH_RESPONSE_XML

        # The name must be spelled out in full: the assertion below expects
        # the search term "HOMO_SAPIENS".
        taxonomy_id = Organism.get_id_for_name("Homo Sapiens")

        self.assertEqual(taxonomy_id, 9606)
        mock_get.assert_called_once_with(ESEARCH_URL, {
            "db": "taxonomy",
            "field": "scin",
            "term": "HOMO_SAPIENS"
        })

        # The first call should have stored the organism record in the
        # database so this call should not make a request.
        mock_get.reset_mock()
        new_id = Organism.get_id_for_name("Homo Sapiens")

        self.assertEqual(new_id, 9606)
        mock_get.assert_not_called()
예제 #11
0
    def test_qn_reference(self):
        """Create a QN reference, then run a smasher job that uses it.

        200 processed microarray samples are attached to one experiment. A
        QN_REFERENCE job is run over a dataset of six of them and its target
        file is checked by existence, size and SHA-1. A SMASHER job with
        quantile normalization enabled is then run and two merged values are
        compared against fixed numbers.
        """
        qn_job = ProcessorJob()
        qn_job.pipeline_applied = "QN_REFERENCE"
        qn_job.save()

        homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        homo_sapiens.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()

        # We don't have a 0.tsv
        for number in range(1, 201):
            code = str(number)
            tsv_name = code + ".tsv"

            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_accession_code = "A-MEXP-1171"
            sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
            sample.organism = homo_sapiens
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.save()

            result = ComputationalResult()
            result.save()

            computed_file = ComputedFile()
            computed_file.filename = tsv_name
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + tsv_name
            computed_file.size_in_bytes = number
            computed_file.result = result
            computed_file.is_smashable = True
            computed_file.save()

            file_link = SampleComputedFileAssociation()
            file_link.sample = sample
            file_link.computed_file = computed_file
            file_link.save()

            experiment_link = ExperimentSampleAssociation()
            experiment_link.experiment = experiment
            experiment_link.sample = sample
            experiment_link.save()

        qn_dataset = Dataset()
        qn_dataset.data = {"12345": ["1", "2", "3", "4", "5", "6"]}
        qn_dataset.aggregate_by = "ALL"
        qn_dataset.scale_by = "NONE"
        # We don't QN here because this job is what creates the target.
        qn_dataset.quantile_normalize = False
        qn_dataset.save()

        qn_link = ProcessorJobDatasetAssociation()
        qn_link.processor_job = qn_job
        qn_link.dataset = qn_dataset
        qn_link.save()

        qn_context = qn_reference.create_qn_reference(qn_job.pk)
        self.assertTrue(qn_context["success"])
        target_path = qn_context["target_file"]
        self.assertTrue(os.path.exists(target_path))
        self.assertEqual(os.path.getsize(target_path), 562)

        homo_sapiens.refresh_from_db()
        latest_target = homo_sapiens.qn_target.computedfile_set.latest()
        self.assertEqual(latest_target.sha1, "de69d348f8b239479e2330d596c4013a7b0b2b6a")

        # Create and run a smasher job that will use the QN target we just made.
        smasher_job = ProcessorJob()
        smasher_job.pipeline_applied = "SMASHER"
        smasher_job.save()

        smasher_dataset = Dataset()
        smasher_dataset.data = {"12345": ["1", "2", "3", "4", "5"]}
        smasher_dataset.aggregate_by = "SPECIES"
        smasher_dataset.scale_by = "STANDARD"
        smasher_dataset.email_address = "*****@*****.**"
        smasher_dataset.quantile_normalize = True
        smasher_dataset.save()

        smasher_link = ProcessorJobDatasetAssociation()
        smasher_link.processor_job = smasher_job
        smasher_link.dataset = smasher_dataset
        smasher_link.save()

        smasher_context = smasher.smash(smasher_job.pk, upload=False)
        self.assertTrue(smasher_context["success"])

        np.testing.assert_almost_equal(smasher_context["merged_qn"]["1"][0], -0.4379488527774811)
        np.testing.assert_almost_equal(smasher_context["original_merged"]["1"][0], -0.5762109)
예제 #12
0
    def test_invalid_ids_cause_exceptions(self, mock_get):
        """Looking up id 0 raises ``InvalidNCBITaxonomyId``.

        The mocked ``get`` returns an OK response whose text is
        ``EFETCH_NOT_FOUND_XML``.
        """
        not_found_response = Mock(ok=True)
        not_found_response.text = EFETCH_NOT_FOUND_XML
        mock_get.return_value = not_found_response

        with self.assertRaises(InvalidNCBITaxonomyId):
            Organism.get_name_for_id(0)