Example #1
    def test_get_sample_keywords(self):
        experiment = Experiment()
        experiment.save()

        sample = Sample()
        sample.title = "123"
        sample.accession_code = "123"
        sample.age = 23
        sample.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        term = OntologyTerm()
        term.ontology_term = "EFO:0002939"
        term.human_readable_name = "medulloblastoma"
        term.save()

        sk = SampleKeyword()
        sk.name = term
        sk.source, _ = Contribution.objects.get_or_create(
            source_name="Refinebio Tests", methods_url="ccdatalab.org")
        sk.sample = sample
        sk.save()

        self.assertEqual(set(experiment.get_sample_keywords()),
                         set(["medulloblastoma"]))
Example #2
    def test_qn_management_command(self):
        """Test that the management command fires off and then does not create
        a job for an organism that does not have enough samples on the same
        platform."""

        homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        homo_sapiens.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()
        codes = ["1", "2", "3", "4", "5", "6"]
        # We don't have a 0.tsv

        for code in codes:
            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_accession_code = "A-MEXP-1171"
            sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
            sample.organism = homo_sapiens
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.save()

            cr = ComputationalResult()
            cr.save()

            computed_file = ComputedFile()
            computed_file.filename = code + ".tsv"
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
            computed_file.size_in_bytes = int(code)
            computed_file.result = cr
            computed_file.is_smashable = True
            computed_file.save()

            scfa = SampleComputedFileAssociation()
            scfa.sample = sample
            scfa.computed_file = computed_file
            scfa.save()

            exsa = ExperimentSampleAssociation()
            exsa.experiment = experiment
            exsa.sample = sample
            exsa.save()

        out = StringIO()
        try:
            call_command("create_qn_target", organism="homo_sapiens", min=1, stdout=out)
        except SystemExit:  # this is okay!
            pass

        stdout = out.getvalue()
        self.assertFalse("Target file" in stdout)

        # There's not enough samples available in this scenario so we
        # shouldn't have even made a processor job.
        self.assertEqual(ProcessorJob.objects.count(), 0)
Example #3
def make_test_data(organism):
    experiment = Experiment()
    experiment.accession_code = "GSE51088"
    experiment.technology = "RNA-SEQ"
    experiment.save()

    xoa = ExperimentOrganismAssociation()
    xoa.experiment = experiment
    xoa.organism = organism
    xoa.save()

    result = ComputationalResult()
    result.save()

    sample = Sample()
    sample.accession_code = "GSM1237818"
    sample.title = "GSM1237818"
    sample.organism = organism
    sample.technology = "RNA-SEQ"
    sample.is_processed = True
    sample.save()

    sra = SampleResultAssociation()
    sra.sample = sample
    sra.result = result
    sra.save()

    esa = ExperimentSampleAssociation()
    esa.experiment = experiment
    esa.sample = sample
    esa.save()

    computed_file = ComputedFile()
    computed_file.s3_key = "smasher-test-quant.sf"
    computed_file.s3_bucket = "data-refinery-test-assets"
    computed_file.filename = "quant.sf"
    computed_file.absolute_file_path = "/home/user/data_store/QUANT/smasher-test-quant.sf"
    computed_file.result = result
    computed_file.is_smashable = True
    computed_file.size_in_bytes = 123123
    computed_file.sha1 = (
        "08c7ea90b66b52f7cd9d9a569717a1f5f3874967"  # this matches with the downloaded file
    )
    computed_file.save()

    computed_file = ComputedFile()
    computed_file.filename = "logquant.tsv"
    computed_file.is_smashable = True
    computed_file.size_in_bytes = 123123
    computed_file.result = result
    computed_file.save()

    assoc = SampleComputedFileAssociation()
    assoc.sample = sample
    assoc.computed_file = computed_file
    assoc.save()
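
A minimal, hypothetical call site for make_test_data above; it assumes the same refinebio model imports as the snippet itself and an Organism row like the ones created in the other examples (the organism name and taxonomy id are placeholders):

# Hypothetical usage sketch for make_test_data(); the organism name and
# taxonomy_id below are placeholders borrowed from the other examples.
danio_rerio, _ = Organism.objects.get_or_create(name="DANIO_RERIO", taxonomy_id=1337)
make_test_data(danio_rerio)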
Example #4
    def test_qn_reference(self, mock_send_job):
        organism = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        organism.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()

        for code in [str(i) for i in range(1, 401)]:
            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_name = f"Affymetrix {organism.name}"
            sample.platform_accession_code = f"A-MEXP-{organism.name}"
            sample.manufacturer = "AFFYMETRIX"
            sample.organism = organism
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.has_raw = True
            sample.save()

            cr = ComputationalResult()
            cr.save()

            computed_file = ComputedFile()
            computed_file.filename = code + ".tsv"
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
            computed_file.size_in_bytes = int(code)
            computed_file.result = cr
            computed_file.is_smashable = True
            computed_file.save()

            scfa = SampleComputedFileAssociation()
            scfa.sample = sample
            scfa.computed_file = computed_file
            scfa.save()

            exsa = ExperimentSampleAssociation()
            exsa.experiment = experiment
            exsa.sample = sample
            exsa.save()

            # We need more than one organism for the tests, but can't
            # repeat accession codes, so halfway through just change the organism.
            if int(code) == 200:
                organism = Organism(name="MUS_MUSCULUS", taxonomy_id=111)
                organism.save()

        # Setup is done, actually run the command.
        command = Command()
        command.handle(organisms="HOMO_SAPIENS,MUS_MUSCULUS")

        self.assertEqual(len(mock_send_job.mock_calls), 2)
        self.assertEqual(ProcessorJob.objects.count(), 2)
Example #5
    def test_get_sample_metadata_fields_none(self):
        experiment = Experiment()
        experiment.save()

        sample = Sample()
        sample.title = "123"
        sample.accession_code = "123"
        sample.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        self.assertEqual(experiment.get_sample_metadata_fields(), [])
Example #6
    def test_get_sample_metadata_fields(self):
        experiment = Experiment()
        experiment.save()

        sample = Sample()
        sample.title = "123"
        sample.accession_code = "123"
        sample.specimen_part = "Lung"
        sample.sex = "Male"
        sample.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        self.assertEqual(set(experiment.get_sample_metadata_fields()), set(['specimen_part', 'sex']))
Example #7
def prepare_experiment(ids: List[int]) -> Experiment:
    (homo_sapiens, _) = Organism.objects.get_or_create(name="HOMO_SAPIENS",
                                                       taxonomy_id=9606)

    experiment = Experiment()
    experiment.accession_code = "12345"
    experiment.save()
    codes = [str(i) for i in ids]

    for code in codes:
        sample = Sample()
        sample.accession_code = code
        sample.title = code
        sample.platform_accession_code = "A-MEXP-1171"
        sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
        sample.organism = homo_sapiens
        sample.technology = "MICROARRAY"
        sample.is_processed = True
        sample.save()

        cr = ComputationalResult()
        cr.save()

        computed_file = ComputedFile()
        computed_file.filename = code + ".tsv"
        computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
        computed_file.size_in_bytes = int(code)
        computed_file.result = cr
        computed_file.is_smashable = True
        computed_file.save()

        scfa = SampleComputedFileAssociation()
        scfa.sample = sample
        scfa.computed_file = computed_file
        scfa.save()

        exsa = ExperimentSampleAssociation()
        exsa.experiment = experiment
        exsa.sample = sample
        exsa.save()

    return experiment
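
A hypothetical call site for prepare_experiment above, assuming the matching "1".tsv through "6".tsv files exist under /home/user/data_store/QN/ as in the other QN examples; per its -> Experiment annotation the helper hands back the prepared Experiment (the ids are placeholders):

# Hypothetical usage sketch for prepare_experiment(); ids 1-6 mirror the
# 1.tsv ... 6.tsv computed files assumed by the other QN examples.
experiment = prepare_experiment([1, 2, 3, 4, 5, 6])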
Example #8
def create_sample_for_experiment(sample_info: Dict,
                                 experiment: Experiment) -> Sample:
    result = ComputationalResult()
    result.save()

    sample = Sample()
    sample.accession_code = sample_info["accession_code"]
    sample.title = sample_info.get("title",
                                   None) or sample_info["accession_code"]
    sample.organism = sample_info["organism"]
    sample.technology = sample_info["technology"]
    sample.save()

    sra = SampleResultAssociation()
    sra.sample = sample
    sra.result = result
    sra.save()

    esa = ExperimentSampleAssociation()
    esa.experiment = experiment
    esa.sample = sample
    esa.save()

    if sample_info.get("filename") is not None:
        computed_file = ComputedFile()
        computed_file.filename = sample_info["filename"]
        computed_file.absolute_file_path = sample_info[
            "data_dir"] + sample_info["filename"]
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

    return sample
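
A hypothetical call site for create_sample_for_experiment above, spelling out the keys the helper reads from sample_info ("accession_code", "organism", "technology", plus the optional "title", "filename", and "data_dir"); the concrete values and the pre-existing experiment/organism objects are placeholders:

# Hypothetical usage sketch for create_sample_for_experiment(); all values
# below are placeholders, and "filename"/"data_dir" are only needed when a
# ComputedFile should be attached to the sample.
sample_info = {
    "accession_code": "GSM1237818",
    "title": "GSM1237818",              # optional; falls back to accession_code
    "organism": homo_sapiens,           # an existing Organism instance
    "technology": "RNA-SEQ",
    "filename": "quant.sf",             # optional; attaches a smashable ComputedFile
    "data_dir": "/home/user/data_store/QUANT/",  # concatenated with "filename"
}
sample = create_sample_for_experiment(sample_info, experiment)  # experiment: an existing Experiment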
Example #9
    def test_organism_shepherd_command(self, mock_nomad, mock_send_job,
                                       mock_get_active_volumes):
        """Tests that the organism shepherd requeues jobs in the right order.

        The situation we're setting up is basically this:
          * There are two experiments.
          * One of them has 1/2 samples processed, the other 0/1
          * One of them needs a DownloaderJob requeued and the other
            needs a ProcessorJob requeued.

        And what we're going to test for is:
          * Both of the jobs that need to be requeued are requeued.
          * The experiment with a processed sample is requeued first
            because it has a higher completion percentage.
        """
        # First, set up our mocks to prevent network calls.
        mock_send_job.return_value = True
        active_volumes = {"1", "2", "3"}
        mock_get_active_volumes.return_value = active_volumes

        def mock_init_nomad(host, port=0, timeout=0):
            ret_value = MagicMock()
            ret_value.jobs = MagicMock()
            ret_value.jobs.get_jobs = MagicMock()
            ret_value.jobs.get_jobs.side_effect = lambda: []
            return ret_value

        mock_nomad.side_effect = mock_init_nomad
        zebrafish = Organism(name="DANIO_RERIO",
                             taxonomy_id=1337,
                             is_scientific_name=True)
        zebrafish.save()

        # Experiment that is 0% complete.
        zero_percent_experiment = Experiment(accession_code='ERP037000')
        zero_percent_experiment.technology = 'RNA-SEQ'
        zero_percent_experiment.save()

        organism_assoc = ExperimentOrganismAssociation.objects.create(
            organism=zebrafish, experiment=zero_percent_experiment)

        zero_percent = OriginalFile()
        zero_percent.filename = "ERR037001.fastq.gz"
        zero_percent.source_filename = "ERR037001.fastq.gz"
        zero_percent.source_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR037/ERR037001/ERR037001_1.fastq.gz"
        zero_percent.is_archive = True
        zero_percent.save()

        zero_percent_sample = Sample()
        zero_percent_sample.accession_code = 'ERR037001'
        zero_percent_sample.organism = zebrafish
        zero_percent_sample.save()

        assoc = OriginalFileSampleAssociation()
        assoc.sample = zero_percent_sample
        assoc.original_file = zero_percent
        assoc.save()

        assoc = ExperimentSampleAssociation()
        assoc.sample = zero_percent_sample
        assoc.experiment = zero_percent_experiment
        assoc.save()

        # TODO: fix names of all the variables to be appropriate for this test case.
        zero_percent_dl_job = DownloaderJob()
        zero_percent_dl_job.accession_code = zero_percent_sample.accession_code
        zero_percent_dl_job.downloader_task = "SRA"
        zero_percent_dl_job.start_time = timezone.now()
        zero_percent_dl_job.end_time = timezone.now()
        zero_percent_dl_job.success = False
        zero_percent_dl_job.save()

        assoc = DownloaderJobOriginalFileAssociation()
        assoc.downloader_job = zero_percent_dl_job
        assoc.original_file = zero_percent
        assoc.save()

        # Experiment that is 50% complete.
        fifty_percent_experiment = Experiment(accession_code='ERP036000')
        fifty_percent_experiment.technology = 'RNA-SEQ'
        fifty_percent_experiment.save()

        organism_assoc = ExperimentOrganismAssociation.objects.create(
            organism=zebrafish, experiment=fifty_percent_experiment)

        ## First sample, this one has been processed.
        successful_pj = ProcessorJob()
        successful_pj.accession_code = "ERR036000"
        successful_pj.pipeline_applied = "SALMON"
        successful_pj.ram_amount = 12288
        successful_pj.start_time = timezone.now()
        successful_pj.end_time = timezone.now()
        successful_pj.success = True
        successful_pj.save()

        successful_og = OriginalFile()
        successful_og.filename = "ERR036000.fastq.gz"
        successful_og.source_filename = "ERR036000.fastq.gz"
        successful_og.source_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz"
        successful_og.is_archive = True
        successful_og.save()

        successful_sample = Sample()
        successful_sample.accession_code = 'ERR036000'
        successful_sample.organism = zebrafish
        successful_sample.save()

        assoc = OriginalFileSampleAssociation()
        assoc.sample = successful_sample
        assoc.original_file = successful_og
        assoc.save()

        assoc = ProcessorJobOriginalFileAssociation()
        assoc.processor_job = successful_pj
        assoc.original_file = successful_og
        assoc.save()

        assoc = ExperimentSampleAssociation()
        assoc.sample = successful_sample
        assoc.experiment = fifty_percent_experiment
        assoc.save()

        ## Second sample, this one hasn't been processed.
        fifty_percent_unprocessed_og = OriginalFile()
        fifty_percent_unprocessed_og.filename = "ERR036001.fastq.gz"
        fifty_percent_unprocessed_og.source_filename = "ERR036001.fastq.gz"
        fifty_percent_unprocessed_og.source_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036001/ERR036001_1.fastq.gz"
        fifty_percent_unprocessed_og.is_archive = True
        fifty_percent_unprocessed_og.save()

        fifty_percent_unprocessed_sample = Sample()
        fifty_percent_unprocessed_sample.accession_code = 'ERR036001'
        fifty_percent_unprocessed_sample.organism = zebrafish
        fifty_percent_unprocessed_sample.save()

        assoc = OriginalFileSampleAssociation()
        assoc.sample = fifty_percent_unprocessed_sample
        assoc.original_file = fifty_percent_unprocessed_og
        assoc.save()

        assoc = ExperimentSampleAssociation()
        assoc.sample = fifty_percent_unprocessed_sample
        assoc.experiment = fifty_percent_experiment
        assoc.save()

        fifty_percent_processor_job = ProcessorJob()
        fifty_percent_processor_job.pipeline_applied = "SALMON"
        fifty_percent_processor_job.accession_code = fifty_percent_unprocessed_sample.accession_code
        fifty_percent_processor_job.ram_amount = 12288
        fifty_percent_processor_job.start_time = timezone.now()
        fifty_percent_processor_job.end_time = timezone.now()
        fifty_percent_processor_job.success = False
        fifty_percent_processor_job.save()

        assoc = ProcessorJobOriginalFileAssociation()
        assoc.processor_job = fifty_percent_processor_job
        assoc.original_file = fifty_percent_unprocessed_og
        assoc.save()

        # Setup is done, actually run the command.
        args = []
        options = {"organism_name": "DANIO_RERIO"}
        call_command("organism_shepherd", *args, **options)

        # Verify that the jobs were called in the correct order.
        mock_calls = mock_send_job.mock_calls

        first_call_job_type = mock_calls[0][1][0]
        first_call_job_object = mock_calls[0][2]["job"]
        self.assertEqual(first_call_job_type, ProcessorPipeline.SALMON)
        self.assertEqual(first_call_job_object.pipeline_applied,
                         fifty_percent_processor_job.pipeline_applied)
        self.assertEqual(first_call_job_object.ram_amount,
                         fifty_percent_processor_job.ram_amount)
        self.assertIn(first_call_job_object.volume_index, active_volumes)

        fifty_percent_processor_job.refresh_from_db()
        self.assertEqual(first_call_job_object,
                         fifty_percent_processor_job.retried_job)

        second_call_job_type = mock_calls[1][1][0]
        second_call_job_object = mock_calls[1][2]["job"]
        self.assertEqual(second_call_job_type, Downloaders.SRA)
        self.assertEqual(second_call_job_object.accession_code,
                         zero_percent_dl_job.accession_code)
        self.assertEqual(second_call_job_object.downloader_task,
                         zero_percent_dl_job.downloader_task)

        zero_percent_dl_job.refresh_from_db()
        self.assertEqual(second_call_job_object,
                         zero_percent_dl_job.retried_job)
Example #10
    def test_log2(self):
        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        # Has non-log2 data:
        # https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE44421
        # ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE44nnn/GSE44421/miniml/GSE44421_family.xml.tgz
        experiment = Experiment()
        experiment.accession_code = "GSE44421"
        experiment.save()

        result = ComputationalResult()
        result.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

        sample = Sample()
        sample.accession_code = 'GSM1084806'
        sample.title = 'GSM1084806'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1084806-tbl-1.txt"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sample = Sample()
        sample.accession_code = 'GSM1084807'
        sample.title = 'GSM1084807'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1084807-tbl-1.txt"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'GSE44421': ['GSM1084806', 'GSM1084807']}
        ds.aggregate_by = 'EXPERIMENT'
        ds.scale_by = 'MINMAX'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(pj.pk, upload=False)
        ds = Dataset.objects.get(id=ds.id)

        self.assertTrue(final_context['success'])
Example #11
    def test_dualtech_smash(self):
        """ """

        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        # MICROARRAY TECH
        experiment = Experiment()
        experiment.accession_code = "GSE1487313"
        experiment.save()

        result = ComputationalResult()
        result.save()

        gallus_gallus = Organism.get_object_for_name("GALLUS_GALLUS")

        sample = Sample()
        sample.accession_code = 'GSM1487313'
        sample.title = 'GSM1487313'
        sample.organism = gallus_gallus
        sample.technology = "MICROARRAY"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1487313_liver.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        # RNASEQ TECH
        experiment2 = Experiment()
        experiment2.accession_code = "SRS332914"
        experiment2.save()

        result2 = ComputationalResult()
        result2.save()

        sample2 = Sample()
        sample2.accession_code = 'SRS332914'
        sample2.title = 'SRS332914'
        sample2.organism = gallus_gallus
        sample2.technology = "RNA-SEQ"
        sample2.save()

        sra2 = SampleResultAssociation()
        sra2.sample = sample2
        sra2.result = result2
        sra2.save()

        esa2 = ExperimentSampleAssociation()
        esa2.experiment = experiment2
        esa2.sample = sample2
        esa2.save()

        computed_file2 = ComputedFile()
        computed_file2.filename = "SRP149598_gene_lengthScaledTPM.tsv"
        computed_file2.absolute_file_path = "/home/user/data_store/PCL/" + computed_file2.filename
        computed_file2.result = result2
        computed_file2.size_in_bytes = 234
        computed_file2.is_smashable = True
        computed_file2.save()

        assoc2 = SampleComputedFileAssociation()
        assoc2.sample = sample2
        assoc2.computed_file = computed_file2
        assoc2.save()

        # CROSS-SMASH BY SPECIES
        ds = Dataset()
        ds.data = {'GSE1487313': ['GSM1487313'], 'SRX332914': ['SRS332914']}
        ds.aggregate_by = 'SPECIES'
        ds.scale_by = 'STANDARD'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        self.assertTrue(ds.is_cross_technology())
        final_context = smasher.smash(pj.pk, upload=False)
        self.assertTrue(os.path.exists(final_context['output_file']))
        os.remove(final_context['output_file'])
        self.assertEqual(len(final_context['final_frame'].columns), 2)

        # THEN BY EXPERIMENT
        ds.aggregate_by = 'EXPERIMENT'
        ds.save()

        dsid = ds.id
        ds = Dataset.objects.get(id=dsid)

        pj.start_time = None
        pj.end_time = None
        pj.save()

        final_context = smasher.smash(pj.pk, upload=False)
        self.assertTrue(os.path.exists(final_context['output_file']))
        os.remove(final_context['output_file'])
        self.assertEqual(len(final_context['final_frame'].columns), 1)

        # THEN BY ALL
        ds.aggregate_by = 'ALL'
        ds.save()

        dsid = ds.id
        ds = Dataset.objects.get(id=dsid)

        pj.start_time = None
        pj.end_time = None
        pj.save()
        final_context = smasher.smash(pj.pk, upload=False)
        self.assertTrue(os.path.exists(final_context['output_file']))
        self.assertEqual(len(final_context['final_frame'].columns), 2)
Example #12
    def test_no_smash_dupe(self):
        """ """

        job = ProcessorJob()
        job.pipeline_applied = "SMASHER"
        job.save()

        experiment = Experiment()
        experiment.accession_code = "GSE51081"
        experiment.save()

        result = ComputationalResult()
        result.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

        sample = Sample()
        sample.accession_code = 'GSM1237810'
        sample.title = 'GSM1237810'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1237810_T09-1084.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        result = ComputationalResult()
        result.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sample = Sample()
        sample.accession_code = 'GSM1237811'
        sample.title = 'GSM1237811'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        result = ComputationalResult()
        result.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'GSE51081': ['GSM1237810', 'GSM1237811']}
        ds.aggregate_by = 'ALL'
        ds.scale_by = 'STANDARD'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(job.pk, upload=False)

        dsid = ds.id
        ds = Dataset.objects.get(id=dsid)

        self.assertTrue(ds.success)
        for column in final_context['original_merged'].columns:
            self.assertTrue('_x' not in column)
Example #13
    def test_no_smash_dupe_two(self):
        """ Tests the SRP051449 case, where the titles collide. Also uses a real QN target file."""

        job = ProcessorJob()
        job.pipeline_applied = "SMASHER"
        job.save()

        experiment = Experiment()
        experiment.accession_code = "SRP051449"
        experiment.save()

        result = ComputationalResult()
        result.save()

        danio_rerio = Organism.get_object_for_name("DANIO_RERIO")

        sample = Sample()
        sample.accession_code = 'SRR1731761'
        sample.title = 'Danio rerio'
        sample.organism = danio_rerio
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "SRR1731761_output_gene_lengthScaledTPM.tsv"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        result = ComputationalResult()
        result.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sample = Sample()
        sample.accession_code = 'SRR1731762'
        sample.title = 'Danio rerio'
        sample.organism = danio_rerio
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "SRR1731762_output_gene_lengthScaledTPM.tsv"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        result = ComputationalResult()
        result.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'SRP051449': ['SRR1731761', 'SRR1731762']}
        ds.aggregate_by = 'SPECIES'
        ds.scale_by = 'NONE'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = True
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = ds
        pjda.save()

        cr = ComputationalResult()
        cr.save()

        computed_file = ComputedFile()
        computed_file.filename = "danio_target.tsv"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = cr
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = False
        computed_file.save()

        cra = ComputationalResultAnnotation()
        cra.data = {'organism_id': danio_rerio.id, 'is_qn': True}
        cra.result = cr
        cra.save()

        final_context = smasher.smash(job.pk, upload=False)
        self.assertTrue(final_context['success'])
Example #14
    def test_create_compendia_danio(self):
        job = ProcessorJob()
        job.pipeline_applied = "COMPENDIA"
        job.save()

        # MICROARRAY TECH
        experiment = Experiment()
        experiment.accession_code = "GSE1234"
        experiment.save()

        result = ComputationalResult()
        result.save()

        danio_rerio = Organism.get_object_for_name("DANIO_RERIO")

        micros = []
        for file in os.listdir('/home/user/data_store/raw/TEST/MICROARRAY/'):

            if 'microarray.txt' in file:
                continue

            sample = Sample()
            sample.accession_code = file
            sample.title = file
            sample.organism = danio_rerio
            sample.technology = "MICROARRAY"
            sample.save()

            sra = SampleResultAssociation()
            sra.sample = sample
            sra.result = result
            sra.save()

            esa = ExperimentSampleAssociation()
            esa.experiment = experiment
            esa.sample = sample
            esa.save()

            computed_file = ComputedFile()
            computed_file.filename = file
            computed_file.absolute_file_path = "/home/user/data_store/raw/TEST/MICROARRAY/" + file
            computed_file.result = result
            computed_file.size_in_bytes = 123
            computed_file.is_smashable = True
            computed_file.save()

            assoc = SampleComputedFileAssociation()
            assoc.sample = sample
            assoc.computed_file = computed_file
            assoc.save()

            micros.append(file)

        experiment = Experiment()
        experiment.accession_code = "GSE5678"
        experiment.save()

        result = ComputationalResult()
        result.save()
        rnas = []
        for file in os.listdir('/home/user/data_store/raw/TEST/RNASEQ/'):

            if 'rnaseq.txt' in file:
                continue

            sample = Sample()
            sample.accession_code = file
            sample.title = file
            sample.organism = danio_rerio
            sample.technology = "RNASEQ"
            sample.save()

            sra = SampleResultAssociation()
            sra.sample = sample
            sra.result = result
            sra.save()

            esa = ExperimentSampleAssociation()
            esa.experiment = experiment
            esa.sample = sample
            esa.save()

            computed_file = ComputedFile()
            computed_file.filename = file
            computed_file.absolute_file_path = "/home/user/data_store/raw/TEST/RNASEQ/" + file
            computed_file.result = result
            computed_file.size_in_bytes = 123
            computed_file.is_smashable = True
            computed_file.save()

            assoc = SampleComputedFileAssociation()
            assoc.sample = sample
            assoc.computed_file = computed_file
            assoc.save()

            rnas.append(file)

        result = ComputationalResult()
        result.save()

        qn_target = ComputedFile()
        qn_target.filename = "danio_target.tsv"
        qn_target.absolute_file_path = '/home/user/data_store/QN/danio_target.tsv'
        qn_target.is_qn_target = True
        qn_target.size_in_bytes = 12345
        qn_target.sha1 = "aabbccddeeff"
        qn_target.result = result
        qn_target.save()

        cra = ComputationalResultAnnotation()
        cra.data = {}
        cra.data['organism_id'] = danio_rerio.id
        cra.data['is_qn'] = True
        cra.result = result
        cra.save()

        dset = Dataset()
        dset.data = {'GSE1234': micros, 'GSE5678': rnas}
        dset.scale_by = 'NONE'
        dset.aggregate_by = 'SPECIES'
        dset.quantile_normalize = False
        dset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dset
        pjda.save()

        final_context = create_compendia.create_compendia(job.id)

        # Verify result
        self.assertEqual(len(final_context['computed_files']), 3)
        for file in final_context['computed_files']:
            self.assertTrue(os.path.exists(file.absolute_file_path))
Example #15
    def test_no_smash_all_diff_species(self):
        """ Smashing together with 'ALL' with different species is a really weird behavior. 
        This test isn't really testing a normal case, just make sure that it's marking the
        unsmashable files.
        """

        job = ProcessorJob()
        job.pipeline_applied = "SMASHER"
        job.save()

        experiment = Experiment()
        experiment.accession_code = "GSE51081"
        experiment.save()

        result = ComputationalResult()
        result.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

        sample = Sample()
        sample.accession_code = 'GSM1237810'
        sample.title = 'GSM1237810'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1237810_T09-1084.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        result = ComputationalResult()
        result.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        experiment = Experiment()
        experiment.accession_code = "GSE51084"
        experiment.save()

        mus_mus = Organism.get_object_for_name("MUS_MUSCULUS")

        sample = Sample()
        sample.accession_code = 'GSM1238108'
        sample.title = 'GSM1238108'
        sample.organism = homo_sapiens
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1238108-tbl-1.txt"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'GSE51081': ['GSM1237810'], 'GSE51084': ['GSM1238108']}
        ds.aggregate_by = 'ALL'
        ds.scale_by = 'STANDARD'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(job.pk, upload=False)

        dsid = ds.id
        ds = Dataset.objects.get(id=dsid)
        print(ds.failure_reason)
        print(final_context['dataset'].failure_reason)

        self.assertEqual(final_context['unsmashable_files'], ['GSM1238108'])
Example #16
    def test_qn_reference(self):
        job = ProcessorJob()
        job.pipeline_applied = "QN_REFERENCE"
        job.save()

        homo_sapiens = Organism(name="HOMO_SAPIENS", taxonomy_id=9606)
        homo_sapiens.save()

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()
        # We don't have a 0.tsv
        codes = [str(i) for i in range(1, 201)]

        for code in codes:
            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_accession_code = "A-MEXP-1171"
            sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
            sample.organism = homo_sapiens
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.save()

            cr = ComputationalResult()
            cr.save()

            computed_file = ComputedFile()
            computed_file.filename = code + ".tsv"
            computed_file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
            computed_file.size_in_bytes = int(code)
            computed_file.result = cr
            computed_file.is_smashable = True
            computed_file.save()

            scfa = SampleComputedFileAssociation()
            scfa.sample = sample
            scfa.computed_file = computed_file
            scfa.save()

            exsa = ExperimentSampleAssociation()
            exsa.experiment = experiment
            exsa.sample = sample
            exsa.save()

        dataset = Dataset()
        dataset.data = {"12345": ["1", "2", "3", "4", "5", "6"]}
        dataset.aggregate_by = "ALL"
        dataset.scale_by = "NONE"
        dataset.quantile_normalize = False  # We don't QN because we're creating the target now
        dataset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dataset
        pjda.save()

        final_context = qn_reference.create_qn_reference(job.pk)
        self.assertTrue(final_context["success"])
        self.assertTrue(os.path.exists(final_context["target_file"]))
        self.assertEqual(os.path.getsize(final_context["target_file"]), 562)

        homo_sapiens.refresh_from_db()
        target = homo_sapiens.qn_target.computedfile_set.latest()
        self.assertEqual(target.sha1, "de69d348f8b239479e2330d596c4013a7b0b2b6a")

        # Create and run a smasher job that will use the QN target we just made.
        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        ds = Dataset()
        ds.data = {"12345": ["1", "2", "3", "4", "5"]}
        ds.aggregate_by = "SPECIES"
        ds.scale_by = "STANDARD"
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = True
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(pj.pk, upload=False)
        self.assertTrue(final_context["success"])

        np.testing.assert_almost_equal(final_context["merged_qn"]["1"][0], -0.4379488527774811)
        np.testing.assert_almost_equal(final_context["original_merged"]["1"][0], -0.5762109)
Example #17
    def setUp(self):
        # Saving this for if we have protected endpoints
        # self.superuser = User.objects.create_superuser('john', '*****@*****.**', 'johnpassword')
        # self.client.login(username='******', password='******')
        # self.user = User.objects.create(username="******")

        experiment = Experiment()
        experiment.accession_code = "GSE000"
        experiment.alternate_accession_code = "E-GEOD-000"
        experiment.title = "NONONONO"
        experiment.description = "Boooooourns. Wasabi."
        experiment.technology = "RNA-SEQ"
        experiment.save()

        experiment = Experiment()
        experiment.accession_code = "GSE123"
        experiment.title = "Hey Ho Let's Go"
        experiment.description = (
            "This is a very exciting test experiment. Faygo soda. Blah blah blah."
        )
        experiment.technology = "MICROARRAY"
        experiment.save()
        self.experiment = experiment

        experiment_annotation = ExperimentAnnotation()
        experiment_annotation.data = {"hello": "world", "123": 456}
        experiment_annotation.experiment = experiment
        experiment_annotation.save()

        # Create 26 test organisms numbered 0-25 for pagination test, so there should be 29 organisms total (with the 3 others below)
        for i in range(26):
            Organism(name=("TEST_ORGANISM_{}".format(i)),
                     taxonomy_id=(1234 + i)).save()

        ailuropoda = Organism(name="AILUROPODA_MELANOLEUCA",
                              taxonomy_id=9646,
                              is_scientific_name=True)
        ailuropoda.save()
        self.homo_sapiens = Organism(name="HOMO_SAPIENS",
                                     taxonomy_id=9606,
                                     is_scientific_name=True)
        self.homo_sapiens.save()
        self.danio_rerio = Organism(name="DANIO_RERIO",
                                    taxonomy_id=1337,
                                    is_scientific_name=True)
        self.danio_rerio.save()

        sample = Sample()
        sample.title = "123"
        sample.accession_code = "123"
        sample.is_processed = True
        sample.organism = ailuropoda
        sample.save()

        sample = Sample()
        sample.title = "789"
        sample.accession_code = "789"
        sample.is_processed = True
        sample.organism = ailuropoda
        sample.save()
        self.sample = sample

        # add qn target for sample organism
        result = ComputationalResult()
        result.commands.append("create_qn_target.py")
        result.is_ccdl = True
        result.is_public = True
        result.processor = None
        result.save()

        cra = ComputationalResultAnnotation()
        cra.result = result
        cra.data = {"organism_id": ailuropoda.id, "is_qn": True}
        cra.save()

        ailuropoda.qn_target = result
        ailuropoda.save()

        sample_annotation = SampleAnnotation()
        sample_annotation.data = {"goodbye": "world", "789": 123}
        sample_annotation.sample = sample
        sample_annotation.save()

        original_file = OriginalFile()
        original_file.save()

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.sample = sample
        original_file_sample_association.original_file = original_file
        original_file_sample_association.save()

        downloader_job = DownloaderJob()
        downloader_job.save()

        download_assoc = DownloaderJobOriginalFileAssociation()
        download_assoc.original_file = original_file
        download_assoc.downloader_job = downloader_job
        download_assoc.save()

        processor_job = ProcessorJob()
        processor_job.save()

        processor_assoc = ProcessorJobOriginalFileAssociation()
        processor_assoc.original_file = original_file
        processor_assoc.processor_job = processor_job
        processor_assoc.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()
        experiment.num_total_samples = 1
        experiment.num_processed_samples = 1
        experiment.save()

        result = ComputationalResult()
        result.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        result = ComputationalResult()
        result.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        processor = Processor()
        processor.name = "Salmon Quant"
        processor.version = "v9.9.9"
        processor.docker_image = "dr_salmon"
        processor.environment = '{"some": "environment"}'
        processor.save()

        computational_result_short = ComputationalResult(processor=processor)
        computational_result_short.save()

        organism_index = OrganismIndex()
        organism_index.index_type = "TRANSCRIPTOME_SHORT"
        organism_index.organism = self.danio_rerio
        organism_index.result = computational_result_short
        organism_index.absolute_directory_path = (
            "/home/user/data_store/salmon_tests/TRANSCRIPTOME_INDEX/SHORT")
        organism_index.is_public = True
        organism_index.s3_url = "not_blank"
        organism_index.save()

        return
Example #18
    def test_make_experiment_result_associations(self):
        """Tests that the correct associations are made.

        The situation we're setting up is basically this:
          * tximport has been run for an experiment.
          * It made associations between the samples in
            the experiment and the ComputationalResult.
          * It didn't make associations between the
            experiment itself and the ComputationalResult.
          * There is a second experiment that hasn't had
            tximport run but shares a sample with the
            other experiment.
          * This second experiment has a sample which has
            not yet had tximport run on it.

        And what we're going to test for is:
          * An association is created between the tximport
            result and the first experiment.
          * An association is NOT created between the
            tximport result and the second experiment.
        """
        # Get an organism to set on samples:
        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS",
                                                    taxonomy_id=9606)

        # Create the tximport processor and result:
        processor = Processor()
        processor.name = "Tximport"
        processor.version = "v9.9.9"
        processor.docker_image = "dr_salmon"
        processor.environment = '{"some": "environment"}'
        processor.save()

        result = ComputationalResult()
        result.commands.append("tximport invocation")
        result.is_ccdl = True
        result.processor = processor
        result.save()

        # Create the first experiment and its samples:
        processed_experiment = Experiment()
        processed_experiment.accession_code = "SRP12345"
        processed_experiment.save()

        processed_sample_one = Sample()
        processed_sample_one.accession_code = "SRX12345"
        processed_sample_one.title = "SRX12345"
        processed_sample_one.organism = homo_sapiens
        processed_sample_one.save()

        sra = SampleResultAssociation()
        sra.sample = processed_sample_one
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = processed_experiment
        esa.sample = processed_sample_one
        esa.save()

        processed_sample_two = Sample()
        processed_sample_two.accession_code = "SRX12346"
        processed_sample_two.title = "SRX12346"
        processed_sample_two.organism = homo_sapiens
        processed_sample_two.save()

        sra = SampleResultAssociation()
        sra.sample = processed_sample_two
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = processed_experiment
        esa.sample = processed_sample_two
        esa.save()

        # Create the second experiment and its additional sample.
        unprocessed_experiment = Experiment()
        unprocessed_experiment.accession_code = "SRP6789"
        unprocessed_experiment.save()

        unprocessed_sample = Sample()
        unprocessed_sample.accession_code = "SRX6789"
        unprocessed_sample.title = "SRX6789"
        unprocessed_sample.organism = homo_sapiens
        unprocessed_sample.save()

        sra = SampleResultAssociation()
        sra.sample = unprocessed_sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = unprocessed_experiment
        esa.sample = unprocessed_sample
        esa.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = unprocessed_experiment
        esa.sample = processed_sample_two
        esa.save()

        # Run the function we're testing:
        make_experiment_result_associations()

        # Test that only one association was created and that it was
        # to the processed experiment:
        eras = ExperimentResultAssociation.objects.all()

        self.assertEqual(len(eras), 1)
        self.assertEqual(eras.first().experiment, processed_experiment)
Example #19
    def setUpClass(cls):
        """Set up class."""
        super(ESTestCases, cls).setUpClass()  # ref https://stackoverflow.com/a/29655301/763705

        experiment = Experiment()
        experiment.accession_code = "GSE000-X"
        experiment.title = "NONONONO"
        experiment.description = "Boooooourns. Wasabi."
        experiment.technology = "RNA-SEQ"
        experiment.save()

        experiment = Experiment()
        experiment.accession_code = "GSE123-X"
        experiment.title = "Hey Ho Let's Go"
        experiment.description = (
            "This is a very exciting test experiment. Faygo soda. Blah blah blah."
        )
        experiment.technology = "MICROARRAY"
        experiment.num_processed_samples = 1  # added below
        experiment.num_total_samples = 1
        experiment.num_downloadable_samples = 1
        experiment.save()

        experiment_annotation = ExperimentAnnotation()
        experiment_annotation.data = {"hello": "world", "123": 456}
        experiment_annotation.experiment = experiment
        experiment_annotation.save()

        sample = Sample()
        sample.title = "123"
        sample.accession_code = "123"
        sample.save()

        organism = Organism(
            name="AILUROPODA_MELANOLEUCA", taxonomy_id=9646, is_scientific_name=True
        )
        organism.save()

        sample = Sample()
        sample.title = "789"
        sample.accession_code = "789"
        sample.is_processed = True
        sample.organism = organism
        sample.save()

        sample_annotation = SampleAnnotation()
        sample_annotation.data = {"goodbye": "world", "789": 123}
        sample_annotation.sample = sample
        sample_annotation.save()

        original_file = OriginalFile()
        original_file.save()

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.sample = sample
        original_file_sample_association.original_file = original_file
        original_file_sample_association.save()

        downloader_job = DownloaderJob()
        downloader_job.save()

        download_assoc = DownloaderJobOriginalFileAssociation()
        download_assoc.original_file = original_file
        download_assoc.downloader_job = downloader_job
        download_assoc.save()

        processor_job = ProcessorJob()
        processor_job.save()

        processor_assoc = ProcessorJobOriginalFileAssociation()
        processor_assoc.original_file = original_file
        processor_assoc.processor_job = processor_job
        processor_assoc.save()

        # associate the experiment with the sample
        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        result = ComputationalResult()
        result.save()

        # and create a QN target for the sample
        computational_result = ComputationalResultAnnotation()
        computational_result.result = result
        computational_result.data = {"is_qn": True, "organism_id": sample.organism.id}
        computational_result.save()

        # and associate it with the sample organism
        sample.organism.qn_target = result
        sample.organism.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        result = ComputationalResult()
        result.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        # clear default cache and reindex
        # otherwise the organisms with qn_targets will be cached.
        cache.clear()
        call_command("search_index", "--rebuild", "-f")
Example #20
    def test_create_compendia(self):
        job = ProcessorJob()
        job.pipeline_applied = ProcessorPipeline.CREATE_COMPENDIA.value
        job.save()

        # MICROARRAY TECH
        experiment = Experiment()
        experiment.accession_code = "GSE1487313"
        experiment.save()

        result = ComputationalResult()
        result.save()

        gallus_gallus = Organism.get_object_for_name("GALLUS_GALLUS",
                                                     taxonomy_id=1001)

        sample = Sample()
        sample.accession_code = "GSM1487313"
        sample.title = "GSM1487313"
        sample.organism = gallus_gallus
        sample.technology = "MICROARRAY"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1487313_liver.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        # Missing sample that will be filtered
        sample = Sample()
        sample.accession_code = "GSM1487222"
        sample.title = "this sample will be filtered"
        sample.organism = gallus_gallus
        sample.technology = "MICROARRAY"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1487222_empty.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/doesnt_exists.PCL"
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        # RNASEQ TECH
        experiment2 = Experiment()
        experiment2.accession_code = "SRS332914"
        experiment2.save()

        result2 = ComputationalResult()
        result2.save()

        sample2 = Sample()
        sample2.accession_code = "SRS332914"
        sample2.title = "SRS332914"
        sample2.organism = gallus_gallus
        sample2.technology = "RNA-SEQ"
        sample2.save()

        sra2 = SampleResultAssociation()
        sra2.sample = sample2
        sra2.result = result2
        sra2.save()

        esa2 = ExperimentSampleAssociation()
        esa2.experiment = experiment2
        esa2.sample = sample2
        esa2.save()

        computed_file2 = ComputedFile()
        computed_file2.filename = "SRP149598_gene_lengthScaledTPM.tsv"
        computed_file2.absolute_file_path = "/home/user/data_store/PCL/" + computed_file2.filename
        computed_file2.result = result2
        computed_file2.size_in_bytes = 234
        computed_file2.is_smashable = True
        computed_file2.save()

        assoc2 = SampleComputedFileAssociation()
        assoc2.sample = sample2
        assoc2.computed_file = computed_file2
        assoc2.save()

        dset = Dataset()
        dset.data = {
            "GSE1487313": ["GSM1487313", "GSM1487222"],
            "SRX332914": ["SRS332914"]
        }
        dset.scale_by = "NONE"
        dset.aggregate_by = "SPECIES"
        dset.svd_algorithm = "ARPACK"
        dset.quantile_normalize = False
        dset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dset
        pjda.save()

        final_context = create_compendia.create_compendia(job.id)

        self.assertFalse(job.success)

        # check that sample with no computed file was skipped
        self.assertTrue("GSM1487222" in final_context["filtered_samples"])
        self.assertEqual(
            final_context["filtered_samples"]["GSM1487222"]
            ["experiment_accession_code"],
            "GSE1487313",
        )
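
The two filtered-sample assertions above recur in the compendia tests, so a tiny helper keeps them in one place. This is a sketch only: assert_sample_filtered is a hypothetical name, and it assumes final_context["filtered_samples"] maps sample accession codes to dicts carrying an experiment_accession_code key, exactly as the assertions above do.

def assert_sample_filtered(test_case, final_context, sample_accession, experiment_accession):
    """Assert that a sample was filtered out and attributed to the expected experiment.

    Hypothetical helper; it only repackages the assertions used above.
    """
    filtered = final_context["filtered_samples"]
    test_case.assertIn(sample_accession, filtered)
    test_case.assertEqual(
        filtered[sample_accession]["experiment_accession_code"],
        experiment_accession,
    )

With it, the check above becomes assert_sample_filtered(self, final_context, "GSM1487222", "GSE1487313"), and the Danio test below can reuse the same call with "GSE5678".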
Exemple #21
0
    def test_create_compendia_danio(self):
        job = ProcessorJob()
        job.pipeline_applied = ProcessorPipeline.CREATE_COMPENDIA.value
        job.save()

        # MICROARRAY TECH
        experiment = Experiment()
        experiment.accession_code = "GSE1234"
        experiment.save()

        result = ComputationalResult()
        result.save()

        qn_target = ComputedFile()
        qn_target.filename = "danio_target.tsv"
        qn_target.absolute_file_path = "/home/user/data_store/QN/danio_target.tsv"
        qn_target.is_qn_target = True
        qn_target.size_in_bytes = "12345"
        qn_target.sha1 = "aabbccddeeff"
        qn_target.result = result
        qn_target.save()

        danio_rerio = Organism(name="DANIO_RERIO",
                               taxonomy_id=1,
                               qn_target=result)
        danio_rerio.save()

        cra = ComputationalResultAnnotation()
        cra.data = {}
        cra.data["organism_id"] = danio_rerio.id
        cra.data["is_qn"] = True
        cra.result = result
        cra.save()

        result = ComputationalResult()
        result.save()

        micros = []
        for file in os.listdir("/home/user/data_store/raw/TEST/MICROARRAY/"):

            if "microarray.txt" in file:
                continue

            sample = Sample()
            sample.accession_code = file
            sample.title = file
            sample.organism = danio_rerio
            sample.technology = "MICROARRAY"
            sample.save()

            sra = SampleResultAssociation()
            sra.sample = sample
            sra.result = result
            sra.save()

            esa = ExperimentSampleAssociation()
            esa.experiment = experiment
            esa.sample = sample
            esa.save()

            computed_file = ComputedFile()
            computed_file.filename = file
            computed_file.absolute_file_path = "/home/user/data_store/raw/TEST/MICROARRAY/" + file
            computed_file.result = result
            computed_file.size_in_bytes = 123
            computed_file.is_smashable = True
            computed_file.save()

            assoc = SampleComputedFileAssociation()
            assoc.sample = sample
            assoc.computed_file = computed_file
            assoc.save()

            micros.append(file)

        experiment = Experiment()
        experiment.accession_code = "GSE5678"
        experiment.save()

        result = ComputationalResult()
        result.save()
        rnas = []
        for file in os.listdir("/home/user/data_store/raw/TEST/RNASEQ/"):

            if "rnaseq.txt" in file:
                continue

            sample = Sample()
            sample.accession_code = file
            sample.title = file
            sample.organism = danio_rerio
            sample.technology = "RNASEQ"
            sample.save()

            sra = SampleResultAssociation()
            sra.sample = sample
            sra.result = result
            sra.save()

            esa = ExperimentSampleAssociation()
            esa.experiment = experiment
            esa.sample = sample
            esa.save()

            computed_file = ComputedFile()
            computed_file.filename = file
            computed_file.absolute_file_path = "/home/user/data_store/raw/TEST/RNASEQ/" + file
            computed_file.result = result
            computed_file.size_in_bytes = 123
            computed_file.is_smashable = True
            computed_file.save()

            assoc = SampleComputedFileAssociation()
            assoc.sample = sample
            assoc.computed_file = computed_file
            assoc.save()

            rnas.append(file)

        # Missing sample that will be filtered
        sample = Sample()
        sample.accession_code = "GSM1487222"
        sample.title = "this sample will be filtered"
        sample.organism = danio_rerio
        sample.technology = "RNASEQ"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        rnas.append(sample.accession_code)

        dset = Dataset()
        dset.data = {"GSE1234": micros, "GSE5678": rnas}
        dset.scale_by = "NONE"
        dset.aggregate_by = "SPECIES"
        dset.svd_algorithm = "ARPACK"
        dset.quantile_normalize = False
        dset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dset
        pjda.save()

        final_context = create_compendia.create_compendia(job.id)

        # Verify result
        self.assertEqual(
            final_context["compendium_result"].result.computedfile_set.count(),
            1)
        for file in final_context[
                "compendium_result"].result.computedfile_set.all():
            self.assertTrue(os.path.exists(file.absolute_file_path))

        # test compendium_result
        self.assertEqual(final_context["compendium_result"].svd_algorithm,
                         "ARPACK")
        self.assertEqual(
            final_context["compendium_result"].primary_organism.name,
            final_context["organism_name"])
        self.assertEqual(
            final_context["compendium_result"].primary_organism.name,
            "DANIO_RERIO")
        self.assertEqual(final_context["compendium_result"].organisms.count(),
                         1)

        # check that sample with no computed file was skipped
        self.assertTrue("GSM1487222" in final_context["filtered_samples"])
        self.assertEqual(
            final_context["filtered_samples"]["GSM1487222"]
            ["experiment_accession_code"], "GSE5678")
Exemple #22
0
    def test_create_compendia(self):
        job = ProcessorJob()
        job.pipeline_applied = "COMPENDIA"
        job.save()

        # MICROARRAY TECH
        experiment = Experiment()
        experiment.accession_code = "GSE1487313"
        experiment.save()

        result = ComputationalResult()
        result.save()

        gallus_gallus = Organism.get_object_for_name("GALLUS_GALLUS")

        sample = Sample()
        sample.accession_code = 'GSM1487313'
        sample.title = 'GSM1487313'
        sample.organism = gallus_gallus
        sample.technology = "MICROARRAY"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "GSM1487313_liver.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        # RNASEQ TECH
        experiment2 = Experiment()
        experiment2.accession_code = "SRS332914"
        experiment2.save()

        result2 = ComputationalResult()
        result2.save()

        sample2 = Sample()
        sample2.accession_code = 'SRS332914'
        sample2.title = 'SRS332914'
        sample2.organism = gallus_gallus
        sample2.technology = "RNA-SEQ"
        sample2.save()

        sra2 = SampleResultAssociation()
        sra2.sample = sample2
        sra2.result = result2
        sra2.save()

        esa2 = ExperimentSampleAssociation()
        esa2.experiment = experiment2
        esa2.sample = sample2
        esa2.save()

        computed_file2 = ComputedFile()
        computed_file2.filename = "SRP149598_gene_lengthScaledTPM.tsv"
        computed_file2.absolute_file_path = "/home/user/data_store/PCL/" + computed_file2.filename
        computed_file2.result = result2
        computed_file2.size_in_bytes = 234
        computed_file2.is_smashable = True
        computed_file2.save()

        assoc2 = SampleComputedFileAssociation()
        assoc2.sample = sample2
        assoc2.computed_file = computed_file2
        assoc2.save()

        dset = Dataset()
        dset.data = {'GSE1487313': ['GSM1487313'], 'SRX332914': ['SRS332914']}
        dset.scale_by = 'NONE'
        dset.aggregate_by = 'SPECIES'
        dset.quantile_normalize = False
        dset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dset
        pjda.save()

        final_context = create_compendia.create_compendia(job.id)
Exemple #23
0
    def test_queue_downloader_jobs_for_original_files(self, mock_send_task):
        """Make sure that queue_downloader_jobs queues all expected Downloader
        jobs for a given experiment.
        """
        # First, create an experiment with two samples associated with it
        # and create two original files for each of those samples.
        experiment_object = Experiment()
        experiment_object.accession_code = "Experiment1"
        experiment_object.save()

        sample_object_1 = Sample()
        sample_object_1.accession_code = "Sample1"
        sample_object_1.platform_accession_code = "Illumina Genome Analyzer"
        sample_object_1.platform_accession_name = "Illumina Genome Analyzer"
        sample_object_1.technology = "RNA-SEQ"
        sample_object_1.manufacturer = "ILLUMINA"
        sample_object_1.source_database = "SRA"
        sample_object_1.save()
        sample_object_2 = Sample()
        sample_object_2.accession_code = "Sample2"
        sample_object_2.platform_accession_code = "Illumina Genome Analyzer"
        sample_object_2.platform_accession_name = "Illumina Genome Analyzer"
        sample_object_2.technology = "RNA-SEQ"
        sample_object_2.manufacturer = "ILLUMINA"
        sample_object_2.source_database = "SRA"
        sample_object_2.save()

        association = ExperimentSampleAssociation()
        association.experiment = experiment_object
        association.sample = sample_object_1
        association.save()

        association = ExperimentSampleAssociation()
        association.experiment = experiment_object
        association.sample = sample_object_2
        association.save()

        sample_1_original_files = []
        sample_2_original_files = []

        original_file = OriginalFile()
        original_file.source_url = "first_url"
        original_file.source_filename = "first_filename"
        original_file.is_downloaded = False
        original_file.has_raw = True
        original_file.save()
        sample_1_original_files.append(original_file)

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.original_file = original_file
        original_file_sample_association.sample = sample_object_1
        original_file_sample_association.save()

        original_file = OriginalFile()
        original_file.source_url = "second_url"
        original_file.source_filename = "second_filename"
        original_file.is_downloaded = False
        original_file.has_raw = True
        original_file.save()
        sample_1_original_files.append(original_file)

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.original_file = original_file
        original_file_sample_association.sample = sample_object_1
        original_file_sample_association.save()

        original_file = OriginalFile()
        original_file.source_url = "third_url"
        original_file.source_filename = "third_filename"
        original_file.is_downloaded = False
        original_file.has_raw = True
        original_file.save()
        sample_2_original_files.append(original_file)

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.original_file = original_file
        original_file_sample_association.sample = sample_object_2
        original_file_sample_association.save()

        original_file = OriginalFile()
        original_file.source_url = "fourth_url"
        original_file.source_filename = "fourth_filename"
        original_file.is_downloaded = False
        original_file.has_raw = True
        original_file.save()
        sample_2_original_files.append(original_file)

        original_file_sample_association = OriginalFileSampleAssociation()
        original_file_sample_association.original_file = original_file
        original_file_sample_association.sample = sample_object_2
        original_file_sample_association.save()

        survey_job = SurveyJob(source_type="SRA")
        survey_job.save()
        surveyor = SraSurveyor(survey_job)

        surveyor.queue_downloader_job_for_original_files(
            sample_1_original_files, experiment_object.accession_code
        )
        surveyor.queue_downloader_job_for_original_files(
            sample_2_original_files, experiment_object.accession_code
        )

        self.assertEqual(DownloaderJob.objects.all().count(), 2)
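
Counting jobs covers the happy path; if appended to the end of the test above, the following sketch would also check that every original file ended up attached to a downloader job. It assumes the surveyor records the link through DownloaderJobOriginalFileAssociation (the association model used elsewhere in these examples); the exact bookkeeping may differ in the real implementation.

        # Sketch: every original file handed to the surveyor should now be
        # linked to some DownloaderJob through the association table.
        for original_file in sample_1_original_files + sample_2_original_files:
            self.assertTrue(
                DownloaderJobOriginalFileAssociation.objects.filter(
                    original_file=original_file
                ).exists()
            )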
Exemple #24
0
    def test_dataset_stats(self):
        """ Test the dataset stats endpoint """

        gallus_gallus = Organism(name="GALLUS_GALLUS", taxonomy_id=9031, is_scientific_name=True)
        gallus_gallus.save()
        equus_ferus = Organism(name="EQUUS_FERUS", taxonomy_id=1114792, is_scientific_name=True)
        equus_ferus.save()

        ex = Experiment()
        ex.accession_code = "XYZ123"
        ex.title = "XYZ123"
        ex.description = "XYZ123"
        ex.technology = "MICROARRAY"
        ex.submitter_institution = "XYZ123"
        ex.save()

        ex2 = Experiment()
        ex2.accession_code = "ABC789"
        ex2.title = "ABC789"
        ex2.description = "ABC789"
        ex2.technology = "RNA-SEQ"
        ex2.submitter_institution = "Funkytown"
        ex2.save()

        sample1 = Sample()
        sample1.title = "1"
        sample1.accession_code = "1"
        sample1.platform_name = "AFFY"
        sample1.organism = self.homo_sapiens
        sample1.save()

        sample2 = Sample()
        sample2.title = "2"
        sample2.accession_code = "2"
        sample2.platform_name = "ILLUMINA"
        sample2.organism = gallus_gallus
        sample2.save()

        sample3 = Sample()
        sample3.title = "3"
        sample3.accession_code = "3"
        sample3.platform_name = "ILLUMINA"
        sample3.organism = gallus_gallus
        sample3.save()

        xoa = ExperimentOrganismAssociation()
        xoa.experiment = ex
        xoa.organism = self.homo_sapiens
        xoa.save()

        xoa = ExperimentOrganismAssociation()
        xoa.experiment = ex2
        xoa.organism = gallus_gallus
        xoa.save()

        xoa = ExperimentOrganismAssociation()
        xoa.experiment = ex2
        xoa.organism = equus_ferus
        xoa.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample1
        experiment_sample_association.experiment = ex
        experiment_sample_association.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample2
        experiment_sample_association.experiment = ex2
        experiment_sample_association.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample3
        experiment_sample_association.experiment = ex2
        experiment_sample_association.save()

        jdata = json.dumps({"data": {"XYZ123": ["1"], "ABC789": ["2"]}})
        response = self.client.post(
            reverse("create_dataset", kwargs={"version": API_VERSION}),
            jdata,
            content_type="application/json",
        )

        self.assertEqual(response.status_code, 201)
        self.assertEqual(response.json()["data"], json.loads(jdata)["data"])
        good_id = response.json()["id"]

        # Check that we can fetch these sample details via samples API
        response = self.client.get(
            reverse("samples", kwargs={"version": API_VERSION}), {"dataset_id": good_id}
        )
        self.assertEqual(response.json()["count"], 2)
Exemple #25
0
    def test_qn_reference(self):
        job = ProcessorJob()
        job.pipeline_applied = "QN_REFERENCE"
        job.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

        experiment = Experiment()
        experiment.accession_code = "12345"
        experiment.save()

        for code in ['1', '2', '3', '4', '5', '6']:
            sample = Sample()
            sample.accession_code = code
            sample.title = code
            sample.platform_accession_code = 'A-MEXP-1171'
            sample.manufacturer = "SLIPPERY DICK'S DISCOUNT MICROARRAYS"
            sample.organism = homo_sapiens
            sample.technology = "MICROARRAY"
            sample.is_processed = True
            sample.save()

            cr = ComputationalResult()
            cr.save()

            file = ComputedFile()
            file.filename = code + ".tsv"
            file.absolute_file_path = "/home/user/data_store/QN/" + code + ".tsv"
            file.size_in_bytes = int(code)
            file.result = cr
            file.is_smashable = True
            file.save()

            scfa = SampleComputedFileAssociation()
            scfa.sample = sample
            scfa.computed_file = file
            scfa.save()

            exsa = ExperimentSampleAssociation()
            exsa.experiment = experiment
            exsa.sample = sample
            exsa.save()

        
        dataset = Dataset()
        dataset.data = {"12345": ["1", "2", "3", "4", "5", "6"]}
        dataset.aggregate_by = "ALL"
        dataset.scale_by = "NONE"
        dataset.quantile_normalize = False # We don't QN because we're creating the target now
        dataset.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = dataset
        pjda.save()

        final_context = qn_reference.create_qn_reference(job.pk)
        self.assertTrue(final_context['success'])

        self.assertTrue(os.path.exists(final_context['target_file']))
        self.assertEqual(os.path.getsize(final_context['target_file']), 556)

        target = utils.get_most_recent_qn_target_for_organism(homo_sapiens)
        self.assertEqual(target.sha1, '636d72d5cbf4b9785b0bd271a1430b615feaa7ea')

        ###
        # Smasher with QN
        ###

        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        ds = Dataset()
        ds.data = {"12345": ["1", "2", "3", "4", "5"]}
        ds.aggregate_by = 'SPECIES'
        ds.scale_by = 'STANDARD'
        ds.email_address = "*****@*****.**"
        ds.quantile_normalize = True
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(pj.pk, upload=False)
        self.assertTrue(final_context['success'])

        self.assertEqual(final_context['merged_qn']['1'][0], -0.4379488528812934)
        self.assertEqual(final_context['original_merged']['1'][0], -0.576210936113982)

        ## 
        # Test via management command
        ##

        from io import StringIO

        from django.core.management import call_command

        out = StringIO()
        try:
            call_command('create_qn_target', organism='homo_sapiens', min=1, stdout=out)
        except SystemExit as e: # this is okay!
            pass

        stdout = out.getvalue()
        self.assertTrue('Target file' in stdout)
        path = stdout.split('\n')[0].split(':')[1].strip()
        self.assertTrue(os.path.exists(path))
        self.assertEqual(path, utils.get_most_recent_qn_target_for_organism(homo_sapiens).absolute_file_path)
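
The call to utils.get_most_recent_qn_target_for_organism above ties back to the annotation pattern used earlier in these examples, where a QN result is tagged with {"is_qn": True, "organism_id": ...}. The sketch below shows how such a lookup could be approximated from that annotation data; the import path and the ordering by primary key are assumptions, and the real helper may be implemented differently.

# Import path is an assumption based on the refine.bio project layout.
from data_refinery_common.models import ComputationalResultAnnotation, ComputedFile


def latest_qn_target_file_for(organism):
    """Approximate the newest QN target ComputedFile for an organism.

    Sketch only: assumes QN results are annotated with
    {"is_qn": True, "organism_id": <id>} as in the examples above, and that
    the target file on the same result is flagged with is_qn_target=True.
    """
    annotation = (
        ComputationalResultAnnotation.objects.filter(
            data__is_qn=True, data__organism_id=organism.id
        )
        .order_by("-id")  # newest by primary key; a timestamp would also work
        .first()
    )
    if annotation is None:
        return None
    return ComputedFile.objects.filter(
        result=annotation.result, is_qn_target=True
    ).first()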
Exemple #26
0
def prepare_computed_files():
    # MICROARRAY TECH
    experiment = Experiment()
    experiment.accession_code = "GSE1487313"
    experiment.num_processed_samples = 1
    experiment.save()

    result = ComputationalResult()
    result.save()

    gallus_gallus = Organism.get_object_for_name("GALLUS_GALLUS",
                                                 taxonomy_id=1001)

    sample = Sample()
    sample.accession_code = "GSM1487313"
    sample.title = "GSM1487313"
    sample.organism = gallus_gallus
    sample.technology = "MICROARRAY"
    sample.is_processed = True
    sample.save()

    sra = SampleResultAssociation()
    sra.sample = sample
    sra.result = result
    sra.save()

    esa = ExperimentSampleAssociation()
    esa.experiment = experiment
    esa.sample = sample
    esa.save()

    computed_file = ComputedFile()
    computed_file.filename = "GSM1487313_liver.PCL"
    computed_file.result = result
    computed_file.size_in_bytes = 123
    computed_file.is_smashable = True
    computed_file.s3_key = "GSM1487313_liver.PCL"
    computed_file.s3_bucket = TEST_DATA_BUCKET
    computed_file.save()

    assoc = SampleComputedFileAssociation()
    assoc.sample = sample
    assoc.computed_file = computed_file
    assoc.save()

    # RNASEQ TECH
    experiment2 = Experiment()
    experiment2.accession_code = "SRP332914"
    experiment2.num_processed_samples = 1
    experiment2.save()

    result2 = ComputationalResult()
    result2.save()

    sample2 = Sample()
    sample2.accession_code = "SRR332914"
    sample2.title = "SRR332914"
    sample2.organism = gallus_gallus
    sample2.technology = "RNA-SEQ"
    sample2.is_processed = True
    sample2.save()

    sra2 = SampleResultAssociation()
    sra2.sample = sample2
    sra2.result = result2
    sra2.save()

    esa2 = ExperimentSampleAssociation()
    esa2.experiment = experiment2
    esa2.sample = sample2
    esa2.save()

    computed_file2 = ComputedFile()
    computed_file2.filename = "SRP149598_gene_lengthScaledTPM.tsv"
    computed_file2.result = result2
    computed_file2.size_in_bytes = 234
    computed_file2.is_smashable = True
    computed_file2.s3_key = "SRP149598_gene_lengthScaledTPM.tsv"
    computed_file2.s3_bucket = TEST_DATA_BUCKET
    computed_file2.save()

    assoc2 = SampleComputedFileAssociation()
    assoc2.sample = sample2
    assoc2.computed_file = computed_file2
    assoc2.save()
Exemple #27
0
    def setUp(self):
        experiment = Experiment()
        experiment.accession_code = "GSE000"
        experiment.alternate_accession_code = "E-GEOD-000"
        experiment.title = "NONONONO"
        experiment.description = "Boooooourns. Wasabi."
        experiment.technology = "RNA-SEQ"
        experiment.save()
        self.experiment = experiment

        # Create some samples to attach keywords to
        sample = Sample()
        sample.accession_code = "SRR123"
        sample.technology = "RNA-SEQ"
        sample.source_database = "SRA"
        sample.title = "Not important"
        sample.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        sample2 = Sample()
        sample2.accession_code = "SRR456"
        sample2.technology = "RNA-SEQ"
        sample2.source_database = "SRA"
        sample2.title = "Not important"
        sample2.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample2
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        # Create the ontology terms I'm using in the tests
        name = OntologyTerm()
        name.ontology_term = "PATO:0000122"
        name.human_readable_name = "length"
        name.save()

        unit = OntologyTerm()
        unit.ontology_term = "UO:0010012"
        unit.human_readable_name = "thou"
        unit.save()

        contribution = Contribution()
        contribution.source_name = "refinebio_tests"
        contribution.methods_url = "ccdatalab.org"
        contribution.save()
        self.contribution = contribution
Exemple #28
0
    def test_bad_overlap(self):

        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        experiment = Experiment()
        experiment.accession_code = "GSE51081"
        experiment.save()

        result = ComputationalResult()
        result.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

        sample = Sample()
        sample.accession_code = 'GSM1237810'
        sample.title = 'GSM1237810'
        sample.organism = homo_sapiens
        sample.save()

        sample_annotation = SampleAnnotation()
        sample_annotation.data = {'hi': 'friend'}
        sample_annotation.sample = sample
        sample_annotation.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.filename = "big.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/BADSMASH/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sample = Sample()
        sample.accession_code = 'GSM1237812'
        sample.title = 'GSM1237812'
        sample.organism = homo_sapiens
        sample.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        computed_file = ComputedFile()
        computed_file.filename = "small.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/BADSMASH/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'GSE51081': ['GSM1237810', 'GSM1237812']}
        ds.aggregate_by = 'ALL'  # [ALL or SPECIES or EXPERIMENT]
        ds.scale_by = 'NONE'  # [NONE or MINMAX or STANDARD or ROBUST]
        ds.email_address = "*****@*****.**"
        #ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(pj.pk, upload=False)
        ds = Dataset.objects.get(id=ds.id)

        pj = ProcessorJob()
        pj.pipeline_applied = "SMASHER"
        pj.save()

        # Now, make sure the bad sample can't zero this out.
        sample = Sample()
        sample.accession_code = 'GSM999'
        sample.title = 'GSM999'
        sample.organism = homo_sapiens
        sample.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        computed_file = ComputedFile()
        computed_file.filename = "bad.PCL"
        computed_file.absolute_file_path = "/home/user/data_store/BADSMASH/" + computed_file.filename
        computed_file.result = result
        computed_file.size_in_bytes = 123
        computed_file.is_smashable = True
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {'GSE51081': ['GSM1237810', 'GSM1237812', 'GSM999']}
        ds.aggregate_by = 'ALL'  # [ALL or SPECIES or EXPERIMENT]
        ds.scale_by = 'NONE'  # [NONE or MINMAX or STANDARD or ROBUST]
        ds.email_address = "*****@*****.**"
        #ds.email_address = "*****@*****.**"
        ds.quantile_normalize = False
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = pj
        pjda.dataset = ds
        pjda.save()

        final_context = smasher.smash(pj.pk, upload=False)
        ds = Dataset.objects.get(id=ds.id)

        self.assertEqual(len(final_context['final_frame']), 4)
Exemple #29
0
    def test_processed_samples_only(self):
        """ Don't return unprocessed samples """
        experiment = Experiment()
        experiment.accession_code = "GSX12345"
        experiment.is_public = True
        experiment.save()

        sample = Sample()
        sample.title = "I am unprocessed"
        sample.accession_code = "GSXUnprocessed"
        sample.is_processed = False
        sample.save()

        experiment_sample_association = ExperimentSampleAssociation()
        experiment_sample_association.sample = sample
        experiment_sample_association.experiment = experiment
        experiment_sample_association.save()

        # we return all experiments
        response = self.client.get(
            reverse("search", kwargs={"version": API_VERSION}),
            {"search": "GSX12345"})
        self.assertEqual(response.json()["count"], 1)

        # check requesting only experiments with processed samples
        response = self.client.get(
            reverse("search", kwargs={"version": API_VERSION}),
            {
                "search": "GSX12345",
                "num_processed_samples__gt": 0
            },
        )
        self.assertEqual(response.json()["count"], 0)

        sample2 = Sample()
        sample2.title = "I am processed"
        sample2.accession_code = "GSXProcessed"
        sample2.is_processed = True
        sample2.save()

        experiment_sample2_association = ExperimentSampleAssociation()
        experiment_sample2_association.sample = sample2
        experiment_sample2_association.experiment = experiment
        experiment_sample2_association.save()

        # update cached values
        experiment.num_total_samples = 2
        experiment.num_processed_samples = 1
        experiment.save()

        response = self.client.get(
            reverse("search", kwargs={"version": API_VERSION}),
            {"search": "GSX12345"})
        self.assertEqual(response.json()["count"], 1)

        self.assertEqual(len(experiment.processed_samples), 1)

        experiment.delete()
        sample.delete()
        sample2.delete()
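
num_total_samples and num_processed_samples are cached counters that the search filter relies on, which is why the test updates them by hand after adding the processed sample. Below is a sketch of a helper that recomputes them from the association table; refresh_sample_counts is a hypothetical name, and it assumes samples are linked to experiments through ExperimentSampleAssociation as in the tests above.

def refresh_sample_counts(experiment):
    """Recompute the cached sample counters used by the search filters.

    Hypothetical helper; it derives both counters from
    ExperimentSampleAssociation rather than trusting the cached values.
    """
    associations = ExperimentSampleAssociation.objects.filter(experiment=experiment)
    experiment.num_total_samples = associations.count()
    experiment.num_processed_samples = associations.filter(
        sample__is_processed=True
    ).count()
    experiment.save()

The two manual counter assignments in the test could then be replaced with a single refresh_sample_counts(experiment) call.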
Exemple #30
0
def prepare_job():
    pj = ProcessorJob()
    pj.pipeline_applied = "SMASHER"
    pj.save()

    experiment = Experiment()
    experiment.accession_code = "GSE51081"
    experiment.save()

    result = ComputationalResult()
    result.save()

    homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS")

    sample = Sample()
    sample.accession_code = 'GSM1237810'
    sample.title = 'GSM1237810'
    sample.organism = homo_sapiens
    sample.save()

    sample_annotation = SampleAnnotation()
    sample_annotation.data = {'hi': 'friend'}
    sample_annotation.sample = sample
    sample_annotation.save()

    sra = SampleResultAssociation()
    sra.sample = sample
    sra.result = result
    sra.save()

    esa = ExperimentSampleAssociation()
    esa.experiment = experiment
    esa.sample = sample
    esa.save()

    computed_file = ComputedFile()
    computed_file.filename = "GSM1237810_T09-1084.PCL"
    computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
    computed_file.result = result
    computed_file.size_in_bytes = 123
    computed_file.is_smashable = True
    computed_file.save()

    assoc = SampleComputedFileAssociation()
    assoc.sample = sample
    assoc.computed_file = computed_file
    assoc.save()

    sample = Sample()
    sample.accession_code = 'GSM1237812'
    sample.title = 'GSM1237812'
    sample.organism = homo_sapiens
    sample.save()

    esa = ExperimentSampleAssociation()
    esa.experiment = experiment
    esa.sample = sample
    esa.save()

    assoc = SampleComputedFileAssociation()
    assoc.sample = sample
    assoc.computed_file = computed_file
    assoc.save()

    sra = SampleResultAssociation()
    sra.sample = sample
    sra.result = result
    sra.save()

    computed_file = ComputedFile()
    computed_file.filename = "GSM1237812_S97-PURE.PCL"
    computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
    computed_file.result = result
    computed_file.size_in_bytes = 123
    computed_file.is_smashable = True
    computed_file.save()

    computed_file = ComputedFile()
    computed_file.filename = "GSM1237812_S97-PURE.DAT"
    computed_file.absolute_file_path = "/home/user/data_store/PCL/" + computed_file.filename
    computed_file.result = result
    computed_file.size_in_bytes = 123
    computed_file.is_smashable = False
    computed_file.save()

    assoc = SampleComputedFileAssociation()
    assoc.sample = sample
    assoc.computed_file = computed_file
    assoc.save()

    ds = Dataset()
    ds.data = {'GSE51081': ['GSM1237810', 'GSM1237812']}
    ds.aggregate_by = 'EXPERIMENT'  # [ALL or SPECIES or EXPERIMENT]
    ds.scale_by = 'STANDARD'  # [NONE or MINMAX or STANDARD or ROBUST]
    ds.email_address = "*****@*****.**"
    #ds.email_address = "*****@*****.**"
    ds.quantile_normalize = False
    ds.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = pj
    pjda.dataset = ds
    pjda.save()

    return pj
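
A typical use of prepare_job in the smasher tests, shown as a minimal sketch; the surrounding test class, imports, and the test name are assumed, and the call mirrors the smasher.smash(pj.pk, upload=False) invocations earlier in this document.

    def test_smash_prepared_job(self):
        # Hypothetical test name; mirrors the smasher calls shown earlier.
        job = prepare_job()
        final_context = smasher.smash(job.pk, upload=False)
        self.assertTrue(final_context["success"])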