예제 #1
0
def create_job_for_organism(organism: Organism):
    """Returns a quantpendia job for the provided organism."""
    job = ProcessorJob()
    job.pipeline_applied = ProcessorPipeline.CREATE_QUANTPENDIA.value
    job.save()

    dset = Dataset()
    dset.data = build_dataset(organism)
    dset.scale_by = "NONE"
    dset.aggregate_by = "EXPERIMENT"
    dset.quantile_normalize = False
    dset.quant_sf_only = True
    dset.svd_algorithm = "NONE"
    dset.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = job
    pjda.dataset = dset
    pjda.save()

    # Have to call this after setting the dataset since it's used in
    # the caclulation.
    job.ram_amount = determine_ram_amount(job)
    job.save()

    return job
예제 #2
0
def create_job_for_organism(organisms: List[Organism], svd_algorithm="ARPACK"):
    """Returns a compendia job for the provided organism.

    Fetch all of the experiments and compile large but normally formated Dataset.
    """
    job = ProcessorJob()
    job.pipeline_applied = ProcessorPipeline.CREATE_COMPENDIA.value
    job.save()

    dataset = Dataset()
    dataset.data = get_dataset(organisms)
    dataset.scale_by = "NONE"
    dataset.aggregate_by = "SPECIES"
    dataset.quantile_normalize = True
    dataset.quant_sf_only = False
    dataset.svd_algorithm = svd_algorithm
    dataset.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = job
    pjda.dataset = dataset
    pjda.save()

    # Have to call this after setting the dataset since it's used in
    # the caclulation.
    job.ram_amount = determine_ram_amount(job)
    job.save()

    return job
예제 #3
0
def create_job_for_organism(organism: Organism):
    """Returns a quantpendia job for the provided organism."""
    job = ProcessorJob()
    job.pipeline_applied = ProcessorPipeline.CREATE_QUANTPENDIA.value
    job.save()

    dset = Dataset()
    dset.data = build_dataset(organism)
    dset.scale_by = "NONE"
    dset.aggregate_by = "EXPERIMENT"
    dset.quantile_normalize = False
    dset.quant_sf_only = True
    dset.svd_algorithm = "NONE"
    dset.save()

    pjda = ProcessorJobDatasetAssociation()
    pjda.processor_job = job
    pjda.dataset = dset
    pjda.save()

    return job
예제 #4
0
    def test_create_quantpendia(self):
        job = ProcessorJob()
        job.pipeline_applied = ProcessorPipeline.CREATE_QUANTPENDIA.value
        job.save()

        experiment = Experiment()
        experiment.accession_code = "GSE51088"
        experiment.save()

        result = ComputationalResult()
        result.save()

        homo_sapiens = Organism.get_object_for_name("HOMO_SAPIENS",
                                                    taxonomy_id=9606)

        sample = Sample()
        sample.accession_code = "GSM1237818"
        sample.title = "GSM1237818"
        sample.organism = homo_sapiens
        sample.technology = "RNA-SEQ"
        sample.save()

        sra = SampleResultAssociation()
        sra.sample = sample
        sra.result = result
        sra.save()

        esa = ExperimentSampleAssociation()
        esa.experiment = experiment
        esa.sample = sample
        esa.save()

        computed_file = ComputedFile()
        computed_file.s3_key = "smasher-test-quant.sf"
        computed_file.s3_bucket = "data-refinery-test-assets"
        computed_file.filename = "quant.sf"
        computed_file.absolute_file_path = "/home/user/data_store/QUANT/smasher-test-quant.sf"
        computed_file.result = result
        computed_file.is_smashable = True
        computed_file.size_in_bytes = 123123
        computed_file.sha1 = (
            "08c7ea90b66b52f7cd9d9a569717a1f5f3874967"  # this matches with the downloaded file
        )
        computed_file.save()

        computed_file = ComputedFile()
        computed_file.filename = "logquant.tsv"
        computed_file.is_smashable = True
        computed_file.size_in_bytes = 123123
        computed_file.result = result
        computed_file.save()

        assoc = SampleComputedFileAssociation()
        assoc.sample = sample
        assoc.computed_file = computed_file
        assoc.save()

        ds = Dataset()
        ds.data = {"GSE51088": ["GSM1237818"]}
        ds.aggregate_by = "EXPERIMENT"
        ds.scale_by = "STANDARD"
        ds.email_address = "*****@*****.**"
        ds.quant_sf_only = True  # Make the dataset include quant.sf files only
        ds.save()

        pjda = ProcessorJobDatasetAssociation()
        pjda.processor_job = job
        pjda.dataset = ds
        pjda.save()

        final_context = create_quantpendia(job.id)

        self.assertTrue(
            os.path.exists(final_context["output_dir"] +
                           "/GSE51088/GSM1237818_quant.sf"))
        self.assertTrue(
            os.path.exists(final_context["output_dir"] + "/README.md"))
        self.assertTrue(
            os.path.exists(final_context["output_dir"] + "/LICENSE.TXT"))
        self.assertTrue(
            os.path.exists(final_context["output_dir"] +
                           "/aggregated_metadata.json"))

        self.assertTrue(final_context["metadata"]["quant_sf_only"])
        self.assertEqual(final_context["metadata"]["num_samples"], 1)
        self.assertEqual(final_context["metadata"]["num_experiments"], 1)

        # test that archive exists
        quantpendia_file = ComputedFile.objects.filter(
            is_compendia=True, quant_sf_only=True).latest()
        self.assertTrue(os.path.exists(quantpendia_file.absolute_file_path))