Esempio n. 1
0
    def handle(self, *args, **options):
        """Run the processor that matches the requested job name.

        Reads ``job_id`` and ``job_name`` from ``options``, looks the name up
        in ``ProcessorPipeline`` and calls the corresponding processor
        function with the job ID.

        Returns:
            0 after the selected processor function has been called; 1 when
            the job ID is ``None``, when the ``ProcessorPipeline`` lookup
            raises ``KeyError``, or when no branch below handles the
            resolved pipeline.
        """
        # NOTE(review): if this is a Django management command, a truthy
        # non-string return value such as 1 may not be turned into a process
        # exit status -- confirm how callers consume the return value.
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.")
            return 1

        try:
            job_name = options["job_name"]
            pipeline = ProcessorPipeline[job_name]
        except KeyError:
            logger.error("You must specify a valid job name.")
            return 1

        if pipeline is ProcessorPipeline.AFFY_TO_PCL:
            affy_to_pcl(job_id)
            return 0

        if pipeline is ProcessorPipeline.TRANSCRIPTOME_INDEX:
            build_transcriptome_index(job_id)
            return 0

        if pipeline is ProcessorPipeline.NO_OP:
            no_op_processor(job_id)
            return 0

        # The name resolved to a pipeline, but nothing above knows how to
        # run it.
        logger.error(
            ("A valid job name was specified for job %s with id %d but "
             "no processor function is known to run it."),
            job_name, job_id)
        return 1
Esempio n. 2
0
    def handle(self, *args, **options):
        """Dispatch one processor job to its pipeline, then exit the process.

        Reads ``job_id`` and ``job_name`` from ``options``, resolves the name
        through ``ProcessorPipeline`` and calls the matching processor
        function with the job ID. Each processor is imported inside the
        branch that selects it.

        Exit status (via ``sys.exit``): 0 after the processor call returns;
        1 when the job ID is ``None``, when the ``ProcessorPipeline`` lookup
        raises ``KeyError``, or when no branch handles the resolved pipeline.
        """
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.",
                         job_id=job_id)
            sys.exit(1)

        try:
            pipeline = ProcessorPipeline[options["job_name"]]
        except KeyError:
            logger.error("You must specify a valid job name.",
                         job_name=options["job_name"],
                         job_id=job_id)
            sys.exit(1)

        # Each branch only picks the callable (and any extra keyword
        # arguments); the call itself happens once, after the chain.
        run_processor = None
        extra_kwargs = {}

        if pipeline is ProcessorPipeline.AFFY_TO_PCL:
            from data_refinery_workers.processors.array_express import (
                affy_to_pcl as run_processor,
            )
        elif pipeline is ProcessorPipeline.TRANSCRIPTOME_INDEX_SHORT:
            from data_refinery_workers.processors.transcriptome_index import (
                build_transcriptome_index as run_processor,
            )
            extra_kwargs["length"] = "short"
        elif pipeline is ProcessorPipeline.TRANSCRIPTOME_INDEX_LONG:
            from data_refinery_workers.processors.transcriptome_index import (
                build_transcriptome_index as run_processor,
            )
            extra_kwargs["length"] = "long"
        elif pipeline is ProcessorPipeline.AGILENT_TWOCOLOR_TO_PCL:
            from data_refinery_workers.processors.agilent_twocolor import (
                agilent_twocolor_to_pcl as run_processor,
            )
        elif pipeline is ProcessorPipeline.ILLUMINA_TO_PCL:
            from data_refinery_workers.processors.illumina import (
                illumina_to_pcl as run_processor,
            )
        elif pipeline is ProcessorPipeline.SALMON:
            from data_refinery_workers.processors.salmon import (
                salmon as run_processor,
            )
        elif pipeline is ProcessorPipeline.SMASHER:
            from data_refinery_workers.processors.smasher import (
                smash as run_processor,
            )
        elif pipeline is ProcessorPipeline.NO_OP:
            from data_refinery_workers.processors.no_op import (
                no_op_processor as run_processor,
            )
        elif pipeline is ProcessorPipeline.JANITOR:
            from data_refinery_workers.processors.janitor import (
                run_janitor as run_processor,
            )
        elif pipeline is ProcessorPipeline.QN_REFERENCE:
            from data_refinery_workers.processors import qn_reference
            run_processor = qn_reference.create_qn_reference
        else:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no processor function is known to run it."),
                options["job_name"], job_id)
            sys.exit(1)

        if run_processor is not None:
            run_processor(job_id, **extra_kwargs)

        sys.exit(0)
Esempio n. 3
0
def assertRunsSuccessfully(test_case: TestCase, job: ProcessorJob) -> dict:
    """Run the no-op processor on ``job`` and assert the run looks healthy.

    Using ``test_case``'s assertion methods, checks that the returned context
    has a truthy ``"success"`` entry, that its ``"output_file_path"`` exists
    on disk, that ``"samples"`` and ``"computed_files"`` each contain exactly
    one item, and that every computed file is found in
    ``sample.computed_files.all()`` for every sample.

    Returns:
        The context returned by ``no_op.no_op_processor`` so callers can run
        further checks on it.
    """
    context = no_op.no_op_processor(job.pk)

    test_case.assertTrue(context["success"])
    output_exists = os.path.exists(context["output_file_path"])
    test_case.assertTrue(output_exists)

    samples = context["samples"]
    test_case.assertEqual(len(samples), 1)
    computed_files = context["computed_files"]
    test_case.assertEqual(len(computed_files), 1)

    for sample in samples:
        for computed_file in computed_files:
            is_linked = computed_file in sample.computed_files.all()
            test_case.assertTrue(is_linked)

    # Hand the context back for any test-specific assertions.
    return context
Esempio n. 4
0
    def test_convert_processed_illumina(self):
        """Run NO_OP on a processed Illumina sample table and check its output.

        Builds a ProcessorJob, an OriginalFile, an Organism and a Sample, links
        them together, runs ``no_op.no_op_processor`` and then checks that the
        run succeeded, the output file exists, has the expected size and
        passes ``no_op.check_output_quality``.
        """
        processor_job = ProcessorJob()
        processor_job.pipeline_applied = "NO_OP"
        processor_job.save()

        # Input rows look like:
        #   Reporter Identifier VALUE   Detection Pval
        #   ILMN_1343291    14.943602   0
        #   ILMN_1343295    13.528082   0
        original_file = OriginalFile()
        original_file.source_filename = (
            "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-22433/"
        )
        original_file.filename = "GSM557500_sample_table.txt"
        original_file.absolute_file_path = (
            "/home/user/data_store/raw/TEST/NO_OP/GSM557500_sample_table.txt"
        )
        original_file.is_downloaded = True
        original_file.save()

        human = Organism(
            name="HOMO_SAPIENS",
            taxonomy_id=9606,
            is_scientific_name=True,
        )
        human.save()

        illumina_sample = Sample()
        illumina_sample.accession_code = "GSM557500"
        illumina_sample.title = "GSM557500"
        illumina_sample.platform_accession_code = "A-MEXP-1171"
        illumina_sample.manufacturer = "ILLUMINA"
        illumina_sample.organism = human
        illumina_sample.save()

        # Tie the file to its sample ...
        file_sample_link = OriginalFileSampleAssociation()
        file_sample_link.original_file = original_file
        file_sample_link.sample = illumina_sample
        file_sample_link.save()

        # ... and to the job that will process it.
        job_file_link = ProcessorJobOriginalFileAssociation()
        job_file_link.original_file = original_file
        job_file_link.processor_job = processor_job
        job_file_link.save()

        # Expected output rows look like:
        #   ENSG00000156508 14.943602
        #   ENSG00000111640 13.528082
        context = no_op.no_op_processor(processor_job.pk)
        self.assertTrue(context["success"])

        output_path = context["output_file_path"]
        self.assertTrue(os.path.exists(output_path))
        self.assertEqual(os.path.getsize(output_path), 920374)
        self.assertTrue(no_op.check_output_quality(output_path))
Esempio n. 5
0
    def test_convert_illumina_no_header(self):
        """Run NO_OP on an Illumina table without a header row.

        Builds a ProcessorJob, an OriginalFile, an Organism and a Sample, links
        them together, runs ``no_op.no_op_processor`` and then checks that the
        run succeeded and that the output file exists with the expected size.
        """
        processor_job = ProcessorJob()
        processor_job.pipeline_applied = "NO_OP"
        processor_job.save()

        # Input rows look like:
        #   ILMN_1885639    10.0000 0.7931
        #   ILMN_2209417    10.0000 0.2029
        #   ILMN_1765401    152.0873    0.0000
        original_file = OriginalFile()
        original_file.source_filename = (
            "https://github.com/AlexsLemonade/refinebio/files/2255178/GSM1089291-tbl-1.txt"
        )
        original_file.filename = "GSM1089291-tbl-1.txt"
        original_file.absolute_file_path = (
            "/home/user/data_store/raw/TEST/NO_OP/GSM1089291-tbl-1.txt"
        )
        original_file.is_downloaded = True
        original_file.save()

        human = Organism(
            name="HOMO_SAPIENS",
            taxonomy_id=9606,
            is_scientific_name=True,
        )
        human.save()

        # NOTE(review): the sample uses accession GSM557500 although the input
        # file is named GSM1089291 -- confirm this is intentional.
        illumina_sample = Sample()
        illumina_sample.accession_code = "GSM557500"
        illumina_sample.title = "GSM557500"
        illumina_sample.platform_accession_code = "A-MEXP-1171"
        illumina_sample.manufacturer = "ILLUMINA"
        illumina_sample.organism = human
        illumina_sample.save()

        # Tie the file to its sample ...
        file_sample_link = OriginalFileSampleAssociation()
        file_sample_link.original_file = original_file
        file_sample_link.sample = illumina_sample
        file_sample_link.save()

        # ... and to the job that will process it.
        job_file_link = ProcessorJobOriginalFileAssociation()
        job_file_link.original_file = original_file
        job_file_link.processor_job = processor_job
        job_file_link.save()

        # Expected output rows look like:
        #   ENSG00000105675 10
        #   ENSG00000085721 152.0873
        #   ENSG00000278494 152.0873
        context = no_op.no_op_processor(processor_job.pk)
        self.assertTrue(context["success"])

        output_path = context["output_file_path"]
        self.assertTrue(os.path.exists(output_path))
        self.assertEqual(os.path.getsize(output_path), 786207)
Esempio n. 6
0
    def test_convert_illumina_bad_cols(self):
        """Run NO_OP on an Illumina table with extra columns and expect failure.

        The run must report ``success`` as falsy and the job's
        ``failure_reason`` must contain "Tell Rich!".

        This test may be deprecated in the future; for now it only flags that
        this kind of input needs attention.
        """
        processor_job = ProcessorJob()
        processor_job.pipeline_applied = "NO_OP"
        processor_job.save()

        # Input rows look like:
        #   ILMN_1885639    10.0000 0.7931  11.0000 0.123
        #   ILMN_2209417    10.0000 0.2029  11.1234 0.543
        #   LMN_1765401    152.0873    0.0000  99.999  0.19
        original_file = OriginalFile()
        original_file.source_filename = (
            "https://github.com/AlexsLemonade/refinebio/files/2255178/GSM1089291-tbl-1-modified.txt"
        )
        original_file.filename = "GSM1089291-tbl-1-modified.txt"
        original_file.absolute_file_path = (
            "/home/user/data_store/raw/TEST/NO_OP/GSM1089291-tbl-1-modified.txt"
        )
        original_file.is_downloaded = True
        original_file.save()

        human = Organism(
            name="HOMO_SAPIENS",
            taxonomy_id=9606,
            is_scientific_name=True,
        )
        human.save()

        # NOTE(review): the sample uses accession GSM557500 although the input
        # file is named GSM1089291 -- confirm this is intentional.
        illumina_sample = Sample()
        illumina_sample.accession_code = "GSM557500"
        illumina_sample.title = "GSM557500"
        illumina_sample.platform_accession_code = "A-MEXP-1171"
        illumina_sample.manufacturer = "ILLUMINA"
        illumina_sample.organism = human
        illumina_sample.save()

        # Tie the file to its sample ...
        file_sample_link = OriginalFileSampleAssociation()
        file_sample_link.original_file = original_file
        file_sample_link.sample = illumina_sample
        file_sample_link.save()

        # ... and to the job that will process it.
        job_file_link = ProcessorJobOriginalFileAssociation()
        job_file_link.original_file = original_file
        job_file_link.processor_job = processor_job
        job_file_link.save()

        context = no_op.no_op_processor(processor_job.pk)
        self.assertFalse(context["success"])

        failure_reason = context["job"].failure_reason
        self.assertTrue("Tell Rich!" in failure_reason)
Esempio n. 7
0
    def test_convert_simple_pcl(self):
        """Run NO_OP on two simple sample tables and verify both runs.

        Scenario 1 processes ``GSM1234847_sample_table.txt`` (noted by the
        original author as having ``ID_REF, VALUE`` columns). Scenario 2
        processes ``GSM269747-tbl-1.txt``, which has no header row.

        Both runs must report success and leave an output file on disk; the
        second output is additionally checked against a known size.
        """
        # --- Scenario 1: ID_REF, VALUE ---
        job = ProcessorJob()
        job.pipeline_applied = "NO_OP"
        job.save()

        og_file = OriginalFile()
        og_file.source_filename = "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-51013/"
        og_file.filename = "GSM1234847_sample_table.txt"
        og_file.absolute_file_path = "/home/user/data_store/raw/TEST/NO_OP/GSM1234847_sample_table.txt"
        og_file.is_downloaded = True
        og_file.save()

        sample = Sample()
        sample.accession_code = "GSM1234847"
        sample.title = "GSM1234847"
        sample.platform_accession_code = 'A-AFFY-38'
        sample.save()

        assoc = OriginalFileSampleAssociation()
        assoc.original_file = og_file
        assoc.sample = sample
        assoc.save()

        assoc1 = ProcessorJobOriginalFileAssociation()
        assoc1.original_file = og_file
        assoc1.processor_job = job
        assoc1.save()

        headered_context = no_op.no_op_processor(job.pk)
        # Verify this run as well; without these checks a failure on the
        # first input would go unnoticed because only the second run's
        # context was ever inspected.
        self.assertTrue(headered_context['success'])
        self.assertTrue(os.path.exists(headered_context['output_file_path']))

        # --- Scenario 2: no header ---
        # ex:
        # AFFX-BioB-3_at  0.74218756
        og_file = OriginalFile()
        og_file.source_filename = "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE10nnn/GSE10188/miniml/GSE10188_family.xml.tgz"
        og_file.filename = "GSM269747-tbl-1.txt"
        og_file.absolute_file_path = "/home/user/data_store/raw/TEST/NO_OP/GSM269747-tbl-1.txt"
        og_file.is_downloaded = True
        og_file.save()

        sample = Sample()
        sample.accession_code = "GSM269747"
        sample.title = "GSM269747"
        sample.platform_accession_code = 'GPL1319'
        sample.save()

        assoc = OriginalFileSampleAssociation()
        assoc.original_file = og_file
        assoc.sample = sample
        assoc.save()

        job = ProcessorJob()
        job.pipeline_applied = "NO_OP"
        job.save()

        assoc1 = ProcessorJobOriginalFileAssociation()
        assoc1.original_file = og_file
        assoc1.processor_job = job
        assoc1.save()

        headerless_context = no_op.no_op_processor(job.pk)
        self.assertTrue(headerless_context['success'])
        self.assertTrue(os.path.exists(headerless_context['output_file_path']))
        self.assertEqual(os.path.getsize(headerless_context['output_file_path']),
                         346535)