Beispiel #1
0
    def handle(self, *args, **options):
        """Run the processor pipeline named in the options on the given job.

        Returns 1 on any error (missing job ID, unknown job name, or no
        processor registered for the pipeline) and 0 on success.
        """
        if options["job_id"] is None:
            logger.error("You must specify a job ID.")
            return 1

        try:
            job_type = ProcessorPipeline[options["job_name"]]
        except KeyError:
            logger.error("You must specify a valid job name.")
            return 1

        # Map each known pipeline to the function that runs it.
        processors = {
            ProcessorPipeline.AFFY_TO_PCL: affy_to_pcl,
            ProcessorPipeline.TRANSCRIPTOME_INDEX: build_transcriptome_index,
            ProcessorPipeline.NO_OP: no_op_processor,
        }
        processor = processors.get(job_type)
        if processor is None:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no processor function is known to run it."),
                options["job_name"], options["job_id"])
            return 1

        processor(options["job_id"])
        return 0
    def handle(self, *args, **options):
        """Look up the processor pipeline named in the options and run it.

        Terminates the process via sys.exit: status 1 on any error
        (missing job ID, unknown job name, or unrunnable pipeline),
        status 0 on success.
        """
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.",
                         job_id=job_id)
            sys.exit(1)

        job_name = options["job_name"]
        try:
            pipeline = ProcessorPipeline[job_name]
        except KeyError:
            logger.error("You must specify a valid job name.",
                         job_name=job_name,
                         job_id=job_id)
            sys.exit(1)

        # Each branch imports its processor module locally so that only the
        # selected processor (and its dependencies) is loaded per invocation.
        if pipeline is ProcessorPipeline.AFFY_TO_PCL:
            from data_refinery_workers.processors.array_express import affy_to_pcl
            affy_to_pcl(job_id)
        elif pipeline is ProcessorPipeline.TRANSCRIPTOME_INDEX_SHORT:
            from data_refinery_workers.processors.transcriptome_index import build_transcriptome_index
            build_transcriptome_index(job_id, length="short")
        elif pipeline is ProcessorPipeline.TRANSCRIPTOME_INDEX_LONG:
            from data_refinery_workers.processors.transcriptome_index import build_transcriptome_index
            build_transcriptome_index(job_id, length="long")
        elif pipeline is ProcessorPipeline.AGILENT_TWOCOLOR_TO_PCL:
            from data_refinery_workers.processors.agilent_twocolor import agilent_twocolor_to_pcl
            agilent_twocolor_to_pcl(job_id)
        elif pipeline is ProcessorPipeline.ILLUMINA_TO_PCL:
            from data_refinery_workers.processors.illumina import illumina_to_pcl
            illumina_to_pcl(job_id)
        elif pipeline is ProcessorPipeline.SALMON:
            from data_refinery_workers.processors.salmon import salmon
            salmon(job_id)
        elif pipeline is ProcessorPipeline.SMASHER:
            from data_refinery_workers.processors.smasher import smash
            smash(job_id)
        elif pipeline is ProcessorPipeline.NO_OP:
            from data_refinery_workers.processors.no_op import no_op_processor
            no_op_processor(job_id)
        elif pipeline is ProcessorPipeline.JANITOR:
            from data_refinery_workers.processors.janitor import run_janitor
            run_janitor(job_id)
        elif pipeline is ProcessorPipeline.QN_REFERENCE:
            from data_refinery_workers.processors import qn_reference
            qn_reference.create_qn_reference(job_id)
        else:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no processor function is known to run it."),
                job_name, job_id)
            sys.exit(1)

        sys.exit(0)
    def test_affy_to_pcl_huex_v1(self):
        """Special case: Huex V1 has no CDL, so the pipeline must still work."""
        job = prepare_huex_v1_job()
        # Clear out any work directory left behind by a previous run.
        shutil.rmtree("/home/user/data_store/processor_job_{}".format(job.id),
                      ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        refreshed_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(refreshed_job.success)

        results = ComputationalResult.objects.all()
        files = ComputedFile.objects.all()
        self.assertEqual(len(results), 1)
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].filename,
                         "GSM1364667_U_110208_7-02-10_S2.PCL")

        os.remove(files[0].absolute_file_path)
Beispiel #4
0
    def test_affy_to_pcl_no_brainarray(self):
        """Run the AFFY-to-PCL pipeline for a sample with no Brainarray
        package and verify the job succeeds and produces one PCL file.
        """
        job = prepare_non_ba_job()
        # Make sure that a previous test didn't leave a directory around;
        # stale output would make the assertions below order-dependent.
        # (Every sibling test in this file performs this same cleanup.)
        shutil.rmtree("/home/user/data_store/processor_job_" + str(job.id),
                      ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        updated_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(updated_job.success)
        self.assertEqual(len(ComputationalResult.objects.all()), 1)
        self.assertEqual(len(ComputedFile.objects.all()), 1)
        self.assertEqual(ComputedFile.objects.all()[0].filename,
                         'GSM45588.PCL')

        # Remove the output file, then the result; the ComputedFile row is
        # deleted by cascade.
        os.remove(ComputedFile.objects.all()[0].absolute_file_path)
        ComputationalResult.objects.all()[0].delete()
Beispiel #5
0
    def test_affy_to_pcl(self):
        """Run the Brainarray AFFY-to-PCL pipeline and compare its output
        against a reference PCL file."""
        job = prepare_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_{}".format(job.id),
                      ignore_errors=True)
        job_context = array_express.affy_to_pcl(job.pk)

        self.assertEqual(job_context["platform_accession_code"], "hugene10st")
        self.assertEqual(job_context["brainarray_package"], "hugene10sthsensgprobe")

        refreshed_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(refreshed_job.success)

        results = ComputationalResult.objects.all()
        files = ComputedFile.objects.all()
        self.assertEqual(len(results), 1)
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].filename, "GSM1426071_CD_colon_active_1.PCL")
        output_filename = files[0].absolute_file_path

        column = "GSM1426071_CD_colon_active_1.CEL"
        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM1426071_CD_colon_active_1.PCL", sep="\t"
        )[column]
        actual_data = pd.read_csv(output_filename, sep="\t")[column]

        assertMostlyAgrees(self, expected_data, actual_data)

        os.remove(output_filename)
    def test_affy_to_pcl(self):
        """Run the Brainarray AFFY-to-PCL pipeline and check its output file."""
        job = prepare_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_{}".format(job.id),
                      ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        refreshed_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(refreshed_job.success)

        results = ComputationalResult.objects.all()
        files = ComputedFile.objects.all()
        self.assertEqual(len(results), 1)
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].filename,
                         "GSM1426071_CD_colon_active_1.PCL")

        os.remove(files[0].absolute_file_path)
Beispiel #7
0
    def test_affy_to_pcl_no_brainarray(self):
        """Run the AFFY-to-PCL pipeline without a Brainarray package."""
        job = prepare_non_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_{}".format(job.id),
                      ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        refreshed_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(refreshed_job.success)

        results = ComputationalResult.objects.all()
        files = ComputedFile.objects.all()
        self.assertEqual(len(results), 1)
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].filename,
                         'GSM45588.PCL')

        os.remove(files[0].absolute_file_path)
        # Deleting the result cascades to its ComputedFile rows.
        results[0].delete()
Beispiel #8
0
    def test_affy_to_pcl_no_brainarray(self):
        """Run the AFFY-to-PCL pipeline without a Brainarray package and
        compare the output against a reference PCL file."""
        job = prepare_non_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_{}".format(job.id),
                      ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        refreshed_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(refreshed_job.success)

        results = ComputationalResult.objects.all()
        files = ComputedFile.objects.all()
        self.assertEqual(len(results), 1)
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].filename, "GSM45588.PCL")
        output_filename = files[0].absolute_file_path

        column = "GSM45588.CEL"
        expected_data = pd.read_csv("/home/user/data_store/TEST/PCL/GSM45588.PCL",
                                    sep="\t")[column]
        actual_data = pd.read_csv(output_filename, sep="\t")[column]

        assertMostlyAgrees(self, expected_data, actual_data)

        os.remove(output_filename)