def handle(self, *args, **options):
    """Run the processor pipeline named by ``--job-name`` on ``--job-id``.

    Looks up the :class:`ProcessorPipeline` member for the requested job
    name and calls the matching processor function with the job's ID.
    Processor modules are imported lazily so that only the processor
    actually requested is loaded.

    Exits the process with status 1 when the job ID is missing, the job
    name is not a valid pipeline, or no processor function is registered
    for the pipeline; exits with status 0 after the processor returns.
    """
    # Local import keeps module start-up cost unchanged for callers
    # that never invoke this command.
    import importlib

    job_id = options["job_id"]
    job_name = options["job_name"]

    if job_id is None:
        logger.error("You must specify a job ID.", job_id=job_id)
        sys.exit(1)

    try:
        job_type = ProcessorPipeline[job_name]
    except KeyError:
        logger.error("You must specify a valid job name.",
                     job_name=job_name,
                     job_id=job_id)
        sys.exit(1)

    # Table-driven dispatch: each pipeline maps to
    # (dotted module path, function name, extra keyword arguments).
    # importlib.import_module keeps the original lazy-import behavior —
    # only the module for the requested pipeline is ever imported.
    dispatch = {
        ProcessorPipeline.AFFY_TO_PCL: (
            "data_refinery_workers.processors.array_express",
            "affy_to_pcl", {}),
        ProcessorPipeline.TRANSCRIPTOME_INDEX_SHORT: (
            "data_refinery_workers.processors.transcriptome_index",
            "build_transcriptome_index", {"length": "short"}),
        ProcessorPipeline.TRANSCRIPTOME_INDEX_LONG: (
            "data_refinery_workers.processors.transcriptome_index",
            "build_transcriptome_index", {"length": "long"}),
        ProcessorPipeline.AGILENT_TWOCOLOR_TO_PCL: (
            "data_refinery_workers.processors.agilent_twocolor",
            "agilent_twocolor_to_pcl", {}),
        ProcessorPipeline.ILLUMINA_TO_PCL: (
            "data_refinery_workers.processors.illumina",
            "illumina_to_pcl", {}),
        ProcessorPipeline.SALMON: (
            "data_refinery_workers.processors.salmon",
            "salmon", {}),
        ProcessorPipeline.SMASHER: (
            "data_refinery_workers.processors.smasher",
            "smash", {}),
        ProcessorPipeline.NO_OP: (
            "data_refinery_workers.processors.no_op",
            "no_op_processor", {}),
        ProcessorPipeline.JANITOR: (
            "data_refinery_workers.processors.janitor",
            "run_janitor", {}),
        ProcessorPipeline.QN_REFERENCE: (
            "data_refinery_workers.processors.qn_reference",
            "create_qn_reference", {}),
    }

    try:
        module_name, function_name, extra_kwargs = dispatch[job_type]
    except KeyError:
        # The name parsed to a ProcessorPipeline member, but no processor
        # function is wired up for it in the table above.
        logger.error(
            ("A valid job name was specified for job %s with id %d but "
             "no processor function is known to run it."),
            job_name,
            job_id)
        sys.exit(1)

    processor_function = getattr(importlib.import_module(module_name),
                                 function_name)
    processor_function(job_id, **extra_kwargs)
    sys.exit(0)
def test_salmon_dotsra_bad(self):
    """A .sra file that does not exist must leave the job unsuccessful."""
    # Clear out any quant.sf left behind by a previous test run.
    try:
        os.remove("/home/user/data_store/raw/TEST/SALMON/processed/quant.sf")
    except FileNotFoundError:
        pass

    processor_job, _ = prepare_dotsra_job("i-dont-exist.sra")
    salmon.salmon(processor_job.pk)

    refreshed_job = ProcessorJob.objects.get(id=processor_job.pk)
    self.assertFalse(refreshed_job.success)
def test_salmon_dotsra(self):
    """Test the whole pipeline."""
    # Clear out any quant.sf left behind by a previous test run.
    try:
        os.remove("/home/user/data_store/raw/TEST/SALMON/processed/quant.sf")
    except FileNotFoundError:
        pass

    processor_job, _ = prepare_dotsra_job()
    context = salmon.salmon(processor_job.pk)

    refreshed_job = ProcessorJob.objects.get(id=processor_job.pk)
    self.assertTrue(refreshed_job.success)

    # Clean up the scratch directory this run created.
    shutil.rmtree(context["work_dir"])
def test_salmon(self):
    """Test the whole pipeline."""
    # Clear out any quant.sf left behind by a previous test run.
    try:
        os.remove("/home/user/data_store/raw/TEST/SALMON/processed/quant.sf")
    except FileNotFoundError:
        pass

    processor_job, original_files = prepare_job()
    context = salmon.salmon(processor_job.pk)

    refreshed_job = ProcessorJob.objects.get(id=processor_job.pk)
    self.assertTrue(refreshed_job.success)

    # The sample itself is not flagged as processed by this pipeline.
    first_sample = original_files[0].samples.first()
    self.assertFalse(first_sample.is_processed)

    # The quant result must have been built against the short index.
    index_used = context["quant_result"].organism_index
    self.assertEqual(index_used.index_type, "TRANSCRIPTOME_SHORT")
def test_no_salmon_on_geo(self):
    """Test that salmon won't be run on data coming from GEO."""
    # Clear out any quant.sf left behind by a previous test run.
    try:
        os.remove("/home/user/data_store/raw/TEST/SALMON/processed/quant.sf")
    except FileNotFoundError:
        pass

    processor_job, _ = prepare_job()

    # This processor job is expected to fail, and on failure it cleans
    # up the job's original files.  Other tests still need those files,
    # so point each record at a copy that can be deleted safely.
    for original_file in OriginalFile.objects.all():
        copy_path = original_file.absolute_file_path + "_copy"
        shutil.copyfile(original_file.absolute_file_path, copy_path)
        original_file.absolute_file_path = copy_path
        original_file.save()

    sample_object = Sample.objects.first()
    sample_object.source_database = 'GEO'
    sample_object.save()

    salmon.salmon(processor_job.pk)

    refreshed_job = ProcessorJob.objects.get(id=processor_job.pk)
    self.assertFalse(refreshed_job.success)
    self.assertEqual(
        refreshed_job.failure_reason,
        ("The sample for this job either was not RNA-Seq or was not from the "
         "SRA database."))
    self.assertTrue(refreshed_job.no_retry)

    # The Janitor won't run here, so the job itself must have removed
    # the copied files already.
    for original_file in OriginalFile.objects.all():
        self.assertFalse(os.path.exists(original_file.absolute_file_path))