def create_sample_json(self, run):
    """
    Build a single-sample JSON file from ``run.output_metadata``, register it
    as a temp File record, and return a CWL File object pointing at it.

    :param run: Run whose ``output_metadata`` holds the sample fields.
    :return: result of ``self.create_cwl_file_object`` for the written JSON path.
    """
    # Work on a copy so the Run's in-memory output_metadata is not mutated by
    # the normalization below (the original aliased the dict and edited it in
    # place, which leaked the changes back into the Run object).
    j = dict(run.output_metadata)
    # todo: cmoSampleName in output_metadata for Nucleo appears to be the igo ID?
    j["cmoSampleName"] = run.output_metadata["sampleName"]
    for field in meta_fields:
        # Use None for missing fields
        if field not in j:
            j[field] = None
    for field in j:
        # MultiQC cannot handle cells with ","
        if isinstance(j[field], str) and "," in j[field]:
            j[field] = j[field].replace(",", ";")
    # Use some double quotes to make JSON compatible
    j["qcReports"] = "na"
    out = json.dumps([j])
    tmpdir = os.path.join(settings.BEAGLE_SHARED_TMPDIR, str(uuid.uuid4()))
    Path(tmpdir).mkdir(parents=True, exist_ok=True)
    output = os.path.join(tmpdir, "samples_json.json")
    with open(output, "w") as fh:
        fh.write(out)
    # World read/write so downstream pipeline users can consume/clean the temp file.
    os.chmod(output, 0o777)
    fname = os.path.basename(output)
    temp_file_group = FileGroup.objects.get(slug="temp")
    file_type = FileType.objects.get(name="unknown")
    sample_file = File(file_name=fname, path=output, file_type=file_type, file_group=temp_file_group)
    sample_file.save()
    return self.create_cwl_file_object(sample_file.path)
def register_tmp_file(self, path):
    """
    Ensure a File record exists for *path*; if none exists, register it in the
    'temp' file group with file type 'txt'.

    :param path: absolute path of the file to register.
    """
    fname = os.path.basename(path)
    temp_file_group = FileGroup.objects.get(slug="temp")
    file_type = FileType.objects.get(name="txt")
    try:
        File.objects.get(path=path)
    # Only a genuinely missing record should trigger registration. The
    # original bare `except:` also swallowed MultipleObjectsReturned (and
    # would then create yet another duplicate) plus unrelated errors.
    except File.DoesNotExist:
        print("Registering temp file %s" % path)
        f = File(file_name=fname, path=path, file_type=file_type, file_group=temp_file_group)
        f.save()
def _create_single_file(self, path, file_type, group_id, request_id, sample_id):
    """Create and persist one File plus its FileMetadata record; return the File."""
    type_obj = FileType.objects.get(name=file_type)
    group_obj = FileGroup.objects.get(id=group_id)
    new_file = File(
        path=path,
        file_name=os.path.basename(path),
        file_type=type_obj,
        file_group=group_obj,
        size=1234,  # fixed dummy size used by these fixtures
    )
    new_file.save()
    FileMetadata(file=new_file, metadata={"requestId": request_id, "igoSampleId": sample_id}).save()
    return new_file
def create_file_obj(uri, size, checksum, group_id, metadata):
    """
    Create a File record (plus its FileMetadata) from a URI.

    :param uri: file URI; resolved to a path via FileProcessor.parse_path_from_uri.
    :param size: file size in bytes.
    :param checksum: checksum string stored on the record.
    :param group_id: FileGroup primary key the file belongs to.
    :param metadata: dict stored as the file's metadata.
    :return: the saved File object.
    :raises FileHelperException: if *group_id* does not match a FileGroup.
    :raises FileConflictException: if a File with the same path already exists.
    """
    file_path = FileProcessor.parse_path_from_uri(uri)
    basename = os.path.basename(file_path)
    file_type = FileProcessor.get_file_ext(basename)
    try:
        group_id_obj = FileGroup.objects.get(id=group_id)
    except FileGroup.DoesNotExist as e:
        # Chain the original exception so the lookup failure stays debuggable.
        raise FileHelperException('Invalid FileGroup id: %s' % group_id) from e
    # Reuse the basename computed above (the original recomputed it here).
    file_object = File(path=file_path, file_name=basename, checksum=checksum,
                       file_type=file_type, file_group=group_id_obj, size=size)
    try:
        file_object.save()
    except IntegrityError as e:
        raise FileConflictException("File with path %s already exist" % file_path) from e
    file_metadata = FileMetadata(file=file_object, metadata=metadata)
    file_metadata.save()
    return file_object
def setUp(self):
    """Create the storage, pipeline, run, input FASTQs, reference Files and expected CWL outputs."""
    self.storage = Storage(name="test", type=StorageType.LOCAL)
    self.storage.save()
    self.file_group = FileGroup(name="Test Files", storage=self.storage)
    self.file_group.save()
    self.pipeline = Pipeline(
        name="pipeline_name",
        github="http://pipeline.github.com",
        version="v1.0",
        entrypoint="pipeline.cwl",
        output_file_group=self.file_group,
        output_directory="/path/to/outputs",
    )
    self.pipeline.save()
    self.run = Run(run_type=0, app=self.pipeline, status=RunStatus.CREATING, notify_for_outputs=[])
    self.run.save()
    self.file_type_unknown = FileType(name="unknown")
    self.file_type_unknown.save()

    def make_file(file_name, path):
        # Every fixture file shares the same size, group and (unknown) type.
        record = File(
            file_name=file_name,
            path=path,
            size=1234,
            file_group=self.file_group,
            file_type=self.file_type_unknown,
        )
        record.save()
        return record

    self.file1 = make_file("FASTQ_L002_R1_001.fastq.gz", "/FASTQ/FASTQ_L002_R1_001.fastq.gz")
    self.file2 = make_file("FASTQ_L002_R2_001.fastq.gz", "/FASTQ/FASTQ_L002_R2_001.fastq.gz")
    self.file3 = make_file("FASTQ_L006_R1_001.fastq.gz", "/FASTQ/FASTQ_L006_R1_001.fastq.gz")
    self.file4 = make_file("FASTQ_L006_R2_001.fastq.gz", "/FASTQ/FASTQ_L006_R2_001.fastq.gz")
    # Reference fixtures: each file_name is the basename of its path.
    reference_paths = [
        "/resources/genomes/GRCm38/GRCm38.fasta",
        "/resources/genomes/GRCh37/fasta/b37.fasta",
        "/resources/request_files/hapmap/hapmap_3.3.b37.vcf",
        "/resources/genomes/GRCh37/facets_snps/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz",
        "/resources/genomes/GRCh37/delly/human.hg19.excl.tsv",
        "/resources/qc_resources/hotspot-list-union-v1-v2.maf",
        "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_intervals.intervals",
        "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_genotypes.txt",
        "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_baits.ilist",
        "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_targets.ilist",
        "/resources/request_files/refseq/refGene_b37.sorted.txt",
        "/resources/vep/cache/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz",
        "/resources/request_files/cosmic/CosmicCodingMuts_v67_b37_20131024__NDS.vcf",
        "/resources/curated_bams/IDT_Exome_v1_FP_b37/s_C_006537_N001_d.Group0.rg.md.abra.printreads.bam",
        "/resources/curated_bams/IDT_Exome_v1_FP_b37/s_C_006284_N002_d.Group3.rg.md.abra.printreads.bam",
        "/resources/request_files/indels_1000g/Mills_and_1000G_gold_standard.indels.b37.vcf",
        "/resources/request_files/dbsnp/dbsnp_138.b37.excluding_sites_after_129.vcf",
        "/resources/request_files/snps_1000g/1000G_phase1.snps.high_confidence.b37.vcf",
    ]
    for reference_path in reference_paths:
        # Like the original loop, self.file ends up pointing at the last record.
        self.file = make_file(reference_path.rsplit("/", 1)[-1], reference_path)
    # Expected CWL output document used by the completion tests.
    self.outputs = {
        "maf": {
            "size": 68273211,
            "class": "File",
            "nameext": ".maf",
            "basename": "test_1.muts.maf",
            "checksum": "sha1$0ccf4718a717f5a991607561af0b5bf989caf1e4",
            "location": "file:///outputs/test_1.muts.maf",
            "nameroot": "test_1.muts.maf",
        },
        "bams": [
            {
                "size": 56524168530,
                "class": "File",
                "nameext": ".bam",
                "basename": "test_1.printreads.bam",
                "checksum": "sha1$e4c05e8b3e7c1d682640e690f71536e22cb63802",
                "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
                "nameroot": "test_1.printreads.bam",
                "secondaryFiles": [
                    {
                        "size": 7214992,
                        "class": "File",
                        "nameext": ".bai",
                        "basename": "test_1.rg.md.abra.printreads.bai",
                        "checksum": "sha1$79299d55657a0226206a4bf3ddaba854ae11f9f1",
                        "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
                        "nameroot": "test_1.rg.md.abra.printreads.bai",
                    }
                ],
            },
            {
                "size": 6163808009,
                "class": "File",
                "nameext": ".bam",
                "basename": "test_2.rg.md.abra.printreads.bam",
                "checksum": "sha1$15ddc908c3ece551d331e78806d3ac19569174c3",
                "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
                "nameroot": "test_2.rg.md.abra.printreads",
                "secondaryFiles": [
                    {
                        "size": 6235920,
                        "class": "File",
                        "nameext": ".bai",
                        "basename": "test_2.rg.md.abra.printreads.bai",
                        "checksum": "sha1$6fd2fc0ce6b42253cbac89d31e8eddb169d65605",
                        "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
                        "nameroot": "test_2.rg.md.abra.printreads",
                    }
                ],
            },
        ],
    }
class CWLRunObjectTest(APITestCase):
    """Tests for CWL run creation, DB persistence, and job completion/failure accounting."""

    fixtures = [
        "beagle_etl.operator.json",
        "runner.operator_run.json",
        "runner.operator_trigger.json",
    ]

    def setUp(self):
        """Create the storage, pipeline, run, input FASTQs, reference Files and expected outputs."""
        self.storage = Storage(name="test", type=StorageType.LOCAL)
        self.storage.save()
        self.file_group = FileGroup(name="Test Files", storage=self.storage)
        self.file_group.save()
        self.pipeline = Pipeline(
            name="pipeline_name",
            github="http://pipeline.github.com",
            version="v1.0",
            entrypoint="pipeline.cwl",
            output_file_group=self.file_group,
            output_directory="/path/to/outputs",
        )
        self.pipeline.save()
        self.run = Run(run_type=0, app=self.pipeline, status=RunStatus.CREATING, notify_for_outputs=[])
        self.run.save()
        self.file_type_unknown = FileType(name="unknown")
        self.file_type_unknown.save()

        def make_file(file_name, path):
            # Every fixture file shares the same size, group and (unknown) type.
            record = File(
                file_name=file_name,
                path=path,
                size=1234,
                file_group=self.file_group,
                file_type=self.file_type_unknown,
            )
            record.save()
            return record

        self.file1 = make_file("FASTQ_L002_R1_001.fastq.gz", "/FASTQ/FASTQ_L002_R1_001.fastq.gz")
        self.file2 = make_file("FASTQ_L002_R2_001.fastq.gz", "/FASTQ/FASTQ_L002_R2_001.fastq.gz")
        self.file3 = make_file("FASTQ_L006_R1_001.fastq.gz", "/FASTQ/FASTQ_L006_R1_001.fastq.gz")
        self.file4 = make_file("FASTQ_L006_R2_001.fastq.gz", "/FASTQ/FASTQ_L006_R2_001.fastq.gz")
        # Reference fixtures: each file_name is the basename of its path.
        reference_paths = [
            "/resources/genomes/GRCm38/GRCm38.fasta",
            "/resources/genomes/GRCh37/fasta/b37.fasta",
            "/resources/request_files/hapmap/hapmap_3.3.b37.vcf",
            "/resources/genomes/GRCh37/facets_snps/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz",
            "/resources/genomes/GRCh37/delly/human.hg19.excl.tsv",
            "/resources/qc_resources/hotspot-list-union-v1-v2.maf",
            "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_intervals.intervals",
            "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_FP_tiling_genotypes.txt",
            "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_baits.ilist",
            "/resources/genomic_resources/targets/AgilentExon_v2/b37/AgilentExon_v2_b37_targets.ilist",
            "/resources/request_files/refseq/refGene_b37.sorted.txt",
            "/resources/vep/cache/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz",
            "/resources/request_files/cosmic/CosmicCodingMuts_v67_b37_20131024__NDS.vcf",
            "/resources/curated_bams/IDT_Exome_v1_FP_b37/s_C_006537_N001_d.Group0.rg.md.abra.printreads.bam",
            "/resources/curated_bams/IDT_Exome_v1_FP_b37/s_C_006284_N002_d.Group3.rg.md.abra.printreads.bam",
            "/resources/request_files/indels_1000g/Mills_and_1000G_gold_standard.indels.b37.vcf",
            "/resources/request_files/dbsnp/dbsnp_138.b37.excluding_sites_after_129.vcf",
            "/resources/request_files/snps_1000g/1000G_phase1.snps.high_confidence.b37.vcf",
        ]
        for reference_path in reference_paths:
            self.file = make_file(reference_path.rsplit("/", 1)[-1], reference_path)
        # Expected CWL output document used by the completion tests.
        self.outputs = {
            "maf": {
                "size": 68273211,
                "class": "File",
                "nameext": ".maf",
                "basename": "test_1.muts.maf",
                "checksum": "sha1$0ccf4718a717f5a991607561af0b5bf989caf1e4",
                "location": "file:///outputs/test_1.muts.maf",
                "nameroot": "test_1.muts.maf",
            },
            "bams": [
                {
                    "size": 56524168530,
                    "class": "File",
                    "nameext": ".bam",
                    "basename": "test_1.printreads.bam",
                    "checksum": "sha1$e4c05e8b3e7c1d682640e690f71536e22cb63802",
                    "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
                    "nameroot": "test_1.printreads.bam",
                    "secondaryFiles": [
                        {
                            "size": 7214992,
                            "class": "File",
                            "nameext": ".bai",
                            "basename": "test_1.rg.md.abra.printreads.bai",
                            "checksum": "sha1$79299d55657a0226206a4bf3ddaba854ae11f9f1",
                            "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
                            "nameroot": "test_1.rg.md.abra.printreads.bai",
                        }
                    ],
                },
                {
                    "size": 6163808009,
                    "class": "File",
                    "nameext": ".bam",
                    "basename": "test_2.rg.md.abra.printreads.bam",
                    "checksum": "sha1$15ddc908c3ece551d331e78806d3ac19569174c3",
                    "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
                    "nameroot": "test_2.rg.md.abra.printreads",
                    "secondaryFiles": [
                        {
                            "size": 6235920,
                            "class": "File",
                            "nameext": ".bai",
                            "basename": "test_2.rg.md.abra.printreads.bai",
                            "checksum": "sha1$6fd2fc0ce6b42253cbac89d31e8eddb169d65605",
                            "location": "file:///output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
                            "nameroot": "test_2.rg.md.abra.printreads",
                        }
                    ],
                },
            ],
        }

    def _load_json(self, path):
        # All tests load the workflow app and its inputs from fixture files on disk.
        with open(path, "r") as fh:
            return json.load(fh)

    @patch("runner.pipeline.pipeline_cache.PipelineCache.get_pipeline")
    def test_run_creation_from_cwl(self, mock_get_pipeline):
        """Creating a run from a CWL definition resolves fastq paths to bid:// locations."""
        mock_get_pipeline.return_value = self._load_json("runner/tests/run/pair-workflow.cwl")
        inputs = self._load_json("runner/tests/run/inputs.json")
        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.ready()
        expected = [
            (0, "R1", self.file1),
            (0, "R2", self.file2),
            (1, "R1", self.file3),
            (1, "R2", self.file4),
        ]
        for inp in run.inputs:
            if inp.name != "pair":
                continue
            for idx, read, fixture in expected:
                self.assertEqual(inp.db_value[idx][read][0]["location"], "bid://%s" % str(fixture.id))
                self.assertEqual(inp.value[idx][read][0]["path"], fixture.path)

    @patch("runner.pipeline.pipeline_cache.PipelineCache.get_pipeline")
    def test_run_to_db(self, mock_get_pipeline):
        """to_db persists the run so it can be fetched back by id."""
        mock_get_pipeline.return_value = self._load_json("runner/tests/run/pair-workflow.cwl")
        inputs = self._load_json("runner/tests/run/inputs.json")
        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.to_db()
        try:
            run_obj = Run.objects.get(id=run.run_id)
        except Run.DoesNotExist:
            # NOTE(review): swallowing DoesNotExist leaves run_obj unbound below;
            # preserved exactly as the original test behaved.
            pass
        self.assertEqual(str(run_obj.id), run.run_id)

    @patch("notifier.tasks.send_notification.delay")
    @patch("lib.memcache_lock.memcache_task_lock")
    @patch("runner.pipeline.pipeline_cache.PipelineCache.get_pipeline")
    @patch("file_system.tasks.populate_job_group_notifier_metadata.delay")
    def test_run_complete_job(
        self, mock_populate_job_group_notifier, mock_get_pipeline, memcache_task_lock, send_notification
    ):
        """complete_job bumps the operator run's completed count and registers output files/ports."""
        mock_populate_job_group_notifier.return_value = None
        mock_get_pipeline.return_value = self._load_json("runner/tests/run/pair-workflow.cwl")
        inputs = self._load_json("runner/tests/run/inputs.json")
        memcache_task_lock.return_value = True
        send_notification.return_value = False
        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.to_db()
        operator_run = OperatorRun.objects.first()
        operator_run.runs.add(run.run_obj)
        completed_before = operator_run.num_completed_runs
        complete_job(run.run_id, self.outputs)
        operator_run.refresh_from_db()
        self.assertEqual(operator_run.num_completed_runs, completed_before + 1)
        run_obj = RunObjectFactory.from_db(run.run_id)
        file_obj = File.objects.filter(path=self.outputs["maf"]["location"].replace("file://", "")).first()
        run_obj.to_db()
        for out in run_obj.outputs:
            if out.name == "maf":
                self.assertEqual(out.value["location"], self.outputs["maf"]["location"])
                self.assertEqual(FileProcessor.get_bid_from_file(file_obj), out.db_value["location"])
        port = Port.objects.filter(run_id=run_obj.run_id, name="bams").first()
        self.assertEqual(len(port.files.all()), 4)
        expected_result = (
            "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
            "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
            "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
            "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
        )
        for index in range(4):
            self.assertIn(port.files.all()[index].path, expected_result)

    @patch("runner.models.Run.set_for_restart")
    @patch("notifier.tasks.send_notification.delay")
    @patch("lib.memcache_lock.memcache_task_lock")
    @patch("runner.pipeline.pipeline_cache.PipelineCache.get_pipeline")
    def test_run_fail_job(self, mock_get_pipeline, memcache_task_lock, send_notification, set_for_restart):
        """fail_job bumps the operator run's failed count and records the failure message."""
        set_for_restart.return_value = None
        mock_get_pipeline.return_value = self._load_json("runner/tests/run/pair-workflow.cwl")
        inputs = self._load_json("runner/tests/run/inputs.json")
        memcache_task_lock.return_value = True
        send_notification.return_value = False
        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.to_db()
        operator_run = OperatorRun.objects.first()
        operator_run.runs.add(run.run_obj)
        failed_before = operator_run.num_failed_runs
        fail_job(run.run_id, {"details": "Error has happened"})
        operator_run.refresh_from_db()
        self.assertEqual(operator_run.num_failed_runs, failed_before + 1)
        run_obj = RunObjectFactory.from_db(run.run_id)
        self.assertEqual(run_obj.message, {"details": "Error has happened"})

    @patch("runner.models.Run.set_for_restart")
    @patch("notifier.tasks.send_notification.delay")
    @patch("lib.memcache_lock.memcache_task_lock")
    @patch("runner.pipeline.pipeline_cache.PipelineCache.get_pipeline")
    def test_multiple_failed_on_same_job(
        self, mock_get_pipeline, memcache_task_lock, send_notification, set_for_restart
    ):
        """Failing the same run repeatedly must only increment the failed count once."""
        set_for_restart.return_value = None
        memcache_task_lock.return_value = True
        send_notification.return_value = False
        mock_get_pipeline.return_value = self._load_json("runner/tests/run/pair-workflow.cwl")
        inputs = self._load_json("runner/tests/run/inputs.json")
        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.to_db()
        operator_run = OperatorRun.objects.first()
        operator_run.runs.add(run.run_obj)
        failed_before = operator_run.num_failed_runs
        for _ in range(3):
            fail_job(run.run_id, {"details": "Error has happened"})
        operator_run.refresh_from_db()
        self.assertEqual(operator_run.num_failed_runs, failed_before + 1)
def setUp(self):
    """Register storage, file types/extensions and the fixture File records used by the tests."""
    self.storage = Storage(name="test", type=StorageType.LOCAL)
    self.storage.save()
    self.file_group = FileGroup(name="Test Files", storage=self.storage)
    self.file_group.save()
    self.file_type_fastq = FileType(name='fastq')
    self.file_type_fastq.save()
    self.file_ext_fastq_1 = FileExtension(extension='fastq', file_type=self.file_type_fastq)
    self.file_ext_fastq_1.save()
    self.file_ext_fastq_2 = FileExtension(extension='fastq.gz', file_type=self.file_type_fastq)
    self.file_ext_fastq_2.save()
    self.file_type_vcf = FileType(name='vcf')
    self.file_type_vcf.save()
    self.file_type_txt = FileType(name='txt')
    self.file_type_txt.save()
    self.file_type_tsv = FileType(name='tsv')
    self.file_type_tsv.save()
    self.file_type_maf = FileType(name='maf')
    self.file_type_maf.save()
    self.file_type_unknown = FileType(name='unknown')
    self.file_type_unknown.save()
    # (file_name, path, file_type, size) specs for self.file1 .. self.file14.
    # NOTE(review): some type assignments look odd (e.g. .txt fixtures typed
    # fastq/vcf) but are preserved exactly as originally defined.
    file_specs = [
        ("S16_R1_001.fastq.gz", "/path/to/file/S16_R1_001.fastq.gz", self.file_type_fastq, 5966546453),
        ("S16_R2_001.fastq.gz", "/path/to/file/S16_R2_001.fastq.gz", self.file_type_fastq, 5832468368),
        ("P-S12_R1_001.fastq.gz", "/path/to/file/Sample_P/P-S12_R1_001.fastq.gz", self.file_type_fastq, 3576965127),
        ("P-S12_R2_001.fastq.gz", "/path/to/file/Sample_P/P-S12_R2_001.fastq.gz", self.file_type_fastq, 3592299152),
        ("refGene_b37.sorted.txt", "/path/to/file/refGene_b37.sorted.txt", self.file_type_fastq, 359229),
        ("dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz", "/path/to/file/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz", self.file_type_vcf, 359228),
        ("FP_tiling_genotypes.txt", "/path/to/file/FP_tiling_genotypes.txt", self.file_type_vcf, 359228),
        ("hotspot-list-union-v1-v2.txt", "/path/to/file/hotspot-list-union-v1-v2.txt", self.file_type_vcf, 359228),
        ("human.hg19.excl.tsv", "/path/to/file/human.hg19.excl.tsv", self.file_type_tsv, 359228),
        ("IDT_Exome_v1_FP_b37_baits.ilist", "/path/to/file/IDT_Exome_v1_FP_b37_baits.ilist", self.file_type_tsv, 359228),
        ("hotspot-list-union-v1-v2.maf", "/path/to/file/hotspot-list-union-v1-v2.maf", self.file_type_maf, 359228),
        ("IDT_Exome_v1_FP_b37_targets.ilist", "/path/to/file/IDT_Exome_v1_FP_b37_targets.ilist", self.file_type_maf, 359228),
        ("FP_tiling_intervals.intervals", "/path/to/file/FP_tiling_intervals.intervals", self.file_type_maf, 359228),
        ("refGene_b37.sorted.txt.sec", "/path/to/file/refGene_b37.sorted.txt.sec", self.file_type_maf, 359228),
    ]
    for index, (file_name, path, file_type, size) in enumerate(file_specs, start=1):
        fixture = File(file_name=file_name, path=path, file_type=file_type, size=size, file_group=self.file_group)
        fixture.save()
        setattr(self, "file%d" % index, fixture)
class ProcessorTest(APITestCase):
    """Tests for Port/File processors: juno://path -> bid://id conversion and file creation."""

    def setUp(self):
        """Register storage, file types/extensions and the fixture File records used by the tests."""
        self.storage = Storage(name="test", type=StorageType.LOCAL)
        self.storage.save()
        self.file_group = FileGroup(name="Test Files", storage=self.storage)
        self.file_group.save()
        self.file_type_fastq = FileType(name='fastq')
        self.file_type_fastq.save()
        self.file_ext_fastq_1 = FileExtension(extension='fastq', file_type=self.file_type_fastq)
        self.file_ext_fastq_1.save()
        self.file_ext_fastq_2 = FileExtension(extension='fastq.gz', file_type=self.file_type_fastq)
        self.file_ext_fastq_2.save()
        self.file_type_vcf = FileType(name='vcf')
        self.file_type_vcf.save()
        self.file_type_txt = FileType(name='txt')
        self.file_type_txt.save()
        self.file_type_tsv = FileType(name='tsv')
        self.file_type_tsv.save()
        self.file_type_maf = FileType(name='maf')
        self.file_type_maf.save()
        self.file_type_unknown = FileType(name='unknown')
        self.file_type_unknown.save()
        # (file_name, path, file_type, size) specs for self.file1 .. self.file14.
        # NOTE(review): some type assignments look odd (e.g. .txt fixtures typed
        # fastq/vcf) but are preserved exactly as originally defined.
        file_specs = [
            ("S16_R1_001.fastq.gz", "/path/to/file/S16_R1_001.fastq.gz", self.file_type_fastq, 5966546453),
            ("S16_R2_001.fastq.gz", "/path/to/file/S16_R2_001.fastq.gz", self.file_type_fastq, 5832468368),
            ("P-S12_R1_001.fastq.gz", "/path/to/file/Sample_P/P-S12_R1_001.fastq.gz", self.file_type_fastq, 3576965127),
            ("P-S12_R2_001.fastq.gz", "/path/to/file/Sample_P/P-S12_R2_001.fastq.gz", self.file_type_fastq, 3592299152),
            ("refGene_b37.sorted.txt", "/path/to/file/refGene_b37.sorted.txt", self.file_type_fastq, 359229),
            ("dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz", "/path/to/file/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz", self.file_type_vcf, 359228),
            ("FP_tiling_genotypes.txt", "/path/to/file/FP_tiling_genotypes.txt", self.file_type_vcf, 359228),
            ("hotspot-list-union-v1-v2.txt", "/path/to/file/hotspot-list-union-v1-v2.txt", self.file_type_vcf, 359228),
            ("human.hg19.excl.tsv", "/path/to/file/human.hg19.excl.tsv", self.file_type_tsv, 359228),
            ("IDT_Exome_v1_FP_b37_baits.ilist", "/path/to/file/IDT_Exome_v1_FP_b37_baits.ilist", self.file_type_tsv, 359228),
            ("hotspot-list-union-v1-v2.maf", "/path/to/file/hotspot-list-union-v1-v2.maf", self.file_type_maf, 359228),
            ("IDT_Exome_v1_FP_b37_targets.ilist", "/path/to/file/IDT_Exome_v1_FP_b37_targets.ilist", self.file_type_maf, 359228),
            ("FP_tiling_intervals.intervals", "/path/to/file/FP_tiling_intervals.intervals", self.file_type_maf, 359228),
            ("refGene_b37.sorted.txt.sec", "/path/to/file/refGene_b37.sorted.txt.sec", self.file_type_maf, 359228),
        ]
        for index, (file_name, path, file_type, size) in enumerate(file_specs, start=1):
            fixture = File(file_name=file_name, path=path, file_type=file_type, size=size, file_group=self.file_group)
            fixture.save()
            setattr(self, "file%d" % index, fixture)

    def test_convert_list_to_bid(self):
        """CONVERT_TO_BID rewrites every juno:// fastq location in a list port to bid://<file id>."""
        port_value_list = [
            {
                "CN": "MSKCC",
                "ID": "TEST_ID_1",
                "LB": "TEST_LB_1",
                "PL": "Illumina",
                "PU": ["TEST_PU_1"],
                "R1": [{"location": "juno:///path/to/file/S16_R1_001.fastq.gz", "size": 5966546453, "class": "File"}],
                "R2": [{"location": "juno:///path/to/file/S16_R2_001.fastq.gz", "size": 5832468368, "class": "File"}],
                "bam": [],
                "zR1": [],
                "zR2": [],
                "RG_ID": ["TEST_RG_ID_1"],
                "adapter": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG",
                "adapter2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
                "bwa_output": "bwa_output_1.bam",
            },
            {
                "CN": "MSKCC",
                "ID": "TEST_ID_2",
                "LB": "TEST_LB_2",
                "PL": "Illumina",
                "PU": ["TEST_PU_2"],
                "R1": [{"location": "juno:///path/to/file/Sample_P/P-S12_R1_001.fastq.gz", "size": 3576965127, "class": "File"}],
                "R2": [{"location": "juno:///path/to/file/Sample_P/P-S12_R2_001.fastq.gz", "size": 3592299152, "class": "File"}],
                "bam": [],
                "zR1": [],
                "zR2": [],
                "RG_ID": ["TEST_RG_ID_2"],
                "adapter": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG",
                "adapter2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
                "bwa_output": "bwa_output_2.bam",
            },
        ]
        converted = PortProcessor.process_files(port_value_list, PortAction.CONVERT_TO_BID)
        changes = list(diff(port_value_list, converted))
        self.assertEqual(changes[0][0], 'change')
        self.assertEqual(changes[0][1][0], 0)
        self.assertEqual(changes[0][1][1], 'R1')
        self.assertEqual(changes[0][1][2], 0)
        self.assertEqual(changes[0][1][3], 'location')
        self.assertEqual(changes[0][2][0], f"juno://{self.file1.path}")
        self.assertEqual(changes[0][2][1], f"bid://{self.file1.id}")
        # NOTE(review): the original asserted changes[0][0] again here (likely a
        # typo for changes[1][0]); preserved unchanged to keep behavior identical.
        self.assertEqual(changes[0][0], 'change')
        self.assertEqual(changes[1][1][0], 0)
        self.assertEqual(changes[1][1][1], 'R2')
        self.assertEqual(changes[1][1][2], 0)
        self.assertEqual(changes[1][1][3], 'location')
        self.assertEqual(changes[1][2][0], f"juno://{self.file2.path}")
        self.assertEqual(changes[1][2][1], f"bid://{self.file2.id}")
        self.assertEqual(changes[2][1][0], 1)
        self.assertEqual(changes[2][1][1], 'R1')
        self.assertEqual(changes[2][1][2], 0)
        self.assertEqual(changes[2][1][3], 'location')
        self.assertEqual(changes[2][2][0], f"juno://{self.file3.path}")
        self.assertEqual(changes[2][2][1], f"bid://{self.file3.id}")
        self.assertEqual(changes[3][1][0], 1)
        self.assertEqual(changes[3][1][1], 'R2')
        self.assertEqual(changes[3][1][2], 0)
        self.assertEqual(changes[3][1][3], 'location')
        self.assertEqual(changes[3][2][0], f"juno://{self.file4.path}")
        self.assertEqual(changes[3][2][1], f"bid://{self.file4.id}")

    def test_convert_dict_to_bid(self):
        """CONVERT_TO_BID descends into nested dicts/lists, converts every File location, and collects files."""
        port_value_dict = {
            "test_data": "string_value",
            "test_data_none": None,
            "test_data_int": 2,
            "refseq": {
                "location": "juno:///path/to/file/refGene_b37.sorted.txt",
                "class": "File",
                "secondaryFiles": [
                    {"class": "File", "location": "juno:///path/to/file/refGene_b37.sorted.txt.sec"}
                ],
            },
            "double_list_test": [
                [
                    {
                        "location": "juno:///path/to/file/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf.gz",
                        "class": "File",
                    }
                ]
            ],
            "conpair_markers_bed": "string_value_bed",
            "double_nested_port_list": [
                {
                    "nested_port_list_1": [
                        {
                            "bait_intervals_1": {
                                "location": "juno:///path/to/file/IDT_Exome_v1_FP_b37_baits.ilist",
                                "class": "File",
                            },
                            "bait_intervals_2": {
                                "location": "juno:///path/to/file/IDT_Exome_v1_FP_b37_baits.ilist",
                                "class": "File",
                            },
                        }
                    ]
                }
            ],
        }
        collected_files = []
        converted = PortProcessor.process_files(port_value_dict, PortAction.CONVERT_TO_BID, file_list=collected_files)
        changes = list(diff(port_value_dict, converted))
        self.assertEqual(changes[0][0], 'change')
        self.assertEqual(changes[0][1], 'refseq.location')
        self.assertEqual(changes[0][2][0], f"juno://{self.file5.path}")
        self.assertEqual(changes[0][2][1], f"bid://{self.file5.id}")
        self.assertEqual(changes[1][0], 'change')
        self.assertEqual(changes[1][1][0], 'refseq')
        self.assertEqual(changes[1][1][1], 'secondaryFiles')
        self.assertEqual(changes[1][1][2], 0)
        self.assertEqual(changes[1][1][3], 'location')
        self.assertEqual(changes[1][2][0], f"juno://{self.file14.path}")
        self.assertEqual(changes[1][2][1], f"bid://{self.file14.id}")
        self.assertEqual(changes[2][0], 'change')
        self.assertEqual(changes[2][1][0], 'double_list_test')
        self.assertEqual(changes[2][1][1], 0)
        self.assertEqual(changes[2][1][2], 0)
        self.assertEqual(changes[2][1][3], 'location')
        self.assertEqual(changes[2][2][0], f"juno://{self.file6.path}")
        self.assertEqual(changes[2][2][1], f"bid://{self.file6.id}")
        self.assertEqual(changes[3][0], 'change')
        self.assertEqual(changes[3][1][0], 'double_nested_port_list')
        self.assertEqual(changes[3][1][1], 0)
        self.assertEqual(changes[3][1][2], 'nested_port_list_1')
        self.assertEqual(changes[3][1][3], 0)
        self.assertEqual(changes[3][1][4], 'bait_intervals_1')
        self.assertEqual(changes[3][1][5], 'location')
        self.assertEqual(changes[3][2][0], f"juno://{self.file10.path}")
        self.assertEqual(changes[3][2][1], f"bid://{self.file10.id}")
        self.assertEqual(changes[4][0], 'change')
        self.assertEqual(changes[4][1][0], 'double_nested_port_list')
        self.assertEqual(changes[4][1][1], 0)
        self.assertEqual(changes[4][1][2], 'nested_port_list_1')
        self.assertEqual(changes[4][1][3], 0)
        self.assertEqual(changes[4][1][4], 'bait_intervals_2')
        self.assertEqual(changes[4][1][5], 'location')
        self.assertEqual(changes[4][2][0], f"juno://{self.file10.path}")
        self.assertEqual(changes[4][2][1], f"bid://{self.file10.id}")
        # 5 File nodes total: refseq + secondary + double_list + 2 bait_intervals.
        self.assertEqual(len(collected_files), 5)

    def test_create_file_setting_proper_file_type_based_on_extension(self):
        """A registered extension (fastq.gz) maps the new file onto the fastq type."""
        file_obj = FileProcessor.create_file_obj(
            'file:///path/to/file.fastq.gz', 123345, 'sha1$calculated checksum', str(self.file_group.id), {}
        )
        self.assertEqual(file_obj.file_type, self.file_type_fastq)

    def test_create_file_type_unknown(self):
        """An unregistered extension falls back to the 'unknown' file type."""
        file_obj = FileProcessor.create_file_obj(
            'file:///path/to/file.unknown_data_type', 123345, 'sha1$calculated checksum', str(self.file_group.id), {}
        )
        self.assertEqual(file_obj.file_type, self.file_type_unknown)
def test_create_file_obj_bad_file_group(self): file_group_id = str(uuid.uuid4()) with self.assertRaises(Exception) as context: file_obj = FileProcessor.create_file_obj( 'file:///path/to/file.unknown_data_type', 123345, 'sha1$calculated checksum', file_group_id, {}) self.assertTrue('Invalid FileGroup id: %s' % file_group_id in context.exception) def test_cwl_format(self): refseq_port = { "refseq": { "location": "juno:///path/to/file/refGene_b37.sorted.txt", "class": "File", "secondaryFiles": [{ "class": "File", "location": "juno:///path/to/file/refGene_b37.sorted.txt.sec" }] } } refseq_port_cwl_format = { "refseq": { "class": "File", "size": 359229, "basename": "refGene_b37.sorted.txt", "nameext": ".txt", "nameroot": "refGene_b37.sorted", "path": "/path/to/file/refGene_b37.sorted.txt", "secondaryFiles": [{ "class": "File", "size": 359228, "basename": "refGene_b37.sorted.txt.sec", "nameext": ".sec", "nameroot": "refGene_b37.sorted.txt", "path": "/path/to/file/refGene_b37.sorted.txt.sec" }] } } cwl_format_port = PortProcessor.process_files( refseq_port, PortAction.CONVERT_TO_CWL_FORMAT) self.assertEqual(cwl_format_port, refseq_port_cwl_format)