import json
import os
import time

from tibanna.core import API

# REGION1/REGION2, DEV_SFN*_REGION*, DEV_SUFFIX, DEV_GROUP_SUFFIX and
# get_test_json() are assumed to be defined elsewhere in this module.


def test_shell_region2():
    os.environ['AWS_DEFAULT_REGION'] = REGION2
    res = API().run_workflow(input_json=get_test_json('shelltest4.json'),
                             sfn=DEV_SFN_REGION2)
    jobid = res['jobid']
    time.sleep(300)
    status = API().check_status(job_id=jobid)
    assert status == 'SUCCEEDED'
def test_shell():
    os.environ['AWS_DEFAULT_REGION'] = REGION1
    res = API().run_workflow(input_json=get_test_json('shelltest4.json'),
                             sfn=DEV_SFN_REGION1)
    jobid = res['jobid']
    time.sleep(300)
    # the original bound this to `status` but asserted on `status_unicorn`;
    # the variable name is fixed here so the assertion refers to it
    status_unicorn = API().check_status(job_id=jobid)
    status_costupdater = API().check_costupdater_status(job_id=jobid)
    assert status_unicorn == 'SUCCEEDED' and status_costupdater == 'SUCCEEDED'
def test_cw_metrics_region2():
    os.environ['AWS_DEFAULT_REGION'] = REGION2
    res = API().run_workflow(input_json=get_test_json('4dn_bwa.runonly.v1.json'),
                             sfn=DEV_SFN2_REGION2)
    jobid = res['jobid']
    time.sleep(60 * 20)
    status = API().check_status(job_id=jobid)
    assert status == 'SUCCEEDED'
    prj = json.loads(API().log(job_id=jobid, postrunjson=True))
    assert prj['metrics']['max_mem_utilization_percent']
    assert prj['metrics']['max_cpu_utilization_percent']
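# A minimal polling sketch, as an alternative to the fixed time.sleep()
# calls in the tests above. This is an assumption, not the source's
# approach: it repeatedly calls check_status() until the job leaves the
# RUNNING state or a timeout is hit.
def wait_for_job(jobid, timeout=1800, interval=60):
    """Poll a Tibanna job until it is no longer RUNNING (hypothetical helper)."""
    waited = 0
    while waited < timeout:
        status = API().check_status(job_id=jobid)
        if status != 'RUNNING':
            return status
        time.sleep(interval)
        waited += interval
    raise TimeoutError("job %s still RUNNING after %ds" % (jobid, timeout))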
def run_tibanna(num):
    bucket = "eo72-4dn"
    file_index = "{:02d}".format(num)
    output = "PON_" + file_index
    # no trailing space here: the original "vcfNN.list " leaked a trailing
    # space into the input file path and object key below
    vcflist = "vcf" + file_index + ".list"
    options = (" --setKey \"null\" --filteredAreUncalled"
               " --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED")
    command = ("java -Xmx100g -jar /usr/GenomeAnalysisTK.jar -nt 16"
               " -T CombineVariants -R GRCh38.d1.vd1.fa"
               " --variant " + vcflist + options + " -o " + output + ".vcf.gz")
    tibanna_args = {
        "args": {
            "language": "shell",
            "command": [command],
            "container_image": "broadinstitute/gatk3:3.8-1",
            "output_S3_bucket": bucket,
            "output_target": {
                "file:///data1/shell/" + output + ".vcf.gz": "PON/" + output + ".vcf.gz",
                "file:///data1/shell/" + output + ".vcf.gz.tbi": "PON/" + output + ".vcf.gz.tbi"
            },
            "input_files": {
                "file:///data1/shell/VCFS/": "s3://eo72-4dn/vcfpass",
                "file:///data1/shell/GRCh38.d1.vd1.fa.fai": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.fa.fai"
                },
                "file:///data1/shell/GRCh38.d1.vd1.dict": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.dict"
                },
                "file:///data1/shell/GRCh38.d1.vd1.fa": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.fa"
                },
                "file:///data1/shell/" + vcflist: {
                    "bucket_name": bucket,
                    "object_key": "vcflist/" + vcflist
                }
            }
        },
        "config": {
            "instance_type": "r5.4xlarge",
            "log_bucket": bucket,
            "ebs_size": 60,
            "EBS_optimized": True,
            "behavior_on_capacity_limit": "wait_and_retry"
        }
    }
    API().run_workflow(input_json=tibanna_args)  # json file or dictionary object
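# A minimal driver sketch, assuming the panel-of-normals shards are launched
# in a loop over file indices. The shard count below is hypothetical; adjust
# it to the actual number of vcfNN.list files in the bucket.
if __name__ == "__main__":
    for i in range(10):  # hypothetical shard count
        run_tibanna(i)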
def run_tibanna(file, name):
    # `bucket` and `path` are assumed to be defined at module level
    command = ("java -jar /usr/GenomeAnalysisTK.jar -T MuTect2"
               " -R GRCh38.d1.vd1.fa -I:tumor " + file + " -o " + name + ".vcf.gz")
    tibanna_args = {
        "args": {
            "language": "shell",
            # the ls/pwd commands are kept from the original; they log the
            # container's working directory and layout before running GATK
            "command": ["pwd", "ls /", "ls /gatk/", command],
            "container_image": "broadinstitute/gatk3:3.8-1",
            "output_S3_bucket": bucket,
            "output_target": {
                "file:///data1/shell/" + name + ".vcf.gz": "GATK3_MUTECT/" + name + ".vcf.gz",
                "file:///data1/shell/" + name + ".vcf.gz.tbi": "GATK3_MUTECT/" + name + ".vcf.gz.tbi"
            },
            "input_files": {
                "file:///data1/shell/" + file: {
                    "bucket_name": bucket,
                    "object_key": path
                },
                "file:///data1/shell/" + name + ".bai": {
                    "bucket_name": bucket,
                    "object_key": "output/" + name + ".bai"
                },
                "file:///data1/shell/GRCh38.d1.vd1.fa.fai": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.fa.fai"
                },
                "file:///data1/shell/GRCh38.d1.vd1.dict": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.dict"
                },
                "file:///data1/shell/GRCh38.d1.vd1.fa": {
                    "bucket_name": bucket,
                    "object_key": "FASTA/GRCh38.d1.vd1.fa"
                }
            }
        },
        "config": {
            "instance_type": "r4.large",
            "log_bucket": bucket,
            "ebs_size": 60,
            "EBS_optimized": True,
            "behavior_on_capacity_limit": "wait_and_retry",
            "spot_instance": True
        }
    }
    API().run_workflow(input_json=tibanna_args)  # json file or dictionary object
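# A minimal usage sketch, assuming the module-level `bucket` and `path`
# names the function reads are set first; the values are hypothetical.
if __name__ == "__main__":
    bucket = "eo72-4dn"             # hypothetical S3 bucket
    path = "output/sample1.bam"     # hypothetical tumor BAM object key
    run_tibanna("sample1.bam", "sample1")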
def cleanup_sfn2_region2():
    os.environ['AWS_DEFAULT_REGION'] = REGION2
    API().cleanup(user_group_name=DEV_GROUP_SUFFIX + '2', suffix=DEV_SUFFIX)
def deploy_sfn2_to_region2():
    os.environ['AWS_DEFAULT_REGION'] = REGION2
    buckets = 'tibanna-output,elasticbeanstalk-fourfront-webdev-files,soos-4dn-bucket'
    API().deploy_unicorn(suffix=DEV_SUFFIX, buckets=buckets,
                         usergroup=DEV_GROUP_SUFFIX + '2')
def deploy_sfn1_to_region2():
    os.environ['AWS_DEFAULT_REGION'] = REGION2
    API().deploy_unicorn(suffix=DEV_SUFFIX, buckets='tibanna-output,soos-4dn-bucket',
                         usergroup=DEV_GROUP_SUFFIX + '1')
def deploy_sfn1_to_region1():
    """Deploy sfn1 to region1 with cost updater."""
    os.environ['AWS_DEFAULT_REGION'] = REGION1
    API().deploy_unicorn(suffix=DEV_SUFFIX, buckets='tibanna-output,soos-4dn-bucket',
                         usergroup=DEV_GROUP_SUFFIX + '1', deploy_costupdater=True)
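# A minimal orchestration sketch (an assumption, not from the source):
# one plausible ordering is to deploy the step functions in both regions,
# run the tests above against them, then tear down region2's sfn2.
if __name__ == "__main__":
    deploy_sfn1_to_region1()
    deploy_sfn1_to_region2()
    deploy_sfn2_to_region2()
    # ... run the test_* functions against the deployed step functions ...
    cleanup_sfn2_region2()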
"PON/" + output + ".vcf.gz", "file:///data1/shell/" + output + ".vcf.gz.tbi": "PON/" + output + ".vcf.gz.tbi" }, "input_files": { "file:///data1/shell/VCFS/": "s3://eo72-4dn/PON_1000", "file:///data1/shell/" + "GRCh38.d1.vd1.fa.fai": { "bucket_name": bucket, "object_key": "FASTA/GRCh38.d1.vd1.fa.fai" }, "file:///data1/shell/" + "GRCh38.d1.vd1.dict": { "bucket_name": bucket, "object_key": "FASTA/GRCh38.d1.vd1.dict" }, "file:///data1/shell/" + "GRCh38.d1.vd1.fa": { "bucket_name": bucket, "object_key": "FASTA/GRCh38.d1.vd1.fa" } } }, "config": { "instance_type": "r5.4xlarge", "log_bucket": bucket, "ebs_size": 60, "EBS_optimized": True, "behavior_on_capacity_limit": "wait_and_retry" } } API().run_workflow(input_json=tibanna_args) # json file or dictionary object
def run_tibanna(region):
    path = "<PATH>"
    bucket = "<BUCKET>"
    format_region = "{:04d}".format(region)
    regionfile = format_region + "-scattered.intervals"
    tumor = path.split("/")[1]
    out = tumor.split(".")[0] + "_" + format_region
    command = ("java -jar /usr/GenomeAnalysisTK.jar -T MuTect2"
               " -R GRCh38_full_analysis_set_plus_decoy_hla.fa"
               " -L " + regionfile + " -I:tumor " + tumor + " -o " + out + ".vcf.gz")
    print(command)
    tibanna_args = {
        "args": {
            "language": "shell",
            "command": command,
            "container_image": "broadinstitute/gatk3:3.8-1",
            "output_S3_bucket": bucket,
            "output_target": {
                "file:///data1/shell/" + out + ".vcf.gz":
                    "WGS/" + tumor.split(".")[0] + "/" + out + ".vcf.gz",
                "file:///data1/shell/" + out + ".vcf.gz.tbi":
                    "WGS/" + tumor.split(".")[0] + "/" + out + ".vcf.gz.tbi"
            },
            "input_files": {
                "file:///data1/shell/" + tumor: {
                    "bucket_name": bucket,
                    "object_key": path
                },
                "file:///data1/shell/" + tumor + ".bai": {
                    "bucket_name": bucket,
                    "object_key": path + ".bai"
                },
                "file:///data1/shell/" + regionfile: {
                    "bucket_name": bucket,
                    "object_key": "regions/" + regionfile
                },
                "file:///data1/shell/GRCh38_full_analysis_set_plus_decoy_hla.fa": {
                    "bucket_name": bucket,
                    "object_key": "WGS/GRCh38_full_analysis_set_plus_decoy_hla.fa"
                },
                "file:///data1/shell/GRCh38_full_analysis_set_plus_decoy_hla.dict": {
                    "bucket_name": bucket,
                    "object_key": "WGS/GRCh38_full_analysis_set_plus_decoy_hla.dict"
                },
                "file:///data1/shell/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai": {
                    "bucket_name": bucket,
                    "object_key": "WGS/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai"
                }
            }
        },
        "config": {
            "instance_type": "r4.large",
            "log_bucket": bucket,
            "ebs_size": 150,
            "EBS_optimized": True,
            "behavior_on_capacity_limit": "wait_and_retry"
        }
    }
    print(json.dumps(tibanna_args, indent=4))
    API().run_workflow(input_json=tibanna_args)
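# A minimal scatter driver sketch, assuming the genome intervals were split
# into numbered NNNN-scattered.intervals files as referenced above; the
# shard count below is hypothetical.
if __name__ == "__main__":
    for r in range(50):  # hypothetical number of scattered interval files
        run_tibanna(r)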