def main():
    """Run the ``sort_sam`` pipeline step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``), passes ``<prefix>.sam`` to an ``SDK.Task`` as its
    only input file, drives the task through its download / build / run /
    upload / cleanup calls, and prints the start, end and elapsed times.
    """
    env = os.environ
    # Looked up in this fixed order, so the first missing name is the one
    # reported by the KeyError.
    (prefix, param_file, ref_uri, in_uri,
     out_uri, assets_uri, build) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri",
            "out_uri", "assets_uri", "build")]
    sam_inputs = ["{}.sam".format(prefix)]

    started = datetime.now()
    print("SORT SAM for {} was started at {}.".format(prefix, str(started)))

    task = SDK.Task(
        step="sort_sam",
        prefix=prefix,
        in_files=sam_inputs,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )
    task.get_reference_files(build)
    for group in ("INPUT", "REF", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("SORT SAM for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for SORT SAM was {}.".format(str(elapsed)))
def main():
    """Run the Sentieon ``genotyper`` step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``sample_file``, ``sentieon_pkg``, ``sentieon_license``,
    ``ome`` and ``build`` from ``os.environ``; ``target_file`` is read only
    when ``ome`` is ``"wes"``. A missing variable raises ``KeyError``.

    The task is created with an empty ``in_files`` list and
    ``task.get_genotyping_samples()`` is called before the downloads.
    Start, end and elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri, out_uri, assets_uri,
     sample_file, sentieon_pkg, license_file, ome, build) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri", "out_uri",
            "assets_uri", "sample_file", "sentieon_pkg",
            "sentieon_license", "ome", "build")]
    # Only exome ("wes") runs carry a target file.
    target_file = env["target_file"] if ome == "wes" else None

    started = datetime.now()
    print("Sentieon's GENOTYPER for {} was started at {}.".format(prefix, str(started)))

    # Deliberately empty here.
    # NOTE(review): per the original author's note the inputs are built from
    # sample_file, presumably inside get_genotyping_samples() -- confirm
    # against the SDK.
    gvcf_inputs = []
    task = SDK.Task(
        step="genotyper",
        prefix=prefix,
        in_files=gvcf_inputs,
        param_file=param_file,
        sentieon_pkg=sentieon_pkg,
        license_file=license_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
        target_file=target_file,
        sample_file=sample_file,
    )

    # target_file is None unless ome == "wes", so its truthiness alone is
    # the same condition as `ome == "wes" and target_file`.
    if target_file:
        task.download_files("TARGET")

    task.get_reference_files(build)
    task.get_genotyping_samples()
    for group in ("INPUT", "REF", "SENTIEON", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("Sentieon's GENOTYPER for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for Sentieon's GENOTYPER was {}.".format(str(elapsed)))
def main():
    """Run the ``scrub_vcf`` pipeline step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``build`` and ``fam_id`` from ``os.environ`` (a missing
    variable raises ``KeyError``). The inputs handed to the ``SDK.Task`` are
    ``<fam_id>.vcf`` and ``<fam_id>.vcf.idx``. The input list, the current
    directory listing, and the start, end and elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri, out_uri,
     assets_uri, build, fam_id) = [
        env[name] for name in (
            'prefix', 'param_file', 'ref_uri', 'in_uri', 'out_uri',
            'assets_uri', 'build', 'fam_id')]

    family_vcf = '{}.vcf'.format(fam_id)
    family_inputs = [family_vcf, '{}.idx'.format(family_vcf)]
    print(family_inputs)

    started = datetime.now()
    print('SCRUB VCF for {} was started at {}.'.format(prefix, str(started)))

    task = SDK.Task(
        step='scrub_vcf',
        prefix=prefix,
        in_files=family_inputs,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )

    # Log the working directory as it looks right after the task is created.
    listing = os.listdir('.')
    print('Current dir contents: {}'.format(str(listing)))

    task.get_reference_files(build)
    for group in ('INPUT', 'REF', 'PARAMS'):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print('SCRUB VCF for {} ended at {}.'.format(prefix, str(finished)))
    elapsed = finished - started
    print('Total time for SCRUB VCF was {}.'.format(str(elapsed)))
def main():
    """Run the ``bwa_mem`` pipeline step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``build``, ``R1``, ``R2`` and ``threads`` from
    ``os.environ`` (a missing variable raises ``KeyError``). The values of
    ``R1`` and ``R2`` are the two input files handed to the ``SDK.Task``.
    The input list, the current directory listing, and the start, end and
    elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri, out_uri, assets_uri,
     build, read_1, read_2, threads) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri", "out_uri",
            "assets_uri", "build", "R1", "R2", "threads")]

    read_files = [read_1, read_2]
    print(read_files)

    started = datetime.now()
    print("BWA MEM for {} was started at {}.".format(prefix, str(started)))

    task = SDK.Task(
        step="bwa_mem",
        prefix=prefix,
        threads=threads,
        in_files=read_files,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )

    # Log the working directory as it looks right after the task is created.
    listing = os.listdir(".")
    print("Current dir contents: {}".format(str(listing)))

    task.get_reference_files(build)
    for group in ("INPUT", "REF", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("BWA MEM for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for BWA MEM was {}.".format(str(elapsed)))
def main():
    """Run the ``vqsr_indel_apply`` step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``). Four input file names are derived from ``prefix``:
    ``.gt.snp.recal.vcf``, ``.gt.snp.recal.vcf.idx``,
    ``.gt.snp.indel.recal.model`` and ``.gt.snp.indel.tranches``. Start, end
    and elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri,
     out_uri, assets_uri, build) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri",
            "out_uri", "assets_uri", "build")]

    # Order matters to the callee only insofar as the list is passed as-is:
    # VCF, its index, the recalibration model, then the tranches file.
    apply_inputs = [
        "{}.gt.snp.recal.vcf".format(prefix),
        "{}.gt.snp.recal.vcf.idx".format(prefix),
        "{}.gt.snp.indel.recal.model".format(prefix),
        "{}.gt.snp.indel.tranches".format(prefix),
    ]

    started = datetime.now()
    print("VQSR INDEL APPLY for {} was started at {}.".format(
        prefix, str(started)))

    task = SDK.Task(
        step="vqsr_indel_apply",
        prefix=prefix,
        in_files=apply_inputs,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )
    task.get_reference_files(build)
    for group in ("INPUT", "REF", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("VQSR INDEL APPLY for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for VQSR INDEL APPLY was {}.".format(str(elapsed)))
def main():
    """Run the ``base_recal`` pipeline step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``). The inputs handed to the ``SDK.Task`` are
    ``<prefix>.sorted.deduped.bam``, ``<prefix>.sorted.deduped.bam.bai`` and
    ``<prefix>.base_recal_table.txt``. Start, end and elapsed times are
    printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri,
     out_uri, assets_uri, build) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri",
            "out_uri", "assets_uri", "build")]

    # BAM, its index, then the recalibration table -- passed in this order.
    recal_inputs = [
        "{}.sorted.deduped.bam".format(prefix),
        "{}.sorted.deduped.bam.bai".format(prefix),
        "{}.base_recal_table.txt".format(prefix),
    ]

    started = datetime.now()
    print("BASE RECAL for {} was started at {}.".format(
        prefix, str(started)))

    task = SDK.Task(
        step="base_recal",
        prefix=prefix,
        in_files=recal_inputs,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )
    task.get_reference_files(build)
    for group in ("INPUT", "REF", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("BASE RECAL for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for BASE RECAL was {}.".format(str(elapsed)))
def main():
    """Run the ``vqsr_snp_model`` step, configured from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``). The inputs handed to the ``SDK.Task`` are
    ``<prefix>.gt.vcf.gz`` and ``<prefix>.gt.vcf.gz.tbi``. Start, end and
    elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (prefix, param_file, ref_uri, in_uri,
     out_uri, assets_uri, build) = [
        env[name] for name in (
            "prefix", "param_file", "ref_uri", "in_uri",
            "out_uri", "assets_uri", "build")]

    # Compressed VCF followed by its .tbi index.
    model_inputs = [
        "{}.gt.vcf.gz".format(prefix),
        "{}.gt.vcf.gz.tbi".format(prefix),
    ]

    started = datetime.now()
    print("VQSR SNP MODEL for {} was started at {}.".format(
        prefix, str(started)))

    task = SDK.Task(
        step="vqsr_snp_model",
        prefix=prefix,
        in_files=model_inputs,
        param_file=param_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
    )
    task.get_reference_files(build)
    for group in ("INPUT", "REF", "PARAMS"):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("VQSR SNP MODEL for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - started
    print("Total time for VQSR SNP MODEL was {}.".format(str(elapsed)))
param_uri = os.environ["param_uri"] param_file = param_uri.split("/")[-1] ref_uri = os.environ["ref_uri"] in_uri = os.environ["in_uri"] out_uri = os.environ["out_uri"] ref_files = [] in_files = ["{}.sorted.deduped.bam".format(prefix)] start_time = datetime.now() print "INDEX BAM for {} was started at {}.".format(prefix, str(start_time)) task = SDK.Task( step="index_bam", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") task.build_cmd() task.run_cmd() task.upload_results() task.cleanup()
raise ValueError( "Unrecognized build - environment variable mode must be GRCh38 or GRCh37" ) vcf = "{}.gt.vcf.gz".format(prefix) tbi = "{}.gt.vcf.gz.tbi".format(prefix) in_files = [vcf, tbi] start_time = datetime.now() print "VQSR SNP MODEL for {} was started at {}.".format( prefix, str(start_time)) task = SDK.Task(step="vqsr_snp_model", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") task.build_cmd() task.run_cmd() task.upload_results() task.cleanup()
) vcf = "{}.gt.snp.recal.vcf".format(prefix) idx = "{}.gt.snp.recal.vcf.idx".format(prefix) recal_file = "{}.gt.snp.indel.recal.model".format(prefix) tranches_file = "{}.gt.snp.indel.tranches".format(prefix) in_files = [vcf, idx, recal_file, tranches_file] start_time = datetime.now() print "VQSR INDEL APPLY for {} was started at {}.".format( prefix, str(start_time)) task = SDK.Task(step="vqsr_indel_apply", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") task.build_cmd() task.run_cmd() task.upload_results() task.cleanup()
param_uri = os.environ["param_uri"] param_file = param_uri.split("/")[-1] ref_uri = os.environ["ref_uri"] in_uri = os.environ["in_uri"] out_uri = os.environ["out_uri"] ref_files = [] in_files = ["{}.sorted.bam".format(prefix)] start_time = datetime.now() print "MARK DUPS for {} was started at {}.".format(prefix, str(start_time)) task = SDK.Task(step="mark_dups", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") task.build_cmd() task.run_cmd() task.upload_results() task.cleanup()
raise ValueError( "Unrecognized build - environment variable mode must be GRCh38 or GRCh37" ) R1 = "{}_R1{}".format(prefix, suffix) R2 = "{}_R2{}".format(prefix, suffix) in_files = [R1, R2] threads = os.environ['threads'] start_time = datetime.now() print "BWA MEM for {} was started at {}.".format(prefix, str(start_time)) task = SDK.Task(step="bwa_mem", prefix=prefix, threads=threads, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) dir_contents = os.listdir(".") print "Current dir contents: {}".format(str(dir_contents)) if not set(in_files).issubset(set(dir_contents)): task.download_files("INPUT") if not set(ref_files).issubset(set(dir_contents)): task.download_files("REF") if param_file not in dir_contents: task.download_files("PARAMS") task.build_cmd() task.run_cmd()
"Unrecognized build - environment variable build must be GRCh38 or GRCh37" ) bam = "{}.sorted.deduped.bam".format(prefix) bai = "{}.sorted.deduped.bam.bai".format(prefix) bqsr = "{}.base_recal_table.txt".format(prefix) in_files = [bam, bai, bqsr] start_time = datetime.now() print "BASE RECAL for {} was started at {}.".format(prefix, str(start_time)) task = SDK.Task(step="base_recal", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") task.build_cmd() task.run_cmd() task.upload_results() task.cleanup()
raise ValueError("Unrecognized build - environment variable build must be GRCh38 or GRCh37") if ome == "wes": target_file = os.environ["target_file"] ref_files.append(target_file) else: target_file = None start_time = datetime.now() print "Sentieon's GENOTYPER for {} was started at {}.".format(", ".join(gvcfs), str(start_time)) task = SDK.Task( step="genotyper", prefix=prefix, in_files=in_files, ref_files=ref_files, param_file=param_file, sentieon_pkg=sentieon_pkg, license_file=license_file, license_uri=license_uri, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri, target_file=target_file) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if license_file not in os.listdir("."): task.download_files("SENTIEON") if param_file not in os.listdir("."): task.download_files("PARAMS") if ome == "wes" and target_file not in os.listdir("."):
def main():
    """Run the Sentieon ``haplotyper`` step, configured from the environment.

    Reads ``start_point``, ``prefix``, ``param_file``, ``ref_uri``,
    ``in_uri``, ``out_uri``, ``assets_uri``, ``sentieon_pkg``,
    ``sentieon_license``, ``build``, ``ome`` and ``threads`` from
    ``os.environ``; ``target_file`` is read only when ``ome`` is ``'wes'``
    and ``in_file`` only when ``start_point`` is not ``'fastq'``. A missing
    variable raises ``KeyError``.

    Input selection:
      * ``start_point == 'fastq'``: ``<prefix>.sorted.deduped.recalibrated``
        with ``.bam`` / ``.bai`` extensions.
      * otherwise: the BAM named by ``in_file`` plus an index whose name is
        ``in_file`` with its final extension replaced by ``.bai``.

    Start, end and elapsed times are printed.
    """
    env = os.environ
    # Fixed lookup order: the first missing name is the one KeyError reports.
    (start_point, prefix, param_file, ref_uri, in_uri, out_uri,
     assets_uri, sentieon_pkg, license_file, build, ome) = [
        env[name] for name in (
            'start_point', 'prefix', 'param_file', 'ref_uri', 'in_uri',
            'out_uri', 'assets_uri', 'sentieon_pkg', 'sentieon_license',
            'build', 'ome')]
    # Only exome ('wes') runs carry a target file.
    target_file = env['target_file'] if ome == 'wes' else None

    if start_point == 'fastq':
        bam = '{}.sorted.deduped.recalibrated.bam'.format(prefix)
        bai = '{}.sorted.deduped.recalibrated.bai'.format(prefix)
    else:
        bam = env['in_file']
        # Everything before the last '.' (empty when there is no '.'),
        # i.e. the BAM name with its final extension swapped for '.bai'.
        bai = '{}.bai'.format(bam.rpartition('.')[0])
    alignment_inputs = [bam, bai]
    threads = env['threads']

    started = datetime.now()
    print('Sentieons HAPLOTYPER for {} was started at {}.'.format(
        prefix, str(started)))

    task = SDK.Task(
        step='haplotyper',
        prefix=prefix,
        threads=threads,
        in_files=alignment_inputs,
        param_file=param_file,
        sentieon_pkg=sentieon_pkg,
        license_file=license_file,
        ref_uri=ref_uri,
        in_uri=in_uri,
        out_uri=out_uri,
        assets_uri=assets_uri,
        target_file=target_file,
    )

    if ome == 'wes' and target_file:
        task.download_files('TARGET')

    task.get_reference_files(build)
    for group in ('INPUT', 'REF', 'SENTIEON', 'PARAMS'):
        task.download_files(group)

    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print('Sentieons HAPLOTYPER for {} ended at {}.'.format(
        prefix, str(finished)))
    elapsed = finished - started
    print('Total time for Sentieons HAPLOTYPER was {}.'.format(
        str(elapsed)))
else: target_file = None bam = "{}.sorted.deduped.bam".format(prefix) bai = "{}.sorted.deduped.bam.bai".format(prefix) in_files = [bam, bai] threads = os.environ['threads'] start_time = datetime.now() print "BASE RECAL TABLE for {} was started at {}.".format(prefix, str(start_time)) task = SDK.Task( step="base_recal_table", prefix=prefix, threads=threads, in_files=in_files, ref_files=ref_files, param_file=param_file, param_uri=param_uri, ref_uri=ref_uri, in_uri=in_uri, out_uri=out_uri, target_file=target_file) if not set(in_files).issubset(set(os.listdir("."))): task.download_files("INPUT") if not set(ref_files).issubset(set(os.listdir("."))): task.download_files("REF") if param_file not in os.listdir("."): task.download_files("PARAMS") if ome == "wes" and target_file not in os.listdir("."): task.download_files("TARGET") task.build_cmd() task.run_cmd()