def main():
    """Drive the ``sort_sam`` step using settings taken from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``), builds an ``SDK.Task`` from them and calls the
    task's methods in a fixed order.  The start time, end time and elapsed
    time are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    # Read in the same order the task receives them.
    locations = [(key, env[key]) for key in
                 ("param_file", "ref_uri", "in_uri", "out_uri", "assets_uri")]
    build = env["build"]

    began = datetime.now()
    print("SORT SAM for {} was started at {}.".format(prefix, str(began)))

    task_kwargs = {
        "step": "sort_sam",
        "prefix": prefix,
        # The only input is the SAM file named after the prefix.
        "in_files": ["{}.sam".format(prefix)],
    }
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    task.get_reference_files(build)
    for category in ("INPUT", "REF", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("SORT SAM for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for SORT SAM was {}.".format(str(elapsed)))
예제 #2
0
def main():
    """Drive the Sentieon ``genotyper`` step from environment settings.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``sample_file``, ``sentieon_pkg``, ``sentieon_license``,
    ``ome`` and ``build`` from ``os.environ``; ``target_file`` is read only
    when ``ome`` is ``"wes"``.  A missing variable raises ``KeyError``.  The
    values are handed to ``SDK.Task`` and the task's methods are called in a
    fixed order.  Start, end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    param_file = env["param_file"]
    locations = [(key, env[key]) for key in
                 ("ref_uri", "in_uri", "out_uri", "assets_uri")]
    sample_file = env["sample_file"]
    sentieon_pkg = env["sentieon_pkg"]
    license_file = env["sentieon_license"]
    ome = env["ome"]
    build = env["build"]
    # A target file is only configured for "wes" runs.
    target_file = env["target_file"] if ome == "wes" else None

    began = datetime.now()
    print("Sentieon's GENOTYPER for {} was started at {}.".format(prefix, str(began)))

    task_kwargs = {
        "step": "genotyper",
        "prefix": prefix,
        # The task is created with an empty input list.
        # NOTE(review): presumably get_genotyping_samples() fills it in from
        # sample_file -- confirm against the SDK.
        "in_files": [],
        "param_file": param_file,
        "sentieon_pkg": sentieon_pkg,
        "license_file": license_file,
    }
    task_kwargs.update(locations)
    task_kwargs["target_file"] = target_file
    task_kwargs["sample_file"] = sample_file
    task = SDK.Task(**task_kwargs)

    # target_file is None unless ome == "wes", so its truthiness alone decides.
    if target_file:
        task.download_files("TARGET")
    task.get_reference_files(build)
    task.get_genotyping_samples()
    for category in ("INPUT", "REF", "SENTIEON", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("Sentieon's GENOTYPER for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for Sentieon's GENOTYPER was {}.".format(str(elapsed)))
def main():
    """Drive the ``scrub_vcf`` step using settings taken from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``build`` and ``fam_id`` from ``os.environ`` (a missing
    variable raises ``KeyError``).  The inputs are ``<fam_id>.vcf`` and its
    ``.idx`` companion.  The input list, the working-directory listing and
    the start, end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env['prefix']
    locations = [(key, env[key]) for key in
                 ('param_file', 'ref_uri', 'in_uri', 'out_uri', 'assets_uri')]
    build = env['build']
    fam_id = env['fam_id']

    vcf_name = '{}.vcf'.format(fam_id)
    # The index name is the VCF name with ".idx" appended.
    inputs = [vcf_name, '{}.idx'.format(vcf_name)]
    print(inputs)

    began = datetime.now()
    print('SCRUB VCF for {} was started at {}.'.format(prefix, str(began)))

    task_kwargs = {'step': 'scrub_vcf', 'prefix': prefix, 'in_files': inputs}
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    # Log what is present in the working directory before anything is fetched.
    print('Current dir contents: {}'.format(str(os.listdir('.'))))

    task.get_reference_files(build)
    for category in ('INPUT', 'REF', 'PARAMS'):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print('SCRUB VCF for {} ended at {}.'.format(prefix, str(finished)))
    elapsed = finished - began
    print('Total time for SCRUB VCF was {}.'.format(str(elapsed)))
def main():
    """Drive the ``bwa_mem`` step using settings taken from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri``, ``build``, ``R1``, ``R2`` and ``threads`` from
    ``os.environ`` (a missing variable raises ``KeyError``).  ``R1`` and
    ``R2`` form the input file list.  The input list, the working-directory
    listing and the start, end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    locations = [(key, env[key]) for key in
                 ("param_file", "ref_uri", "in_uri", "out_uri", "assets_uri")]
    build = env["build"]
    reads = [env["R1"], env["R2"]]
    threads = env['threads']

    print(reads)

    began = datetime.now()
    print("BWA MEM for {} was started at {}.".format(prefix, str(began)))

    task_kwargs = {
        "step": "bwa_mem",
        "prefix": prefix,
        "threads": threads,
        "in_files": reads,
    }
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    # Log what is present in the working directory before anything is fetched.
    print("Current dir contents: {}".format(str(os.listdir("."))))

    task.get_reference_files(build)
    for category in ("INPUT", "REF", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("BWA MEM for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for BWA MEM was {}.".format(str(elapsed)))
예제 #5
0
def main():
    """Drive the ``vqsr_indel_apply`` step from environment settings.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``).  The four input file names are derived from
    ``prefix``.  Start, end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    locations = [(key, env[key]) for key in
                 ("param_file", "ref_uri", "in_uri", "out_uri", "assets_uri")]
    # Recalibrated VCF, its index, the recalibration model and the tranches.
    inputs = [template.format(prefix) for template in (
        "{}.gt.snp.recal.vcf",
        "{}.gt.snp.recal.vcf.idx",
        "{}.gt.snp.indel.recal.model",
        "{}.gt.snp.indel.tranches",
    )]
    build = env["build"]

    began = datetime.now()
    print("VQSR INDEL APPLY for {} was started at {}.".format(
        prefix, str(began)))

    task_kwargs = {
        "step": "vqsr_indel_apply",
        "prefix": prefix,
        "in_files": inputs,
    }
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    task.get_reference_files(build)
    for category in ("INPUT", "REF", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("VQSR INDEL APPLY for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for VQSR INDEL APPLY was {}.".format(str(elapsed)))
def main():
    """Drive the ``base_recal`` step using settings taken from the environment.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``).  The inputs are the sorted/deduped BAM, its index
    and the base-recalibration table, all named after ``prefix``.  Start,
    end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    locations = [(key, env[key]) for key in
                 ("param_file", "ref_uri", "in_uri", "out_uri", "assets_uri")]
    build = env["build"]
    inputs = [template.format(prefix) for template in (
        "{}.sorted.deduped.bam",
        "{}.sorted.deduped.bam.bai",
        "{}.base_recal_table.txt",
    )]

    began = datetime.now()
    print("BASE RECAL for {} was started at {}.".format(
        prefix, str(began)))

    task_kwargs = {"step": "base_recal", "prefix": prefix, "in_files": inputs}
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    task.get_reference_files(build)
    for category in ("INPUT", "REF", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("BASE RECAL for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for BASE RECAL was {}.".format(str(elapsed)))
def main():
    """Drive the ``vqsr_snp_model`` step from environment settings.

    Reads ``prefix``, ``param_file``, ``ref_uri``, ``in_uri``, ``out_uri``,
    ``assets_uri`` and ``build`` from ``os.environ`` (a missing variable
    raises ``KeyError``).  The inputs are ``<prefix>.gt.vcf.gz`` and its
    ``.tbi`` companion.  Start, end and elapsed times are printed to stdout.
    """
    env = os.environ
    prefix = env["prefix"]
    locations = [(key, env[key]) for key in
                 ("param_file", "ref_uri", "in_uri", "out_uri", "assets_uri")]
    inputs = [template.format(prefix)
              for template in ("{}.gt.vcf.gz", "{}.gt.vcf.gz.tbi")]
    build = env["build"]

    began = datetime.now()
    print("VQSR SNP MODEL for {} was started at {}.".format(
        prefix, str(began)))

    task_kwargs = {
        "step": "vqsr_snp_model",
        "prefix": prefix,
        "in_files": inputs,
    }
    task_kwargs.update(locations)
    task = SDK.Task(**task_kwargs)

    task.get_reference_files(build)
    for category in ("INPUT", "REF", "PARAMS"):
        task.download_files(category)
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    finished = datetime.now()
    print("VQSR SNP MODEL for {} ended at {}.".format(prefix, str(finished)))
    elapsed = finished - began
    print("Total time for VQSR SNP MODEL was {}.".format(str(elapsed)))
예제 #8
0
# Driver fragment for the "index_bam" step.
# NOTE(review): `prefix` is not assigned in this fragment; it is presumably
# set earlier in the full script -- confirm.
param_uri = os.environ["param_uri"]
# The parameter file name is the last path component of its URI.
param_file = param_uri.split("/")[-1]
ref_uri = os.environ["ref_uri"]
in_uri = os.environ["in_uri"]
out_uri = os.environ["out_uri"]
ref_files = []
in_files = ["{}.sorted.deduped.bam".format(prefix)]

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("INDEX BAM for {} was started at {}.".format(prefix, str(start_time)))

task = SDK.Task(
    step="index_bam",
    prefix=prefix,
    in_files=in_files,
    ref_files=ref_files,
    param_file=param_file,
    param_uri=param_uri,
    ref_uri=ref_uri,
    in_uri=in_uri,
    out_uri=out_uri)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
# ref_files is empty here, so this subset test always holds and the REF
# download is never triggered.
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
task.upload_results()
task.cleanup()
예제 #9
0
    raise ValueError(
        "Unrecognized build - environment variable mode must be GRCh38 or GRCh37"
    )
# Driver fragment for the "vqsr_snp_model" step.
# NOTE(review): `prefix`, `ref_files`, `param_file`, `param_uri`, `ref_uri`,
# `in_uri` and `out_uri` are not assigned in this fragment; they are
# presumably set earlier in the full script -- confirm.
vcf = "{}.gt.vcf.gz".format(prefix)
tbi = "{}.gt.vcf.gz.tbi".format(prefix)
in_files = [vcf, tbi]

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("VQSR SNP MODEL for {} was started at {}.".format(
    prefix, str(start_time)))

task = SDK.Task(step="vqsr_snp_model",
                prefix=prefix,
                in_files=in_files,
                ref_files=ref_files,
                param_file=param_file,
                param_uri=param_uri,
                ref_uri=ref_uri,
                in_uri=in_uri,
                out_uri=out_uri)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
task.upload_results()
task.cleanup()
예제 #10
0
    )
# Driver fragment for the "vqsr_indel_apply" step.
# NOTE(review): `prefix`, `ref_files`, `param_file`, `param_uri`, `ref_uri`,
# `in_uri` and `out_uri` are not assigned in this fragment; they are
# presumably set earlier in the full script -- confirm.
vcf = "{}.gt.snp.recal.vcf".format(prefix)
idx = "{}.gt.snp.recal.vcf.idx".format(prefix)
recal_file = "{}.gt.snp.indel.recal.model".format(prefix)
tranches_file = "{}.gt.snp.indel.tranches".format(prefix)
in_files = [vcf, idx, recal_file, tranches_file]

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("VQSR INDEL APPLY for {} was started at {}.".format(
    prefix, str(start_time)))

task = SDK.Task(step="vqsr_indel_apply",
                prefix=prefix,
                in_files=in_files,
                ref_files=ref_files,
                param_file=param_file,
                param_uri=param_uri,
                ref_uri=ref_uri,
                in_uri=in_uri,
                out_uri=out_uri)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
task.upload_results()
task.cleanup()
예제 #11
0
# Driver fragment for the "mark_dups" step.
# NOTE(review): `prefix` is not assigned in this fragment; it is presumably
# set earlier in the full script -- confirm.
param_uri = os.environ["param_uri"]
# The parameter file name is the last path component of its URI.
param_file = param_uri.split("/")[-1]
ref_uri = os.environ["ref_uri"]
in_uri = os.environ["in_uri"]
out_uri = os.environ["out_uri"]
ref_files = []
in_files = ["{}.sorted.bam".format(prefix)]

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("MARK DUPS for {} was started at {}.".format(prefix, str(start_time)))

task = SDK.Task(step="mark_dups",
                prefix=prefix,
                in_files=in_files,
                ref_files=ref_files,
                param_file=param_file,
                param_uri=param_uri,
                ref_uri=ref_uri,
                in_uri=in_uri,
                out_uri=out_uri)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
# ref_files is empty here, so this subset test always holds and the REF
# download is never triggered.
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
task.upload_results()
task.cleanup()
예제 #12
0
    raise ValueError(
        "Unrecognized build - environment variable mode must be GRCh38 or GRCh37"
    )
# Driver fragment for the "bwa_mem" step.
# NOTE(review): `prefix`, `suffix`, `ref_files`, `param_file`, `param_uri`,
# `ref_uri`, `in_uri` and `out_uri` are not assigned in this fragment; they
# are presumably set earlier in the full script -- confirm.
R1 = "{}_R1{}".format(prefix, suffix)
R2 = "{}_R2{}".format(prefix, suffix)
in_files = [R1, R2]
threads = os.environ['threads']

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("BWA MEM for {} was started at {}.".format(prefix, str(start_time)))

task = SDK.Task(step="bwa_mem",
                prefix=prefix,
                threads=threads,
                in_files=in_files,
                ref_files=ref_files,
                param_file=param_file,
                param_uri=param_uri,
                ref_uri=ref_uri,
                in_uri=in_uri,
                out_uri=out_uri)
# One directory snapshot is taken up front and reused for all three checks.
dir_contents = os.listdir(".")

print("Current dir contents: {}".format(str(dir_contents)))
if not set(in_files).issubset(set(dir_contents)):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(dir_contents)):
    task.download_files("REF")
if param_file not in dir_contents:
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
예제 #13
0
        "Unrecognized build - environment variable build must be GRCh38 or GRCh37"
    )

# Driver fragment for the "base_recal" step.
# NOTE(review): `prefix`, `ref_files`, `param_file`, `param_uri`, `ref_uri`,
# `in_uri` and `out_uri` are not assigned in this fragment; they are
# presumably set earlier in the full script -- confirm.
bam = "{}.sorted.deduped.bam".format(prefix)
bai = "{}.sorted.deduped.bam.bai".format(prefix)
bqsr = "{}.base_recal_table.txt".format(prefix)
in_files = [bam, bai, bqsr]

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("BASE RECAL for {} was started at {}.".format(prefix, str(start_time)))

task = SDK.Task(step="base_recal",
                prefix=prefix,
                in_files=in_files,
                ref_files=ref_files,
                param_file=param_file,
                param_uri=param_uri,
                ref_uri=ref_uri,
                in_uri=in_uri,
                out_uri=out_uri)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
task.build_cmd()
task.run_cmd()
task.upload_results()
task.cleanup()
예제 #14
0
	raise ValueError("Unrecognized build - environment variable build must be GRCh38 or GRCh37")
# Driver fragment for the Sentieon "genotyper" step.
# NOTE(review): `ome`, `ref_files`, `gvcfs`, `prefix`, `in_files`,
# `param_file`, `sentieon_pkg`, `license_file`, `license_uri`, `param_uri`,
# `ref_uri`, `in_uri` and `out_uri` are not assigned in this fragment; they
# are presumably set earlier in the full script -- confirm.
if ome == "wes":
    # For "wes" runs the target file is also added to the reference files.
    target_file = os.environ["target_file"]
    ref_files.append(target_file)
else:
    target_file = None
start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("Sentieon's GENOTYPER for {} was started at {}.".format(", ".join(gvcfs), str(start_time)))

task = SDK.Task(
    step="genotyper",
    prefix=prefix,
    in_files=in_files,
    ref_files=ref_files,
    param_file=param_file,
    sentieon_pkg=sentieon_pkg,
    license_file=license_file,
    license_uri=license_uri,
    param_uri=param_uri,
    ref_uri=ref_uri,
    in_uri=in_uri,
    out_uri=out_uri,
    target_file=target_file)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if license_file not in os.listdir("."):
    task.download_files("SENTIEON")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
if ome == "wes" and target_file not in os.listdir("."):
예제 #15
0
def main():
    """Drive the Sentieon ``haplotyper`` step from environment settings.

    Reads ``start_point``, ``prefix``, ``param_file``, ``ref_uri``,
    ``in_uri``, ``out_uri``, ``assets_uri``, ``sentieon_pkg``,
    ``sentieon_license``, ``build``, ``ome`` and ``threads`` from
    ``os.environ``.  ``target_file`` is read only when ``ome`` is ``'wes'``
    and ``in_file`` only when ``start_point`` is not ``'fastq'``.  A missing
    variable raises ``KeyError``.

    The values are handed to ``SDK.Task`` and the task's methods are called
    in a fixed order.  Start, end and elapsed times are printed to stdout.
    """
    start_point = os.environ['start_point']
    prefix = os.environ['prefix']
    param_file = os.environ['param_file']
    ref_uri = os.environ['ref_uri']
    in_uri = os.environ['in_uri']
    out_uri = os.environ['out_uri']
    assets_uri = os.environ['assets_uri']
    sentieon_pkg = os.environ['sentieon_pkg']
    license_file = os.environ['sentieon_license']
    build = os.environ['build']
    ome = os.environ['ome']
    if ome == 'wes':
        target_file = os.environ['target_file']
    else:
        target_file = None
    if start_point == 'fastq':
        # Runs that started from FASTQ use the file names derived from prefix.
        bam = '{}.sorted.deduped.recalibrated.bam'.format(prefix)
        bai = '{}.sorted.deduped.recalibrated.bai'.format(prefix)
    else:
        bam = os.environ['in_file']
        # The index name swaps the alignment file's final extension for
        # ".bai".  splitext only looks at the last path component, so an
        # extension-less name yields "<name>.bai" (not ".bai") and dots in a
        # directory part are left alone.
        bai = '{}.bai'.format(os.path.splitext(bam)[0])

    in_files = [bam, bai]
    threads = os.environ['threads']

    start_time = datetime.now()
    print('Sentieons HAPLOTYPER for {} was started at {}.'.format(
        prefix, str(start_time)))

    task = SDK.Task(step='haplotyper',
                    prefix=prefix,
                    threads=threads,
                    in_files=in_files,
                    param_file=param_file,
                    sentieon_pkg=sentieon_pkg,
                    license_file=license_file,
                    ref_uri=ref_uri,
                    in_uri=in_uri,
                    out_uri=out_uri,
                    assets_uri=assets_uri,
                    target_file=target_file)

    # target_file is None unless ome == 'wes'; an empty value also skips this.
    if ome == 'wes' and target_file:
        task.download_files('TARGET')
    task.get_reference_files(build)
    task.download_files('INPUT')
    task.download_files('REF')
    task.download_files('SENTIEON')
    task.download_files('PARAMS')
    task.build_cmd()
    task.run_cmd()
    task.upload_results()
    task.cleanup()

    end_time = datetime.now()
    print('Sentieons HAPLOTYPER for {} ended at {}.'.format(
        prefix, str(end_time)))
    total_time = end_time - start_time
    print('Total time for Sentieons HAPLOTYPER was {}.'.format(
        str(total_time)))
예제 #16
0
else:
	target_file = None
# Driver fragment for the "base_recal_table" step.
# NOTE(review): `prefix`, `ome`, `target_file`, `ref_files`, `param_file`,
# `param_uri`, `ref_uri`, `in_uri` and `out_uri` are not assigned in this
# fragment; they are presumably set earlier in the full script -- confirm.
bam = "{}.sorted.deduped.bam".format(prefix)
bai = "{}.sorted.deduped.bam.bai".format(prefix)
in_files = [bam, bai]
threads = os.environ['threads']

start_time = datetime.now()
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("BASE RECAL TABLE for {} was started at {}.".format(prefix, str(start_time)))

task = SDK.Task(
    step="base_recal_table",
    prefix=prefix,
    threads=threads,
    in_files=in_files,
    ref_files=ref_files,
    param_file=param_file,
    param_uri=param_uri,
    ref_uri=ref_uri,
    in_uri=in_uri,
    out_uri=out_uri,
    target_file=target_file)
# Each download is skipped when the files are already in the working directory.
if not set(in_files).issubset(set(os.listdir("."))):
    task.download_files("INPUT")
if not set(ref_files).issubset(set(os.listdir("."))):
    task.download_files("REF")
if param_file not in os.listdir("."):
    task.download_files("PARAMS")
# The target file is only considered for "wes" runs.
if ome == "wes" and target_file not in os.listdir("."):
    task.download_files("TARGET")
task.build_cmd()
task.run_cmd()