Example #1
0
def picard_sortvcf(case_id, vcf_path, output_vcf, reference_fasta_dict, engine, logger):
    """Sort one or more VCF files with Picard ``SortVcf`` and record run metrics.

    The sorted VCF is written to ``output_vcf`` joined onto the current
    working directory. A completion marker keyed on ``case_id`` is consulted
    through ``pipe_util.already_step``; when it is present the command is
    skipped. Otherwise the Picard command is run, the timing output returned
    by ``pipe_util.do_command`` is parsed with ``time_util.parse_time``, the
    metrics are stored through ``postgres`` and the marker is created.

    Args:
        case_id: Identifier used for the completion marker name and stored
            with the metrics record.
        vcf_path: Iterable of input VCF paths, each passed to Picard as an
            ``I=`` argument. A single path given as a ``str`` is accepted and
            treated as one input.
        output_vcf: Name of the sorted VCF, joined onto the current working
            directory.
        reference_fasta_dict: Path passed to Picard as ``SEQUENCE_DICTIONARY``.
        engine: Database handle forwarded to ``postgres.create_table`` and
            ``postgres.add_metrics``.
        logger: Logger used for progress messages and forwarded to the
            ``pipe_util`` helpers.

    Returns:
        The path of the sorted VCF file.
    """
    step_dir = os.getcwd()
    srt_vcf_path = os.path.join(step_dir, output_vcf)
    logger.info("picard_sortvcf_output=%s", srt_vcf_path)

    # One marker name for both the check and the creation. The check used to
    # look for "_sortvcf" while the marker was written as "_SortVcf", so a
    # finished step was presumably never detected (assumes the pipe_util
    # helpers compare names case-sensitively -- TODO confirm). "_SortVcf" is
    # kept because that is the name earlier runs recorded.
    step_name = case_id + "_SortVcf"

    if pipe_util.already_step(step_dir, step_name, logger):
        logger.info("already completed step `sortvcf` of: %s", vcf_path)
        return srt_vcf_path

    logger.info("running step `picard SortVcf` of: %s", vcf_path)

    # A bare string would otherwise be iterated character by character.
    inputs = [vcf_path] if isinstance(vcf_path, str) else list(vcf_path)

    # Size the JVM from the host: leave two cores free and hold back roughly
    # half a GiB of heap per GC thread. Both values are clamped to 1 so that
    # small hosts do not produce an invalid "-Xmx0G" or a negative thread
    # count.
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    mem_gib = mem_bytes / (1024.0 ** 3)
    thread = max(1, int(multiprocessing.cpu_count()) - 2)
    java_heap = max(1, int(int(mem_gib) - thread / 2))

    home_dir = os.path.join("/home", getpass.getuser())
    cmd = [
        "java",
        "-Djava.io.tmpdir=/tmp/job_tmp",
        "-XX:ParallelGCThreads=" + str(thread),
        "-Xmx" + str(java_heap) + "G",
        # NOTE(review): "-d64" is rejected by newer JVMs (believed removed in
        # JDK 10) -- confirm the deployed Java version still accepts it.
        "-d64",
        "-jar",
        os.path.join(home_dir, "tools/picard-tools/picard.jar"),
        "SortVcf",
        "OUTPUT=" + srt_vcf_path,
        "SEQUENCE_DICTIONARY=" + reference_fasta_dict,
    ]
    cmd.extend("I=" + path for path in inputs)

    output = pipe_util.do_command(cmd, logger)
    metrics = time_util.parse_time(output)
    met = SRT(
        case_id=case_id,
        tool="picard_sortvcf",
        files=vcf_path,
        systime=metrics["system_time"],
        usertime=metrics["user_time"],
        elapsed=metrics["wall_clock"],
        cpu=metrics["percent_of_cpu"],
        max_resident_time=metrics["maximum_resident_set_size"],
    )
    postgres.create_table(engine, met)
    postgres.add_metrics(engine, met)
    pipe_util.create_already_step(step_dir, step_name, logger)
    logger.info("completed running step sortvcf of: %s", vcf_path)
    return srt_vcf_path