def picard_sortvcf(case_id, vcf_path, output_vcf, reference_fasta_dict, engine, logger):
    """Sort one or more VCFs with Picard SortVcf and record runtime metrics.

    Parameters:
        case_id: case identifier used for the step marker and metrics rows.
        vcf_path: iterable of input VCF paths (each becomes an ``I=`` argument).
        output_vcf: basename for the sorted output VCF, written to the CWD.
        reference_fasta_dict: path to the reference ``.dict`` sequence dictionary.
        engine: SQLAlchemy engine used by ``postgres`` to store metrics.
        logger: logger for progress messages.

    Returns:
        Absolute path of the sorted VCF.

    Side effects: runs Picard via ``pipe_util.do_command``, writes runtime
    metrics to the database, and drops a step-completion marker so reruns skip
    the work.
    """
    files = vcf_path
    step_dir = os.getcwd()
    os.makedirs(step_dir, exist_ok=True)
    srt_vcf_path = os.path.join(step_dir, output_vcf)
    logger.info("picard_sortvcf_output=%s" % srt_vcf_path)

    # Size the JVM from the machine: total RAM in GiB, all-but-two CPUs for GC.
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    mem_gib = mem_bytes / (1024.0 ** 3)
    thread = int(multiprocessing.cpu_count()) - 2
    # NOTE(review): evaluates as mem_gib - (thread / 2) due to precedence —
    # preserved as-is; confirm against the original sizing intent.
    java_heap = int(int(mem_gib) - thread / 2)

    # BUG FIX: the check previously used "_sortvcf" while the marker was
    # created as "_SortVcf", so completed runs were never detected and the
    # step always re-ran. Use one consistent key for check and creation.
    step_name = case_id + "_sortvcf"
    if pipe_util.already_step(step_dir, step_name, logger):
        logger.info("already completed step `sortvcf` of: %s" % vcf_path)
    else:
        logger.info("running step `picard SortVcf` of: %s" % vcf_path)
        home_dir = os.path.join("/home", getpass.getuser())
        cmd = [
            "java",
            "-Djava.io.tmpdir=/tmp/job_tmp",
            "-XX:ParallelGCThreads=" + str(thread),
            "-Xmx" + str(java_heap) + "G",
            "-d64",
            "-jar",
            os.path.join(home_dir, "tools/picard-tools/picard.jar"),
            "SortVcf",
            "OUTPUT=" + srt_vcf_path,
            "SEQUENCE_DICTIONARY=" + reference_fasta_dict,
        ]
        # One I= argument per input VCF.
        for vcf in vcf_path:
            cmd.append("I=" + vcf)

        output = pipe_util.do_command(cmd, logger)

        # Parse /usr/bin/time-style output and persist the runtime metrics.
        metrics = time_util.parse_time(output)
        met = SRT(
            case_id=case_id,
            tool="picard_sortvcf",
            files=files,
            systime=metrics["system_time"],
            usertime=metrics["user_time"],
            elapsed=metrics["wall_clock"],
            cpu=metrics["percent_of_cpu"],
            max_resident_time=metrics["maximum_resident_set_size"],
        )
        postgres.create_table(engine, met)
        postgres.add_metrics(engine, met)
        pipe_util.create_already_step(step_dir, step_name, logger)
        logger.info("completed running step sortvcf of: %s" % vcf_path)
    return srt_vcf_path