def Run(args):
    """Print one ``callVarBam`` command line per chunk of the reference.

    The reference index (``<ref_fn>.fai``) is read line by line. Each contig
    is split into consecutive windows of ``args.refChunkSize`` bases (the last
    window is clipped to the contig length) and a complete command line is
    written to stdout for every window. Nothing is executed here.

    Contigs that are not listed in ``major_contigs`` are skipped unless
    ``args.includingAllContigs`` is truthy. When a BED file is given, windows
    for which ``is_region_in`` reports no overlap are skipped as well.

    Args:
        args: parsed command-line namespace; the attributes read are the ones
            referenced in the body below.

    Raises:
        ValueError: if ``args.refChunkSize`` is not a positive number (the
            chunking loop could otherwise never terminate).
    """
    basedir = os.path.dirname(__file__)
    callVarBamBin = basedir + "/../clair.py callVarBam"

    # NOTE(review): exit_on_not_found=True presumably aborts the program when
    # the executable / file is missing -- confirm against the helper.
    pypyBin = executable_command_string_from(args.pypy, exit_on_not_found=True)
    samtoolsBin = executable_command_string_from(args.samtools, exit_on_not_found=True)
    chkpnt_fn = file_path_from(args.chkpnt_fn, suffix=".meta", exit_on_not_found=True)
    bam_fn = file_path_from(args.bam_fn, exit_on_not_found=True)
    ref_fn = file_path_from(args.ref_fn, exit_on_not_found=True)
    fai_fn = file_path_from(args.ref_fn + ".fai", exit_on_not_found=True)
    bed_fn = file_path_from(args.bed_fn)
    vcf_fn = file_path_from(args.vcf_fn)

    output_prefix = args.output_prefix
    af_threshold = args.threshold

    tree = bed_tree_from(bed_file_path=bed_fn)

    minCoverage = args.minCoverage
    sampleName = args.sampleName
    delay = args.delay
    threads = args.tensorflowThreads
    is_include_all_contigs = args.includingAllContigs
    region_chunk_size = args.refChunkSize

    stop_consider_left_edge = command_option_from(args.stop_consider_left_edge, 'stop_consider_left_edge')
    log_path = command_option_from(args.log_path, 'log_path', option_value=args.log_path)
    pysam_for_all_indel_bases = command_option_from(args.pysam_for_all_indel_bases, 'pysam_for_all_indel_bases')
    haploid_mode = command_option_from(args.haploid, 'haploid')
    output_for_ensemble = command_option_from(args.output_for_ensemble, 'output_for_ensemble')
    debug = command_option_from(args.debug, 'debug')
    qual = command_option_from(args.qual, 'qual', option_value=args.qual)
    fast_plotting = command_option_from(args.fast_plotting, 'fast_plotting')

    call_var_bam_command_options = [
        ExecuteCommand('python3', callVarBamBin),
        CommandOption('chkpnt_fn', chkpnt_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('bam_fn', bam_fn),
        CommandOption('threshold', af_threshold),
        CommandOption('minCoverage', minCoverage),
        CommandOption('pypy', pypyBin),
        CommandOption('samtools', samtoolsBin),
        CommandOption('delay', delay),
        CommandOption('threads', threads),
        CommandOption('sampleName', sampleName),
        # optional command options
        CommandOption('vcf_fn', vcf_fn) if vcf_fn is not None else None,
        qual,
        stop_consider_left_edge,
        debug,
        pysam_for_all_indel_bases,
        haploid_mode,
        output_for_ensemble,
    ]

    # Extra options that are only emitted when --activation_only is set.
    activation_only_command_options = [
        CommandOptionWithNoValue('activation_only'),
        log_path,
        CommandOption('max_plot', args.max_plot),
        CommandOption('parallel_level', args.parallel_level),
        CommandOption('workers', args.workers),
        fast_plotting,
    ] if args.activation_only else []

    is_bed_file_provided = bed_fn is not None
    command_string = command_string_from(call_var_bam_command_options + activation_only_command_options)

    # A zero or negative step would keep region_end below contig_length forever.
    if region_chunk_size <= 0:
        raise ValueError("refChunkSize must be a positive integer, got %r" % (region_chunk_size,))

    with open(fai_fn, 'r') as fai_fp:
        for row in fai_fp:
            # Tolerate blank lines (e.g. a trailing newline) in the index.
            if not row.strip():
                continue

            columns = row.strip().split("\t")

            contig_name = columns[0]
            if not is_include_all_contigs and str(contig_name) not in major_contigs:
                continue

            region_start, region_end = 0, 0
            contig_length = int(columns[1])
            while region_end < contig_length:
                region_start = region_end
                # Clip the final window to the end of the contig.
                region_end = min(region_start + region_chunk_size, contig_length)

                output_fn = "%s.%s_%d_%d.vcf" % (output_prefix, contig_name, region_start, region_end)

                is_region_in_bed = is_bed_file_provided and is_region_in(tree, contig_name, region_start, region_end)
                need_output_command = not is_bed_file_provided or is_region_in_bed
                if not need_output_command:
                    continue

                additional_command_options = [
                    CommandOption('ctgName', contig_name),
                    CommandOption('ctgStart', region_start),
                    CommandOption('ctgEnd', region_end),
                    CommandOption('call_fn', output_fn),
                    CommandOption('bed_fn', bed_fn) if is_region_in_bed else None
                ]
                print(command_string + " " + command_string_from(additional_command_options))
def Run(args):
    """Run tensor creation piped into Tensor2Bin and wait for both to finish.

    Two child processes are started through ``subprocess_popen``:
    ``CreateTensorPileup`` (when ``args.pileup`` is truthy) or
    ``CreateTensorFullAlignment`` otherwise, whose stdout is connected to the
    stdin of ``Tensor2Bin``. The stdout of ``Tensor2Bin`` is redirected to this
    process' stderr. The process handles are stored on the module-level ``c``
    object.

    Returns early (``None``) in full-alignment mode when no BAM path is
    resolved. Calls ``sys.exit`` when the platform is unsupported, when
    ``args.ctgName`` is missing, or when the child processes cannot be started.

    Args:
        args: parsed command-line namespace; the attributes read are the ones
            referenced in the body below.

    Raises:
        KeyboardInterrupt: re-raised after asking both children to terminate.
        Exception: any other error raised while waiting is re-raised after
            asking both children to terminate.
    """
    basedir = dirname(__file__)
    CTP_Bin = basedir + "/../clair3.py CreateTensorPileup"
    CTFA_Bin = basedir + "/../clair3.py CreateTensorFullAlignment"
    T2B_Bin = basedir + "/../clair3.py Tensor2Bin"

    # Optional random start-up delay in [0, args.delay) seconds.
    if args.delay > 0:
        delay = random.randrange(0, args.delay)
        print("[INFO] Delay %d seconds before starting tensor creation ..." % (delay))
        sleep(delay)

    pypyBin = executable_command_string_from(args.pypy, exit_on_not_found=True)
    pythonBin = executable_command_string_from(args.python, exit_on_not_found=True)
    samtoolsBin = executable_command_string_from(args.samtools, exit_on_not_found=True)

    if args.pileup:
        bam_fn = file_path_from(args.bam_fn, exit_on_not_found=True)
    else:
        # In full-alignment mode a missing BAM is not an error: skip the job.
        bam_fn = file_path_from(args.bam_fn)
        if bam_fn is None or bam_fn == "":
            print(
                log_warning(
                    "[WARNING] Skip full-alignment variant calling for empty full-alignment regions"
                ))
            return

    ref_fn = file_path_from(args.ref_fn, exit_on_not_found=True)
    bed_fn = file_path_from(args.bed_fn)
    vcf_fn = file_path_from(args.vcf_fn)
    var_fn = file_path_from(args.var_fn, exit_on_not_found=True)
    bin_fn = args.bin_fn
    extend_bed = file_path_from(args.extend_bed)
    full_aln_regions = file_path_from(args.full_aln_regions)

    platform = args.platform
    if not platform or platform not in param.support_platform:
        sys.exit(
            "[ERROR] Provided platform are not in support platform list [ont, hifi, ilmn]"
        )

    pileup = args.pileup
    ctgName = args.ctgName
    # A falsy --min_af falls back to the per-platform default.
    min_af = args.min_af if args.min_af else param.min_af_dict[platform]
    snp_min_af = args.snp_min_af
    indel_min_af = args.indel_min_af

    if ctgName is None:
        sys.exit(
            "--ctgName must be specified. You can call variants on multiple chromosomes simultaneously."
        )

    pileup_mode = command_option_from(args.pileup, 'pileup')
    phasing_info_mode = command_option_from(args.phasing_info_in_bam, 'phasing_info_in_bam')
    add_no_phasing_mode = command_option_from(
        args.add_no_phasing_data_training, 'add_no_phasing_data_training')
    allow_duplicate_mode = command_option_from(args.allow_duplicate_chr_pos, 'allow_duplicate_chr_pos')
    maximum_non_variant_ratio = CommandOption('maximum_non_variant_ratio', args.maximum_non_variant_ratio)
    shuffle_mode = command_option_from(args.shuffle, 'shuffle')

    # Region / chunk options are only forwarded when both ends are given and
    # ordered consistently; otherwise they stay None.
    ctgStart = None
    ctgEnd = None
    chunk_id = None
    chunk_num = None
    if args.ctgStart is not None and args.ctgEnd is not None and int(
            args.ctgStart) <= int(args.ctgEnd):
        ctgStart = CommandOption('ctgStart', args.ctgStart)
        ctgEnd = CommandOption('ctgEnd', args.ctgEnd)

    if args.chunk_id is not None and args.chunk_num is not None and int(
            args.chunk_id) <= int(args.chunk_num):
        chunk_id = CommandOption('chunk_id', args.chunk_id)
        chunk_num = CommandOption('chunk_num', args.chunk_num)

    CT_Bin = CTP_Bin if pileup else CTFA_Bin
    create_tensor_command_options = [
        pypyBin,
        CT_Bin,
        CommandOption('bam_fn', bam_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('vcf_fn', vcf_fn),
        CommandOption('ctgName', ctgName),
        CommandOption('platform', platform),
        CommandOption('samtools', samtoolsBin),
        CommandOption('bed_fn', bed_fn),
        CommandOption('extend_bed', extend_bed),
        CommandOption('min_af', min_af),
        CommandOption('snp_min_af', snp_min_af),
        CommandOption('indel_min_af', indel_min_af),
        ctgStart,
        ctgEnd,
        chunk_id,
        chunk_num,
    ]

    if not pileup:
        create_tensor_command_options.append(phasing_info_mode)
        create_tensor_command_options.append(add_no_phasing_mode)
        create_tensor_command_options.append(
            CommandOption('full_aln_regions', full_aln_regions))

    compress_tensor_command_options = [
        pythonBin,
        T2B_Bin,
        CommandOption('platform', platform),
        CommandOption('var_fn', var_fn),
        CommandOption('bin_fn', bin_fn),
        CommandOption('bed_fn', bed_fn),
        chunk_id,
        chunk_num,
        allow_duplicate_mode,
        maximum_non_variant_ratio,
        shuffle_mode,
    ]
    if pileup:
        compress_tensor_command_options.append(pileup_mode)

    try:
        c.create_tensor = subprocess_popen(
            shlex.split(command_string_from(create_tensor_command_options)),
        )
        c.compress_tensor = subprocess_popen(
            shlex.split(command_string_from(compress_tensor_command_options)),
            stdin=c.create_tensor.stdout,
            stdout=sys.stderr)
    except Exception as e:
        print(e, file=sys.stderr)
        sys.exit("Failed to start required processes. Exiting...")

    # check_return_code is invoked by SIGALRM, first after 2 seconds.
    signal.signal(signal.SIGALRM, check_return_code)
    signal.alarm(2)

    try:
        c.compress_tensor.wait()
        signal.alarm(0)  # cancel any pending alarm once the consumer is done
        c.create_tensor.stdout.close()
        c.create_tensor.wait()
    except KeyboardInterrupt:
        print(
            "KeyboardInterrupt received when waiting at Tensor2Bin, terminating all scripts."
        )
        try:
            c.compress_tensor.terminate()
            c.create_tensor.terminate()
        except Exception as terminate_error:
            print(terminate_error)
        raise
    except Exception as e:
        print(
            "Exception received when waiting at CreateTensor, terminating all scripts."
        )
        print(e)
        try:
            c.compress_tensor.terminate()
            c.create_tensor.terminate()
        except Exception as terminate_error:
            # A distinct name keeps the outer exception bound; reusing `e`
            # here would unbind it and break the re-raise below.
            print(terminate_error)
        raise
def Run(args):
    """Run the three-stage variant calling pipeline and wait for it to finish.

    Three child processes are started through ``subprocess_popen`` and chained
    stdout -> stdin:

    1. ``GetTruth`` when a VCF path is resolved from ``args.vcf_fn``, otherwise
       ``ExtractVariantCandidates``;
    2. ``CreateTensor``;
    3. ``call_var`` (optionally prefixed with ``taskset -c <cpus>``), whose
       stdout is redirected to this process' stderr.

    The process handles are stored on the module-level ``c`` object. Calls
    ``sys.exit`` when ``args.ctgName`` is missing or when the child processes
    cannot be started.

    Args:
        args: parsed command-line namespace; the attributes read are the ones
            referenced in the body below.

    Raises:
        KeyboardInterrupt: re-raised after asking all children to terminate.
        Exception: any other error raised while waiting is re-raised after
            asking all children to terminate.
    """
    basedir = dirname(__file__)
    EVCBin = basedir + "/../clair.py ExtractVariantCandidates"
    GTBin = basedir + "/../clair.py GetTruth"
    CTBin = basedir + "/../clair.py CreateTensor"
    CVBin = basedir + "/../clair.py call_var"

    pypyBin = executable_command_string_from(args.pypy, exit_on_not_found=True)
    samtoolsBin = executable_command_string_from(args.samtools, exit_on_not_found=True)

    chkpnt_fn = file_path_from(args.chkpnt_fn, suffix=".meta", exit_on_not_found=True)
    bam_fn = file_path_from(args.bam_fn, exit_on_not_found=True)
    ref_fn = file_path_from(args.ref_fn, exit_on_not_found=True)
    vcf_fn = file_path_from(args.vcf_fn)
    bed_fn = file_path_from(args.bed_fn)

    dcov = args.dcov
    call_fn = args.call_fn
    af_threshold = args.threshold
    minCoverage = int(args.minCoverage)
    sampleName = args.sampleName
    ctgName = args.ctgName
    if ctgName is None:
        sys.exit(
            "--ctgName must be specified. You can call variants on multiple chromosomes simultaneously."
        )

    stop_consider_left_edge = command_option_from(args.stop_consider_left_edge, 'stop_consider_left_edge')
    log_path = command_option_from(args.log_path, 'log_path', option_value=args.log_path)
    pysam_for_all_indel_bases = command_option_from(
        args.pysam_for_all_indel_bases, 'pysam_for_all_indel_bases')
    haploid_precision_mode = command_option_from(args.haploid_precision, 'haploid_precision')
    haploid_sensitive_mode = command_option_from(args.haploid_sensitive, 'haploid_sensitive')
    output_for_ensemble = command_option_from(args.output_for_ensemble, 'output_for_ensemble')
    pipe_line = command_option_from(args.pipe_line, 'pipe_line')
    store_loaded_mini_match = command_option_from(args.store_loaded_mini_match, 'store_loaded_mini_match')
    only_prediction = command_option_from(args.only_prediction, 'only_prediction')
    debug = command_option_from(args.debug, 'debug')
    qual = command_option_from(args.qual, 'qual', option_value=args.qual)
    fast_plotting = command_option_from(args.fast_plotting, 'fast_plotting')

    # The region is only forwarded when both ends are given and start <= end.
    ctgStart = None
    ctgEnd = None
    if args.ctgStart is not None and args.ctgEnd is not None and int(
            args.ctgStart) <= int(args.ctgEnd):
        ctgStart = CommandOption('ctgStart', args.ctgStart)
        ctgEnd = CommandOption('ctgEnd', args.ctgEnd)

    # Never request more CPUs than the machine reports.
    maxCpus = multiprocessing.cpu_count()
    if args.threads is None:
        numCpus = maxCpus
    else:
        numCpus = min(args.threads, maxCpus)

    # Pin the caller to a random subset of CPUs when `taskset` is available.
    _cpuSet = ",".join(
        str(x) for x in random.sample(range(0, maxCpus), numCpus))
    taskSet = "taskset -c %s" % (_cpuSet)
    try:
        subprocess.check_output("which %s" % ("taskset"), shell=True)
    except (subprocess.CalledProcessError, OSError):
        # `taskset` not found (or no shell to look it up with): run unpinned.
        taskSet = ""

    # Optional random start-up delay in [0, args.delay) seconds.
    if args.delay > 0:
        delay = random.randrange(0, args.delay)
        print("Delay %d seconds before starting variant calling ..." %
              (delay),
              file=sys.stderr)
        sleep(delay)

    extract_variant_candidate_command_options = [
        pypyBin,
        EVCBin,
        CommandOption('bam_fn', bam_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('bed_fn', bed_fn),
        CommandOption('ctgName', ctgName),
        ctgStart,
        ctgEnd,
        CommandOption('threshold', af_threshold),
        CommandOption('minCoverage', minCoverage),
        CommandOption('samtools', samtoolsBin)
    ]
    get_truth_command_options = [
        pypyBin,
        GTBin,
        CommandOption('vcf_fn', vcf_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('ctgName', ctgName),
        ctgStart,
        ctgEnd
    ]
    create_tensor_command_options = [
        pypyBin,
        CTBin,
        CommandOption('bam_fn', bam_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('ctgName', ctgName),
        ctgStart,
        ctgEnd,
        stop_consider_left_edge,
        CommandOption('samtools', samtoolsBin),
        CommandOption('dcov', dcov)
    ]
    call_variant_command_options = [
        taskSet,
        ExecuteCommand('python', CVBin),
        CommandOption('chkpnt_fn', chkpnt_fn),
        CommandOption('call_fn', call_fn),
        CommandOption('bam_fn', bam_fn),
        CommandOption('sampleName', sampleName),
        CommandOption('time_counter_file_name', args.time_counter_file_name),
        CommandOption('threads', numCpus),
        CommandOption('ref_fn', ref_fn),
        pysam_for_all_indel_bases,
        haploid_precision_mode,
        haploid_sensitive_mode,
        output_for_ensemble,
        pipe_line,
        store_loaded_mini_match,
        only_prediction,
        qual,
        debug
    ]
    # Extra options that are only passed when --activation_only is set.
    call_variant_with_activation_command_options = [
        CommandOptionWithNoValue('activation_only'),
        log_path,
        CommandOption('max_plot', args.max_plot),
        CommandOption('parallel_level', args.parallel_level),
        CommandOption('workers', args.workers),
        fast_plotting,
    ] if args.activation_only else []

    # With a truth VCF the first stage is GetTruth instead of candidate extraction.
    is_true_variant_call = vcf_fn is not None
    try:
        c.extract_variant_candidate = subprocess_popen(
            shlex.split(
                command_string_from(
                    get_truth_command_options if is_true_variant_call
                    else extract_variant_candidate_command_options)))

        c.create_tensor = subprocess_popen(
            shlex.split(command_string_from(create_tensor_command_options)),
            stdin=c.extract_variant_candidate.stdout)

        c.call_variant = subprocess_popen(
            shlex.split(
                command_string_from(
                    call_variant_command_options +
                    call_variant_with_activation_command_options)),
            stdin=c.create_tensor.stdout,
            stdout=sys.stderr)
    except Exception as e:
        print(e, file=sys.stderr)
        sys.exit("Failed to start required processes. Exiting...")

    # check_return_code is invoked by SIGALRM, first after 2 seconds.
    signal.signal(signal.SIGALRM, check_return_code)
    signal.alarm(2)

    try:
        c.call_variant.wait()
        c.create_tensor.stdout.close()
        c.create_tensor.wait()
        c.extract_variant_candidate.stdout.close()
        c.extract_variant_candidate.wait()
    except KeyboardInterrupt:
        print(
            "KeyboardInterrupt received when waiting at CallVarBam, terminating all scripts."
        )
        try:
            c.call_variant.terminate()
            c.create_tensor.terminate()
            c.extract_variant_candidate.terminate()
        except Exception as terminate_error:
            print(terminate_error)
        raise
    except Exception as e:
        print(
            "Exception received when waiting at CallVarBam, terminating all scripts."
        )
        print(e)
        try:
            c.call_variant.terminate()
            c.create_tensor.terminate()
            c.extract_variant_candidate.terminate()
        except Exception as terminate_error:
            # A distinct name keeps the outer exception bound; reusing `e`
            # here would unbind it and break the re-raise below.
            print(terminate_error)
        raise
def Run(args):
    """Run the tensor-creation -> variant-calling pipeline and wait for it.

    Child processes are started through ``subprocess_popen`` and chained
    stdout -> stdin:

    * ``RealignReads`` (only when ``args.need_realignment`` is truthy, the
      platform is ``'ilmn'`` and pileup mode is off), feeding
    * ``CreateTensorPileup`` (pileup mode) or ``CreateTensorFullAlignment``,
      feeding
    * ``CallVariants`` (optionally prefixed with ``taskset -c <cpus>``), whose
      stdout is redirected to this process' stderr.

    The process handles are stored on the module-level ``c`` object.

    Returns early (``None``) in full-alignment mode when no BAM path is
    resolved. Calls ``sys.exit`` when the platform is unsupported, when
    ``args.ctgName`` is missing, or when the child processes cannot be started.

    Args:
        args: parsed command-line namespace; the attributes read are the ones
            referenced in the body below.

    Raises:
        KeyboardInterrupt: re-raised after asking all children to terminate.
        Exception: any other error raised while waiting is re-raised after
            asking all children to terminate.
    """
    basedir = dirname(__file__)
    CTP_Bin = basedir + "/../clair3.py CreateTensorPileup"
    CTFA_Bin = basedir + "/../clair3.py CreateTensorFullAlignment"
    RR_Bin = basedir + "/../clair3.py RealignReads"
    CVBin = basedir + "/../clair3.py CallVariants"

    # Optional random start-up delay in [0, args.delay) seconds.
    if args.delay > 0:
        delay = random.randrange(0, args.delay)
        print("[INFO] Delay %d seconds before starting variant calling ..." % (delay))
        sleep(delay)

    pypyBin = executable_command_string_from(args.pypy, exit_on_not_found=True)
    pythonBin = executable_command_string_from(args.python, exit_on_not_found=True)
    samtoolsBin = executable_command_string_from(args.samtools, exit_on_not_found=True)

    chkpnt_fn = args.chkpnt_fn
    if args.pileup:
        bam_fn = file_path_from(args.bam_fn, exit_on_not_found=True)
    else:
        # In full-alignment mode a missing BAM is not an error: skip the job.
        bam_fn = file_path_from(args.bam_fn)
        if bam_fn is None or bam_fn == "":
            print(log_warning(
                "[WARNING] Skip full-alignment variant calling for empty full-alignment regions"))
            return

    ref_fn = file_path_from(args.ref_fn, exit_on_not_found=True)
    bed_fn = file_path_from(args.bed_fn)
    vcf_fn = file_path_from(args.vcf_fn)
    extend_bed = file_path_from(args.extend_bed)
    full_aln_regions = file_path_from(args.full_aln_regions)

    platform = args.platform
    if not platform or platform not in param.support_platform:
        sys.exit("[ERROR] Provided platform are not in support platform list [ont, hifi, ilmn]")

    pileup = args.pileup
    call_fn = args.call_fn
    sampleName = args.sampleName
    ctgName = args.ctgName
    need_realignment = args.need_realignment and platform == 'ilmn' and not pileup
    # A falsy --min_af falls back to the per-platform default.
    min_af = args.min_af if args.min_af else param.min_af_dict[platform]
    snp_min_af = args.snp_min_af
    indel_min_af = args.indel_min_af

    if ctgName is None:
        sys.exit("--ctgName must be specified. You can call variants on multiple chromosomes simultaneously.")

    haploid_precise_mode = command_option_from(args.haploid_precise, 'haploid_precise')
    haploid_sensitive_mode = command_option_from(args.haploid_sensitive, 'haploid_sensitive')
    output_for_ensemble = command_option_from(args.output_for_ensemble, 'output_for_ensemble')
    showRef_mode = command_option_from(args.showRef, 'showRef')
    qual = command_option_from(args.qual, 'qual', option_value=args.qual)

    add_indel_length_mode = CommandOption('add_indel_length', args.add_indel_length)
    phasing_info_in_bam_mode = command_option_from(args.phasing_info_in_bam, 'phasing_info_in_bam')
    need_phasing_mode = command_option_from(args.need_phasing, 'need_phasing')
    is_from_tables_mode = command_option_from(args.is_from_tables, 'is_from_tables')
    pileup_mode = command_option_from(args.pileup, 'pileup')
    gvcf_mode = CommandOption('gvcf', args.gvcf)
    fast_mode = CommandOption('fast_mode', args.fast_mode)
    call_snp_only_mode = CommandOption('call_snp_only', args.call_snp_only)
    enable_long_indel_mode = CommandOption('enable_long_indel', args.enable_long_indel)

    # Region / chunk options are only forwarded when both ends are given and
    # ordered consistently; otherwise they stay None.
    ctgStart = None
    ctgEnd = None
    chunk_id = None
    chunk_num = None
    if args.ctgStart is not None and args.ctgEnd is not None and int(args.ctgStart) <= int(args.ctgEnd):
        ctgStart = CommandOption('ctgStart', args.ctgStart)
        ctgEnd = CommandOption('ctgEnd', args.ctgEnd)

    if args.chunk_id is not None and args.chunk_num is not None and int(args.chunk_id) <= int(args.chunk_num):
        chunk_id = CommandOption('chunk_id', args.chunk_id)
        chunk_num = CommandOption('chunk_num', args.chunk_num)

    if machine() in {"aarch64", "arm64"} or system() == "Darwin":
        # No CPU pinning on ARM machines or macOS.
        taskSet = ""
    else:
        # Pin the caller to a random subset of the CPUs this process may use.
        sched_getaffinity_list = list(os.sched_getaffinity(0))
        maxCpus = len(sched_getaffinity_list)
        if args.tensorflow_threads is None:
            numCpus = maxCpus
        else:
            numCpus = min(args.tensorflow_threads, maxCpus)

        _cpuSet = ",".join(str(x) for x in random.sample(sched_getaffinity_list, numCpus))

        taskSet = "taskset -c %s" % (_cpuSet)
        try:
            subprocess.check_output("which %s" % ("taskset"), shell=True)
        except (subprocess.CalledProcessError, OSError):
            # `taskset` not found (or no shell to look it up with): run unpinned.
            taskSet = ""

    if need_realignment:
        realign_reads_command_options = [
            pypyBin,
            RR_Bin,
            CommandOption('bam_fn', bam_fn),
            CommandOption('ref_fn', ref_fn),
            CommandOption('ctgName', ctgName),
            ctgStart,
            ctgEnd,
            chunk_id,
            chunk_num,
            CommandOption('samtools', samtoolsBin),
            CommandOption('extend_bed', extend_bed),
            CommandOption('full_aln_regions', full_aln_regions),
        ]
        # The tensor step is told to read "PIPE" because its stdin is wired to
        # the realigner's stdout below.
        bam_fn = "PIPE"

    CT_Bin = CTP_Bin if pileup else CTFA_Bin
    create_tensor_command_options = [
        pypyBin,
        CT_Bin,
        CommandOption('bam_fn', bam_fn),
        CommandOption('ref_fn', ref_fn),
        CommandOption('vcf_fn', vcf_fn),
        CommandOption('ctgName', ctgName),
        CommandOption('min_af', min_af),
        CommandOption('platform', platform),
        CommandOption('samtools', samtoolsBin),
        CommandOption('bed_fn', bed_fn),
        CommandOption('extend_bed', extend_bed),
        CommandOption('sampleName', args.sampleName),
        CommandOption('minCoverage', args.minCoverage),
        CommandOption('minMQ', args.minMQ),
        ctgStart,
        ctgEnd,
        chunk_id,
        chunk_num,
        gvcf_mode,
    ]

    if not pileup:
        create_tensor_command_options.append(phasing_info_in_bam_mode)
        create_tensor_command_options.append(need_phasing_mode)
        create_tensor_command_options.append(CommandOption('full_aln_regions', full_aln_regions))
    else:
        create_tensor_command_options.append(CommandOption('snp_min_af', snp_min_af))
        create_tensor_command_options.append(CommandOption('indel_min_af', indel_min_af))
        create_tensor_command_options.append(fast_mode)
        create_tensor_command_options.append(call_snp_only_mode)

        # NOTE(review): the GVCF options are attached to the pileup tensor
        # step only -- confirm this nesting against the upstream layout.
        if (args.gvcf):
            create_tensor_command_options.append(CommandOption('base_err', args.base_err))
            create_tensor_command_options.append(CommandOption('gq_bin_size', args.gq_bin_size))
            create_tensor_command_options.append(CommandOption('temp_file_dir', args.temp_file_dir))
            if args.bp_resolution:
                create_tensor_command_options.append(CommandOptionWithNoValue('bp_resolution'))

    call_variant_command_options = [
        taskSet,
        pythonBin,
        CVBin,
        CommandOption('chkpnt_fn', chkpnt_fn),
        CommandOption('call_fn', call_fn),
        CommandOption('sampleName', sampleName),
        CommandOption('ref_fn', ref_fn),
        CommandOption('platform', platform),
        CommandOption('ctgName', ctgName),
        CommandOption('temp_file_dir', args.temp_file_dir),
        haploid_precise_mode,
        haploid_sensitive_mode,
        output_for_ensemble,
        qual,
        add_indel_length_mode,
        showRef_mode,
        is_from_tables_mode,
        pileup_mode,
        chunk_id,
        chunk_num,
        gvcf_mode,
        enable_long_indel_mode
    ]

    try:
        if need_realignment:
            c.realign_reads = subprocess_popen(
                shlex.split(command_string_from(realign_reads_command_options)),
            )
            c.create_tensor = subprocess_popen(
                shlex.split(command_string_from(create_tensor_command_options)),
                stdin=c.realign_reads.stdout)
        else:
            c.create_tensor = subprocess_popen(
                shlex.split(command_string_from(create_tensor_command_options)),
            )

        c.call_variant = subprocess_popen(
            shlex.split(command_string_from(call_variant_command_options)),
            stdin=c.create_tensor.stdout, stdout=sys.stderr
        )
    except Exception as e:
        print(e, file=sys.stderr)
        sys.exit("Failed to start required processes. Exiting...")

    # check_return_code is invoked by SIGALRM, first after 2 seconds.
    signal.signal(signal.SIGALRM, check_return_code)
    signal.alarm(2)

    try:
        c.call_variant.wait()
        signal.alarm(0)  # cancel any pending alarm once the caller is done
        c.create_tensor.stdout.close()
        c.create_tensor.wait()
        if need_realignment:
            c.realign_reads.stdout.close()
            c.realign_reads.wait()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received when waiting at CallVarBam, terminating all scripts.")
        try:
            c.call_variant.terminate()
            c.create_tensor.terminate()
            if need_realignment:
                c.realign_reads.terminate()
        except Exception as terminate_error:
            print(terminate_error)
        raise
    except Exception as e:
        print("Exception received when waiting at CallVarBam, terminating all scripts.")
        print(e)
        try:
            c.call_variant.terminate()
            c.create_tensor.terminate()
            if need_realignment:
                c.realign_reads.terminate()
        except Exception as terminate_error:
            # A distinct name keeps the outer exception bound; reusing `e`
            # here would unbind it and break the re-raise below.
            print(terminate_error)
        raise