def set_sample_2type(li, mode, config):
    """Group sample names by whether their second field is None.

    Each element of ``li`` is read by index: ``item[0]`` is the name that
    is stored, ``item[1]`` is the value tested against None (presumably
    the control panel paired with a tumor sample -- confirm with the
    caller).

    Args:
        li: iterable of indexable items with at least two fields.
        mode: passed to ``tools.get_section`` to find the config sections.
        config: configuration object handed to ``tools.config_getboolean``.

    Returns:
        dict with keys "all", "case1", "case2", "case3", "case4".
        "case3" and "case4" are always empty lists here.  "all" is filled
        only when the "all_in_one" option is on; "case1"/"case2" only
        when the "separate" option is on.
    """
    import genomon_post_analysis.subcode.tools as tools

    _section_in, section_out = tools.get_section(mode)
    include_unpanel = tools.config_getboolean(config, section_out, "include_unpanel")

    with_panel = []
    without_panel = []
    for entry in li:
        if entry[1] == None:
            # Names without a second field are kept only when the
            # "include_unpanel" option is on; otherwise they are dropped.
            if include_unpanel == True:
                without_panel.append(entry[0])
            continue
        with_panel.append(entry[0])

    # Build a fresh list per key (never share one list object between keys).
    grouped = dict((name, []) for name in ("all", "case1", "case2", "case3", "case4"))

    if tools.config_getboolean(config, section_out, "all_in_one") == True:
        grouped["all"] = with_panel + without_panel

    if tools.config_getboolean(config, section_out, "separate") == True:
        grouped["case1"] = with_panel
        grouped["case2"] = without_panel

    return grouped
def call_merge_result(mode, ids_dict, output_dir, genomon_root, config):
    """Merge per-sample result files into one output file per sample group.

    For every non-empty group in ``ids_dict`` up to two merged files are
    produced:

    * an unfiltered one, named by the "output_<key>" option, built from
      files with the "suffix" suffix -- only when "include_unfilt" is on;
    * a filtered one, named by the "output_filt_<key>" option, built from
      files with the "suffix_filt" suffix.

    An empty output name disables the corresponding merge.

    Args:
        mode: analysis mode; "mutation" and "starqc" use the paplot
            mergers from ``merge``, any other value uses
            ``subcode_merge.merge_result``.
        ids_dict: mapping of group key -> list of sample ids.
        output_dir: directory the merged files are written into.
        genomon_root: passed to ``capture.sample_to_result_file`` to
            locate each sample's result file.
        config: configuration object read through the ``tools`` helpers.
    """
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("=== [%s] merge result file. ===" % mode)

    import genomon_post_analysis.subcode.merge as subcode_merge
    import merge

    [section_in, section_out] = tools.get_section(mode)
    suffix_u = tools.config_getstr(config, section_in, "suffix")
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")
    merge_unfilt = tools.config_getboolean(config, section_out, "include_unfilt")

    def merge_samples(ids, suffix, output_name):
        # Shared by the unfiltered and filtered passes: resolve one result
        # file per sample, then hand them to the merger matching ``mode``.
        files = [
            capture.sample_to_result_file(iid, mode, genomon_root, suffix)
            for iid in ids
        ]
        output_path = output_dir + "/" + output_name

        if mode == "mutation":
            merge.merge_mutaion_for_paplot(files, ids, output_path, config)
        elif mode == "starqc":
            merge.merge_star_qc_for_paplot(files, ids, output_path, config)
        else:
            subcode_merge.merge_result(files, ids, output_path, mode, config)

    for key in ids_dict:
        ids = ids_dict[key]
        if len(ids) == 0:
            continue

        # unfiltered
        output_name = tools.config_getstr(config, section_out, "output_" + key)
        if output_name != "" and merge_unfilt == True:
            merge_samples(ids, suffix_u, output_name)

        # filtered
        output_name = tools.config_getstr(config, section_out, "output_filt_" + key)
        if output_name != "":
            merge_samples(ids, suffix_f, output_name)
def call_image_capture(mode, ids_dict, output_dir, genomon_root, sample_conf, config):
    """Create the IGV capture batch scripts for every non-empty sample group.

    Under ``<abspath(output_dir)>/<mode>/<output_dirname_<key>>`` this
    creates a "capture" directory (passed to the script writer as the
    image output directory) and a "capture_script" directory holding one
    ``<ID>.bat`` per sample plus a merged ``capture.bat``.

    Directories are created with ``os.mkdir``, one level at a time, so
    ``output_dir`` itself must already exist.

    Args:
        mode: analysis mode; also used as the sub-directory name.
        ids_dict: mapping of group key -> list of sample ids.
        output_dir: base output directory.
        genomon_root: forwarded to ``capture.write_capture_bat``.
        sample_conf: forwarded to ``capture.write_capture_bat``.
        config: configuration object read through the ``tools`` helpers.
    """
    print("=== [%s] create script file, for IGV image capture. ===" % mode)

    import os

    def make_dir_if_missing(path):
        if not os.path.exists(path):
            os.mkdir(path)

    def image_capture(mode, sample_list, output_dir, genomon_root, sample_conf, config):
        make_dir_if_missing(output_dir + "/capture")
        make_dir_if_missing(output_dir + "/capture_script")

        written = []
        for ID in sample_list:
            script_path = "%s/capture_script/%s.bat" % (output_dir, ID)
            path_options = {
                "output_file": script_path,
                "output_igv_dir": output_dir + "/capture",
                "bam_dir": "",
                "genomon_root": genomon_root
            }
            has_data = capture.write_capture_bat(path_options, ID, sample_conf, mode, config)
            if has_data == True:
                written.append(script_path)
            elif os.path.exists(script_path):
                # The writer reported no usable rows: drop the script it left.
                os.remove(script_path)

        capture.merge_capture_bat(written, output_dir + "/capture_script/capture.bat", True)

    # output dirs
    output_dir = os.path.abspath(output_dir) + "/" + mode
    make_dir_if_missing(output_dir)

    [section_in, section_out] = tools.get_section(mode)
    for key in ids_dict:
        if len(ids_dict[key]) == 0:
            continue

        dirname = tools.config_getstr(config, section_out, "output_dirname_" + key)
        group_dir = output_dir + "/" + dirname
        make_dir_if_missing(group_dir)
        image_capture(mode, ids_dict[key], group_dir, genomon_root, sample_conf, config)
def arg_to_file(mode, genomon_root, args, config):
    """Turn the four ``args.input_file_case*`` strings into sample groups.

    Each argument is a text value: surrounding single/double quotes are
    stripped, only the part before the first ";" is used, and that part
    is split on "," with surrounding spaces removed from every entry.

    case1 is always parsed.  case2 is parsed only when "include_unpanel"
    is on, case3 only when "include_unpair" is on, and case4 only when
    both are on; a disabled case contributes an empty list.

    Args:
        mode: passed to ``tools.get_section`` to find the config sections.
        genomon_root: unused here.
        args: object exposing ``input_file_case1`` .. ``input_file_case4``.
        config: configuration object read through ``tools.config_getboolean``.

    Returns:
        dict with keys "all", "case1", "case2", "case3", "case4".  "all"
        is the concatenation of the four cases when "all_in_one" is on;
        the per-case keys are filled when "separate" is on.
    """

    def parse_ids(text, enabled):
        # A disabled case returns before ``text`` is inspected at all.
        if enabled == False or len(text) == 0:
            return []
        unquoted = text.lstrip("'").lstrip('"').rstrip("'").rstrip('"')
        first_field = unquoted.split(";")[0]
        return [token.strip(" ") for token in first_field.split(",")]

    _section_in, section_out = tools.get_section(mode)

    def flag(option):
        return tools.config_getboolean(config, section_out, option)

    def per_case_ids():
        # Options are read on demand, so nothing is looked up unless one
        # of the two output layouts below is actually requested.
        return [
            parse_ids(args.input_file_case1, True),
            parse_ids(args.input_file_case2, flag("include_unpanel")),
            parse_ids(args.input_file_case3, flag("include_unpair")),
            parse_ids(args.input_file_case4, flag("include_unpair") and flag("include_unpanel")),
        ]

    grouped = dict((name, []) for name in ("all", "case1", "case2", "case3", "case4"))

    if flag("all_in_one") == True:
        merged = []
        for ids in per_case_ids():
            merged.extend(ids)
        grouped["all"] = merged

    if flag("separate") == True:
        case1, case2, case3, case4 = per_case_ids()
        grouped["case1"] = case1
        grouped["case2"] = case2
        grouped["case3"] = case3
        grouped["case4"] = case4

    return grouped
def write_pickup_script(path_options, ID, sample_conf, mode, config):
    """Write a bash script that extracts reads around each result position.

    The sample's filtered result file is read row by row.  For every data
    row two BED intervals are emitted, ``col_start`` +/- width on
    ``col_chr1`` and ``col_end`` +/- width on ``col_chr2`` (interval start
    clamped at 0).  The generated script builds that BED file, sorts and
    merges it with bedtools, runs ``samtools view -L`` on the sample's BAM
    (and on its paired BAM when ``sample_to_pair`` returns one), then
    removes the BED file.

    The script is written to ``<output_file>.tmp`` and renamed to
    ``output_file`` once complete.  ``<output_bam_dir>/<ID>`` is created
    if it does not exist.

    Args:
        path_options: dict; the keys read are "genomon_root",
            "output_bam_dir", "output_log_dir", "output_file",
            "samtools" and "bedtools".
        ID: sample id.
        sample_conf: forwarded to ``sample_to_pair``.
        mode: analysis mode, used to select the config sections.
        config: configuration object ([bam] "pickup_width",
            "input_bam_suffix", "output_bam_suffix", plus the input
            section's "suffix_filt", "sept", "comment" and "col_*").

    Returns:
        True if at least one data row was converted, False otherwise.
        The script file is written in both cases.

    Raises:
        ValueError: a configured column name is missing from the header
            row, or a position field is not an integer.
    """
    cmd_header = """#!/bin/bash
#
#$ -S /bin/bash
#$ -cwd
#$ -e {log}
#$ -o {log}
"""
    cmd_bed1 = """
if [ -e {bed} ]; then
    rm {bed}
fi
"""
    cmd_bed2 = "echo '{chr}\t{start}\t{end}' >> {bed}\n"
    cmd_bed3 = """
{bedtools} sort -i {bed} > {bed}.sort.bed
{bedtools} merge -i {bed}.sort.bed > {bed}.merge.bed
rm {bed} {bed}.sort.bed
mv {bed}.merge.bed {bed}
"""
    cmd_view = """
{samtools} view -h -L {bed} {bam} > {output_bam}.temp.bam
{samtools} sort {output_bam}.temp.bam {output_bam}
{samtools} index {output_bam}.bam
rm {output_bam}.temp.bam
"""
    cmd_rm_bed = """
rm {bed}
"""

    import genomon_post_analysis.subcode.tools as tools
    import os

    # options read
    [section_in, section_out] = tools.get_section(mode)
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")
    data_file = sample_to_result_file(ID, mode, path_options["genomon_root"], suffix_f)

    # output file name
    sample_dir = path_options["output_bam_dir"] + "/" + ID
    if not os.path.exists(sample_dir):
        os.mkdir(sample_dir)

    input_bam_suffix = tools.config_getstr(config, "bam", "input_bam_suffix")
    bam_tumor = sample_to_bam_file(ID, mode, path_options["genomon_root"], input_bam_suffix)
    out_tumor_name = sample_dir + "/" + ID

    normal = sample_to_pair(sample_conf, mode, ID)
    bam_normal = ""
    out_normal_name = ""
    if normal != None:
        bam_normal = sample_to_bam_file(normal, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))
        out_normal_name = sample_dir + "/" + normal

    # read config
    width = config.getint("bam", "pickup_width")
    output_bam_suffix = os.path.splitext(config.get("bam", "output_bam_suffix"))[0]
    samtools = path_options["samtools"]
    bedtools = path_options["bedtools"]
    # The separator is stored escaped in the config; turn it into the real character.
    sept = tools.config_getstr(config, section_in, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    # Read once here instead of once per input line.
    comment = tools.config_getstr(config, section_in, "comment")

    # create command text
    bed_file = out_tumor_name + ".bed"
    bed1_text = cmd_bed1.format(bed = bed_file)
    bed3_text = cmd_bed3.format(bedtools = bedtools, bed = bed_file)
    cmd_text = cmd_view.format(samtools = samtools, bed = bed_file, bam = bam_tumor, output_bam = out_tumor_name + output_bam_suffix)
    if bam_normal != "":
        cmd_text += cmd_view.format(samtools = samtools, bed = bed_file, bam = bam_normal, output_bam = out_normal_name + output_bam_suffix)
    cmd_text += cmd_rm_bed.format(bed = bed_file)

    header = []
    pending = []          # formatted BED lines not yet flushed to the script
    enable_data = False

    # ``with`` closes both handles even when a malformed row raises.
    with open(path_options["output_file"] + ".tmp", "w") as f_sh:
        # write script 1
        f_sh.write(cmd_header.format(log = path_options["output_log_dir"]))
        f_sh.write(bed1_text)

        # read
        with open(data_file) as f_data:
            for line in f_data:
                line = line.rstrip()
                if len(line.replace(sept, "")) == 0:
                    continue
                if line.startswith(comment):
                    continue

                if len(header) == 0:
                    # First surviving line is the header: locate the columns.
                    header = line.split(sept)
                    col_chr1 = header.index(tools.config_getstr(config, section_in, "col_chr1"))
                    col_chr2 = header.index(tools.config_getstr(config, section_in, "col_chr2"))
                    col_start = header.index(tools.config_getstr(config, section_in, "col_start"))
                    col_end = header.index(tools.config_getstr(config, section_in, "col_end"))
                    continue

                data = line.split(sept)
                for col_chr, col_pos in ((col_chr1, col_start), (col_chr2, col_end)):
                    pos = int(data[col_pos])
                    pending.append(cmd_bed2.format(
                        chr = data[col_chr],
                        start = max(pos - width, 0),
                        end = pos + width,
                        bed = bed_file
                    ))
                enable_data = True

                # Flush in batches so a large result file is not held in memory.
                if len(pending) > 10000:
                    f_sh.writelines(pending)
                    pending = []

        if pending:
            f_sh.writelines(pending)

        # write script 3
        f_sh.write(bed3_text)
        f_sh.write(cmd_text)

    os.rename(path_options["output_file"] + ".tmp", path_options["output_file"])

    return enable_data
def write_capture_bat(path_options, ID, sample_conf, mode, config):
    """Write an IGV batch script that snapshots each position of a sample.

    The sample's filtered result file is read row by row.  Rows whose
    (chr1, start, chr2, end) combination was already seen are skipped,
    and reading stops once [igv] "capture_max" distinct rows were taken.
    When both ends are on the same chromosome and ``end - start`` is
    below half the capture width, one snapshot centred on ``start`` is
    taken; otherwise one snapshot per end ("_1.png" / "_2.png").

    The script is written to ``<output_file>.tmp`` and renamed to
    ``output_file`` once complete.

    Args:
        path_options: dict; the keys read are "genomon_root",
            "output_igv_dir" and "output_file".
        ID: sample id.
        sample_conf: forwarded to ``sample_to_pair``.
        mode: analysis mode, used to select the config sections.
        config: configuration object ([igv] "capture_width" and
            "capture_max", [bam] "input_bam_suffix", plus the input
            section's "suffix_filt", "sept", "comment" and "col_*").

    Returns:
        True if at least one snapshot command was written, False
        otherwise.  The script file is written in both cases.

    Raises:
        ValueError: a configured column name is missing from the header
            row, or a position field is not an integer.
    """
    cmd_header = """
genome hg19
"""
    cmd_new_tumor = """
new
load {tumor_bam}
"""
    cmd_new_normal = """
load {normal_bam}
"""
    cmd_capt = """
goto {chr}:{start}-{end}
snapshot {name}
"""

    import genomon_post_analysis.subcode.tools as tools
    import os

    [section_in, section_out] = tools.get_section(mode)

    # result file
    suffix_f = tools.config_getstr(config, section_in, "suffix_filt")
    data_file = sample_to_result_file(ID, mode, path_options["genomon_root"], suffix_f)

    # use bams
    bam_tumor = sample_to_bam_file(ID, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))
    normal = sample_to_pair(sample_conf, mode, ID)
    bam_normal = ""
    if normal != None:
        bam_normal = sample_to_bam_file(normal, mode, path_options["genomon_root"], tools.config_getstr(config, "bam", "input_bam_suffix"))

    # output file
    out_tumor_name = "%s/%s" % (path_options["output_igv_dir"], ID)

    # options
    width = config.getint("igv", "capture_width")
    # The separator is stored escaped in the config; turn it into the real character.
    sept = tools.config_getstr(config, section_in, "sept").replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r")
    # Loop invariants: read once here instead of once per input line.
    capture_max = config.getint("igv", "capture_max")
    comment = tools.config_getstr(config, section_in, "comment")

    header = []
    seen = set()          # snapshot base names already scheduled
    pending = []          # formatted commands not yet flushed to the script
    enable_data = False

    # ``with`` closes both handles even when a malformed row raises.
    with open(path_options["output_file"] + ".tmp", "w") as f:
        # write script 1
        f.write(cmd_header)
        f.write(cmd_new_tumor.format(tumor_bam = bam_tumor))
        if len(bam_normal) > 0:
            f.write(cmd_new_normal.format(normal_bam = bam_normal))

        # read
        with open(data_file) as f_data:
            for line in f_data:
                if len(seen) >= capture_max:
                    break

                line = line.rstrip()
                if len(line.replace(sept, "")) == 0:
                    continue
                if line.startswith(comment):
                    continue

                if len(header) == 0:
                    # First surviving line is the header: locate the columns.
                    header = line.split(sept)
                    col_chr1 = header.index(tools.config_getstr(config, section_in, "col_chr1"))
                    col_chr2 = header.index(tools.config_getstr(config, section_in, "col_chr2"))
                    col_start = header.index(tools.config_getstr(config, section_in, "col_start"))
                    col_end = header.index(tools.config_getstr(config, section_in, "col_end"))
                    continue

                data = line.split(sept)
                chr1 = data[col_chr1]
                start = int(data[col_start])
                chr2 = data[col_chr2]
                end = int(data[col_end])

                fname = "{0}_{1}_{2}_{3}_{4}".format(out_tumor_name, chr1, start, chr2, end)
                if fname in seen:
                    continue
                seen.add(fname)

                # ``//`` keeps the integer comparison on both Python 2 and 3.
                if (chr1 == chr2) and ((end - start) < (width // 2)):
                    # Both ends fit in one window.  The window start is
                    # clamped at 0, as in the two-snapshot branch below.
                    pending.append(cmd_capt.format(chr = chr1, start = max(start - width, 0), end = start + width, name = fname + ".png"))
                else:
                    pending.append(cmd_capt.format(chr = chr1, start = max(start - width, 0), end = start + width, name = fname + "_1.png"))
                    pending.append(cmd_capt.format(chr = chr2, start = max(end - width, 0), end = end + width, name = fname + "_2.png"))
                enable_data = True

                # Flush in batches so a large result file is not held in memory.
                if len(pending) > 10000:
                    f.writelines(pending)
                    pending = []

        if pending:
            f.writelines(pending)

    os.rename(path_options["output_file"] + ".tmp", path_options["output_file"])

    return enable_data
def call_bam_pickup(mode, ids_dict, output_dir, genomon_root, arg_samtools, arg_bedtools, sample_conf, config):
    """Create the BAM pick-up shell scripts for every non-empty sample group.

    Under ``<abspath(output_dir)>/<mode>/<output_dirname_<key>>`` this
    creates "bam", "log" and "bam_script" directories, writes one
    ``pickup.<ID>.sh`` per sample into "bam_script", and merges the
    scripts that contain data into ``pickup.sh``.

    Directories are created with ``os.mkdir``, one level at a time, so
    ``output_dir`` itself must already exist.

    Args:
        mode: analysis mode; also used as the sub-directory name.
        ids_dict: mapping of group key -> list of sample ids.
        output_dir: base output directory.
        genomon_root: forwarded to ``capture.write_pickup_script``.
        arg_samtools: samtools path; "" falls back to [tools] "samtools".
        arg_bedtools: bedtools path; "" falls back to [tools] "bedtools".
        sample_conf: forwarded to ``capture.write_pickup_script``.
        config: configuration object read through the ``tools`` helpers.
    """
    print("=== [%s] create script file, for bam pick up. ===" % mode)

    import os

    def make_dir_if_missing(path):
        if not os.path.exists(path):
            os.mkdir(path)

    def bam_pickup(mode, sample_list, output_dir, genomon_root, samtools, bedtools, sample_conf, config):
        bam_dir = output_dir + "/bam"
        log_dir = output_dir + "/log"
        script_dir = output_dir + "/bam_script"
        for path in (bam_dir, log_dir, script_dir):
            make_dir_if_missing(path)

        scripts = []
        for ID in sample_list:
            script_path = "%s/pickup.%s.sh" % (script_dir, ID)
            path_options = {
                "output_file": script_path,
                "output_bam_dir": bam_dir,
                "output_log_dir": log_dir,
                "genomon_root": genomon_root,
                "samtools": samtools,
                "bedtools": bedtools
            }
            has_data = capture.write_pickup_script(path_options, ID, sample_conf, mode, config)
            if has_data == True:
                scripts.append(script_path)
            elif os.path.exists(script_path):
                # The writer reported no usable rows: drop the script it left.
                os.remove(script_path)

        capture.merge_pickup_script(scripts, script_dir + "/pickup.sh")

    # output dirs
    output_dir = os.path.abspath(output_dir) + "/" + mode
    make_dir_if_missing(output_dir)

    # tools: an explicit argument wins, the config value is the fallback
    samtools = arg_samtools if arg_samtools != "" else tools.config_getstr(config, "tools", "samtools")
    bedtools = arg_bedtools if arg_bedtools != "" else tools.config_getstr(config, "tools", "bedtools")

    [section_in, section_out] = tools.get_section(mode)
    for key in ids_dict:
        if len(ids_dict[key]) == 0:
            continue

        dirname = tools.config_getstr(config, section_out, "output_dirname_" + key)
        group_dir = output_dir + "/" + dirname
        make_dir_if_missing(group_dir)
        bam_pickup(mode, ids_dict[key], group_dir, genomon_root, samtools, bedtools, sample_conf, config)