def visualize_sv_calls(args, dbo_args, endpoint_args):
    """Produce all diagnostic figures for the filtered SV calls.

    The filtered BEDPE call set named by ``args.filter_bedpe_file`` is loaded,
    ``format()`` is invoked on every call, and three plotting stages are run
    in this order: read depth, twin-window barcode similarity, and the 2D
    heat map of overlapping barcodes.

    Args:
        args: Object providing ``filter_bedpe_file``, ``faidx_file``,
            ``cal_2d_overlapping_barcodes``, ``cal_read_depth_from_bcd21``,
            ``image_out_dir`` and ``bam_name``.
        dbo_args: Object providing ``bcd13_file``.
        endpoint_args: Object providing ``bcd21_file``.
    """
    # Pull every setting out of the argument objects before doing any work.
    svcalls_bedpe_path = args.filter_bedpe_file
    bcd13_path = dbo_args.bcd13_file
    bcd21_path = endpoint_args.bcd21_file
    faidx_path = args.faidx_file
    heatmap_binary = args.cal_2d_overlapping_barcodes
    depth_binary = args.cal_read_depth_from_bcd21
    image_dir = args.image_out_dir
    prefix = args.bam_name

    my_utils.make_dir(image_dir)

    # Reference metadata derived from the FASTA index.
    chr_len_list = my_utils.get_chr_length(faidx_path)
    tid2chrname_list, chrname2tid_dict = my_utils.get_chrnames(faidx_path)

    svcalls = read_svcall_bedpe_file(svcalls_bedpe_path, chrname2tid_dict)
    for svcall in svcalls:
        svcall.format()

    plot_depth(
        depth_binary, bcd21_path, svcalls, faidx_path, chr_len_list,
        tid2chrname_list, chrname2tid_dict, image_dir, prefix,
    )

    plot_twin_window_barcode_similarity(
        svcalls, bcd13_path, faidx_path, image_dir, chr_len_list,
        tid2chrname_list, chrname2tid_dict, prefix,
    )

    # Heat maps use a 100 kb flank distance.
    heatmap_flank_dist = 100 * 1000
    plot_heatmap(
        svcalls, bcd21_path, faidx_path, image_dir, heatmap_flank_dist,
        chr_len_list, tid2chrname_list, chrname2tid_dict, heatmap_binary,
        prefix,
    )

    return
def plot_heatmap(in_svcall_list, bcd21_file, faidx_file, out_dir, flank_dist, chr_len_list, tid2chrname_list, chrname2tid_dict, cal_2d_overlapping_barcodes_binary, out_prefix):
    """Plot 2D heat maps of overlapping barcodes for the SV calls.

    Steps:
      1. Build the target regions with ``generate_target_region_bedpe_list``
         and write them to ``<out_dir>/2D_heatmap/target_region.bedpe``.
      2. Run the external ``cal_2d_overlapping_barcodes_binary`` on those
         regions.
      3. Hand the binary's output file to
         ``plot_2d_barcodes.plot_2d_overlapping_barcodes``.

    If the binary does not exist, an error is printed and nothing is plotted.

    Args:
        in_svcall_list: SV calls to plot.
        bcd21_file: Path passed to the binary as its first argument.
        faidx_file: FASTA index path passed to the binary.
        out_dir: Parent output directory; a ``2D_heatmap`` sub-directory is
            created inside it.
        flank_dist: Flank distance forwarded to
            ``generate_target_region_bedpe_list``.
        chr_len_list: Chromosome lengths, forwarded to the region builder.
        tid2chrname_list: Unused here; kept for interface compatibility.
        chrname2tid_dict: Chromosome-name lookup, forwarded to the region
            builder.
        cal_2d_overlapping_barcodes_binary: Path of the external executable.
        out_prefix: Prefix for output file names.
    """
    if not os.path.exists(cal_2d_overlapping_barcodes_binary):
        my_utils.myprint('ERROR! The binary file doesn\'t exist: %s' % cal_2d_overlapping_barcodes_binary)
        my_utils.myprint('Skipped plotting the heat maps')
        return

    out_dir = os.path.join(out_dir, '2D_heatmap')
    my_utils.make_dir(out_dir)

    my_utils.myprint('plotting heat maps of overlapping barcodes')

    target_region_bedpe_list = generate_target_region_bedpe_list(in_svcall_list, chr_len_list, flank_dist, chrname2tid_dict)

    target_region_bedpe_file = os.path.join(out_dir, 'target_region.bedpe')
    target_region_bedpe_fp = my_utils.gzopen(target_region_bedpe_file, 'w')
    try:
        for bedpe1 in target_region_bedpe_list:
            # `endl` is the line terminator constant defined elsewhere in the project.
            target_region_bedpe_fp.write(bedpe1.output_svcall() + endl)
    finally:
        # Close (and flush) the handle even if serialising a record fails,
        # so the file is never left open.
        target_region_bedpe_fp.close()

    target_region_2d_ovl_with_low_mapq_file = os.path.join(out_dir, '%s.2d_heatmap.with_low_mapq_reads.txt' % out_prefix)

    bin_size = 1000
    max_ovl_num = 100  # upper bound forwarded to the plotting routine

    # NOTE(review): the trailing '1' is an opaque flag of the external binary;
    # judging by the output file name it presumably enables low-MAPQ reads -
    # confirm against the binary's usage text.
    cmd_args_list1 = [
        cal_2d_overlapping_barcodes_binary,
        bcd21_file,
        target_region_bedpe_file,
        target_region_2d_ovl_with_low_mapq_file,
        faidx_file,
        str(bin_size),
        '1',
    ]
    return_code = subprocess.call(cmd_args_list1)
    if return_code != 0:
        # Surface the failure instead of discarding the exit status; plotting
        # is still attempted exactly as before.
        my_utils.myprint('WARNING! %s exited with status %d' % (cal_2d_overlapping_barcodes_binary, return_code))

    plot_2d_barcodes.plot_2d_overlapping_barcodes(target_region_2d_ovl_with_low_mapq_file, target_region_bedpe_list, bin_size, max_ovl_num, out_dir, out_prefix)

    return
def plot_2d_overlapping_barcodes(in_2d_values_file, target_region_bedpe_list, bin_size, max_ovl_num, out_dir, out_prefix):
    """Parse a 2D overlap-count file and plot one figure per region.

    The input is read line by line and interpreted as follows:

    * A line starting with ``##`` begins a new region. The rest of the line
      is the region title. Any region collected so far is plotted first.
    * A line starting with ``#xmin`` carries, after its first ``=``, four
      comma-separated integers ``xmin,xmax,ymin,ymax``. A zero matrix of
      ``(xmax - xmin) / bin_size`` by ``(ymax - ymin) / bin_size`` cells is
      allocated for the region.
    * Any other non-blank line is one matrix row of ``tab``-separated
      integers. Each value is capped at ``max_ovl_num``.

    Blank lines are ignored. The input handle is always closed, even when
    parsing or plotting raises.

    Args:
        in_2d_values_file: Path of the file to parse (opened via
            ``my_utils.gzopen``).
        target_region_bedpe_list: Forwarded unchanged to ``plot_one_bedpe``.
        bin_size: Bin width used to derive the matrix dimensions.
        max_ovl_num: Maximum value stored in any matrix cell.
        out_dir: Output directory (created if needed).
        out_prefix: Prefix for output file names.
    """
    my_utils.make_dir(out_dir)

    region_title = ''
    xmin, xmax, ymin, ymax = (0, 0, 0, 0)
    xsize = 0
    ysize = 0
    current_x = 0  # index of the next matrix row to fill
    ovl_2d_array = np.zeros(shape=(xsize, ysize), dtype=np.int32)

    in_2d_values_fp = my_utils.gzopen(in_2d_values_file, 'r')
    try:
        while True:
            line = in_2d_values_fp.readline()
            if not line:
                break

            line = line.strip()
            if not line:
                # A blank line carries no data; int('') would raise otherwise.
                continue

            if line[0:2] == '##':
                # New region header: flush the region collected so far.
                if xsize and ysize:
                    plot_one_bedpe(out_dir, target_region_bedpe_list, out_prefix, region_title, ovl_2d_array, xmin, xmax, ymin, ymax, bin_size)
                region_title = line[2:]
                xmin, xmax, ymin, ymax = (0, 0, 0, 0)
                xsize = 0
                ysize = 0
                current_x = 0
                continue

            if line[0:5] == '#xmin':
                # Region bounds: allocate a fresh matrix of the right size.
                bounds = line.split('=')[1].split(',')
                xmin, xmax, ymin, ymax = [int(value) for value in bounds]
                xsize = int((xmax - xmin) / bin_size)
                ysize = int((ymax - ymin) / bin_size)
                ovl_2d_array = np.zeros(shape=(xsize, ysize), dtype=np.int32)
                current_x = 0
                continue

            # Data row: clamp before storing so that oversized counts never
            # have to fit into the int32 array.
            for current_y, value in enumerate(line.split(tab)):
                ovl_2d_array[current_x][current_y] = min(int(value), max_ovl_num)
            current_x += 1

        # Flush the last region (there is no trailing '##' header after it).
        if xsize and ysize:
            plot_one_bedpe(out_dir, target_region_bedpe_list, out_prefix, region_title, ovl_2d_array, xmin, xmax, ymin, ymax, bin_size)
    finally:
        in_2d_values_fp.close()

    return
def plot_twin_window_barcode_similarity(in_svcall_list, bcd13_file, faidx_file, out_dir, chr_len_list, tid2chrname_list, chrname2tid_dict, out_prefix):
    """Plot twin-window barcode similarity around each SV call.

    Figures are written to ``<out_dir>/twin_window_barcode_similarity``.
    Calls whose ``svtype`` is ``'DEL'`` or ``'DUP'`` are skipped. For the
    remaining calls the flank added around each breakpoint is
    ``abs(key2 - key1)`` limited to the range 10 kb - 50 kb.

    * If ``abs(key2 - key1)`` exceeds 500 kb, two figures are drawn, one per
      breakpoint.
    * Otherwise a single figure covers both breakpoints, using the first
      breakpoint's chromosome.

    Plotted regions are clipped to ``[0, chromosome length]``.
    """
    out_dir = os.path.join(out_dir, 'twin_window_barcode_similarity')
    my_utils.make_dir(out_dir)

    wg_pvalue_list, bin_size = get_wg_pvalue_list_from_bcd13_file(bcd13_file, chr_len_list)

    max_length_in_one_figure = 500 * 1000
    min_flank = 10 * 1000
    max_flank = 50 * 1000

    for svcall in in_svcall_list:
        if svcall.svtype in ('DEL', 'DUP'):
            continue

        key_distance = abs(svcall.key2 - svcall.key1)
        # Cap at max_flank first, then raise to min_flank.
        flank_dist = max(min(key_distance, max_flank), min_flank)

        if key_distance <= max_length_in_one_figure:
            # Breakpoints are close enough to share one figure.
            png_path = os.path.join(out_dir, '%s.%s.both_breakpoints.twin_window_barcode_similarity.png' % (out_prefix, svcall.sv_id))
            region_start = max(svcall.start1 - flank_dist, 0)
            region_end = min(svcall.end2 + flank_dist, chr_len_list[svcall.tid1])
            title = '%s, %s (breakpoint 1 and 2)' % (svcall.sv_id, svcall.svtype)
            plot_twin_window_barcode_similarity_for1region(svcall.chrm1, svcall.tid1, region_start, region_end, svcall.start1, svcall.start2, png_path, title, wg_pvalue_list, bin_size)
            continue

        # Breakpoints are far apart: one figure per breakpoint.
        panels = (
            (
                '%s.%s.breakpoint1.twin_window_barcode_similarity.png',
                '%s, %s (breakpoint 1)',
                svcall.chrm1, svcall.tid1, svcall.start1, svcall.end1,
            ),
            (
                '%s.%s.breakpoint2.twin_window_barcode_similarity.png',
                '%s, %s (breakpoint 2)',
                svcall.chrm2, svcall.tid2, svcall.start2, svcall.end2,
            ),
        )
        for name_fmt, title_fmt, chrm, tid, bk_start, bk_end in panels:
            png_path = os.path.join(out_dir, name_fmt % (out_prefix, svcall.sv_id))
            region_start = max(bk_start - flank_dist, 0)
            region_end = min(bk_end + flank_dist, chr_len_list[tid])
            title = title_fmt % (svcall.sv_id, svcall.svtype)
            # Both marker positions coincide when a single breakpoint is shown.
            plot_twin_window_barcode_similarity_for1region(chrm, tid, region_start, region_end, bk_start, bk_start, png_path, title, wg_pvalue_list, bin_size)

    return
def plot_depth(cal_read_depth_from_bcd21_binary, bcd21_file, in_svcalls_list, faidx_file, chr_len_list, tid2chrname_list, chrname2tid_dict, out_dir, out_prefix):
    """Plot read depth across every intra-chromosomal SV call.

    The external ``cal_read_depth_from_bcd21_binary`` is run to write a
    read-depth table to ``<out_dir>/read_depth/<out_prefix>.read_depth.txt``.
    The table is loaded with ``plot_read_depth.get_wg_depth_list``, the
    genome-wide average of the high-MAPQ depth list is computed, and one PNG
    is drawn per call whose two breakpoints lie on the same chromosome. The
    intermediate depth table is deleted after plotting.

    Nothing is plotted (an error is printed instead) when the binary, the
    bcd21 file or the FASTA index is missing, or when the depth table
    contains no bins.

    Args:
        cal_read_depth_from_bcd21_binary: Path of the external executable.
        bcd21_file: Input file passed to the executable.
        in_svcalls_list: SV calls to plot.
        faidx_file: FASTA index path passed to the executable.
        chr_len_list: Chromosome lengths.
        tid2chrname_list: Unused here; kept for interface compatibility.
        chrname2tid_dict: Unused here; kept for interface compatibility.
        out_dir: Parent output directory; a ``read_depth`` sub-directory is
            created inside it.
        out_prefix: Prefix for output file names.
    """
    required_files = (
        ('binary', cal_read_depth_from_bcd21_binary),
        ('bcd21', bcd21_file),
        ('fasta index', faidx_file),
    )
    for label, path in required_files:
        if not os.path.exists(path):
            # The original literal used '\F' (an invalid escape sequence)
            # where a newline was clearly intended.
            my_utils.myprint('ERROR! The %s file doesn\'t exist:%s\nFailed to plot read depth' % (label, path))
            return

    out_dir = os.path.join(out_dir, 'read_depth')
    my_utils.make_dir(out_dir)

    bin_size = 500
    read_depth_file = os.path.join(out_dir, '%s.read_depth.txt' % out_prefix)

    # NOTE(review): the trailing '20' is an opaque argument of the external
    # binary (possibly a mapping-quality cutoff) - confirm against its usage.
    cmd_args_list = [
        cal_read_depth_from_bcd21_binary,
        bcd21_file,
        read_depth_file,
        faidx_file,
        str(bin_size),
        '20',
    ]

    my_utils.myprint('calculating read depth from file: %s' % bcd21_file)
    return_code = subprocess.call(cmd_args_list)
    if return_code != 0:
        # Report the failure rather than discarding the exit status;
        # processing continues as before.
        my_utils.myprint('WARNING! %s exited with status %d' % (cal_read_depth_from_bcd21_binary, return_code))

    my_utils.myprint('plotting read depth')

    # bin_size is replaced by the value reported by the depth-file reader.
    wg_high_mapq_depth_list, wg_total_depth_list, bin_size = plot_read_depth.get_wg_depth_list(read_depth_file, chr_len_list)

    # Genome-wide average depth over all high-MAPQ bins.
    wg_total_depth = 0
    wg_n_bin = 0
    for chr_depth_list in wg_high_mapq_depth_list:
        for depth in chr_depth_list:
            wg_total_depth += depth
            wg_n_bin += 1

    if wg_n_bin == 0:
        # Avoid a ZeroDivisionError; the depth table is left in place so it
        # can be inspected.
        my_utils.myprint('ERROR! No read depth bins were found in: %s\nFailed to plot read depth' % read_depth_file)
        return

    wg_avg_depth = float(wg_total_depth) / wg_n_bin

    for svcall in in_svcalls_list:
        # Only calls with both breakpoints on one chromosome can be drawn
        # as a single contiguous region.
        if svcall.chrm1 != svcall.chrm2:
            continue
        out_file = os.path.join(out_dir, '%s.%s.read_depth.png' % (out_prefix, svcall.sv_id))
        figure_title = 'Read depth (%s, %d bp %s)' % (svcall.sv_id, svcall.end2 - svcall.start1, svcall.svtype)
        plot_read_depth.plot_read_depth_for1region(svcall.chrm1, svcall.tid1, svcall.start1, svcall.end2, out_file, figure_title, wg_high_mapq_depth_list, wg_total_depth_list, chr_len_list, bin_size, wg_avg_depth)

    os.remove(read_depth_file)

    return