コード例 #1
0
def visualize_sv_calls(args, dbo_args, endpoint_args):
    """Produce all diagnostic plots for the filtered SV calls.

    The SV calls are loaded from ``args.filter_bedpe_file``; each call's
    ``format()`` method is invoked, and the calls are then passed, in this
    order, to ``plot_depth``, ``plot_twin_window_barcode_similarity`` and
    ``plot_heatmap``.

    Args:
        args: settings object. Attributes read here: ``filter_bedpe_file``,
            ``faidx_file``, ``cal_2d_overlapping_barcodes``,
            ``cal_read_depth_from_bcd21``, ``image_out_dir`` and ``bam_name``
            (used as the output file-name prefix).
        dbo_args: object providing ``bcd13_file``.
        endpoint_args: object providing ``bcd21_file``.

    Returns:
        None.
    """
    # Pull every setting out up front so a missing attribute fails before
    # any directory is created.
    svcalls_bedpe = args.filter_bedpe_file
    bcd13 = dbo_args.bcd13_file
    bcd21 = endpoint_args.bcd21_file
    fai = args.faidx_file
    overlap_binary = args.cal_2d_overlapping_barcodes
    depth_binary = args.cal_read_depth_from_bcd21
    image_dir = args.image_out_dir
    prefix = args.bam_name

    my_utils.make_dir(image_dir)

    chr_lengths = my_utils.get_chr_length(fai)
    tid2chrname, chrname2tid = my_utils.get_chrnames(fai)

    svcalls = read_svcall_bedpe_file(svcalls_bedpe, chrname2tid)
    for call in svcalls:
        call.format()

    plot_depth(depth_binary, bcd21, svcalls, fai, chr_lengths,
               tid2chrname, chrname2tid, image_dir, prefix)

    plot_twin_window_barcode_similarity(svcalls, bcd13, fai, image_dir,
                                        chr_lengths, tid2chrname,
                                        chrname2tid, prefix)

    # Heat maps extend 100 kb on either side of each call.
    heatmap_flank = 100 * 1000
    plot_heatmap(svcalls, bcd21, fai, image_dir, heatmap_flank, chr_lengths,
                 tid2chrname, chrname2tid, overlap_binary, prefix)
コード例 #2
0
def plot_heatmap(in_svcall_list, bcd21_file, faidx_file, out_dir, flank_dist, chr_len_list, tid2chrname_list, chrname2tid_dict, cal_2d_overlapping_barcodes_binary, out_prefix):
    """Compute 2D barcode-overlap values for each SV call and plot them.

    Steps:
      1. Build the target regions with ``generate_target_region_bedpe_list``
         and write them to ``<out_dir>/2D_heatmap/target_region.bedpe``.
      2. Run the external ``cal_2d_overlapping_barcodes_binary`` on that
         file and ``bcd21_file``.
      3. Pass the resulting values file to
         ``plot_2d_barcodes.plot_2d_overlapping_barcodes``.

    Failures (missing binary, missing output of the binary) are reported via
    ``my_utils.myprint`` and the plotting is skipped; no exception is raised
    for them.

    Args:
        in_svcall_list: SV call objects to plot.
        bcd21_file: path of the bcd21 input file handed to the binary.
        faidx_file: path of the fasta index file handed to the binary.
        out_dir: parent output directory; a ``2D_heatmap`` subdirectory is
            created inside it.
        flank_dist: flank distance forwarded to
            ``generate_target_region_bedpe_list``.
        chr_len_list: chromosome lengths, forwarded to the region builder.
        tid2chrname_list: unused here; kept for interface compatibility.
        chrname2tid_dict: chromosome-name lookup forwarded to the region
            builder.
        cal_2d_overlapping_barcodes_binary: path of the external executable.
        out_prefix: prefix for the generated file names.

    Returns:
        None.
    """
    if not os.path.exists(cal_2d_overlapping_barcodes_binary):
        my_utils.myprint('ERROR! The binary file doesn\'t exist: %s' % cal_2d_overlapping_barcodes_binary)
        my_utils.myprint('Skipped plotting the heat maps')
        return

    out_dir = os.path.join(out_dir, '2D_heatmap')
    my_utils.make_dir(out_dir)

    my_utils.myprint('plotting heat maps of overlapping barcodes')

    target_region_bedpe_list = generate_target_region_bedpe_list(in_svcall_list, chr_len_list, flank_dist, chrname2tid_dict)
    target_region_bedpe_file = os.path.join(out_dir, 'target_region.bedpe')
    target_region_bedpe_fp = my_utils.gzopen(target_region_bedpe_file, 'w')
    try:
        # `endl` is expected to be a module-level line terminator constant.
        for bedpe1 in target_region_bedpe_list:
            target_region_bedpe_fp.write(bedpe1.output_svcall() + endl)
    finally:
        # Close (and flush) the handle even if writing a record fails.
        target_region_bedpe_fp.close()

    target_region_2d_ovl_with_low_mapq_file = os.path.join(out_dir, '%s.2d_heatmap.with_low_mapq_reads.txt' % out_prefix)

    bin_size = 1000      # bin width passed to both the binary and the plotter
    max_ovl_num = 100    # the plotter caps overlap counts at this value

    # NOTE(review): the trailing '1' is an option of the external binary; its
    # meaning is not visible here (the output name suggests it enables
    # low-mapq reads) -- confirm against the binary's usage text.
    cmd_args_list1 = [cal_2d_overlapping_barcodes_binary, bcd21_file, target_region_bedpe_file, target_region_2d_ovl_with_low_mapq_file, faidx_file, str(bin_size), '1']
    return_code = subprocess.call(cmd_args_list1)
    if return_code != 0:
        my_utils.myprint('WARNING! %s exited with return code %d' % (cal_2d_overlapping_barcodes_binary, return_code))

    # Without the values file the plotting step can only fail, so skip it.
    if not os.path.exists(target_region_2d_ovl_with_low_mapq_file):
        my_utils.myprint('ERROR! The 2D overlapping barcodes file was not generated: %s' % target_region_2d_ovl_with_low_mapq_file)
        my_utils.myprint('Skipped plotting the heat maps')
        return

    plot_2d_barcodes.plot_2d_overlapping_barcodes(target_region_2d_ovl_with_low_mapq_file, target_region_bedpe_list, bin_size, max_ovl_num, out_dir, out_prefix)

    return
コード例 #3
0
def plot_2d_overlapping_barcodes(in_2d_values_file, target_region_bedpe_list,
                                 bin_size, max_ovl_num, out_dir, out_prefix):
    """Parse a 2D overlap-values file and plot each region it contains.

    The parser recognises three kinds of lines:

    * ``##<title>``: starts a new region. The previously accumulated region
      (if it has a non-empty matrix) is plotted first.
    * ``#xmin...=<xmin>,<xmax>,<ymin>,<ymax>``: gives the region bounds. The
      matrix is re-allocated with ``(xmax - xmin) / bin_size`` rows and
      ``(ymax - ymin) / bin_size`` columns (both truncated to int).
    * anything else: one matrix row of integers separated by ``tab``
      (expected to be a module-level separator constant). Rows are stored in
      the order they appear.

    Blank lines are ignored. Every stored value is capped at ``max_ovl_num``.
    Each completed region is handed to ``plot_one_bedpe``.

    Args:
        in_2d_values_file: path of the values file, opened with
            ``my_utils.gzopen``.
        target_region_bedpe_list: forwarded unchanged to ``plot_one_bedpe``.
        bin_size: bin width used to derive the matrix dimensions.
        max_ovl_num: upper cap applied to every matrix value.
        out_dir: output directory (created if needed).
        out_prefix: forwarded unchanged to ``plot_one_bedpe``.

    Returns:
        None.
    """
    my_utils.make_dir(out_dir)

    in_2d_values_fp = my_utils.gzopen(in_2d_values_file, 'r')

    region_title = ''
    xmin, xmax, ymin, ymax = (0, 0, 0, 0)
    xsize = 0
    ysize = 0
    current_x = 0

    ovl_2d_array = np.zeros(shape=(xsize, ysize), dtype=np.int32)

    try:
        while True:
            line = in_2d_values_fp.readline()
            if not line:
                break
            line = line.strip()
            if not line:
                # A blank line carries no data; int('') would raise otherwise.
                continue

            if line[0:2] == '##':
                # New region header: flush the region collected so far.
                if xsize and ysize:
                    plot_one_bedpe(out_dir, target_region_bedpe_list,
                                   out_prefix, region_title, ovl_2d_array,
                                   xmin, xmax, ymin, ymax, bin_size)
                region_title = line[2:]
                xmin, xmax, ymin, ymax = (0, 0, 0, 0)
                xsize = 0
                ysize = 0
                current_x = 0
                continue

            if line[0:5] == '#xmin':
                bounds = line.split('=')[1].split(',')
                xmin, xmax, ymin, ymax = [int(value) for value in bounds]
                xsize = int((xmax - xmin) / bin_size)
                ysize = int((ymax - ymin) / bin_size)
                ovl_2d_array = np.zeros(shape=(xsize, ysize), dtype=np.int32)
                current_x = 0
                continue

            # Data row: clamp before storing so an oversized count never has
            # to fit into the int32 matrix.
            for current_y, field in enumerate(line.split(tab)):
                ovl_2d_array[current_x][current_y] = min(int(field), max_ovl_num)
            current_x += 1

        # The last region has no following '##' header to trigger its plot.
        if xsize and ysize:
            plot_one_bedpe(out_dir, target_region_bedpe_list, out_prefix,
                           region_title, ovl_2d_array, xmin, xmax, ymin, ymax,
                           bin_size)
    finally:
        # Release the input handle even when parsing or plotting fails.
        in_2d_values_fp.close()

    return
コード例 #4
0
def plot_twin_window_barcode_similarity(in_svcall_list, bcd13_file, faidx_file, out_dir, chr_len_list, tid2chrname_list, chrname2tid_dict, out_prefix):
    """Plot twin-window barcode similarity around each SV call's breakpoints.

    Calls whose ``svtype`` is ``'DEL'`` or ``'DUP'`` are skipped. For every
    other call the distance ``abs(key2 - key1)`` decides the layout:

    * larger than 500 kb: two figures, one per breakpoint, each covering
      ``start - flank`` to ``end + flank`` of that breakpoint;
    * otherwise: one figure covering ``start1 - flank`` to ``end2 + flank``.

    The flank equals that distance limited to the range 10 kb - 50 kb, and
    every plotted region is clipped to ``[0, chr_len_list[tid]]``.

    Args:
        in_svcall_list: SV call objects (attributes used: ``svtype``,
            ``sv_id``, ``key1``/``key2``, ``chrm1``/``chrm2``,
            ``tid1``/``tid2``, ``start1``/``end1``, ``start2``/``end2``).
        bcd13_file: input for ``get_wg_pvalue_list_from_bcd13_file``.
        faidx_file: unused here.
        out_dir: parent output directory; figures are written to its
            ``twin_window_barcode_similarity`` subdirectory.
        chr_len_list: chromosome lengths indexed by tid.
        tid2chrname_list: unused here.
        chrname2tid_dict: unused here.
        out_prefix: prefix of the output file names.

    Returns:
        None.
    """
    out_dir = os.path.join(out_dir, 'twin_window_barcode_similarity')
    my_utils.make_dir(out_dir)

    wg_pvalue_list, bin_size = get_wg_pvalue_list_from_bcd13_file(bcd13_file, chr_len_list)

    single_figure_limit = 500 * 1000
    min_flank = 10 * 1000
    max_flank = 50 * 1000

    def draw_region(png_path, chrom, tid, start, end, flank, bk_pos1, bk_pos2, title):
        # Pad the region by the flank, then clip it to the chromosome.
        region_start = max(start - flank, 0)
        region_end = min(end + flank, chr_len_list[tid])
        plot_twin_window_barcode_similarity_for1region(chrom, tid, region_start, region_end, bk_pos1, bk_pos2, png_path, title, wg_pvalue_list, bin_size)

    for sv in in_svcall_list:
        if sv.svtype in ('DEL', 'DUP'):
            continue

        span = abs(sv.key2 - sv.key1)
        flank = min(max(span, min_flank), max_flank)

        if span <= single_figure_limit:
            # Both breakpoints fit into one figure.
            draw_region(
                os.path.join(out_dir, '%s.%s.both_breakpoints.twin_window_barcode_similarity.png' % (out_prefix, sv.sv_id)),
                sv.chrm1, sv.tid1, sv.start1, sv.end2, flank,
                sv.start1, sv.start2,
                '%s, %s (breakpoint 1 and 2)' % (sv.sv_id, sv.svtype))
            continue

        # Breakpoints are too far apart: one figure per breakpoint, with the
        # same position passed for both breakpoint markers.
        draw_region(
            os.path.join(out_dir, '%s.%s.breakpoint1.twin_window_barcode_similarity.png' % (out_prefix, sv.sv_id)),
            sv.chrm1, sv.tid1, sv.start1, sv.end1, flank,
            sv.start1, sv.start1,
            '%s, %s (breakpoint 1)' % (sv.sv_id, sv.svtype))

        draw_region(
            os.path.join(out_dir, '%s.%s.breakpoint2.twin_window_barcode_similarity.png' % (out_prefix, sv.sv_id)),
            sv.chrm2, sv.tid2, sv.start2, sv.end2, flank,
            sv.start2, sv.start2,
            '%s, %s (breakpoint 2)' % (sv.sv_id, sv.svtype))
コード例 #5
0
def plot_depth(cal_read_depth_from_bcd21_binary, bcd21_file, in_svcalls_list, faidx_file, chr_len_list, tid2chrname_list, chrname2tid_dict, out_dir, out_prefix):
    """Compute binned read depth with an external binary and plot it per SV.

    The external ``cal_read_depth_from_bcd21_binary`` writes a temporary
    ``<out_prefix>.read_depth.txt`` file into ``<out_dir>/read_depth``. The
    file is loaded with ``plot_read_depth.get_wg_depth_list``, the average
    of all high-mapq depth bins is computed, and one figure is drawn for
    every SV call whose two breakpoints are on the same chromosome. The
    temporary depth file is deleted afterwards.

    Problems (missing inputs, missing or empty depth output) are reported
    via ``my_utils.myprint`` and the function returns without plotting.

    Args:
        cal_read_depth_from_bcd21_binary: path of the external executable.
        bcd21_file: path of the bcd21 input file.
        in_svcalls_list: SV call objects (attributes used: ``chrm1``,
            ``chrm2``, ``tid1``, ``start1``, ``end2``, ``sv_id``, ``svtype``).
        faidx_file: path of the fasta index file.
        chr_len_list: chromosome lengths, forwarded to the plotting helpers.
        tid2chrname_list: unused here.
        chrname2tid_dict: unused here.
        out_dir: parent output directory; a ``read_depth`` subdirectory is
            created inside it.
        out_prefix: prefix of the output file names.

    Returns:
        None.
    """
    # Check the required inputs in order and stop at the first missing one.
    required_files = (
        ('ERROR! The binary file doesn\'t exist:%s\nFailed to plot read depth', cal_read_depth_from_bcd21_binary),
        ('ERROR! The bcd21 file doesn\'t exist:%s\nFailed to plot read depth', bcd21_file),
        ('ERROR! The fasta index file doesn\'t exist:%s\nFailed to plot read depth', faidx_file),
    )
    for message, path in required_files:
        if not os.path.exists(path):
            my_utils.myprint(message % path)
            return

    out_dir = os.path.join(out_dir, 'read_depth')
    my_utils.make_dir(out_dir)

    bin_size = 500
    read_depth_file = os.path.join(out_dir, '%s.read_depth.txt' % out_prefix)
    # NOTE(review): the trailing '20' is an option of the external binary;
    # presumably a mapping-quality cut-off -- confirm against its usage text.
    cmd_args_list = [cal_read_depth_from_bcd21_binary, bcd21_file, read_depth_file, faidx_file, str(bin_size), '20']
    my_utils.myprint('calculating read depth from file: %s' % bcd21_file)
    return_code = subprocess.call(cmd_args_list)
    if return_code != 0:
        my_utils.myprint('WARNING! %s exited with return code %d' % (cal_read_depth_from_bcd21_binary, return_code))

    if not os.path.exists(read_depth_file):
        my_utils.myprint('ERROR! The read depth file was not generated:%s\nFailed to plot read depth' % read_depth_file)
        return

    my_utils.myprint('plotting read depth')
    # bin_size is replaced by the value reported by the depth file loader.
    wg_high_mapq_depth_list, wg_total_depth_list, bin_size = plot_read_depth.get_wg_depth_list(read_depth_file, chr_len_list)

    # Whole-genome average over every high-mapq depth bin.
    wg_total_depth = 0
    wg_n_bin = 0
    for chr_depth_list in wg_high_mapq_depth_list:
        for depth in chr_depth_list:
            wg_total_depth += depth
            wg_n_bin += 1

    if wg_n_bin == 0:
        # No bins means no average can be computed (would divide by zero).
        my_utils.myprint('ERROR! No read depth bins were found in file:%s\nFailed to plot read depth' % read_depth_file)
        os.remove(read_depth_file)
        return

    wg_avg_depth = float(wg_total_depth) / wg_n_bin

    for svcall in in_svcalls_list:
        # Only calls with both breakpoints on one chromosome are plotted.
        if svcall.chrm1 != svcall.chrm2:
            continue
        out_file = os.path.join(out_dir, '%s.%s.read_depth.png' % (out_prefix, svcall.sv_id))
        figure_title = 'Read depth (%s, %d bp %s)' % (svcall.sv_id, svcall.end2 - svcall.start1, svcall.svtype)
        plot_read_depth.plot_read_depth_for1region(svcall.chrm1, svcall.tid1, svcall.start1, svcall.end2, out_file, figure_title, wg_high_mapq_depth_list, wg_total_depth_list, chr_len_list, bin_size, wg_avg_depth)

    # The depth table is only an intermediate file.
    os.remove(read_depth_file)

    return