コード例 #1
0
ファイル: quantify.py プロジェクト: EricDeveaud/spladder
def quantify_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Quantify an exon-skip event from segment coverage and edge counts.

    Parameters:
        event: event description. In MATLAB mode the start/stop pairs
            ``exon_pre``, ``exon`` and ``exon_aft`` are read; otherwise rows
            0, 1 and 2 of ``event.exons2`` are used as the upstream exon,
            the skipped exon and the downstream exon.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the flattened index of a
            (from-segment, to-segment) pair, column 1 the count of that edge.
        CFG: configuration dict; only ``CFG['is_matlab']`` is read.

    Returns:
        Float array of length 2. ``cov[0]`` is the length-weighted mean
        coverage of the skipped exon plus the counts of the two edges that
        connect it to its neighbours; ``cov[1]`` is the count of the edge
        that skips it. An edge that is not listed in ``counts_edges``
        contributes 0.
    """

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape
        # edge ids in this mode are column-major and shifted by one
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg[0, 0][0, :] == event.exon_pre[0]) & (sg[0, 0][1, :] == event.exon_pre[1]))[0]
        idx_exon = sp.where((sg[0, 0][0, :] == event.exon[0]) & (sg[0, 0][1, :] == event.exon[1]))[0]
        idx_exon_aft = sp.where((sg[0, 0][0, :] == event.exon_aft[0]) & (sg[0, 0][1, :] == event.exon_aft[1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs[0, 1][idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs[0, 1][idx_exon_aft, :])[1])
        seg_exon = sp.sort(sp.where(segs[0, 1][idx_exon, :])[1])
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        idx_exon = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
        idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons2[2, 0]) & (sg.vertices[1, :] == event.exons2[2, 1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])
        seg_exon = sp.sort(sp.where(segs.seg_match[idx_exon, :])[1])

    def _edge_count(seg_from, seg_to):
        # count of the edge seg_from -> seg_to, or 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            # take a scalar so it can be stored in a single cov slot
            return counts_edges[hits[0], 1]
        return 0

    # get inner exon cov
    cov[0] = sp.sum(counts_segments[seg_exon] * seg_lens[seg_exon]) / sp.sum(seg_lens[seg_exon])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exon[0])
    # exon_exon_aft_conf
    cov[0] += _edge_count(seg_exon[-1], seg_exon_aft[0])
    # exon_pre_exon_aft_conf
    cov[1] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])

    return cov
コード例 #2
0
ファイル: quantify.py プロジェクト: ratschlab/spladder
def quantify_mult_exon_skip(event, gene, counts_segments, counts_edges):
    """Quantify a multiple-exon-skip event.

    Parameters:
        event: event description; row 0 of ``event.exons2`` is the upstream
            exon, the last row the downstream exon and all rows in between
            are the skipped exons.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.

    Returns:
        Float array of length 2. ``cov[0]`` is the length-weighted mean
        coverage over all segments of the skipped exons plus the counts of
        the edges chaining upstream exon, skipped exons and downstream
        exon; ``cov[1]`` is the count of the edge joining the upstream exon
        directly to the downstream exon. Edges missing from
        ``counts_edges`` contribute 0.
    """

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    # ravel_multi_index needs the full 2-D shape to flatten a segment pair
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons2[0, 0]) & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
    idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons2[-1, 0]) & (sg.vertices[1, :] == event.exons2[-1, 1]))[0]
    seg_exons = []
    for i in range(1, event.exons2.shape[0] - 1):
        tmp = sp.where((sg.vertices[0, :] == event.exons2[i, 0]) & (sg.vertices[1, :] == event.exons2[i, 1]))[0]
        seg_exons.append(sp.where(segs.seg_match[tmp, :])[1])

    ### find segments corresponding to exons
    seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
    seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])

    seg_exons_u = sp.sort(sp.unique([x for sublist in seg_exons for x in sublist]))

    def _edge_count(seg_from, seg_to):
        # count of the edge seg_from -> seg_to, or 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    ### inner exons_cov
    cov[0] = sp.sum(counts_segments[seg_exons_u] * seg_lens[seg_exons_u]) / sp.sum(seg_lens[seg_exons_u])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exons[0][0])
    # exon_exon_aft_conf
    cov[0] += _edge_count(seg_exons[-1][-1], seg_exon_aft[0])
    # exon_pre_exon_aft_conf
    cov[1] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])
    # sum_inner_exon_conf
    for i in range(len(seg_exons) - 1):
        cov[0] += _edge_count(seg_exons[i][-1], seg_exons[i + 1][0])

    return cov
コード例 #3
0
def coordinates_to_voxel_idx(coords_xyz, masker):
	"""Map coordinates to positions in the masker's in-mask voxel list.

	coords_xyz is a 2-D array with one point per column (a row of ones is
	appended to make the points homogeneous). The points are pushed through
	the inverse of ``masker.volume.get_affine()``, rounded to integer voxel
	coordinates, flattened against ``masker.dims`` and looked up in
	``masker.in_mask``.

	Returns an int64 array of shape (1, n_points) holding, per point, the
	position of its flattened voxel index inside ``masker.in_mask``.
	"""
	# transform to homogeneous coordinates
	coords_h_xyz = sp.append(coords_xyz, ones([1,coords_xyz.shape[1]]),axis=0)
	
	# apply inverse affine transformation to get homogeneous coordinates in voxel space
	inv_transf = sp.linalg.inv(masker.volume.get_affine())
	coords_h_voxel_space = inv_transf.dot(coords_h_xyz)
	coords_h_voxel_space = sp.rint(coords_h_voxel_space).astype(int)
	
	# remove homogeneous dimension
	coords_voxel_space = coords_h_voxel_space[0:-1,:]
	
	# convert coordinates to idcs in a flattened voxel space
	flattened_idcs = sp.ravel_multi_index(coords_voxel_space, masker.dims)
	
	# check if there is any study data for the flattened idcs
	voxel_idcs = sp.zeros((1,len(flattened_idcs)),dtype=int64)
	for i, flat_idx in enumerate(flattened_idcs):
		# look the voxel up once and reuse the result
		idcs = find(masker.in_mask == flat_idx)
		if len(idcs) > 0:
			voxel_idcs[0,i] = idcs
		else:
			# NOTE(review): voxel_idcs is an int64 array, so storing nan
			# raises ValueError for a voxel outside the mask. Pick a
			# sentinel or a float result together with the callers.
			voxel_idcs[0,i] = nan
			
	return voxel_idcs
コード例 #4
0
ファイル: quantify.py プロジェクト: ratschlab/spladder
def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos):
    """Quantify an intron-retention event.

    Parameters:
        event: event description; rows 0 and 1 of ``event.exons1`` are the
            two exons flanking the intron.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.
        counts_seg_pos: accepted for interface compatibility; not read here.

    Returns:
        Float array of length 2. ``cov[0]`` is the length-weighted mean
        coverage of the segments lying between the two exons; ``cov[1]`` is
        the count of the edge joining the last segment of the first exon to
        the first segment of the second exon (0 if that edge is not listed
        in ``counts_edges``).

    Raises:
        AssertionError: if no segment lies between the two exons.
    """

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    # the intron is whatever remains once both exons' segments are removed
    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute exon coverages as mean of position wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    edge_id = sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset
    idx = sp.where(counts_edges[:, 0] == edge_id)[0]
    if idx.shape[0] > 0:
        # take a scalar; cov[1] stays 0 when the edge is not listed
        cov[1] = counts_edges[idx[0], 1]

    return cov
コード例 #5
0
ファイル: quantify.py プロジェクト: ratschlab/spladder
def quantify_mutex_exons(event, gene, counts_segments, counts_edges):
    """Quantify a mutually-exclusive-exons event.

    Parameters:
        event: event description; row 0 and the last row of ``event.exons1``
            are the shared upstream and downstream exons, row 1 of
            ``event.exons1`` is the first alternative exon and row 1 of
            ``event.exons2`` the second alternative exon.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.

    Returns:
        Float array of length 2. ``cov[0]`` / ``cov[1]`` hold, for the first
        / second alternative exon, its length-weighted mean coverage plus
        the counts of the two edges linking it to the shared flanking
        exons. Edges missing from ``counts_edges`` contribute 0.
    """

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    # ravel_multi_index needs the full 2-D shape to flatten a segment pair
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons1[-1, 0]) & (sg.vertices[1, :] == event.exons1[-1, 1]))[0]
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons2[1, 0]) & (sg.vertices[1, :] == event.exons2[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
    seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    def _edge_count(seg_from, seg_to):
        # count of the edge seg_from -> seg_to, or 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    # exon1 cov
    cov[0] = sp.sum(counts_segments[seg_exon1] * seg_lens[seg_exon1]) / sp.sum(seg_lens[seg_exon1])
    # exon2 cov
    cov[1] = sp.sum(counts_segments[seg_exon2] * seg_lens[seg_exon2]) / sp.sum(seg_lens[seg_exon2])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon1_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exon1[0])
    # exon_pre_exon2_conf
    cov[1] += _edge_count(seg_exon_pre[-1], seg_exon2[0])
    # exon1_exon_aft_conf
    cov[0] += _edge_count(seg_exon1[-1], seg_exon_aft[0])
    # exon2_exon_aft_conf
    cov[1] += _edge_count(seg_exon2[-1], seg_exon_aft[0])

    return cov
コード例 #6
0
ファイル: utils.py プロジェクト: ratschlab/spladder
def replace_sub_matrix(mat_in, idx, mat_put):
    """Overwrite the idx-by-idx sub-matrix of mat_in with mat_put, in place.

    Every (row, column) combination drawn from idx is visited in row-major
    order and receives the matching element of the flattened mat_put.
    mat_in is modified and also returned.
    """
    new_values = mat_put.ravel()
    assert (idx.shape[0] * idx.shape[0]) == new_values.shape[0]

    # enumerate all (row, col) targets, rows varying slowest
    targets = [(row, col) for row in idx for col in idx]
    target_rows = [row for row, _ in targets]
    target_cols = [col for _, col in targets]

    full_shape = (mat_in.shape[0], mat_in.shape[1])
    flat_targets = sp.ravel_multi_index([target_rows, target_cols], full_shape)
    sp.put(mat_in, flat_targets, new_values)

    return mat_in
コード例 #7
0
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    """Verify an alternative 5'/3' splice-site event.

    Parameters:
        event: event description; ``event.exons1`` and ``event.exons2`` each
            hold two exons (rows) as start/stop pairs, one pair of exons per
            isoform.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.
        CFG: configuration dict; ``CFG['alt_prime']['min_diff_rel_cov']``
            and ``CFG['alt_prime']['min_intron_count']`` are read.

    Returns:
        Tuple ``(verified, info)``. ``info`` is
        ``[valid, exon_diff_cov, exon_const_cov, intron1_conf, intron2_conf]``;
        ``verified[0]`` is set when the differing region reaches the
        relative coverage threshold and ``verified[1]`` when both introns
        reach the minimum count. Invalid coordinates return early with
        ``info[0] == 0``.

    Raises:
        AssertionError: if an exon maps to no segment or an intron edge is
            not listed in ``counts_edges``.
        SystemExit: if the isoforms differ in both exons.
    """
    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)

    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] !=
                                                       event.exons2[1, 0]):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    def _segments_of(exons, row):
        # segment indices belonging to the exon stored in exons[row]
        start, stop = exons[row, 0], exons[row, 1]
        idx_exon = sp.where((sg.vertices[0, :] == start)
                            & (sg.vertices[1, :] == stop))[0]
        if idx_exon.shape[0] == 0:
            # exon is not a splicegraph vertex: use the segments inside it
            return sp.where((segs.segments[0, :] >= start) &
                            (segs.segments[1, :] <= stop))[0]
        return sp.where(segs.seg_match[idx_exon, :])[1]

    ### find segments of the exons corresponding to event
    segs_exon11 = _segments_of(event.exons1, 0)
    segs_exon12 = _segments_of(event.exons1, 1)
    segs_exon21 = _segments_of(event.exons2, 0)
    segs_exon22 = _segments_of(event.exons2, 1)

    assert (segs_exon11.shape[0] > 0)
    assert (segs_exon12.shape[0] > 0)
    assert (segs_exon21.shape[0] > 0)
    assert (segs_exon22.shape[0] > 0)

    # array_equal also copes with segment lists of different length, where
    # an elementwise == cannot be broadcast
    if sp.array_equal(segs_exon11, segs_exon21):
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.array_equal(segs_exon12, segs_exon22):
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        # write() works on both Python 2 and Python 3
        sys.stderr.write("ERROR: both exons differ in alt prime event in verify_alt_prime\n")
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(
        seg_lens[seg_diff])
    # exon_const_cov
    info[2] = sp.sum(counts_segments[seg_const] *
                     seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert (idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
コード例 #8
0
def verify_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Verify an exon-skip event against coverage and edge-count thresholds.

    Parameters:
        event: event description; ``event.exons1`` and ``event.exons2`` hold
            exons (rows) as start/stop pairs. Rows 0, 1 and 2 of
            ``event.exons2`` are used as the upstream exon, the skipped exon
            and the downstream exon.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.
        CFG: configuration dict; ``min_skip_rel_cov``, ``min_non_skip_count``
            and ``min_skip_count`` are read from ``CFG['exon_skip']``.

    Returns:
        Tuple ``(verified, info)``. ``info`` is laid out as described in the
        comment below; ``verified`` holds one 0/1 flag each for the relative
        coverage of the skipped exon, the two edges entering/leaving it and
        the edge skipping it. An edge missing from ``counts_edges`` is
        reported with a count of 0. Invalid coordinates return early with
        ``info[0]`` set to False.
    """
    # [verified, info] = verify_exon_skip(event, fn_bam, CFG)

    verified = [0, 0, 0, 0]
    # (0) valid, (1) exon_cov, (2) exon_pre_cov, (3) exon_aft_cov,
    # (4) exon_pre_exon_conf, (5) exon_exon_aft_conf, (6) exon_pre_exon_aft_conf
    info = [1, 0, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = False
        return (verified, info)
    ### check validity of exon coordinates (start < stop)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any(
            event.exons2[:, 1] - event.exons2[:, 0] < 1):
        info[0] = False
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph
    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    def _exon_segments(row):
        # sorted segment indices of the exon stored in event.exons2[row]
        idx_exon = sp.where((sg.vertices[0, :] == event.exons2[row, 0])
                            & (sg.vertices[1, :] == event.exons2[row, 1]))[0]
        return sp.sort(sp.where(segs.seg_match[idx_exon, :])[1])

    def _mean_cov(seg_ids):
        # length-weighted mean coverage over the given segments
        return sp.sum(counts_segments[seg_ids] *
                      seg_lens[seg_ids]) / sp.sum(seg_lens[seg_ids])

    def _edge_count(seg_from, seg_to):
        # count of the edge seg_from -> seg_to, or 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to],
                                       segs.seg_edges.shape)
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    ### find segments corresponding to exons
    seg_exon_pre = _exon_segments(0)
    seg_exon = _exon_segments(1)
    seg_exon_aft = _exon_segments(2)

    # exon pre cov
    info[2] = _mean_cov(seg_exon_pre)
    # exon aft cov
    info[3] = _mean_cov(seg_exon_aft)
    # exon cov
    info[1] = _mean_cov(seg_exon)

    ### check if coverage of skipped exon is >= than FACTOR times average of pre and after
    if info[1] >= CFG['exon_skip']['min_skip_rel_cov'] * (info[2] +
                                                          info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    info[4] = _edge_count(seg_exon_pre[-1], seg_exon[0])
    if info[4] >= CFG['exon_skip']['min_non_skip_count']:
        verified[1] = 1
    # exon_exon_aft_conf
    info[5] = _edge_count(seg_exon[-1], seg_exon_aft[0])
    if info[5] >= CFG['exon_skip']['min_non_skip_count']:
        verified[2] = 1
    # exon_pre_exon_aft_conf
    info[6] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])
    if info[6] >= CFG['exon_skip']['min_skip_count']:
        verified[3] = 1

    return (verified, info)
コード例 #9
0
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency

    index_map is a 2-D integer array with one node per row and three
    coordinate columns; data_array is the 3-D array those coordinates index.
    Nodes are processed in chunks of 10000 rows. For every node all 26
    neighbor offsets are tried; neighbors with a negative coordinate, a
    coordinate outside data_array's shape, or a false data_array entry are
    dropped.

    Returns an (n, 2) uint32 array of unique connections given as flattened
    data_array indices, with the smaller index in the first column.
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    conn_map = list(product([0, -1, 1], [0, -1, 1], [0, -1, 1]))
    conn_map = sp.array(conn_map, dtype=int)
    # the first offset is (0, 0, 0), i.e. the node itself
    conn_map = conn_map[1:]
    #
    # creating slice list to process data chunks; at least one (possibly
    # empty) chunk is always produced
    chunk_size = 10000
    num_nodes = index_map.shape[0]
    slice_list = [slice(start, min(start + chunk_size, num_nodes))
                  for start in range(0, max(num_nodes, 1), chunk_size)]
    #
    # chunks are collected here and joined once after the loop
    conn_chunks = [sp.ones((0, 2), dtype=sp.uint32)]
    logger.debug('    number of slices to process: {}'.format(len(slice_list)))
    for sect in slice_list:
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, conn_map.shape[0], axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, conn_map.shape[0], axis=0)])
        #
        # removing neighbors with negative indices
        mask = inds[:, 0:3] >= 0
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        mask = (inds[:, 0] < x_dim, inds[:, 1] < y_dim, inds[:, 2] < z_dim)
        mask = sp.stack(mask, axis=1)
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing indices with zero-weight connection
        # NOTE(review): the lookup result is used directly as a row mask,
        # which presumably requires data_array to be boolean - confirm
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3),
                                         data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # storing section connectivity data
            conn_chunks.append(inds.astype(sp.uint32))
    conns = sp.concatenate(conn_chunks, axis=0)
    #
    # viewing each row as one opaque item lets unique() drop duplicate rows
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize*conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug('    removed {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
コード例 #10
0
ファイル: verify.py プロジェクト: ccwang12/spladder
def verify_mutex_exons(event, gene, counts_segments, counts_edges, CFG):
    """Verify a mutually-exclusive-exons event.

    Returns ``(verified, info)``. ``info`` carries the validity flag, the
    four exon coverages and the four edge counts (layout in the comment
    below). ``verified`` carries four 0/1 flags: relative coverage of the
    first and of the second alternative exon, then confirmation of both
    edges leaving the upstream exon and of both edges entering the
    downstream exon. Invalid exon coordinates return early with
    ``info[0] == 0``.
    """
    # [verified, info] = verify_mutex_exons(event, gene, counts_segments, counts_edges, CFG)

    verified = [0, 0, 0, 0]

    # (0) valid, (1) exon_pre_cov, (2) exon1_cov, (3) exon2_cov, (4) exon_aft_cov,
    # (5) exon_pre_exon1_conf, (6) exon_pre_exon2_conf, (7) exon1_exon_aft_conf, (8) exon2_exon_aft_conf
    info = [1, 0, 0, 0, 0, 0, 0, 0, 0]

    iso1 = event.exons1
    iso2 = event.exons2

    # negative coordinates make the event invalid
    if sp.any(iso1 < 0) or sp.any(iso2 < 0):
        info[0] = 0
        return (verified, info)
    # so do exons with start >= stop, or alternative exons that overlap
    if sp.any(iso1[:, 1] - iso1[:, 0] < 1) or sp.any(iso2[:, 1] - iso2[:, 0] < 1) or \
       (iso1[1, 1] > iso2[1, 0] and iso1[1, 0] < iso2[1, 0]) or \
       (iso2[1, 1] > iso1[1, 0] and iso2[1, 0] < iso1[1, 0]):
        info[0] = 0
        return (verified, info)

    graph = gene.splicegraph
    seg_graph = gene.segmentgraph

    def segments_for(exons, row):
        # sorted segment indices of the splicegraph vertex equal to exons[row]
        vertex = sp.where((graph.vertices[0, :] == exons[row, 0]) & (graph.vertices[1, :] == exons[row, 1]))[0]
        return sp.sort(sp.where(seg_graph.seg_match[vertex, :])[1])

    pre_segs = segments_for(iso1, 0)
    aft_segs = segments_for(iso1, -1)
    alt1_segs = segments_for(iso1, 1)
    alt2_segs = segments_for(iso2, 1)

    lengths = seg_graph.segments[1, :] - seg_graph.segments[0, :]

    def mean_cov(seg_ids):
        # length-weighted mean coverage over the given segments
        return sp.sum(counts_segments[seg_ids] * lengths[seg_ids]) / sp.sum(lengths[seg_ids])

    # coverages: upstream, first alternative, second alternative, downstream
    info[1] = mean_cov(pre_segs)
    info[2] = mean_cov(alt1_segs)
    info[3] = mean_cov(alt2_segs)
    info[4] = mean_cov(aft_segs)

    # each alternative exon must reach FACTOR times the mean flanking coverage
    if info[2] >= CFG['mutex_exons']['min_skip_rel_cov'] * (info[1] + info[4])/2:
        verified[0] = 1
    if info[3] >= CFG['mutex_exons']['min_skip_rel_cov'] * (info[1] + info[4])/2:
        verified[1] = 1

    def edge_conf(seg_from, seg_to):
        # number of reads confirming the edge, 0 when the edge is not listed
        flat_id = sp.ravel_multi_index([seg_from, seg_to], seg_graph.seg_edges.shape)
        hits = sp.where(counts_edges[:, 0] == flat_id)[0]
        if len(hits.shape) > 0 and hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    # upstream -> alternative 1 / alternative 2
    info[5] = edge_conf(pre_segs[-1], alt1_segs[0])
    info[6] = edge_conf(pre_segs[-1], alt2_segs[0])
    # alternative 1 / alternative 2 -> downstream
    info[7] = edge_conf(alt1_segs[-1], aft_segs[0])
    info[8] = edge_conf(alt2_segs[-1], aft_segs[0])

    # both edges on a side must reach the minimum confirmation count
    if min(info[5], info[6]) >= CFG['mutex_exons']['min_conf_count']:
        verified[2] = 1
    if min(info[7], info[8]) >= CFG['mutex_exons']['min_conf_count']:
        verified[3] = 1

    return (verified, info)
コード例 #11
0
ファイル: verify.py プロジェクト: ccwang12/spladder
def verify_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Verify an exon-skip event against coverage and edge-count thresholds.

    Parameters:
        event: event description; ``event.exons1`` and ``event.exons2`` hold
            exons (rows) as start/stop pairs. Rows 0, 1 and 2 of
            ``event.exons2`` are used as the upstream exon, the skipped exon
            and the downstream exon.
        gene: object providing ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment coverage, indexed by segment index.
        counts_edges: 2-D array; column 0 holds the row-major flattened
            index of a (from-segment, to-segment) pair, column 1 the count
            of that edge.
        CFG: configuration dict; ``min_skip_rel_cov``, ``min_non_skip_count``
            and ``min_skip_count`` are read from ``CFG['exon_skip']``.

    Returns:
        Tuple ``(verified, info)``. ``info`` is laid out as described in the
        comment below; ``verified`` holds one 0/1 flag each for the relative
        coverage of the skipped exon, the two edges entering/leaving it
        (tested against ``min_non_skip_count``) and the edge skipping it
        (tested against ``min_skip_count``). An edge missing from
        ``counts_edges`` is reported with a count of 0. Invalid coordinates
        return early with ``info[0]`` set to False.
    """
    # [verified, info] = verify_exon_skip(event, fn_bam, CFG)

    verified = [0, 0, 0, 0]
    # (0) valid, (1) exon_cov, (2) exon_pre_cov, (3) exon_aft_cov,
    # (4) exon_pre_exon_conf, (5) exon_exon_aft_conf, (6) exon_pre_exon_aft_conf
    info = [1, 0, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = False
        return (verified, info)
    ### check validity of exon coordinates (start < stop)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any(event.exons2[:, 1] - event.exons2[:, 0] < 1):
        info[0] = False
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph
    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    def _exon_segments(row):
        # sorted segment indices of the exon stored in event.exons2[row]
        idx_exon = sp.where((sg.vertices[0, :] == event.exons2[row, 0]) & (sg.vertices[1, :] == event.exons2[row, 1]))[0]
        return sp.sort(sp.where(segs.seg_match[idx_exon, :])[1])

    def _mean_cov(seg_ids):
        # length-weighted mean coverage over the given segments
        return sp.sum(counts_segments[seg_ids] * seg_lens[seg_ids]) / sp.sum(seg_lens[seg_ids])

    def _edge_count(seg_from, seg_to):
        # count of the edge seg_from -> seg_to, or 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], segs.seg_edges.shape)
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    ### find segments corresponding to exons
    seg_exon_pre = _exon_segments(0)
    seg_exon = _exon_segments(1)
    seg_exon_aft = _exon_segments(2)

    # exon pre cov
    info[2] = _mean_cov(seg_exon_pre)
    # exon aft cov
    info[3] = _mean_cov(seg_exon_aft)
    # exon cov
    info[1] = _mean_cov(seg_exon)

    ### check if coverage of skipped exon is >= than FACTOR times average of pre and after
    if info[1] >= CFG['exon_skip']['min_skip_rel_cov'] * (info[2] + info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    info[4] = _edge_count(seg_exon_pre[-1], seg_exon[0])
    if info[4] >= CFG['exon_skip']['min_non_skip_count']:
        verified[1] = 1
    # exon_exon_aft_conf
    info[5] = _edge_count(seg_exon[-1], seg_exon_aft[0])
    if info[5] >= CFG['exon_skip']['min_non_skip_count']:
        verified[2] = 1
    # exon_pre_exon_aft_conf: this is the skipping edge, so it is held to
    # the skip threshold, not the non-skip one
    info[6] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])
    if info[6] >= CFG['exon_skip']['min_skip_count']:
        verified[3] = 1

    return (verified, info)
コード例 #12
0
ファイル: quantify.py プロジェクト: bowhan/spladder
def quantify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    """Quantify both isoforms of an alt-prime event from pre-computed counts.

    For each isoform the result accumulates (a) the length-weighted mean
    coverage of the segments that distinguish the two isoforms, credited to
    the isoform that contains them, and (b) the read count stored for the
    isoform's intron edge.

    Parameters:
        event: event object. The native branch reads ``exons1`` and
            ``exons2`` (indexed as ``[exon, 0]`` / ``[exon, 1]``); the matlab
            branch reads ``exon_alt1``, ``exon_alt2`` and ``exon_const``.
        gene: object exposing ``splicegraph`` and ``segmentgraph``.
        counts_segments: array indexed by segment number.
        counts_edges: two-column array; column 0 is matched against the flat
            index of a segment pair within ``seg_edges``, column 1 is the
            value that gets added to the result.
        CFG: mapping; only ``CFG["is_matlab"]`` is read here.

    Returns:
        Float array of length 2: ``[isoform 1, isoform 2]``.

    Raises:
        AssertionError: an exon maps to no segment, or an intron edge is not
            present in ``counts_edges``.
        SystemExit: neither the first nor the second exon is shared between
            the two isoforms (native branch).
        Exception: the differential segments belong to none of the exons.
    """

    cov = sp.zeros((2,), dtype="float")

    sg = gene.splicegraph
    segs = gene.segmentgraph
    if CFG["is_matlab"]:
        # NOTE(review): this legacy branch is left untouched. As written it
        # looks non-functional: the sp.where(...) results below are not
        # indexed with [0] (unlike the native branch), so the `.shape`
        # look-ups operate on tuples, and `seg_diff` is read before it is
        # ever assigned in this branch. Confirm whether the branch is still
        # needed before repairing it.
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape[0]

        idx_exon_alt1 = sp.where((sg[0, 0][0, :] == event.exon_alt1[0]) & (sg[0, 0][1, :] == event.exon_alt1[1]))
        idx_exon_alt2 = sp.where((sg[0, 0][0, :] == event.exon_alt2[0]) & (sg[0, 0][1, :] == event.exon_alt2[1]))
        idx_exon_const = sp.where((sg[0, 0][0, :] == event.exon_const[0]) & (sg[0, 0][1, :] == event.exon_const[1]))
        if idx_exon_alt1.shape[0] == 0:
            segs_exon_alt1 = sp.where(
                (segs[0, 0][0, :] >= event.exon_alt1[0]) & (segs[0, 0][1, :] >= event.exon_alt1[1])
            )
        else:
            segs_exon_alt1 = sp.where(segs[0, 1][idx_exon_alt1, :])[1]
        if idx_exon_alt2.shape[0] == 0:
            segs_exon_alt2 = sp.where(
                (segs[0, 0][0, :] >= event.exon_alt2[0]) & (segs[0, 0][1, :] >= event.exon_alt2[1])
            )
        else:
            segs_exon_alt2 = sp.where(segs[0, 1][idx_exon_alt2, :])[1]
        if idx_exon_const.shape[0] == 0:
            segs_exon_const = sp.where(
                (segs[0, 0][0, :] >= event.exon_const[0]) & (segs[0, 0][1, :] >= event.exon_const[1])
            )
        else:
            segs_exon_const = sp.where(segs[0, 1][idx_exon_const, :])[1]

        assert segs_exon_alt1.shape[0] > 0
        assert segs_exon_alt2.shape[0] > 0
        assert segs_exon_const.shape[0] > 0

        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        if max(segs_exon_alt1[-1], segs_exon_alt2[-1]) < segs_exon_const[0]:
            # intron1_conf
            idx = (
                sp.where(
                    counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt1[0], segs_exon_const[-1]], seg_shape)
                )[0]
                + 1
            )
            assert idx.shape[0] > 0
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = (
                sp.where(
                    counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_alt2[0], segs_exon_const[-1]], seg_shape)
                )[0]
                + 1
            )
            assert idx.shape[0] > 0
            cov[1] += counts_edges[idx, 1]
        elif min(segs_exon_alt1[0], segs_exon_alt2[0]) > segs_exon_const[-1]:
            # intron1_conf
            idx = (
                sp.where(
                    counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt1[-1]], seg_shape)
                )[0]
                + 1
            )
            assert idx.shape[0] > 0
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = (
                sp.where(
                    counts_edges[:, 0] == sp.ravel_multi_index([segs_exon_const[0], segs_exon_alt2[-1]], seg_shape)
                )[0]
                + 1
            )
            assert idx.shape[0] > 0
            cov[1] += counts_edges[idx, 1]
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        # A (from, to) segment pair is a 2-D index, so ravel_multi_index needs
        # the full 2-D shape of seg_edges, not just its first dimension.
        seg_shape = segs.seg_edges.shape

        def _exon_segments(start, stop):
            # Segment indices covered by the exon (start, stop).
            idx_exon = sp.where((sg.vertices[0, :] == start) & (sg.vertices[1, :] == stop))[0]
            if idx_exon.shape[0] == 0:
                # exon is not a splice graph vertex: take every segment that
                # lies completely inside its coordinates
                return sp.where((segs.segments[0, :] >= start) & (segs.segments[1, :] <= stop))[0]
            return sp.where(segs.seg_match[idx_exon, :])[1]

        def _shares_segment(seg_a, seg_b):
            # True if the two index arrays have at least one segment in common.
            return sp.intersect1d(seg_a, seg_b).shape[0] > 0

        def _intron_count(seg_from, seg_to):
            # Count stored for the edge seg_from -> seg_to.
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_from, seg_to], seg_shape))[0]
            assert idx.shape[0] > 0
            return counts_edges[idx[0], 1]

        ### find segments corresponding to the exons of the event
        segs_exon11 = _exon_segments(event.exons1[0, 0], event.exons1[0, 1])
        segs_exon12 = _exon_segments(event.exons1[1, 0], event.exons1[1, 1])
        segs_exon21 = _exon_segments(event.exons2[0, 0], event.exons2[0, 1])
        segs_exon22 = _exon_segments(event.exons2[1, 0], event.exons2[1, 1])

        assert segs_exon11.shape[0] > 0
        assert segs_exon12.shape[0] > 0
        assert segs_exon21.shape[0] > 0
        assert segs_exon22.shape[0] > 0

        ### the isoforms share one exon; the other exon differs by seg_diff
        # array_equal also copes with exons that have different segment counts
        if sp.array_equal(segs_exon11, segs_exon21):
            seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        elif sp.array_equal(segs_exon12, segs_exon22):
            seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        else:
            sys.stderr.write("ERROR: both exons differ in alt prime event in verify_alt_prime\n")
            sys.exit(1)

        # exon_diff_cov
        if _shares_segment(seg_diff, segs_exon11) or _shares_segment(seg_diff, segs_exon12):
            cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        elif _shares_segment(seg_diff, segs_exon21) or _shares_segment(seg_diff, segs_exon22):
            cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        else:
            raise Exception("differential segment not part of any other segment")

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        # intron1_conf
        cov[0] += _intron_count(segs_exon11[-1], segs_exon12[0])
        # intron2_conf
        cov[1] += _intron_count(segs_exon21[-1], segs_exon22[0])

    return cov
コード例 #13
0
ファイル: count.py プロジェクト: jiahsinhuang/spladder
def count_graph_coverage(genes, fn_bam=None, CFG=None, fn_out=None):
    """Count segment coverage and intron-edge support per sample and gene.

    Genes are processed contig by contig so that the per-contig read cache
    passed to the sparse reader can be reused.

    Parameters:
        genes: array of gene objects, or a dict bundling all arguments under
            the keys 'genes', 'fn_bam', 'CFG' and optionally 'fn_out' (only
            honoured when ``fn_bam`` is None).
        fn_bam: one alignment file name or a list of them (one per sample).
        CFG: mapping; reads 'bam_to_sparse', 'var_aware' and 'primary_only'.
        fn_out: if given, the result is pickled to this path instead of
            being returned.

    Returns:
        Object array of shape (samples, genes) holding one ``Counts`` object
        per entry, or None when ``fn_out`` is given.

    Side effects:
        Rebuilds empty segment graphs and overwrites ``start`` / ``stop`` on
        every gene; writes progress to stdout.
    """
    # [counts] = count_graph_coverage(genes, fn_bam, CFG, fn_out)

    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        CFG = PAR['CFG']

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    # maximal distance between an intron boundary and a segment boundary
    intron_tol = 0

    sys.stdout.write('genes: %i\n' % genes.shape[0])
    for f in range(counts.shape[0]):
        sys.stdout.write('\nsample %i/%i\n' % (f + 1, counts.shape[0]))

        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        ### iterate per contig, so the bam caching works better
        contigs = sp.array([x.chr for x in genes])
        for contig in sp.unique(contigs):
            contig_idx = sp.where(contigs == contig)[0]
            bam_cache = dict()
            # written via sys.stdout so the line works on Python 2 and 3 alike
            sys.stdout.write('\ncounting %i genes on contig %s\n' % (contig_idx.shape[0], contig))
            for ii, i in enumerate(contig_idx):
                sys.stdout.write('.')
                if ii > 0 and ii % 50 == 0:
                    sys.stdout.write('%i/%i\n' % (ii, contig_idx.shape[0]))
                sys.stdout.flush()
                gg = genes[i]
                if gg.segmentgraph.is_empty():
                    gg.segmentgraph = Segmentgraph(gg)
                gg.start = gg.segmentgraph.segments.ravel().min()
                gg.stop = gg.segmentgraph.segments.ravel().max()

                counts[f, i] = Counts(gg.segmentgraph.segments.shape[1])

                if CFG['bam_to_sparse'] and (fn_bam[f].endswith('npz') or os.path.exists(re.sub(r'bam$', '', fn_bam[f]) + 'npz')):
                    ### make sure that we query the right contig from cache
                    assert(gg.chr == contig)
                    (tracks, intron_list) = add_reads_from_sparse_bam(gg, fn_bam[f], contig, types=['exon_track','intron_list'], filter=None, cache=bam_cache)
                else:
                    ### add RNA-seq evidence to the gene structure
                    (tracks, intron_list) = add_reads_from_bam(gg, fn_bam[f], ['exon_track','intron_list'], None, CFG['var_aware'], CFG['primary_only'])
                    intron_list = intron_list[0] ### TODO

                ### extract mean exon coverage for all segments
                for j in range(gg.segmentgraph.segments.shape[1]):
                    idx = sp.arange(gg.segmentgraph.segments[0, j], gg.segmentgraph.segments[1, j]) - gg.start
                    counts[f, i].segments[j] = sp.mean(sp.sum(tracks[:, idx], axis=0))
                    counts[f, i].seg_pos[j] = sp.sum(sp.sum(tracks[:, idx], axis=0) > 0)

                k, l = sp.where(gg.segmentgraph.seg_edges == 1)
                edge_shape = gg.segmentgraph.seg_edges.shape
                has_introns = intron_list.shape[0] > 0

                ### one row [flat edge index, intron count] per segment edge;
                ### the count stays 0 if there are no introns at all or none
                ### that matches the edge
                for m in range(k.shape[0]):
                    n_reads = 0
                    if has_introns:
                        idx = sp.where((sp.absolute(intron_list[:, 0] - gg.segmentgraph.segments[1, k[m]]) <= intron_tol) & (sp.absolute(intron_list[:, 1] - gg.segmentgraph.segments[0, l[m]]) <= intron_tol))[0]
                        if idx.shape[0] > 0:
                            n_reads = sp.sum(intron_list[idx, 2])
                    row = sp.atleast_2d(sp.array([sp.ravel_multi_index([k[m], l[m]], edge_shape), n_reads]))
                    if counts[f, i].edges.shape[0] == 0:
                        counts[f, i].edges = row
                    else:
                        counts[f, i].edges = sp.r_[counts[f, i].edges, row]

    if fn_out is not None:
        # protocol -1 is a binary pickle: write in binary mode and make sure
        # the handle is closed again
        with open(fn_out, 'wb') as fh:
            cPickle.dump(counts, fh, -1)
    else:
        return counts
コード例 #14
0
def verify_mutex_exons(event, gene, counts_segments, counts_edges, CFG):
    """Check a mutually-exclusive-exons event against coverage and edge counts.

    [verified, info] = verify_mutex_exons(event, gene, counts_segments, counts_edges, CFG)

    ``verified`` holds four 0/1 flags:
        (0) exon1 coverage reaches the relative threshold
        (1) exon2 coverage reaches the relative threshold
        (2) both introns leaving the preceding exon reach the minimal count
        (3) both introns entering the following exon reach the minimal count

    ``info`` holds:
        (0) valid, (1) exon_pre_cov, (2) exon1_cov, (3) exon2_cov,
        (4) exon_aft_cov, (5) exon_pre_exon1_conf, (6) exon_pre_exon2_conf,
        (7) exon1_exon_aft_conf, (8) exon2_exon_aft_conf

    If the exon coordinates are invalid, ``info[0]`` is set to 0 and both
    lists are returned otherwise untouched.
    """

    verified = [0, 0, 0, 0]
    info = [1, 0, 0, 0, 0, 0, 0, 0, 0]

    ex1 = event.exons1
    ex2 = event.exons2

    ### coordinates must not be negative
    if sp.any(ex1 < 0) or sp.any(ex2 < 0):
        info[0] = 0
        return (verified, info)

    ### every exon needs start < stop and the two inner exons must not overlap
    if (sp.any(ex1[:, 1] - ex1[:, 0] < 1)
            or sp.any(ex2[:, 1] - ex2[:, 0] < 1)
            or (ex1[1, 1] > ex2[1, 0] and ex1[1, 0] < ex2[1, 0])
            or (ex2[1, 1] > ex1[1, 0] and ex2[1, 0] < ex1[1, 0])):
        info[0] = 0
        return (verified, info)

    vertices = gene.splicegraph.vertices
    seg_graph = gene.segmentgraph

    def _exon_segments(start, stop):
        # sorted segment indices of the splice graph vertex (start, stop)
        hit = sp.where((vertices[0, :] == start) & (vertices[1, :] == stop))[0]
        return sp.sort(sp.where(seg_graph.seg_match[hit, :])[1])

    seg_lens = seg_graph.segments[1, :] - seg_graph.segments[0, :]

    def _mean_cov(seg_idx):
        # length-weighted mean coverage over the given segments
        return sp.sum(counts_segments[seg_idx] * seg_lens[seg_idx]) / sp.sum(seg_lens[seg_idx])

    def _edge_count(seg_from, seg_to):
        # count stored for the edge last(seg_from) -> first(seg_to), 0 if absent
        flat = sp.ravel_multi_index([seg_from[-1], seg_to[0]], seg_graph.seg_edges.shape)
        hits = sp.where(counts_edges[:, 0] == flat)[0]
        if len(hits.shape) > 0 and hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    ### map the four exons of the event onto segments
    seg_pre = _exon_segments(ex1[0, 0], ex1[0, 1])
    seg_aft = _exon_segments(ex1[-1, 0], ex1[-1, 1])
    seg_first = _exon_segments(ex1[1, 0], ex1[1, 1])
    seg_second = _exon_segments(ex2[1, 0], ex2[1, 1])

    ### exon coverages
    info[1] = _mean_cov(seg_pre)
    info[2] = _mean_cov(seg_first)
    info[3] = _mean_cov(seg_second)
    info[4] = _mean_cov(seg_aft)

    ### each inner exon has to reach FACTOR times the mean of pre and aft
    min_cov = CFG['mutex_exons']['min_skip_rel_cov'] * (info[1] + info[4]) / 2
    if info[2] >= min_cov:
        verified[0] = 1
    if info[3] >= min_cov:
        verified[1] = 1

    ### intron confirmation: number of reads supporting each junction
    info[5] = _edge_count(seg_pre, seg_first)
    info[6] = _edge_count(seg_pre, seg_second)
    info[7] = _edge_count(seg_first, seg_aft)
    info[8] = _edge_count(seg_second, seg_aft)

    ### the weaker of each intron pair decides
    min_conf = CFG['mutex_exons']['min_conf_count']
    if min(info[5], info[6]) >= min_conf:
        verified[2] = 1
    if min(info[7], info[8]) >= min_conf:
        verified[3] = 1

    return (verified, info)
コード例 #15
0
def make_amb(Fsorg,m_up,plen,nlags,nspec=128,winname = 'boxcar'):
    """ Make the ambiguity function dictionary that holds the lag ambiguity and
    range ambiguity. Uses a sinc function weighted by the window named in
    ``winname``. Currently only set up for an uncoded pulse.
    Inputs:
        Fsorg: A scalar, the original sampling frequency in Hertz.
        m_up: The upsampled ratio between the original sampling rate and the rate of
        the ambiguity function up sampling.
        plen: The length of the pulse. NOTE(review): the code compares it directly
        with time axes built from 1/Fsorg, which suggests seconds rather than
        samples - confirm.
        nlags: The number of lags used. The number of non-zero entries of the
        triangle window (lags tau with 0 <= tau < plen) has to be broadcastable
        against it when 'WttMatrix' is filled.
        nspec: The number of columns of 'WttMatrix' (default 128).
        winname: Name of the window handed to scisig.get_window (default 'boxcar').
    Outputs:
        Wttdict: A dictionary with the keys 'WttAll' which is the full ambiguity function
        for each lag, 'Wtt' is the max for each lag for plotting, 'Wrange' is the
        ambiguity in the range with the lag dimension summed, 'Wlag' The ambiguity
        for the lag, 'Delay' the numpy array for the lag sampling, 'Range' the array
        for the range sampling and 'WttMatrix' for a matrix that will impart the ambiguity
        function on a pulses.
    """

    # make the sinc
    # the tap count has to be a true integer for the window function
    nsamps = int(sp.floor(8.5*m_up))
    # make the tap count odd so the kernel is symmetric around zero
    nsamps = nsamps-(1-nsamps%2)

    nvec = sp.arange(-sp.floor(nsamps/2.0),sp.floor(nsamps/2.0)+1)
    curwin = scisig.get_window(winname,nsamps)
    outsinc = curwin*sp.sinc(nvec/m_up)
    outsinc = outsinc/sp.sum(outsinc)
    dt = 1/(Fsorg*m_up)
    Delay = sp.arange(-(len(nvec)-1),m_up*(nlags+5))*dt
    t_rng = sp.arange(0,1.5*plen,dt)
    # zero pad the kernel to the length of the delay axis
    numdiff = len(Delay)-len(outsinc)
    outsincpad  = sp.pad(outsinc,(0,numdiff),mode='constant',constant_values=(0.0,0.0))
    (srng,d2d)=sp.meshgrid(t_rng,Delay)
    # envelop function
    envfunc = sp.zeros(d2d.shape)
    envfunc[(d2d-srng+plen-Delay.min()>=0)&(d2d-srng+plen-Delay.min()<=plen)]=1
    envfunc = envfunc/sp.sqrt(envfunc.sum(axis=0).max())
    #create the ambiguity function for everything
    Wtt = sp.zeros((nlags,d2d.shape[0],d2d.shape[1]))
    cursincrep = sp.tile(outsincpad[:,sp.newaxis],(1,d2d.shape[1]))
    Wt0 = cursincrep*envfunc
    # same transform routine as inside the loop below
    Wt0fft = scfft.fft(Wt0,axis=0)
    for ilag in sp.arange(nlags):
        cursinc = sp.roll(outsincpad,ilag*m_up)
        cursincrep = sp.tile(cursinc[:,sp.newaxis],(1,d2d.shape[1]))
        Wta = cursincrep*envfunc
        #do fft based convolution, probably best method given sizes
        Wtafft = scfft.fft(Wta,axis=0)
        if ilag==0:
            nmove = len(nvec)-1
        else:
            nmove = len(nvec)
        Wtt[ilag] = sp.roll(scfft.ifft(Wtafft*sp.conj(Wt0fft),axis=0).real,nmove,axis=0)

    # make matrix to take
    # triangle window, non-zero only for lags 0 <= tau < plen
    tau = sp.arange(-sp.floor(nspec/2.),sp.ceil(nspec/2.))/Fsorg
    amb1d = plen-tau
    amb1d[amb1d<0]=0.
    amb1d[tau<0]=0.
    amb1d=amb1d/plen
    kp = sp.argwhere(amb1d>0).flatten()
    lagmat = sp.zeros((Wtt.shape[0],nspec))
    # row i gets the i-th non-zero triangle weight at its own lag column
    lagmat.flat[sp.ravel_multi_index((sp.arange(Wtt.shape[0]),kp),lagmat.shape)]=amb1d[kp]
    # v_C_0 is a module level constant, presumably the speed of light
    Wttdict = {'WttAll':Wtt,'Wtt':Wtt.max(axis=0),'Wrange':Wtt.sum(axis=1),'Wlag':Wtt.sum(axis=2),
               'Delay':Delay,'Range':v_C_0*t_rng/2.0,'WttMatrix':lagmat}
    return Wttdict
コード例 #16
0
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency

    Parameters:
        index_map: integer array with one row of three coordinates per node.
        data_array: three dimensional array that is indexed with neighbor
            coordinates to decide which neighbors are kept; it also has to
            provide the attribute ``index_int_type``.

    Returns:
        Two column array of flattened node indices, one row per connection,
        with the smaller index in the first column and duplicate rows
        removed.
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    # every offset combination except the first one, (0, 0, 0), which is
    # the node itself
    conn_map = sp.array(list(product([0, -1, 1], repeat=3)), dtype=int)[1:]
    n_neighbors = conn_map.shape[0]
    #
    # creating slice list to process data chunks, always at least one slice
    chunk = 10000
    n_nodes = index_map.shape[0]
    slice_list = [slice(start, min(start + chunk, n_nodes))
                  for start in range(0, max(n_nodes, 1), chunk)]
    #
    # per slice results are collected and joined once after the loop instead
    # of re-copying the growing array on every iteration
    conn_chunks = [sp.ones((0, 2), dtype=data_array.index_int_type)]
    logger.debug('\tnumber of slices to process: {}'.format(len(slice_list)))
    percent = 10
    for n, sect in enumerate(slice_list):
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, n_neighbors, axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        # it as a fourth column next to the neighbor coordinates
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, n_neighbors, axis=0)])
        #
        # removing neighbors with negative indices
        non_negative = inds[:, 0:3] >= 0
        inds = inds[sp.sum(non_negative, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        in_bounds = ((inds[:, 0] < x_dim)
                     & (inds[:, 1] < y_dim)
                     & (inds[:, 2] < z_dim))
        inds = inds[in_bounds]
        # removing indices with zero-weight connection
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3),
                                         data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # storing section connectivity data
            conn_chunks.append(inds.astype(sp.uint32))
        if int(n / len(slice_list) * 100) == percent:
            logger.debug('\tprocessed slice {:5d}, {}% complete'.format(
                n, percent))
            percent += 10
    conns = sp.concatenate(conn_chunks, axis=0)
    #
    # removing duplicate connections: every row is viewed as one opaque
    # item so that unique() compares whole rows
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize * conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug('\tremoved {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
コード例 #17
0
ファイル: quantify.py プロジェクト: ratschlab/spladder
def quantify_alt_prime(event, gene, counts_segments, counts_edges):
    """Quantify both isoforms of an alt-prime event from pre-computed counts.

    For each isoform the result accumulates (a) the length-weighted mean
    coverage of the segments that distinguish the two isoforms, credited to
    the isoform that contains them, and (b) the read count stored for the
    isoform's intron edge.

    Parameters:
        event: event object providing ``exons1`` and ``exons2``, indexed as
            ``[exon, 0]`` / ``[exon, 1]``.
        gene: object exposing ``splicegraph`` and ``segmentgraph``.
        counts_segments: array indexed by segment number.
        counts_edges: two-column array; column 0 is matched against the flat
            index of a segment pair within ``seg_edges``, column 1 is the
            value that gets added to the result.

    Returns:
        Float array of length 2: ``[isoform 1, isoform 2]``.

    Raises:
        AssertionError: an exon maps to no segment, or an intron edge is not
            present in ``counts_edges``.
        SystemExit: neither the first nor the second exon is shared between
            the two isoforms.
        Exception: the differential segments belong to none of the exons.
    """

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    # A (from, to) segment pair is a 2-D index, so ravel_multi_index needs
    # the full 2-D shape of seg_edges, not just its first dimension.
    seg_shape = segs.seg_edges.shape

    def _exon_segments(start, stop):
        # Segment indices covered by the exon (start, stop).
        idx_exon = sp.where((sg.vertices[0, :] == start) & (sg.vertices[1, :] == stop))[0]
        if idx_exon.shape[0] == 0:
            # exon is not a splice graph vertex: take every segment that
            # lies completely inside its coordinates
            return sp.where((segs.segments[0, :] >= start) & (segs.segments[1, :] <= stop))[0]
        return sp.where(segs.seg_match[idx_exon, :])[1]

    def _shares_segment(seg_a, seg_b):
        # True if the two index arrays have at least one segment in common.
        return sp.intersect1d(seg_a, seg_b).shape[0] > 0

    def _intron_count(seg_from, seg_to):
        # Count stored for the edge seg_from -> seg_to.
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_from, seg_to], seg_shape))[0]
        assert(idx.shape[0] > 0)
        return counts_edges[idx[0], 1]

    ### find segments corresponding to the exons of the event
    segs_exon11 = _exon_segments(event.exons1[0, 0], event.exons1[0, 1])
    segs_exon12 = _exon_segments(event.exons1[1, 0], event.exons1[1, 1])
    segs_exon21 = _exon_segments(event.exons2[0, 0], event.exons2[0, 1])
    segs_exon22 = _exon_segments(event.exons2[1, 0], event.exons2[1, 1])

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    ### the isoforms share one exon; the other exon differs by seg_diff
    # array_equal also copes with exons that have different segment counts
    if sp.array_equal(segs_exon11, segs_exon21):
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
    elif sp.array_equal(segs_exon12, segs_exon22):
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
    else:
        print("ERROR: both exons differ in alt prime event in verify_alt_prime", file=sys.stderr)
        sys.exit(1)

    # exon_diff_cov
    if _shares_segment(seg_diff, segs_exon11) or _shares_segment(seg_diff, segs_exon12):
        cov[0] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    elif _shares_segment(seg_diff, segs_exon21) or _shares_segment(seg_diff, segs_exon22):
        cov[1] += sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    else:
        raise Exception('differential segment not part of any other segment')

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    cov[0] += _intron_count(segs_exon11[-1], segs_exon12[0])
    # intron2_conf
    cov[1] += _intron_count(segs_exon21[-1], segs_exon22[0])

    return cov
コード例 #18
0
ファイル: count.py プロジェクト: bowhan/spladder
def count_graph_coverage(genes, fn_bam=None, CFG=None, fn_out=None):
    """Count segment coverage and intron-edge support per sample and gene.

    Parameters:
        genes: array of gene objects, or a dict bundling all arguments under
            the keys 'genes', 'fn_bam', 'CFG' and optionally 'fn_out' (only
            honoured when ``fn_bam`` is None).
        fn_bam: one alignment file name or a list of them (one per sample).
        CFG: mapping; reads 'bam_to_sparse', 'var_aware' and 'primary_only'.
        fn_out: if given, the result is pickled to this path instead of
            being returned.

    Returns:
        Object array of shape (samples, genes) holding one ``Counts`` object
        per entry, or None when ``fn_out`` is given.

    Side effects:
        Rebuilds empty segment graphs and overwrites ``start`` / ``stop`` on
        every gene; writes progress to stdout.
    """
    # [counts] = count_graph_coverage(genes, fn_bam, CFG, fn_out)

    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        CFG = PAR['CFG']

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    # maximal distance between an intron boundary and a segment boundary
    intron_tol = 0

    sys.stdout.write('genes: %i\n' % genes.shape[0])
    for f in range(counts.shape[0]):
        sys.stdout.write('sample %i/%i\n' % (f + 1, counts.shape[0]))

        # summary file content of the current sample, loaded on first use
        bam_cache = None

        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        for i in range(genes.shape[0]):
            sys.stdout.write('.')
            if i > 0 and i % 50 == 0:
                sys.stdout.write('%i\n' % i)
            gg = genes[i]
            if gg.segmentgraph.is_empty():
                gg.segmentgraph = Segmentgraph(gg)
            gg.start = gg.segmentgraph.segments.ravel().min()
            gg.stop = gg.segmentgraph.segments.ravel().max()

            counts[f, i] = Counts(gg.segmentgraph.segments.shape[1])

            if CFG['bam_to_sparse'] and (fn_bam[f].endswith('npz') or os.path.exists(re.sub(r'bam$', '', fn_bam[f]) + 'npz')):
                ### load counts from summary file
                if bam_cache is None:
                    bam_cache = dict()
                    if fn_bam[f].endswith('npz'):
                        tmp = sp.load(fn_bam[f])
                    else:
                        tmp = sp.load(re.sub(r'bam$', '', fn_bam[f]) + 'npz')
                    ### re-built sparse matrix
                    for c in sp.unique([re.sub(r'_reads_dat$', '', x) for x in tmp if x.endswith('_reads_dat')]):
                        bam_cache[c + '_reads'] = scipy.sparse.coo_matrix((tmp[c + '_reads_dat'], (tmp[c + '_reads_row'], tmp[c + '_reads_col'])), shape=tmp[c + '_reads_shp'], dtype='uint32').tocsc()
                        bam_cache[c + '_introns_m'] = tmp[c + '_introns_m']
                        bam_cache[c + '_introns_p'] = tmp[c + '_introns_p']
                    del tmp

                reads = bam_cache[gg.chr + '_reads']
                if reads.shape[0] == 0:
                    tracks = sp.zeros((1, gg.stop - gg.start), dtype='int')
                elif reads.shape[0] > 1:
                    tracks = reads[[0, 1 + int(gg.strand == '-')], gg.start:gg.stop].todense()
                else:
                    tracks = reads[:, gg.start:gg.stop].todense()

                # the minus strand intron list is only used if the contig of
                # THIS gene has one; the decision must not depend on whatever
                # contig happened to be loaded last into the cache
                if bam_cache[gg.chr + '_introns_m'].shape[0] > 0 and gg.strand == '-':
                    introns = bam_cache[gg.chr + '_introns_m']
                else:
                    introns = bam_cache[gg.chr + '_introns_p']
                intron_list = get_intron_range(introns, gg.start, gg.stop)
            else:
                ### add RNA-seq evidence to the gene structure
                #(tracks, intron_list) = add_reads_from_bam(gg, fn_bam[f], ['exon_track','intron_list'], CFG['read_filter'], CFG['var_aware'], CFG['primary_only']);
                (tracks, intron_list) = add_reads_from_bam(gg, fn_bam[f], ['exon_track','intron_list'], None, CFG['var_aware'], CFG['primary_only'])
                intron_list = intron_list[0] ### TODO

            ### extract mean exon coverage for all segments
            for j in range(gg.segmentgraph.segments.shape[1]):
                idx = sp.arange(gg.segmentgraph.segments[0, j], gg.segmentgraph.segments[1, j]) - gg.start
                counts[f, i].segments[j] = sp.mean(sp.sum(tracks[:, idx], axis=0))
                counts[f, i].seg_pos[j] = sp.sum(sp.sum(tracks[:, idx], axis=0) > 0)

            k, l = sp.where(gg.segmentgraph.seg_edges == 1)
            edge_shape = gg.segmentgraph.seg_edges.shape
            has_introns = intron_list.shape[0] > 0

            ### one row [flat edge index, intron count] per segment edge;
            ### the count stays 0 if there are no introns at all or none
            ### that matches the edge
            for m in range(k.shape[0]):
                n_reads = 0
                if has_introns:
                    idx = sp.where((sp.absolute(intron_list[:, 0] - gg.segmentgraph.segments[1, k[m]]) <= intron_tol) & (sp.absolute(intron_list[:, 1] - gg.segmentgraph.segments[0, l[m]]) <= intron_tol))[0]
                    if idx.shape[0] > 0:
                        n_reads = sp.sum(intron_list[idx, 2])
                row = sp.atleast_2d(sp.array([sp.ravel_multi_index([k[m], l[m]], edge_shape), n_reads]))
                if counts[f, i].edges.shape[0] == 0:
                    counts[f, i].edges = row
                else:
                    counts[f, i].edges = sp.r_[counts[f, i].edges, row]

    if fn_out is not None:
        # protocol -1 is a binary pickle: write in binary mode and make sure
        # the handle is closed again
        with open(fn_out, 'wb') as fh:
            cPickle.dump(counts, fh, -1)
    else:
        return counts
コード例 #19
0
def count_graph_coverage(genes, fn_bam=None, options=None, fn_out=None):
    """Collect segment coverage and segment-edge counts per sample and gene.

    For every alignment file and every gene a ``Counts`` object is filled:

    * ``segments[j]`` -- mean over the positions of segment ``j`` of the
      exon tracks summed over their first axis,
    * ``seg_pos[j]``  -- number of positions of segment ``j`` where that
      summed track is > 0,
    * ``edges``       -- one row ``[linear edge index, count]`` for every
      entry of ``seg_edges`` equal to 1; ``count`` is the sum of column 2
      of the intron-list rows whose columns 0/1 match the end of the source
      segment / start of the target segment (0 if nothing matches).

    Parameters:
        genes: array of gene objects, or a dict with the keys ``'genes'``,
            ``'fn_bam'``, ``'options'`` and optionally ``'fn_out'`` (used
            when ``fn_bam`` is None).
        fn_bam: one alignment file name or a list of them.
        options: object providing ``sparse_bam``, ``confidence``,
            ``var_aware``, ``primary_only`` and ``mm_tag``.
        fn_out: if given, the result is pickled to this path.

    Returns:
        The object array of ``Counts`` (samples x genes) when ``fn_out`` is
        None; otherwise None (the array is written to ``fn_out``).

    Side effects: genes are modified in place (``from_sparse()`` is called
    when available, an empty segment graph is rebuilt, ``start``/``stop``
    are reset to the extent of the segments).
    """

    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        options = PAR['options']

    if hasattr(genes[0], 'splicegraph_edges_data'):
        for gg in genes:
            gg.from_sparse()

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    intron_tol = 0

    sys.stdout.write('genes: %i\n' % genes.shape[0])

    ### the contig of a gene does not depend on the sample, compute it once
    contigs = sp.array([x.chr for x in genes])

    for f in range(counts.shape[0]):
        sys.stdout.write('\nsample %i/%i\n' % (f + 1, counts.shape[0]))

        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        ### iterate per contig, so the bam caching works better
        for contig in sp.unique(contigs):
            contig_idx = sp.where(contigs == contig)[0]
            bam_cache = dict()
            print('\ncounting %i genes on contig %s' % (contig_idx.shape[0], contig))
            for ii, i in enumerate(contig_idx):
                sys.stdout.write('.')
                if ii > 0 and ii % 50 == 0:
                    sys.stdout.write('%i/%i\n' % (ii, contig_idx.shape[0]))
                sys.stdout.flush()
                gg = genes[i]
                if gg.segmentgraph.is_empty():
                    gg.segmentgraph = Segmentgraph(gg)
                segments = gg.segmentgraph.segments
                gg.start = segments.ravel().min()
                gg.stop = segments.ravel().max()

                counts[f, i] = Counts(segments.shape[1])

                if options.sparse_bam and \
                  (fn_bam[f].endswith('npz') or \
                   os.path.exists(re.sub(r'bam$', '', fn_bam[f]) + 'npz') or \
                   fn_bam[f].endswith('hdf5') or \
                   os.path.exists(re.sub(r'bam$', '', fn_bam[f]) + 'hdf5')):
                    ### make sure that we query the right contig from cache
                    assert(gg.chr == contig)
                    (tracks, intron_list) = add_reads_from_sparse_bam(gg, fn_bam[f], contig, options.confidence, types=['exon_track','intron_list'], filter=None, cache=bam_cache)
                else:
                    ### add RNA-seq evidence to the gene structure
                    (tracks, intron_list) = add_reads_from_bam(gg, fn_bam[f], ['exon_track','intron_list'], None, options.var_aware, options.primary_only, mm_tag=options.mm_tag)
                    intron_list = intron_list[0] ### TODO

                ### extract mean exon coverage for all segments
                for j in range(segments.shape[1]):
                    idx = sp.arange(segments[0, j], segments[1, j]) - gg.start
                    seg_cov = sp.sum(tracks[:, idx], axis=0)
                    counts[f, i].segments[j] = sp.mean(seg_cov)
                    counts[f, i].seg_pos[j] = sp.sum(seg_cov > 0)

                k, l = sp.where(gg.segmentgraph.seg_edges == 1)
                edge_shape = gg.segmentgraph.seg_edges.shape
                has_introns = intron_list.shape[0] > 0

                ### extract intron counts; every edge gets a row, edges
                ### without a matching intron (or without any introns at all)
                ### are recorded with a count of 0
                for m in range(k.shape[0]):
                    edge_count = 0
                    if has_introns:
                        idx = sp.where((sp.absolute(intron_list[:, 0] - segments[1, k[m]]) <= intron_tol) &
                                       (sp.absolute(intron_list[:, 1] - segments[0, l[m]]) <= intron_tol))[0]
                        if idx.shape[0] > 0:
                            edge_count = sp.sum(intron_list[idx, 2])
                    row = sp.atleast_2d(sp.array([sp.ravel_multi_index([k[m], l[m]], edge_shape), edge_count]))
                    if counts[f, i].edges.shape[0] == 0:
                        counts[f, i].edges = row
                    else:
                        counts[f, i].edges = sp.r_[counts[f, i].edges, row]

    if fn_out is not None:
        ### close the output file deterministically instead of leaking the handle
        with open(fn_out, 'wb') as fh_out:
            pickle.dump(counts, fh_out, -1)
    else:
        return counts
コード例 #20
0
ファイル: quantify.py プロジェクト: arpankbasak/spladder
def quantify_mult_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Quantify a multiple-exon-skip event from segment and edge counts.

    Parameters:
        event: event object; ``exons2`` rows are read as flanking exon,
            inner exons ..., flanking exon (``exon_pre``/``exon_aft``/
            ``exons`` in the MATLAB layout).
        gene: object with ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment values, indexed by segment number.
        counts_edges: 2-D array; column 0 holds linear edge indices into
            the segment-edge matrix, column 1 the matching counts.
        CFG: dict; ``CFG['is_matlab']`` selects the MATLAB data layout
            (column-major, 1-based edge indices).

    Returns:
        Float array ``cov`` of length 2. ``cov[0]`` is the length-weighted
        mean of ``counts_segments`` over the inner exons' segments plus the
        counts of the edges pre->first inner, last inner->aft and between
        consecutive inner exons. ``cov[1]`` is the count of the pre->aft
        edge. Edges absent from ``counts_edges`` contribute 0.
    """

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        # ravel_multi_index needs the full (rows, cols) shape, not just the
        # first dimension; with a scalar it rejects the two-element index
        seg_shape = segs[0, 2].shape
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg[0, 0][0, :] == event.exon_pre[0])
                                & (sg[0, 0][1, :] == event.exon_pre[1]))[0]
        idx_exon_aft = sp.where((sg[0, 0][0, :] == event.exon_aft[0])
                                & (sg[0, 0][1, :] == event.exon_aft[1]))[0]
        seg_exons = []
        # NOTE(review): exons is sized via shape[1] but indexed with a single
        # index -- presumably a 1 x 2n MATLAB row vector; confirm with caller
        for i in range(0, event.exons.shape[1], 2):
            tmp = sp.where((sg[0, 0][0, :] == event.exons[i])
                           & (sg[0, 0][1, :] == event.exons[i + 1]))[0]
            seg_exons.append(sp.where(segs[0, 1][tmp, :])[1])

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs[0, 1][idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs[0, 1][idx_exon_aft, :])[1])
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        # full shape tuple required by ravel_multi_index (see above)
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons2[0, 0])
                                & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons2[-1, 0])
                                & (sg.vertices[1, :] == event.exons2[-1, 1]))[0]
        seg_exons = []
        for i in range(1, event.exons2.shape[0] - 1):
            tmp = sp.where((sg.vertices[0, :] == event.exons2[i, 0])
                           & (sg.vertices[1, :] == event.exons2[i, 1]))[0]
            seg_exons.append(sp.where(segs.seg_match[tmp, :])[1])

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])

    def _edge_count(seg_from, seg_to):
        # count stored for the edge seg_from -> seg_to, 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    seg_exons_u = sp.sort(
        sp.unique([x for sublist in seg_exons for x in sublist]))

    ### inner exons_cov
    cov[0] = sp.sum(counts_segments[seg_exons_u] *
                    seg_lens[seg_exons_u]) / sp.sum(seg_lens[seg_exons_u])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exons[0][0])
    # exon_exon_aft_conf
    cov[0] += _edge_count(seg_exons[-1][-1], seg_exon_aft[0])
    # exon_pre_exon_aft_conf
    cov[1] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])
    # sum_inner_exon_conf
    for i in range(len(seg_exons) - 1):
        cov[0] += _edge_count(seg_exons[i][-1], seg_exons[i + 1][0])

    return cov
コード例 #21
0
ファイル: quantify.py プロジェクト: xtmgah/spladder
def quantify_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Quantify a single exon-skip event from segment and edge counts.

    Parameters:
        event: event object; rows 0/1/2 of ``exons2`` are read as the exon
            before, the skipped exon and the exon after (``exon_pre``/
            ``exon``/``exon_aft`` in the MATLAB layout).
        gene: object with ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment values, indexed by segment number.
        counts_edges: 2-D array; column 0 holds linear edge indices into
            the segment-edge matrix, column 1 the matching counts.
        CFG: dict; ``CFG['is_matlab']`` selects the MATLAB data layout
            (column-major, 1-based edge indices).

    Returns:
        Float array ``cov`` of length 2. ``cov[0]`` is the length-weighted
        mean of ``counts_segments`` over the skipped exon's segments plus
        the counts of the edges pre->exon and exon->aft. ``cov[1]`` is the
        count of the pre->aft edge. Edges absent from ``counts_edges``
        contribute 0.
    """

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg[0, 0][0, :] == event.exon_pre[0])
                                & (sg[0, 0][1, :] == event.exon_pre[1]))[0]
        idx_exon = sp.where((sg[0, 0][0, :] == event.exon[0])
                            & (sg[0, 0][1, :] == event.exon[1]))[0]
        idx_exon_aft = sp.where((sg[0, 0][0, :] == event.exon_aft[0])
                                & (sg[0, 0][1, :] == event.exon_aft[1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs[0, 1][idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs[0, 1][idx_exon_aft, :])[1])
        seg_exon = sp.sort(sp.where(segs[0, 1][idx_exon, :])[1])
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons2[0, 0])
                                & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        idx_exon = sp.where((sg.vertices[0, :] == event.exons2[1, 0])
                            & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
        idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons2[2, 0])
                                & (sg.vertices[1, :] == event.exons2[2, 1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])
        seg_exon = sp.sort(sp.where(segs.seg_match[idx_exon, :])[1])

    def _edge_count(seg_from, seg_to):
        # count stored for the edge seg_from -> seg_to, 0 if it is not listed;
        # always a scalar, so it can be added to a single element of cov
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    # get inner exon cov
    cov[0] = sp.sum(counts_segments[seg_exon] * seg_lens[seg_exon]) / sp.sum(
        seg_lens[seg_exon])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exon[0])
    # exon_exon_aft_conf
    cov[0] += _edge_count(seg_exon[-1], seg_exon_aft[0])
    # exon_pre_exon_aft_conf
    cov[1] = _edge_count(seg_exon_pre[-1], seg_exon_aft[0])

    return cov
コード例 #22
0
ファイル: quantify.py プロジェクト: xtmgah/spladder
def quantify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    """Quantify an alternative 3'/5' splice-site event.

    Parameters:
        event: event object; ``exons1`` and ``exons2`` each hold the two
            exons (rows: start, stop) of one isoform.
        gene: object with ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment values, indexed by segment number.
        counts_edges: 2-D array; column 0 holds linear edge indices into
            the segment-edge matrix, column 1 the matching counts.
        CFG: dict; ``CFG['is_matlab']`` selects the MATLAB data layout.

    Returns:
        Float array ``cov`` of length 2 (isoform 1, isoform 2): the count of
        the isoform's intron edge, plus -- for the isoform that contains the
        differential segments -- their length-weighted mean of
        ``counts_segments``.

    Raises:
        AssertionError: an exon maps to no segment or an intron edge is not
            listed in ``counts_edges``.
        Exception: the differential segments belong to neither isoform.
        SystemExit: both exons differ between the two isoforms.
    """

    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph
    if CFG['is_matlab']:
        # NOTE(review): this MATLAB-layout branch is kept as found and does
        # not look runnable: the sp.where() calls without a trailing [0]
        # return tuples, so the .shape checks below fail, and seg_diff is
        # read before it is ever assigned. Repairing it needs knowledge of
        # the MATLAB event layout; the fixes in this function only cover
        # the non-MATLAB branch.
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        seg_shape = segs[0, 2].shape[0]

        idx_exon_alt1 = sp.where((sg[0, 0][0, :] == event.exon_alt1[0])
                                 & (sg[0, 0][1, :] == event.exon_alt1[1]))
        idx_exon_alt2 = sp.where((sg[0, 0][0, :] == event.exon_alt2[0])
                                 & (sg[0, 0][1, :] == event.exon_alt2[1]))
        idx_exon_const = sp.where((sg[0, 0][0, :] == event.exon_const[0])
                                  & (sg[0, 0][1, :] == event.exon_const[1]))
        if idx_exon_alt1.shape[0] == 0:
            segs_exon_alt1 = sp.where((segs[0, 0][0, :] >= event.exon_alt1[0])
                                      & (segs[0, 0][1, :] >= event.exon_alt1[1]))
        else:
            segs_exon_alt1 = sp.where(segs[0, 1][idx_exon_alt1, :])[1]
        if idx_exon_alt2.shape[0] == 0:
            segs_exon_alt2 = sp.where((segs[0, 0][0, :] >= event.exon_alt2[0])
                                      & (segs[0, 0][1, :] >= event.exon_alt2[1]))
        else:
            segs_exon_alt2 = sp.where(segs[0, 1][idx_exon_alt2, :])[1]
        if idx_exon_const.shape[0] == 0:
            segs_exon_const = sp.where(
                (segs[0, 0][0, :] >= event.exon_const[0])
                & (segs[0, 0][1, :] >= event.exon_const[1]))
        else:
            segs_exon_const = sp.where(segs[0, 1][idx_exon_const, :])[1]

        assert (segs_exon_alt1.shape[0] > 0)
        assert (segs_exon_alt2.shape[0] > 0)
        assert (segs_exon_const.shape[0] > 0)

        cov[1] += sp.sum(counts_segments[seg_diff] *
                         seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        if max(segs_exon_alt1[-1], segs_exon_alt2[-1]) < segs_exon_const[0]:
            # intron1_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
                [segs_exon_alt1[0], segs_exon_const[-1]], seg_shape))[0] + 1
            assert (idx.shape[0] > 0)
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
                [segs_exon_alt2[0], segs_exon_const[-1]], seg_shape))[0] + 1
            assert (idx.shape[0] > 0)
            cov[1] += counts_edges[idx, 1]
        elif min(segs_exon_alt1[0], segs_exon_alt2[0]) > segs_exon_const[-1]:
            # intron1_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
                [segs_exon_const[0], segs_exon_alt1[-1]], seg_shape))[0] + 1
            assert (idx.shape[0] > 0)
            cov[0] += counts_edges[idx, 1]
            # intron2_conf
            idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
                [segs_exon_const[0], segs_exon_alt2[-1]], seg_shape))[0] + 1
            assert (idx.shape[0] > 0)
            cov[1] += counts_edges[idx, 1]
    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        # ravel_multi_index needs the full (rows, cols) shape; with only
        # shape[0] it rejects the two-element index
        seg_shape = segs.seg_edges.shape

        ### find exons corresponding to event; if an exon is not a vertex of
        ### the splice graph, fall back to all segments contained in it
        idx_exon11 = sp.where((sg.vertices[0, :] == event.exons1[0, 0])
                              & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        if idx_exon11.shape[0] == 0:
            segs_exon11 = sp.where(
                (segs.segments[0, :] >= event.exons1[0, 0])
                & (segs.segments[1, :] <= event.exons1[0, 1]))[0]
        else:
            segs_exon11 = sp.where(segs.seg_match[idx_exon11, :])[1]
        idx_exon12 = sp.where((sg.vertices[0, :] == event.exons1[1, 0])
                              & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
        if idx_exon12.shape[0] == 0:
            segs_exon12 = sp.where(
                (segs.segments[0, :] >= event.exons1[1, 0])
                & (segs.segments[1, :] <= event.exons1[1, 1]))[0]
        else:
            segs_exon12 = sp.where(segs.seg_match[idx_exon12, :])[1]
        idx_exon21 = sp.where((sg.vertices[0, :] == event.exons2[0, 0])
                              & (sg.vertices[1, :] == event.exons2[0, 1]))[0]
        if idx_exon21.shape[0] == 0:
            segs_exon21 = sp.where(
                (segs.segments[0, :] >= event.exons2[0, 0])
                & (segs.segments[1, :] <= event.exons2[0, 1]))[0]
        else:
            segs_exon21 = sp.where(segs.seg_match[idx_exon21, :])[1]
        idx_exon22 = sp.where((sg.vertices[0, :] == event.exons2[1, 0])
                              & (sg.vertices[1, :] == event.exons2[1, 1]))[0]
        if idx_exon22.shape[0] == 0:
            segs_exon22 = sp.where(
                (segs.segments[0, :] >= event.exons2[1, 0])
                & (segs.segments[1, :] <= event.exons2[1, 1]))[0]
        else:
            segs_exon22 = sp.where(segs.seg_match[idx_exon22, :] > 0)[1]

        assert (segs_exon11.shape[0] > 0)
        assert (segs_exon12.shape[0] > 0)
        assert (segs_exon21.shape[0] > 0)
        assert (segs_exon22.shape[0] > 0)

        # array_equal also copes with segment lists of different length,
        # where an element-wise == cannot be evaluated
        if sp.array_equal(segs_exon11, segs_exon21):
            seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        elif sp.array_equal(segs_exon12, segs_exon22):
            seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
            if seg_diff.shape[0] == 0:
                seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        else:
            # sys.stderr.write works on Python 2 and 3 (print >> does not)
            sys.stderr.write("ERROR: both exons differ in alt prime event in verify_alt_prime\n")
            sys.exit(1)

        def _shares_segment(seg_list):
            # true membership test; `seg_diff in array` compares position by
            # position and breaks once seg_diff has more than one element
            return sp.intersect1d(seg_diff, seg_list).shape[0] > 0

        # exon_diff_cov
        if _shares_segment(segs_exon11) or _shares_segment(segs_exon12):
            cov[0] += sp.sum(counts_segments[seg_diff] *
                             seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        elif _shares_segment(segs_exon21) or _shares_segment(segs_exon22):
            cov[1] += sp.sum(counts_segments[seg_diff] *
                             seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
        else:
            raise Exception(
                'differential segment not part of any other segment')

        ### check intron confirmations as sum of valid intron scores
        ### intron score is the number of reads confirming this intron
        # intron1_conf
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
            [segs_exon11[-1], segs_exon12[0]], seg_shape))[0]
        assert (idx.shape[0] > 0)
        # take the matching row as a scalar so it fits into one cov element
        cov[0] += counts_edges[idx[0], 1]
        # intron2_conf
        idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
            [segs_exon21[-1], segs_exon22[0]], seg_shape))[0]
        assert (idx.shape[0] > 0)
        cov[1] += counts_edges[idx[0], 1]

    return cov
コード例 #23
0
ファイル: quantify.py プロジェクト: xtmgah/spladder
def quantify_mutex_exons(event, gene, counts_segments, counts_edges, CFG):
    """Quantify a mutually-exclusive-exons event from segment and edge counts.

    Parameters:
        event: event object; ``exons1`` rows are read as exon before,
            exon 1, exon after and row 1 of ``exons2`` as exon 2
            (``exon_pre``/``exon1``/``exon2``/``exon_aft`` in the MATLAB
            layout).
        gene: object with ``splicegraph`` and ``segmentgraph``.
        counts_segments: per-segment values, indexed by segment number.
        counts_edges: 2-D array; column 0 holds linear edge indices into
            the segment-edge matrix, column 1 the matching counts.
        CFG: dict; ``CFG['is_matlab']`` selects the MATLAB data layout
            (column-major, 1-based edge indices).

    Returns:
        Float array ``cov`` of length 2. ``cov[0]`` / ``cov[1]`` is the
        length-weighted mean of ``counts_segments`` over the segments of
        exon 1 / exon 2 plus the counts of the edges pre->exon and
        exon->aft. Edges absent from ``counts_edges`` contribute 0.
    """

    # result vector; it has to exist before the element assignments below
    cov = sp.zeros((2, ), dtype='float')

    sg = gene.splicegraph
    segs = gene.segmentgraph

    if CFG['is_matlab']:
        seg_lens = segs[0, 0][1, :] - segs[0, 0][0, :]
        # ravel_multi_index needs the full (rows, cols) shape, not just the
        # first dimension; with a scalar it rejects the two-element index
        seg_shape = segs[0, 2].shape
        order = 'F'
        offset = 1

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg[0, 0][0, :] == event.exon_pre[0])
                                & (sg[0, 0][1, :] == event.exon_pre[1]))[0]
        idx_exon_aft = sp.where((sg[0, 0][0, :] == event.exon_aft[0])
                                & (sg[0, 0][1, :] == event.exon_aft[1]))[0]
        idx_exon1 = sp.where((sg[0, 0][0, :] == event.exon1[0])
                             & (sg[0, 0][1, :] == event.exon1[1]))[0]
        idx_exon2 = sp.where((sg[0, 0][0, :] == event.exon2[0])
                             & (sg[0, 0][1, :] == event.exon2[1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs[0, 1][idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs[0, 1][idx_exon_aft, :])[1])
        seg_exon1 = sp.sort(sp.where(segs[0, 1][idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs[0, 1][idx_exon2, :])[1])

    else:
        seg_lens = segs.segments[1, :] - segs.segments[0, :]
        # full shape tuple required by ravel_multi_index (see above)
        seg_shape = segs.seg_edges.shape
        order = 'C'
        offset = 0

        ### find exons corresponding to event
        idx_exon_pre = sp.where((sg.vertices[0, :] == event.exons1[0, 0])
                                & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
        idx_exon_aft = sp.where((sg.vertices[0, :] == event.exons1[-1, 0])
                                & (sg.vertices[1, :] == event.exons1[-1, 1]))[0]
        idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[1, 0])
                             & (sg.vertices[1, :] == event.exons1[1, 1]))[0]
        idx_exon2 = sp.where((sg.vertices[0, :] == event.exons2[1, 0])
                             & (sg.vertices[1, :] == event.exons2[1, 1]))[0]

        ### find segments corresponding to exons
        seg_exon_pre = sp.sort(sp.where(segs.seg_match[idx_exon_pre, :])[1])
        seg_exon_aft = sp.sort(sp.where(segs.seg_match[idx_exon_aft, :])[1])
        seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
        seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    def _edge_count(seg_from, seg_to):
        # count stored for the edge seg_from -> seg_to, 0 if it is not listed
        edge_id = sp.ravel_multi_index([seg_from, seg_to], seg_shape, order=order) + offset
        hits = sp.where(counts_edges[:, 0] == edge_id)[0]
        if hits.shape[0] > 0:
            return counts_edges[hits[0], 1]
        return 0

    # exon1 cov
    cov[0] = sp.sum(counts_segments[seg_exon1] * seg_lens[seg_exon1]) / sp.sum(
        seg_lens[seg_exon1])
    # exon2 cov
    cov[1] = sp.sum(counts_segments[seg_exon2] * seg_lens[seg_exon2]) / sp.sum(
        seg_lens[seg_exon2])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # exon_pre_exon1_conf
    cov[0] += _edge_count(seg_exon_pre[-1], seg_exon1[0])
    # exon_pre_exon2_conf
    cov[1] += _edge_count(seg_exon_pre[-1], seg_exon2[0])
    # exon1_exon_aft_conf
    cov[0] += _edge_count(seg_exon1[-1], seg_exon_aft[0])
    # exon2_exon_aft_conf
    cov[1] += _edge_count(seg_exon2[-1], seg_exon_aft[0])

    return cov
コード例 #24
0
ファイル: verify.py プロジェクト: ccwang12/spladder
def verify_mult_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Check a multiple-exon-skip event against segment and edge counts.

    Returns the tuple ``(verified, info)``.

    ``info`` slots:
        (0) valid, (1) exon_pre_cov, (2) exons_cov, (3) exon_aft_cov,
        (4) exon_pre_exon_conf, (5) exon_exon_aft_conf,
        (6) exon_pre_exon_aft_conf, (7) sum_inner_exon_conf,
        (8) num_inner_exon

    ``verified`` flags (1 when the criterion from
    ``CFG['mult_exon_skip']`` is met):
        (0) exons_cov >= min_skip_rel_cov * mean(pre cov, aft cov)
        (1) exon_pre_exon_conf >= min_non_skip_count
        (2) exon_exon_aft_conf >= min_non_skip_count
        (3) sum_inner_exon_conf / num_inner_exon >= min_non_skip_count
        (4) exon_pre_exon_aft_conf >= min_skip_count

    Events with negative coordinates or an exon with stop - start < 1 are
    returned immediately with ``info[0] == 0`` and nothing verified.
    """
    verified = [0] * 5
    info = [1] + [0] * 8

    ### reject events with invalid exon coordinates
    has_negative = sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0)
    if has_negative \
       or sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) \
       or sp.any(event.exons2[:, 1] - event.exons2[:, 0] < 1):
        info[0] = 0
        return (verified, info)

    vertices = gene.splicegraph.vertices
    segs = gene.segmentgraph

    def segments_of(exon):
        # segment indices matched to the splice-graph vertex equal to exon
        vertex = sp.where((vertices[0, :] == exon[0]) & (vertices[1, :] == exon[1]))[0]
        return sp.where(segs.seg_match[vertex, :])[1]

    ### segments of the flanking exons and of each inner exon
    seg_exon_pre = sp.sort(segments_of(event.exons2[0]))
    seg_exon_aft = sp.sort(segments_of(event.exons2[-1]))
    seg_exons = [segments_of(event.exons2[pos])
                 for pos in range(1, event.exons2.shape[0] - 1)]
    seg_exons_u = sp.sort(sp.unique([s for inner in seg_exons for s in inner]))

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    def weighted_cov(seg_idx):
        # length-weighted mean of counts_segments over the given segments
        return sp.sum(counts_segments[seg_idx] * seg_lens[seg_idx]) / sp.sum(seg_lens[seg_idx])

    info[1] = weighted_cov(seg_exon_pre)
    info[3] = weighted_cov(seg_exon_aft)
    info[2] = weighted_cov(seg_exons_u)

    limits = CFG['mult_exon_skip']

    ### coverage of the skipped exons relative to the flanking exons
    if info[2] >= limits['min_skip_rel_cov'] * (info[1] + info[3]) / 2:
        verified[0] = 1

    def edge_rows(seg_from, seg_to):
        # rows of counts_edges that describe the edge seg_from -> seg_to
        edge_id = sp.ravel_multi_index([seg_from, seg_to], segs.seg_edges.shape)
        return sp.where(counts_edges[:, 0] == edge_id)[0]

    ### intron confirmation: number of reads confirming each intron
    junctions = (
        (4, seg_exon_pre[-1], seg_exons[0][0]),    # exon_pre_exon_conf
        (5, seg_exons[-1][-1], seg_exon_aft[0]),   # exon_exon_aft_conf
        (6, seg_exon_pre[-1], seg_exon_aft[0]),    # exon_pre_exon_aft_conf
    )
    for slot, seg_from, seg_to in junctions:
        rows = edge_rows(seg_from, seg_to)
        if rows.ndim > 0 and rows.shape[0] > 0:
            info[slot] = counts_edges[rows[0], 1]

    # sum_inner_exon_conf
    for left, right in zip(seg_exons[:-1], seg_exons[1:]):
        rows = edge_rows(left[-1], right[0])
        if rows.ndim > 0 and rows.shape[0] > 0:
            info[7] += counts_edges[rows[0], 1]

    # num_inner_exon
    info[8] = event.exons2.shape[0] - 2

    if info[4] >= limits['min_non_skip_count']:
        verified[1] = 1
    if info[5] >= limits['min_non_skip_count']:
        verified[2] = 1
    if (info[7] / info[8]) >= limits['min_non_skip_count']:
        verified[3] = 1
    if info[6] >= limits['min_skip_count']:
        verified[4] = 1

    return (verified, info)
コード例 #25
0
def verify_intron_retention(event, gene, counts_segments, counts_edges,
                            counts_seg_pos, CFG):
    """Check an intron-retention event against segment and edge counts.

    Returns the tuple ``(verified, info)``.

    ``info`` slots:
        (0) valid, (1) intron_cov, (2) exon1_cov, (3) exon2_cov,
        (4) intron_conf, (5) intron_cov_region

    ``verified`` flags (thresholds from ``CFG['intron_retention']``):
        (0) intron_cov > min_retention_cov and
            intron_cov_region > min_retention_region and
            intron_cov >= min_retention_rel_cov * mean(exon1_cov, exon2_cov)
        (1) intron_conf >= min_non_retention_count

    ``intron_conf`` is the count stored in ``counts_edges`` for the edge
    from the last segment of exon 1 to the first segment of exon 2; it is a
    scalar and stays 0 when that edge is not listed.

    Events with negative coordinates or an exon with stop - start < 1 are
    returned immediately with ``info[0] == 0`` and nothing verified.

    Raises:
        AssertionError: no segment lies between the two exons.
    """
    # [verified, info] = verify_intron_retention(event, fn_bam, CFG)

    verified = [0, 0]

    # (0) valid, (1) intron_cov, (2) exon1_cov, (3), exon2_cov
    # (4) intron_conf, (5) intron_cov_region
    info = [1, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)
    ### check validity of exon coordinates (start < stop && non-overlapping)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any(
        (event.exons2[1] - event.exons2[0]) < 1):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0])
                         & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0])
                         & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    ### the intron is whatever lies between the exons and belongs to neither
    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert (seg_intron.shape[0] > 0)

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    ### compute exon coverages as mean of position wise coverage
    # exon1_cov
    info[2] = sp.sum(counts_segments[seg_exon1] *
                     seg_lens[seg_exon1]) / sp.sum(seg_lens[seg_exon1])
    # exon2_cov
    info[3] = sp.sum(counts_segments[seg_exon2] *
                     seg_lens[seg_exon2]) / sp.sum(seg_lens[seg_exon2])
    # intron_cov
    info[1] = sp.sum(counts_segments[seg_intron] *
                     seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])
    # intron_cov_region
    info[5] = sp.sum(counts_seg_pos[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check if counts match verification criteria
    if info[1] > CFG['intron_retention']['min_retention_cov'] and \
       info[5] > CFG['intron_retention']['min_retention_region'] and \
       info[1] >= CFG['intron_retention']['min_retention_rel_cov'] * (info[2] + info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index(
        [seg_exon1[-1], seg_exon2[0]], segs.seg_edges.shape))[0]
    # store a scalar; an unlisted edge keeps the default of 0 instead of
    # putting an empty array into info and into the comparison below
    if idx.shape[0] > 0:
        info[4] = counts_edges[idx[0], 1]

    if info[4] >= CFG['intron_retention']['min_non_retention_count']:
        verified[1] = 1

    return (verified, info)
コード例 #26
0
ファイル: verify.py プロジェクト: ccwang12/spladder
def verify_alt_prime(event, gene, counts_segments, counts_edges, CFG):
    """Verify an alt-prime event against segment and intron counts.

    The event is described by two isoforms, ``event.exons1`` and
    ``event.exons2``; each is indexed as ``[exon, 0]`` (start) and
    ``[exon, 1]`` (stop) for its two exons.  One of the two exons has to map
    to the same segments in both isoforms; the other one carries the
    alternative boundary.

    Parameters:
        event: object with array attributes ``exons1`` and ``exons2``.
        gene: object with ``splicegraph.vertices`` (row 0 starts, row 1
            stops), ``segmentgraph.segments`` (row 0 starts, row 1 stops),
            ``segmentgraph.seg_match`` (indexed [vertex, segment]) and
            ``segmentgraph.seg_edges`` (only its shape is used).
        counts_segments: array of coverage values, indexed by segment.
        counts_edges: 2-column array; column 0 is the flat index of a
            (segment, segment) pair within ``seg_edges.shape``, column 1 the
            count stored for that pair.
        CFG: dict; ``CFG['alt_prime']['min_diff_rel_cov']`` and
            ``CFG['alt_prime']['min_intron_count']`` are read.

    Returns:
        ``(verified, info)``.  ``info`` is
        ``[valid, exon_diff_cov, exon_const_cov, intron1_conf, intron2_conf]``
        and ``verified`` is ``[coverage_ok, introns_ok]``.  When the
        coordinates are invalid, ``info[0]`` is 0 and everything else stays 0.

    Raises:
        SystemExit: (code 1) if neither exon is shared by both isoforms.
        AssertionError: if an exon maps to no segment or an intron is missing
            from ``counts_edges``.
    """
    # (0) valid, (1) exon_diff_cov, (2) exon_const_cov
    # (3) intron1_conf, (4) intron2_conf
    info = [1, 0, 0, 0, 0]
    verified = [0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)

    ### check validity of intron coordinates (only one side is differing)
    if (event.exons1[0, 1] != event.exons2[0, 1]) and (event.exons1[1, 0] != event.exons2[1, 0]):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    def _exon_segments(start, stop):
        # Prefer the segments recorded for the exactly matching splice graph
        # vertex; if there is no such vertex, fall back to all segments that
        # lie completely inside [start, stop].
        idx_exon = sp.where((sg.vertices[0, :] == start) & (sg.vertices[1, :] == stop))[0]
        if idx_exon.shape[0] == 0:
            return sp.where((segs.segments[0, :] >= start) & (segs.segments[1, :] <= stop))[0]
        return sp.where(segs.seg_match[idx_exon, :])[1]

    ### find segments corresponding to the exons of the event
    segs_exon11 = _exon_segments(event.exons1[0, 0], event.exons1[0, 1])
    segs_exon12 = _exon_segments(event.exons1[1, 0], event.exons1[1, 1])
    segs_exon21 = _exon_segments(event.exons2[0, 0], event.exons2[0, 1])
    segs_exon22 = _exon_segments(event.exons2[1, 0], event.exons2[1, 1])

    assert(segs_exon11.shape[0] > 0)
    assert(segs_exon12.shape[0] > 0)
    assert(segs_exon21.shape[0] > 0)
    assert(segs_exon22.shape[0] > 0)

    # array_equal is used instead of all(a == b): the segment lists of the
    # alternative exon usually differ in length, and an elementwise comparison
    # of differently shaped arrays either broadcasts or fails.
    if sp.array_equal(segs_exon11, segs_exon21):
        # first exon is shared, the alternative boundary is in the second exon
        seg_exon_const = segs_exon11
        seg_diff = sp.setdiff1d(segs_exon12, segs_exon22)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon22, segs_exon12)
        seg_const = sp.intersect1d(segs_exon12, segs_exon22)
    elif sp.array_equal(segs_exon12, segs_exon22):
        # second exon is shared, the alternative boundary is in the first exon
        seg_exon_const = segs_exon12
        seg_diff = sp.setdiff1d(segs_exon11, segs_exon21)
        if seg_diff.shape[0] == 0:
            seg_diff = sp.setdiff1d(segs_exon21, segs_exon11)
        seg_const = sp.intersect1d(segs_exon21, segs_exon11)
    else:
        # write() works on both Python 2 and 3, unlike "print >> sys.stderr"
        sys.stderr.write("ERROR: both exons differ in alt prime event in verify_alt_prime\n")
        sys.exit(1)
    seg_const = sp.r_[seg_exon_const, seg_const]

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    # exon_diff_cov (length-weighted mean over the differing segments)
    info[1] = sp.sum(counts_segments[seg_diff] * seg_lens[seg_diff]) / sp.sum(seg_lens[seg_diff])
    # exon_const_cov (length-weighted mean over the shared segments)
    info[2] = sp.sum(counts_segments[seg_const] * seg_lens[seg_const]) / sp.sum(seg_lens[seg_const])

    if info[1] >= CFG['alt_prime']['min_diff_rel_cov'] * info[2]:
        verified[0] = 1

    ### check intron confirmations as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron1_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon11[-1], segs_exon12[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[3] = counts_edges[idx, 1]
    # intron2_conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([segs_exon21[-1], segs_exon22[0]], segs.seg_edges.shape))[0]
    assert(idx.shape[0] > 0)
    info[4] = counts_edges[idx, 1]

    if min(info[3], info[4]) >= CFG['alt_prime']['min_intron_count']:
        verified[1] = 1

    return (verified, info)
コード例 #27
0
def verify_mult_exon_skip(event, gene, counts_segments, counts_edges, CFG):
    """Verify a multiple-exon-skip event against segment and intron counts.

    ``event.exons2`` lists the exon chain (first row: exon before the skipped
    exons, last row: exon after them, rows in between: the skipped exons).
    ``event.exons1`` is only used for the coordinate sanity checks.

    Returns ``(verified, info)`` where ``info`` holds
      (0) valid, (1) exon_pre_cov, (2) exons_cov, (3) exon_aft_cov,
      (4) exon_pre_exon_conf, (5) exon_exon_aft_conf,
      (6) exon_pre_exon_aft_conf, (7) sum_inner_exon_conf,
      (8) num_inner_exon, (9) len_inner_exon
    and ``verified`` holds five 0/1 flags: relative coverage of the skipped
    exons, the three non-skip junction criteria (pre->inner, inner->aft, mean
    of the inner junctions) and the skip junction criterion.
    """
    verified = [0] * 5
    info = [1] + [0] * 9

    ### coordinates must be non-negative and every exon needs start < stop
    invalid = sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0)
    if not invalid:
        invalid = (sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1)
                   or sp.any(event.exons2[:, 1] - event.exons2[:, 0] < 1))
    if invalid:
        info[0] = 0
        return (verified, info)

    graph = gene.splicegraph
    seg_graph = gene.segmentgraph
    chain = event.exons2

    def segments_of(start, stop):
        # segments recorded for the splice graph vertex with these coordinates
        vertex = sp.where((graph.vertices[0, :] == start)
                          & (graph.vertices[1, :] == stop))[0]
        return sp.where(seg_graph.seg_match[vertex, :])[1]

    ### map the flanking and the skipped exons onto segments
    pre_segs = sp.sort(segments_of(chain[0, 0], chain[0, 1]))
    aft_segs = sp.sort(segments_of(chain[-1, 0], chain[-1, 1]))
    inner_segs = [segments_of(start, stop) for start, stop in chain[1:-1]]
    inner_union = sp.sort(
        sp.unique([s for per_exon in inner_segs for s in per_exon]))

    lengths = seg_graph.segments[1, :] - seg_graph.segments[0, :]

    def mean_cov(which):
        # length-weighted mean coverage over the given segments
        return sp.sum(counts_segments[which] * lengths[which]) / sp.sum(lengths[which])

    info[1] = mean_cov(pre_segs)      # exon_pre_cov
    info[3] = mean_cov(aft_segs)      # exon_aft_cov
    info[2] = mean_cov(inner_union)   # exons_cov

    limits = CFG['mult_exon_skip']

    ### skipped exons need at least FACTOR times the mean flanking coverage
    if info[2] >= limits['min_skip_rel_cov'] * (info[1] + info[3]) / 2:
        verified[0] = 1

    def junction_rows(src_seg, dst_seg):
        # rows of counts_edges that belong to the edge src_seg -> dst_seg
        key = sp.ravel_multi_index([src_seg, dst_seg], seg_graph.seg_edges.shape)
        return sp.where(counts_edges[:, 0] == key)[0]

    ### intron confirmation: number of reads stored for each junction;
    ### slots keep their default of 0 when the junction has no entry
    junctions = [
        (4, pre_segs[-1], inner_segs[0][0]),     # exon_pre_exon_conf
        (5, inner_segs[-1][-1], aft_segs[0]),    # exon_exon_aft_conf
        (6, pre_segs[-1], aft_segs[0]),          # exon_pre_exon_aft_conf
    ]
    for slot, src_seg, dst_seg in junctions:
        hits = junction_rows(src_seg, dst_seg)
        if hits.size > 0:
            info[slot] = counts_edges[hits[0], 1]

    # sum_inner_exon_conf: junctions between consecutive skipped exons
    for left, right in zip(inner_segs[:-1], inner_segs[1:]):
        hits = junction_rows(left[-1], right[0])
        if hits.size > 0:
            info[7] += counts_edges[hits[0], 1]

    info[8] = chain.shape[0] - 2                            # num_inner_exon
    info[9] = sp.sum(chain[1:-1, 1] - chain[1:-1, 0])       # len_inner_exon

    verified[1] = 1 if info[4] >= limits['min_non_skip_count'] else 0
    verified[2] = 1 if info[5] >= limits['min_non_skip_count'] else 0
    verified[3] = 1 if (info[7] / info[8]) >= limits['min_non_skip_count'] else 0
    verified[4] = 1 if info[6] >= limits['min_skip_count'] else 0

    return (verified, info)
コード例 #28
0
ファイル: verify.py プロジェクト: ccwang12/spladder
def verify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos, CFG):
    """Verify an intron retention event against segment and intron counts.

    Parameters:
        event: object with ``exons1`` (indexed ``[exon, 0]`` start and
            ``[exon, 1]`` stop for the two flanking exons) and ``exons2``
            (only used for the validity checks, as ``exons2[0]`` and
            ``exons2[1]``).
        gene: object with ``splicegraph.vertices``, ``segmentgraph.segments``,
            ``segmentgraph.seg_match`` and ``segmentgraph.seg_edges``.
        counts_segments: array of coverage values, indexed by segment.
        counts_edges: 2-column array; column 0 is the flat index of a
            (segment, segment) pair within ``seg_edges.shape``, column 1 the
            count stored for that pair.
        counts_seg_pos: per-segment array that is summed over the intron
            segments and divided by their total length.
        CFG: dict; the keys ``min_retention_cov``, ``min_retention_region``,
            ``min_retention_rel_cov`` and ``min_non_retention_count`` of
            ``CFG['intron_retention']`` are read.

    Returns:
        ``(verified, info)``.  ``info`` is ``[valid, intron_cov, exon1_cov,
        exon2_cov, intron_conf, intron_cov_region]`` and ``verified`` is
        ``[retention_ok, spliced_ok]``.  When the coordinates are invalid,
        ``info[0]`` is 0 and everything else stays 0.  ``intron_conf`` stays 0
        when the intron has no entry in ``counts_edges``.

    Raises:
        AssertionError: if no segment lies between the two exons.
    """
    verified = [0, 0]

    # (0) valid, (1) intron_cov, (2) exon1_cov, (3), exon2_cov
    # (4) intron_conf, (5) intron_cov_region
    info = [1, 0, 0, 0, 0, 0]

    ### check validity of exon coordinates (>=0)
    if sp.any(event.exons1 < 0) or sp.any(event.exons2 < 0):
        info[0] = 0
        return (verified, info)
    ### check validity of exon coordinates (start < stop)
    elif sp.any(event.exons1[:, 1] - event.exons1[:, 0] < 1) or sp.any((event.exons2[1] - event.exons2[0]) < 1):
        info[0] = 0
        return (verified, info)

    sg = gene.splicegraph
    segs = gene.segmentgraph

    ### find exons corresponding to event
    idx_exon1 = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2 = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])

    # the intron consists of every segment between the two exons that
    # belongs to neither of them
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])
    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    seg_lens = segs.segments[1, :] - segs.segments[0, :]

    ### compute exon coverages as mean of position wise coverage
    # exon1_cov
    info[2] = sp.sum(counts_segments[seg_exon1] * seg_lens[seg_exon1]) / sp.sum(seg_lens[seg_exon1])
    # exon2_cov
    info[3] = sp.sum(counts_segments[seg_exon2] * seg_lens[seg_exon2]) / sp.sum(seg_lens[seg_exon2])
    # intron_cov
    info[1] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])
    # intron_cov_region
    info[5] = sp.sum(counts_seg_pos[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check if counts match verification criteria
    if info[1] > CFG['intron_retention']['min_retention_cov'] and \
       info[5] > CFG['intron_retention']['min_retention_region'] and \
       info[1] >= CFG['intron_retention']['min_retention_rel_cov'] * (info[2] + info[3]) / 2:
        verified[0] = 1

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], segs.seg_edges.shape))[0]
    # Without an entry for this intron, keep the default count of 0 instead of
    # storing an empty array, whose truth value in the comparison below is
    # undefined.
    if idx.shape[0] > 0:
        info[4] = counts_edges[idx, 1]

        if info[4] >= CFG['intron_retention']['min_non_retention_count']:
            verified[1] = 1

    return (verified, info)
コード例 #29
0
ファイル: count.py プロジェクト: ccwang12/spladder
def count_graph_coverage(genes, fn_bam=None, CFG=None, fn_out=None):
    """Collect segment coverage and intron counts for every gene and BAM file.

    Parameters:
        genes: array of gene objects, or (when ``fn_bam`` is None) a dict with
            the keys ``'genes'``, ``'fn_bam'``, ``'CFG'`` and optionally
            ``'fn_out'`` that bundles all arguments.
        fn_bam: a single alignment file name or a list of them.
        CFG: dict; ``'read_filter'``, ``'var_aware'`` and ``'primary_only'``
            are passed on to ``add_reads_from_bam``.
        fn_out: if given, the result is pickled to this path.

    Returns:
        An object array of shape ``(len(fn_bam), genes.shape[0])`` holding one
        ``Counts`` object per file and gene -- or ``None`` when ``fn_out`` is
        given, in which case the array is written to disk instead.

    Side effects:
        Creates ``segmentgraph`` on genes that lack one, overwrites
        ``start``/``stop`` of every gene and writes progress dots to stdout.
    """
    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        CFG = PAR['CFG']

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    # maximal distance between an intron boundary and a segment boundary
    intron_tol = 0

    for f in range(counts.shape[0]):
        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        for i in range(genes.shape[0]):
            sys.stdout.write('.')
            if i > 0 and i % 50 == 0:
                sys.stdout.write('%i\n' % i)
            gg = genes[i]
            if gg.segmentgraph is None:
                gg.segmentgraph = Segmentgraph(gg)
            seg_graph = gg.segmentgraph
            gg.start = seg_graph.segments.ravel().min()
            gg.stop = seg_graph.segments.ravel().max()

            ### add RNA-seq evidence to the gene structure
            (tracks, intron_list) = add_reads_from_bam(gg, fn_bam[f], ['exon_track', 'intron_list'], CFG['read_filter'], CFG['var_aware'], CFG['primary_only'])
            intron_list = intron_list[0] ### TODO

            ### extract mean exon coverage for all segments
            counts[f, i] = Counts(seg_graph.segments.shape[1])

            for j in range(seg_graph.segments.shape[1]):
                # track columns are relative to the gene start
                idx = sp.arange(seg_graph.segments[0, j], seg_graph.segments[1, j]) - gg.start
                # per-position coverage summed over all track rows
                pos_cov = sp.sum(tracks[:, idx], axis=0)
                counts[f, i].segments[j] = sp.mean(pos_cov)
                counts[f, i].seg_pos[j] = sp.sum(pos_cov > 0)

            ### extract intron counts
            k, l = sp.where(seg_graph.seg_edges == 1)
            for m in range(k.shape[0]):
                # introns running from the end of segment k[m] to the start
                # of segment l[m]
                idx = sp.where((sp.absolute(intron_list[:, 0] - seg_graph.segments[1, k[m]]) <= intron_tol) & (sp.absolute(intron_list[:, 1] - seg_graph.segments[0, l[m]]) <= intron_tol))[0]
                edge_id = sp.ravel_multi_index([k[m], l[m]], seg_graph.seg_edges.shape)
                if idx.shape[0] > 0:
                    edge_count = sp.sum(intron_list[idx, 2])
                else:
                    edge_count = 0
                row = sp.atleast_2d(sp.array([edge_id, edge_count]))
                if counts[f, i].edges.shape[0] == 0:
                    counts[f, i].edges = row
                else:
                    counts[f, i].edges = sp.r_[counts[f, i].edges, row]

    if fn_out is not None:
        # protocol -1 is a binary pickle format, so the file has to be opened
        # in binary mode; the with-block makes sure it is flushed and closed
        with open(fn_out, 'wb') as fh:
            cPickle.dump(counts, fh, -1)
    else:
        return counts