예제 #1
0
def calc_permiscuity(c, juncmap5p, juncmap3p, ggmap):
    """Return the share of junction coverage held by this chimera's 5'/3' pair.

    The 5' and 3' junction positions of ``c`` are passed through
    ``gene_to_genome_pos`` and the resulting coordinates are used to index
    the two junction maps.  For each side, the coverage stored for this
    specific 5'/3' pair is divided by the summed coverage of every partner
    stored under that junction.

    Args:
        c: object exposing ``mate5p.tx_name``, ``mate5p.end``,
            ``mate3p.tx_name`` and ``mate3p.start``.
        juncmap5p: mapping ``coord5p -> {coord3p: coverage}``.
        juncmap3p: mapping ``coord3p -> {coord5p: coverage}``.
        ggmap: forwarded unchanged to ``gene_to_genome_pos``.

    Returns:
        Tuple ``(frac5p, frac3p)`` of floats.

    Raises:
        ZeroDivisionError: if the summed coverage at either junction is zero.
    """
    # subtract one since 5' junc position is an open interval
    coord5p = gene_to_genome_pos(c.mate5p.tx_name, c.mate5p.end - 1, ggmap)
    coord3p = gene_to_genome_pos(c.mate3p.tx_name, c.mate3p.start, ggmap)

    def _pair_fraction(juncmap, junction, partner):
        # NOTE(review): plain indexing is used, so if the maps are
        # defaultdicts (presumably the ones returned by
        # build_junc_permiscuity_map -- confirm with caller) a missing key is
        # inserted rather than raising KeyError.
        partners = juncmap[junction]
        cov = partners[partner]
        # .values() works on both Python 2 and 3; .itervalues() is 2-only
        return cov / float(sum(partners.values()))

    frac5p = _pair_fraction(juncmap5p, coord5p, coord3p)
    frac3p = _pair_fraction(juncmap3p, coord3p, coord5p)
    return frac5p, frac3p
예제 #2
0
def build_junc_permiscuity_map(chimeras, ggmap):
    """Build two-level coverage tables keyed by 5' and by 3' junction.

    For every chimera the 5' and 3' junction positions are converted with
    ``gene_to_genome_pos``.  The first returned table maps
    ``coord5p -> {coord3p: coverage}`` and the second maps
    ``coord3p -> {coord5p: coverage}``; in both, the stored coverage is the
    largest ``weighted_cov`` seen so far for that 5'/3' pair.

    Args:
        chimeras: iterable of objects exposing ``mate5p``, ``mate3p`` and
            ``weighted_cov``.
        ggmap: forwarded unchanged to ``gene_to_genome_pos``.

    Returns:
        Tuple ``(junc5p_map, junc3p_map)`` of nested defaultdicts whose
        innermost default value is 0.
    """
    def _new_partner_table():
        # unseen partners start at zero coverage
        return collections.defaultdict(int)

    by_5p = collections.defaultdict(_new_partner_table)
    by_3p = collections.defaultdict(_new_partner_table)
    for chimera in chimeras:
        # subtract one since 5' junc position is an open interval
        pos5p = gene_to_genome_pos(chimera.mate5p.tx_name,
                                   chimera.mate5p.end - 1, ggmap)
        pos3p = gene_to_genome_pos(chimera.mate3p.tx_name,
                                   chimera.mate3p.start, ggmap)
        # the same "keep the maximum coverage per pair" update is applied
        # from the 5' side first and then mirrored from the 3' side
        for table, junction, partner in ((by_5p, pos5p, pos3p),
                                         (by_3p, pos3p, pos5p)):
            row = table[junction]
            row[partner] = max(row[partner], chimera.weighted_cov)
    return by_5p, by_3p
def build_junc_coverage_map(chimeras, ggmap):
    """Select the highest-coverage transcript pair for each junction pair.

    Each chimera's 5' and 3' junction positions are converted with
    ``gene_to_genome_pos``.  Chimeras sharing the same converted
    ``(coord5p, coord3p)`` pair compete on the tuple
    ``(encomp_and_spanning, weighted_cov, encomp_or_spanning)``; the
    transcript names of the first chimera with the largest tuple are kept.

    Args:
        chimeras: iterable of objects exposing ``mate5p``, ``mate3p``,
            ``encomp_and_spanning``, ``weighted_cov`` and
            ``encomp_or_spanning``.  It is consumed once, so a generator
            is acceptable.
        ggmap: forwarded unchanged to ``gene_to_genome_pos``.

    Returns:
        Set of ``(tx_name_5p, tx_name_3p)`` tuples, one per winning chimera.
    """
    best_by_pair = {}
    num_chimeras = 0
    for num_chimeras, c in enumerate(chimeras, 1):
        # convert to genomic coords
        # subtract one since 5' junc position is an open interval
        coord5p = gene_to_genome_pos(c.mate5p.tx_name, c.mate5p.end - 1, ggmap)
        coord3p = gene_to_genome_pos(c.mate3p.tx_name, c.mate3p.start, ggmap)
        # keep track of maximum coverage isoform
        pairkey = (coord5p, coord3p)
        paircov = (c.encomp_and_spanning, c.weighted_cov, c.encomp_or_spanning)
        current = best_by_pair.get(pairkey)
        # direct tuple comparison replaces cmp(), which Python 3 removed;
        # strict '>' means ties keep the first chimera seen
        if current is None or paircov > current[0]:
            best_by_pair[pairkey] = (paircov, c.mate5p.tx_name, c.mate3p.tx_name)
    logging.debug("Parsed %d chimeras", num_chimeras)
    # .values() works on both Python 2 and 3; .itervalues() is 2-only
    kept_isoforms = set((tx5p, tx3p)
                        for _, tx5p, tx3p in best_by_pair.values())
    logging.debug("Kept %d highest coverage isoforms", len(kept_isoforms))
    return kept_isoforms
def build_junc_coverage_map(chimeras, ggmap):
    """Return the best-covered ``(5' transcript, 3' transcript)`` pairs.

    Chimeras are grouped by the genomic coordinates of their 5' and 3'
    junctions, as produced by ``gene_to_genome_pos``.  Within each group the
    chimera with the largest
    ``(encomp_and_spanning, weighted_cov, encomp_or_spanning)`` tuple wins,
    and on a tie the first one encountered is kept.

    Args:
        chimeras: iterable of objects exposing ``mate5p``, ``mate3p``,
            ``encomp_and_spanning``, ``weighted_cov`` and
            ``encomp_or_spanning``; iterated exactly once.
        ggmap: forwarded unchanged to ``gene_to_genome_pos``.

    Returns:
        Set of ``(tx_name_5p, tx_name_3p)`` tuples taken from the winners.
    """
    # NOTE(review): another definition with this same name appears earlier in
    # this file; if both live in one module, this later one is the one bound.
    winners = {}
    num_chimeras = 0
    for c in chimeras:
        num_chimeras += 1
        # convert to genomic coords
        # subtract one since 5' junc position is an open interval
        coord5p = gene_to_genome_pos(c.mate5p.tx_name, c.mate5p.end - 1, ggmap)
        coord3p = gene_to_genome_pos(c.mate3p.tx_name, c.mate3p.start, ggmap)
        pairkey = (coord5p, coord3p)
        paircov = (c.encomp_and_spanning, c.weighted_cov, c.encomp_or_spanning)
        # plain '>' on tuples stands in for cmp(), which Python 3 removed
        if pairkey not in winners or paircov > winners[pairkey][0]:
            winners[pairkey] = (paircov, c.mate5p.tx_name, c.mate3p.tx_name)
    logging.debug("Parsed %d chimeras", num_chimeras)
    # .values() is available on both Python 2 and 3, unlike .itervalues()
    kept_isoforms = set(entry[1:3] for entry in winners.values())
    logging.debug("Kept %d highest coverage isoforms", len(kept_isoforms))
    return kept_isoforms