コード例 #1
0
def get_rho(xy):
    """Return `spearmanr` of the paired values in *xy*, with 0 as fallback.

    *xy* is an iterable of (x, y) pairs.  A falsy *xy* (e.g. an empty list)
    yields 0 without calling `spearmanr`; a NaN result is likewise replaced
    by 0 so that callers always receive a comparable number.
    """
    if xy:
        xs, ys = zip(*xy)  # split the pairs into two parallel sequences
        coefficient = spearmanr(xs, ys)
        return 0 if np.isnan(coefficient) else coefficient
    return 0
コード例 #2
0
ファイル: allmaps.py プロジェクト: yangjl/jcvi
def get_rho(xy):
    """Compute `spearmanr` over the (x, y) pairs in *xy*.

    Returns 0 when *xy* is falsy (nothing to correlate) or when `spearmanr`
    yields NaN; otherwise returns the value from `spearmanr` unchanged.
    """
    if not xy:
        return 0

    # Transpose the list of pairs into the x series and the y series
    x_values, y_values = zip(*xy)
    result = spearmanr(x_values, y_values)
    if np.isnan(result):
        return 0
    return result
コード例 #3
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is user-facing output; developer notes are kept in comments.
    #
    # args: command-line tokens. After option parsing exactly two positional
    #       arguments must remain: the input BED file name and the seqid.
    # Reads <prefix>.lifted.bed and <prefix>.agp (prefix = input.bed without
    # its last extension) and saves the figure as "<seqid>.<iopts.format>".
    # Returns None; exits through sys.exit() on a wrong argument count or when
    # no mlg passes the links cutoff.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files share the prefix of input.bed
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    # `function` is applied to a marker to obtain its map-side coordinate
    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep only the mlgs whose entry in s.mlg_counts reaches the cutoff
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    if not mlgs:
        # With nothing selected, sum(mlgsizes.values()) further down is 0 and
        # the panel-height ratio would die with ZeroDivisionError; stop here
        # with an explicit message instead.
        sys.exit("No mlg on {0} passes the links cutoff ({1}), "
                 "nothing to plot".format(seqid, links))

    # Size of each mlg = largest map-side coordinate among its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # root spans the whole canvas; ax1 is the left half (parallel axes) and
    # ax2 the right half (scatter plots)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one color per map name, then propagated to every mlg through
    # the map-name prefix that precedes the first "-" in the mlg name
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation means the map runs opposite to the sequence,
        # so the map is drawn flipped
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Put the label on the outer side of the map, away from the center
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2,
                 label,
                 color=colors[mlg],
                 ha=ha,
                 va="center",
                 rotation=90)
        marker_pos.update(g.marker_pos)

    # Restrict the AGP records to the requested object
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Linear map from a sequence position to a y coordinate (top = position 0)
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # Rebind f: sequence position -> x coordinate of the horizontal chromosome
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2,
                         xstart,
                         xstop,
                         ystop,
                         height=2 * tip,
                         patch=patchstart,
                         lw=2)

    gap = .03
    # Rebind ratio: canvas height per map unit once the gaps between the
    # scatter panels (and one tip) have been set aside
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # Panels are stacked top-down; ystart is moved down as each one is placed
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5,
                1 - .4 * gap / height,
                r"$\rho$={0:.3f}".format(rho),
                ha="center",
                va="top",
                transform=ax.transAxes,
                color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        # Test the tick count first so that an axis without ticks cannot
        # trigger a division by zero in the spacing check
        while len(ax.get_yticks()) >= 2 and \
                height / len(ax.get_yticks()) < .03:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Panel names go on the center line; with many panels the alignment is
    # alternated between neighbours
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90, ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
コード例 #4
0
ファイル: allmaps.py プロジェクト: yangjl/jcvi
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is user-facing output; developer notes are kept in comments.
    #
    # args: command-line tokens. After option parsing exactly two positional
    #       arguments must remain: the input BED file name and the seqid.
    # Reads <prefix>.lifted.bed and <prefix>.agp (prefix = input.bed without
    # its last extension) and saves the figure as "<seqid>.<iopts.format>".
    # Returns None; exits through sys.exit() on a wrong argument count or when
    # s.mlg_counts holds no entry at all.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files share the prefix of input.bed
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    # `function` is applied to a marker to obtain its map-side coordinate
    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    mlg_counts = list(s.mlg_counts.items())
    if not mlg_counts:
        # Without any entry the relaxation loop below could never find a
        # candidate and would spin forever; bail out explicitly instead.
        logging.error("No mlg counts for {0}, nothing to plot".format(seqid))
        sys.exit(1)

    # Keep only the mlgs whose count reaches the cutoff; when none does,
    # halve the cutoff until at least one mlg is selected
    mlgs = [k for k, v in mlg_counts if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in mlg_counts if v >= links]

    # Size of each mlg = largest map-side coordinate among its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # root spans the whole canvas; ax1 is the left half (parallel axes) and
    # ax2 the right half (scatter plots)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one color per map name, then propagated to every mlg through
    # the map-name prefix that precedes the first "-" in the mlg name
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation means the map runs opposite to the sequence,
        # so the map is drawn flipped
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Put the label on the outer side of the map, away from the center
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    # Restrict the AGP records to the requested object
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Linear map from a sequence position to a y coordinate (top = position 0)
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # Rebind f: sequence position -> x coordinate of the horizontal chromosome
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # Rebind ratio: canvas height per map unit once the gaps between the
    # scatter panels (and one tip) have been set aside
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # Panels are stacked top-down; ystart is moved down as each one is placed
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                    ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        # Test the tick count first so that an axis without ticks cannot
        # trigger a division by zero in the spacing check
        while len(ax.get_yticks()) >= 2 and \
                height / len(ax.get_yticks()) < .03:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Panel names go on the center line; with many panels the alignment is
    # alternated between neighbours
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                      ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)