Beispiel #1
0
    def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"):
        """
        Finalize the Ks plot: legend, axis limits, labels, ticks and spines.

        Reads `self.ax`, `self.ks_max`, `self.lines`, `self.labels`,
        `self.legendp` and `self.interval`. When `filename` is truthy the
        figure is written out through `savefig` at 300 dpi.
        """
        axes = self.ax
        upper_ks = self.ks_max
        handles = self.lines
        legend_texts = list(map(markup, self.labels))
        legend_loc = self.legendp

        # A legend is only drawn when more than one line has been registered
        if len(handles) > 1:
            legend_box = axes.legend(
                handles,
                legend_texts,
                loc=legend_loc,
                shadow=True,
                fancybox=True,
                prop={"size": 10},
            )
            legend_box.get_frame().set_alpha(0.5)

        bin_width = self.interval
        axes.set_xlim((0, upper_ks - bin_width))
        # Pin the y-axis floor at zero while keeping the current ceiling
        y_top = axes.get_ylim()[-1]
        axes.set_ylim(0, y_top)

        axes.set_title(markup(title), fontweight="bold")
        axes.set_xlabel(markup("Synonymous substitutions per site (*Ks*)"))
        y_caption = "Percentage of gene pairs (bin={})".format(bin_width)
        axes.set_ylabel(y_caption)

        # Re-issue the current tick values as labels in the Helvetica family
        axes.set_xticklabels(axes.get_xticks(), family="Helvetica")
        axes.set_yticklabels(axes.get_yticks(), family="Helvetica")

        adjust_spines(axes, ["left", "bottom"], outward=True)

        if not filename:
            return
        savefig(filename, dpi=300)
Beispiel #2
0
    def __init__(self, fig, root, canvas, chr, xlim, datadir,
                 order=None, hlsuffix=None, palette=None, cap=50,
                 gauge="bottom", plot_label=True, plot_chr_label=True,
                 gauge_step=5000000, vlines=None):
        """
        Draw a framed stack of XY tracks for one chromosome.

        fig, root: figure and root axes to draw on.
        canvas: (x, y, w, h) of the panel, in the coordinates of `root`.
        chr: chromosome name; data files are globbed as `datadir/chr*`.
        xlim: (start, end) x-range applied to every track.
        order: optional list of track labels. Files whose label is not
            listed are dropped and the rest are stacked in this order.
            Without it, every matching file is drawn, sorted by name.
        hlsuffix: when set, `datadir/<label>.<hlsuffix>` is imported as the
            highlight file of each track.
        palette: a single colour used for all tracks; when None the
            brewer2mpl 'Set2' qualitative map is used.
        cap: passed to `XYtrack.cap(ymax=cap)`.
        gauge: "top" or "bottom" to draw a base-pair gauge; any other value
            (e.g. None) draws no gauge.
        plot_label, plot_chr_label: toggle track and chromosome labels.
        gauge_step: tick step of the gauge.
        vlines: when set, passed to `XYtrack.vlines`.

        Note: `h / ntracks` raises ZeroDivisionError when no data file
        matches.
        """
        x, y, w, h = canvas
        p = .01
        root.add_patch(Rectangle((x - p, y - p), w + 2 * p, h + 2 * p, lw=1,
                                 fill=False, ec="darkslategray", zorder=10))

        def track_label(path):
            # The label is the second dot-separated field of the file name.
            # Use the basename so a dot in `datadir` cannot shift the field.
            return op.basename(path).split(".")[1]

        datafiles = glob(op.join(datadir, chr + "*"))
        if order:
            # Ignore files that are not requested instead of failing in
            # order.index(), then stack the rest in the requested order
            datafiles = [z for z in datafiles if track_label(z) in order]
            datafiles.sort(key=lambda z: order.index(track_label(z)))
        else:
            # glob() order is arbitrary; sort for a reproducible layout
            datafiles.sort()
        # Derive labels from the files so this also works with order=None
        labels = [track_label(z) for z in datafiles]

        ntracks = len(datafiles)
        yinterval = h / ntracks
        yy = y + h

        if palette is None:
            # Get the palette
            import brewer2mpl
            set2 = brewer2mpl.get_map('Set2', 'qualitative', ntracks).mpl_colors
        else:
            set2 = [palette] * ntracks

        # Defaults cover gauge values other than "top"/"bottom": no gauge
        # axes, and the chromosome label goes below the canvas
        gauge_ax = None
        tpos = y - .07
        if gauge == "top":
            gauge_ax = fig.add_axes([x, yy + p, w, .0001])
            adjust_spines(gauge_ax, ["top"])
            tpos = yy + .07
        elif gauge == "bottom":
            gauge_ax = fig.add_axes([x, y - p, w, .0001])
            adjust_spines(gauge_ax, ["bottom"])

        start, end = xlim
        if gauge_ax is not None:
            fs = gauge_step < 1000000
            setup_gauge_ax(gauge_ax, start, end, gauge_step,
                           float_formatter=fs)

        if plot_chr_label:
            root.text(x + w / 2, tpos, chr, ha="center", va="center",
                      color="darkslategray", size=16)

        for label, datafile, c in zip(labels, datafiles, set2):
            yy -= yinterval
            ax = fig.add_axes([x, yy, w, yinterval * .9])
            xy = XYtrack(ax, datafile, color=c)
            xy.interpolate(end)
            xy.cap(ymax=cap)
            if vlines:
                xy.vlines(vlines)
            if hlsuffix:
                hlfile = op.join(datadir, ".".join((label, hlsuffix)))
                xy.import_hlfile(hlfile, chr)
            if plot_label:
                root.text(x - .035, yy + yinterval / 2, label,
                          ha="center", va="center", color=c)
            xy.draw()
            ax.set_xlim(*xlim)
Beispiel #3
0
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution. The species is only
    used to annotate the graphic; K is the K-mer size, used for the annotation
    and passed on to the spectrum analysis.
    """
    p = OptionParser(histogram.__doc__)
    p.add_option(
        "--vmin",
        dest="vmin",
        default=1,
        type="int",
        help="minimum value, inclusive",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        default=100,
        type="int",
        help="maximum value, inclusive",
    )
    p.add_option(
        "--pdf",
        default=False,
        action="store_true",
        help="Print PDF instead of ASCII plot",
    )
    p.add_option(
        "--method",
        choices=("nbinom", "allpaths"),
        default="nbinom",
        help=
        "'nbinom' - slow but more accurate for het or polyploid genome; 'allpaths' - fast and works for homozygous genomes",
    )
    p.add_option(
        "--maxiter",
        default=100,
        type="int",
        help="Max iterations for optimization. Only used with --method nbinom",
    )
    p.add_option("--coverage",
                 default=0,
                 type="int",
                 help="Kmer coverage [default: auto]")
    p.add_option(
        "--nopeaks",
        default=False,
        action="store_true",
        help="Do not annotate K-mer peaks",
    )
    opts, args, iopts = p.set_image_options(args, figsize="7x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    method = opts.method
    vmin, vmax = opts.vmin, opts.vmax
    # Named ascii_plot so the builtin ascii() is not shadowed
    ascii_plot = not opts.pdf
    peaks = not opts.nopeaks and method == "allpaths"
    N = int(N)

    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    method_info = ks.analyze(K=N, maxiter=opts.maxiter, method=method)

    Total_Kmers = int(ks.totalKmers)
    coverage = opts.coverage
    # --coverage overrides the fitted coverage; 0 (the default) means "auto"
    Kmer_coverage = ks.lambda_ if not coverage else coverage
    Genome_size = int(round(Total_Kmers * 1.0 / Kmer_coverage))

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, thousands(Total_Kmers))
    Kmer_coverage_msg = "{0}-mer coverage: {1:.1f}x".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f} Mb".format(Genome_size /
                                                                 1e6)
    Repetitive_msg = ks.repetitive
    SNPrate_msg = ks.snprate

    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        print(msg, file=sys.stderr)

    x, y = ks.get_xy(vmin, vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if ascii_plot:
        asciiplot(x, y, title=title)
        return Genome_size

    plt.figure(1, (iopts.w, iopts.h))
    plt.bar(x, y, fc="#b2df8a", lw=0)
    # Plot the negative binomial fit
    if method == "nbinom":
        generative_model = method_info["generative_model"]
        GG = method_info["Gbins"]
        ll = method_info["lambda"]
        rr = method_info["rho"]
        kf_range = method_info["kf_range"]
        stacked = generative_model(GG, ll, rr)
        plt.plot(
            kf_range,
            stacked,
            ":",
            color="#6a3d9a",
            lw=2,
        )

    ax = plt.gca()

    if peaks:  # Only works for method 'allpaths'
        t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(kx, ky) for kx, ky in ks.counts if kx in t]
        if tcounts:
            # Separate names keep the histogram's x, y from being rebound
            peak_x, peak_y = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(peak_x, peak_y, "ko", lw=3, mec="k", mfc="w")
            ax.text(ks.max1, tcounts[ks.max1], "SNP peak")
            ax.text(ks.max2, tcounts[ks.max2], "Main peak")

    ymin, ymax = ax.get_ylim()
    # Leave headroom above the tallest bar for the annotations
    ymax = ymax * 7 / 6
    if method == "nbinom":
        # Plot multiple CN locations, CN1, CN2, ... up to ploidy
        cn_color = "#a6cee3"
        for i in range(1, ks.ploidy + 1):
            cn_x = i * ks.lambda_
            plt.plot((cn_x, cn_x), (0, ymax), "-.", color=cn_color)
            plt.text(
                cn_x,
                ymax * 0.95,
                "CN{}".format(i),
                ha="right",
                va="center",
                color=cn_color,
                rotation=90,
            )

    messages = [
        Total_Kmers_msg,
        Kmer_coverage_msg,
        Genome_size_msg,
        Repetitive_msg,
        SNPrate_msg,
    ]
    if method == "nbinom":
        messages += [ks.ploidy_message] + ks.copy_messages
    write_messages(ax, messages)

    ax.set_title(markup(title))
    ax.set_xlim((0, vmax))
    ax.set_ylim((0, ymax))
    adjust_spines(ax, ["left", "bottom"], outward=True)
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    set_human_axis(ax)

    # The image is named after the part of histfile before its first dot
    imagename = histfile.split(".")[0] + "." + iopts.format
    savefig(imagename, dpi=100)

    return Genome_size
Beispiel #4
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    parser = OptionParser(fig3.__doc__)
    parser.add_option(
        "--gauge_step",
        default=10000000,
        type="int",
        help="Step size for the base scale",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[seqid] for seqid in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Panel A: synteny between the four chromosomes
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(
        fig,
        root,
        seqidsfile,
        klayout,
        gap=gap,
        height=height,
        lw=2,
        generank=False,
        sizes=sizes,
        heightpad=r,
        roundrect=True,
        plot_label=False,
    )

    # Chromosome labels are placed by hand since plot_label is off above
    for entry in K.layout:
        if entry.empty:
            continue
        label_x, label_y = entry.xstart, entry.y
        if label_x < .11:
            label_x += .1
            label_y += .06
        text = entry.label
        root.text(label_x - .015, label_y, text, fontsize=15,
                  ha="right", va="center")

    # One read-depth inset (plus gauge) per karyotype track
    inset_names = (
        "chrA02.bzh.forxmgr",
        "parent.A02.per10kb.forxmgr",
        "parent.C2.per10kb.forxmgr",
        "chrC02.bzh.forxmgr",
    )
    datafiles = [op.join(datadir, name) for name in inset_names]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for track, datafile in zip(tracks, datafiles):
        inset_ax = make_affix_axis(fig, track, -r, height=2 * r)
        xy_axes.append(inset_ax)
        seqid = track.seqids[0]
        xy = XYtrack(inset_ax, datafile, color="lightslategray")
        first, last = 0, track.total
        xy.interpolate(last)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        inset_ax.set_xlim(first, last)
        gauge_ax = make_affix_axis(fig, track, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, first, last, gauge_step)

    # Converted gene tracks sit above the two middle karyotype tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    gene_order = Bed(bedfile).order
    a_to_c = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    c_to_a = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    # (file, index, seqid, axes, colour, extra keywords), in drawing order
    conversions = (
        (a_to_c, 0, "A02", ax_Ar, rr, {}),
        (a_to_c, 1, "C2", ax_Co, gg, {}),
        (c_to_a, 0, "A02", ax_Ar, gg, {"ypos": 1}),
        (c_to_a, 1, "C2", ax_Co, rr, {"ypos": 1}),
    )
    for asterisk in (False, True):
        for convfile, idx, seqid, conv_ax, color, extra in conversions:
            conversion_track(gene_order, convfile, idx, seqid, conv_ax,
                             color, asterisk=asterisk, **extra)

    # Text labels inside the two parental insets, anchored on gene pairs
    Ar, Co = xy_axes[1:3]
    annotations = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )

    for target_ax, genes, ha, text in annotations:
        g1, g2 = genes.split()
        pos1, pos2 = gene_order[g1][1].start, gene_order[g2][1].start
        if ha == "center":
            xpos = (pos1 + pos2) / 2 * .8
        elif ha == "left":
            xpos = pos2
        else:
            xpos = pos1
        text = r"\textit{{{0}}}".format(text)
        color = rr if "+" in text else gg
        target_ax.text(xpos, 30, text, color=color, fontsize=9,
                       ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Panels B and C: coverage in resequencing lines
    gstep = 5000000
    strains = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]
    # Both coverage panels share every keyword argument
    coverage_kwargs = dict(
        order=strains,
        gauge=None,
        plot_chr_label=False,
        gauge_step=gstep,
        palette="gray",
        cap=40,
        hlsuffix=hlsuffix,
        labels_dict=labels_dict,
        diverge=diverge,
    )

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    upper = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                     **coverage_kwargs)
    yys = upper.yys
    flc_x, gsl_x = .37, .72
    tip = .02
    # (x, y, dx, dy, label) for each arrow drawn on the upper panel
    arrows = [(flc_x, yys[i] + k * tip, tip, tip / 2, "FLC")
              for i, k in ((2, .3), (3, .6), (5, .6))]
    arrows += [(gsl_x, yys[i] + .9 * tip, -1.2 * tip, 0, "GSL")
               for i in (0, 4, 6)]

    arrowprops = {
        "facecolor": 'black',
        "shrink": .05,
        "frac": .5,
        "width": 1,
        "headwidth": 4,
    }
    for ax_x, ax_y, dx, dy, text in arrows:
        text = r"\textit{{{0}}}".format(text)
        root.annotate(
            text,
            xy=(ax_x, ax_y),
            xytext=(ax_x + dx, ax_y + dy),
            arrowprops=arrowprops,
            color=rr,
            fontsize=9,
            ha="center",
            va="center",
        )

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir, **coverage_kwargs)

    pad = .03
    labels = (
        (.1, .67, "A"),
        (t1.xstart - 3 * pad, .95 + pad, "B"),
        (t2.xstart - 3 * pad, .25 + pad, "C"),
    )
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Beispiel #5
0
def f3a(args):
    """
    %prog f3a chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3A displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    p = OptionParser(f3a.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Only the summed sizes and the ratio are needed for the layout
    _, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a)
    height = .11
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True)

    # Inset with datafiles: one read-depth track plus gauge per karyotype track
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        # Named seqid rather than chr to avoid shadowing the builtin
        seqid = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r/2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r/2)

    order = Bed(bedfile).order
    # NOTE(review): these paths are hard-coded under "data/" and ignore the
    # datadir argument - confirm whether that is intended.
    for asterisk in (False, True):
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0, "A02", ax_Ar, "r", asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1, "C2", ax_Co, "g", asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0, "A02", ax_Ar, "g", ypos=1, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1, "C2", ax_Co, "r", ypos=1, asterisk=asterisk)

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-.5, 1.5)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-.5, 1.5)

    # The root axes only serve as a unit-square canvas; hide its frame
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "napusf3a." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Beispiel #6
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    optparser = OptionParser(fig3.__doc__)
    optparser.add_option("--gauge_step", default=10000000, type="int",
                         help="Step size for the base scale")
    opts, args, iopts = optparser.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not optparser.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[name] for name in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # --- Synteny panel -----------------------------------------------------
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    karyotype_options = dict(gap=gap, height=height, lw=2, generank=False,
                             sizes=sizes, heightpad=r, roundrect=True,
                             plot_label=False)
    K = Karyotype(fig, root, seqidsfile, klayout, **karyotype_options)

    # --- Chromosome labels (drawn here because plot_label=False) -----------
    for row in K.layout:
        if row.empty:
            continue
        tx, ty = row.xstart, row.y
        if tx < .11:
            tx += .1
            ty += .06
        caption = row.label
        root.text(tx - .015, ty, caption, fontsize=15,
                  ha="right", va="center")

    # --- Read-depth insets, one per karyotype track ------------------------
    datafiles = [
        op.join(datadir, basename)
        for basename in ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                         "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    ]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for trk, datafile in zip(tracks, datafiles):
        panel_ax = make_affix_axis(fig, trk, -r, height=2 * r)
        xy_axes.append(panel_ax)
        seqid = trk.seqids[0]
        xy = XYtrack(panel_ax, datafile, color="lightslategray")
        lo, hi = 0, trk.total
        xy.interpolate(hi)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        panel_ax.set_xlim(lo, hi)
        gauge_ax = make_affix_axis(fig, trk, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, lo, hi, gauge_step)

    # --- Converted gene tracks ---------------------------------------------
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r/2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r/2)

    bed_order = Bed(bedfile).order
    atoc_file = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa_file = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(bed_order, atoc_file, 0, "A02", ax_Ar, rr,
                         asterisk=asterisk)
        conversion_track(bed_order, atoc_file, 1, "C2", ax_Co, gg,
                         asterisk=asterisk)
        conversion_track(bed_order, ctoa_file, 0, "A02", ax_Ar, gg,
                         ypos=1, asterisk=asterisk)
        conversion_track(bed_order, ctoa_file, 1, "C2", ax_Co, rr,
                         ypos=1, asterisk=asterisk)

    # --- Region names inside the two parental insets -----------------------
    Ar, Co = xy_axes[1:3]
    annotations = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )

    for inset, genes, ha, name in annotations:
        gene1, gene2 = genes.split()
        start1 = bed_order[gene1][1].start
        start2 = bed_order[gene2][1].start
        if ha == "center":
            anchor = (start1 + start2) / 2 * .8
        elif ha == "left":
            anchor = start2
        else:
            anchor = start1
        name = r"\textit{{{0}}}".format(name)
        # A "+" in the name selects the first diverge colour, else the second
        shade = rr if "+" in name else gg
        inset.text(anchor, 30, name, color=shade, fontsize=9,
                   ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # --- Coverage in resequencing lines ------------------------------------
    gstep = 5000000
    lines_order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    top_panel = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                         order=lines_order, gauge=None, plot_chr_label=False,
                         gauge_step=gstep, palette="gray", cap=40,
                         hlsuffix=hlsuffix, labels_dict=labels_dict,
                         diverge=diverge)
    yys = top_panel.yys
    left_x, right_x = .37, .72
    tip = .02
    # (x, y, dx, dy, label): arrow tip position and text offset
    pointers = (
        (left_x, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
        (left_x, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
        (left_x, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
        (right_x, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (right_x, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (right_x, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"),
    )

    arrowprops = dict(facecolor='black', shrink=.05, frac=.5,
                      width=1, headwidth=4)
    for px, py, dx, dy, name in pointers:
        name = r"\textit{{{0}}}".format(name)
        root.annotate(name, xy=(px, py), xytext=(px + dx, py + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             order=lines_order, gauge=None, plot_chr_label=False,
             gauge_step=gstep, palette="gray", cap=40,
             hlsuffix=hlsuffix, labels_dict=labels_dict,
             diverge=diverge)

    # --- Panel letters and output ------------------------------------------
    pad = .03
    labels = ((.1, .67, "A"),
              (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Beispiel #7
0
def simulate(args):
    """
    %prog simulate

    Run simulation on female restitution.
    """
    import seaborn as sns

    sns.set_style("darkgrid")

    parser = OptionParser(simulate.__doc__)
    parser.add_option(
        "--verbose",
        default=False,
        action="store_true",
        help="Verbose logging during simulation",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="7x10")
    if args:
        sys.exit(not parser.print_help())

    # Composite figure: one track per simulated population (7 in total)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    rows = 7
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    xpad = 0.18
    xwidth = 0.6

    # Tracks are stacked top to bottom; only the last one keeps x tick labels
    axes = []
    yy_positions = []  # top edge of each track, reused for the side texts
    cursor = 1 - ypad
    last_row = rows - 1
    for row in range(rows):
        yy_positions.append(cursor)
        cursor -= yinterval
        track_ax = fig.add_axes([xpad, cursor, xwidth, yinterval * 0.85])
        if row != last_row:
            plt.setp(track_ax.get_xticklabels(), visible=False)
        axes.append(track_ax)

    # Two simulated parents
    SS = Genome("SS", "SS", 10, 8)
    SO = Genome("SO", "SO", 8, 10)

    # 1000 replicates per population, simulated in the order they are plotted
    verbose = opts.verbose
    replicates = 1000
    all_F1s = [simulate_F1(SO, SS, verbose=verbose) for _ in range(replicates)]
    all_F2s = [simulate_F2(SO, SS, verbose=verbose) for _ in range(replicates)]
    all_F1intercrosses = [
        simulate_F1intercross(SO, SS, verbose) for _ in range(replicates)
    ]
    all_BCs = [
        [simulate_BCn(n, SO, SS, verbose=verbose) for _ in range(replicates)]
        for n in (1, 2, 3, 4)
    ]
    populations = [
        ("all_F1s", all_F1s),
        ("all_F2s", all_F2s),
        ("all_F1intercrosses", all_F1intercrosses),
    ] + [
        ("all_BC{}s".format(n), genomes)
        for n, genomes in zip((1, 2, 3, 4), all_BCs)
    ]

    # One summary plot per track, top to bottom
    summaries = [
        plot_summary(track_ax, genomes)
        for track_ax, (_, genomes) in zip(axes, populations)
    ]

    # Titles (with optional subtitle) in the left margin
    left_x = xpad / 2
    headings = (
        ("F1", None),
        ("F2", "via selfing"),
        ("F2", "via intercross"),
        ("BC1", None),
        ("BC2", None),
        ("BC3", None),
        ("BC4", None),
    )
    for (title, subtitle), top in zip(headings, yy_positions):
        # Titles with a subtitle sit slightly higher to make room for it
        title_y = top - (0.06 if subtitle else 0.07)
        root.text(
            left_x, title_y, title, color="darkslategray", ha="center", va="center"
        )
        if subtitle:
            root.text(
                left_x,
                title_y - 0.02,
                subtitle,
                color="lightslategray",
                ha="center",
                va="center",
            )

    # Three summary lines per track in the right margin
    right_x = 1 - (1 - xpad - xwidth) / 2
    stat_lines = (
        ("SO_summary", SoColor, 0.04),
        ("SS_summary", SsColor, 0.02),
        ("percent_SO_summary", SoColor, 0.02),
    )
    for summary, top in zip(summaries, yy_positions):
        stat_y = top
        for attr, color, drop in stat_lines:
            stat_y -= drop
            root.text(
                right_x,
                stat_y,
                getattr(summary, attr),
                color=color,
                ha="center",
                va="center",
            )

    bottom_ax = axes[-1]
    bottom_ax.set_xlabel("Number of unique chromosomes")
    adjust_spines(bottom_ax, ["bottom"], outward=True)
    normalize_axes(root)

    savefig("plotter.pdf", dpi=120)

    # Dump every simulated population to its own file under outdir
    outdir = "simulations"
    mkdir(outdir)
    for filename, genomes in populations:
        write_chromosomes(genomes, op.join(outdir, filename))
Beispiel #8
0
    def __init__(self,
                 fig,
                 root,
                 canvas,
                 chr,
                 xlim,
                 datadir,
                 order=None,
                 hlsuffix=None,
                 palette=None,
                 cap=50,
                 gauge="bottom",
                 plot_label=True,
                 plot_chr_label=True,
                 gauge_step=5000000,
                 vlines=None,
                 labels_dict=None,
                 diverge=('r', 'g')):
        """
        Draw a framed stack of XY tracks for one chromosome.

        fig, root: figure and root axes to draw on.
        canvas: (x, y, w, h) of the panel, in the coordinates of `root`.
        chr: chromosome name; data files are globbed as `datadir/chr*`.
        xlim: (start, end) x-range applied to every track.
        order: optional list of track labels. Files whose label is not
            listed are dropped and the rest are stacked in this order.
            Without it, every matching file is drawn, sorted by name.
        hlsuffix: when set, `datadir/<label>.<hlsuffix>` is imported as the
            highlight file of each track.
        palette: a single colour used for all tracks; when None the 'Set2'
            qualitative map from `get_map` is used.
        cap: passed to `XYtrack.cap(ymax=cap)`.
        gauge: "top" or "bottom" to draw a base-pair gauge; any other value
            (e.g. None) draws no gauge.
        plot_label, plot_chr_label: toggle track and chromosome labels.
        gauge_step: tick step of the gauge.
        vlines: when set, passed to `XYtrack.vlines`.
        labels_dict: optional {label: display name}; labels not listed are
            shown capitalized.
        diverge: passed to `XYtrack.import_hlfile`.

        The lower edge of each track, top to bottom, is kept in `self.yys`.

        Note: `h / ntracks` raises ZeroDivisionError when no data file
        matches.
        """
        # None instead of a shared mutable {} default
        if labels_dict is None:
            labels_dict = {}

        x, y, w, h = canvas
        p = .01
        root.add_patch(
            Rectangle((x - p, y - p),
                      w + 2 * p,
                      h + 2 * p,
                      lw=1,
                      fill=False,
                      ec="darkslategray",
                      zorder=10))

        def track_label(path):
            # The label is the second dot-separated field of the file name.
            # Use the basename so a dot in `datadir` cannot shift the field.
            return op.basename(path).split(".")[1]

        datafiles = glob(op.join(datadir, chr + "*"))

        if order:
            datafiles = [z for z in datafiles if track_label(z) in order]
            datafiles.sort(key=lambda z: order.index(track_label(z)))
        else:
            # glob() order is arbitrary; sort for a reproducible layout
            datafiles.sort()
        # Take each label from its own file. Zipping `order` against the
        # filtered files would mislabel tracks whenever an entry of `order`
        # has no data file, and would fail outright with order=None.
        labels = [track_label(z) for z in datafiles]

        ntracks = len(datafiles)
        yinterval = h / ntracks
        yy = y + h

        if palette is None:
            # Get the palette
            set2 = get_map('Set2', 'qualitative', ntracks).mpl_colors
        else:
            set2 = [palette] * ntracks

        # Defaults cover gauge values other than "top"/"bottom": no gauge
        # axes, and the chromosome label goes below the canvas
        gauge_ax = None
        tpos = y - .07
        if gauge == "top":
            gauge_ax = fig.add_axes([x, yy + p, w, .0001])
            adjust_spines(gauge_ax, ["top"])
            tpos = yy + .07
        elif gauge == "bottom":
            gauge_ax = fig.add_axes([x, y - p, w, .0001])
            adjust_spines(gauge_ax, ["bottom"])

        start, end = xlim
        if gauge_ax is not None:
            fs = gauge_step < 1000000
            setup_gauge_ax(gauge_ax,
                           start,
                           end,
                           gauge_step,
                           float_formatter=fs)

        if plot_chr_label:
            root.text(x + w / 2,
                      tpos,
                      chr,
                      ha="center",
                      va="center",
                      color="darkslategray",
                      size=16)

        yys = []
        for label, datafile, c in zip(labels, datafiles, set2):
            yy -= yinterval
            yys.append(yy)
            ax = fig.add_axes([x, yy, w, yinterval * .9])
            xy = XYtrack(ax, datafile, color=c)
            xy.interpolate(end)
            xy.cap(ymax=cap)
            if vlines:
                xy.vlines(vlines)
            if hlsuffix:
                hlfile = op.join(datadir, ".".join((label, hlsuffix)))
                xy.import_hlfile(hlfile, chr, diverge=diverge)
            if plot_label:
                display = labels_dict.get(label, label.capitalize())
                display = r"\textit{{{0}}}".format(display)
                root.text(x - .015,
                          yy + yinterval / 2,
                          display,
                          ha="right",
                          va="center")
            xy.draw()
            ax.set_xlim(*xlim)

        self.yys = yys
Beispiel #9
0
def mosdepth(args):
    """
    %prog mosdepth mosdepth.global.dist.txt groups

    Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. See also:
    https://github.com/brentp/mosdepth
    """
    import seaborn as sns

    sns.set_style("darkgrid")

    p = OptionParser(mosdepth.__doc__)
    p.add_option("--maxdepth",
                 default=100,
                 type="int",
                 help="Maximum depth to plot")
    p.add_option("--logscale",
                 default=False,
                 action="store_true",
                 help="Use log-scale on depth")
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="6x8")

    if len(args) != 2:
        # Follow the `sys.exit(not p.print_help())` convention so a usage
        # error exits non-zero. Assuming print_help() returns None as in
        # optparse, sys.exit(p.print_help()) reported success (status 0).
        sys.exit(not p.print_help())

    # Read in datasets
    distfile, groupsfile = args
    dists = parse_distfile(distfile)
    groups = parse_groupsfile(groupsfile)
    logscale = opts.logscale

    # Construct a composite figure with N tracks indicated in the groups
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    rows = len(groups)
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    yy = 1 - ypad

    # Stays None when `groups` is empty, so the spine adjustment below can be
    # skipped instead of failing on an unbound name
    ax = None
    for group_idx, (chrs, colors) in enumerate(groups):
        yy -= yinterval
        ax = fig.add_axes([0.15, yy, 0.7, yinterval * 0.85])
        for c, color in zip(chrs, colors):
            cdata = dists[c].items()
            logging.debug("Importing {} records for {}".format(len(cdata), c))
            cx, cy = zip(*sorted(cdata))
            ax.plot(cx, cy, "-", color=color)
        if logscale:
            # NOTE(review): newer Matplotlib releases spell this keyword
            # `base` - confirm against the Matplotlib version in use.
            ax.set_xscale("log", basex=2)
        ax.set_xlim(1 if logscale else 0, opts.maxdepth)
        ax.get_yaxis().set_visible(False)
        # Only the bottom track keeps its x-axis
        if group_idx != rows - 1:
            ax.get_xaxis().set_visible(False)

        # Add legend to the right of the canvas
        label_pad = 0.02
        label_yy = yy + yinterval
        for c, color in zip(chrs, colors):
            label_yy -= label_pad
            root.text(0.92, label_yy, c, color=color, ha="center", va="center")

    root.text(
        0.1,
        0.5,
        "Proportion of bases at coverage",
        rotation=90,
        color="darkslategray",
        ha="center",
        va="center",
    )
    root.text(0.5,
              0.05,
              "Coverage",
              color="darkslategray",
              ha="center",
              va="center")
    normalize_axes(root)
    # `ax` is the last (bottom) track created by the loop above
    if ax is not None:
        adjust_spines(ax, ["bottom"], outward=True)

    pf = "mosdepth"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)