Esempio n. 1
0
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # The docstring above is handed to OptionParser as the usage text, so it
    # stays in `%prog ...` form; developer notes live in comments instead.
    #
    # tx: optional tree text. When truthy it is drawn instead of the contents
    #     of the tree file and the image is written as `demo.<format>`;
    #     otherwise the tree is read from the single positional argument.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup",
                 help="Root the tree using the outgroup. "
                      "Use comma to separate multiple taxa.")
    p.add_option("--rmargin",
                 default=.3,
                 type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option(
        "--gffdir",
        default=None,
        help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes",
                 default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    # Exactly one positional argument is required, even when `tx` is given.
    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = opts.outgroup.split(",") if opts.outgroup else None

    # Output prefix defaults to the tree file name minus its last extension.
    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Read through a context manager so the handle is closed promptly.
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    # `root` spans the whole figure and serves as the drawing canvas.
    root = fig.add_axes([0, 0, 1, 1])

    draw_tree(root,
              tx,
              rmargin=opts.rmargin,
              outgroup=outgroup,
              gffdir=opts.gffdir,
              sizes=opts.sizes)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    # Restore matplotlib's default rc settings after saving.
    plt.rcdefaults()
Esempio n. 2
0
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    # The docstring above doubles as the OptionParser usage text, so it keeps
    # the `%prog ...` layout; implementation notes are given as comments.
    align_choices = ("left", "center", "right")

    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment {0} [default: %default]".format(
                     "|".join(align_choices)))
    p.add_option("--noUTR",
                 default=False,
                 action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    # `canvas` must be bound locally: it is passed to get_setups() and also
    # divided among the genes to obtain the vertical spacing below. Without
    # this binding the spacing computation raised NameError.
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align
    # Anchor x position: .2 for left alignment, .8 for every other choice.
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01  # horizontal gap between the glyph anchor and its label
    for genename, mrnabed, cdsbeds in setups:
        # ExonGlyph is instantiated for its drawing side effect on `root`;
        # the instance itself is not needed afterwards.
        ExonGlyph(root,
                  xs,
                  ys,
                  mrnabed,
                  cdsbeds,
                  ratio=ratio,
                  align=align)
        genename = _(genename)
        # Labels are only drawn for left/right alignment; "center" gets none.
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
Esempio n. 3
0
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    # NOTE: the docstring is consumed by OptionParser as the usage message,
    # which is why it is left in its command-line help form.
    align_choices = ("left", "center", "right")

    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment {0} [default: %default]".format(
                     "|".join(align_choices)))
    p.add_option("--noUTR", default=False, action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    # At least one gff file is required.
    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    # Fraction of the figure given to the gene models. It has to be a named
    # local because the per-gene vertical step is derived from it; leaving it
    # unbound made `canvas / ngenes` fail with NameError.
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01
    for genename, mrnabed, cdsbeds in setups:
        # Constructed only for its drawing side effect on `root`.
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        genename = _(genename)
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        # Move down one slot for the next gene.
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
Esempio n. 4
0
File: tree.py Progetto: bennyyu/jcvi
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # NOTE: the docstring is used verbatim as the OptionParser usage string.
    #
    # tx: tree text supplied by the caller (optional). If truthy, it replaces
    #     the file contents and the output prefix becomes "demo"; if falsy,
    #     the tree is loaded from the positional tree-file argument.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup",
                 help="Root the tree using the outgroup. "
                      "Use comma to separate multiple taxa.")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # --outgroup is a comma-separated list of taxa; None means "not given".
    outgroup = opts.outgroup.split(",") if opts.outgroup else None
    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Use a context manager so the tree file is closed after reading.
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # full-figure canvas

    draw_tree(root, tx, rmargin=opts.rmargin,
              outgroup=outgroup, gffdir=opts.gffdir, sizes=opts.sizes)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 5
0
def plot_one_scaffold(scaffoldID, ssizes, sbed, trios, imagename, iopts,
                      highlights=None):
    """Render one side-by-side panel per trio for a scaffold and save it.

    Each element of `trios` is unpacked as (blastfile, query sizes, query
    bed) and handed to `scaffolding` together with its own subplot. A title
    showing the scaffold ID and its size (from `ssizes.get_size`) is drawn on
    a full-figure axes, and the figure is saved to `imagename` at
    `iopts.dpi`.
    """
    npanels = len(trios)

    # Reuse figure 1 and wipe anything left over from a previous call.
    figure = plt.figure(1, (14, 8))
    plt.cla()
    plt.clf()

    canvas = figure.add_axes([0, 0, 1, 1])
    panels = []
    for column in range(1, npanels + 1):
        panels.append(figure.add_subplot(1, npanels, column))
    scaffold_size = ssizes.get_size(scaffoldID)

    for (blastf, qsizes, qbed), panel in zip(trios, panels):
        scaffolding(panel, scaffoldID, blastf, qsizes, ssizes, qbed, sbed,
                    highlights=highlights)

    # Title across the top of the whole figure.
    title = "{0}   (size={1})".format(scaffoldID, thousands(scaffold_size))
    canvas.text(.5, .95, _(title), size=18, ha="center", color='b')
    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    plt.savefig(imagename, dpi=iopts.dpi)
    logging.debug("Print image to `{0}` {1}".format(imagename, iopts))
Esempio n. 6
0
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # The docstring above is passed to OptionParser as the usage text and is
    # therefore kept in command-line help form.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `chrom` rather than `chr`, to avoid shadowing the builtin.
    fastafile, chrom = args
    # Window options only need to be resolved once.
    window, shift = check_window_options(opts)

    # Every feature name maps to a `<name>.bed` file.
    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chrom]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Track label: fall back to the raw name so `cc` is always bound, and
    # abbreviate `prefix_suffix` names to `<P>suffix` when possible.
    cc = chrom
    if "_" in chrom:
        ca, cb = chrom.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins, rounded up.
    # NOTE(review): the `/` operators here presumably rely on Python 2
    # integer division - confirm before running under Python 3.
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Widen the window to roughly 1% of the chromosome (as a multiple of
    # `shift`) when that is larger than the requested window.
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chrom, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for name, color in zip(stacks, palette):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, name, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for name, binfile in zip(heatmaps, heatmapbins):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy -= yh
        m = stackarray(binfile, chrom, window, shift)

        # Duplicate the 1-D track into two rows so imshow gets a 2-D array.
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, name, size=10)

    yy -= yh

    # Optional markers from the --meres BED file, restricted to this
    # chromosome.
    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chrom:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 7
0
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # The docstring above is the OptionParser usage text; keep its format.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift = check_window_options(opts)
    # Optional renaming table, looked up by the abbreviated label below.
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift)

    sizes = Sizes(fastafile)
    # First `top` (name, length) pairs in the order yielded by iter_sizes().
    chrom_sizes = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in chrom_sizes)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chrom, clen in chrom_sizes:
        yy -= yinterval
        xlen = clen / ratio

        # Reset the label on every iteration: names without "_" use the raw
        # name instead of an unbound (or previous chromosome's) label.
        cc = chrom
        if "_" in chrom:
            ca, cb = chrom.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins, rounded up.
        # NOTE(review): `/` presumably means integer division here (Python 2
        # code base) - confirm before porting.
        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chrom, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc, ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for bedfile, color in zip(bedfiles, palette):
        label = bedfile.rsplit(".", 1)[0].replace("_", " ")
        label = Registration.get(label, label)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(label), size=13)
        # Advance by an estimate of the label width before the next swatch.
        xx += len(label) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 8
0
def main():
    """Plot a log-scaled heatmap of a CSV matrix and save it as an image.

    Expects exactly one positional argument, the data file. The output is
    written to `<prefix>.<cmap>.<format>`, where the prefix is the data file
    name minus its last extension. With --groups, column groups are labelled
    above the matrix; with --rowgroups, row groups read from a two-column
    file (`name row1,row2,...`) are bracketed on the left.
    """
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = set_image_options(p, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    rgroups = []
    if rowgroups:
        # Each line is `<group name> <comma-separated row labels>`; keep the
        # index span covered by the group's rows. The context manager closes
        # the file once parsing is done.
        with open(rowgroups) as fp:
            for row in fp:
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    default_cm = cm.get_cmap(opts.cmap)
    im = ax.matshow(data, cmap=default_cm, norm=LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    # Size of one cell in figure coordinates (the matrix axes is .7 wide).
    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for tickline in ax.get_xticklines() + ax.get_yticklines():
        tickline.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, format=_("%d"), orientation=orientation)

    if groups:
        # Collapse consecutive identical group names into (name, run length).
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5

        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-", color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            # Convert row indices to figure y-coordinates and pad slightly.
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 9
0
File: kmer.py Progetto: bennyyu/jcvi
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic. Find out totalKmers when running
    kmer.meryl().
    """
    # The docstring above is the OptionParser usage text; keep its format.
    #
    # Returns the result of asciiplot() in ASCII mode (the default); with
    # --pdf the plot is saved to `<histfile prefix>.pdf` and nothing is
    # returned. Raises ValueError when the histogram has fewer than two rows.
    p = OptionParser(histogram.__doc__)
    p.add_option("--pdf", default=False, action="store_true",
            help="Print PDF instead of ASCII plot [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    use_ascii = not opts.pdf
    hist = {}
    totalKmers = 0

    # Both passes over the file happen inside one context manager so the
    # handle is closed as soon as parsing finishes.
    with open(histfile) as fp:
        # Guess the format of the Kmer histogram: any single-column row means
        # the one-count-per-line layout, where K is implied by the row number.
        soap = False
        for row in fp:
            if len(row.split()) == 1:
                soap = True
                break
        fp.seek(0)

        for rowno, row in enumerate(fp):
            if soap:
                K = rowno + 1
                counts = int(row.strip())
            else:  # meryl histogram
                K, counts = row.split()[:2]
                K, counts = int(K), int(counts)

            Kcounts = K * counts
            totalKmers += Kcounts
            hist[K] = counts

    # The peak search walks consecutive pairs, so it needs two or more rows;
    # fail with a clear message instead of a NameError on `Ka` further down.
    if len(hist) < 2:
        raise ValueError(
            "Need at least two rows in histogram `{0}`".format(histfile))

    # Walk the histogram until the counts have dropped, risen and started to
    # drop again; `Ka` at that point is taken as the K-mer coverage.
    history = ["drop"]
    for a, b in pairwise(sorted(hist.items())):
        Ka, ca = a
        Kb, cb = b
        if ca <= cb:
            status = "rise"
        else:
            status = "drop"
        if history[-1] != status:
            history.append(status)
        if history == ["drop", "rise", "drop"]:
            break

    Total_Kmers = int(totalKmers)
    Kmer_coverage = Ka
    Genome_size = Total_Kmers * 1. / Ka / 1e6

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, Total_Kmers)
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".format(Genome_size)

    # Written via sys.stderr.write so the same line-per-message output works
    # regardless of whether `print` is a statement or a function.
    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        sys.stderr.write(msg + "\n")

    # Only coverage values up to 100 are plotted.
    counts = sorted((a, b) for a, b in hist.items() if a <= 100)
    x, y = zip(*counts)
    title = "{0} genome {1}-mer histogram".format(species, N)

    if use_ascii:
        return asciiplot(x, y, title=title)

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)

    ax = plt.gca()
    ax.text(.5, .9, _(Total_Kmers_msg),
            ha="center", color='b', transform=ax.transAxes)
    ax.text(.5, .8, _(Kmer_coverage_msg),
            ha="center", color='b', transform=ax.transAxes)
    ax.text(.5, .7, _(Genome_size_msg),
            ha="center", color='b', transform=ax.transAxes)

    ax.set_title(_(title), color='r')
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(_(xlabel), color='r')
    ax.set_ylabel(_(ylabel), color='r')
    set_human_axis(ax)

    imagename = histfile.split(".")[0] + ".pdf"
    plt.savefig(imagename, dpi=100)
    sys.stderr.write("Image saved to `{0}`.".format(imagename) + "\n")
Esempio n. 10
0
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # NOTE: the docstring is the OptionParser usage text and must keep its
    # `%prog ...` form.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option(
        "--meres",
        default=None,
        help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # Named `chrom` so the builtin `chr` is not shadowed.
    fastafile, chrom = args
    window, shift, subtract = check_window_options(opts)

    # Feature names map one-to-one onto `<name>.bed` files.
    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chrom]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Default the label to the raw name so it is bound even when the name
    # has no "_"; otherwise shorten `prefix_suffix` to `<P>suffix`.
    cc = chrom
    if "_" in chrom:
        ca, cb = chrom.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins, rounded up.
    # NOTE(review): `/` is presumably integer division (Python 2 code base)
    # in the bin/window arithmetic below - confirm before porting.
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Use about 1% of the chromosome as the window when that exceeds the
    # requested window, snapped down to a multiple of `shift`.
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chrom, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for name, color in zip(stacks, palette):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, name, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for name, binfile in zip(heatmaps, heatmapbins):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy -= yh
        m = stackarray(binfile, chrom, window, shift)

        # Two identical rows give imshow the 2-D input it needs.
        Y = np.array([m, m])
        root.imshow(Y,
                    extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest",
                    aspect="auto")
        root.text(xx + xlen + .01, yy, name, size=10)

    yy -= yh

    # Optional --meres markers, limited to features on this chromosome.
    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chrom:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 11
0
    width = iopts.w
    height = iopts.h * ratio
    fig = plt.figure(1, (width, height))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    blastplot(ax,
              blastfile,
              qsizes,
              ssizes,
              qbed,
              sbed,
              style=opts.style,
              proportional=proportional,
              sampleN=opts.sample,
              baseticks=True,
              stripNames=opts.stripNames)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 12
0
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # The docstring above is the OptionParser usage text; keep its format.
    from itertools import cycle

    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9
    # Cycle through the colour codes so that every BED file is plotted;
    # zipping against the bare six-character string would silently drop any
    # file after the sixth.
    for bedfile, c in zip(bedfiles, cycle("rgbcky")):
        if not opts.spans:
            # Derive `<prefix>.mates` and `<prefix>.spans.bed`, rebuilding
            # each only when need_update() says so.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                bedpefile, bedspanfile = bedpe([bedfile, "--span",
                    "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = _(bedfile.split(".")[0])
        msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases))
        # Same one-line-per-message output as a print to stderr, but valid
        # whether `print` is a statement or a function.
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # NOTE(review): `/` is presumably integer division here (Python 2 code
    # base), giving a whole number of Kb - confirm before porting.
    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    plt.savefig(figname, dpi=iopts.dpi)
    logging.debug("Figure saved to `{0}` {1}.".format(figname, iopts))
Esempio n. 13
0
def dotplot(anchorfile, qbed, sbed, image_name, vmin, vmax, iopts,
        is_self=False, synteny=False, cmap_text=None):
    """Draw a dot plot of anchor pairs and save it to `image_name`.

    Each row of `anchorfile` is split on whitespace. The first two columns
    are the query and subject names, and the last column is parsed as a
    float, clamped to [vmin, vmax] (non-numeric values become `vmax`). Pairs
    whose names are missing from `qbed.order` / `sbed.order` are skipped.
    Points are coloured by `vmax - value`, and at most 5000 randomly sampled
    points are drawn.

    Parameters:
        anchorfile: path of the whitespace-delimited anchor file.
        qbed, sbed: query/subject bed objects; this function uses their
            `order` mapping (name -> (index, record)), `get_breaks()`,
            `len()` and `filename`.
        image_name: output path passed to `plt.savefig`.
        vmin, vmax: clamp range for the value column and the colour scale.
        iopts: image options providing `w`, `h` and `dpi`.
        is_self: also plot the mirrored point and a diagonal line.
        synteny: box the clusters returned by `batch_scan`.
        cmap_text: when truthy, draw a colour-map legend with this text.
    """
    def chr_label(seqid):
        # Keep the part after the last "_"; integer suffixes become "c<N>",
        # anything else is used as is.
        seqid = seqid.split("_")[-1]
        try:
            return "c%d" % int(seqid)
        except ValueError:
            return seqid

    qorder = qbed.order
    sorder = sbed.order

    data = []
    # Context manager guarantees the anchor file is closed after parsing.
    with open(anchorfile) as fp:
        if cmap_text:
            logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

        for row in fp:
            atoms = row.split()
            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue
            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = vmax - value
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    sample_number = 5000  # only show random subset
    if len(data) > sample_number:
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)
    ax.scatter(x, y, c=c, s=2, lw=0, cmap=default_cm,
            vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label entry is (text, midpoint, ignore); `ignore` marks sequences
    # spanning less than 0.5% of the axis, whose names are not drawn.
    xchr_labels, ychr_labels = [], []
    ignore_size_x = xsize * .005
    ignore_size_y = ysize * .005

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        xchr_labels.append((chr_label(seqid), (beg + end) / 2, ignore))
        ax.plot([beg, beg], ylim, "g-", lw=1)

    for (seqid, beg, end) in sbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        ychr_labels.append((chr_label(seqid), (beg + end) / 2, ignore))
        ax.plot(xlim, [beg, beg], "g-", lw=1)

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        # Map data coordinates onto the .1-.9 span occupied by `ax`.
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not ignore:
            root.text(.91, pos, label,
                va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
Esempio n. 14
0
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # The docstring above doubles as the usage text handed to the parser.
    parser = OptionParser(demo.__doc__)
    parser.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = .23
    dup_space = .025

    # One ancestral gene at the top, followed by pairs of duplicates. Each
    # pair sits `dup_space` above and below the centre line of its panel;
    # `depth` is the number of panels below the ancestral gene.
    pair_layout = (
        (.5, 1),  # identical copies
        (.5, 2),  # degenerate copies
        (.2, 3),  # sub-functionalization
        (.5, 3),  # neo-functionalization
        (.8, 3),  # non-functionalization
    )
    locs = [(.5, .9)]  # ancestral gene
    for center_x, depth in pair_layout:
        center_y = .9 - depth * panel_space
        locs.append((center_x, center_y + dup_space))
        locs.append((center_x, center_y - dup_space))

    # Two-letter colour codes, one letter per regulatory element; 'w' means
    # the element is not drawn. Only the listed genes differ from the default.
    default_regulator = "gm"
    altered = {3: "wm", 5: "wm", 6: "gw", 7: "wb", 9: "ww"}
    regulators = [altered.get(k, default_regulator) for k in range(len(locs))]

    gene_width = .24
    label_angle = 30
    label_lift = .02
    for idx, ((cx, cy), regulator) in enumerate(zip(locs, regulators)):
        # backbone of the gene
        Glyph(root, cx - .5 * gene_width, cx + .5 * gene_width, cy)
        if idx == 9:  # upper copy for non-functionalization
            continue

        # coding region
        Glyph(root, cx - .16 * gene_width, cx + .45 * gene_width, cy, fc="k")

        # two regulatory elements
        left_site = cx - .4 * gene_width
        right_site = cx - .28 * gene_width
        for site, colour in zip((left_site, right_site), regulator):
            if colour != 'w':
                DoubleCircle(root, site, cy, fc=colour)

        # Only the ancestral gene and the neo-functionalized copy get labels.
        if idx == 0:
            label_y = cy + label_lift
            root.text(left_site, label_y, _("Flower"),
                      rotation=label_angle, va="bottom")
            root.text(right_site, label_y, _("Root"),
                      rotation=label_angle, va="bottom")
        elif idx == 7:
            root.text(right_site, cy + label_lift, _("Leaf"),
                      rotation=label_angle, va="bottom")

    # Arrows between panels: three down the centre column, then two fanning
    # out from the centre to the left and right fates.
    arrow_dist = .08
    arrows = [((.5, tip_y), (.5, tip_y + arrow_dist))
              for tip_y in (.3, .53, .76)]
    arrows += [((tip_x, .3), (.5, .3 + arrow_dist)) for tip_x in (.2, .8)]
    for arrow_tip, arrow_tail in arrows:
        root.annotate(" ", arrow_tip, arrow_tail, arrowprops=arrowprops)

    # Duplication, Degeneration
    for level, process in ((.76, "Duplication"), (.53, "Degeneration")):
        root.text(.6, level + .02, process, fontweight="bold")

    # Label of fates
    fate_columns = ((.2, "Subfunctionalization"),
                    (.5, "Neofunctionalization"),
                    (.8, "Nonfunctionalization"))
    for column, fate in fate_columns:
        RoundLabel(root, column, .05, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
Esempio n. 15
0
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # NOTE: the docstring above is also the usage text passed to OptionParser,
    # so it is user-facing and kept verbatim.
    p = OptionParser(stack.__doc__)
    p.add_option("--top",
                 default=10,
                 type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option(
        "--switch",
        help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    # Each requested feature is expected in `<feature>.bed`.
    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02  # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio

        # Track label. Start from the raw sequence name so that names without
        # an underscore still get a label, instead of hitting an unbound `cc`
        # on the first chromosome or reusing the previous chromosome's label.
        cc = seqid
        if "_" in seqid:
            # Split on the first underscore only, so names carrying several
            # underscores do not fail the two-way unpacking.
            ca, cb = seqid.split("_", 1)
            cc = ca[:1].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner,
                                 color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins covering the sequence, rounded up. Floor division
        # keeps this an integer regardless of the division semantics in use.
        nbins = clen // shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - .04,
                  yy + .5 * (yinterval - inner),
                  cc,
                  ha="center",
                  va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends: one coloured square plus name per feature, laid out left to
    # right on the row below the last track.
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(b), size=13)
        # advance by a rough per-character width before the next entry
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 16
0
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above is also the usage text passed to OptionParser,
    # so it is user-facing and kept verbatim.
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Exit non-zero on a usage error, as the sibling commands do.
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix  # the scaffold is looked up under the same name as the prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    # Report the bin count: this function defines no window size to log.
    logging.debug("Coverage curve use {0} bins.".format(bins))
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = (_("Base coverage"), _("Mate coverage"))
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            # Seven whitespace-separated columns per row. The two sequence
            # name columns go into throwaway names so that `scf` (used for
            # the label below) is not overwritten by the file contents.
            (_aseqid, astart, aend,
             _bseqid, bstart, bend, _clonename) = row.split()
            astart, bstart = int(astart), int(bstart)
            aend, bend = int(aend), int(bend)
            start = min(astart, bstart) + 1
            end = max(aend, bend)
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted

    def pos(x):
        # this convert from base => x-coordinate
        return .1 + x * bpratio

    ypos = .15 + .03  # top edge of the scaffold bar
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold, painted white over the black bar
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            root.add_patch(
                Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    # scaffold name (drawn once) and the note about the insert cutoff
    root.text(.5, .1, _(scf), color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .05, _(warn_msg), color='gray', ha="center")
    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
Esempio n. 17
0
    image_name = op.splitext(blastfile)[0] + "." + opts.format
    plt.rcParams["xtick.major.pad"] = 16
    plt.rcParams["ytick.major.pad"] = 16

    # Fix the width
    xsize, ysize = qsizes.totalsize, ssizes.totalsize

    ratio = ysize * 1. / xsize if proportional else 1
    width = iopts.w
    height = iopts.h * ratio
    fig = plt.figure(1, (width, height))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
            style=opts.style, proportional=proportional, sampleN=opts.sample,
            baseticks=True, stripNames=opts.stripNames)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
Esempio n. 18
0
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above is also the usage text passed to OptionParser,
    # so it is user-facing and kept verbatim.
    p = OptionParser(main.__doc__)
    p.add_option("--title",
                 default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge",
                 default=False,
                 action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap",
        default=False,
        action="store_true",
        help=
        "generate an HTML image map associated with the image [default: %default]"
    )
    p.add_option(
        "--winsize",
        default=50000,
        type="int",
        help=
        "if drawing an imagemap, specify the window size (bases) of each map element "
        "[default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # Exit non-zero on a usage error, as the sibling commands do.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        # two whitespace-separated columns per line: feature id, class name
        with open(mappingfile) as fh:
            mappings = dict(x.split() for x in fh)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # zip() stops at the shorter input, so classes beyond the available
    # colours get no entry and fall back to white when drawn.
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # chromosome length is taken as the largest feature end on that sequence
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions, then overwrite each feature name with its
    # class label ('-' when unmapped); the label is what gets used below.
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres), \
        "every chromosome in the BED file needs a `centromere` feature"

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (seqid, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[seqid]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(seqid), ha="center")
        ChromosomeWithCentromere(root,
                                 xx,
                                 ystart,
                                 yy,
                                 ystart - clen * ratio,
                                 width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens)))

    alpha = .75
    # color the regions
    for seqid in sorted(chr_lens):
        excess = 0
        bac_list = []
        # left edge of this chromosome's column; constant for all its features
        idx = chr_idxs[seqid]
        xx = xstart + idx * xinterval
        for b in bed.sub_bed(seqid):
            klass = b.accn
            start = b.start
            end = b.end
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart),
                          xwidth,
                          yyend - yystart,
                          fc=class_colors.get(klass, "w"),
                          lw=0,
                          alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                                  xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                            w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the final, shorter-than-`winsize` window of this chromosome.
        # NOTE(review): the last feature's label was already appended when the
        # inner loop broke out, so it is listed twice here - confirm intent.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                          xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                    w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # one tick every 2 Mb; every 10 Mb gets a longer tick and a number
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy], 'b-', lw=2)
                root.text(xstart + tip + extra,
                          yy,
                          _(x),
                          color="gray",
                          va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5,
              .95,
              opts.title,
              fontstyle="italic",
              ha="center",
              va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
Esempio n. 19
0
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # The docstring above is reused as the parser's usage text.
    p = OptionParser(demo.__doc__)
    p.parse_args(args)

    canvas = plt.figure(1, (8, 5))
    root = canvas.add_axes([0, 0, 1, 1])

    panel_space = .23
    dup_space = .025

    # Centre lines of the four panels, from the top of the figure downwards.
    top = .9
    row1 = top - panel_space
    row2 = top - 2 * panel_space
    row3 = top - 3 * panel_space

    # Every gene as (x, y, regulator). The regulator is a two-letter colour
    # code, one letter per regulatory element; 'w' leaves the element out.
    default_regulator = "gm"
    genes = (
        (.5, top, default_regulator),  # ancestral gene
        (.5, row1 + dup_space, default_regulator),  # identical copies
        (.5, row1 - dup_space, default_regulator),
        (.5, row2 + dup_space, "wm"),  # degenerate copies
        (.5, row2 - dup_space, default_regulator),
        (.2, row3 + dup_space, "wm"),  # sub-functionalization
        (.2, row3 - dup_space, "gw"),
        (.5, row3 + dup_space, "wb"),  # neo-functionalization
        (.5, row3 - dup_space, default_regulator),
        (.8, row3 + dup_space, "ww"),  # non-functionalization
        (.8, row3 - dup_space, default_regulator),
    )

    # Text placed above regulatory elements, keyed by gene index; each entry
    # is (element index, label).
    captions = {
        0: ((0, "Flower"), (1, "Root")),
        7: ((1, "Leaf"),),
    }

    width = .24
    rotation = 30
    tip = .02
    for i, (gx, gy, regulator) in enumerate(genes):
        half = .5 * width
        Glyph(root, gx - half, gx + half, gy)
        if i == 9:  # upper copy for non-functionalization
            continue

        # coding region
        cds_start = gx - .16 * width
        cds_end = gx + .45 * width
        Glyph(root, cds_start, cds_end, gy, fc="k")

        # two regulatory elements
        sites = (gx - .4 * width, gx - .28 * width)
        for site, fc in zip(sites, regulator):
            if fc == 'w':
                continue
            DoubleCircle(root, site, gy, fc=fc)

        for which, word in captions.get(i, ()):
            root.text(sites[which], gy + tip, _(word),
                      rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = .08
    center = .5
    for tip_y in (.3, .53, .76):
        arrow_tip = (center, tip_y)
        arrow_tail = (center, tip_y + arrow_dist)
        root.annotate(" ", arrow_tip, arrow_tail, arrowprops=arrowprops)

    # ... and from the centre out to the left and right fates
    bottom = .3
    for tip_x in (.2, .8):
        arrow_tip = (tip_x, bottom)
        arrow_tail = (.5, bottom + arrow_dist)
        root.annotate(" ", arrow_tip, arrow_tail, arrowprops=arrowprops)

    # Duplication, Degeneration
    process_x = .6
    for process_y, process in zip((.76, .53), ("Duplication", "Degeneration")):
        root.text(process_x, process_y + .02, process, fontweight="bold")

    # Label of fates
    fate_y = .05
    fates = ("Subfunctionalization", "Neofunctionalization",
             "Nonfunctionalization")
    for fate_x, fate in zip((.2, .5, .8), fates):
        RoundLabel(root, fate_x, fate_y, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
Esempio n. 20
0
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above is also the usage text passed to OptionParser,
    # so it is user-facing and kept verbatim.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
            help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
            help="draw a gauge with size label [default: %default]")
    p.add_option("--imagemap", default=False, action="store_true",
            help="generate an HTML image map associated with the image [default: %default]")
    p.add_option("--winsize", default=50000, type="int",
            help="if drawing an imagemap, specify the window size (bases) of each map element "
                 "[default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # Exit non-zero on a usage error, as the sibling commands do.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        # two whitespace-separated columns per line: feature id, class name
        with open(mappingfile) as fh:
            mappings = dict(x.split() for x in fh)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(len(classes),
            ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # zip() stops at the shorter input, so classes beyond the available
    # colours get no entry and fall back to white when drawn.
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # chromosome length is taken as the largest feature end on that sequence
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions, then overwrite each feature name with its
    # class label ('-' when unmapped); the label is what gets used below.
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres), \
        "every chromosome in the BED file needs a `centromere` feature"

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (seqid, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[seqid]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(seqid), ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens)))

    alpha = .75
    # color the regions
    for seqid in sorted(chr_lens):
        excess = 0
        bac_list = []
        # left edge of this chromosome's column; constant for all its features
        idx = chr_idxs[seqid]
        xx = xstart + idx * xinterval
        for b in bed.sub_bed(seqid):
            klass = b.accn
            start = b.start
            end = b.end
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(Rectangle((xx, yystart), xwidth, yyend - yystart,
                fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                                  xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                            w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the final, shorter-than-`winsize` window of this chromosome.
        # NOTE(review): the last feature's label was already appended when the
        # inner loop broke out, so it is listed twice here - confirm intent.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                          xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                    w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # one tick every 2 Mb; every 10 Mb gets a longer tick and a number
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy], 'b-', lw=2)
                root.text(xstart + tip + extra, yy, _(x),
                        color="gray", va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0,
            alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
Esempio n. 21
0
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic. Find out totalKmers when running
    kmer.meryl().
    """
    # NOTE: the docstring above is handed to OptionParser and therefore doubles
    # as the command-line help text; it is kept verbatim on purpose.
    #
    # Parameters
    #   args: list of command-line tokens. After option parsing exactly three
    #         positionals are expected: the histogram file, a species label and
    #         the k-mer size (`K` in the usage line, `N` in the code). The last
    #         two are only interpolated into the messages and the plot title.
    # Returns
    #   The value of asciiplot() in the default ASCII mode; None with --pdf,
    #   in which case a PDF is saved next to the input file.
    # Raises
    #   ValueError if the histogram holds fewer than two bins, because no
    #   coverage peak can be located then.
    import os.path  # local import: only needed to build the output path

    p = OptionParser(histogram.__doc__)
    p.add_option("--pdf",
                 default=False,
                 action="store_true",
                 help="Print PDF instead of ASCII plot [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    use_ascii = not opts.pdf  # named so as not to shadow the `ascii` builtin

    # Read everything up front so the handle is closed deterministically.
    with open(histfile) as fp:
        rows = fp.readlines()

    # Guess the format of the Kmer histogram: any single-column row means the
    # file is treated as "soap" style (one count per line, K implied by the
    # line number); otherwise rows are `K counts ...` (meryl).
    soap = any(len(row.split()) == 1 for row in rows)

    hist = {}
    totalKmers = 0
    for rowno, row in enumerate(rows):
        if not row.strip():
            # Tolerate blank lines (e.g. a trailing newline). `rowno` still
            # advances, so the implied K of later soap rows is unaffected.
            continue
        if soap:
            K = rowno + 1
            counts = int(row.strip())
        else:  # meryl histogram
            K, counts = row.split()[:2]
            K, counts = int(K), int(counts)

        totalKmers += K * counts
        hist[K] = counts

    if len(hist) < 2:
        # Without at least one adjacent pair of bins the scan below would never
        # bind `Ka`, and the function used to die with an UnboundLocalError.
        raise ValueError(
            "Histogram `{0}` has fewer than two bins; "
            "cannot locate the coverage peak".format(histfile))

    # Walk adjacent bins in order of K and track the trend. The scan stops at
    # the first drop -> rise -> drop pattern; `Ka` is then the last bin before
    # that second drop and is used as the coverage peak. If the pattern never
    # shows up, `Ka` is simply the second-to-last bin.
    history = ["drop"]
    for (Ka, ca), (_Kb, cb) in pairwise(sorted(hist.items())):
        status = "rise" if ca <= cb else "drop"
        if history[-1] != status:
            history.append(status)
        if history == ["drop", "rise", "drop"]:
            break

    Total_Kmers = int(totalKmers)
    Kmer_coverage = Ka
    Genome_size = Total_Kmers * 1. / Ka / 1e6  # in megabases

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, Total_Kmers)
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".format(Genome_size)
    messages = (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg)

    for msg in messages:
        sys.stderr.write(msg + "\n")

    # Only the low-coverage part of the spectrum (K <= 100) is plotted.
    counts = sorted((a, b) for a, b in hist.items() if a <= 100)
    x, y = zip(*counts)
    title = "{0} genome {1}-mer histogram".format(species, N)

    if use_ascii:
        return asciiplot(x, y, title=title)

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)

    ax = plt.gca()
    # Stack the three summary lines, in axes coordinates, from the top down.
    for ypos, msg in zip((.9, .8, .7), messages):
        ax.text(.5,
                ypos,
                _(msg),
                ha="center",
                color='b',
                transform=ax.transAxes)

    ax.set_title(_(title), color='r')
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(_(xlabel), color='r')
    ax.set_ylabel(_(ylabel), color='r')
    set_human_axis(ax)

    # Strip the extension(s) from the file name only. Splitting the whole path
    # on "." used to cut at dots in directory components ("./x.hist" became
    # ".pdf").
    dirname, basename = os.path.split(histfile)
    imagename = os.path.join(dirname, basename.split(".")[0] + ".pdf")
    plt.savefig(imagename, dpi=100)
    sys.stderr.write("Image saved to `{0}`.\n".format(imagename))