Ejemplo n.º 1
0
def gaps(args):
    """
    %prog gaps A_vs_B.blast

    Find distribution of gap sizes betwen adjacent HSPs.
    """
    p = OptionParser(gaps.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    blastfile, = args
    blast = BlastSlow(blastfile)
    logging.debug("A total of {} records imported".format(len(blast)))

    query_gaps = list(collect_gaps(blast))
    subject_gaps = list(collect_gaps(blast, use_subject=True))
    logging.debug("Query gaps: {}  Subject gaps: {}"\
                    .format(len(query_gaps), len(subject_gaps)))

    from jcvi.graphics.base import savefig
    import seaborn as sns

    sns.distplot(query_gaps)
    savefig("query_gaps.pdf")
Ejemplo n.º 2
0
Archivo: cnv.py Proyecto: xuanblo/jcvi
def coverage(args):
    """
    %prog coverage *.coverage

    Plot coverage along chromosome. The coverage file can be generated with:
    $ samtools depth a.bam > a.coverage

    The plot is a simple line plot using matplotlib.
    """
    from jcvi.graphics.base import savefig

    p = OptionParser(coverage.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    covfile, = args
    df = pd.read_csv(covfile, sep='\t', names=["Ref", "Position", "Depth"])

    xlabel, ylabel = "Position", "Depth"
    df.plot(xlabel, ylabel, color='g')

    image_name = covfile + "." + iopts.format
    savefig(image_name)
Ejemplo n.º 3
0
Archivo: ks.py Proyecto: rrane/jcvi
    def draw(self, title="Ks distribution"):

        from jcvi.graphics.base import tex_formatter, \
                    tex_1digit_formatter, tex_2digit_formatter, savefig

        ax = self.ax
        ks_max = self.ks_max
        lines = self.lines
        labels = self.labels
        legendp = self.legendp
        leg = ax.legend(lines, labels, legendp,
                        shadow=True, fancybox=True, prop={"size": 10})
        leg.get_frame().set_alpha(.5)

        ax.set_xlim((0, ks_max - self.interval))
        ax.set_title(title, fontweight="bold")
        ax.set_xlabel('Synonymous substitutions per site (Ks)')
        ax.set_ylabel('Percentage of gene pairs')

        tf = tex_2digit_formatter if self.interval < .1 else \
             tex_1digit_formatter
        ax.xaxis.set_major_formatter(tf)
        ax.yaxis.set_major_formatter(tex_formatter)

        image_name = "Ks_plot.pdf"
        savefig(image_name, dpi=300)
Ejemplo n.º 4
0
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    p = OptionParser(birch.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    K = Karyotype(fig, root, seqids, layout)
    L = K.layout

    xs = .79
    dt = dict(rectangle=False, circle=False)
    # Embed a phylogenetic tree to the right
    coords = {}
    coords["Amborella"] = (xs, L[0].y)
    coords["Vitis"] = (xs, L[1].y)
    coords["Prunus"] = (xs, L[2].y)
    coords["Betula"] = (xs, L[3].y)
    coords["Populus"] = (xs, L[4].y)
    coords["Arabidopsis"] = (xs, L[5].y)
    coords["fabids"] = join_nodes(root, coords, "Prunus", "Betula", xs, **dt)
    coords["malvids"] = join_nodes(root, coords, \
                                   "Populus", "Arabidopsis", xs, **dt)
    coords["rosids"] = join_nodes(root, coords, "fabids", "malvids", xs, **dt)
    coords["eudicots"] = join_nodes(root, coords, "rosids", "Vitis", xs, **dt)
    coords["angiosperm"] = join_nodes(root, coords, \
                                      "eudicots", "Amborella", xs, **dt)

    # Show branch length
    branch_length(root, coords["Amborella"], coords["angiosperm"], ">160.0")
    branch_length(root, coords["eudicots"], coords["angiosperm"],
                  ">78.2", va="top")
    branch_length(root, coords["Vitis"], coords["eudicots"], "138.5")
    branch_length(root, coords["rosids"], coords["eudicots"],
                  "19.8", va="top")
    branch_length(root, coords["Prunus"], coords["fabids"],
                  "104.2", ha="right", va="top")
    branch_length(root, coords["Arabidopsis"], coords["malvids"],
                  "110.2", va="top")
    branch_length(root, coords["fabids"], coords["rosids"],
                  "19.8", ha="right", va="top")
    branch_length(root, coords["malvids"], coords["rosids"],
                  "8.5", va="top")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "birch"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 5
0
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    p = OptionParser(resample.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not p.print_help())

    dataA, dataB = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.1, .18, .32, .64])
    B = fig.add_axes([.6, .18, .32, .64])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    subplot_twinx(A, dataA, xlabel, ylabels,
                     title="Yellow catfish", legend=legend)
    subplot_twinx(B, dataB, xlabel, ylabels,
                     title="Medicago", legend=legend)

    labels = ((.04, .92, "A"), (.54, .92, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "resample." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 6
0
def pomegranate(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 7
0
def epoch(args):
    """
    %prog epoch

    Illustrate the methods used in Maggie's epoch paper, in particular, how to
    classifiy S/G/F/FB/FN for the genes.
    """
    p = OptionParser(__doc__)
    opts, args = p.parse_args()

    fig = plt.figure(1, (6, 4))
    root = fig.add_axes([0, 0, 1, 1])

    # Separators
    linestyle = dict(lw=2, color="b", alpha=.2, zorder=2)
    root.plot((0, 1), (.5, .5), "--", **linestyle)
    for i in (1./3, 2./3):
        root.plot((i, i), (.5, 1), "--", **linestyle)
    for i in (1./6, 3./6, 5./6):
        root.plot((i, i), (0, .5), "--", **linestyle)

    # Diagrams
    plot_diagram(root, 1./6, 3./4, "S", "syntenic")
    plot_diagram(root, 3./6, 3./4, "F", "missing, with both flankers")
    plot_diagram(root, 5./6, 3./4, "G", "missing, with one flanker")
    plot_diagram(root, 2./6, 1./4, "FB", "has non-coding matches")
    plot_diagram(root, 4./6, 1./4, "FN", "syntenic region has gap")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 8
0
def main():
    p = OptionParser(__doc__)
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--extra", help="Extra features in BED format")
    p.add_option("--scalebar", default=False, action="store_true",
                 help="Add scale bar to the plot")
    opts, args, iopts = p.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    datafile, bedfile, layoutfile = args
    switch = opts.switch
    tree = opts.tree

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, layoutfile,
            switch=switch, tree=tree, extra_features=opts.extra,
            scalebar=opts.scalebar)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 9
0
def main():
    p = OptionParser(__doc__)
    p.add_option("--order",
                help="The order to plot the tracks, comma-separated")
    opts, args, iopts = p.set_image_options()

    if len(args) != 3:
        sys.exit(not p.print_help())

    chr, sizes, datadir = args
    order = opts.order
    hlsuffix = opts.hlsuffix
    if order:
        order = order.split(",")
    sizes = Sizes(sizes)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    canvas = (.12, .35, .8, .35)
    chr_size = sizes.get_size(chr)
    c = Coverage(fig, root, canvas, chr, (0, chr_size), datadir,
                 order=order, hlsuffix=hlsuffix)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 10
0
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    p = OptionParser(oropetium.__doc__)
    p.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = p.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout,
            switch=switch, extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .57, .74, text=True, repeat=True)

    # On the left panel, make a species tree
    fc = 'lightslategrey'

    coords = {}
    xs, xp = .16, .03
    coords["oropetium"] = (xs, .7)
    coords["setaria"] = (xs, .6)
    coords["sorghum"] = (xs, .5)
    coords["rice"] = (xs, .4)
    coords["brachypodium"] = (xs, .3)
    xs -= xp
    coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs)
    xs -= xp
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs)
    coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs)
    xs -= xp
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs)

    # Names of the internal nodes
    for tag in ("BEP", "Poaceae"):
        nx, ny = coords[tag]
        nx, ny = nx - .005, ny - .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc)
    for tag in ("PACMAD",):
        nx, ny = coords[tag]
        nx, ny = nx - .005, ny + .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="bottom", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "oropetium"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 11
0
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option("--mode", default="span", choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    p.add_option("--binned", default=False, action="store_true",
                 help="Specify whether the input is already binned; " +
                 "if True, input files are considered to be binfiles")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract = check_window_options(opts)
    linebeds = []
    colors = opts.colors
    if opts.lines:
        lines = opts.lines.split(",")
        assert len(colors) == len(lines), "Number of chosen colors must match" + \
                " number of input bed files"
        linebeds = get_beds(lines, binned=opts.binned)

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode, binned=opts.binned)

    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(chr, color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(ax, [linebins[i]], nbins, chr, window, shift, \
                color="{0}{1}".format(colors[i], 'r'))

    plt.subplots_adjust(hspace=0.5)

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 12
0
Archivo: tsp.py Proyecto: Hensonmw/jcvi
def plot_data(x, y, tour, M):
    from jcvi.graphics.base import plt, savefig
    plt.plot(x, y, "ro")
    for ia, ib in pairwise(tour):
        plt.plot((x[ia], x[ib]), (y[ia], y[ib]), "r-")

    score = evaluate(tour, M)
    plt.title("Score={0:.2f}".format(score))

    savefig("demo.pdf")
Ejemplo n.º 13
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        fp = open(bc)
        data = []
        for row in fp:
            prog, pcounts, tcounts, shared = row.split()
            pcounts = int(pcounts)
            tcounts = int(tcounts)
            shared = int(shared)
            data.append((prog, pcounts, tcounts, shared))
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print >> sys.stderr, message
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Ejemplo n.º 14
0
def scenario(args):
    """
    %prog scenario

    Illustration of the two-step genome merger process for B. rapa companion paper.
    """
    p = OptionParser(__doc__)
    opts, args = p.parse_args()

    fig = plt.figure(1, (5, 5))
    root = fig.add_axes([0, 0, 1, 1])

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    # Layout format: (x, y, label, (chr lengths))
    anc = (.5, .9, "Ancestor", (1,))
    s1 = (.2, .6, "Genome I", (1,))
    s2 = (.5, .6, "Genome II", (1,))
    s3 = (.8, .6, "Genome III", (1,))
    tetra = (.35, .4, "Tetraploid I / II", (.5, .9))
    hexa = (.5, .1, "Hexaploid I / II / III", (.36, .46, .9))
    labels = (anc, s1, s2, s3, tetra, hexa)
    connections = ((anc, s1), (anc, s2), (anc, s3),\
            (s1, tetra), (s2, tetra),
            (tetra, hexa), (s3, hexa))

    xinterval = .02
    yratio = .05
    for xx, yy, label, chrl in labels:
        #RoundLabel(root, xx, yy, label)
        root.text(xx, yy, label, ha="center", va="center")
        offset = len(label) * .012
        for i, c in enumerate(chrl):
            ya = yy + yratio * c
            yb = yy - yratio * c
            Chromosome(root, xx - offset + i * xinterval, ya, yb, width=.01)

    # Comments
    comments = ((.15, .33, "II dominant"),
                (.25, .03, "III dominant"))

    for xx, yy, c in comments:
        root.text(xx, yy, c, size=9, ha="center", va="center")

    # Branches
    tip = .04
    for a, b in connections:
        xa, ya, la, chra = a
        xb, yb, lb, chrb = b
        plt.plot((xa, xb), (ya - tip, yb + 2 * tip), 'k-', lw=2, alpha=.5)

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 15
0
def litchi(args):
    """
    %prog litchi mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    p = OptionParser(litchi.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .7, .82)

    # On the left panel, make a species tree
    fc = 'lightslategrey'

    coords = {}
    xs, xp = .16, .03
    coords["lychee"] = (xs, .37)
    coords["clementine"] = (xs, .5)
    coords["cacao"] = (xs, .6)
    coords["strawberry"] = (xs, .7)
    coords["grape"] = (xs, .8)
    xs -= xp
    coords["Sapindales"] = join_nodes(root, coords, "clementine", "lychee", xs)
    xs -= xp
    coords["Rosid-II"] = join_nodes(root, coords, "cacao", "Sapindales", xs)
    xs -= xp
    coords["Rosid"] = join_nodes(root, coords, "strawberry", "Rosid-II", xs)
    xs -= xp
    coords["crown"] = join_nodes(root, coords, "grape", "Rosid", xs,
                                 circle=False)

    # Names of the internal nodes
    for tag in ("Rosid", "Rosid-II", "Sapindales"):
        nx, ny = coords[tag]
        nx, ny = nx - .01, ny - .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "litchi"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 16
0
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int", help="Max contig size")
    p.add_option("--maxcov", default=100, type="int", help="Max contig size")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    data = []
    maxsize, maxcov = opts.maxsize, opts.maxcov
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize:
            continue
        if c > maxcov:
            continue
        data.append((size, c))

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    sns.jointplot(xlab, ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 17
0
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Read in CDT file
    fp = open(datafile)
    next(fp)
    next(fp)
    data = []
    for row in fp:
        atoms = row.split()[4:]
        nval = len(atoms)
        values = [float(x) for x in atoms]
        # normalize
        values = [x * 1. / sum(values) for x in values]
        data.append(values)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    yinterval = (ymax - ymin) / len(data)
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 18
0
def dotplot_main(anchorfile, qbed, sbed, image_name, iopts, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper", genomenames=None,
        sample_number=10000, minfont=5, palette=None, chrlw=.01, title=None):

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=vmin, vmax=vmax,
        is_self=is_self, synteny=synteny, cmap_text=cmap_text, cmap=cmap,
        genomenames=genomenames, sample_number=sample_number,
        minfont=minfont, palette=palette, chrlw=chrlw, title=title)

    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 19
0
def ploidy(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .09
    radius = .012
    TextCircle(root, x, .825, r'$\tau$', radius=radius, fc=fc)
    TextCircle(root, x, .8, r'$\sigma$', radius=radius, fc=fc)
    TextCircle(root, x, .72, r'$\rho$', radius=radius, fc=fc)
    for ypos in (.825, .8, .72):
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 20
0
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    p = OptionParser(amborella.__doc__)
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    TextCircle(root, x, .86, '$\gamma$', radius=radius)
    TextCircle(root, x, .95, '$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    x, y = zip(*pts)
    root.plot(x, y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 21
0
Archivo: napus.py Proyecto: rrane/jcvi
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", ha="center", va="top", color=fc)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$", ha="center", va="top", color=fc)
    root.text(.1, .67 + ot, r"Brassica triplication", ha="center",
                va="top", color=fc, size=11)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$", ha="center", va="top", color=fc)
    root.text(.1, .42 + ot, r"Allo-tetraploidy", ha="center",
                va="top", color=fc, size=11)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$", ha="center", va="top", color=fc)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    root.text(.5, .2 + ot, r"\noindent\textit{Brassica napus}\\"
                "(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", ha="center",
                size=16, color="k", bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 22
0
def _draw_trees(trees, nrow=1, ncol=1, rmargin=.3, iopts=None, outdir=".",
    shfile=None, **kwargs):
    """
    Draw one or multiple trees on one plot.
    """
    from jcvi.graphics.tree import draw_tree

    if shfile:
        SHs = DictFile(shfile, delimiter="\t")

    ntrees = len(trees)
    n = nrow * ncol
    for x in xrange(int(ceil(float(ntrees)/n))):
        fig = plt.figure(1, (iopts.w, iopts.h)) if iopts \
              else plt.figure(1, (5, 5))
        root = fig.add_axes([0, 0, 1, 1])

        xiv = 1. / ncol
        yiv = 1. / nrow
        xstart = list(np.arange(0, 1, xiv)) * nrow
        ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol)))
        for i in xrange(n*x, n*(x+1)):
            if i == ntrees:
                break
            ax = fig.add_axes([xstart[i%n], ystart[i%n], xiv, yiv])
            f = trees.keys()[i]
            tree = trees[f]
            try:
                SH = SHs[f]
            except:
                SH = None
            draw_tree(ax, tree, rmargin=rmargin, reroot=False, \
                supportcolor="r", SH=SH, **kwargs)

        root.set_xlim(0, 1)
        root.set_ylim(0, 1)
        root.set_axis_off()

        format = iopts.format if iopts else "pdf"
        dpi = iopts.dpi if iopts else 300
        if n == 1:
            image_name = f.rsplit(".", 1)[0] + "." + format
        else:
            image_name = "trees{0}.{1}".format(x, format)
        image_name = op.join(outdir, image_name)
        savefig(image_name, dpi=dpi, iopts=iopts)
        plt.clf()
Ejemplo n.º 23
0
def correlation(args):
    """
    %prog correlation postgenomic-s.tsv

    Plot correlation of age vs. postgenomic features.
    """
    p = OptionParser(correlation.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    tsvfile, = args
    df = pd.read_csv(tsvfile, sep="\t")
    composite_correlation(df, size=(iopts.w, iopts.h))
    outfile = tsvfile.rsplit(".", 1)[0] + ".correlation.pdf"
    savefig(outfile)
Ejemplo n.º 24
0
def ccn(args):
    """
    %prog ccn combined.tsv

    Plot several ccn plots including chr1,chrX,chrY,chrM
    """
    p = OptionParser(ccn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    tsvfile, = args
    df = pd.read_csv(tsvfile, sep="\t")
    composite_ccn(df, size=(iopts.w, iopts.h))
    outfile = tsvfile.rsplit(".", 1)[0] + ".ccn.pdf"
    savefig(outfile)
Ejemplo n.º 25
0
def mtdotplots(args):
    """
    %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors

    Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
    graphics.dotplot() function calls as panel A and B.
    """
    from jcvi.graphics.dotplot import check_beds, dotplot

    p = OptionParser(mtdotplots.__doc__)
    p.set_beds()
    opts, args, iopts = p.set_image_options(args, figsize="16x8", dpi=90)

    if len(args) != 3:
        sys.exit(not p.print_help())

    a, b, ac = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    r1 = fig.add_axes([0, 0, .5, 1])
    r2 = fig.add_axes([.5, 0, .5, 1])
    a1 = fig.add_axes([.05, .1, .4, .8])
    a2 = fig.add_axes([.55, .1, .4, .8])

    anchorfile = op.join(a, ac)
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    dotplot(anchorfile, qbed, sbed, fig, r1, a1, is_self=is_self,
            genomenames="Mt3.5_Mt3.5")

    opts.qbed = opts.sbed = None
    anchorfile = op.join(b, ac)
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    dotplot(anchorfile, qbed, sbed, fig, r2, a2, is_self=is_self,
            genomenames="Mt4.0_Mt4.0")

    root.text(.03, .95, "A", ha="center", va="center", size=36)
    root.text(.53, .95, "B", ha="center", va="center", size=36)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "mtdotplots"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 26
0
Archivo: glyph.py Proyecto: fw1121/jcvi
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    align_choices = ("left", "center", "right")
    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment [default: %default]")
    p.add_option("--noUTR", default=False, action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01
    for genename, mrnabed, cdsbeds in setups:
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 27
0
def qc(args):
    """
    %prog qc postgenomic-s.tsv

    Plot basic statistics of a given sample:
    Age, Gender, Ethnicity, Cohort, Chemistry
    """
    p = OptionParser(heritability.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    tsvfile, = args
    df = pd.read_csv(tsvfile, sep="\t")
    composite_qc(df, size=(iopts.w, iopts.h))
    outfile = tsvfile.rsplit(".", 1)[0] + ".qc.pdf"
    savefig(outfile)
Ejemplo n.º 28
0
Archivo: hic.py Proyecto: xuanblo/jcvi
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    p = OptionParser(movieframe.__doc__)
    p.add_option("--label", help="Figure title")
    p.set_beds()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args, figsize="16x8",
                                            style="white", cmap="coolwarm",
                                            format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not p.print_help())

    tour, clmfile, anchorsfile = args
    tour = tour.split(",")
    image_name = opts.outfile or ("movieframe." + iopts.format)
    label = opts.label or op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    M = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])        # whole canvas
    ax1 = fig.add_axes([.05, .1, .4, .8])    # heatmap
    ax2 = fig.add_axes([.55, .1, .4, .8])    # dot plot
    ax2_root = fig.add_axes([.5, 0, .5, 1])  # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(ax1, M, breaks, iopts)

    # Right axis: synteny
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts,
                                                     sorted=False)
    dotplot(anchorsfile, qbed, sbed, fig, ax2_root, ax2, sep=False, title="")

    root.text(.5, .98, clm.name, color="g", ha="center", va="center")
    root.text(.5, .95, label, color="darkslategray", ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 29
0
def main():
    p = OptionParser(__doc__)
    opts, args, iopts = p.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, layoutfile = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, layoutfile)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 30
0
Archivo: ks.py Proyecto: ascendo/jcvi
    def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"):

        ax = self.ax
        ks_max = self.ks_max
        lines = self.lines
        labels = self.labels
        legendp = self.legendp
        if len(lines) > 1:
            leg = ax.legend(lines, labels, loc=legendp,
                            shadow=True, fancybox=True, prop={"size": 10})
            leg.get_frame().set_alpha(.5)

        ax.set_xlim((0, ks_max - self.interval))
        ax.set_title(markup(title), fontweight="bold")
        ax.set_xlabel(markup('Synonymous substitutions per site (*Ks*)'))
        ax.set_ylabel('Percentage of gene pairs')

        ax.set_xticklabels(ax.get_xticks(), family='Helvetica')
        ax.set_yticklabels(ax.get_yticks(), family='Helvetica')

        savefig(filename, dpi=300)
Ejemplo n.º 31
0
def main():
    p = OptionParser(__doc__)
    p.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]")
    p.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--extra", help="Extra features in BED format")
    p.add_option(
        "--gene_style",
        default="Rectangle",
        help=
        "Default <Rectangle> to plot genes as rectangle. Accept <Arrow> to add orientation of genes."
    )
    p.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    p.add_option(
        "--add_gene_legend",
        default=False,
        action="store_true",
        help="Add forward and reverse strand gene legend to the plot",
    )
    p.add_option(
        "--add_gene_label",
        default=False,
        action="store_true",
        help="Add gene names to the plot",
    )
    p.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    opts, args, iopts = p.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    datafile, bedfile, layoutfile = args
    switch = opts.switch
    tree = opts.tree

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig,
            root,
            datafile,
            bedfile,
            layoutfile,
            switch=switch,
            tree=tree,
            extra_features=opts.extra,
            scalebar=opts.scalebar,
            shadestyle=opts.shadestyle,
            gene_legend=opts.add_gene_legend,
            add_gene_label=opts.add_gene_label,
            gene_style=opts.gene_style)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 32
0
def dotplot_main(args):
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext",
                 help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap",
                 help="Two column file, block id to color mapping")
    p.add_option(
        "--colororientation",
        action="store_true",
        default=False,
        help="Color the blocks based on orientation, similar to mummerplot",
    )
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option("--nosep",
                 default=False,
                 action="store_true",
                 help="Do not add contig lines")
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="9x9",
                                            style="dark",
                                            dpi=90,
                                            cmap="copper")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile, ) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile,
                                                     p,
                                                     opts,
                                                     sorted=(not opts.nosort))

    palette = opts.colormap
    if palette:
        palette = Palette(palettefile=palette)
    elif opts.colororientation:
        palette = Palette.from_block_orientation(anchorfile, qbed, sbed)

    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        anchorfile = anchorksfile

    if opts.skipempty:
        ac = AnchorFile(anchorfile)
        if is_self:
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            qi, q = qorder[q]
            si, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        cmap_text=opts.cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." +
                                  opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()
Ejemplo n.º 33
0
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract=subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract=subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chr]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio
    cc = chr
    if "_" in chr:
        ca, cb = chr.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chr, window, shift)
    ax.text(.1, .9, cc, va="top", zorder=100, transform=ax.transAxes,
              bbox=dict(boxstyle="round", fc="w", alpha=.5))

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, p in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, p in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(p, chr, window, shift)

        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chr:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 34
0
def main():
    p = OptionParser(__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    p.add_option("--tree", help="Display trees on the bottom of the figure")
    p.add_option("--extra", help="Extra features in BED format")
    p.add_option(
        "--genelabelsize",
        default=0,
        type="int",
        help="Show gene labels at this font size, useful for debugging. " +
        "However, plot may appear visually crowded. " +
        "Reasonably good values are 2 to 6 [Default: disabled]",
    )
    p.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    p.add_option(
        "--glyphstyle",
        default="box",
        choices=Glyph.Styles,
        help="Style of feature glyphs",
    )
    p.add_option(
        "--glyphcolor",
        default="orientation",
        choices=Glyph.Palette,
        help="Glyph coloring based on",
    )
    p.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    opts, args, iopts = p.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    datafile, bedfile, layoutfile = args
    switch = opts.switch
    tree = opts.tree

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Synteny(
        fig,
        root,
        datafile,
        bedfile,
        layoutfile,
        switch=switch,
        tree=tree,
        extra_features=opts.extra,
        genelabelsize=opts.genelabelsize,
        scalebar=opts.scalebar,
        shadestyle=opts.shadestyle,
        glyphstyle=opts.glyphstyle,
        glyphcolor=opts.glyphcolor,
    )

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 35
0
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    p = OptionParser(wheel.__doc__)
    p.add_option(
        "--column",
        default="score",
        choices=("score", "percentile"),
        help="Which column to extract from `datafile.csv`",
    )
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    # ax = plt.subplot(111, projection='polar')
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30",
        "#DD43A0",
        "#5856D6",
        "#007AFE",
        "#56BDEC",
        "#4CD8BA",
        "#4CD864",
        "#B0F457",
        "#FEF221",
        "#FFCC01",
        "#FF9500",
        "#FF3B30",
    ]

    # Baseline
    theta = np.linspace(1.5 * np.pi,
                        3.5 * np.pi,
                        endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-0.5, 2) if column == "score" else (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis("off")

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings)
    collapsed_groups = []
    gg = []
    for group, c in groupby(enumerate(groups), lambda x: x[1]):
        c = [x[0] for x in list(c)]
        collapsed_groups.append(group)
        gg.append(c)

    show_sector = False
    if show_sector:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * 0.9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(
                ax,
                theta[tmin],
                theta[tmax],
                theta_pad,
                R * 0.95,
                ls="-",
                color=color,
                lw=2,
            )

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=0.25)
    for color, group in zip(brewer, gg):
        hidden_data = [(theta[x], r[x]) for x in group
                       if (ci[0] <= r[x] <= ci[1])]
        shown_data = [(theta[x], r[x]) for x in group
                      if (r[x] < ci[0] or r[x] > ci[1])]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(",".join("{:.1f}".format(degrees(x))
                                             for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(",".join(
        ['"{}"'.format(x) for x in diseaseNames])))

    # Labels
    from math import cos, sin

    r = 0.5
    for i, label in enumerate(labels):
        tl = theta[i]
        x, y = 0.5 + r * cos(tl), 0.5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quardrants
            d -= 180
        root.text(x,
                  y,
                  label,
                  size=4,
                  rotation=d,
                  ha="center",
                  va="center",
                  color=linecolor)
        print(x, y, label)

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]],
               width=2 * np.pi,
               bottom=ci[0],
               fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 36
0
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.

    With --barcode a mapping file can be provided to convert seq names to
    eg. species names, useful in unified tree display. This file should have
    distinctive barcodes in column1 and new names in column2, tab delimited.
    """
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Outgroup for rerooting the tree. " + \
                 "Use comma to separate multiple taxa.")
    p.add_option("--noreroot", default=False, action="store_true", \
                 help="Don't reroot the input tree [default: %default]")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")
    p.add_option("--SH", default=None, type="string",
                 help="SH test p-value [default: %default]")
    p.add_option("--scutoff", default=0, type="int",
                 help="cutoff for displaying node support, 0-100 [default: %default]")
    p.add_option("--barcode", default=None,
                 help="path to seq names barcode mapping file: " \
                 "barcode<tab>new_name [default: %default]")
    p.add_option("--leafcolor", default="k",
                 help="Font color for the OTUs, or path to a file " \
                 "containing color mappings: leafname<tab>color [default: %default]")
    p.add_option("--leaffont", default=12, help="Font size for the OTUs")
    p.add_option("--geoscale", default=False, action="store_true",
                 help="Plot geological scale")

    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    reroot = not opts.noreroot
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    if datafile == "demo":
        tx = """(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537,
        (Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985,
        ((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332,
        (Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);"""
    else:
        logging.debug("Load tree file `{0}`.".format(datafile))
        tx = open(datafile).read()

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    if opts.geoscale:
        draw_geoscale(root)

    else:
        if op.isfile(opts.leafcolor):
            leafcolor = "k"
            leafcolorfile = opts.leafcolor
        else:
            leafcolor = opts.leafcolor
            leafcolorfile = None

        draw_tree(root, tx, rmargin=opts.rmargin, leafcolor=leafcolor, \
                  outgroup=outgroup, reroot=reroot, gffdir=opts.gffdir, \
                  sizes=opts.sizes, SH=opts.SH, scutoff=opts.scutoff, \
                  barcodefile=opts.barcode, leafcolorfile=leafcolorfile,
                  leaffont=opts.leaffont)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 37
0
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    xydata = zip(xdata, ydata)
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis,
            ylis,
            "r-",
            lw=12,
            alpha=.3,
            solid_capstyle="round",
            solid_joinstyle="round")
    ax.plot(xlds,
            ylds,
            "g-",
            lw=12,
            alpha=.3,
            solid_capstyle="round",
            solid_joinstyle="round")
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2,
              .55,
              "Chromosome location (bp)",
              ha="center",
              va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B
    w = .37
    p = (0, 45, 75, 110)
    ax = fig.add_axes([.1, .12, w, h])
    xdata = [x for x in range(10, 110, 10)]
    ydata = ydata_orig = [x for x in range(10, 110, 10)]
    ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1]
    xydata = zip(xdata, ydata)
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis,
            ylis,
            "r-",
            lw=12,
            alpha=.3,
            solid_capstyle="round",
            solid_joinstyle="round")
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    patch = [.1 + w * x / 110. for x in p]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch, height=.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in p]
    ydata = ydata_orig
    ax.plot(xdata,
            ydata,
            "r-",
            lw=12,
            alpha=.3,
            solid_capstyle="round",
            solid_joinstyle="round")
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch, height=.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 38
0
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize",
                 default=1000000,
                 type="int",
                 help="Max contig size")
    p.add_option("--maxcov", default=100, type="int", help="Max contig size")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind",
                 default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    data = []
    maxsize, maxcov = opts.maxsize, opts.maxcov
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize:
            continue
        if c > maxcov:
            continue
        data.append((size, c))

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    sns.jointplot(xlab,
                  ylab,
                  kind=opts.kind,
                  data=df,
                  xlim=(0, maxsize),
                  ylim=(0, maxcov),
                  stat_func=None,
                  edgecolor="w",
                  color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 39
0
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax",
                 default=None,
                 type="int",
                 help="Limit ymax [default: %default]")
    p.add_option(
        "--spans",
        default=False,
        action="store_true",
        help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    not_covered = []
    yy = .9
    for bedfile, c in zip(bedfiles, "rgbcky"):
        if not opts.spans:
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend,
                                               thousands(notcoveredbases))
        not_covered.append(msg)
        print(msg, file=sys.stderr)
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 40
0
Archivo: ca.py Proyecto: zjwang6/jcvi
def astat(args):
    """
    %prog astat coverage.log

    Create coverage-rho scatter plot.
    """
    p = OptionParser(astat.__doc__)
    p.add_option("--cutoff", default=1000, type="int", help="Length cutoff")
    p.add_option("--genome", default="", help="Genome name")
    p.add_option(
        "--arrDist",
        default=False,
        action="store_true",
        help="Use arrDist instead",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (covfile, ) = args
    cutoff = opts.cutoff
    genome = opts.genome
    plot_arrDist = opts.arrDist

    suffix = ".{0}".format(cutoff)
    small_covfile = covfile + suffix
    update_covfile = need_update(covfile, small_covfile)
    if update_covfile:
        fw = open(small_covfile, "w")
    else:
        logging.debug("Found `{0}`, will use this one".format(small_covfile))
        covfile = small_covfile

    fp = open(covfile)
    header = next(fp)
    if update_covfile:
        fw.write(header)

    data = []
    msg = "{0} tigs scanned ..."
    for row in fp:
        tigID, rho, covStat, arrDist = row.split()
        tigID = int(tigID)
        if tigID % 1000000 == 0:
            sys.stderr.write(msg.format(tigID) + "\r")

        rho, covStat, arrDist = [float(x) for x in (rho, covStat, arrDist)]
        if rho < cutoff:
            continue

        if update_covfile:
            fw.write(row)
        data.append((tigID, rho, covStat, arrDist))

    print(msg.format(tigID), file=sys.stderr)

    from jcvi.graphics.base import plt, savefig

    logging.debug("Plotting {0} data points.".format(len(data)))
    tigID, rho, covStat, arrDist = zip(*data)

    y = arrDist if plot_arrDist else covStat
    ytag = "arrDist" if plot_arrDist else "covStat"

    fig = plt.figure(1, (7, 7))
    ax = fig.add_axes([0.12, 0.1, 0.8, 0.8])
    ax.plot(rho, y, ".", color="lightslategrey")

    xtag = "rho"
    info = (genome, xtag, ytag)
    title = "{0} {1} vs. {2}".format(*info)
    ax.set_title(title)
    ax.set_xlabel(xtag)
    ax.set_ylabel(ytag)

    if plot_arrDist:
        ax.set_yscale("log")

    imagename = "{0}.png".format(".".join(info))
    savefig(imagename, dpi=150)
Ejemplo n.º 41
0
def depth(args):
    """
    %prog depth *.regions.bed.gz

    Plot the mosdepth regions BED file. We recommend to generate this BED file
    by (please adjust the --by parameter to your required resolution):

    $ mosdepth --no-per-base --use-median --fast-mode --by 1000000 sample.wgs
    sample.bam

    Use --chrinfo to specify a colormap between seqid, desired color, and
    optionally a new name. For example:

    chr01A, #c51b7d, 1A
    chr01B, #4d9221, 1B
    ...

    Only seqids that are in the colormap will be plotted, in the order that's
    given in the file. When --colormap is not set, every seqid will be drawn in
    black.

    Can take multiple BED files as input and then plot all of them in a
    composite figure.
    """
    p = OptionParser(depth.__doc__)
    p.add_option(
        "--chrinfo",
        help="Comma-separated mappings between seqid, color, new_name")
    p.add_option(
        "--titleinfo",
        help="Comma-separated titles mappings between filename, title",
    )
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bedfiles = args
    chrinfo = ChrInfoFile(opts.chrinfo) if opts.chrinfo else {}
    titleinfo = TitleInfoFile(opts.titleinfo) if opts.titleinfo else {}

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    npanels = len(bedfiles)
    yinterval = 1.0 / npanels
    ypos = 1 - yinterval
    for bedfile in bedfiles:
        pf = op.basename(bedfile).split(".", 1)[0]
        bed = Bed(bedfile)

        panel_root = root if npanels == 1 else fig.add_axes(
            [0, ypos, 1, yinterval])
        panel_ax = fig.add_axes(
            [0.1, ypos + 0.2 * yinterval, 0.8, 0.65 * yinterval])
        if ypos > 0.001:
            root.plot((0, 1), (ypos, ypos), "-", lw=2, color="lightslategray")

        title = titleinfo.get(bedfile, pf.split("_", 1)[0])
        subtitle = None
        if isinstance(title, TitleInfoLine):
            subtitle = title.subtitle
            title = title.title

        draw_depth(panel_root,
                   panel_ax,
                   bed,
                   chrinfo=chrinfo,
                   title=title,
                   subtitle=subtitle)
        ypos -= yinterval

    normalize_axes(root)

    if npanels > 1:
        pf = op.commonprefix(bedfiles)
    pf = pf or "depth"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 42
0
def simulate(args):
    """
    %prog simulate

    Run simulation on female restitution.
    """
    import seaborn as sns

    sns.set_style("darkgrid")

    p = OptionParser(simulate.__doc__)
    p.add_option(
        "--verbose",
        default=False,
        action="store_true",
        help="Verbose logging during simulation",
    )
    opts, args, iopts = p.set_image_options(args, figsize="6x6")
    if len(args) != 0:
        sys.exit(not p.print_help())

    # Construct a composite figure with 6 tracks
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    rows = 6
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    yy = 1 - ypad
    xpad = 0.2
    xwidth = 0.7

    # Axes are vertically stacked, and share x-axis
    axes = []
    yy_positions = [
    ]  # Save yy positions so we can show details to the right laterr
    for idx in range(rows):
        yy_positions.append(yy)
        yy -= yinterval
        ax = fig.add_axes([xpad, yy, xwidth, yinterval * 0.85])
        if idx != rows - 1:
            plt.setp(ax.get_xticklabels(), visible=False)
        axes.append(ax)
    ax1, ax2, ax3, ax4, ax5, ax6 = axes

    # Prepare the simulated data
    # Simulate two parents
    SS = Genome("SS", "SS", 10, 8)
    SO = Genome("SO", "SO", 8, 10)

    verbose = opts.verbose
    all_F1s = [simulate_F1(SO, SS, verbose=verbose) for _ in range(1000)]
    all_F2s = [simulate_F2(SO, SS, verbose=verbose) for _ in range(1000)]
    all_BC1s = [simulate_BCn(1, SO, SS, verbose=verbose) for _ in range(1000)]
    all_BC2s = [simulate_BCn(2, SO, SS, verbose=verbose) for _ in range(1000)]
    all_BC3s = [simulate_BCn(3, SO, SS, verbose=verbose) for _ in range(1000)]
    all_BC4s = [simulate_BCn(4, SO, SS, verbose=verbose) for _ in range(1000)]

    # Plotting
    plot_summary(ax1, all_F1s)
    plot_summary(ax2, all_F2s)
    plot_summary(ax3, all_BC1s)
    plot_summary(ax4, all_BC2s)
    plot_summary(ax5, all_BC3s)
    plot_summary(ax6, all_BC4s)

    # Show title to the left
    xx = xpad / 2
    for (title, subtitle), yy in zip(
        (
            ("F1", None),
            ("F2", None),
            ("BC1", None),
            ("BC2", None),
            ("BC3", None),
            ("BC4", None),
        ),
            yy_positions,
    ):
        if subtitle:
            yy -= 0.06
        else:
            yy -= 0.07
        root.text(
            xx,
            yy,
            title,
            color="darkslategray",
            ha="center",
            va="center",
            fontweight="semibold",
        )
        if subtitle:
            yy -= 0.02
            root.text(xx,
                      yy,
                      subtitle,
                      color="lightslategray",
                      ha="center",
                      va="center")

    axes[-1].set_xlabel("Number of unique chromosomes")
    adjust_spines(axes[-1], ["bottom"], outward=True)
    normalize_axes(root)

    savefig("plotter.pdf", dpi=120)

    outdir = "simulations"
    mkdir(outdir)
    # Write chromosomes to disk
    for genomes, filename in (
        (all_F1s, "all_F1s"),
        (all_F2s, "all_F2s"),
        (all_BC1s, "all_BC1s"),
        (all_BC2s, "all_BC2s"),
        (all_BC3s, "all_BC3s"),
        (all_BC4s, "all_BC4s"),
    ):
        write_chromosomes(genomes, op.join(outdir, filename))
Ejemplo n.º 43
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                    ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                      ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
Ejemplo n.º 44
0
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic.
    """
    p = OptionParser(histogram.__doc__)
    p.add_option("--vmin", dest="vmin", default=1, type="int",
            help="minimum value, inclusive [default: %default]")
    p.add_option("--vmax", dest="vmax", default=100, type="int",
            help="maximum value, inclusive [default: %default]")
    p.add_option("--pdf", default=False, action="store_true",
            help="Print PDF instead of ASCII plot [default: %default]")
    p.add_option("--coverage", default=0, type="int",
            help="Kmer coverage [default: auto]")
    p.add_option("--nopeaks", default=False, action="store_true",
            help="Do not annotate K-mer peaks")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    ascii = not opts.pdf
    peaks = not opts.nopeaks
    N = int(N)

    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    ks.analyze(K=N)

    Total_Kmers = int(ks.totalKmers)
    coverage = opts.coverage
    Kmer_coverage = ks.max2 if not coverage else coverage
    Genome_size = int(round(Total_Kmers * 1. / Kmer_coverage))

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, thousands(Total_Kmers))
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".\
                        format(Genome_size / 1e6)
    Repetitive_msg = ks.repetitive
    SNPrate_msg = ks.snprate

    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        print >> sys.stderr, msg

    x, y = ks.get_xy(opts.vmin, opts.vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if ascii:
        asciiplot(x, y, title=title)
        return Genome_size

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)
    ax = plt.gca()

    if peaks:
        t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(x, y) for x, y in ks.counts if x in t]
        if tcounts:
            x, y = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(x, y, 'ko', lw=2, mec='k', mfc='w')
            ax.text(ks.max1, tcounts[ks.max1], "SNP peak", va="top")
            ax.text(ks.max2, tcounts[ks.max2], "Main peak")

    messages = [Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg,
                Repetitive_msg, SNPrate_msg]
    write_messages(ax, messages)

    ymin, ymax = ax.get_ylim()
    ymax = ymax * 7 / 6

    ax.set_title(markup(title))
    ax.set_ylim((ymin, ymax))
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    set_human_axis(ax)

    imagename = histfile.split(".")[0] + ".pdf"
    savefig(imagename, dpi=100)

    return Genome_size
Ejemplo n.º 45
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax,
                       xlim=chrsize,
                       ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax,
                       xlim=chrsize,
                       ylim=25 * 1e-6,
                       xfactor=1e-6,
                       xlabel="Physical position (Mb)",
                       yfactor=1000000,
                       ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict((x.component_id, (x.object_beg, x.object_end,
                                   x.component_span, x.orientation)) \
                                   for x in g.agp if not x.is_gap)
    a_beg, a_end, asize, ao = sizes[a]
    b_beg, b_end, bsize, bo = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size
    y = .16
    pad = .03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2), (ystart, ystart - .14),
              ":",
              color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3), (ystart, ystart - .08),
              ":",
              color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize), (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker, ), (y, ), "o", color=lsg)
        root.text((beg + end) / 2,
                  y + pad,
                  latex(tag),
                  ha="center",
                  va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(
        sum(markers) / 2, ypos + pad,
        "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2,
              ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r",
              ha="center",
              va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 46
0
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(p.print_help())

    prefix, = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    bedpefile = prefix + ".bedpe"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    fp = open(bedpefile)
    pairs = []
    for row in fp:
        scf, astart, aend, scf, bstart, bend, clonename = row.split()
        astart, bstart = int(astart), int(bstart)
        aend, bend = int(aend), int(bend)
        start = min(astart, bstart) + 1
        end = max(aend, bend)
        pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted
    # this convert from base => x-coordinate
    pos = lambda x: (.1 + x * bpratio)
    ypos = .15 + .03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    fp = open(gapsbedfile)
    for row in fp:
        b = BedLine(row)
        start, end = b.start, b.end
        xstart = pos(start)
        xend = pos(end)
        root.add_patch(Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    root.text(.5, .1, scf, color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .1, scf, color='b', ha="center")
    root.text(.5, .05, warn_msg, color='gray', ha="center")
    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 47
0
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color
    coloridx = int(hashlib.sha1(tag).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    mf = df.loc[:, ("4_usercol", "6_pct_gc")]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > .001) | (mf["gc"] > .001)]

    # Create GC bins
    gcbins = defaultdict(list)
    for i, row in mf.iterrows():
        gcp = int(round(row["gc"] * 100))
        gcbins[gcp].append(row["depth"])
    gcd = sorted((k * .01, confidence_interval(v))
                 for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(mf["gc"], mf["depth"], ".", color="lightslategray", ms=2,
             mec="lightslategray", alpha=.1)
    patch = plt.fill_between(gcd_x, lo, hi,
                             facecolor=color, alpha=.25, zorder=10,
                             linewidth=0.0, label="Median +/- MAD band")
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
Ejemplo n.º 48
0
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    p = OptionParser(demo.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = 0.23
    dup_space = 0.025
    # Draw a gene and two regulatory elements at these arbitrary locations
    locs = [
        (0.5, 0.9),  # ancestral gene
        (0.5, 0.9 - panel_space + dup_space),  # identical copies
        (0.5, 0.9 - panel_space - dup_space),
        (0.5, 0.9 - 2 * panel_space + dup_space),  # degenerate copies
        (0.5, 0.9 - 2 * panel_space - dup_space),
        (0.2, 0.9 - 3 * panel_space + dup_space),  # sub-functionalization
        (0.2, 0.9 - 3 * panel_space - dup_space),
        (0.5, 0.9 - 3 * panel_space + dup_space),  # neo-functionalization
        (0.5, 0.9 - 3 * panel_space - dup_space),
        (0.8, 0.9 - 3 * panel_space + dup_space),  # non-functionalization
        (0.8, 0.9 - 3 * panel_space - dup_space),
    ]

    default_regulator = "gm"
    regulators = [
        default_regulator,
        default_regulator,
        default_regulator,
        "wm",
        default_regulator,
        "wm",
        "gw",
        "wb",
        default_regulator,
        "ww",
        default_regulator,
    ]

    width = 0.24
    for i, (xx, yy) in enumerate(locs):
        regulator = regulators[i]
        x1, x2 = xx - 0.5 * width, xx + 0.5 * width
        Glyph(root, x1, x2, yy)
        if i == 9:  # upper copy for non-functionalization
            continue

        # coding region
        x1, x2 = xx - 0.16 * width, xx + 0.45 * width
        Glyph(root, x1, x2, yy, fc="k")

        # two regulatory elements
        x1, x2 = xx - 0.4 * width, xx - 0.28 * width
        for xx, fc in zip((x1, x2), regulator):
            if fc == "w":
                continue

            DoubleCircle(root, xx, yy, fc=fc)

        rotation = 30
        tip = 0.02
        if i == 0:
            ya = yy + tip
            root.text(x1, ya, "Flower", rotation=rotation, va="bottom")
            root.text(x2, ya, "Root", rotation=rotation, va="bottom")
        elif i == 7:
            ya = yy + tip
            root.text(x2, ya, "Leaf", rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = 0.08
    ar_xpos = 0.5
    for ar_ypos in (0.3, 0.53, 0.76):
        root.annotate(
            " ",
            (ar_xpos, ar_ypos),
            (ar_xpos, ar_ypos + arrow_dist),
            arrowprops=arrowprops,
        )

    ar_ypos = 0.3
    for ar_xpos in (0.2, 0.8):
        root.annotate(" ", (ar_xpos, ar_ypos), (0.5, ar_ypos + arrow_dist),
                      arrowprops=arrowprops)

    # Duplication, Degeneration
    xx = 0.6
    ys = (0.76, 0.53)
    processes = ("Duplication", "Degeneration")
    for yy, process in zip(ys, processes):
        root.text(xx, yy + 0.02, process, fontweight="bold")

    # Label of fates
    xs = (0.2, 0.5, 0.8)
    fates = ("Subfunctionalization", "Neofunctionalization",
             "Nonfunctionalization")
    yy = 0.05
    for xx, fate in zip(xs, fates):
        RoundLabel(root, xx, yy, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 49
0
def plot(args):
    """
    %prog plot tagged.new.bed chr1

    Plot gene identifiers along a particular chromosome, often to illustrate the
    gene id assignment procedure.
    """
    from jcvi.graphics.base import plt, savefig
    from jcvi.graphics.chromosome import ChromosomeMap

    p = OptionParser(plot.__doc__)
    p.add_option("--firstn", type="int", help="Only plot the first N genes")
    p.add_option("--ymax", type="int", help="Y-axis max value")
    p.add_option("--log", action="store_true", help="Write plotting data")
    opts, args, iopts = p.set_image_options(args, figsize="6x4")

    if len(args) != 2:
        sys.exit(not p.print_help())

    taggedbed, chr = args
    bed = Bed(taggedbed)
    beds = list(bed.sub_bed(chr))
    old, new = [], []
    i = 0
    for b in beds:
        accn = b.extra[0]
        if "te" in accn:
            continue

        accn, tag = accn.split("|")
        if tag == "OVERLAP":
            continue

        c, r = atg_name(accn)
        if tag == "NEW":
            new.append((i, r))
        else:
            old.append((i, r))
        i += 1

    ngenes = i
    assert ngenes == len(new) + len(old)

    logging.debug("Imported {0} ranks on {1}.".format(ngenes, chr))
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    xstart, xend = 0.2, 0.8
    ystart, yend = 0.2, 0.8
    pad = 0.02

    ngenes = opts.firstn or ngenes
    ymax = opts.ymax or 500000

    title = "Assignment of Medtr identifiers"
    if opts.ymax:
        subtitle = "{0}, first {1} genes".format(chr, ngenes)
    else:
        subtitle = "{0}, {1} genes ({2} new)".format(chr, ngenes, len(new))

    chr_map = ChromosomeMap(fig, root, xstart, xend, ystart, yend, pad, 0,
                            ymax, 5, title, subtitle)

    ax = chr_map.axes

    if opts.log:
        from jcvi.utils.table import write_csv

        header = ["x", "y"]
        write_csv(header, new, filename=chr + ".new")
        write_csv(header, old, filename=chr + ".old")

    x, y = zip(*new)
    ax.plot(x, y, "b,")
    x, y = zip(*old)
    ax.plot(x, y, "r,")

    # Legends
    ymid = (ystart + yend) / 2
    y = ymid + pad
    root.plot([0.2], [y], "r.", lw=2)
    root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10)
    y = ymid - pad
    root.plot([0.2], [y], "b.", lw=2)
    root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10)

    ax.set_xlim(0, ngenes)
    ax.set_ylim(0, ymax)
    ax.set_axis_off()

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + ".identifiers." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 50
0
    ratio = ysize * 1. / xsize if proportional else 1
    width = iopts.w
    height = iopts.h * ratio
    fig = plt.figure(1, (width, height))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    blastplot(ax,
              blastfile,
              qsizes,
              ssizes,
              qbed,
              sbed,
              style=opts.dotstyle,
              proportional=proportional,
              sampleN=opts.nmax,
              baseticks=True,
              stripNames=opts.stripNames,
              highlights=highlights)

    # add genome names
    to_ax_label = lambda fname: op.basename(fname).split(".")[0]
    gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 51
0
def main():
    p = OptionParser(__doc__)
    opts, args, iopts = p.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (mode, ) = args
    assert mode == "demo"

    a, b = 30, 70
    pad = 0.08
    w = 0.31
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    yy = 1 - pad
    for i in range(3):
        root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray")
        yy -= w

    # Row headers
    xx = pad * 0.6
    yy = 1 - pad - 0.5 * w
    for title in ("Inversion", "Indel", "Duplication"):
        root.text(xx, yy, title, ha="center", va="center")
        yy -= w

    # Column headers
    xx = pad + 0.5 * w
    yy = 1 - pad / 2
    for title in ("Assembly alignment", "Read alignment",
                  "Optical map alignment"):
        root.text(xx, yy, title, ha="center", va="center")
        xx += w

    p = PairwiseAlign(fig, [pad, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = PairwiseAlign(fig, [pad, w, w, w])
    p.delete(a, b)
    p.draw()

    p = PairwiseAlign(fig, [pad, 0, w, w])
    p.duplicate(a, b, gap=5)
    p.draw()

    p = ReadAlign(fig, [pad + w, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = ReadAlign(fig, [pad + w, w, w, w])
    p.delete(a, b)
    p.draw()

    p = ReadAlign(fig, [pad + w, 0, w, w])
    p.duplicate(a, b)
    p.draw()

    p = OpticalMapAlign(fig, [pad + 2 * w, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = OpticalMapAlign(fig, [pad + 2 * w, w, w, w])
    p.delete(a, b)
    p.draw()

    p = OpticalMapAlign(fig, [pad + 2 * w, 0, w, w])
    p.duplicate(a, b)
    p.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 52
0
Archivo: misc.py Proyecto: lqsae/jcvi
def cotton(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphic.synteny.
    """
    p = OptionParser(cotton.__doc__)
    p.add_option("--depthfile", help="Use depth info in this file")
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    depthfile = opts.depthfile

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    kt = Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    light = "lightslategrey"
    # Show the dup depth along the cotton chromosomes
    if depthfile:
        ymin, ymax = 0.9, 0.95
        root.text(0.11,
                  0.96,
                  "Cotton duplication level",
                  color="gray",
                  size=10)
        root.plot([0.1, 0.95], [ymin, ymin], color="gray")
        root.text(0.96, 0.9, "1x", color="gray", va="center")
        root.plot([0.1, 0.95], [ymax, ymax], color="gray")
        root.text(0.96, 0.95, "6x", color="gray", va="center")

        fp = open(depthfile)
        track = kt.tracks[0]  # Cotton
        depths = []
        for row in fp:
            a, b, depth = row.split()
            depth = int(depth)
            try:
                p = track.get_coords(a)
                depths.append((p, depth))
            except KeyError:
                pass

        depths.sort(key=lambda x: (x[0], -x[1]))
        xx, yy = zip(*depths)
        yy = [ymin + 0.01 * (x - 1) for x in yy]
        root.plot(xx, yy, "-", color=light)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.5, 0.68, 0.5)

    # Zoom
    xpos = 0.835
    ytop = 0.9
    xmin, xmax = 0.18, 0.82
    ymin, ymax = ytop, 0.55
    lc = "k"
    kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3)
    root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs)
    root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs)
    root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs)
    RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light)

    # Panels
    root.text(0.05, 0.95, "a", size=20, fontweight="bold")
    root.text(0.1, 0.45, "b", size=20, fontweight="bold")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "cotton"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 53
0
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chr, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        cc = chr
        if "_" in chr:
            ca, cb = chr.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chr, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc, ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, p in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0))
        xx += 2 * inner
        root.text(xx, yy, b, size=13)
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 54
0
Archivo: misc.py Proyecto: lqsae/jcvi
def waterlilyGOM(args):
    """
    %prog mcmctree.tre table.csv

    Customized figure to plot phylogeny and related infographics.
    """
    from jcvi.graphics.tree import (
        LeafInfoFile,
        WGDInfoFile,
        draw_tree,
        parse_tree,
        draw_wgd_xy,
    )
    from jcvi.graphics.table import CsvTable, draw_table

    p = OptionParser(waterlilyGOM.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 2:
        sys.exit(not p.print_help())

    (datafile, csvfile) = args
    outgroup = ["ginkgo"]

    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    margin, rmargin = 0.15, 0.19  # Left and right margin
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    groups = "Monocots,Eudicots,ANA-grade,Gymnosperms"

    draw_tree(
        root,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
        groups=groups.split(","),
    )

    # Bottom right show legends for the WGD circles
    pad = 0.02
    ypad = 0.04
    xstart = 1 - rmargin + pad
    ystart = 0.2
    waterlily_wgdline = wgdinfo["waterlily"][0]
    ypos = ystart - 2 * ypad
    draw_wgd_xy(root, xstart, ypos, waterlily_wgdline)
    root.text(
        xstart + pad,
        ypos,
        "Nymphaealean WGD",
        color=waterlily_wgdline.color,
        va="center",
    )
    other_wgdline = wgdinfo["banana"][0]
    ypos = ystart - 3 * ypad
    draw_wgd_xy(root, xstart, ypos, other_wgdline)
    root.text(
        xstart + pad,
        ypos,
        "Other known WGDs",
        color=other_wgdline.color,
        va="center",
    )

    # Top left draw the comparison table
    csv_table = CsvTable(csvfile)
    draw_table(
        root,
        csv_table,
        extent=(0.02, 0.44, 0.55, 0.985),
        stripe_color="lavender",
        yinflation=iopts.w / iopts.h,
    )

    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 55
0
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(composite.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--bars",
                 help="Features to plot in bars [default: %default]")
    p.add_option("--altbars",
                 help="Features to plot in alt-bars [default: %default]")
    p.add_option("--fatten", default=False, action="store_true",
                 help="Help visualize certain narrow features [default: %default]")
    p.add_option("--mode", default="span", choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract = check_window_options(opts)
    linebeds, barbeds, altbarbeds = [], [], []
    fatten = opts.fatten
    if opts.lines:
        lines = opts.lines.split(",")
        linebeds = get_beds(lines)
    if opts.bars:
        bars = opts.bars.split(",")
        barbeds = get_beds(bars)
    if opts.altbars:
        altbars = opts.altbars.split(",")
        altbarbeds = get_beds(altbars)

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode)

    margin = .12
    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(.5, .95, chr, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen
    # Line plots
    ax = fig.add_axes([xstart, .6, xlen, .3])
    lineplot(ax, linebins, nbins, chr, window, shift)

    # Bar plots
    yy = .5
    yinterval = .08
    xs = lambda x: xstart + ratio * x
    r = .01
    fattend = .0025
    for bb in barbeds:
        root.text(xend + .01, yy, bb.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, yy, height=.02)
        bb = Bed(bb)
        for b in bb:
            start, end = xs(b.start), xs(b.end)
            span = end - start
            if fatten and span < fattend:
                span = fattend

            root.add_patch(Rectangle((start, yy - r), span, 2 * r, \
                            lw=0, fc="darkslategray"))
        yy -= yinterval

    # Alternative bar plots
    offset = r / 2
    for bb in altbarbeds:
        root.text(xend + .01, yy, bb.split(".")[0], va="center")
        bb = Bed(bb)
        for i, b in enumerate(bb):
            start, end = xs(b.start), xs(b.end)
            span = end - start
            if span < .0001:
                continue
            offset = -offset
            root.add_patch(Rectangle((start, yy + offset), end - start, .003, \
                            lw=0, fc="darkslategray"))
        yy -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 56
0
Archivo: misc.py Proyecto: lqsae/jcvi
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    p = OptionParser(oropetium.__doc__)
    p.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = p.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig,
            root,
            datafile,
            bedfile,
            slayout,
            switch=switch,
            extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.4, 0.57, 0.74, text=True, repeat=True)

    # On the left panel, make a species tree
    fc = "lightslategrey"

    coords = {}
    xs, xp = 0.16, 0.03
    coords["oropetium"] = (xs, 0.7)
    coords["setaria"] = (xs, 0.6)
    coords["sorghum"] = (xs, 0.5)
    coords["rice"] = (xs, 0.4)
    coords["brachypodium"] = (xs, 0.3)
    xs -= xp
    coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs)
    xs -= xp
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs)
    coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs)
    xs -= xp
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs)

    # Names of the internal nodes
    for tag in ("BEP", "Poaceae"):
        nx, ny = coords[tag]
        nx, ny = nx - 0.005, ny - 0.02
        root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc)
    for tag in ("PACMAD", ):
        nx, ny = coords[tag]
        nx, ny = nx - 0.005, ny + 0.02
        root.text(nx, ny, tag, rotation=90, ha="right", va="bottom", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "oropetium"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 57
0
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """

    p = OptionParser(__doc__)
    opts, args = p.parse_args()

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs",
              "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0
    m = lambda x: x / mrange * 0.7 + 0.1
    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        myhsps = hsps
        if i >= 1:
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r2 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r3 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r4 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip),
                 (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    bites = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bites):
        xx = 0.15 if (i % 2 == 0) else 0.55
        yy = ystart - i / 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 58
0
Archivo: misc.py Proyecto: lqsae/jcvi
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    p = OptionParser(birch.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    K = Karyotype(fig, root, seqids, layout)
    L = K.layout

    xs = 0.79
    dt = dict(rectangle=False, circle=False)
    # Embed a phylogenetic tree to the right
    coords = {}
    coords["Amborella"] = (xs, L[0].y)
    coords["Vitis"] = (xs, L[1].y)
    coords["Prunus"] = (xs, L[2].y)
    coords["Betula"] = (xs, L[3].y)
    coords["Populus"] = (xs, L[4].y)
    coords["Arabidopsis"] = (xs, L[5].y)
    coords["fabids"] = join_nodes(root, coords, "Prunus", "Betula", xs, **dt)
    coords["malvids"] = join_nodes(root, coords, "Populus", "Arabidopsis", xs,
                                   **dt)
    coords["rosids"] = join_nodes(root, coords, "fabids", "malvids", xs, **dt)
    coords["eudicots"] = join_nodes(root, coords, "rosids", "Vitis", xs, **dt)
    coords["angiosperm"] = join_nodes(root, coords, "eudicots", "Amborella",
                                      xs, **dt)

    # Show branch length
    branch_length(root, coords["Amborella"], coords["angiosperm"], ">160.0")
    branch_length(root,
                  coords["eudicots"],
                  coords["angiosperm"],
                  ">78.2",
                  va="top")
    branch_length(root, coords["Vitis"], coords["eudicots"], "138.5")
    branch_length(root, coords["rosids"], coords["eudicots"], "19.8", va="top")
    branch_length(root,
                  coords["Prunus"],
                  coords["fabids"],
                  "104.2",
                  ha="right",
                  va="top")
    branch_length(root,
                  coords["Arabidopsis"],
                  coords["malvids"],
                  "110.2",
                  va="top")
    branch_length(root,
                  coords["fabids"],
                  coords["rosids"],
                  "19.8",
                  ha="right",
                  va="top")
    branch_length(root, coords["malvids"], coords["rosids"], "8.5", va="top")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "birch"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 59
0
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option("--depthfile",
                 help="Generate file with gene and depth [default: %default]")
    p.add_option("--histogram", default=False, action="store_true",
                 help="Plot histograms in PDF")
    p.add_option("--xmax", type="int", help="x-axis maximum to display in plot")
    p.add_option("--title", default=None, help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorfile, = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)
    qranges = []
    sranges = []
    blocks = ac.blocks
    for ib in blocks:
        q, s, t = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        if is_self:
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]
    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))
    if depthfile:
        fw = open(depthfile, "w")
        write_details(fw, details, qbed)

    if is_self:
        return

    stag = "Genome {0} depths".format(sgenome)
    print("{}:".format(stag), file=sys.stderr)
    dss, details = range_depth(sranges, len(sbed))
    if depthfile:
        write_details(fw, details, sbed)
        fw.close()
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    xmax = opts.xmax or max(4, max(list(dsq.keys()) + list(dss.keys())))
    if opts.quota:
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(ax1, dss, 0, xmax, stag, ylabel="Percentage of genome",
                 highlight=range(1, speak + 1))
    quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None,
                 highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern"\
                    .format(qgenome, sgenome, speak, qpeak)
    root = f.add_axes([0, 0, 1, 1])
    vs, pattern = title.split('\n')
    root.text(.5, .97, vs, ha="center", va="center", color="darkslategray")
    root.text(.5, .925, pattern, ha="center", va="center",
                                 color="tomato", size=16)
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)
Ejemplo n.º 60
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile,) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(
        ncols=4, nrows=1, figsize=(iopts.w, iopts.h)
    )
    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=0.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize
        )
    )

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    params = "{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx), (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx), (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d) : (y0d + d), (x0d - d) : (x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug(
            "Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
                i, npixels, len(pixels), 100.0 * npixels / canvas_size
            )
        )

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle(
            (minc, minr), maxc - minc, maxr - minr, fill=False, ec="w", lw=1
        )
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color="w", ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0, fc=rgb_to_hex(o.rgb))
        )
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects