コード例 #1
0
ファイル: synteny.py プロジェクト: xuanblo/jcvi
def main():
    """Command-line entry point: draw a synteny figure and save it to disk.

    Requires exactly three positional arguments (data file, BED file and
    layout file); otherwise the help text is printed and the program exits.
    The output image is named after the data file, with its last extension
    replaced by the selected image format.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]")
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]")
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option("--scalebar", default=False, action="store_true",
                      help="Add scale bar to the plot")
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix: the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes that the Synteny object draws on
    canvas = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, canvas, datafile, bedfile, layoutfile,
            switch=opts.switch, tree=opts.tree,
            extra_features=opts.extra, scalebar=opts.scalebar)

    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
コード例 #2
0
ファイル: age.py プロジェクト: tanghaibao/jcvi
def composite_qc(df_orig, size=(16, 12)):
    """Draw a six-panel (2 rows x 3 columns) composite QC figure.

    The input frame is renamed to display-friendly column names, then one
    histogram or count plot is drawn per panel. Cohort tick labels on the
    last panel are anonymized before panel letters A-F are overlaid.

    df_orig: data frame holding the raw columns listed in `renames` below.
    size: figure size passed to `plt.figure`.
    """
    renames = {
        "hli_calc_age_sample_taken": "Age",
        "hli_calc_gender": "Gender",
        "eth7_max": "Ethnicity",
        "MeanCoverage": "Mean coverage",
        "Chemistry": "Sequencing chemistry",
        "Release Client": "Cohort",
    }
    df = df_orig.rename(columns=renames)

    fig = plt.figure(1, size)
    grid = (2, 7)
    # The two left panels of each row are 2 cells wide; the right one is 3
    ax1 = plt.subplot2grid(grid, (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid(grid, (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid(grid, (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid(grid, (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid(grid, (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid(grid, (1, 4), rowspan=1, colspan=3)

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    ethnicity_order = df['Ethnicity'].value_counts().index
    sns.countplot(x="Ethnicity", data=df, ax=ax3, order=ethnicity_order)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    cohort_order = df['Cohort'].value_counts().index
    sns.countplot(x="Cohort", data=df, ax=ax6, order=cohort_order)

    # Anonymize the cohorts: "Spector" becomes "TwinsUK", "Health Nucleus"
    # keeps its existing tick label, every other cohort is numbered by position
    anonymized = []
    for idx, tick in enumerate(ax6.get_xticklabels()):
        name = tick.get_text()
        if name == "Spector":
            anonymized.append("TwinsUK")
        elif name != "Health Nucleus":
            anonymized.append("C{}".format(idx + 1))
        else:
            anonymized.append(tick)
    ax6.set_xticklabels(anonymized)

    # Slant the cohort labels
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Promote each x-axis label to the panel title
    for panel in (ax1, ax2, ax3, ax4, ax5, ax6):
        panel.set_title(panel.get_xlabel())
        panel.set_xlabel("")

    plt.tight_layout()

    # Transparent full-figure axes that carry the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    top, bottom = .96, .52
    labels = ((.02, top, "A"),
              (.3, top, "B"),
              (.6, top, "C"),
              (.02, bottom, "D"),
              (.3, bottom, "E"),
              (.6, bottom, "F"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
コード例 #3
0
ファイル: coverage.py プロジェクト: rrane/jcvi
def main():
    """Command-line entry point: plot coverage tracks for one chromosome.

    Requires exactly three positional arguments (chromosome name, sizes file
    and data directory); otherwise the help text is printed and the program
    exits. The image is saved as `<chromosome>.<format>`.
    """
    p = OptionParser(__doc__)
    p.add_option("--order",
                help="The order to plot the tracks, comma-separated")
    opts, args, iopts = p.set_image_options()

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Named `chrom` so the `chr` builtin is not shadowed
    chrom, sizes, datadir = args
    order = opts.order
    if order:
        order = order.split(",")
    # No --hlsuffix option is registered in this function, so a plain
    # `opts.hlsuffix` lookup can raise AttributeError. Fall back to None.
    # NOTE(review): confirm that Coverage accepts hlsuffix=None.
    hlsuffix = getattr(opts, "hlsuffix", None)

    sizes = Sizes(sizes)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    canvas = (.12, .35, .8, .35)
    chr_size = sizes.get_size(chrom)
    # Constructed for its drawing side effects; the instance is not needed
    Coverage(fig, root, canvas, chrom, (0, chr_size), datadir,
             order=order, hlsuffix=hlsuffix)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #4
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    karyotype = Karyotype(fig, root, seqids, layout)
    tracks = karyotype.layout

    # Embed a phylogenetic tree to the right: each leaf sits at the y
    # position of the layout track with the same index
    tip_x = .79
    plain = dict(rectangle=False, circle=False)
    taxa = ("Amborella", "Vitis", "Prunus",
            "Betula", "Populus", "Arabidopsis")
    coords = {}
    for idx, taxon in enumerate(taxa):
        coords[taxon] = (tip_x, tracks[idx].y)

    # Internal nodes, listed so that children always exist before parents
    merges = (("fabids", "Prunus", "Betula"),
              ("malvids", "Populus", "Arabidopsis"),
              ("rosids", "fabids", "malvids"),
              ("eudicots", "rosids", "Vitis"),
              ("angiosperm", "eudicots", "Amborella"))
    for parent, first, second in merges:
        coords[parent] = join_nodes(root, coords, first, second,
                                    tip_x, **plain)

    # Show branch length: (child, parent, text, text alignment overrides)
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", dict(va="top")),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", dict(va="top")),
        ("Prunus", "fabids", "104.2", dict(ha="right", va="top")),
        ("Arabidopsis", "malvids", "110.2", dict(va="top")),
        ("fabids", "rosids", "19.8", dict(ha="right", va="top")),
        ("malvids", "rosids", "8.5", dict(va="top")),
    )
    for child, parent, text, alignment in branches:
        branch_length(root, coords[child], coords[parent], text, **alignment)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("birch" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
コード例 #5
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure for pomegranate that calls both
    graphics.karyotype and graphics.synteny.
    """
    # The docstring above doubles as the usage text, so it has to name this
    # subcommand rather than the one it was copied from.
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both objects draw onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #6
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def epoch(args):
    """
    %prog epoch

    Illustrate the methods used in Maggie's epoch paper, in particular, how to
    classify S/G/F/FB/FN for the genes.
    """
    # Use this function's own usage text and parse the arguments that were
    # actually passed in, instead of the module docstring and sys.argv.
    p = OptionParser(epoch.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 4))
    root = fig.add_axes([0, 0, 1, 1])

    # Separators: one horizontal divider, two vertical dividers in the top
    # half and three in the bottom half
    linestyle = dict(lw=2, color="b", alpha=.2, zorder=2)
    root.plot((0, 1), (.5, .5), "--", **linestyle)
    for i in (1./3, 2./3):
        root.plot((i, i), (.5, 1), "--", **linestyle)
    for i in (1./6, 3./6, 5./6):
        root.plot((i, i), (0, .5), "--", **linestyle)

    # Diagrams
    plot_diagram(root, 1./6, 3./4, "S", "syntenic")
    plot_diagram(root, 3./6, 3./4, "F", "missing, with both flankers")
    plot_diagram(root, 5./6, 3./4, "G", "missing, with one flanker")
    plot_diagram(root, 2./6, 1./4, "FB", "has non-coding matches")
    plot_diagram(root, 4./6, 1./4, "FN", "syntenic region has gap")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
コード例 #7
0
ファイル: allmaps.py プロジェクト: JinfengChen/jcvi
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fileA, fileB = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes for the panel letters, then one axes per dataset
    root = fig.add_axes([0, 0, 1, 1])
    left = fig.add_axes([.1, .18, .32, .64])
    right = fig.add_axes([.6, .18, .32, .64])
    tableA = import_data(fileA)
    tableB = import_data(fileB)

    # Settings shared by both panels
    shared = dict(legend=("anchor rate", "runtime"))
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    panels = ((left, tableA, "Yellow catfish"),
              (right, tableB, "Medicago"))
    for axes, table, title in panels:
        subplot_twinx(axes, table, xlabel, ylabels, title=title, **shared)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))

    normalize_axes(root)
    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
コード例 #8
0
ファイル: phylo.py プロジェクト: tanghaibao/jcvi
def _draw_trees(trees, nrow=1, ncol=1, rmargin=.3, iopts=None, outdir=".",
    shfile=None, **kwargs):
    """
    Draw one or multiple trees on one plot.

    trees: mapping from a name to the tree handed to `draw_tree`. When a
        page holds a single tree, the name (minus its last extension) is
        used as the image file name.
    nrow, ncol: grid of trees per page; pages are filled row by row from
        the top-left corner.
    rmargin: forwarded to `draw_tree`.
    iopts: image options (w, h, format, dpi); when falsy a 5x5 figure is
        saved as PDF at 300 dpi.
    outdir: directory that receives the images.
    shfile: optional tab-delimited file read with `DictFile`; the value
        found under a tree's name is passed to `draw_tree` as `SH`.
    kwargs: forwarded to `draw_tree`.
    """
    from jcvi.graphics.tree import draw_tree

    # Without an shfile every lookup misses and SH falls back to None. This
    # replaces a bare `except:` that used to swallow the NameError raised
    # when `SHs` was never defined.
    SHs = DictFile(shfile, delimiter="\t") if shfile else {}

    # Materialize the keys once: works on Python 2 and 3 alike and avoids
    # rebuilding the key list for every tree.
    names = list(trees.keys())
    ntrees = len(names)
    n = nrow * ncol
    npages = int(ceil(float(ntrees) / n))

    # Lower-left corner of every grid cell, ordered left-to-right then
    # top-to-bottom
    xiv = 1. / ncol
    yiv = 1. / nrow
    xstart = list(np.arange(0, 1, xiv)) * nrow
    ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol)))

    figsize = (iopts.w, iopts.h) if iopts else (5, 5)
    fmt = iopts.format if iopts else "pdf"
    dpi = iopts.dpi if iopts else 300

    for page in range(npages):
        fig = plt.figure(1, figsize)
        root = fig.add_axes([0, 0, 1, 1])

        page_names = names[n * page:n * (page + 1)]
        for slot, f in enumerate(page_names):
            ax = fig.add_axes([xstart[slot], ystart[slot], xiv, yiv])
            try:
                SH = SHs[f]
            except KeyError:
                SH = None
            draw_tree(ax, trees[f], rmargin=rmargin, reroot=False,
                supportcolor="r", SH=SH, **kwargs)

        root.set_xlim(0, 1)
        root.set_ylim(0, 1)
        root.set_axis_off()

        if n == 1:
            # One tree per page: name the image after that tree
            image_name = page_names[0].rsplit(".", 1)[0] + "." + fmt
        else:
            image_name = "trees{0}.{1}".format(page, fmt)
        image_name = op.join(outdir, image_name)
        savefig(image_name, dpi=dpi, iopts=iopts)
        # Figure 1 is reused for the next page, so wipe it
        plt.clf()
コード例 #9
0
ファイル: ks.py プロジェクト: JinfengChen/jcvi
def report(args):
    '''
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    '''
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    p = OptionParser(report.__doc__)
    p.add_option("--pdf", default=False, action="store_true",
            help="Generate graphic output for the histogram [default: %default]")
    p.add_option("--components", default=1, type="int",
            help="Number of components to decompose peaks [default: %default]")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file, = args
    data = read_ks_file(ks_file)
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins

    # The first entry of `fields` is skipped; of the rest, only fields whose
    # name contains "ks" are reported.
    for f in fields.split(",")[1:]:
        if "ks" not in f:
            continue

        # Keep only the values inside the requested [ks_min, ks_max] window
        columndata = [getattr(x, f) for x in data]
        columndata = [x for x in columndata if ks_min <= x <= ks_max]

        st = SummaryStats(columndata)
        title = "{0} ({1}): ".format(descriptions[f], ks_file)
        title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".\
                format(st.median, st.firstq, st.thirdq)
        title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".\
                format(st.mean, st.sd, st.size)

        # Two digits when the bin width (ks_max / bins) is below .1
        digit = 2 if (ks_max * 1. / bins) < .1 else 1
        stem_leaf_plot(columndata, 0, ks_max, bins, digit=digit, title=title)

    if not opts.pdf:
        return

    components = opts.components
    data = [x.ng_ks for x in data]
    data = [x for x in data if ks_min <= x <= ks_max]

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp)
    kp.add_data(data, components, fill=opts.fill)
    kp.draw(title=opts.title)
コード例 #10
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    parser = OptionParser(oropetium.__doc__)
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout,
            switch=switch, extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .57, .74, text=True, repeat=True)

    # On the left panel, make a species tree
    label_color = 'lightslategrey'
    leaf_x, step = .16, .03
    leaves = (("oropetium", .7),
              ("setaria", .6),
              ("sorghum", .5),
              ("rice", .4),
              ("brachypodium", .3))
    coords = {}
    for species, y in leaves:
        coords[species] = (leaf_x, y)

    # Each level of internal nodes sits one step further to the left
    level1 = leaf_x - step
    coords["Panicoideae"] = join_nodes(root, coords,
                                       "setaria", "sorghum", level1)
    level2 = level1 - step
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", level2)
    coords["PACMAD"] = join_nodes(root, coords,
                                  "oropetium", "Panicoideae", level2)
    level3 = level2 - step
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", level3)

    # Names of the internal nodes: (tag, vertical offset, vertical anchor)
    node_labels = (("BEP", -.02, "top"),
                   ("Poaceae", -.02, "top"),
                   ("PACMAD", .02, "bottom"))
    for tag, dy, anchor in node_labels:
        nx, ny = coords[tag]
        root.text(nx - .005, ny + dy, tag, rotation=90,
                  ha="right", va=anchor, color=label_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("oropetium" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
コード例 #11
0
ファイル: age.py プロジェクト: tanghaibao/jcvi
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Combine absolute-difference and heritability panels in one figure.

    For each of four traits (telomere length, chrX copy number, chrY copy
    number, TCR-alpha deletions) an absolute-difference panel is drawn in the
    top row (labels A-D) and a paired-values panel in one of the four larger
    panels below (labels E-H).

    df: data frame that holds the trait columns, keyed by "Sample name".
    sameGenderMZ, sameGenderDZ: twin pairings passed to `extract_twin_values`.
    size: figure size passed to `plt.figure`.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)

    # Top row: four single-column panels
    ax1a = plt.subplot2grid(grid, (0, 0), rowspan=2, colspan=1)
    ax2a = plt.subplot2grid(grid, (0, 1), rowspan=2, colspan=1)
    ax3a = plt.subplot2grid(grid, (0, 2), rowspan=2, colspan=1)
    ax4a = plt.subplot2grid(grid, (0, 3), rowspan=2, colspan=1)
    # Middle and bottom rows: two double-column panels each
    ax1b = plt.subplot2grid(grid, (2, 0), rowspan=2, colspan=2)
    ax2b = plt.subplot2grid(grid, (2, 2), rowspan=2, colspan=2)
    ax3b = plt.subplot2grid(grid, (4, 0), rowspan=2, colspan=2)
    ax4b = plt.subplot2grid(grid, (4, 2), rowspan=2, colspan=2)

    sample_col = "Sample name"

    # Telomeres
    telo = extract_trait(df, sample_col, "telomeres.Length")
    telo_mz = extract_twin_values(sameGenderMZ, telo)
    telo_dz = extract_twin_values(sameGenderDZ, telo)
    telo_label = "Telomere length"
    plot_paired_values(ax1b, telo_mz, telo_dz, label=telo_label)
    plot_abs_diff(ax1a, telo_mz, telo_dz, label=telo_label)

    # ChrX copy number, female pairs only
    chrx = extract_trait(df, sample_col, "ccn.chrX")
    chrx_mz = extract_twin_values(sameGenderMZ, chrx, gender="Female")
    chrx_dz = extract_twin_values(sameGenderDZ, chrx, gender="Female")
    chrx_dz = filter_low_values(chrx_dz, 1.75)
    chrx_label = "ChrX copy number"
    plot_paired_values(ax2b, chrx_mz, chrx_dz,
                       gender="Female only", label=chrx_label)
    plot_abs_diff(ax2a, chrx_mz, chrx_dz, label=chrx_label)

    # ChrY copy number, male pairs only
    chry = extract_trait(df, sample_col, "ccn.chrY")
    chry_mz = extract_twin_values(sameGenderMZ, chry, gender="Male")
    chry_dz = extract_twin_values(sameGenderDZ, chry, gender="Male")
    chry_dz = filter_low_values(chry_dz, .75)
    chry_label = "ChrY copy number"
    plot_paired_values(ax3b, chry_mz, chry_dz,
                       gender="Male only", label=chry_label)
    plot_abs_diff(ax3a, chry_mz, chry_dz, label=chry_label)

    # TCR-alpha deletions
    tra = extract_trait(df, sample_col, "TRA.PPM")
    tra_mz = extract_twin_values(sameGenderMZ, tra)
    tra_dz = extract_twin_values(sameGenderDZ, tra)
    tra_label = "TCR-$\\alpha$ deletions"
    plot_paired_values(ax4b, tra_mz, tra_dz, label=tra_label)
    plot_abs_diff(ax4a, tra_mz, tra_dz, label=tra_label)

    plt.tight_layout()

    # Full-figure axes for the panel letters:
    # A-D absdiff (top row), E-H heritability (lower rows)
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.03, .99, 'A'), (.27, .99, 'B'),
              (.53, .99, 'C'), (.77, .99, 'D'),
              (.03, .67, 'E'), (.53, .67, 'F'),
              (.03, .34, 'G'), (.53, .34, 'H'))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
コード例 #12
0
ファイル: synfind.py プロジェクト: xuanblo/jcvi
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    # One horizontal band per benchmark file
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # zip() pairs files with tags, so at most three files are drawn
    for bc, tag in zip(bcs, tags):
        # Each row holds: program name, program count, truth count, shared
        data = []
        with open(bc) as fp:  # closed even if a row fails to parse
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        # An empty file draws nothing instead of dividing by zero
        xwidth = 1. / len(data) if data else 0
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Same output as the Python 2 `print >> sys.stderr` statement,
            # but valid on Python 3 as well
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
コード例 #13
0
ファイル: bites.py プロジェクト: Hensonmw/jcvi
def scenario(args):
    """
    %prog scenario

    Illustration of the two-step genome merger process for B. rapa companion paper.
    """
    # Use this function's own usage text and parse the arguments that were
    # actually passed in, instead of the module docstring and sys.argv.
    p = OptionParser(scenario.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (5, 5))
    root = fig.add_axes([0, 0, 1, 1])

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    # Layout format: (x, y, label, (chr lengths))
    anc = (.5, .9, "Ancestor", (1,))
    s1 = (.2, .6, "Genome I", (1,))
    s2 = (.5, .6, "Genome II", (1,))
    s3 = (.8, .6, "Genome III", (1,))
    tetra = (.35, .4, "Tetraploid I / II", (.5, .9))
    hexa = (.5, .1, "Hexaploid I / II / III", (.36, .46, .9))
    labels = (anc, s1, s2, s3, tetra, hexa)
    connections = ((anc, s1), (anc, s2), (anc, s3),
                   (s1, tetra), (s2, tetra),
                   (tetra, hexa), (s3, hexa))

    xinterval = .02
    yratio = .05
    for xx, yy, label, chrl in labels:
        root.text(xx, yy, label, ha="center", va="center")
        # Shift the chromosomes left by an amount that grows with the label
        # length; successive chromosomes are spaced xinterval apart
        offset = len(label) * .012
        for i, c in enumerate(chrl):
            ya = yy + yratio * c
            yb = yy - yratio * c
            Chromosome(root, xx - offset + i * xinterval, ya, yb, width=.01)

    # Comments
    comments = ((.15, .33, "II dominant"),
                (.25, .03, "III dominant"))

    for xx, yy, c in comments:
        root.text(xx, yy, c, size=9, ha="center", va="center")

    # Branches: only the (x, y) part of each layout entry is needed
    tip = .04
    for a, b in connections:
        xa, ya = a[:2]
        xb, yb = b[:2]
        plt.plot((xa, xb), (ya - tip, yb + 2 * tip), 'k-', lw=2, alpha=.5)

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
コード例 #14
0
ファイル: age.py プロジェクト: tanghaibao/jcvi
def composite_correlation(df, size=(12, 8)):
    """Plot a 2x2 composite figure of four traits against age.

    Each panel is a scatter plot of one column of `df` against the
    "hli_calc_age_sample_taken" column, with points colored by the
    "Chemistry" column. Panel letters A-D are overlaid at the end.

    df: data frame with the age, "Chemistry" and trait columns used below.
    size: figure size passed to `plt.figure`.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))

    age = df["hli_calc_age_sample_taken"]
    # The per-point colors are identical for all panels; compute them once
    point_colors = df["Chemistry"].map(color_map)

    # (axes, trait column, y-limits, y-axis label)
    panels = (
        (ax1, "teloLength", (0, 15), "Telomere length (Kb)"),
        (ax2, "ccn.chrX", (1.8, 2.1), "ChrX copy number"),
        (ax4, "ccn.chrY", (0.8, 1.1), "ChrY copy number"),
        (ax3, "TRA.PPM", (0, 250),
         "$TCR-\\alpha$ deletions (count per million reads)"),
    )
    for ax, column, (ymin, ymax), ylabel in panels:
        ax.scatter(age, df[column], s=10, marker='.', color=point_colors)
        ax.set_ylim(ymin, ymax)
        ax.set_ylabel(ylabel)

    # Legend covers the three named chemistries only (the NaN entry is
    # skipped). list() keeps the slice valid on Python 3, where zip() is lazy.
    legend_pairs = list(zip(chemistry, colors))[:3]
    legend_elements = [Line2D([0], [0], marker='.', color='w', label=chem,
                              markerfacecolor=color, markersize=16)
                       for (chem, color) in legend_pairs]
    age_label = "Chronological age (yr)"
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()
    # Full-figure axes for the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
コード例 #15
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def litchi(args):
    """
    %prog litchi mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    parser = OptionParser(litchi.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .7, .82)

    # On the left panel, make a species tree
    label_color = 'lightslategrey'
    xs, xp = .16, .03
    leaves = (("lychee", .37),
              ("clementine", .5),
              ("cacao", .6),
              ("strawberry", .7),
              ("grape", .8))
    coords = {}
    for species, y in leaves:
        coords[species] = (xs, y)

    # Internal nodes from the tips inwards; each one is placed a further
    # step to the left. The last entry carries extra join_nodes options.
    merges = (("Sapindales", "clementine", "lychee", {}),
              ("Rosid-II", "cacao", "Sapindales", {}),
              ("Rosid", "strawberry", "Rosid-II", {}),
              ("crown", "grape", "Rosid", dict(circle=False)))
    for parent, first, second, extra in merges:
        xs -= xp
        coords[parent] = join_nodes(root, coords, first, second, xs, **extra)

    # Names of the internal nodes
    for tag in ("Rosid", "Rosid-II", "Sapindales"):
        nx, ny = coords[tag]
        root.text(nx - .01, ny - .02, tag, rotation=90,
                  ha="right", va="top", color=label_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("litchi" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
コード例 #16
0
ファイル: tgbs.py プロジェクト: tanghaibao/jcvi
def snpplot(args):
    """
    %prog snpplot counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Read in CDT file: skip the two header lines, then take the values from
    # the fifth column onwards and normalize each row to fractions.
    data = []
    nval = 0  # number of value columns in the last row read (0 if no rows)
    with open(datafile) as fp:  # closed even if a row fails to parse
        next(fp)
        next(fp)
        for row in fp:
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # normalize; the row total is computed once, and an all-zero
            # row is kept as zeros rather than dividing by zero
            total = sum(values)
            if total:
                values = [x / total for x in values]
            data.append(values)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    # A file without data rows yields an empty plot instead of a crash
    yinterval = (ymax - ymin) / len(data) if data else 0
    # Three columns map to r/b/g; otherwise a leading light gray segment
    # precedes them
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One horizontal stacked bar per SNP, drawn top to bottom
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #17
0
ファイル: dotplot.py プロジェクト: JinfengChen/jcvi
def dotplot_main(anchorfile, qbed, sbed, image_name, iopts, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper", genomenames=None,
        sample_number=10000, minfont=5, palette=None, chrlw=.01, title=None):
    """Create the figure, delegate the drawing to `dotplot` and save the image.

    The figure size and dpi come from `iopts`; every keyword argument is
    forwarded unchanged to `dotplot`. The result is written to `image_name`.
    """
    figure = plt.figure(1, (iopts.w, iopts.h))
    canvas = figure.add_axes([0, 0, 1, 1])  # the whole canvas
    plot_area = figure.add_axes([.1, .1, .8, .8])  # the dot plot

    settings = dict(
        vmin=vmin,
        vmax=vmax,
        is_self=is_self,
        synteny=synteny,
        cmap_text=cmap_text,
        cmap=cmap,
        genomenames=genomenames,
        sample_number=sample_number,
        minfont=minfont,
        palette=palette,
        chrlw=chrlw,
        title=title,
    )
    dotplot(anchorfile, qbed, sbed, figure, canvas, plot_area, **settings)

    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #18
0
ファイル: pineapple.py プロジェクト: galaxy001/jcvi
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # The docstring above doubles as the usage text, so it has to name this
    # subcommand.
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both objects draw onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events: one labelled circle per event, each with a
    # "x2" marker to its right, joined by dotted vertical segments
    fc = 'lightslategrey'
    x = .09
    radius = .012
    events = ((.825, r'$\tau$'), (.8, r'$\sigma$'), (.72, r'$\rho$'))
    for ypos, symbol in events:
        TextCircle(root, x, ypos, symbol, radius=radius, fc=fc)
    for ypos, symbol in events:
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #19
0
ファイル: tree.py プロジェクト: bennyyu/jcvi
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # tx: optional tree text. When given, it is drawn instead of the file
    # contents and the output prefix becomes "demo"; the positional
    # argument is still required.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Root the tree using the outgroup. " + \
                      "Use comma to separate multiple taxa.")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")
    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Read through a context manager so the handle is closed promptly
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    draw_tree(root, tx, rmargin=opts.rmargin,
              outgroup=outgroup, gffdir=opts.gffdir, sizes=opts.sizes)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    # Restore matplotlib's default rc settings after saving
    plt.rcdefaults()
コード例 #20
0
ファイル: glyph.py プロジェクト: bennyyu/jcvi
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    align_choices = ("left", "center", "right")

    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment {0} [default: %default]".format(
                     "|".join(align_choices)))
    p.add_option("--noUTR", default=False, action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    # Bind the canvas size locally: the same value is handed to get_setups()
    # and used below to space the genes vertically. Previously it was only
    # passed as a keyword, so the later `canvas / ngenes` relied on a name
    # that this function never defined.
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align
    # Anchor x-position: .2 for left alignment, .8 for every other choice
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01  # gap between the glyph anchor and the gene name
    for genename, mrnabed, cdsbeds in setups:
        # Constructed for its drawing side effect; the object is not needed
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        genename = _(genename)
        # Gene names are only written for left/right alignment
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
コード例 #21
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(amborella.__doc__)
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both components draw onto the same root axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    # Raw strings keep the TeX backslashes literal without relying on
    # Python's deprecated handling of unknown escape sequences
    TextCircle(root, x, .86, r'$\gamma$', radius=radius)
    TextCircle(root, x, .95, r'$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    # Dotted cap around the upper circle, traced from -70 to 249 degrees
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    x, y = zip(*pts)
    root.plot(x, y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #22
0
ファイル: napus.py プロジェクト: rrane/jcvi
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    # Event markers and cumulative multiplication labels, top to bottom
    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", ha="center", va="top", color=fc)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$", ha="center", va="top", color=fc)
    root.text(.1, .67 + ot, r"Brassica triplication", ha="center",
                va="top", color=fc, size=11)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$", ha="center", va="top", color=fc)
    root.text(.1, .42 + ot, r"Allo-tetraploidy", ha="center",
                va="top", color=fc, size=11)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$", ha="center", va="top", color=fc)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    # Each adjacent literal needs its own `r` prefix: the prefix on the first
    # one does not extend to the second, whose `\m` would otherwise be an
    # invalid escape sequence
    root.text(.5, .2 + ot, r"\noindent\textit{Brassica napus}\\"
                r"(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", ha="center",
                size=16, color="k", bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #23
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def mtdotplots(args):
    """
    %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors

    Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
    graphics.dotplot() function calls as panel A and B.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.graphics.dotplot import check_beds, dotplot

    parser = OptionParser(mtdotplots.__doc__)
    parser.set_beds()
    opts, args, iopts = parser.set_image_options(args, figsize="16x8", dpi=90)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    dir_a, dir_b, anchors_name = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Axes creation order is kept: both half-canvases first, then both plots
    left_canvas = fig.add_axes([0, 0, .5, 1])
    right_canvas = fig.add_axes([.5, 0, .5, 1])
    left_plot = fig.add_axes([.05, .1, .4, .8])
    right_plot = fig.add_axes([.55, .1, .4, .8])

    # Panel A: anchors file located under the first directory
    anchorfile = op.join(dir_a, anchors_name)
    qbed, sbed, _qorder, _sorder, is_self = check_beds(anchorfile, parser, opts)
    dotplot(anchorfile, qbed, sbed, fig, left_canvas, left_plot,
            is_self=is_self, genomenames="Mt3.5_Mt3.5")

    # Panel B: clear the bed options before the second check_beds() call
    opts.qbed = opts.sbed = None
    anchorfile = op.join(dir_b, anchors_name)
    qbed, sbed, _qorder, _sorder, is_self = check_beds(anchorfile, parser, opts)
    dotplot(anchorfile, qbed, sbed, fig, right_canvas, right_plot,
            is_self=is_self, genomenames="Mt4.0_Mt4.0")

    # Panel letters in the top-left corner of each half
    for xpos, letter in ((.03, "A"), (.53, "B")):
        root.text(xpos, .95, letter, ha="center", va="center", size=36)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "mtdotplots" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #24
0
ファイル: hic.py プロジェクト: xuanblo/jcvi
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args, figsize="16x8", style="white", cmap="coolwarm",
        format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour_spec, clmfile, anchorsfile = args
    contig_order = tour_spec.split(",")

    # Output name falls back to "movieframe.<format>"; the label falls back
    # to the output file's base name without its extension
    image_name = opts.outfile
    if not image_name:
        image_name = "movieframe." + iopts.format
    label = opts.label
    if not label:
        label = op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(contig_order, clm.tig_to_size)
    matrix = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Axes are added in the same order as before: whole canvas, heatmap,
    # dot plot, dot plot canvas
    root = fig.add_axes([0, 0, 1, 1])
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])
    dotplot_ax = fig.add_axes([.55, .1, .4, .8])
    dotplot_canvas = fig.add_axes([.5, 0, .5, 1])

    # Left half
    plot_heatmap(heatmap_ax, matrix, breaks, iopts)

    # Right half
    qbed, sbed, _qorder, _sorder, _is_self = check_beds(
        anchorsfile, parser, opts, sorted=False)
    dotplot(anchorsfile, qbed, sbed, fig, dotplot_canvas, dotplot_ax,
            sep=False, title="")

    # Two header lines centered at the top of the figure
    root.text(.5, .98, clm.name, color="g", ha="center", va="center")
    root.text(.5, .95, label, color="darkslategray", ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #25
0
ファイル: ks.py プロジェクト: ascendo/jcvi
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. If the layout
    file is missing then a template file listing all ks files will be written.

    The layout file contains the Ks file, number of components, colors, and labels:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^

    If color or marker is missing, then a random one will be assigned.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(multireport.__doc__)
    p.set_outfile(outfile="Ks_plot.pdf")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    layoutfile, = args
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins
    fill = opts.fill
    layout = Layout(layoutfile)
    # Echo the parsed layout to stderr. Written with an explicit write() so
    # it works on both Python 2 and Python 3; the old `print >> sys.stderr`
    # form raises TypeError on Python 3.
    sys.stderr.write(str(layout) + "\n")

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp)
    for lo in layout:
        data = KsFile(lo.ksfile)
        data = [x.ng_ks for x in data]
        # Keep only the values inside the requested [vmin, vmax] window
        data = [x for x in data if ks_min <= x <= ks_max]
        kp.add_data(data, lo.components, label=lo.label,
                    color=lo.color, marker=lo.marker,
                    fill=fill, fitted=opts.fit)

    kp.draw(title=opts.title, filename=opts.outfile)
コード例 #26
0
ファイル: ks.py プロジェクト: rrane/jcvi
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. The layout file
    contains the Ks file to plot, number of components, colors, labels. For example:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.graphics.base import plt

    parser = OptionParser(multireport.__doc__)
    parser.add_option("--nofit", default=False, action="store_true",
                      help="Do not plot fitted lines [default: %default]")
    add_plot_options(parser)
    opts, args = parser.parse_args(args)

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (layoutfile,) = args
    lower, upper = opts.vmin, opts.vmax
    nbins = opts.bins
    fill = opts.fill
    fitted = not opts.nofit
    layout = Layout(layoutfile)

    fig = plt.figure(1, (5, 5))
    ax = fig.add_axes([.12, .1, .8, .8])
    ksplot = KsPlot(ax, upper, nbins, legendp=opts.legendp)
    for entry in layout:
        # Collect the ng_ks value of every record, then keep only those
        # inside the [vmin, vmax] window
        values = [record.ng_ks for record in read_ks_file(entry.ksfile)]
        values = [v for v in values if lower <= v <= upper]
        ksplot.add_data(values, entry.components, label=entry.label,
                        color=entry.color, marker=entry.marker,
                        fill=fill, fitted=fitted)

    ksplot.draw(title=opts.title)
コード例 #27
0
ファイル: karyotype.py プロジェクト: JinfengChen/jcvi
def main():
    """
    Command-line entry point: draw a karyotype from a seqids file and a
    layout file and save it as `karyotype.<format>`.

    The usage text comes from the module docstring, not from this one.
    """
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args

    canvas = plt.figure(1, (iopts.w, iopts.h))
    root = canvas.add_axes([0, 0, 1, 1])

    Karyotype(canvas, root, seqidsfile, layoutfile)

    # Root axes form a frameless unit square
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "karyotype" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #28
0
ファイル: misc.py プロジェクト: tanghaibao/jcvi
def utricularia(args):
    # Synteny figure with two framed regions, optional zoom captions and a
    # gene-orientation legend. The usage text is borrowed from
    # jcvi.graphics.synteny.main; this is deliberately a comment rather than
    # a docstring so the function's `__doc__` stays unset.
    from jcvi.graphics.synteny import main as synteny_main

    parser = OptionParser(synteny_main.__doc__)
    parser.add_option("--switch",
                      help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args

    prefix = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    synteny = Synteny(fig, root, datafile, bedfile, layoutfile,
                      loc_label=False, switch=opts.switch)

    # Two unfilled rounded frames: upper block and lower block
    frame_color = "lightslategrey"
    for corner, height in (((.02, .69), .24), ((.02, .09), .48)):
        RoundRect(root, corner, .96, height, fill=False, lw=2, ec=frame_color)

    # Zoom captions use the ratio of the second and the last layout rows,
    # and are only written when that ratio differs from 1
    zoom_captions = (
        (synteny.layout[1].ratio, .89),
        (synteny.layout[-1].ratio, .12),
    )
    for ratio, ypos in zoom_captions:
        if ratio != 1:
            caption = "{}x zoom".format(ratio).replace(".0x", "x")
            root.text(.96, ypos, caption,
                      color=frame_color, ha="right", va="center", size=14)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .22, .3, .64, text=True)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #29
0
ファイル: assembly.py プロジェクト: JinfengChen/jcvi
def plot_one_scaffold(scaffoldID, ssizes, sbed, trios, imagename, iopts,
                      highlights=None):
    """
    Draw one scaffold against every (blast file, query sizes, query bed)
    trio, one subplot per trio in a single row, and save to `imagename`.

    The figure title shows the scaffold ID and its size as returned by
    `ssizes.get_size()`.
    """
    npanels = len(trios)
    fig = plt.figure(1, (14, 8))
    # Figure number 1 is reused between calls, so wipe it first
    plt.cla()
    plt.clf()
    root = fig.add_axes([0, 0, 1, 1])

    panels = []
    for position in range(1, npanels + 1):
        panels.append(fig.add_subplot(1, npanels, position))
    scaffold_size = ssizes.get_size(scaffoldID)

    for (blastf, qsizes, qbed), panel in zip(trios, panels):
        scaffolding(panel, scaffoldID, blastf, qsizes, ssizes, qbed, sbed,
                    highlights=highlights)

    title = "{0}   (size={1})".format(scaffoldID, thousands(scaffold_size))
    root.text(.5, .95, title, size=18, ha="center", color='b')
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
コード例 #30
0
ファイル: allmaps.py プロジェクト: radaniba/jcvi
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Four panels on a 2x2 grid: A and B on the top row, C and D below
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # The last legend entry is a raw string so that `\%` stays a literal
    # backslash-percent instead of an invalid Python escape sequence
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
                      legend=("intra-chromosomal", "inter-chromosomal",
                              r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #31
0
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(demo.__doc__)
    opts, args = parser.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = .23
    dup_space = .025
    default_regulator = "gm"
    # One entry per gene copy: (x, y, colors of its two regulatory elements).
    # A 'w' color means that element is not drawn.
    genes = [
        (.5, .9, default_regulator),  # ancestral gene
        (.5, .9 - panel_space + dup_space, default_regulator),  # identical copies
        (.5, .9 - panel_space - dup_space, default_regulator),
        (.5, .9 - 2 * panel_space + dup_space, "wm"),  # degenerate copies
        (.5, .9 - 2 * panel_space - dup_space, default_regulator),
        (.2, .9 - 3 * panel_space + dup_space, "wm"),  # sub-functionalization
        (.2, .9 - 3 * panel_space - dup_space, "gw"),
        (.5, .9 - 3 * panel_space + dup_space, "wb"),  # neo-functionalization
        (.5, .9 - 3 * panel_space - dup_space, default_regulator),
        (.8, .9 - 3 * panel_space + dup_space, "ww"),  # non-functionalization
        (.8, .9 - 3 * panel_space - dup_space, default_regulator),
    ]

    width = .24
    rotation = 30
    tip = .02
    for index, (gx, gy, regulator) in enumerate(genes):
        # Backbone glyph spanning the full width
        Glyph(root, gx - .5 * width, gx + .5 * width, gy)
        if index == 9:  # upper copy for non-functionalization
            continue

        # coding region
        Glyph(root, gx - .16 * width, gx + .45 * width, gy, fc="k")

        # two regulatory elements
        x1, x2 = gx - .4 * width, gx - .28 * width
        for cx, fc in zip((x1, x2), regulator):
            if fc != 'w':
                DoubleCircle(root, cx, gy, fc=fc)

        # Tissue names above the ancestral gene and the neo-functionalized copy
        if index == 0:
            label_y = gy + tip
            root.text(x1, label_y, "Flower", rotation=rotation, va="bottom")
            root.text(x2, label_y, "Root", rotation=rotation, va="bottom")
        elif index == 7:
            label_y = gy + tip
            root.text(x2, label_y, "Leaf", rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = .08
    center_x = .5
    for arrow_y in (.3, .53, .76):
        root.annotate(" ", (center_x, arrow_y), (center_x, arrow_y + arrow_dist),
                      arrowprops=arrowprops)

    # Side arrows: from the center column out to the left and right fates
    arrow_y = .3
    for side_x in (.2, .8):
        root.annotate(" ", (side_x, arrow_y), (.5, arrow_y + arrow_dist),
                      arrowprops=arrowprops)

    # Duplication, Degeneration
    process_labels = ((.76, "Duplication"), (.53, "Degeneration"))
    for py, process in process_labels:
        root.text(.6, py + .02, process, fontweight="bold")

    # Label of fates
    fate_labels = (
        (.2, "Subfunctionalization"),
        (.5, "Neofunctionalization"),
        (.8, "Nonfunctionalization"),
    )
    for fx, fate in fate_labels:
        RoundLabel(root, fx, .05, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    savefig(figname, dpi=300)
コード例 #32
0
ファイル: align.py プロジェクト: wroldwiedbwe/jcvi
def main():
    """
    Command-line entry point: draw a 3x3 grid of alignment cartoons.

    Rows are inversion, indel and duplication; columns are assembly, read
    and optical map alignment. The only accepted positional argument is
    "demo", and the figure is saved as `demo.<format>`. The usage text comes
    from the module docstring, not from this one.
    """
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (mode, ) = args
    assert mode == "demo"

    a, b = 30, 70
    pad = 0.08
    w = 0.31
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    ypos = 1 - pad
    for _ in range(3):
        root.plot((0, 1), (ypos, ypos), "-", lw=2, color="lightgray")
        ypos -= w

    # Row headers
    xpos = pad * 0.6
    ypos = 1 - pad - 0.5 * w
    for header in ("Inversion", "Indel", "Duplication"):
        root.text(xpos, ypos, header, ha="center", va="center")
        ypos -= w

    # Column headers
    xpos = pad + 0.5 * w
    ypos = 1 - pad / 2
    for header in ("Assembly alignment", "Read alignment",
                   "Optical map alignment"):
        root.text(xpos, ypos, header, ha="center", va="center")
        xpos += w

    # One column per aligner class, drawn left to right; within a column the
    # panels go top to bottom: invert, delete, duplicate. Only the pairwise
    # aligner passes an explicit gap to duplicate().
    columns = (
        (PairwiseAlign, pad, {"gap": 5}),
        (ReadAlign, pad + w, {}),
        (OpticalMapAlign, pad + 2 * w, {}),
    )
    for aligner, xstart, duplicate_kwargs in columns:
        panel = aligner(fig, [xstart, 2 * w, w, w])
        panel.invert(a, b)
        panel.draw()

        panel = aligner(fig, [xstart, w, w, w])
        panel.delete(a, b)
        panel.draw()

        panel = aligner(fig, [xstart, 0, w, w])
        panel.duplicate(a, b, **duplicate_kwargs)
        panel.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #33
0
ファイル: tree.py プロジェクト: lqsae/jcvi
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(main.__doc__)
    parser.add_option("--outgroup",
                      help="Outgroup for rerooting the tree. "
                      "Use comma to separate multiple taxa.")
    parser.add_option("--noreroot", default=False, action="store_true",
                      help="Don't reroot the input tree")
    parser.add_option("--rmargin", default=0.2, type="float",
                      help="Set blank rmargin to the right")
    parser.add_option("--gffdir", default=None,
                      help="The directory that contain GFF files")
    parser.add_option("--sizes", default=None,
                      help="The FASTA file or the sizes file")
    parser.add_option("--SH", default=None, type="string",
                      help="SH test p-value")

    node_style = parser.add_option_group("Node style")
    node_style.add_option("--leafcolor", default="k",
                          help="Font color for the OTUs")
    node_style.add_option("--leaffont", default=12,
                          help="Font size for the OTUs")
    node_style.add_option(
        "--leafinfo",
        help="CSV file specifying the leaves: name,color,new_name")
    node_style.add_option("--scutoff", default=0, type="int",
                          help="cutoff for displaying node support, 0-100")
    # The two --no_* switches store False into positively-named destinations
    node_style.add_option("--no_support", dest="support", default=True,
                          action="store_false",
                          help="Do not print node support values")
    node_style.add_option("--no_internal", dest="internal", default=True,
                          action="store_false",
                          help="Do not show internal nodes")

    edge_style = parser.add_option_group("Edge style")
    edge_style.add_option(
        "--dashedoutgroup", default=False, action="store_true",
        help="Gray out the edges connecting outgroup and non-outgroup")

    annotations = parser.add_option_group("Additional annotations")
    annotations.add_option("--geoscale", default=False, action="store_true",
                           help="Plot geological scale")
    annotations.add_option(
        "--wgdinfo",
        help="CSV specifying the position and style of WGD events")
    annotations.add_option(
        "--groups",
        help="Group names from top to bottom, to the right of the tree. "
        "Each distinct color in --leafinfo is considered part of the same group. "
        "Separate the names with comma, such as 'eudicots,,monocots,'. "
        "Empty names will be ignored for that specific group. ")

    opts, args, iopts = parser.set_image_options(args, figsize="10x7")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (datafile, ) = args
    reroot = not opts.noreroot
    outgroup = opts.outgroup.split(",") if opts.outgroup else None

    if datafile == "demo":
        # Built-in example tree; there is no HPD information in this case
        hpd = None
        tree = Tree("""(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537,
        (Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985,
        ((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332,
        (Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);""")
    else:
        logging.debug("Load tree file `{0}`".format(datafile))
        tree, hpd = parse_tree(datafile)

    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # A support color of None is passed when --no_support is given
    supportcolor = "k" if opts.support else None
    margin, rmargin = 0.1, opts.rmargin  # Left and right margin
    leafinfo = LeafInfoFile(opts.leafinfo).cache if opts.leafinfo else None
    wgdinfo = WGDInfoFile(opts.wgdinfo).cache if opts.wgdinfo else None
    groups = opts.groups.split(",") if opts.groups else []

    tree_settings = dict(
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        ymargin=margin,
        supportcolor=supportcolor,
        internal=opts.internal,
        outgroup=outgroup,
        dashedoutgroup=opts.dashedoutgroup,
        reroot=reroot,
        gffdir=opts.gffdir,
        sizes=opts.sizes,
        SH=opts.SH,
        scutoff=opts.scutoff,
        leafcolor=opts.leafcolor,
        leaffont=opts.leaffont,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=opts.geoscale,
        groups=groups,
    )
    draw_tree(root, tree, **tree_settings)

    normalize_axes(root)

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #34
0
def fig4(args):
    """
    %prog fig4 layout data

    Napus Figure 4A displays an example deleted region for quartet chromosomes,
    showing read alignments from high GL and low GL lines.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(fig4.__doc__)
    parser.add_option("--gauge_step", default=200000, type="int",
                      help="Step size for the base scale")
    opts, args, iopts = parser.set_image_options(args, figsize="9x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    layoutfile, datadir = args
    tracks = F4ALayout(layoutfile, datadir=datadir)

    gauge_step = opts.gauge_step
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Synteny inputs are fixed file names looked up in the working directory
    block, napusbed, slayout = "r28.txt", "all.bed", "r28.layout"
    synteny = Synteny(fig, root, block, napusbed, slayout, chr_label=False)
    extents = [(region.xstart, region.xend) for region in synteny.rr]

    track_height = .1
    order = "bzh,yudal".split(",")
    labels = (r"\textit{B. napus} A$\mathsf{_n}$2",
              r"\textit{B. rapa} A$\mathsf{_r}$2",
              r"\textit{B. oleracea} C$\mathsf{_o}$2",
              r"\textit{B. napus} C$\mathsf{_n}$2")
    for track in tracks:
        # Each coverage track borrows the horizontal extent of every other
        # synteny region (index 2 * i)
        xstart, xend = extents[2 * track.i]
        canvas = [xstart, track.y, xend - xstart, track_height]
        root.text(xstart - track_height, track.y + track_height / 2,
                  labels[track.i], ha="center", va="center")

        # box_region looks like "<name>:<begin>-<end>"; only the two
        # coordinates are used, as vertical marker lines
        _name, span = track.box_region.split(":")
        begin, end = span.split("-")
        vlines = [int(begin), int(end)]
        Coverage(fig, root, canvas,
                 track.seqid, (track.start, track.end),
                 datadir,
                 order=order,
                 gauge="top",
                 plot_chr_label=False,
                 gauge_step=gauge_step,
                 palette="gray",
                 cap=40,
                 hlsuffix="regions.forhaibao",
                 vlines=vlines)

    # Highlight GSL biosynthesis genes
    highlighted = ((3, "Bra029311"), (5, "Bo2g161590"))
    for gene_key in highlighted:
        (gx_start, gy_start), (gx_end, _gy_end) = synteny.gg[gene_key]
        mid_x = (gx_start + gx_end) / 2
        # Short arrow pointing up at the gene from just below it
        marker = FancyArrowPatch(posA=(mid_x, gy_start - .04),
                                 posB=(mid_x, gy_start - .005),
                                 arrowstyle="fancy,head_width=6,head_length=8",
                                 lw=3, fc='k', ec='k', zorder=20)
        root.add_patch(marker)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "napus-fig4." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #35
0
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime text and is left exactly as written.
    p = OptionParser(wheel.__doc__)
    p.add_option(
        "--column",
        default="score",
        choices=("score", "percentile"),
        help="Which column to extract from `datafile.csv`",
    )
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)
    # Keep only the entries of `groups` that also appear in the data; from
    # here on `df` and `groups` are plain lists aligned with `labels`
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    # `root` is the full-figure canvas for text; `ax` is the polar plot
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    # ax = plt.subplot(111, projection='polar')
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    # Color palette, one color per group; groups beyond the end of this
    # list get no colored markers because zip() stops at the shorter input
    brewer = [
        "#FF3B30",
        "#DD43A0",
        "#5856D6",
        "#007AFE",
        "#56BDEC",
        "#4CD8BA",
        "#4CD864",
        "#B0F457",
        "#FEF221",
        "#FFCC01",
        "#FF9500",
        "#FF3B30",
    ]

    # Baseline
    # `theta`: one angle per category over a full turn starting at 1.5*pi,
    # endpoint excluded. `_theta`: a denser sampling of the same range
    # (endpoint included) used for drawing circles.
    theta = np.linspace(1.5 * np.pi,
                        3.5 * np.pi,
                        endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    # Radial limits, contour radii and the "normal" interval depend on
    # whether raw scores or percentiles are plotted
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-0.5, 2) if column == "score" else (10, 90)

    # Grid
    # Radial spokes are only drawn in score mode
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis("off")

    # Contours
    # One circle at each of the two radii in `plim`
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings)
    # Collapse runs of consecutive equal group values; `gg` holds, for each
    # run, the list of category indices belonging to it
    # NOTE(review): `groupby` is presumably itertools.groupby - confirm
    # against the file's imports
    collapsed_groups = []
    gg = []
    for group, c in groupby(enumerate(groups), lambda x: x[1]):
        c = [x[0] for x in list(c)]
        collapsed_groups.append(group)
        gg.append(c)

    # Sector outlines are currently switched off by this hard-coded flag
    show_sector = False
    if show_sector:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * 0.9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(
                ax,
                theta[tmin],
                theta[tmax],
                theta_pad,
                R * 0.95,
                ls="-",
                color=color,
                lw=2,
            )

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=0.25)
    for color, group in zip(brewer, gg):
        # Points inside the `ci` interval vs. points outside it. Both sets
        # are drawn with alpha 1, so they currently look the same.
        hidden_data = [(theta[x], r[x]) for x in group
                       if (ci[0] <= r[x] <= ci[1])]
        shown_data = [(theta[x], r[x]) for x in group
                      if (r[x] < ci[0] or r[x] > ci[1])]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    # Written to stdout as JavaScript-style `var ... = [...]` lines
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(",".join("{:.1f}".format(degrees(x))
                                             for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(",".join(
        ['"{}"'.format(x) for x in diseaseNames])))

    # Labels
    from math import cos, sin

    # `r` is reused here as the label radius in root-axes coordinates
    # (labels sit on a circle of radius 0.5 around the figure center)
    r = 0.5
    for i, label in enumerate(labels):
        tl = theta[i]
        x, y = 0.5 + r * cos(tl), 0.5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            # Rotate by half a turn so that the text is not upside down
            d -= 180
        root.text(x,
                  y,
                  label,
                  size=4,
                  rotation=d,
                  ha="center",
                  va="center",
                  color=linecolor)
        print(x, y, label)

    # Add baseline
    # Dotted reference circle at 0 (score) or 50 (percentile)
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    # In percentile mode, a single full-circle bar spanning ci[0]..ci[1]
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]],
               width=2 * np.pi,
               bottom=ci[0],
               fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    # Output name: <datafile prefix>-<column>.<format>
    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #36
0
ファイル: allmaps.py プロジェクト: zjwang6/jcvi
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly three positionals are required
    # (prefix, seqid, linkage group). Reads `<prefix>.lifted.bed` and
    # `<prefix>.agp`, and writes `estimategaps.<format>`.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: scatter of g.scatter_data with the curve `spl` drawn on top
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    # NOTE(review): the comparison below assumes `spearmanr` returns a plain
    # number, not a (rho, p-value) pair -- confirm against its definition.
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B: the curve `spld`, with open circles at the positions in `pp`
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1): the scaffold names, marker offsets and
    # the numbers in the annotation strings below are all hard-coded.
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict(
        (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation))
        for x in g.agp
        if not x.is_gap
    )
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = 0.6 / total_size  # the two scaffolds plus the gap span 0.6 of the canvas
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize  # canvas units per base pair in panel B

    # Zoom: dotted guides from panel B's bottom edge down to panel C
    lsg = "lightslategray"
    root.plot((xstart + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((xstart + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg)
    spans = []
    for tag, size, marker_bp, beg in zip(
        (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        marker = beg + marker_bp * ratio
        spans.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*spans)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: `\L` is not a valid escape sequence in a normal literal.
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed; the `pf` argument is only used for the inputs.
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #37
0
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly two positionals are required (seqids
    # file, karyotype layout file). Writes `napus.<format>`.
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    # Keyword arguments shared by every text label in the left panel
    label = dict(ha="center", va="top", color=fc)

    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", **label)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$", **label)
    root.text(.1, .67 + ot, r"Brassica triplication", size=11, **label)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$", **label)
    root.text(.1, .42 + ot, r"Allo-tetraploidy", size=11, **label)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$", **label)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    # Both halves are raw strings: `\m` is not a valid escape sequence in a
    # normal literal. The resulting text is unchanged.
    root.text(.5,
              .2 + ot, r"\noindent\textit{Brassica napus}\\"
              r"(A$\mathsf{_n}$C$\mathsf{_n}$ genome)",
              ha="center",
              size=16,
              color="k",
              bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #38
0
ファイル: hic.py プロジェクト: fasemoreakinyemi/jcvi
def heatmap(args):
    """
    %prog heatmap input.npy genome.json

    Plot heatmap based on .npy data file. The .npy stores a square matrix with
    bins of genome, and cells inside the matrix represent number of links
    between bin i and bin j. The `genome.json` contains the offsets of each
    contig/chr so that we know where to draw boundary lines, or extract per
    contig/chromosome heatmap.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly two positionals are required (.npy
    # matrix, .json header). Writes `<title>.<format>`, where the title is the
    # .npy file name without extension, plus `-<contig>` when --chr is given.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--resolution", default=500000, type="int",
                 help="Resolution when counting the links")
    p.add_option("--chr", help="Plot this contig/chr only")
    p.add_option("--nobreaks", default=False, action="store_true",
                 help="Do not plot breaks (esp. if contigs are small)")
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm",
                                            format="png", dpi=120)

    if len(args) != 2:
        sys.exit(not p.print_help())

    npyfile, jsonfile = args
    contig = opts.chr
    # Load contig/chromosome starts and sizes; close the file once parsed
    with open(jsonfile) as fp:
        header = json.load(fp)
    # A resolution recorded in the header takes precedence over --resolution
    resolution = header.get("resolution", opts.resolution)
    logging.debug("Resolution set to {}".format(resolution))
    # Load the matrix
    A = np.load(npyfile)

    # Select specific submatrix
    if contig:
        contig_start = header["starts"][contig]
        contig_size = header["sizes"][contig]
        contig_end = contig_start + contig_size
        A = A[contig_start: contig_end, contig_start: contig_end]

    # Several concerns in practice:
    # The diagonal counts may be too strong, this can either be resolved by
    # masking them. Or perform a log transform on the entire heatmap.
    B = A.astype("float64")
    B += 1.0
    B = np.log(B)
    vmin, vmax = 1, 7
    B[B < vmin] = vmin
    B[B > vmax] = vmax
    # Function-call form: valid on Python 3 and prints the same on Python 2
    print(B)
    logging.debug("Matrix log-transformation and thresholding ({}-{}) done"
                  .format(vmin, vmax))

    # Canvas
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])       # whole canvas
    ax = fig.add_axes([.05, .05, .9, .9])   # just the heatmap

    # dict.values() is a view on Python 3 and cannot be extended in place, so
    # copy it into a list first.
    breaks = list(header["starts"].values())
    breaks.append(header["total_bins"])   # This is actually discarded
    breaks = sorted(breaks)[1:]
    if contig or opts.nobreaks:
        breaks = []
    plot_heatmap(ax, B, breaks, iopts, binsize=resolution)

    # Title
    pf = npyfile.rsplit(".", 1)[0]
    title = pf
    if contig:
        title += "-{}".format(contig)
    root.text(.5, .98, title, color="darkslategray", size=18,
              ha="center", va="center")

    normalize_axes(root)
    image_name = title + "." + iopts.format
    # macOS sometimes has way too verbose output
    logging.getLogger().setLevel(logging.CRITICAL)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #39
0
def cotton(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphic.synteny.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly five positionals are required.
    # Writes `cotton.<format>`.
    p = OptionParser(cotton.__doc__)
    p.add_option("--depthfile", help="Use depth info in this file")
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        # `not None` is True, so a usage error exits with status 1, matching
        # the other entry points (a bare sys.exit(None) would report success).
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    depthfile = opts.depthfile

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    kt = Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    light = "lightslategrey"
    # Show the dup depth along the cotton chromosomes
    if depthfile:
        ymin, ymax = 0.9, 0.95
        root.text(0.11, 0.96, "Cotton duplication level", color="gray", size=10)
        root.plot([0.1, 0.95], [ymin, ymin], color="gray")
        root.text(0.96, 0.9, "1x", color="gray", va="center")
        root.plot([0.1, 0.95], [ymax, ymax], color="gray")
        root.text(0.96, 0.95, "6x", color="gray", va="center")

        track = kt.tracks[0]  # Cotton
        depths = []
        with open(depthfile) as fp:
            for row in fp:
                # Three whitespace-separated columns; the second is unused
                a, _, depth = row.split()
                depth = int(depth)
                try:
                    coords = track.get_coords(a)
                except KeyError:
                    # Rows whose first column the track does not know are skipped
                    continue
                depths.append((coords, depth))

        # Nothing to draw (and nothing to unpack) when no row matched the track
        if depths:
            depths.sort(key=lambda x: (x[0], -x[1]))
            xx, yy = zip(*depths)
            # Each depth level adds 0.01; depth 1 sits on ymin and 6 on ymax
            yy = [ymin + 0.01 * (x - 1) for x in yy]
            root.plot(xx, yy, "-", color=light)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.5, 0.68, 0.5)

    # Zoom
    xpos = 0.835
    ytop = 0.9
    xmin, xmax = 0.18, 0.82
    ymin, ymax = ytop, 0.55
    lc = "k"
    kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3)
    root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs)
    root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs)
    root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs)
    RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light)

    # Panels
    root.text(0.05, 0.95, "a", size=20, fontweight="bold")
    root.text(0.1, 0.45, "b", size=20, fontweight="bold")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "cotton"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #40
0
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    from jcvi.graphics.chromosome import HorizontalChromosome

    parser = OptionParser(composite.__doc__)
    for flag, text in (
        ("--lines", "Features to plot in lineplot [default: %default]"),
        ("--bars", "Features to plot in bars [default: %default]"),
        ("--altbars", "Features to plot in alt-bars [default: %default]"),
    ):
        parser.add_option(flag, help=text)
    parser.add_option(
        "--fatten",
        default=False,
        action="store_true",
        help="Help visualize certain narrow features [default: %default]",
    )
    parser.add_option(
        "--mode",
        default="span",
        choices=("span", "count", "score"),
        help="Accumulate feature based on [default: %default]",
    )
    add_window_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fastafile, seqid = args
    window, shift, _subtract = check_window_options(opts)
    fatten = opts.fatten

    def beds_from(option_value):
        # An unset option contributes no BED files at all
        if not option_value:
            return []
        return get_beds(option_value.split(","))

    linebeds = beds_from(opts.lines)
    barbeds = beds_from(opts.bars)
    altbarbeds = beds_from(opts.altbars)

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode)

    margin = .12
    clen = Sizes(fastafile).mapping[seqid]
    nbins = get_nbins(clen, shift)

    # Hide the major tick marks on both axes
    for axis_name in ("xtick", "ytick"):
        plt.rcParams[axis_name + ".major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(.5, .95, seqid, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen

    def to_canvas(pos):
        # Map a sequence coordinate onto the canvas x axis
        return xstart + ratio * pos

    # Line plots occupy the upper axes
    ax = fig.add_axes([xstart, .6, xlen, .3])
    lineplot(ax, linebins, nbins, seqid, window, shift)

    track_y = .5         # y of the track being drawn; walks downwards
    track_gap = .08
    half_height = .01
    min_span = .0025     # width that --fatten widens narrow features to
    fill = "darkslategray"

    # Bar plots: one chromosome outline per BED, one rectangle per feature
    for bedname in barbeds:
        root.text(xend + .01, track_y, bedname.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, track_y, height=.02)
        for feature in Bed(bedname):
            left, right = to_canvas(feature.start), to_canvas(feature.end)
            width = right - left
            if fatten and width < min_span:
                width = min_span

            box = Rectangle((left, track_y - half_height), width,
                            2 * half_height, lw=0, fc=fill)
            root.add_patch(box)
        track_y -= track_gap

    # Alternative bar plots: consecutive drawn features flip between two
    # levels. The sign carries over from one BED file to the next.
    flip = half_height / 2
    for bedname in altbarbeds:
        root.text(xend + .01, track_y, bedname.split(".")[0], va="center")
        for feature in Bed(bedname):
            left, right = to_canvas(feature.start), to_canvas(feature.end)
            width = right - left
            if width < .0001:
                # Too narrow to draw; does not consume a flip
                continue
            flip = -flip
            box = Rectangle((left, track_y + flip), width, .003,
                            lw=0, fc=fill)
            root.add_patch(box)
        track_y -= track_gap

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #41
0
def astat(args):
    """
    %prog astat coverage.log

    Create coverage-rho scatter plot.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly one positional (the coverage file).
    # Rows with rho below --cutoff are dropped. The kept rows are cached in
    # `<covfile>.<cutoff>`, and that cache is read instead of the full file
    # whenever need_update() says it is current. Writes
    # `<genome>.rho.<ytag>.png`.
    p = OptionParser(astat.__doc__)
    p.add_option("--cutoff", default=1000, type="int", help="Length cutoff")
    p.add_option("--genome", default="", help="Genome name")
    p.add_option(
        "--arrDist",
        default=False,
        action="store_true",
        help="Use arrDist instead",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (covfile, ) = args
    cutoff = opts.cutoff
    genome = opts.genome
    plot_arrDist = opts.arrDist

    suffix = ".{0}".format(cutoff)
    small_covfile = covfile + suffix
    update_covfile = need_update(covfile, small_covfile)
    fw = None
    if update_covfile:
        fw = open(small_covfile, "w")
    else:
        logging.debug("Found `{0}`, will use this one".format(small_covfile))
        covfile = small_covfile

    data = []
    msg = "{0} tigs scanned ..."
    tigID = 0  # stays 0 when the file holds a header and nothing else
    try:
        with open(covfile) as fp:
            header = next(fp)
            if fw is not None:
                fw.write(header)

            for row in fp:
                tigID, rho, covStat, arrDist = row.split()
                tigID = int(tigID)
                if tigID % 1000000 == 0:
                    # Progress line, overwritten in place via "\r"
                    sys.stderr.write(msg.format(tigID) + "\r")

                rho, covStat, arrDist = [float(x) for x in (rho, covStat, arrDist)]
                if rho < cutoff:
                    continue

                if fw is not None:
                    fw.write(row)
                data.append((tigID, rho, covStat, arrDist))
    finally:
        # Close the cache file even if parsing fails part-way through
        if fw is not None:
            fw.close()

    print(msg.format(tigID), file=sys.stderr)

    if not data:
        # zip(*[]) cannot be unpacked into four columns; stop with a message
        logging.error("No tigs with rho >= {0} in `{1}`, nothing to plot."
                      .format(cutoff, covfile))
        return

    from jcvi.graphics.base import plt, savefig

    logging.debug("Plotting {0} data points.".format(len(data)))
    _, rho, covStat, arrDist = zip(*data)

    y = arrDist if plot_arrDist else covStat
    ytag = "arrDist" if plot_arrDist else "covStat"

    fig = plt.figure(1, (7, 7))
    ax = fig.add_axes([0.12, 0.1, 0.8, 0.8])
    ax.plot(rho, y, ".", color="lightslategrey")

    xtag = "rho"
    info = (genome, xtag, ytag)
    title = "{0} {1} vs. {2}".format(*info)
    ax.set_title(title)
    ax.set_xlabel(xtag)
    ax.set_ylabel(ytag)

    if plot_arrDist:
        ax.set_yscale("log")

    imagename = "{0}.png".format(".".join(info))
    savefig(imagename, dpi=150)
コード例 #42
0
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly two positionals are required (FASTA
    # file, sequence id). Writes `<seqid>.<format>`.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, seqid = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract=subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract=subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[seqid]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio
    # Short label: "xxx_N" becomes the upper-cased first letter followed by N
    cc = seqid
    if "_" in seqid:
        ca, cb = seqid.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    # On long sequences widen the window to about 1% of the length, rounded
    # down to a multiple of `shift`. Floor division is required: with true
    # division `x / shift * shift` is just `x` again and rounds nothing.
    owindow = clen // 100
    if owindow > window:
        window = owindow // shift * shift

    stackplot(ax, stackbins, nbins, palette, seqid, window, shift)
    ax.text(.1, .9, cc, va="top", zorder=100, transform=ax.transAxes,
              bbox=dict(boxstyle="round", fc="w", alpha=.5))

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, color in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, binfile in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(binfile, seqid, window, shift)

        # Two identical rows so that imshow receives a 2-D array
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != seqid:
                continue
            # Mark the feature at its midpoint
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #43
0
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.

    With --barcode a mapping file can be provided to convert seq names to
    eg. species names, useful in unified tree display. This file should have
    distinctive barcodes in column1 and new names in column2, tab delimited.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; exactly one positional, either a tree file
    # path or the literal "demo" (uses the built-in example tree). Writes
    # `<datafile without extension>.<format>`.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Outgroup for rerooting the tree. "
                 "Use comma to separate multiple taxa.")
    p.add_option("--noreroot", default=False, action="store_true",
                 help="Don't reroot the input tree [default: %default]")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")
    p.add_option("--SH", default=None, type="string",
                 help="SH test p-value [default: %default]")
    p.add_option("--scutoff", default=0, type="int",
                 help="cutoff for displaying node support, 0-100 [default: %default]")
    p.add_option("--barcode", default=None,
                 help="path to seq names barcode mapping file: "
                 "barcode<tab>new_name [default: %default]")
    p.add_option("--leafcolor", default="k",
                 help="Font color for the OTUs, or path to a file "
                 "containing color mappings: leafname<tab>color [default: %default]")
    # NOTE(review): no type= is given, so a user-supplied --leaffont reaches
    # draw_tree as a string while the default is an int -- confirm that
    # draw_tree accepts both.
    p.add_option("--leaffont", default=12, help="Font size for the OTUs")
    p.add_option("--geoscale", default=False, action="store_true",
                 help="Plot geological scale")

    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    reroot = not opts.noreroot
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    if datafile == "demo":
        tx = """(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537,
        (Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985,
        ((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332,
        (Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);"""
    else:
        logging.debug("Load tree file `{0}`.".format(datafile))
        # Read the whole tree and close the handle straight away
        with open(datafile) as fp:
            tx = fp.read()

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    if opts.geoscale:
        # --geoscale draws only the scale; the tree text is not used
        draw_geoscale(root)

    else:
        # --leafcolor is either a color or, if it names an existing file, a
        # per-leaf color mapping (the base color then falls back to "k").
        if op.isfile(opts.leafcolor):
            leafcolor = "k"
            leafcolorfile = opts.leafcolor
        else:
            leafcolor = opts.leafcolor
            leafcolorfile = None

        draw_tree(root, tx, rmargin=opts.rmargin, leafcolor=leafcolor,
                  outgroup=outgroup, reroot=reroot, gffdir=opts.gffdir,
                  sizes=opts.sizes, SH=opts.SH, scutoff=opts.scutoff,
                  barcodefile=opts.barcode, leafcolorfile=leafcolorfile,
                  leaffont=opts.leaffont)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #44
0
ファイル: allmaps.py プロジェクト: zjwang6/jcvi
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    # The docstring above is also the command-line usage text (it is handed to
    # OptionParser), so it is kept verbatim.
    #
    # args: command-line tokens; only image options are read. Writes
    # `lms.<format>`. Panel A uses random jitter, so it differs between runs.
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Keyword arguments shared by every thick translucent highlight line
    highlight = dict(lw=12, alpha=0.3, solid_capstyle="round",
                     solid_joinstyle="round")

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]  # reverse a block of four points
    # zip() returns an iterator on Python 3; it must be a list to be sliced
    # and indexed below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]  # the 7 points labelled LIS
    lds = xydata[3:7]                            # the 4 points labelled LDS
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    # Named separately so the option parser `p` is not overwritten
    breakpoints = (0, 45, 75, 110)
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    # Move the last three points before a reversed copy of points 4-6
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breakpoints, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    patch = [0.1 + w * x / 110.0 for x in breakpoints]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        # Centre each label between two neighbouring breakpoints
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in breakpoints]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breakpoints, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #45
0
def main():
    # Command-line entry point: draw the matrix parsed from a CSV file as a
    # log-scaled heatmap with a color bar, optional column-group labels on top
    # (--groups) and optional bracketed row groups on the left (--rowgroups).
    # The usage text comes from the module docstring. Writes
    # `<datafile without extension>.<cmap>.<format>`.
    p = OptionParser(__doc__)
    p.add_option("--groups",
                 default=False,
                 action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar",
                 default=False,
                 action="store_true",
                 help="Horizontal color bar [default: vertical]")
    opts, args, iopts = p.set_image_options(figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    # Each row-groups line is "<name> <row>,<row>,..."; a group is stored as
    # (name, smallest row index, largest row index).
    rgroups = []
    if rowgroups:
        with open(rowgroups) as fp:
            for row in fp:
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    im = ax.matshow(data,
                    cmap=iopts.cmap,
                    norm=mpl.colors.LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    # Cell width and height in root-canvas units (the matrix axes are .7 wide)
    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, orientation=orientation)

    if groups:
        # Collapse consecutive equal group names into (name, run length)
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5

        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy],
                      "-",
                      color="gray",
                      lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            # Advance to the left edge of the next group
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            # Convert row indices to canvas y, then pull both ends inwards
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    # NOTE(review): the file name uses opts.cmap while the plot uses
    # iopts.cmap -- presumably the option string and the resolved colormap
    # respectively; confirm against set_image_options.
    image_name = pf + "." + opts.cmap + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #46
0
def report(args):
    '''
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    '''
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    parser = OptionParser(report.__doc__)
    parser.add_option(
        "--pdf",
        action="store_true",
        default=False,
        help="Generate graphic output for the histogram [default: %default]")
    parser.add_option(
        "--components",
        type="int",
        default=1,
        help="Number of components to decompose peaks [default: %default]")
    add_plot_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    ks_file = args[0]
    records = KsFile(ks_file)
    lower = opts.vmin
    upper = opts.vmax
    nbins = opts.bins

    def within_bounds(values):
        # Keep only the values inside the closed interval [lower, upper].
        return [v for v in values if lower <= v <= upper]

    # Text report: one stem-and-leaf plot per field whose name contains "ks".
    # The first entry of the module-level `fields` list is skipped.
    for field in fields.split(",")[1:]:
        values = [getattr(rec, field) for rec in records]
        if "ks" not in field:
            continue

        values = within_bounds(values)
        stats = SummaryStats(values)

        header = "{0} ({1}): ".format(descriptions[field], ks_file)
        quartiles = "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".format(
            stats.median, stats.firstq, stats.thirdq)
        moments = "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".format(
            stats.mean, stats.sd, stats.size)
        title = header + quartiles + moments

        # Use two leaf digits when the bins are narrower than 0.1.
        bin_width = upper * 1. / nbins
        digit = 2 if bin_width < .1 else 1
        stem_leaf_plot(values, 0, upper, nbins, digit=digit, title=title)

    # The graphical histogram is only produced on request.
    if not opts.pdf:
        return

    components = opts.components
    ng_ks = within_bounds([rec.ng_ks for rec in records])

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    ks_plot = KsPlot(ax, upper, opts.bins, legendp=opts.legendp)
    ks_plot.add_data(ng_ks, components, fill=opts.fill, fitted=opts.fit)
    ks_plot.draw(title=opts.title)
コード例 #47
0
def dotplot_main(args):
    """Parse command-line options and render a dot plot for one anchor file.

    `args` is the list of command-line arguments; exactly one positional
    argument (the anchor file) is expected. When the anchor file name ends
    with ".ks" it is first converted to an ".anchors" file and the colormap
    label defaults to "*Ks* values". The figure is written to `--outfile`
    or, when that is not given, to a name derived from the anchor file.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext",
                 help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap",
                 help="Two column file, block id to color mapping")
    p.add_option(
        "--colororientation",
        action="store_true",
        default=False,
        help="Color the blocks based on orientation, similar to mummerplot",
    )
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option("--nosep",
                 default=False,
                 action="store_true",
                 help="Do not add contig lines")
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="9x9",
                                            style="dark",
                                            dpi=90,
                                            cmap="copper")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile, ) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile,
                                                     p,
                                                     opts,
                                                     sorted=(not opts.nosort))

    # An explicit --colormap file takes precedence over --colororientation.
    palette = opts.colormap
    if palette:
        palette = Palette(palettefile=palette)
    elif opts.colororientation:
        palette = Palette.from_block_orientation(anchorfile, qbed, sbed)

    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        # Default colormap label for Ks input unless the user supplied one.
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        anchorfile = anchorksfile

    if opts.skipempty:
        # Restrict both BED files to the seqids that carry at least one anchor.
        ac = AnchorFile(anchorfile)
        if is_self:
            # Self comparison: both axes share a single seqid set.
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            _, q = qorder[q]
            _, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        # Pass the resolved label (was opts.cmaptext, which silently dropped
        # the "*Ks* values" default computed above).
        cmap_text=cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." +
                                  opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()
コード例 #48
0
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        # Short label: "name_suffix" becomes first letter (upper-cased) + suffix
        cc = seqid
        if "_" in seqid:
            ca, cb = seqid.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins = ceil(clen / shift), computed with integer
        # arithmetic so the count stays an int under true division too.
        nbins = clen // shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc, ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, b, size=13)
        # Advance by a rough per-character width so labels do not overlap
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #49
0
ファイル: chromosome.py プロジェクト: zhimenggan/jcvi
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    p = OptionParser(main.__doc__)
    p.add_option("--title",
                 default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge",
                 default=False,
                 action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap",
        default=False,
        action="store_true",
        help=
        "generate an HTML image map associated with the image [default: %default]"
    )
    p.add_option(
        "--winsize",
        default=50000,
        type="int",
        help=
        "if drawing an imagemap, specify the window size (bases) of each map element "
        "[default: %default bp]")
    p.add_option("--empty", help="Write legend for unpainted region")
    opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # print_help() returns None, so negate it to exit with a non-zero
        # status on a usage error, as the sibling entry points do.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # One single-letter matplotlib color per class; zip() stops at the
    # shorter input, so classes beyond the sixth get no color entry.
    mycolors = "rgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length is taken as the largest feature end on that seqid
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions and replace each accession by its class
    # ('-' when the accession has no mapping)
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    if centromeres:
        assert chr_number == len(centromeres)

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (seqid, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + .5 * xwidth
        root.text(xx, ystart + .01, seqid, ha="center")
        if centromeres:
            yy = ystart - centromeres[seqid] * ratio
            ChromosomeWithCentromere(root,
                                     xx,
                                     ystart,
                                     yy,
                                     ystart - clen * ratio,
                                     width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for seqid in sorted(chr_lens.keys()):
        segment_size, excess = 0, 0
        bac_list = []
        for b in bed.sub_bed(seqid):
            idx = chr_idxs[seqid]
            klass = b.accn
            start = b.start
            end = b.end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart),
                          xwidth,
                          yyend - yystart,
                          fc=class_colors.get(klass, "w"),
                          lw=0,
                          alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a result of
                #             iterating over `winsize` regions of `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        # Not enough bases for a full window: carry them over
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                                  xx + xwidth, (1 - ystart) + segment_end * ratio
                    print('\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                            w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end), file=mapfh)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the trailing partial window of this chromosome; this relies on
        # `b`, `end` and `xx` left over from the last loop iteration.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                          xx + xwidth, (1 - ystart) + segment_end * ratio
            print('\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                    w, h, dpi, seqid+":"+",".join(bac_list), segment_start, segment_end), file=mapfh)

    if imagemap:
        print('</map>', file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        xstart, ystart = .9, .85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, klass, fontsize=10)
        xstart += xinterval

    # Optional legend entry (unfilled box) for regions that were not painted
    empty = opts.empty
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + .01, yy, empty, fontsize=10)

    root.text(.5,
              .95,
              opts.title,
              fontstyle="italic",
              ha="center",
              va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(figname, dpi=dpi, iopts=iopts)
コード例 #50
0
ファイル: bites.py プロジェクト: zjwang6/jcvi
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """

    # Use this function's own usage text and parse the arguments that were
    # actually handed in (the original parsed sys.argv and ignored `args`).
    p = OptionParser(bites.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs",
              "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0

    def m(x):
        # Map a sequence coordinate in [0, mrange] onto canvas x in [0.1, 0.8]
        return x / mrange * 0.7 + 0.1

    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        # Panels after the first drop the trailing non-collinear HSP
        myhsps = hsps
        if i >= 1:
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r2 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r3 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r4 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip),
                 (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            # The last panel omits the connecting polygon
            if i == 2:
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs (uses `myhsps` from the last panel, i.e. the collinear HSPs)
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    bites = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bites):
        # Two entries per row: even index on the left, odd on the right.
        xx = 0.15 if (i % 2 == 0) else 0.55
        # Floor division keeps both entries of a row at the same height
        # (plain `/` yields half-row offsets under true division).
        yy = ystart - i // 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
コード例 #51
0
def waterlilyGOM(args):
    """
    %prog mcmctree.tre table.csv

    Customized figure to plot phylogeny and related infographics.
    """
    from jcvi.graphics.tree import (
        LeafInfoFile,
        WGDInfoFile,
        draw_tree,
        parse_tree,
        draw_wgd_xy,
    )
    from jcvi.graphics.table import CsvTable, draw_table

    parser = OptionParser(waterlilyGOM.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x9")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    datafile, csvfile = args
    outgroup = ["ginkgo"]

    logging.debug("Load tree file `{0}`".format(datafile))
    tree, hpd = parse_tree(datafile)

    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Left and right margins of the tree panel
    margin = 0.15
    rmargin = 0.19
    # Annotation files are read from fixed names in the working directory
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    groups = "Monocots,Eudicots,ANA-grade,Gymnosperms"

    draw_tree(
        root,
        tree,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
        groups=groups.split(","),
    )

    # Bottom right: legend for the WGD markers, one row per entry.
    # Each entry is (key into wgdinfo, row offset in units of ypad, label).
    pad = 0.02
    ypad = 0.04
    xstart = 1 - rmargin + pad
    ystart = 0.2
    legend_rows = (
        ("waterlily", 2, "Nymphaealean WGD"),
        ("banana", 3, "Other known WGDs"),
    )
    for key, row, label in legend_rows:
        wgdline = wgdinfo[key][0]
        ypos = ystart - row * ypad
        draw_wgd_xy(root, xstart, ypos, wgdline)
        root.text(
            xstart + pad,
            ypos,
            label,
            color=wgdline.color,
            va="center",
        )

    # Top left: the comparison table loaded from the CSV argument
    table = CsvTable(csvfile)
    draw_table(
        root,
        table,
        extent=(0.02, 0.44, 0.55, 0.985),
        stripe_color="lavender",
        yinflation=iopts.w / iopts.h,
    )

    normalize_axes(root)

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #52
0
def main():
    """Command-line entry point: draw a synteny figure from three input files.

    Expects exactly three positional arguments (data file, BED file, layout
    file) and saves the figure under the data file's name with the selected
    image format as extension.
    """
    parser = OptionParser(__doc__)

    # Options that take a value
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option(
        "--gene_style",
        default="Rectangle",
        help="Default <Rectangle> to plot genes as rectangle. Accept <Arrow> to add orientation of genes.",
    )

    # Boolean switches, all off by default
    switches = (
        ("--scalebar", "Add scale bar to the plot"),
        ("--add_gene_legend",
         "Add forward and reverse strand gene legend to the plot"),
        ("--add_gene_label", "Add gene names to the plot"),
    )
    for flag, description in switches:
        parser.add_option(flag, default=False, action="store_true",
                          help=description)

    parser.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    drawing_options = dict(
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        scalebar=opts.scalebar,
        shadestyle=opts.shadestyle,
        gene_legend=opts.add_gene_legend,
        add_gene_label=opts.add_gene_label,
        gene_style=opts.gene_style,
    )
    Synteny(fig, root, datafile, bedfile, layoutfile, **drawing_options)

    # The root axes is a unit-square canvas with no visible frame
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #53
0
def expr(args):
    """
    %prog expr block exp layout napus.bed

    Plot a composite figure showing synteny and the expression level between
    homeologs in two tissues - total 4 lists of values. block file contains the
    gene pairs between AN and CN.
    """
    from jcvi.graphics.base import red_purple as default_cm

    p = OptionParser(expr.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 4:
        sys.exit(not p.print_help())

    block, exp, layout, napusbed = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    s = Synteny(fig, root, block, napusbed, layout)

    # Import the expression values.
    # Each row holds three whitespace-separated columns: gene id, leaf value,
    # root value. The file is closed as soon as it has been read.
    data = {}
    with open(exp) as fp:
        for row in fp:
            gid, lf, rt = row.split()
            lf, rt = float(lf), float(rt)
            data[gid] = (lf, rt)

    rA, rB = s.rr
    gA = [x.accn for x in rA.genes]
    gC = [x.accn for x in rB.genes]

    # Genes without expression data default to (0, 0). After transposing,
    # row 0 holds the leaf values and row 1 the root values.
    A = [data.get(x, (0, 0)) for x in gA]
    C = [data.get(x, (0, 0)) for x in gC]
    A = np.array(A)
    C = np.array(C)
    A = np.transpose(A)
    C = np.transpose(C)

    d, h = .01, .1
    lsg = "lightslategrey"
    coords = s.gg  # Coordinates of the genes
    axes = []
    for j, (y, gg) in enumerate(((.79, gA), (.24, gC))):
        r = s.rr[j]
        x = r.xstart
        w = r.xend - r.xstart
        ax = fig.add_axes([x, y, w, h])
        axes.append(ax)
        # Frame around the heatmap, extended to the left for the row labels
        root.add_patch(
            Rectangle((x - h, y - d),
                      w + h + d,
                      h + 2 * d,
                      fill=False,
                      ec=lsg,
                      lw=1))
        root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center")
        root.text(x - d, y + h / 4, "leaf", ha="right", va="center")
        # Connector end: below the heatmap for the upper track, above it
        # for the lower track
        ty = y - 2 * d if y > .5 else y + h + 2 * d
        nrows = len(gg)
        for i, g in enumerate(gg):
            start, end = coords[(j, g)]
            sx, sy = start
            ex, ey = end
            assert sy == ey
            sy = sy + 2 * d if sy > .5 else sy - 2 * d
            # Dotted line from the gene midpoint to its heatmap column
            root.plot(((sx + ex) / 2, x + w * (i + .5) / nrows), (sy, ty),
                      lw=1,
                      ls=":",
                      color="k",
                      alpha=.2)

    axA, axC = axes
    axA.pcolormesh(A, cmap=default_cm)
    # The colorbar below is attached to this second mesh, as in the
    # original code.
    mesh = axC.pcolormesh(C, cmap=default_cm)
    axA.set_xlim(0, len(gA))
    axC.set_xlim(0, len(gC))

    x, y, w, h = .35, .1, .3, .05
    ax_colorbar = fig.add_axes([x, y, w, h])
    fig.colorbar(mesh, cax=ax_colorbar, orientation='horizontal')
    root.text(x - d, y + h / 2, "RPKM", ha="right", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    for x in (axA, axC, root):
        x.set_axis_off()

    image_name = "napusf4b." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #54
0
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option("--depthfile",
                 help="Generate file with gene and depth [default: %default]")
    p.add_option("--histogram",
                 default=False,
                 action="store_true",
                 help="Plot histograms in PDF")
    p.add_option("--xmax",
                 type="int",
                 help="x-axis maximum to display in plot")
    p.add_option("--title", default=None, help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorfile, = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)
    qranges = []
    sranges = []
    blocks = ac.blocks
    # Each block contributes one range per genome, spanning the smallest to
    # the largest order index among its anchors.
    for ib in blocks:
        q, s, t = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        if is_self:
            # Self comparison: both sides of a block layer onto one genome
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]
    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))
    fw = None
    if depthfile:
        fw = open(depthfile, "w")
        write_details(fw, details, qbed)

    if is_self:
        # Close the depth file on this early exit too (it used to leak).
        if fw:
            fw.close()
            logging.debug("Depth written to `{0}`.".format(depthfile))
        return

    stag = "Genome {0} depths".format(sgenome)
    print("{}:".format(stag), file=sys.stderr)
    dss, details = range_depth(sranges, len(sbed))
    if fw:
        write_details(fw, details, sbed)
        fw.close()
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    # dict views cannot be concatenated with `+` on Python 3, so build a
    # plain list; including 4 in it keeps the minimum x-range and also copes
    # with empty depth tables.
    xmax = opts.xmax or max([4] + list(dsq.keys()) + list(dss.keys()))
    if opts.quota:
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(ax1,
                 dss,
                 0,
                 xmax,
                 stag,
                 ylabel="Percentage of genome",
                 highlight=range(1, speak + 1))
    quickplot_ax(ax2,
                 dsq,
                 0,
                 xmax,
                 qtag,
                 ylabel=None,
                 highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern"\
                    .format(qgenome, sgenome, speak, qpeak)
    root = f.add_axes([0, 0, 1, 1])
    # First line is the headline, the remainder the pattern line. partition()
    # tolerates a user-supplied --title without a newline, where unpacking
    # the result of split('\n') raised ValueError.
    vs, _, pattern = title.partition('\n')
    root.text(.5, .97, vs, ha="center", va="center", color="darkslategray")
    root.text(.5,
              .925,
              pattern,
              ha="center",
              va="center",
              color="tomato",
              size=16)
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)
コード例 #55
0
def deletion(args):
    """
    %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed

    Plot histogram for napus deletions. Can plot deletion-genes or
    deletion-bases. The three largest segmental deletions will be highlighted
    along with a drawing of the C2 chromosome.
    """
    # The docstring above is handed to OptionParser and shown as the usage
    # text, so it is kept verbatim.
    import math
    from jcvi.formats.bed import Bed
    from jcvi.graphics.chromosome import HorizontalChromosome
    from jcvi.graphics.base import kb_formatter

    p = OptionParser(deletion.__doc__)
    opts, args, iopts = p.set_image_options(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    deletion_genes, deletions, bed = args
    # One integer per line. Read inside `with` so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(deletion_genes) as fp:
        dg = [int(x) for x in fp]
    lsg = "lightslategray"

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])
    # NOTE(review): the usage text names the input "deletion-genes" while the
    # check below matches the literal file name "deleted-genes"; any other
    # name is treated as a per-base file. Confirm which spelling is intended.
    by_genes = deletion_genes == "deleted-genes"
    minval = 2 if by_genes else 2048
    # 16 log-spaced bin edges from `minval` up to the largest observed value.
    bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16)
    ax.hist(dg, bins=bins, fc=lsg, alpha=.75)
    # NOTE(review): newer matplotlib releases renamed `basex` to `base`;
    # confirm against the matplotlib version this project pins.
    ax.set_xscale('log', basex=2)
    if by_genes:
        ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
        ax.set_xlabel('No. of deleted genes in each segment')
    else:
        ax.xaxis.set_major_formatter(kb_formatter)
        ax.set_xlabel('No. of deleted bases in each segment')
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
    ax.set_ylabel('No. of segments')
    ax.patch.set_alpha(0.1)

    # Draw chromosome C2
    na, nb = .45, .85
    root.text((na + nb) / 2, .54, "ChrC02", ha="center")
    HorizontalChromosome(root, na, nb, .5, height=.025, fc=lsg, fill=True)

    order = Bed(bed).order

    def scale(x):
        # Map a base position onto the [na, nb] span of the drawn chromosome.
        # 52886895 is presumably the length of C2 in bp -- TODO confirm.
        return na + x * (nb - na) / 52886895

    # Each row is "<count> <gene1|gene2|...>"; rows are numbered from 1 and
    # the number is drawn next to the marker on the chromosome.
    with open(deletions) as fp:
        for i, row in enumerate(fp, 1):
            _, genes = row.split()
            genes = genes.split("|")
            _, a = order[genes[0]]
            # NOTE(review): the last gene is looked up (so an unknown id still
            # raises KeyError) but its coordinates are never used -- the red
            # marker spans only the first gene. Confirm whether `b.end` was
            # meant to be the right edge.
            _, b = order[genes[-1]]
            mi, mx = scale(a.start), scale(a.end)
            root.add_patch(
                Rectangle((mi, .475), mx - mi, .05, fc="red", ec="red"))
            if i == 1:  # offset between two adjacent regions for aesthetics
                mi -= .015
            elif i == 2:
                mi += .015
            TextCircle(root, mi, .44, str(i), fc="red")

    # Matching numbered circles placed at fixed positions lower on the canvas.
    for i, mi in zip(range(1, 4), (.83, .78, .73)):
        TextCircle(root, mi, .2, str(i), fc="red")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = deletion_genes + ".pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #56
0
ファイル: kmer.py プロジェクト: zengxiaofei/jcvi
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic.
    """
    # The docstring above is handed to OptionParser and shown as the usage
    # text, so it is kept verbatim. Returns the estimated genome size (int).
    import os.path as op

    p = OptionParser(histogram.__doc__)
    p.add_option("--vmin", dest="vmin", default=1, type="int",
            help="minimum value, inclusive [default: %default]")
    p.add_option("--vmax", dest="vmax", default=100, type="int",
            help="maximum value, inclusive [default: %default]")
    p.add_option("--pdf", default=False, action="store_true",
            help="Print PDF instead of ASCII plot [default: %default]")
    p.add_option("--coverage", default=0, type="int",
            help="Kmer coverage [default: auto]")
    p.add_option("--nopeaks", default=False, action="store_true",
            help="Do not annotate K-mer peaks")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    # Named `ascii_only` rather than `ascii` to avoid shadowing the builtin.
    ascii_only = not opts.pdf
    peaks = not opts.nopeaks
    N = int(N)

    # A meryl index is converted to a histogram file first.
    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    ks.analyze(K=N)

    Total_Kmers = int(ks.totalKmers)
    coverage = opts.coverage
    # Without an explicit --coverage, fall back to ks.max2 (the position
    # labelled "Main peak" below).
    Kmer_coverage = ks.max2 if not coverage else coverage
    Genome_size = int(round(Total_Kmers * 1. / Kmer_coverage))

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, thousands(Total_Kmers))
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".\
                        format(Genome_size / 1e6)
    Repetitive_msg = ks.repetitive
    SNPrate_msg = ks.snprate

    # Written with sys.stderr.write so the summary works on both Python 2 and
    # Python 3; the former `print >> sys.stderr, msg` statement raises a
    # TypeError at runtime on Python 3.
    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        sys.stderr.write(msg + "\n")

    x, y = ks.get_xy(opts.vmin, opts.vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if ascii_only:
        asciiplot(x, y, title=title)
        return Genome_size

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)
    ax = plt.gca()

    if peaks:
        t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(kx, ky) for kx, ky in ks.counts if kx in t]
        if tcounts:
            tx, ty = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(tx, ty, 'ko', lw=2, mec='k', mfc='w')
            # Label a peak only when its position is actually present in the
            # counts; otherwise the lookup would raise KeyError.
            if ks.max1 in tcounts:
                ax.text(ks.max1, tcounts[ks.max1], "SNP peak", va="top")
            if ks.max2 in tcounts:
                ax.text(ks.max2, tcounts[ks.max2], "Main peak")

    messages = [Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg,
                Repetitive_msg, SNPrate_msg]
    write_messages(ax, messages)

    # Add headroom above the curve.
    ymin, ymax = ax.get_ylim()
    ymax = ymax * 7 / 6

    ax.set_title(markup(title))
    ax.set_ylim((ymin, ymax))
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    set_human_axis(ax)

    # Strip the extension from the file name only: splitting the whole path on
    # "." breaks for inputs such as "./x.histogram" (empty stem) or
    # directories containing a dot.
    dirname, basename = op.split(histfile)
    imagename = op.join(dirname, basename.split(".")[0] + ".pdf")
    savefig(imagename, dpi=100)

    return Genome_size
コード例 #57
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # The docstring above is handed to OptionParser and shown as the usage
    # text. The function draws everything onto one figure and saves it as
    # "napus-fig3.<format>"; it returns nothing.
    # NOTE(review): `gap` and `template_f3a` are not defined in this function;
    # presumably module-level names -- confirm.
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step",
                 default=10000000,
                 type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    # `diverge` unpacks into two values that are used below as text colors
    # and passed on to conversion_track / Coverage.
    diverge = iopts.diverge
    rr, gg = diverge
    # Comma-separated names become a list of single-element lists.
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig,
                  root,
                  seqidsfile,
                  klayout,
                  gap=gap,
                  height=height,
                  lw=2,
                  generank=False,
                  sizes=sizes,
                  heightpad=r,
                  roundrect=True,
                  plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Tracks starting near the left edge get their label nudged right/up.
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15, ha="right", va="center")

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    # Tracks and data files are paired positionally; each track gets one axis
    # for the XY data and a second one for the base-pair gauge.
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        # NOTE(review): `chr` shadows the builtin of the same name.
        chr = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, chr, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    # NOTE(review): the conversion tables are read from hard-coded "data/..."
    # paths rather than from `datadir` -- confirm this is intentional.
    # The four calls are issued twice, once per `asterisk` value.
    for asterisk in (False, True):
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0,
                         "A02",
                         ax_Ar,
                         rr,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1,
                         "C2",
                         ax_Co,
                         gg,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0,
                         "A02",
                         ax_Ar,
                         gg,
                         ypos=1,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1,
                         "C2",
                         ax_Co,
                         rr,
                         ypos=1,
                         asterisk=asterisk)

    # Axes of the second and third tracks, in track order.
    Ar, Co = xy_axes[1:3]
    # Each entry: (axis, "geneA geneB", horizontal alignment, label text).
    annotations = ((Ar, "Bra028920 Bra028897", "center",
                    "1DAn2+"), (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left",
                    "3DAn2+"), (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right",
                    "5DAn2+ (GSL)"), (Co, "Bo2g001000 Bo2g001300", "left",
                                      "1DCn2-"), (Co, "Bo2g018560 Bo2g023700",
                                                  "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left",
                    "3DCn2-"), (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        # Anchor x depends on alignment: 0.8 x the midpoint for "center", the
        # second gene's start for "left", the first gene's start otherwise.
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        # Labels containing "+" are drawn in `rr`, all others in `gg`.
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    # `order` is rebound here: from the BED gene index to the list of line
    # names passed to Coverage.
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Upper coverage panel, horizontally aligned with the first track.
    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig,
                 root,
                 canvas1,
                 chr1, (0, s1),
                 datadir,
                 order=order,
                 gauge=None,
                 plot_chr_label=False,
                 gauge_step=gstep,
                 palette="gray",
                 cap=40,
                 hlsuffix=hlsuffix,
                 labels_dict=labels_dict,
                 diverge=diverge)
    # presumably one y position per entry in `order` -- TODO confirm
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # Each entry: (x, y, dx, dy, label); the text sits at (x + dx, y + dy)
    # with an arrow pointing to (x, y).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2,
                    "FLC"), (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2,
                    "FLC"), (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0,
                    "GSL"), (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # NOTE(review): `frac` may no longer be an accepted arrowprops key in
    # recent matplotlib releases -- confirm against the pinned version.
    arrowprops = dict(facecolor='black',
                      shrink=.05,
                      frac=.5,
                      width=1,
                      headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label,
                      xy=(x, y),
                      xytext=(x + dx, y + dy),
                      arrowprops=arrowprops,
                      color=rr,
                      fontsize=9,
                      ha="center",
                      va="center")

    # Lower coverage panel, horizontally aligned with the last track.
    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig,
             root,
             canvas2,
             chr2, (0, s2),
             datadir,
             order=order,
             gauge=None,
             plot_chr_label=False,
             gauge_step=gstep,
             palette="gray",
             cap=40,
             hlsuffix=hlsuffix,
             labels_dict=labels_dict,
             diverge=diverge)

    # Panel letters A/B/C.
    pad = .03
    labels = ((.1, .67, "A"), (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #58
0
ファイル: kmer.py プロジェクト: zengxiaofei/jcvi
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer hisotograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # The docstring above is handed to OptionParser and shown as the usage
    # text, so it is kept verbatim.
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
            help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
            help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
            help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
            help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram file plus the species name are required; with a
    # single argument there would be nothing to plot.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Panel A: overlaid K-mer histograms. Panel B: genome size vs. K.
    A = fig.add_axes([.08, .12, .38, .76])
    B = fig.add_axes([.58, .12, .38, .76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K is parsed from the file name: the part of the basename before the
        # first "." and after the last "-".
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        line, = A.plot(x, y, '-', lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    # Fail with a clear message instead of an unpacking error further down
    # when every file was filtered out by --kmin/--kmax.
    if not genomesizes:
        sys.exit("No histogram with K between {0} and {1}".format(
            opts.kmin, opts.kmax))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc='w')
    # Quadratic fit through the (K, genome size) points, drawn as a dotted
    # line. Bound to `fit` so the option parser `p` is not rebound.
    t = np.linspace(opts.kmin - .5, opts.kmax + .5, 100)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((.04, .96, 'A'), (.54, .96, 'B'))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
コード例 #59
0
def plot(args):
    """
    %prog plot tagged.new.bed chr1

    Plot gene identifiers along a particular chromosome, often to illustrate the
    gene id assignment procedure.
    """
    # The docstring above is handed to OptionParser and shown as the usage
    # text, so it is kept verbatim.
    from jcvi.graphics.base import plt, savefig
    from jcvi.graphics.chromosome import ChromosomeMap

    p = OptionParser(plot.__doc__)
    p.add_option("--firstn", type="int", help="Only plot the first N genes")
    p.add_option("--ymax", type="int", help="Y-axis max value")
    p.add_option("--log", action="store_true", help="Write plotting data")
    opts, args, iopts = p.set_image_options(args, figsize="6x4")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `seqid` rather than `chr`, which would shadow the builtin.
    taggedbed, seqid = args
    bed = Bed(taggedbed)
    beds = list(bed.sub_bed(seqid))
    # Two (index, rank) series: freshly assigned ids and pre-existing ones.
    old, new = [], []
    i = 0
    for b in beds:
        accn = b.extra[0]
        if "te" in accn:
            continue

        accn, tag = accn.split("|")
        if tag == "OVERLAP":
            continue

        _, r = atg_name(accn)
        if tag == "NEW":
            new.append((i, r))
        else:
            old.append((i, r))
        i += 1

    ngenes = i
    assert ngenes == len(new) + len(old)

    logging.debug("Imported {0} ranks on {1}.".format(ngenes, seqid))
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    xstart, xend = 0.2, 0.8
    ystart, yend = 0.2, 0.8
    pad = 0.02

    ngenes = opts.firstn or ngenes
    ymax = opts.ymax or 500000

    title = "Assignment of Medtr identifiers"
    # The "first N genes" wording applies when --firstn restricts the x-range;
    # it was previously keyed on --ymax, which mislabelled the plot.
    if opts.firstn:
        subtitle = "{0}, first {1} genes".format(seqid, ngenes)
    else:
        subtitle = "{0}, {1} genes ({2} new)".format(seqid, ngenes, len(new))

    chr_map = ChromosomeMap(fig, root, xstart, xend, ystart, yend, pad, 0,
                            ymax, 5, title, subtitle)

    ax = chr_map.axes

    if opts.log:
        from jcvi.utils.table import write_csv

        header = ["x", "y"]
        write_csv(header, new, filename=seqid + ".new")
        write_csv(header, old, filename=seqid + ".old")

    # Either series may be empty (e.g. no NEW genes on this chromosome);
    # unpacking zip() of an empty list would raise ValueError.
    if new:
        x, y = zip(*new)
        ax.plot(x, y, "b,")
    if old:
        x, y = zip(*old)
        ax.plot(x, y, "r,")

    # Legends
    ymid = (ystart + yend) / 2
    y = ymid + pad
    root.plot([0.2], [y], "r.", lw=2)
    root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10)
    y = ymid - pad
    root.plot([0.2], [y], "b.", lw=2)
    root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10)

    ax.set_xlim(0, ngenes)
    ax.set_ylim(0, ymax)
    ax.set_axis_off()

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + ".identifiers." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
コード例 #60
0
def main():
    """Parse the command line and render a synteny figure.

    Expects three positional arguments (data file, BED file, layout file),
    builds a full-canvas axes, hands everything to ``Synteny`` and saves the
    result as ``<datafile stem>.<image format>``.
    """
    parser = OptionParser(__doc__)

    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        ("--switch", dict(help="Rename the seqid with two-column file")),
        ("--tree", dict(help="Display trees on the bottom of the figure")),
        ("--extra", dict(help="Extra features in BED format")),
        ("--genelabelsize", dict(
            default=0,
            type="int",
            help="Show gene labels at this font size, useful for debugging. "
                 "However, plot may appear visually crowded. "
                 "Reasonably good values are 2 to 6 [Default: disabled]",
        )),
        ("--scalebar", dict(
            default=False,
            action="store_true",
            help="Add scale bar to the plot",
        )),
        ("--glyphstyle", dict(
            default="box",
            choices=Glyph.Styles,
            help="Style of feature glyphs",
        )),
        ("--glyphcolor", dict(
            default="orientation",
            choices=Glyph.Palette,
            help="Glyph coloring based on",
        )),
        ("--shadestyle", dict(
            default="curve",
            choices=Shade.Styles,
            help="Style of syntenic wedges",
        )),
    )
    for flag, settings in option_table:
        parser.add_option(flag, **settings)

    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args

    # Output name reuses the data file name minus its last extension.
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Synteny(
        fig, root, datafile, bedfile, layoutfile,
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        genelabelsize=opts.genelabelsize,
        scalebar=opts.scalebar,
        shadestyle=opts.shadestyle,
        glyphstyle=opts.glyphstyle,
        glyphcolor=opts.glyphcolor,
    )

    # The root axes is only a unit-square canvas; hide its frame and ticks.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join((prefix, iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)