Exemple #1
0
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(resample.__doc__)
    _opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    # Exactly two data files are required; otherwise show the help and exit.
    if len(args) != 2:
        sys.exit(not parser.print_help())

    catfish_file, medicago_file = args
    fig = plt.figure(1, (iopts.w, iopts.h))

    # Full-figure canvas for the panel letters, then one axes per dataset.
    root = fig.add_axes([0, 0, 1, 1])
    left_ax = fig.add_axes([.1, .18, .32, .64])
    right_ax = fig.add_axes([.6, .18, .32, .64])

    catfish = import_data(catfish_file)
    medicago = import_data(medicago_file)

    # Both panels share the x label, the two y labels and the legend entries.
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    panels = (
        (left_ax, catfish, "Yellow catfish"),
        (right_ax, medicago, "Medicago"),
    )
    for ax, data, title in panels:
        subplot_twinx(ax, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))
    normalize_axes(root)

    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Exemple #2
0
def allelefreq(args):
    """
    %prog allelefreq HD,DM1,SCA1,SCA17

    Plot the allele frequencies of some STRs.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(allelefreq.__doc__)
    _opts, args, iopts = parser.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    # Single positional argument: a comma-separated list of locus names.
    loci = args[0]
    fig, (top_row, bottom_row) = plt.subplots(
        ncols=2, nrows=2, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=4)

    # Only the data frame returned by read_treds() is used; key it by
    # abbreviation before handing it to the per-locus plotter.
    _treds, table = read_treds()
    table = table.set_index(["abbreviation"])

    # One locus per panel in reading order; zip() stops at the shorter input.
    panels = tuple(top_row) + tuple(bottom_row)
    for panel, locus in zip(panels, loci.split(",")):
        plot_allelefreq(panel, table, locus)

    # Panel letters go on a transparent full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    left, top, middle = pad / 2, 1 - pad, 1 / 2.
    letters = (
        (left, top, "A"),
        (middle, top, "B"),
        (left, middle, "C"),
        (middle, middle, "D"),
    )
    panel_labels(root, letters)
    normalize_axes(root)

    savefig("allelefreq." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Exemple #3
0
def composite_correlation(df, size=(12, 8)):
    """Plot a 2x2 composite figure of four traits against chronological age.

    Each panel is a scatter plot of one column of ``df`` against
    ``df["hli_calc_age_sample_taken"]``, with every point colored by the
    value found in ``df["Chemistry"]``:

    * A (top left): ``teloLength``
    * B (top right): ``ccn.chrX``
    * C (bottom left): ``TRA.PPM``
    * D (bottom right): ``ccn.chrY``

    The figure is drawn on matplotlib figure number 1. Nothing is returned and
    nothing is written to disk by this function.

    Args:
        df: table exposing the columns named above via ``df[...]``.
        size: figure size handed to ``plt.figure``.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    # The NaN entry gives samples without a chemistry value their own color;
    # it is deliberately left out of the legend below.
    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))

    age_label = "Chronological age (yr)"
    ages = df["hli_calc_age_sample_taken"]
    point_colors = df["Chemistry"].map(color_map)

    # (axes, column, y-limits, y-label) for each panel, in drawing order.
    panels = (
        (ax1, "teloLength", (0, 15), "Telomere length (Kb)"),
        (ax2, "ccn.chrX", (1.8, 2.1), "ChrX copy number"),
        (ax4, "ccn.chrY", (0.8, 1.1), "ChrY copy number"),
        (ax3, "TRA.PPM", (0, 250),
         "$TCR-\\alpha$ deletions (count per million reads)"),
    )
    for ax, column, ylim, ylabel in panels:
        ax.scatter(ages, df[column], s=10, marker='.', color=point_colors)
        ax.set_ylim(*ylim)
        ax.set_ylabel(ylabel)

    # zip() is an iterator on Python 3 and cannot be sliced directly, so it is
    # materialized first; only the three named chemistries get a legend entry.
    legend_pairs = list(zip(chemistry, colors))[:3]
    legend_elements = [Line2D([0], [0], marker='.', color='w', label=chem,
                              markerfacecolor=color, markersize=16)
                       for chem, color in legend_pairs]
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()

    # Transparent full-figure axes carrying the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"), (.52, .98, "B"), (.02, .5, "C"), (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #4
0
def composite_qc(df_orig, size=(16, 12)):
    """Plot a six-panel composite QC figure on matplotlib figure 1.

    The input columns are first renamed to display names, then one panel is
    drawn per variable: Age (A), Gender (B), Ethnicity (C), Mean coverage (D),
    Sequencing chemistry (E) and Cohort (F). Each panel's x label is moved
    into its title. Nothing is returned and nothing is saved here.

    Args:
        df_orig: table providing the columns ``hli_calc_age_sample_taken``,
            ``hli_calc_gender``, ``eth7_max``, ``MeanCoverage``, ``Chemistry``
            and ``Release Client``. It is not modified; a renamed copy is
            used.
        size: figure size handed to ``plt.figure``.
    """
    df = df_orig.rename(columns={"hli_calc_age_sample_taken": "Age",
                                 "hli_calc_gender": "Gender",
                                 "eth7_max": "Ethnicity",
                                 "MeanCoverage": "Mean coverage",
                                 "Chemistry": "Sequencing chemistry",
                                 "Release Client": "Cohort",
                                 })

    fig = plt.figure(1, size)
    # 2 rows x 7 columns grid: two narrow panels plus one wide panel per row.
    ax1 = plt.subplot2grid((2, 7), (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid((2, 7), (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid((2, 7), (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid((2, 7), (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid((2, 7), (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid((2, 7), (1, 4), rowspan=1, colspan=3)

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    # Categories are ordered by decreasing frequency.
    sns.countplot(x="Ethnicity", data=df, ax=ax3,
                  order=df['Ethnicity'].value_counts().index)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    sns.countplot(x="Cohort", data=df, ax=ax6,
                  order=df['Cohort'].value_counts().index)

    # Anonymize the cohorts: "Spector" is shown as "TwinsUK", "Health Nucleus"
    # is kept, every other cohort becomes "C<position>". Always collect plain
    # strings so the new tick labels are a homogeneous list.
    new_cohorts = []
    for i, tick in enumerate(ax6.get_xticklabels()):
        name = tick.get_text()
        if name == "Spector":
            name = "TwinsUK"
        elif name != "Health Nucleus":
            name = "C{}".format(i + 1)
        new_cohorts.append(name)
    ax6.set_xticklabels(new_cohorts)

    # Slant the cohort labels.
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Use the variable name as the panel title rather than as the x label.
    for ax in (ax1, ax2, ax3, ax4, ax5, ax6):
        ax.set_title(ax.get_xlabel())
        ax.set_xlabel("")

    plt.tight_layout()

    # Transparent full-figure axes carrying the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .96, "A"),
              (.3, .96, "B"),
              (.6, .96, "C"),
              (.02, .52, "D"),
              (.3, .52, "E"),
              (.6, .52, "F"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #5
0
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it must name this command and not the one it was copied from.
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    # Five positional files are required; otherwise show the help and exit.
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto this single full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.42, 0.52, 0.48)

    labels = ((0.04, 0.96, "A"), (0.04, 0.52, "B"))
    panel_labels(root, labels)

    # Work in unit figure coordinates and hide the frame and ticks.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #6
0
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    # The docstring doubles as the command-line usage message (it is passed to
    # OptionParser), which is why the command name in it has to be accurate.
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    # Wrong number of positional arguments: print the help, exit non-zero.
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Single canvas spanning the whole figure; both panels are drawn on it.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    # Unit coordinate system, no visible axis decorations.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #7
0
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # The docstring is reused as the usage message of the option parser.
    p = OptionParser(resample.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x4", dpi=300)

    n_inputs = len(args)
    if n_inputs != 2:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Left and right panel rectangles, in figure coordinates.
    rects = ([0.1, 0.18, 0.32, 0.64], [0.6, 0.18, 0.32, 0.64])
    axes = [fig.add_axes(rect) for rect in rects]

    # Inputs are loaded in command-line order: first file left, second right.
    datasets = [import_data(filename) for filename in args]
    titles = ("Yellow catfish", "Medicago")

    shared = {
        "xlabel": "Fraction of markers",
        "ylabels": ("Anchor rate", "Runtime (m)"),
        "legend": ("anchor rate", "runtime"),
    }
    for ax, data, title in zip(axes, datasets, titles):
        subplot_twinx(
            ax,
            data,
            shared["xlabel"],
            shared["ylabels"],
            title=title,
            legend=shared["legend"],
        )

    # Panel letters sit on the full-figure axes.
    letters = ((0.04, 0.92, "A"), (0.54, 0.92, "B"))
    panel_labels(root, letters)
    normalize_axes(root)

    outfile = "resample." + iopts.format
    savefig(outfile, dpi=iopts.dpi, iopts=iopts)
Exemple #8
0
def compare2(args):
    """
    %prog compare2

    Compare performances of various variant callers on simulated STR datasets.
    """
    # The docstring is reused as the usage message of the option parser.
    parser = OptionParser(compare2.__doc__)
    parser.add_option('--maxinsert',
                      default=300,
                      type="int",
                      help="Maximum number of repeats")
    add_simulate_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="10x5")

    # This command takes options only, no positional arguments.
    if len(args) != 0:
        sys.exit(not parser.print_help())

    depth, readlen, distance = opts.depth, opts.readlen, opts.distance
    max_insert = opts.maxinsert
    fig, (left_ax, right_ax) = plt.subplots(
        ncols=2, nrows=1, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=2)

    # Left panel: lobSTR vs TREDPARSE with haploid model.
    # Right panel: lobSTR vs TREDPARSE with diploid model.
    panels = (
        (left_ax, "lobstr_results_homo.txt", "tredparse_results_homo.txt",
         {}),
        (right_ax, "lobstr_results_het.txt", "tredparse_results_het.txt",
         {"exclude": 20}),
    )
    for ax, lobstr_file, tredparse_file, parse_kwargs in panels:
        lobstr_results = parse_results(lobstr_file, **parse_kwargs)
        tredparse_results = parse_results(tredparse_file, **parse_kwargs)
        if ax is left_ax:
            heading = SIMULATED_HAPLOID
        else:
            heading = SIMULATED_DIPLOID
        params = r" ($D=%s\times, L=%dbp, V=%dbp$)" % (depth, readlen, distance)
        plot_compare(ax,
                     heading + params,
                     tredparse_results,
                     lobstr_results,
                     max_insert=max_insert)

    # Same square data range on both panels.
    for ax in (left_ax, right_ax):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    # Panel letters on a transparent full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    top = 1 - pad
    panel_labels(root, ((pad / 2, top, "A"), (1 / 2., top, "B")))
    normalize_axes(root)

    savefig("tredparse." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Exemple #9
0
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # The output image is named after the input file, minus its extension.
    pf = datafile.rsplit(".", 1)[0]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    # Offset, in data units, between a threshold line and its caption.
    text_pad = 2

    # Read benchmark data from the file given on the command line (it used to
    # be hard-coded to "Evaluation.csv", silently ignoring the argument).
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    xmax = max(truth)
    axes = (ax1, ax2, ax3, ax4)
    progs = ("Manta", "Isaac", "GATK", "lobSTR")
    markers = ("bx-", "yo-", "md-", "c+-")

    # One panel per caller. `infected_thr` and `ref_thr` are not defined in
    # this function; they are expected to exist at module level.
    for ax, prog, marker in zip(axes, progs, markers):
        ax.plot(truth, df[prog], marker)
        ax.plot(truth, truth, 'k--')  # to show diagonal
        ax.axhline(infected_thr, color='tomato')
        ax.text(xmax - text_pad,
                infected_thr + text_pad,
                'Risk threshold',
                bbox=bbox,
                ha="right")
        ax.axhline(ref_thr, color='tomato')
        ax.text(xmax - text_pad,
                ref_thr - text_pad,
                'Reference repeat count',
                bbox=bbox,
                ha="right",
                va="top")
        ax.set_title(SIMULATED_HAPLOID)
        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.legend([prog, 'Truth'], loc='best')

    # Panel letters on a transparent full-figure axes (figure coordinates).
    root = fig.add_axes([0, 0, 1, 1])
    label_pad = .03
    panel_labels(root, ((label_pad / 2, 1 - label_pad, "A"),
                        (1 / 2., 1 - label_pad, "B"),
                        (label_pad / 2, 1 / 2., "C"),
                        (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #10
0
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Draw the combined absdiff / heritability twin figure on figure 1.

    The top row (panels A-D) holds one ``plot_abs_diff`` panel per trait; the
    two rows below (panels E-H) hold the matching ``plot_paired_values``
    panels. Nothing is returned and nothing is saved here.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)

    # Top row: four narrow panels, each two grid rows tall.
    abs_telo, abs_chrx, abs_chry, abs_tra = [
        plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
        for col in range(4)
    ]
    # Below: four panels that are each half the figure width.
    pair_telo, pair_chrx, pair_chry, pair_tra = [
        plt.subplot2grid(grid, origin, rowspan=2, colspan=2)
        for origin in ((2, 0), (2, 2), (4, 0), (4, 2))
    ]

    # Telomere length
    trait = extract_trait(df, "Sample name", "telomeres.Length")
    mz = extract_twin_values(sameGenderMZ, trait)
    dz = extract_twin_values(sameGenderDZ, trait)
    plot_paired_values(pair_telo, mz, dz, label="Telomere length")
    plot_abs_diff(abs_telo, mz, dz, label="Telomere length")

    # ChrX copy number: female pairs only, DZ values passed through
    # filter_low_values with a 1.75 cutoff.
    trait = extract_trait(df, "Sample name", "ccn.chrX")
    mz = extract_twin_values(sameGenderMZ, trait, gender="Female")
    dz = extract_twin_values(sameGenderDZ, trait, gender="Female")
    dz = filter_low_values(dz, 1.75)
    plot_paired_values(pair_chrx, mz, dz,
                       gender="Female only", label="ChrX copy number")
    plot_abs_diff(abs_chrx, mz, dz, label="ChrX copy number")

    # ChrY copy number: male pairs only, DZ values passed through
    # filter_low_values with a .75 cutoff.
    trait = extract_trait(df, "Sample name", "ccn.chrY")
    mz = extract_twin_values(sameGenderMZ, trait, gender="Male")
    dz = extract_twin_values(sameGenderDZ, trait, gender="Male")
    dz = filter_low_values(dz, .75)
    plot_paired_values(pair_chry, mz, dz,
                       gender="Male only", label="ChrY copy number")
    plot_abs_diff(abs_chry, mz, dz, label="ChrY copy number")

    # TCR-alpha deletions (TRA.PPM)
    trait = extract_trait(df, "Sample name", "TRA.PPM")
    mz = extract_twin_values(sameGenderMZ, trait)
    dz = extract_twin_values(sameGenderDZ, trait)
    plot_paired_values(pair_tra, mz, dz, label="TCR-$\\alpha$ deletions")
    plot_abs_diff(abs_tra, mz, dz, label="TCR-$\\alpha$ deletions")

    plt.tight_layout()

    # Transparent full-figure axes carrying the panel letters:
    # A-D for the absdiff row, E-H for the heritability panels.
    root = fig.add_axes((0, 0, 1, 1))
    top_letters = ((.03, .99, 'A'), (.27, .99, 'B'),
                   (.53, .99, 'C'), (.77, .99, 'D'))
    lower_letters = ((.03, .67, 'E'), (.53, .67, 'F'),
                     (.03, .34, 'G'), (.53, .34, 'H'))
    panel_labels(root, top_letters + lower_letters)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #11
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    # One figure row per benchmark file.
    ywidth = 1. / len(bcs)
    # zip() below pairs files with these tags, so at most three files are
    # drawn; the row and title labels further down are also fixed at three.
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Each row: program name, program count, truth count, shared count.
        data = []
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        # One diagram per program, side by side within the row.
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Written with write() so it behaves the same on Python 2 and 3
            # (the `print >> sys.stderr` statement fails on Python 3).
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles, positioned in figure coordinates.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Exemple #12
0
def composite_correlation(df, size=(12, 8)):
    """Plot the composite correlation figure (four traits vs. age).

    Draws a 2x2 grid of scatter plots on matplotlib figure 1. Every panel
    uses ``df["hli_calc_age_sample_taken"]`` on the x axis and colors the
    points by ``df["Chemistry"]``; the y axes show ``teloLength`` (A),
    ``ccn.chrX`` (B), ``TRA.PPM`` (C) and ``ccn.chrY`` (D).

    Args:
        df: table exposing the columns named above via ``df[...]``.
        size: figure size handed to ``plt.figure``.

    Returns:
        None. The figure is left open for the caller; nothing is saved here.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    # The trailing NaN key colors samples that have no chemistry value.
    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))

    age_label = "Chronological age (yr)"
    age = df["hli_calc_age_sample_taken"]
    # The per-point colors are the same for all four panels.
    chem_colors = df["Chemistry"].map(color_map)
    scatter_style = dict(s=10, marker='.', color=chem_colors)

    ax1.scatter(age, df["teloLength"], **scatter_style)
    ax1.set_ylim(0, 15)
    ax1.set_ylabel("Telomere length (Kb)")

    ax2.scatter(age, df["ccn.chrX"], **scatter_style)
    ax2.set_ylim(1.8, 2.1)
    ax2.set_ylabel("ChrX copy number")

    ax4.scatter(age, df["ccn.chrY"], **scatter_style)
    ax4.set_ylim(0.8, 1.1)
    ax4.set_ylabel("ChrY copy number")

    ax3.scatter(age, df["TRA.PPM"], **scatter_style)
    ax3.set_ylim(0, 250)
    ax3.set_ylabel("$TCR-\\alpha$ deletions (count per million reads)")

    # Legend lists only the three named chemistries. zip() must be turned
    # into a list before slicing because it is an iterator on Python 3.
    named = list(zip(chemistry, colors))[:3]
    legend_elements = [Line2D([0], [0], marker='.', color='w', label=chem,
                              markerfacecolor=color, markersize=16)
                       for (chem, color) in named]
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()
    # Transparent full-figure axes carrying the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #13
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # The docstring is reused as the usage message of the option parser.
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    # The figure height is split evenly between the benchmark files.
    ywidth = 1. / len(bcs)
    # Files are paired with these tags by zip(), so at most three are drawn.
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Read the whole benchmark inside a context manager so the handle is
        # closed even if a malformed row raises.
        with open(bc) as fp:
            data = []
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                pcounts = int(pcounts)
                tcounts = int(tcounts)
                shared = int(shared)
                data.append((prog, pcounts, tcounts, shared))

        xstart = 0
        # The row width is split evenly between the programs in this file.
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            # Program-only, truth-only and shared counts for the two circles.
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles; both lists are fixed at three rows.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Exemple #14
0
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    # The docstring is reused as the usage message of the option parser.
    parser = OptionParser(ploidy.__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not parser.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Everything is drawn onto one axes that spans the whole figure.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # Gene orientation legend.
    draw_gene_legend(root, 0.27, 0.37, 0.52)

    # WGD event annotations: a labelled circle per event at a fixed x, a
    # "x2" note next to each one, and dotted vertical connectors.
    fc = "lightslategrey"
    x = 0.09
    radius = 0.012
    events = ((0.825, r"$\tau$"), (0.8, r"$\sigma$"), (0.72, r"$\rho$"))
    for ypos, symbol in events:
        TextCircle(root, x, ypos, symbol, radius=radius, fc=fc)
    for ypos, _symbol in events:
        root.text(0.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    for yspan in ([0.85, 0.775], [0.75, 0.675]):
        root.plot([x, x], yspan, ":", color=fc, lw=2)

    panel_labels(root, ((0.04, 0.96, "A"), (0.04, 0.54, "B")))

    # Unit coordinate system, no visible axis decorations.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "pineapple-karyotype" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #15
0
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it must name this command (`ploidy`).
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    # Five positional files are required; otherwise show the help and exit.
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto this single full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events: one labelled circle per event, a "x2" note
    # beside each, and dotted vertical lines linking them
    fc = 'lightslategrey'
    x = .09
    radius = .012
    TextCircle(root, x, .825, r'$\tau$', radius=radius, fc=fc)
    TextCircle(root, x, .8, r'$\sigma$', radius=radius, fc=fc)
    TextCircle(root, x, .72, r'$\rho$', radius=radius, fc=fc)
    for ypos in (.825, .8, .72):
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    # Work in unit figure coordinates and hide the frame and ticks.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #16
0
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    # Four data files are required, one per panel.
    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure canvas for the panel letters, then a 2x2 layout of panels.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.12, 0.62, 0.35, 0.35])
    B = fig.add_axes([0.62, 0.62, 0.35, 0.35])
    C = fig.add_axes([0.12, 0.12, 0.35, 0.35])
    D = fig.add_axes([0.62, 0.12, 0.35, 0.35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5)
    subplot(
        B,
        dataB,
        "Translocation error rate",
        "Accuracy",
        xlim=0.5,
        # Raw string: the label keeps its literal backslashes before "%"
        # without relying on the invalid "\%" escape sequence.
        legend=("intra-chromosomal", "inter-chromosomal", r"75\% intra + 25\% inter"),
    )
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = (
        (0.03, 0.97, "A"),
        (0.53, 0.97, "B"),
        (0.03, 0.47, "C"),
        (0.53, 0.47, "D"),
    )
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #17
0
def synteny(args):
    """
    %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \
        b1.blocks all.bed b1.layout

    Create a composite figure with (A) wgd and (B) microsynteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    p = OptionParser(synteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x12")

    # Five positional files are required. Without this guard a wrong count
    # surfaces as a bare unpacking ValueError instead of the usage message.
    if len(args) != 5:
        sys.exit(not p.print_help())

    (bedfile, sizesfile, blocksfile, allbedfile, blockslayout) = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # root carries the panel letters; ax1 is the top half, ax2 the bottom half.
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0.5, 1, 0.5])
    ax2 = fig.add_axes([0.02, 0, 0.98, 0.5])

    # Panel A
    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    # Panel B
    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([root, ax1, ax2])
    labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B"))
    panel_labels(root, labels)

    # The output name is fixed; iopts.format is not consulted here.
    image_name = "synteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #18
0
def likelihood3(args):
    """
    %prog likelihood3 200_20.json 200_100.json

    Plot the likelihood surface and marginal distributions for two settings.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it must name this command (`likelihood3`).
    from matplotlib import gridspec

    p = OptionParser(likelihood3.__doc__)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="10x10",
                                            style="white",
                                            cmap="coolwarm")
    # Two JSON files are required, one per setting.
    if len(args) != 2:
        sys.exit(not p.print_help())

    jsonfile1, jsonfile2 = args
    fig = plt.figure(figsize=(iopts.w, iopts.h))
    # 9-row grid: rows 0-3 hold the first setting, row 4 is left empty as a
    # spacer, rows 5-8 hold the second setting. In each half the left column
    # is one tall axes and the right column is split into two stacked axes.
    gs = gridspec.GridSpec(9, 2)
    ax1 = fig.add_subplot(gs[:4, 0])
    ax2 = fig.add_subplot(gs[:2, 1])
    ax3 = fig.add_subplot(gs[2:4, 1])
    ax4 = fig.add_subplot(gs[5:, 0])
    ax5 = fig.add_subplot(gs[5:7, 1])
    ax6 = fig.add_subplot(gs[7:, 1])
    plt.tight_layout(pad=2)

    plot_panel(jsonfile1, ax1, ax2, ax3, opts.cmap)
    plot_panel(jsonfile2, ax4, ax5, ax6, opts.cmap)

    # Panel letters on a transparent full-figure axes; "B" is placed at 4/9
    # of the figure height.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .02
    panel_labels(root, ((pad, 1 - pad, "A"), (pad, 4. / 9, "B")))
    normalize_axes(root)

    image_name = "likelihood3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #19
0
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    # The docstring is reused as the usage message of the option parser.
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    # One data file per panel, four panels in total.
    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # root spans the figure and carries the panel letters; A-D form a 2x2 grid.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # The last legend entry is a raw string so that the backslashes before
    # "%" are literal; "\%" in a normal string is an invalid escape sequence.
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #20
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    # The docstring above is passed to OptionParser as the usage text.
    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step",
                 default=10000000,
                 type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    # Four positional arguments are required; otherwise print help and exit.
    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    # `diverge` unpacks into two colors used throughout the figure.
    diverge = iopts.diverge
    rr, gg = diverge
    # Each comma-separated name becomes its own single-element list.
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes used for labels, annotations and the coverage panels.
    root = fig.add_axes([0, 0, 1, 1])

    # NOTE: chr_sizes is unpacked but not used again in this function.
    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    # NOTE(review): `gap` and `template_f3a` are not defined in this
    # function; presumably module-level names -- confirm.
    K = Karyotype(fig,
                  root,
                  seqidsfile,
                  klayout,
                  gap=gap,
                  height=height,
                  lw=2,
                  generank=False,
                  sizes=sizes,
                  heightpad=r,
                  roundrect=True,
                  plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Labels whose track starts left of x = .11 are nudged right and up.
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15, ha="right", va="center")

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    # One inset axis is collected per (track, datafile) pair.
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        # NOTE: `chr` shadows the builtin of the same name inside this loop.
        chr = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, chr, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        # A second axis on the same track carries the base-pair gauge.
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    # The four conversion tracks are drawn twice: asterisk=False, then True.
    # NOTE(review): these paths are hard-coded under "data/" and do not use
    # the `datadir` argument -- confirm this is intended.
    for asterisk in (False, True):
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0,
                         "A02",
                         ax_Ar,
                         rr,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1,
                         "C2",
                         ax_Co,
                         gg,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0,
                         "A02",
                         ax_Ar,
                         gg,
                         ypos=1,
                         asterisk=asterisk)
        conversion_track(order,
                         "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1,
                         "C2",
                         ax_Co,
                         rr,
                         ypos=1,
                         asterisk=asterisk)

    # Inset axes of the second and third tracks receive the text labels.
    Ar, Co = xy_axes[1:3]
    # Each entry: (axis, "gene1 gene2", horizontal alignment, label text).
    annotations = ((Ar, "Bra028920 Bra028897", "center",
                    "1DAn2+"), (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left",
                    "3DAn2+"), (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right",
                    "5DAn2+ (GSL)"), (Co, "Bo2g001000 Bo2g001300", "left",
                                      "1DCn2-"), (Co, "Bo2g018560 Bo2g023700",
                                                  "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left",
                    "3DCn2-"), (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        # The anchor x depends on the alignment: scaled midpoint for
        # "center", second gene for "left", first gene otherwise.
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        # Labels containing "+" use the first diverge color, others the second.
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    # NOTE: `order` is rebound here from the Bed lookup to a list of names.
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Upper coverage panel, horizontally aligned with the first track.
    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig,
                 root,
                 canvas1,
                 chr1, (0, s1),
                 datadir,
                 order=order,
                 gauge=None,
                 plot_chr_label=False,
                 gauge_step=gstep,
                 palette="gray",
                 cap=40,
                 hlsuffix=hlsuffix,
                 labels_dict=labels_dict,
                 diverge=diverge)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # Each entry: (x, y, dx, dy, label); the text sits at (x + dx, y + dy).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2,
                    "FLC"), (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2,
                    "FLC"), (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0,
                    "GSL"), (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # NOTE(review): `frac` may not be accepted in arrowprops by recent
    # matplotlib releases -- verify against the installed version.
    arrowprops = dict(facecolor='black',
                      shrink=.05,
                      frac=.5,
                      width=1,
                      headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label,
                      xy=(x, y),
                      xytext=(x + dx, y + dy),
                      arrowprops=arrowprops,
                      color=rr,
                      fontsize=9,
                      ha="center",
                      va="center")

    # Lower coverage panel, horizontally aligned with the last track.
    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig,
             root,
             canvas2,
             chr2, (0, s2),
             datadir,
             order=order,
             gauge=None,
             plot_chr_label=False,
             gauge_step=gstep,
             palette="gray",
             cap=40,
             hlsuffix=hlsuffix,
             labels_dict=labels_dict,
             diverge=diverge)

    # Panel letters A, B and C.
    pad = .03
    labels = ((.1, .67, "A"), (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #21
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    # The docstring above is the usage text shown by the option parser.
    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[name] for name in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Only the summed sizes and the ratio are used further down.
    _, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # --- Synteny panel ---
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    karyotype = Karyotype(
        fig, root, seqidsfile, klayout,
        gap=gap, height=height, lw=2, generank=False, sizes=sizes,
        heightpad=r, roundrect=True, plot_label=False,
    )

    # --- Chromosome labels ---
    for entry in karyotype.layout:
        if entry.empty:
            continue
        lx, ly = entry.xstart, entry.y
        if lx < .11:
            # Tracks starting near the left edge get a shifted label.
            lx += .1
            ly += .06
        root.text(lx - .015, ly, entry.label, fontsize=15,
                  ha="right", va="center")

    # --- Inset histograms, one per track ---
    datafile_names = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                      "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, name) for name in datafile_names]
    tracks = karyotype.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for track, datafile in zip(tracks, datafiles):
        inset = make_affix_axis(fig, track, -r, height=2 * r)
        xy_axes.append(inset)
        seqid = track.seqids[0]
        xy = XYtrack(inset, datafile, color="lightslategray")
        start, end = 0, track.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        inset.set_xlim(start, end)
        # Separate axis on the same track for the base-pair gauge.
        gauge_ax = make_affix_axis(fig, track, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # --- Converted gene tracks ---
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r/2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r/2)

    order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    # (file, column, sequence, axis, color, extra keyword arguments), listed
    # in the order the tracks must be drawn.
    conversions = (
        (atoc, 0, "A02", ax_Ar, rr, {}),
        (atoc, 1, "C2", ax_Co, gg, {}),
        (ctoa, 0, "A02", ax_Ar, gg, {"ypos": 1}),
        (ctoa, 1, "C2", ax_Co, rr, {"ypos": 1}),
    )
    for asterisk in (False, True):
        for filename, column, target, track_ax, shade, extra in conversions:
            conversion_track(order, filename, column, target, track_ax,
                             shade, asterisk=asterisk, **extra)

    # --- Text labels on the two middle insets ---
    Ar, Co = xy_axes[1:3]
    gene_labels = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )
    for inset, genes, ha, text in gene_labels:
        first, second = genes.split()
        pos1, pos2 = order[first][1].start, order[second][1].start
        if ha == "center":
            xpos = (pos1 + pos2) / 2 * .8
        elif ha == "left":
            xpos = pos2
        else:
            xpos = pos1
        text = r"\textit{{{0}}}".format(text)
        shade = rr if "+" in text else gg
        inset.text(xpos, 30, text, color=shade, fontsize=9,
                   ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # --- Coverage in resequencing lines ---
    gstep = 5000000
    samples = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]
    # Keyword arguments shared by both coverage panels.
    coverage_kwargs = dict(order=samples, gauge=None, plot_chr_label=False,
                           gauge_step=gstep, palette="gray", cap=40,
                           hlsuffix=hlsuffix, labels_dict=labels_dict,
                           diverge=diverge)

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    upper = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                     **coverage_kwargs)
    yys = upper.yys
    flc_x, gsl_x = .37, .72
    tip = .02
    # (x, y, dx, dy, label): arrow points at (x, y), text at (x+dx, y+dy).
    arrows = (
        (flc_x, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
        (flc_x, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
        (flc_x, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
        (gsl_x, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (gsl_x, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (gsl_x, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"),
    )

    arrowprops = dict(facecolor='black', shrink=.05, frac=.5,
                      width=1, headwidth=4)
    for x, y, dx, dy, text in arrows:
        root.annotate(r"\textit{{{0}}}".format(text),
                      xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir, **coverage_kwargs)

    # --- Panel letters and output ---
    pad = .03
    panel_labels(root, ((.1, .67, "A"),
                        (t1.xstart - 3 * pad, .95 + pad, "B"),
                        (t2.xstart - 3 * pad, .25 + pad, "C")))
    normalize_axes(root)

    savefig("napus-fig3." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Exemple #22
0
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    # Use this function's own docstring as usage text (the module-level
    # __doc__ would print the wrong help message).
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="15x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Output image is named after the input file, minus its extension.
    pf = datafile.rsplit(".", 1)[0]
    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,
                                        nrows=1,
                                        figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # Huntington risk allele
    infected_thr = 40
    ref_thr = 19

    # ax1: Multiple callers at lower range
    # Read the file given on the command line rather than a fixed name.
    df = pd.read_csv(datafile)
    truth = df["Truth"]

    ax1.plot(truth, df["Manta"], 'bx-')
    ax1.plot(truth, df["Isaac"], 'yo-')
    ax1.plot(truth, df["GATK"], 'md-')
    ax1.plot(truth, df["lobSTR"], 'c+-')
    ax1.plot(truth, truth, 'k--')  # to show diagonal

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    pad = 2
    ax1.axhline(infected_thr, color='tomato')
    ax1.text(max(truth) - pad,
             infected_thr + pad,
             'Risk threshold',
             bbox=bbox,
             ha="right")
    ax1.axhline(ref_thr, color='tomato')
    ax1.text(max(truth) - pad,
             ref_thr - pad,
             'Reference repeat count',
             bbox=bbox,
             ha="right",
             va="top")

    ax1.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax1.set_ylabel('Num of CAG repeats called')
    ax1.set_title(r'Simulated haploid $\mathit{h}$')
    ax1.legend(['Manta', 'Isaac', 'GATK', 'lobSTR', 'Truth'], loc='best')

    max_insert = 120
    # ax2: lobSTR vs TREDPARSE with haploid model
    lobstr_results = parse_results("lobstr_results_homo.txt")
    tredparse_results = parse_results("tredparse_results_homo.txt")
    truth = range(10, max_insert + 1)
    lx, ly = zip(*lobstr_results)
    tx, ty = zip(*tredparse_results)

    ax2.plot(lx, ly, 'c+-')
    ax2.plot(tx, ty, 'gx-')
    ax2.plot(truth, truth, 'k--')

    ax2.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax2.set_ylabel('Num of CAG repeats called')
    ax2.set_title(r'Simulated haploid $\mathit{h}$')
    ax2.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')

    # Panels B and C use twice the text padding of panel A.
    pad *= 2
    ax2.axhline(infected_thr, color='tomato')
    ax2.text(max(truth) - pad,
             infected_thr + pad,
             'Risk threshold',
             bbox=bbox,
             ha="right")
    ax2.set_xlim(10, max_insert)

    # ax3: lobSTR vs TREDPARSE with diploid model
    lobstr_results = parse_results("lobstr_results_het.txt", exclude=20)
    tredparse_results = parse_results("tredparse_results_het.txt", exclude=20)
    truth = range(10, max_insert + 1)
    lx, ly = zip(*lobstr_results)
    tx, ty = zip(*tredparse_results)

    ax3.plot(lx, ly, 'c+-')
    ax3.plot(tx, ty, 'gx-')
    ax3.plot(truth, truth, 'k--')

    ax3.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax3.set_ylabel('Num of CAG repeats called')
    ax3.set_title(r'Simulated diploid $\mathit{20/h}$')
    ax3.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')
    ax3.axhline(infected_thr, color='tomato')
    ax3.text(max(truth) - pad,
             infected_thr + pad,
             'Risk threshold',
             bbox=bbox,
             ha="right")
    ax3.set_xlim(10, max_insert)

    # Full-figure axes carrying the panel letters; `pad` is now a figure
    # fraction rather than a data-space offset.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 3., 1 - pad, "B"),
                        (2 / 3., 1 - pad, "C")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #23
0
def compare3(args):
    """
    %prog compare3

    Compare performances of various variant callers on simulated STR datasets.
    This compares the power of various evidence types.
    """
    # The docstring above is passed to OptionParser as the usage text.
    p = OptionParser(compare3.__doc__)
    p.add_option('--maxinsert',
                 default=300,
                 type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    # This command takes no positional arguments.
    if len(args) != 0:
        sys.exit(not p.print_help())

    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    # Keyword arguments shared by all four panels.
    common = dict(color="lightslategray", max_insert=max_insert, risk=False)

    # ax1: Spanning
    tredparse_results = parse_results("tredparse_results_het-spanning.txt")
    # Title spacing fixed to match the other three panels
    # (was "( Sub-model 1: ...").
    title = SIMULATED_DIPLOID + " (Sub-model 1: Spanning reads)"
    plot_compare(ax1, title, tredparse_results, None, **common)

    # ax2: Partial
    tredparse_results = parse_results("tredparse_results_het-partial.txt",
                                      exclude=20)
    title = SIMULATED_DIPLOID + " (Sub-model 2: Partial reads)"
    plot_compare(ax2, title, tredparse_results, None, **common)

    # ax3: Repeat
    tredparse_results = parse_results("tredparse_results_het-repeat.txt",
                                      exclude=20)
    # HACK (repeat reads won't work under 50)
    tredparse_results = [x for x in tredparse_results if x[0] > 50]
    title = SIMULATED_DIPLOID + " (Sub-model 3: Repeat-only reads)"
    plot_compare(ax3, title, tredparse_results, None, **common)

    # ax4: Pair
    tredparse_results = parse_results("tredparse_results_het-pair.txt",
                                      exclude=20)
    title = SIMULATED_DIPLOID + " (Sub-model 4: Paired-end reads)"
    plot_compare(ax4, title, tredparse_results, None, **common)

    # Same square data range on every panel.
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    # Full-figure axes carrying the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #24
0
def compare4(args):
    """
    %prog compare4

    Compare performances of various variant callers on simulated STR datasets.
    Adds coverage comparisons as panel C and D.
    """
    # The docstring above is passed to OptionParser as the usage text.
    p = OptionParser(compare4.__doc__)
    p.add_option('--maxinsert',
                 default=300,
                 type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    # This command takes no positional arguments.
    if len(args) != 0:
        sys.exit(not p.print_help())

    depth = opts.depth
    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    # NOTE(review): the input file names for panels A and B hard-code "20x"
    # while their titles use opts.depth -- confirm these are meant to agree.

    # ax1: lobSTR vs TREDPARSE with haploid model
    lobstr_results = parse_results("lobstr_results_homo-20x-150bp-500bp.txt")
    tredparse_results = parse_results(
        "tredparse_results_homo-20x-150bp-500bp.txt")
    # Closing "$" added so the math expression is balanced, matching the
    # titles of the other three panels.
    title = SIMULATED_HAPLOID + r" ($Depth=%s\times$)" % depth
    plot_compare(ax1,
                 title,
                 tredparse_results,
                 lobstr_results,
                 max_insert=max_insert)

    # ax2: lobSTR vs TREDPARSE with diploid model (depth=20x)
    lobstr_results = parse_results("lobstr_results_het-20x-150bp-500bp.txt",
                                   exclude=20)
    tredparse_results = parse_results(
        "tredparse_results_het-20x-150bp-500bp.txt", exclude=20)
    title = SIMULATED_DIPLOID + r" ($Depth=%s\times$)" % depth
    plot_compare(ax2,
                 title,
                 tredparse_results,
                 lobstr_results,
                 max_insert=max_insert)

    # ax3: lobSTR vs TREDPARSE with diploid model (depth=5x)
    lobstr_results = parse_results("lobstr_results_het-5x-150bp-500bp.txt",
                                   exclude=20)
    tredparse_results = parse_results(
        "tredparse_results_het-5x-150bp-500bp.txt", exclude=20)
    title = SIMULATED_DIPLOID + r" ($Depth=%s\times$)" % 5
    plot_compare(ax3,
                 title,
                 tredparse_results,
                 lobstr_results,
                 max_insert=max_insert)

    # ax4: lobSTR vs TREDPARSE with diploid model (depth=80x)
    lobstr_results = parse_results("lobstr_results_het-80x-150bp-500bp.txt",
                                   exclude=20)
    tredparse_results = parse_results(
        "tredparse_results_het-80x-150bp-500bp.txt", exclude=20)
    title = SIMULATED_DIPLOID + r" ($Depth=%s\times$)" % 80
    plot_compare(ax4,
                 title,
                 tredparse_results,
                 lobstr_results,
                 max_insert=max_insert)

    # Same square data range on every panel.
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    # Full-figure axes carrying the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #25
0
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    # The docstring above is passed to OptionParser as the usage text.
    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes for chromosome bars, captions and panel letters.
    root = fig.add_axes([0, 0, 1, 1])

    # Styling shared by the thick translucent highlight lines.
    highlight = dict(lw=12, alpha=0.3, solid_capstyle="round",
                     solid_joinstyle="round")

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    # Ten points along the diagonal with random jitter of up to 3 units.
    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    # Reverse the middle four y values to create a decreasing run.
    ydata[3:7] = ydata[3:7][::-1]
    # zip() must be materialized: the result is sliced and indexed below,
    # which a lazy zip object (Python 3) does not support.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)",
              ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    # x positions of the vertical lines that split the axis into three
    # segments (named separately so the parser `p` is not clobbered).
    breaks = (0, 45, 75, 110)
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    # Move points 4-6 to the end, reversed; ydata_orig stays untouched
    # because the concatenation builds a new list.
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    # Map the break positions from data units to figure fractions.
    patch = [0.1 + w * x / 110.0 for x in breaks]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch,
                         height=0.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in breaks]
    # Unshuffled y values: every point lies on the diagonal.
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch,
                         height=0.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #26
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # The docstring above is passed to OptionParser as the usage text.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    # Both input files are derived from the prefix argument.
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    def get_cm(x):
        # Accessor handed to Map and GapEstimator; reads the `cm` attribute.
        return x.cm

    cc = Map(bedfile, scaffold_info=True, function=get_cm)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=get_cm)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes for the zoom lines, panel C and the panel letters.
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    # Flip the y axis when the correlation is negative.
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    # component id -> (begin, end, span, orientation) for non-gap AGP lines.
    sizes = {
        x.component_id: (x.object_beg, x.object_end,
                         x.component_span, x.orientation)
        for x in g.agp if not x.is_gap
    }
    # Only the start of `a`, the end of `b` and both spans are needed.
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    # Scale the two components plus the gap to 0.6 of the figure width.
    ratio = .6 / total_size
    y = .16
    pad = .03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    spans = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize), (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        # Convert the marker offset into a figure x coordinate.
        marker = beg + marker * ratio
        spans.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*spans)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid Python escape sequence. The runtime
    # text is unchanged.
    root.text(sum(markers) / 2, ypos + pad,
              r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
              **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    # Dotted line spanning the gap between the two components.
    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed and does not depend on the input prefix.
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #27
0
def composite_ccn(df, size=(12, 8)):
    """Plot composite ccn figure.

    Draws a 2x2 grid of scatter plots, each with a fitted line drawn by
    `plot_fit_line`, of a copy-number column against the sample age column:

    - A: ``ccn.chrX`` for rows whose ``hli_calc_gender`` is ``"Male"``
    - B: ``ccn.chrY`` for the same rows
    - C: ``ccn.chr1`` for all rows
    - D: ``ccn.chrM`` for all rows

    Args:
        df: table indexable by column name that provides the columns
            ``hli_calc_gender``, ``hli_calc_age_sample_taken``, ``ccn.chrX``,
            ``ccn.chrY``, ``ccn.chr1`` and ``ccn.chrM``.
        size: figure size handed to ``plt.figure``.

    The function draws on figure number 1 and returns nothing; saving the
    figure is left to the caller.
    """
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    mf = df[df["hli_calc_gender"] == "Male"]

    age_col = "hli_calc_age_sample_taken"
    age_label = "Chronological age (yr)"
    # Every panel shares the same marker styling
    dots = dict(s=10, marker=".", color="lightslategray")

    # Panel A: chrX, male rows only
    ax1.scatter(mf[age_col], mf["ccn.chrX"], **dots)
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf[age_col], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    # Panel B: chrY, male rows only
    ax2.scatter(mf[age_col], mf["ccn.chrY"], **dots)
    plot_fit_line(ax2, mf[age_col], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    # Panel C: chr1, all rows
    ax3.scatter(df[age_col], df["ccn.chr1"], **dots)
    plot_fit_line(ax3, df[age_col], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    # Panel D: chrM, all rows
    ax4.scatter(df[age_col], df["ccn.chrM"], **dots)
    plot_fit_line(ax4, df[age_col], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Invisible full-canvas axes that only carries the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"), (0.02, 0.5, "C"),
              (0.52, 0.5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #28
0
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Embed both absdiff figures and heritability figures.

    The canvas is a 6x4 grid: the top two rows hold four single-column
    panels (drawn with `plot_abs_diff`), the four rows below hold four
    double-column panels (drawn with `plot_paired_values`). Each trait is
    pulled from `df` with `extract_trait` and paired up through
    `extract_twin_values` for the MZ and DZ twin sets.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)

    # Top strip, one column each (panels A-D)
    ax1a, ax2a, ax3a, ax4a = [
        plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
        for col in range(4)
    ]
    # Lower 2x2 block, two columns each (panels E-H)
    ax1b, ax2b, ax3b, ax4b = [
        plt.subplot2grid(grid, corner, rowspan=2, colspan=2)
        for corner in ((2, 0), (2, 2), (4, 0), (4, 2))
    ]

    # Telomeres
    telo_label = "Telomere length"
    telo = extract_trait(df, "Sample name", "telomeres.Length")
    telo_mz = extract_twin_values(sameGenderMZ, telo)
    telo_dz = extract_twin_values(sameGenderDZ, telo)
    plot_paired_values(ax1b, telo_mz, telo_dz, label=telo_label)
    plot_abs_diff(ax1a, telo_mz, telo_dz, label=telo_label)

    # CCNX (female pairs only)
    chrx_label = "ChrX copy number"
    chrx = extract_trait(df, "Sample name", "ccn.chrX")
    chrx_mz = extract_twin_values(sameGenderMZ, chrx, gender="Female")
    chrx_dz = extract_twin_values(sameGenderDZ, chrx, gender="Female")
    chrx_dz = filter_low_values(chrx_dz, 1.75)
    plot_paired_values(
        ax2b, chrx_mz, chrx_dz, gender="Female only", label=chrx_label
    )
    plot_abs_diff(ax2a, chrx_mz, chrx_dz, label=chrx_label)

    # CCNY (male pairs only)
    chry_label = "ChrY copy number"
    chry = extract_trait(df, "Sample name", "ccn.chrY")
    chry_mz = extract_twin_values(sameGenderMZ, chry, gender="Male")
    chry_dz = extract_twin_values(sameGenderDZ, chry, gender="Male")
    chry_dz = filter_low_values(chry_dz, 0.75)
    plot_paired_values(
        ax3b, chry_mz, chry_dz, gender="Male only", label=chry_label
    )
    plot_abs_diff(ax3a, chry_mz, chry_dz, label=chry_label)

    # TRA
    tra_label = "TCR-$\\alpha$ deletions"
    tra = extract_trait(df, "Sample name", "TRA.PPM")
    tra_mz = extract_twin_values(sameGenderMZ, tra)
    tra_dz = extract_twin_values(sameGenderDZ, tra)
    plot_paired_values(ax4b, tra_mz, tra_dz, label=tra_label)
    plot_abs_diff(ax4a, tra_mz, tra_dz, label=tra_label)

    plt.tight_layout()

    # Transparent overlay for the panel letters:
    # ABCD absdiff, EFGH heritability
    root = fig.add_axes((0, 0, 1, 1))
    top_row = [(x, 0.99, tag) for x, tag in zip((0.03, 0.27, 0.53, 0.77), "ABCD")]
    mid_row = [(0.03, 0.67, "E"), (0.53, 0.67, "F")]
    low_row = [(0.03, 0.34, "G"), (0.53, 0.34, "H")]
    panel_labels(root, tuple(top_row + mid_row + low_row))
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #29
0
def cartoon(args):
    """
    %prog synteny.py

    Generate cartoon illustration of SynFind.
    """
    # NOTE: the docstring above doubles as the command's usage text.
    p = OptionParser(cartoon.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x7")

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: the uncolored reference region with the query in the middle
    A = CartoonRegion(41)
    A.draw(root, .35, .85, strip=False, color=False)
    x1, x2 = A.x1, A.x2
    lsg = "lightslategray"
    pad = .01
    xc, yc = .35, .88
    arrowlen = x2 - xc - pad
    arrowprops = dict(length_includes_head=True, width=.01, fc=lsg, lw=0,
                      head_length=arrowlen * .15, head_width=.03)
    # Two half-arrows pointing away from the query gene, left one first
    for x0, dx, half in ((xc - pad, -arrowlen, "left"),
                         (xc + pad, arrowlen, "right")):
        root.add_patch(FancyArrow(x0, yc, dx, 0, shape=half, **arrowprops))

    ytext = yc + 4 * pad
    root.text((x1 + xc) / 2, ytext, "20 genes upstream", ha="center")
    root.text((x2 + xc) / 2, ytext, "20 genes downstream", ha="center")
    root.plot((xc,), (yc,), "o", mfc='w', mec=lsg, mew=2, lw=2, color=lsg)
    root.text(xc, ytext, "Query gene", ha="center")

    # Panel B: colored reference plus three evolved copies in a shaded box
    A.draw(root, .35, .7, strip=False)

    RoundRect(root, (.07, .49), .56, .14, fc='y', alpha=.2)
    evolved = []
    for mode, target, ypos in (('S', 10, .6), ('F', 8, .56), ('G', 6, .52)):
        region = deepcopy(A)
        region.evolve(mode=mode, target=target)
        region.draw(root, .35, ypos)
        evolved.append(region)
    a, b, c = evolved

    for region in evolved:
        root.text(.64, region.y, "Score={0}".format(region.nonwhites),
                  va="center")

    # Panel C: one diagram per syntelog class
    A.truncate_between_flankers()
    a.truncate_between_flankers()
    b.truncate_between_flankers()
    c.truncate_between_flankers(target=6)

    diagrams = (
        (.14, a, "S", "syntenic"),
        (.37, b, "F", "missing, with both flankers"),
        (.6, c, "G", "missing, with one flanker"),
    )
    for xpos, region, code, caption in diagrams:
        plot_diagram(root, xpos, .2, A, region, code, caption)

    labels = ((.04, .95, 'A'), (.04, .75, 'B'), (.04, .4, 'C'))
    panel_labels(root, labels)

    # Descriptions, stacked along the right-hand side
    xt = .85
    desc = ("Extract neighborhood",
            "of *window* size",
            "Count gene pairs within *window*",
            "Find regions above *score* cutoff",
            "Identify flankers",
            "Annotate syntelog class"
            )
    for ydesc, line in zip((.88, .84, .64, .6, .3, .26), desc):
        root.text(xt, ydesc, markup(line), ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "cartoon" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #30
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # Companion files are located by swapping the extension of the input name
    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep only the groups in s.mlg_counts whose count reaches opts.links
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    # Size of a group = largest value of `function` over its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # root spans the whole canvas; ax1 is the left half, ax2 the right half
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    # One set2 color per map name; each group then takes the color of the
    # map named by the text before its first "-"
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        # Correlation only uses the markers placed on the requested seqid
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation is forwarded to GeneticMap as flip=True
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Labels go on the outer side: left of the map when x < .5, else right
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2,
                 label,
                 color=colors[mlg],
                 ha=ha,
                 va="center",
                 rotation=90)
        marker_pos.update(g.marker_pos)

    # Only the AGP records of this seqid; chrsize is their largest object_end
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Maps a position to a y coordinate, running from ystart down to ystop
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        # Markers with no recorded position on a drawn map are skipped
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # `f` is rebound: same ratio, now mapping a position onto the x extent
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2,
                         xstart,
                         xstop,
                         ystop,
                         height=2 * tip,
                         patch=patchstart,
                         lw=2)

    gap = .03
    # `ratio` is rebound: it now converts a group size into a subplot height,
    # after reserving `gap` per group plus `tip`
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # Subplots are stacked downwards; ystart is lowered after each one
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5,
                1 - .4 * gap / height,
                r"$\rho$={0:.3f}".format(rho),
                ha="center",
                va="top",
                transform=ax.transAxes,
                color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(
                ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        # Negatively correlated groups are shown with the y axis inverted
        if rho < 0:
            ax.invert_yaxis()

    # Group names along the divider between the two halves; with more than
    # four groups the alignment alternates between left and right
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90, ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    # Output file is named after the seqid
    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
Exemple #31
0
def likelihood(args):
    """
    %prog likelihood

    Plot likelihood surface. Look for two files in the current folder:
    - 100_100.log, haploid model
    - 100_20.log, diploid model
    """
    # NOTE: the docstring above doubles as the command's usage text.
    p = OptionParser(likelihood.__doc__)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="10x5",
                                            style="white",
                                            cmap="coolwarm")

    # This command takes no positional arguments
    if len(args) != 0:
        sys.exit(not p.print_help())

    fig, (ax1, ax2) = plt.subplots(ncols=2,
                                   nrows=1,
                                   figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=4)

    # Haploid model: one (h, log-likelihood) point per entry, ordered by h.
    # Only the first element of each key is used here.
    LL, CI_h1, CI_h2, MLE = parse_log("100_100.log")
    data = sorted((k[0], v) for k, v in LL.items())
    x, y = zip(*data)
    x = np.array(x)
    curve, = ax1.plot(x, y, "-", color=lsg, lw=2)
    ax1.set_title("Simulated haploid ($h^{truth}=100$)")

    # Extremes of the haploid curve. min_LL is reused below as the
    # background value of the diploid surface.
    h_hat, max_LL = max(data, key=lambda x: x[-1])
    _, min_LL = min(data, key=lambda x: x[-1])
    ymin, ymax = ax1.get_ylim()
    ax1.set_ylim([ymin, ymax + 30])

    LL_label = "log(Likelihood)"
    # Dotted drop line at the maximum. The annotation text is hard-coded
    # rather than formatted from h_hat.
    ax1.plot([h_hat, h_hat], [ymin, max_LL], ":", color=lsg, lw=2)
    ax1.text(h_hat, max_LL + 10, r"$\hat{h}=93$", color=lsg)
    ax1.set_xlabel(r"$h$")
    ax1.set_ylabel(LL_label)

    # Shade the area under the curve between the two CI bounds
    ci_lo, ci_hi = CI_h1
    ci = ax1.fill_between(x, [ymin] * len(x),
                          y,
                          where=(x >= ci_lo) & (x <= ci_hi),
                          color=lsg,
                          alpha=.5)
    ax1.legend([curve, ci], ["Likelihood curve", r'95$\%$ CI'], loc='best')

    # Diploid model
    LL, CI_h1, CI_h2, MLE = parse_log("100_20.log")
    # NOTE(review): the surface is pre-filled with min_LL taken from the
    # haploid run above (the original recomputed it here from the same
    # haploid list, which gave the same value). Confirm whether the diploid
    # minimum was intended instead.
    data = np.ones((301, 301)) * min_LL
    # Fill the matrix symmetrically from the (a, b) keys
    for (h1, h2), v in LL.items():
        data[h1, h2] = v
        data[h2, h1] = v

    data = mask_upper_triangle(data)
    ax_imshow(ax2, data, opts.cmap, LL_label, 20, 104)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .04
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B")))
    normalize_axes(root)

    image_name = "likelihood." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #32
0
def phylogeny(args):
    """
    %prog phylogeny treefile ks.layout

    Create a composite figure with (A) tree and (B) ks.
    """
    # NOTE: the docstring above doubles as the command's usage text.
    from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree

    p = OptionParser(phylogeny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x12")

    # Print usage and exit on a wrong argument count, instead of failing
    # with a ValueError on the unpacking below
    if len(args) != 2:
        sys.exit(not p.print_help())

    (datafile, layoutfile) = args

    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    # root covers the whole canvas; ax1 (tree) takes the top 60%,
    # ax2 (Ks plot) sits in the lower part
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0.4, 1, 0.6])
    ax2 = fig.add_axes([0.12, 0.065, 0.8, 0.3])

    margin, rmargin = 0.1, 0.2  # Left and right margin
    # Both annotation files are read from the current directory
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    outgroup = "ginkgo"

    # Panel A
    draw_tree(
        ax1,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        reroot=False,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
    )

    from jcvi.apps.ks import Layout, KsPlot, KsFile

    # Panel B
    ks_min = 0.0
    ks_max = 3.0
    bins = 60
    fill = False
    layout = Layout(layoutfile)
    print(layout, file=sys.stderr)

    kp = KsPlot(ax2, ks_max, bins, legendp="upper right")
    for lo in layout:
        # Keep only the ng_ks values inside [ks_min, ks_max]
        ks_values = [x.ng_ks for x in KsFile(lo.ksfile)]
        ks_values = [x for x in ks_values if ks_min <= x <= ks_max]
        kp.add_data(
            ks_values,
            lo.components,
            label=lo.label,
            color=lo.color,
            marker=lo.marker,
            fill=fill,
            fitted=False,
            kde=True,
        )

    kp.draw(filename=None)

    normalize_axes([root, ax1])
    labels = ((0.05, 0.95, "A"), (0.05, 0.4, "B"))
    panel_labels(root, labels)

    # The output name is fixed and does not follow iopts.format
    image_name = "phylogeny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #33
0
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    # NOTE: the docstring above doubles as the command's usage text.
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Line style shared by the highlighted (thick, translucent) paths
    highlight = dict(lw=12, alpha=.3,
                     solid_capstyle="round", solid_joinstyle="round")
    # Style of the individual marker dots
    dots = dict(mec="k", mfc="w", mew=3, ms=12)

    # Panel A: ten jittered points with the middle four reversed
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() returns an iterator on Python 3, so materialize it before
    # slicing and indexing
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", **dots)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2, .55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B: three scaffolds a, b, c with the last two blocks swapped
    w = .37
    boundaries = (0, 45, 75, 110)  # scaffold boundaries along the x axis
    ax = fig.add_axes([.1, .12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", **dots)
    ax.vlines(boundaries, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    # Boundaries rescaled from data units (0-110) to canvas coordinates
    patch = [.1 + w * x / 110. for x in boundaries]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C: same scaffolds in the order a, -c, b; points fall on a line
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in boundaries]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", **dots)
    ax.vlines(boundaries, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #34
0
def composite_qc(df_orig, size=(16, 12)):
    """Plot composite QC figures.

    Six panels on a 2x7 grid: histograms of age and mean coverage, and count
    plots of gender, ethnicity, sequencing chemistry and cohort. Columns of
    `df_orig` are first renamed to display names; `df_orig` itself is not
    reassigned.
    """
    display_names = {
        "hli_calc_age_sample_taken": "Age",
        "hli_calc_gender": "Gender",
        "eth7_max": "Ethnicity",
        "MeanCoverage": "Mean coverage",
        "Chemistry": "Sequencing chemistry",
        "Release Client": "Cohort",
    }
    df = df_orig.rename(columns=display_names)

    fig = plt.figure(1, size)
    grid = (2, 7)
    # Each row: two panels of width 2 followed by one of width 3
    ax1 = plt.subplot2grid(grid, (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid(grid, (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid(grid, (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid(grid, (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid(grid, (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid(grid, (1, 4), rowspan=1, colspan=3)

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    # Bars ordered by the index of value_counts()
    ethnicity_order = df["Ethnicity"].value_counts().index
    sns.countplot(x="Ethnicity", data=df, ax=ax3, order=ethnicity_order)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    cohort_order = df["Cohort"].value_counts().index
    sns.countplot(x="Cohort", data=df, ax=ax6, order=cohort_order)

    # Anonymize the cohorts: "Spector" becomes "TwinsUK", "Health Nucleus"
    # keeps its tick label object, everything else becomes C<position>
    anonymized = []
    for idx, tick in enumerate(ax6.get_xticklabels()):
        name = tick.get_text()
        if name == "Spector":
            anonymized.append("TwinsUK")
        elif name == "Health Nucleus":
            anonymized.append(tick)
        else:
            anonymized.append("C{}".format(idx + 1))
    ax6.set_xticklabels(anonymized)

    # Slant the cohort labels
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Move each x-axis label up into the title slot
    for ax in (ax1, ax2, ax3, ax4, ax5, ax6):
        ax.set_title(ax.get_xlabel())
        ax.set_xlabel("")

    plt.tight_layout()

    # Transparent overlay holding the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    columns = (0.02, 0.3, 0.6)
    labels = tuple(
        [(x, 0.96, tag) for x, tag in zip(columns, "ABC")]
        + [(x, 0.52, tag) for x, tag in zip(columns, "DEF")]
    )
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #35
0
def composite_ccn(df, size=(12, 8)):
    """ Plot composite ccn figure

    Draws a 2x2 grid of scatter plots, each with a fitted line drawn by
    `plot_fit_line`, of a copy-number column against the sample age column:

    - A: ``ccn.chrX`` for rows whose ``hli_calc_gender`` is ``"Male"``
    - B: ``ccn.chrY`` for the same rows
    - C: ``ccn.chr1`` for all rows
    - D: ``ccn.chrM`` for all rows

    `df` must provide those columns plus ``hli_calc_age_sample_taken``;
    `size` is the figure size handed to ``plt.figure``. The function draws
    on figure number 1 and returns nothing.
    """
    # A per-chemistry palette and legend handles used to be built here but
    # were never drawn, and building them sliced a zip() object, which
    # raises TypeError on Python 3. That dead code has been removed.
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    mf = df[df["hli_calc_gender"] == "Male"]

    age_col = "hli_calc_age_sample_taken"
    age_label = "Chronological age (yr)"
    # Every panel shares the same marker styling
    dots = dict(s=10, marker='.', color='lightslategray')

    # Panel A: chrX, male rows only
    ax1.scatter(mf[age_col], mf["ccn.chrX"], **dots)
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf[age_col], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    # Panel B: chrY, male rows only
    ax2.scatter(mf[age_col], mf["ccn.chrY"], **dots)
    plot_fit_line(ax2, mf[age_col], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    # Panel C: chr1, all rows
    ax3.scatter(df[age_col], df["ccn.chr1"], **dots)
    plot_fit_line(ax3, df[age_col], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    # Panel D: chrM, all rows
    ax4.scatter(df[age_col], df["ccn.chrM"], **dots)
    plot_fit_line(ax4, df[age_col], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Invisible full-canvas axes that only carries the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"), (.52, .98, "B"), (.02, .5, "C"), (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #36
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above doubles as the command's usage text.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Input files are named after the prefix given as first argument
    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: scatter of g.scatter_data with the curve g.spl on top
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)")
    # Negative correlation: show the y axis inverted
    if rho < 0:
        ax.invert_yaxis()

    # Panel B: the curve g.spld, with circles at the positions in pp
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    # Scaffold names, marker offsets and the printed distances below are
    # hard-coded for that example dataset.
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict(
        (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation))
        for x in g.agp
        if not x.is_gap
    )
    a_beg, a_end, asize, ao = sizes[a]
    b_beg, b_end, bsize, bo = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    # Scale so that scaffold a + gap + scaffold b span 0.6 of the canvas width
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    # Dotted guide lines from the panel B axis down to the enlarged view
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        # Marker offset within the scaffold, converted to canvas coordinates
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    # Bar above the scaffolds joining the two markers
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal;
    # the resulting text is unchanged
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    # Bars below the scaffolds: marker to the facing scaffold end, on each side
    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    # Dotted segment across the gap between the two scaffolds
    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    # `pf` is rebound here: the output name is fixed, not the input prefix
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #37
0
def composite_ccn(df, size=(12, 8)):
    """ Plot composite ccn figure

    Draws a 2x2 grid of scatter plots, each with a fitted line drawn by
    `plot_fit_line`, of a copy-number column against the sample age column:

    - A: ``ccn.chrX`` for rows whose ``hli_calc_gender`` is ``"Male"``
    - B: ``ccn.chrY`` for the same rows
    - C: ``ccn.chr1`` for all rows
    - D: ``ccn.chrM`` for all rows

    `df` must provide those columns plus ``hli_calc_age_sample_taken``;
    `size` is the figure size handed to ``plt.figure``. The function draws
    on figure number 1 and returns nothing.
    """
    # A per-chemistry palette and legend handles used to be built here but
    # were never drawn, and building them sliced a zip() object, which
    # raises TypeError on Python 3. That dead code has been removed.
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    mf = df[df["hli_calc_gender"] == "Male"]

    age_col = "hli_calc_age_sample_taken"
    age_label = "Chronological age (yr)"
    # Every panel shares the same marker styling
    dots = dict(s=10, marker='.', color='lightslategray')

    # Panel A: chrX, male rows only
    ax1.scatter(mf[age_col], mf["ccn.chrX"], **dots)
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf[age_col], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    # Panel B: chrY, male rows only
    ax2.scatter(mf[age_col], mf["ccn.chrY"], **dots)
    plot_fit_line(ax2, mf[age_col], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    # Panel C: chr1, all rows
    ax3.scatter(df[age_col], df["ccn.chr1"], **dots)
    plot_fit_line(ax3, df[age_col], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    # Panel D: chrM, all rows
    ax4.scatter(df[age_col], df["ccn.chrM"], **dots)
    plot_fit_line(ax4, df[age_col], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Invisible full-canvas axes that only carries the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #38
0
def cartoon(args):
    """
    %prog synteny.py

    Generate cartoon illustration of SynFind.
    """
    # The docstring above doubles as the usage text handed to OptionParser.
    # Three panels are drawn on one full-canvas axes and written to
    # "cartoon.<format>".
    parser = OptionParser(cartoon.__doc__)
    _, _, iopts = parser.set_image_options(args, figsize="10x7")

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: the plain region with the query gene and two arrows pointing
    # away from it, one per flank.
    region = CartoonRegion(41)
    region.draw(root, .35, .85, strip=False, color=False)
    left_end, right_end = region.x1, region.x2
    gray = "lightslategray"
    pad = .01
    query_x, query_y = .35, .88
    arrow_len = right_end - query_x - pad
    arrow_style = {
        "length_includes_head": True,
        "width": .01,
        "fc": gray,
        "lw": 0,
        "head_length": arrow_len * .15,
        "head_width": .03,
    }
    arrow_specs = (
        (query_x - pad, -arrow_len, "left"),
        (query_x + pad, arrow_len, "right"),
    )
    for start_x, dx, shape in arrow_specs:
        arrow = FancyArrow(start_x, query_y, dx, 0, shape=shape, **arrow_style)
        root.add_patch(arrow)

    label_y = query_y + 4 * pad
    root.text((left_end + query_x) / 2, label_y, "20 genes upstream",
              ha="center")
    root.text((right_end + query_x) / 2, label_y, "20 genes downstream",
              ha="center")
    root.plot((query_x,), (query_y,), "o", mfc='w', mec=gray, mew=2, lw=2,
              color=gray)
    root.text(query_x, label_y, "Query gene", ha="center")

    # Panel B: the region again, followed by three independently evolved
    # copies of it drawn inside a highlighted box.
    region.draw(root, .35, .7, strip=False)

    RoundRect(root, (.07, .49), .56, .14, fc='y', alpha=.2)
    evolved = []
    for mode, target, ypos in (('S', 10, .6), ('F', 8, .56), ('G', 6, .52)):
        variant = deepcopy(region)
        variant.evolve(mode=mode, target=target)
        variant.draw(root, .35, ypos)
        evolved.append(variant)
    syntenic, both_flankers, one_flanker = evolved

    for variant in evolved:
        score = "Score={0}".format(variant.nonwhites)
        root.text(.64, variant.y, score, va="center")

    # Panel C: truncate every region between its flankers, then draw one
    # diagram per evolved copy against the reference region.
    region.truncate_between_flankers()
    syntenic.truncate_between_flankers()
    both_flankers.truncate_between_flankers()
    one_flanker.truncate_between_flankers(target=6)

    diagrams = (
        (.14, syntenic, "S", "syntenic"),
        (.37, both_flankers, "F", "missing, with both flankers"),
        (.6, one_flanker, "G", "missing, with one flanker"),
    )
    for xpos, variant, tag, caption in diagrams:
        plot_diagram(root, xpos, .2, region, variant, tag, caption)

    panel_labels(root, ((.04, .95, 'A'), (.04, .75, 'B'), (.04, .4, 'C')))

    # Step descriptions down the right-hand side, one text line per y position
    text_x = .85
    steps = (
        (.88, "Extract neighborhood"),
        (.84, "of *window* size"),
        (.64, "Count gene pairs within *window*"),
        (.6, "Find regions above *score* cutoff"),
        (.3, "Identify flankers"),
        (.26, "Annotate syntelog class"),
    )
    for text_y, step in steps:
        root.text(text_x, text_y, markup(step), ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "cartoon" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Exemple #39
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is also the usage text given to OptionParser.
    # Function-scope imports: the graphics modules are only loaded when plot()
    # actually runs.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    # Exactly two positional arguments are required: the bed file and a seqid
    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files share the input's prefix (the text before its last ".")
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    # `function` is applied to individual markers below, both to size each
    # group and as the second coordinate of every plotted marker.
    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep the keys of s.mlg_counts whose count reaches the --links threshold;
    # if none qualifies, halve the threshold and retry until something does.
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    # Size of each retained group = largest function(marker) among its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # root spans the whole canvas; ax1 and ax2 are its left and right halves
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    # ystart/ystop are the vertical bounds handed to Chromosome and used to
    # scale base positions; L.coords yields one (x, y1, y2) tuple per group.
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    # Step 1: one set2 colour per map name. Step 2: rebind `colors` so it is
    # keyed by group, each group taking the colour of the map named by the
    # text before its first "-".
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        # Only markers located on the requested seqid enter the correlation
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        # NOTE(review): rho is compared with 0 and formatted with {0:.3f}
        # below, so spearmanr is expected to return a plain number here.
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative rho requests a flipped map (flip=True)
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Put the label on the outer side: left of maps drawn in the left
        # half (x < .5), right of the others.
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        # Collect marker name -> position, used as the y end of the
        # connecting lines drawn further down.
        marker_pos.update(g.marker_pos)

    # Restrict the AGP to records of this seqid; its largest object_end is
    # taken as the chromosome size.
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    # f maps a base position to a y coordinate: 0 -> ystart, chrsize -> ystop
    r = ystart - ystop
    ratio = r / chrsize
    f = lambda x: (ystart - ratio * x)
    # One patch boundary per non-gap AGP record, at the record's start
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        # Markers that were not placed on any drawn map are skipped
        if marker_name not in marker_pos:
            continue

        # (cx, cy) is the chromosome end of the line, (mx, my) the map end
        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        # Move cx towards the map and mx towards the chromosome by 1.25 * tip
        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        # The same markers feed the per-group scatter plots on the right
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    # f is rebound here: it now maps a base position to an x coordinate
    # starting at xstart (ratio is still r / chrsize at this point).
    xstart, xstop = sorted((ystart, ystop))
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    # ratio is rebound too: figure height per unit of group size, after
    # reserving one `gap` per group plus `tip`.
    gap = .03
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # Stack one axes per group from the top down, in sorted key order;
    # ystart is consumed (decremented) as each axes and gap is placed.
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        # xx is reused: from here on it holds the scatter x values
        xx, yy = zip(*sd)
        # Beige vertical lines at the start of every non-gap AGP record
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                    ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        # Keep every other y tick while the axes offers less than .03 of
        # figure height per tick and at least two ticks remain.
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        # Negative correlation: invert the y axis of this scatter plot
        if rho < 0:
            ax.invert_yaxis()

    # Group names are written at x=.5 on the root axes; with more than four
    # groups the horizontal alignment alternates between left and right.
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                      ha=ha, va="center")

    # Optional "A"/"B" panel labels
    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    # Output file is "<seqid>.<format>"; the figure is closed after saving
    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
Exemple #40
0
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # The docstring above doubles as the usage text handed to OptionParser.
    # Raises ValueError when no histogram has a K within [--kmin, --kmax].
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin",
                 default=15,
                 type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax",
                 default=30,
                 type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin",
                 default=2,
                 type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax",
                 default=100,
                 type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # The last positional argument is the species label and everything before
    # it is a histogram file, so fewer than two arguments cannot be plotted.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K comes from the file name: the part of the basename before the
        # first ".", then its last "-"-separated token.
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        (line, ) = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K, method="allpaths")
        genomesizes.append((K, ks.genomesize / 1e6))

    # Without this guard the zip(*genomesizes) unpacking below fails with an
    # opaque "not enough values to unpack" ValueError.
    if not genomesizes:
        raise ValueError(
            "No histogram with K within [{0}, {1}]".format(opts.kmin, opts.kmax)
        )

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    # Panel A: one K-mer spectrum curve per accepted histogram
    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    # Panel B: genome size estimate against K, with a quadratic fit drawn
    # over [kmin - 0.5, kmax + 0.5]
    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
Exemple #41
0
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # The docstring above doubles as the usage text handed to OptionParser.
    # Raises ValueError when no histogram has a K within [--kmin, --kmax].
    import sys  # local import: the file's import header is not visible here

    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int", help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int", help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int", help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int", help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # The last positional argument is the species label and everything before
    # it is a histogram file. Without this check an empty argument list dies
    # with an IndexError on args[-1].
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K comes from the file name: the part of the basename before the
        # first ".", then its last "-"-separated token.
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        line, = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    # Without this guard the zip(*genomesizes) unpacking below fails with an
    # opaque "not enough values to unpack" ValueError.
    if not genomesizes:
        raise ValueError(
            "No histogram with K within [{0}, {1}]".format(opts.kmin, opts.kmax)
        )

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    # Panel A: one K-mer spectrum curve per accepted histogram
    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    # Panel B: genome size estimate against K, with a quadratic fit drawn
    # over [kmin - 0.5, kmax + 0.5]
    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)