def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser.
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    left_file, right_file = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes; only used as the canvas for the panel letters.
    canvas = fig.add_axes([0, 0, 1, 1])
    left_ax = fig.add_axes([.1, .18, .32, .64])
    right_ax = fig.add_axes([.6, .18, .32, .64])

    left_data = import_data(left_file)
    right_data = import_data(right_file)

    # Both panels share the same axis labels and legend entries.
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    panels = (
        (left_ax, left_data, "Yellow catfish"),
        (right_ax, right_data, "Medicago"),
    )
    for ax, data, title in panels:
        subplot_twinx(ax, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(canvas, ((.04, .92, "A"), (.54, .92, "B")))
    normalize_axes(canvas)

    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def allelefreq(args):
    """
    %prog allelefreq HD,DM1,SCA1,SCA17

    Plot the allele frequencies of some STRs.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser.
    parser = OptionParser(allelefreq.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (loci,) = args
    fig, ((top_left, top_right), (bottom_left, bottom_right)) = plt.subplots(
        ncols=2, nrows=2, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=4)

    # Only the data frame returned by read_treds() is needed here.
    _, df = read_treds()
    df = df.set_index(["abbreviation"])

    # One panel per comma-separated locus; zip() stops at the shorter input.
    panels = (top_left, top_right, bottom_left, bottom_right)
    for panel, locus in zip(panels, loci.split(",")):
        plot_allelefreq(panel, df, locus)

    # Full-figure axes used as the canvas for the panel letters.
    canvas = fig.add_axes([0, 0, 1, 1])
    pad = .03
    left, top, middle = pad / 2, 1 - pad, 1 / 2.
    corner_labels = (
        (left, top, "A"),
        (middle, top, "B"),
        (left, middle, "C"),
        (middle, middle, "D"),
    )
    panel_labels(canvas, corner_labels)
    normalize_axes(canvas)

    savefig("allelefreq." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def composite_correlation(df, size=(12, 8)):
    """Plot a 2x2 composite figure of four traits against chronological age.

    Each panel is a scatter plot of one column of ``df`` against
    ``df["hli_calc_age_sample_taken"]``, with points colored by
    ``df["Chemistry"]``:

    * A (top left): ``teloLength``
    * B (top right): ``ccn.chrX``
    * C (bottom left): ``TRA.PPM``
    * D (bottom right): ``ccn.chrY``

    Args:
        df: data frame providing the columns listed above.
        size: figure size handed to ``plt.figure``.

    The function draws on figure number 1 and returns nothing.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))
    age_label = "Chronological age (yr)"

    # Both series are shared by all four panels, so compute them once.
    age = df["hli_calc_age_sample_taken"]
    point_colors = df["Chemistry"].map(color_map)

    # (axes, column, y-limits, y-label), in the original drawing order.
    panels = (
        (ax1, "teloLength", (0, 15), "Telomere length (Kb)"),
        (ax2, "ccn.chrX", (1.8, 2.1), "ChrX copy number"),
        (ax4, "ccn.chrY", (0.8, 1.1), "ChrY copy number"),
        (ax3, "TRA.PPM", (0, 250),
         "$TCR-\\alpha$ deletions (count per million reads)"),
    )
    for ax, column, (ymin, ymax), ylabel in panels:
        ax.scatter(age, df[column], s=10, marker='.', color=point_colors)
        ax.set_ylim(ymin, ymax)
        ax.set_ylabel(ylabel)

    # Legend covers the first three chemistries only (the NaN entry is
    # left out). zip() must be materialized before slicing: on Python 3 it
    # returns an iterator that cannot be subscripted.
    legend_elements = [
        Line2D([0], [0], marker='.', color='w', label=chem,
               markerfacecolor=color, markersize=16)
        for chem, color in list(zip(chemistry, colors))[:3]
    ]
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()

    # Full-figure axes used only as a canvas for the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_qc(df_orig, size=(16, 12)):
    """Plot a six-panel composite of cohort QC distributions.

    The input columns are renamed for display, then drawn as:

    * A: histogram of ``Age``
    * B: counts per ``Gender``
    * C: counts per ``Ethnicity`` (most frequent first)
    * D: histogram of ``Mean coverage`` (x-axis limited to 0-100)
    * E: counts per ``Sequencing chemistry``
    * F: counts per ``Cohort`` (most frequent first, names anonymized)

    Args:
        df_orig: data frame with the columns ``hli_calc_age_sample_taken``,
            ``hli_calc_gender``, ``eth7_max``, ``MeanCoverage``,
            ``Chemistry`` and ``Release Client``. It is not modified;
            ``rename`` returns a new frame.
        size: figure size handed to ``plt.figure``.

    The function draws on figure number 1 and returns nothing.
    """
    df = df_orig.rename(columns={"hli_calc_age_sample_taken": "Age",
                                 "hli_calc_gender": "Gender",
                                 "eth7_max": "Ethnicity",
                                 "MeanCoverage": "Mean coverage",
                                 "Chemistry": "Sequencing chemistry",
                                 "Release Client": "Cohort",
                                 })

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 7), (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid((2, 7), (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid((2, 7), (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid((2, 7), (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid((2, 7), (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid((2, 7), (1, 4), rowspan=1, colspan=3)

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    sns.countplot(x="Ethnicity", data=df, ax=ax3,
                  order=df['Ethnicity'].value_counts().index)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    sns.countplot(x="Cohort", data=df, ax=ax6,
                  order=df['Cohort'].value_counts().index)

    # Anonymize the cohorts. Labels are always collected as plain strings
    # (the original mixed Text objects and strings in the same list).
    anonymized = []
    for i, tick in enumerate(ax6.get_xticklabels()):
        name = tick.get_text()
        if name == "Spector":
            name = "TwinsUK"
        elif name != "Health Nucleus":
            name = "C{}".format(i + 1)
        anonymized.append(name)
    ax6.set_xticklabels(anonymized, ha="right", rotation=30)

    # Move each x-axis label into the panel title.
    for ax in (ax1, ax2, ax3, ax4, ax5, ax6):
        ax.set_title(ax.get_xlabel())
        ax.set_xlabel("")

    plt.tight_layout()

    # Full-figure axes used only as a canvas for the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .96, "A"),
              (.3, .96, "B"),
              (.6, .96, "C"),
              (.02, .52, "D"),
              (.3, .52, "E"),
              (.6, .52, "F"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    # The docstring is the usage text shown by print_help(); it previously
    # advertised the wrong sub-command ("cotton") and genome ("pineapple").
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto the same full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.42, 0.52, 0.48)

    labels = ((0.04, 0.96, "A"), (0.04, 0.52, "B"))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    # The docstring is the usage text shown by print_help(); it previously
    # advertised the wrong sub-command ("cotton") and genome ("pineapple").
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto the same full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser.
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    catfish_file, medicago_file = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes; only used as the canvas for the panel letters.
    canvas = fig.add_axes([0, 0, 1, 1])
    catfish_ax = fig.add_axes([0.1, 0.18, 0.32, 0.64])
    medicago_ax = fig.add_axes([0.6, 0.18, 0.32, 0.64])

    catfish_data = import_data(catfish_file)
    medicago_data = import_data(medicago_file)

    # Axis labels and legend entries are identical for both panels.
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    subplot_twinx(
        catfish_ax, catfish_data, xlabel, ylabels,
        title="Yellow catfish", legend=legend,
    )
    subplot_twinx(
        medicago_ax, medicago_data, xlabel, ylabels,
        title="Medicago", legend=legend,
    )

    panel_labels(canvas, ((0.04, 0.92, "A"), (0.54, 0.92, "B")))
    normalize_axes(canvas)

    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def compare2(args):
    """
    %prog compare2

    Compare performances of various variant callers on simulated STR datasets.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser.
    parser = OptionParser(compare2.__doc__)
    parser.add_option('--maxinsert', default=300, type="int",
                      help="Maximum number of repeats")
    add_simulate_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="10x5")

    if len(args) != 0:
        sys.exit(not parser.print_help())

    depth, readlen, distance = opts.depth, opts.readlen, opts.distance
    max_insert = opts.maxinsert

    fig, (haploid_ax, diploid_ax) = plt.subplots(
        ncols=2, nrows=1, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=2)

    # (axes, title prefix, lobSTR results, TREDPARSE results, parse kwargs)
    # Left: haploid model; right: diploid model, parsed with exclude=20.
    panels = (
        (haploid_ax, SIMULATED_HAPLOID,
         "lobstr_results_homo.txt", "tredparse_results_homo.txt", {}),
        (diploid_ax, SIMULATED_DIPLOID,
         "lobstr_results_het.txt", "tredparse_results_het.txt",
         {"exclude": 20}),
    )
    for ax, heading, lobstr_file, tredparse_file, parse_kwargs in panels:
        lobstr_results = parse_results(lobstr_file, **parse_kwargs)
        tredparse_results = parse_results(tredparse_file, **parse_kwargs)
        title = heading + \
            r" ($D=%s\times, L=%dbp, V=%dbp$)" % (depth, readlen, distance)
        plot_compare(ax, title, tredparse_results, lobstr_results,
                     max_insert=max_insert)

    # Both panels share the same square data range.
    for ax in (haploid_ax, diploid_ax):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    canvas = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(canvas, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B")))
    normalize_axes(canvas)

    savefig("tredparse." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Output image is named after the input file, minus its extension.
    pf = datafile.rsplit(".", 1)[0]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    text_pad = 2  # offset of the threshold captions, in data units

    # Read benchmark data from the file named on the command line. The
    # original always opened "Evaluation.csv" in the working directory and
    # silently ignored the argument.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    truth_max = max(truth)

    # NOTE(review): infected_thr and ref_thr are not defined in this
    # function; presumably module-level constants - confirm.
    axes = (ax1, ax2, ax3, ax4)
    progs = ("Manta", "Isaac", "GATK", "lobSTR")
    markers = ("bx-", "yo-", "md-", "c+-")

    # One panel per caller: called repeat count against the simulated truth.
    for ax, prog, marker in zip(axes, progs, markers):
        ax.plot(truth, df[prog], marker)
        ax.plot(truth, truth, 'k--')  # to show diagonal

        ax.axhline(infected_thr, color='tomato')
        ax.text(truth_max - text_pad, infected_thr + text_pad,
                'Risk threshold', bbox=bbox, ha="right")
        ax.axhline(ref_thr, color='tomato')
        ax.text(truth_max - text_pad, ref_thr - text_pad,
                'Reference repeat count', bbox=bbox, ha="right", va="top")
        ax.set_title(SIMULATED_HAPLOID)
        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.legend([prog, 'Truth'], loc='best')

    root = fig.add_axes([0, 0, 1, 1])
    label_pad = .03  # offset of the panel letters, in figure fractions
    panel_labels(root, ((label_pad / 2, 1 - label_pad, "A"),
                        (1 / 2., 1 - label_pad, "B"),
                        (label_pad / 2, 1 / 2., "C"),
                        (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Draw absolute-difference and paired-value panels for four traits.

    The top row (panels A-D) is filled by plot_abs_diff() and the two rows
    below (panels E-H) by plot_paired_values(), one pair of panels per
    trait: telomere length, ChrX copy number (female twins only), ChrY copy
    number (male twins only) and TCR-alpha deletions.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)

    # Top row: four narrow panels; below: a 2x2 block of wide panels.
    diff_axes = [plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
                 for col in range(4)]
    paired_axes = [plt.subplot2grid(grid, origin, rowspan=2, colspan=2)
                   for origin in ((2, 0), (2, 2), (4, 0), (4, 2))]

    # (column, label, twin-selection kwargs, DZ low-value cutoff or None,
    #  extra kwargs for the paired-value plot)
    traits = (
        ("telomeres.Length", "Telomere length", {}, None, {}),
        ("ccn.chrX", "ChrX copy number",
         {"gender": "Female"}, 1.75, {"gender": "Female only"}),
        ("ccn.chrY", "ChrY copy number",
         {"gender": "Male"}, .75, {"gender": "Male only"}),
        ("TRA.PPM", "TCR-$\\alpha$ deletions", {}, None, {}),
    )

    for trait, diff_ax, paired_ax in zip(traits, diff_axes, paired_axes):
        column, label, twin_kwargs, cutoff, paired_kwargs = trait
        values = extract_trait(df, "Sample name", column)
        mz_values = extract_twin_values(sameGenderMZ, values, **twin_kwargs)
        dz_values = extract_twin_values(sameGenderDZ, values, **twin_kwargs)
        if cutoff is not None:
            # Only the DZ values of the copy-number traits are filtered.
            dz_values = filter_low_values(dz_values, cutoff)
        plot_paired_values(paired_ax, mz_values, dz_values,
                           label=label, **paired_kwargs)
        plot_abs_diff(diff_ax, mz_values, dz_values, label=label)

    plt.tight_layout()

    canvas = fig.add_axes((0, 0, 1, 1))
    # ABCD absdiff, EFGH heritability
    letters = (
        (.03, .99, 'A'), (.27, .99, 'B'), (.53, .99, 'C'), (.77, .99, 'D'),
        (.03, .67, 'E'), (.53, .67, 'F'),
        (.03, .34, 'G'), (.53, .34, 'H'),
    )
    panel_labels(canvas, letters)
    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    # One row of diagrams per benchmark file, stacked from the top down.
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Each row is "prog pcounts tcounts shared", whitespace-separated.
        data = []
        with open(bc) as fp:  # closed even if a row fails to parse
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        # One diagram per program, laid out left to right within the row.
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # sys.stderr.write works on both Python 2 and 3; the original
            # "print >> sys.stderr" form raises TypeError on Python 3.
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles are laid out for three benchmark rows.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    # One row of diagrams per benchmark file, stacked from the top down.
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Each row is "prog pcounts tcounts shared", whitespace-separated.
        data = []
        with open(bc) as fp:  # the original never closed this handle
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        # One diagram per program, laid out left to right within the row.
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles are laid out for three benchmark rows.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser.
    parser = OptionParser(ploidy.__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not parser.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto the same full-figure axes.
    canvas = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, canvas, seqidsfile, klayout)
    Synteny(fig, canvas, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(canvas, 0.27, 0.37, 0.52)

    # annotate the WGD events: one circled symbol plus a "x2" per event
    shade = "lightslategrey"
    marker_x = 0.09
    marker_radius = 0.012
    events = ((0.825, r"$\tau$"), (0.8, r"$\sigma$"), (0.72, r"$\rho$"))
    for ypos, symbol in events:
        TextCircle(canvas, marker_x, ypos, symbol,
                   radius=marker_radius, fc=shade)
    for ypos, _ in events:
        canvas.text(0.12, ypos, r"$\times2$", color=shade,
                    ha="center", va="center")
    # two dotted vertical segments drawn at the same x as the circles
    for segment in ([0.85, 0.775], [0.75, 0.675]):
        canvas.plot([marker_x, marker_x], segment, ":", color=shade, lw=2)

    panel_labels(canvas, ((0.04, 0.96, "A"), (0.04, 0.54, "B")))

    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    prefix = "pineapple-karyotype"
    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # The docstring is the usage text shown by print_help(); it previously
    # advertised the wrong sub-command name ("cotton").
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Karyotype and Synteny both draw onto the same full-figure axes.
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .09
    radius = .012
    TextCircle(root, x, .825, r'$\tau$', radius=radius, fc=fc)
    TextCircle(root, x, .8, r'$\sigma$', radius=radius, fc=fc)
    TextCircle(root, x, .72, r'$\rho$', radius=radius, fc=fc)
    for ypos in (.825, .8, .72):
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    # two dotted vertical segments drawn at the same x as the circles
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes; only used as the canvas for the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.12, 0.62, 0.35, 0.35])
    B = fig.add_axes([0.62, 0.62, 0.35, 0.35])
    C = fig.add_axes([0.12, 0.12, 0.35, 0.35])
    D = fig.add_axes([0.62, 0.12, 0.35, 0.35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5)
    subplot(
        B,
        dataB,
        "Translocation error rate",
        "Accuracy",
        xlim=0.5,
        # Raw string: "\%" is not a valid Python escape sequence and warns
        # on recent interpreters. The value is unchanged (the backslash is
        # kept, as before).
        legend=("intra-chromosomal", "inter-chromosomal",
                r"75\% intra + 25\% inter"),
    )
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)
    labels = (
        (0.03, 0.97, "A"),
        (0.53, 0.97, "B"),
        (0.03, 0.47, "C"),
        (0.53, 0.47, "D"),
    )
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def synteny(args):
    """
    %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \
        b1.blocks all.bed b1.layout

    Create a composite figure with (A) wgd and (B) microsynteny.
    """
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    p = OptionParser(synteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x12")

    # Print the usage message on a wrong argument count, as the sibling
    # commands do, instead of failing with a bare ValueError on unpacking.
    if len(args) != 5:
        sys.exit(not p.print_help())

    (bedfile, sizesfile, blocksfile, allbedfile, blockslayout) = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Panel A takes the upper half of the figure, panel B the lower half.
    ax1 = fig.add_axes([0, 0.5, 1, 0.5])
    ax2 = fig.add_axes([0.02, 0, 0.98, 0.5])

    # Panel A
    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    # Panel B
    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([root, ax1, ax2])
    labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B"))
    panel_labels(root, labels)

    image_name = "synteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def likelihood3(args):
    """
    %prog likelihood3 200_20.json 200_100.json

    Plot the likelihood surface and marginal distributions for two settings.
    """
    from matplotlib import gridspec

    # The docstring is the usage text shown by print_help(); it previously
    # advertised the wrong sub-command name ("likelihood2").
    p = OptionParser(likelihood3.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm")

    if len(args) != 2:
        sys.exit(not p.print_help())

    jsonfile1, jsonfile2 = args
    fig = plt.figure(figsize=(iopts.w, iopts.h))
    # 9-row grid: rows 0-3 hold the first setting, rows 5-8 the second,
    # row 4 is left empty between them.
    gs = gridspec.GridSpec(9, 2)
    ax1 = fig.add_subplot(gs[:4, 0])
    ax2 = fig.add_subplot(gs[:2, 1])
    ax3 = fig.add_subplot(gs[2:4, 1])
    ax4 = fig.add_subplot(gs[5:, 0])
    ax5 = fig.add_subplot(gs[5:7, 1])
    ax6 = fig.add_subplot(gs[7:, 1])
    plt.tight_layout(pad=2)

    # Each setting gets one tall left panel and two stacked right panels.
    plot_panel(jsonfile1, ax1, ax2, ax3, opts.cmap)
    plot_panel(jsonfile2, ax4, ax5, ax6, opts.cmap)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .02
    panel_labels(root, ((pad, 1 - pad, "A"), (pad, 4. / 9, "B")))
    normalize_axes(root)

    image_name = "likelihood3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes; only used as the canvas for the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # Raw string: "\%" is not a valid Python escape sequence and warns on
    # recent interpreters. The value is unchanged (the backslash is kept,
    # as before).
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)
    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime-visible; edit it with care.
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    # diverge unpacks into two colors, used below as rr and gg.
    rr, gg = diverge
    # Each comma-separated seqid becomes its own single-element list.
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes shared by the karyotype, the coverage panels and the
    # panel letters.
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    # NOTE(review): `template_f3a` and `gap` are not defined in this function;
    # presumably module-level names - confirm.
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Tracks starting near the left edge get their label shifted right
        # and up.
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15,
                  ha="right", va="center")

    # Inset with datafiles
    # One data file per karyotype track, paired positionally by zip() below.
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        # NOTE(review): `chr` shadows the builtin of the same name inside
        # this function.
        chr = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, chr, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        # A second affixed axis on the same track carries the base gauge.
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    # NOTE(review): the conversion files are read from a hard-coded "data/"
    # path rather than from `datadir` - confirm this is intended.
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)
    order = Bed(bedfile).order
    for asterisk in (False, True):
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0, "A02", ax_Ar, rr, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1, "C2", ax_Co, gg, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0, "A02", ax_Ar, gg, ypos=1, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1, "C2", ax_Co, rr, ypos=1, asterisk=asterisk)

    # Text annotations on the two middle inset axes:
    # (axes, "gene1 gene2", horizontal alignment, label).
    Ar, Co = xy_axes[1:3]
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        # The anchor x depends on the alignment: scaled midpoint for
        # "center", second gene for "left", first gene otherwise.
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        # Labels containing "+" use rr, all others gg.
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    # NOTE: `order` is rebound here from the Bed order to a list of names.
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Upper coverage panel, spanning the x-range of the first track.
    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 order=order, gauge=None, plot_chr_label=False,
                 gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
                 labels_dict=labels_dict, diverge=diverge)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # Arrow annotations: (x, y, dx, dy, label); the text is placed at
    # (x + dx, y + dy) and the arrow points at (x, y).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # NOTE(review): the 'frac' arrowprops key looks unsupported in recent
    # matplotlib releases - confirm against the installed version.
    arrowprops = dict(facecolor='black', shrink=.05, frac=.5,
                      width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    # Lower coverage panel, spanning the x-range of the last track.
    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             order=order, gauge=None, plot_chr_label=False,
             gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
             labels_dict=labels_dict, diverge=diverge)

    pad = .03
    labels = ((.1, .67, "A"),
              (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime-visible; edit it with care.
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    # diverge unpacks into two colors, used below as rr and gg.
    rr, gg = diverge
    # Each comma-separated seqid becomes its own single-element list.
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Full-figure axes shared by the karyotype, the coverage panels and the
    # panel letters.
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    # NOTE(review): `template_f3a` and `gap` are not defined in this function;
    # presumably module-level names - confirm.
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Tracks starting near the left edge get their label shifted right
        # and up.
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15,
                  ha="right", va="center")

    # Inset with datafiles
    # One data file per karyotype track, paired positionally by zip() below.
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        # NOTE(review): `chr` shadows the builtin of the same name inside
        # this function.
        chr = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, chr, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        # A second affixed axis on the same track carries the base gauge.
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    # NOTE(review): the conversion files are read from a hard-coded "data/"
    # path rather than from `datadir` - confirm this is intended.
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r/2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r/2)
    order = Bed(bedfile).order
    for asterisk in (False, True):
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0, "A02", ax_Ar, rr, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1, "C2", ax_Co, gg, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0, "A02", ax_Ar, gg, ypos=1, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1, "C2", ax_Co, rr, ypos=1, asterisk=asterisk)

    # Text annotations on the two middle inset axes:
    # (axes, "gene1 gene2", horizontal alignment, label).
    Ar, Co = xy_axes[1:3]
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        # The anchor x depends on the alignment: scaled midpoint for
        # "center", second gene for "left", first gene otherwise.
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        # Labels containing "+" use rr, all others gg.
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    # NOTE: `order` is rebound here from the Bed order to a list of names.
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Upper coverage panel, spanning the x-range of the first track.
    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 order=order, gauge=None, plot_chr_label=False,
                 gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
                 labels_dict=labels_dict, diverge=diverge)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # Arrow annotations: (x, y, dx, dy, label); the text is placed at
    # (x + dx, y + dy) and the arrow points at (x, y).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # NOTE(review): the 'frac' arrowprops key looks unsupported in recent
    # matplotlib releases - confirm against the installed version.
    arrowprops=dict(facecolor='black', shrink=.05, frac=.5,
                    width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    # Lower coverage panel, spanning the x-range of the last track.
    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             order=order, gauge=None, plot_chr_label=False,
             gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
             labels_dict=labels_dict, diverge=diverge)

    pad = .03
    labels = ((.1, .67, "A"),
              (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.

    Draws three panels: (A) several callers against the truth column of the
    evaluation table, (B) lobSTR vs TREDPARSE on the haploid simulation and
    (C) lobSTR vs TREDPARSE on the diploid simulation. Panels B and C read
    fixed result files from the current directory. The figure is saved under
    the input file's prefix with the requested image format.
    """
    # Use this command's own docstring for the usage text (not the module's).
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="15x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, nrows=1,
                                        figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # Huntington risk allele
    infected_thr = 40
    ref_thr = 19

    xlabel = r'Num of CAG repeats inserted ($\mathit{h}$)'
    ylabel = 'Num of CAG repeats called'
    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}

    # ax1: Multiple callers at lower range
    # Read the table named on the command line rather than a fixed filename.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    ax1.plot(truth, df["Manta"], 'bx-')
    ax1.plot(truth, df["Isaac"], 'yo-')
    ax1.plot(truth, df["GATK"], 'md-')
    ax1.plot(truth, df["lobSTR"], 'c+-')
    ax1.plot(truth, truth, 'k--')  # to show diagonal

    pad = 2
    ax1.axhline(infected_thr, color='tomato')
    ax1.text(max(truth) - pad, infected_thr + pad, 'Risk threshold',
             bbox=bbox, ha="right")
    ax1.axhline(ref_thr, color='tomato')
    ax1.text(max(truth) - pad, ref_thr - pad, 'Reference repeat count',
             bbox=bbox, ha="right", va="top")
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)
    ax1.set_title(r'Simulated haploid $\mathit{h}$')
    ax1.legend(['Manta', 'Isaac', 'GATK', 'lobSTR', 'Truth'], loc='best')

    max_insert = 120
    pad *= 2  # the wider x-range of panels B/C needs a larger text offset
    truth = range(10, max_insert + 1)

    # ax2: lobSTR vs TREDPARSE with haploid model
    # ax3: lobSTR vs TREDPARSE with diploid model
    panels = (
        (ax2, "homo", {}, r'Simulated haploid $\mathit{h}$'),
        (ax3, "het", {"exclude": 20}, r'Simulated diploid $\mathit{20/h}$'),
    )
    for ax, model, parse_kwargs, title in panels:
        lobstr_results = parse_results(
            "lobstr_results_{0}.txt".format(model), **parse_kwargs)
        tredparse_results = parse_results(
            "tredparse_results_{0}.txt".format(model), **parse_kwargs)
        lx, ly = zip(*lobstr_results)
        tx, ty = zip(*tredparse_results)
        ax.plot(lx, ly, 'c+-')
        ax.plot(tx, ty, 'gx-')
        ax.plot(truth, truth, 'k--')
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')
        ax.axhline(infected_thr, color='tomato')
        ax.text(max(truth) - pad, infected_thr + pad, 'Risk threshold',
                bbox=bbox, ha="right")
        ax.set_xlim(10, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 3., 1 - pad, "B"),
                        (2 / 3., 1 - pad, "C")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def compare3(args):
    """
    %prog compare3

    Compare performances of various variant callers on simulated STR datasets.
    This compares the power of various evidence types.

    Each of the four panels plots TREDPARSE results obtained from a single
    evidence type (spanning, partial, repeat-only, paired-end reads), read
    from fixed `tredparse_results_het-*.txt` files in the current directory.
    """
    p = OptionParser(compare3.__doc__)
    p.add_option('--maxinsert', default=300, type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    color = "lightslategray"
    # (axis, file tag, parse_results kwargs, sub-model label, minimum h kept)
    panels = (
        (ax1, "spanning", {}, "Sub-model 1: Spanning reads", None),
        (ax2, "partial", {"exclude": 20}, "Sub-model 2: Partial reads", None),
        # HACK (repeat reads won't work under 50)
        (ax3, "repeat", {"exclude": 20}, "Sub-model 3: Repeat-only reads", 50),
        (ax4, "pair", {"exclude": 20}, "Sub-model 4: Paired-end reads", None),
    )
    for ax, tag, parse_kwargs, label, min_h in panels:
        tredparse_results = parse_results(
            "tredparse_results_het-{0}.txt".format(tag), **parse_kwargs)
        if min_h is not None:
            tredparse_results = [x for x in tredparse_results if x[0] > min_h]
        # All four titles share the same " (label)" layout; the first one
        # previously had the space on the wrong side of the parenthesis.
        title = SIMULATED_DIPLOID + " ({0})".format(label)
        plot_compare(ax, title, tredparse_results, None, color=color,
                     max_insert=max_insert, risk=False)

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def compare4(args):
    """
    %prog compare4

    Compare performances of various variant callers on simulated STR datasets.
    Adds coverage comparisons as panel C and D.

    Every panel plots lobSTR against TREDPARSE, using fixed result files in
    the current directory: haploid 20x (A), diploid 20x (B), diploid 5x (C)
    and diploid 80x (D).
    """
    p = OptionParser(compare4.__doc__)
    p.add_option('--maxinsert', default=300, type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    depth = opts.depth
    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    # (axis, title prefix, file tag, parse_results kwargs, depth in title)
    # NOTE: panels A/B always read the 20x files but are titled with
    # `opts.depth`, as in the original implementation.
    panels = (
        # ax1: lobSTR vs TREDPARSE with haploid model
        (ax1, SIMULATED_HAPLOID, "homo-20x", {}, depth),
        # ax2: lobSTR vs TREDPARSE with diploid model (depth=20x)
        (ax2, SIMULATED_DIPLOID, "het-20x", {"exclude": 20}, depth),
        # ax3: lobSTR vs TREDPARSE with diploid model (depth=5x)
        (ax3, SIMULATED_DIPLOID, "het-5x", {"exclude": 20}, 5),
        # ax4: lobSTR vs TREDPARSE with diploid model (depth=80x)
        (ax4, SIMULATED_DIPLOID, "het-80x", {"exclude": 20}, 80),
    )
    for ax, prefix, tag, parse_kwargs, shown_depth in panels:
        suffix = "{0}-150bp-500bp.txt".format(tag)
        lobstr_results = parse_results("lobstr_results_" + suffix,
                                       **parse_kwargs)
        tredparse_results = parse_results("tredparse_results_" + suffix,
                                          **parse_kwargs)
        # Both `$` delimiters are required for the mathtext to be balanced;
        # the panel A title used to be missing the closing one.
        title = prefix + r" ($Depth=%s\times$)" % shown_depth
        plot_compare(ax, title, tredparse_results, lobstr_results,
                     max_insert=max_insert)

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.

    Panel A shows jittered markers with a reversed middle stretch and
    highlights the increasing (red) and decreasing (green) runs. Panels B
    and C show three scaffolds before and after reordering. The figure is
    saved as `lms.<format>`.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    highlight = dict(lw=12, alpha=0.3, solid_capstyle="round",
                     solid_joinstyle="round")

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() is a one-shot iterator on Python 3; materialize it so the
    # slicing and indexing below work on both Python 2 and 3.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)",
              ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    p = (0, 45, 75, 110)  # scaffold boundaries (rebinds the parser name)
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata = ydata_orig = list(range(10, 110, 10))
    # Builds a new list, so ydata_orig keeps the untouched sequence for panel C
    ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    patch = [0.1 + w * x / 110.0 for x in p]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch,
                         height=0.02, lw=2)

    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in p]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch,
                         height=0.02, lw=2)

    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.

    Expects `<pf>.lifted.bed` and `<pf>.agp` for the given prefix. Panel A
    plots genetic vs physical marker positions with the fitted curve, panel B
    plots `g.spld` over the same range, and panel C zooms in on two hard-coded
    scaffolds (the annotations there are specific to JMMale-1). The figure is
    saved as `estimategaps.<format>`.
    """
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6, xfactor=1e-6,
                       xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = {x.component_id: (x.object_beg, x.object_end,
                              x.component_span, x.orientation)
             for x in g.agp if not x.is_gap}
    # Only the outer coordinates and the spans are needed below
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size
    y = .16
    pad = .03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize),
                                      (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal
    root.text(sum(markers) / 2, ypos + pad,
              r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
              **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def composite_ccn(df, size=(12, 8)):
    """Plot composite ccn figure.

    Draws a 2x2 grid on figure 1: chrX and chrY copy number against age for
    rows whose `hli_calc_gender` is "Male", then chr1 and mitochondria copy
    number against age for all rows. Each panel gets a fit line from
    `plot_fit_line`.

    Args:
        df: table indexed by column name (presumably a pandas DataFrame);
            must provide `hli_calc_gender`, `hli_calc_age_sample_taken`,
            `ccn.chrX`, `ccn.chrY`, `ccn.chr1` and `ccn.chrM`.
        size: figure size passed to `plt.figure`.
    """
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))
    mf = df[df["hli_calc_gender"] == "Male"]

    age_label = "Chronological age (yr)"
    dots = dict(s=10, marker=".", color="lightslategray")

    # Panel A: chrX in males (y-limits are set before the fit line here,
    # after it in the other panels; the order is kept as-is)
    ax1.scatter(mf["hli_calc_age_sample_taken"], mf["ccn.chrX"], **dots)
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf["hli_calc_age_sample_taken"], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    # Panel B: chrY in males
    ax2.scatter(mf["hli_calc_age_sample_taken"], mf["ccn.chrY"], **dots)
    plot_fit_line(ax2, mf["hli_calc_age_sample_taken"], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    # Panel C: chr1 in everyone
    ax3.scatter(df["hli_calc_age_sample_taken"], df["ccn.chr1"], **dots)
    plot_fit_line(ax3, df["hli_calc_age_sample_taken"], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    # Panel D: mitochondria in everyone
    ax4.scatter(df["hli_calc_age_sample_taken"], df["ccn.chrM"], **dots)
    plot_fit_line(ax4, df["hli_calc_age_sample_taken"], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()
    # Invisible full-figure axes used only to place the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"),
              (0.02, 0.5, "C"), (0.52, 0.5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Embed both absdiff figures and heritability figures.

    The top row holds four narrow absolute-difference panels (A-D); the two
    rows below hold four wide paired-value panels (E-H). Traits shown are
    telomere length, chrX copy number (female twins), chrY copy number
    (male twins) and TCR-alpha deletions.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)
    # Top row: one narrow panel per trait
    ax1a, ax2a, ax3a, ax4a = [
        plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
        for col in range(4)
    ]
    # Lower rows: wide panels, filled left-to-right then top-to-bottom
    ax1b, ax2b, ax3b, ax4b = [
        plt.subplot2grid(grid, (row, col), rowspan=2, colspan=2)
        for row in (2, 4)
        for col in (0, 2)
    ]

    key = "Sample name"

    # Telomeres
    telomeres = extract_trait(df, key, "telomeres.Length")
    mz_telo = extract_twin_values(sameGenderMZ, telomeres)
    dz_telo = extract_twin_values(sameGenderDZ, telomeres)
    plot_paired_values(ax1b, mz_telo, dz_telo, label="Telomere length")
    plot_abs_diff(ax1a, mz_telo, dz_telo, label="Telomere length")

    # CCNX (female pairs; low DZ values are filtered out)
    ccnx = extract_trait(df, key, "ccn.chrX")
    mz_ccnx = extract_twin_values(sameGenderMZ, ccnx, gender="Female")
    dz_ccnx = filter_low_values(
        extract_twin_values(sameGenderDZ, ccnx, gender="Female"), 1.75)
    plot_paired_values(ax2b, mz_ccnx, dz_ccnx, gender="Female only",
                       label="ChrX copy number")
    plot_abs_diff(ax2a, mz_ccnx, dz_ccnx, label="ChrX copy number")

    # CCNY (male pairs; low DZ values are filtered out)
    ccny = extract_trait(df, key, "ccn.chrY")
    mz_ccny = extract_twin_values(sameGenderMZ, ccny, gender="Male")
    dz_ccny = filter_low_values(
        extract_twin_values(sameGenderDZ, ccny, gender="Male"), 0.75)
    plot_paired_values(ax3b, mz_ccny, dz_ccny, gender="Male only",
                       label="ChrY copy number")
    plot_abs_diff(ax3a, mz_ccny, dz_ccny, label="ChrY copy number")

    # TRA
    tra = extract_trait(df, key, "TRA.PPM")
    mz_tra = extract_twin_values(sameGenderMZ, tra)
    dz_tra = extract_twin_values(sameGenderDZ, tra)
    plot_paired_values(ax4b, mz_tra, dz_tra, label="TCR-$\\alpha$ deletions")
    plot_abs_diff(ax4a, mz_tra, dz_tra, label="TCR-$\\alpha$ deletions")

    plt.tight_layout()

    # Overlay axes for the panel letters: ABCD absdiff, EFGH heritability
    root = fig.add_axes((0, 0, 1, 1))
    top_row = [(x, 0.99, tag)
               for x, tag in zip((0.03, 0.27, 0.53, 0.77), "ABCD")]
    lower_rows = [
        (0.03, 0.67, "E"),
        (0.53, 0.67, "F"),
        (0.03, 0.34, "G"),
        (0.53, 0.34, "H"),
    ]
    panel_labels(root, tuple(top_row + lower_rows))
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def cartoon(args):
    """
    %prog synteny.py

    Generate cartoon illustration of SynFind.
    """
    p = OptionParser(cartoon.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x7")

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: the query neighborhood with arrows to both sides
    A = CartoonRegion(41)
    A.draw(root, .35, .85, strip=False, color=False)
    x1, x2 = A.x1, A.x2
    lsg = "lightslategray"
    pad = .01
    xc, yc = .35, .88
    arrowlen = x2 - xc - pad
    arrowprops = dict(length_includes_head=True, width=.01, fc=lsg, lw=0,
                      head_length=arrowlen * .15, head_width=.03)
    for sign, shape in ((-1, "left"), (1, "right")):
        arrow = FancyArrow(xc + sign * pad, yc, sign * arrowlen, 0,
                           shape=shape, **arrowprops)
        root.add_patch(arrow)

    yt = yc + 4 * pad
    root.text((x1 + xc) / 2, yt, "20 genes upstream", ha="center")
    root.text((x2 + xc) / 2, yt, "20 genes downstream", ha="center")
    root.plot((xc,), (yc,), "o", mfc='w', mec=lsg, mew=2, lw=2, color=lsg)
    root.text(xc, yt, "Query gene", ha="center")

    # Panel B: the reference plus three evolved copies with their scores
    A.draw(root, .35, .7, strip=False)

    RoundRect(root, (.07, .49), .56, .14, fc='y', alpha=.2)
    evolved = []
    for mode, target, ypos in (('S', 10, .6), ('F', 8, .56), ('G', 6, .52)):
        region = deepcopy(A)
        region.evolve(mode=mode, target=target)
        region.draw(root, .35, ypos)
        evolved.append(region)
    a, b, c = evolved

    for region in evolved:
        root.text(.64, region.y, "Score={0}".format(region.nonwhites),
                  va="center")

    # Panel C: one diagram per class
    A.truncate_between_flankers()
    a.truncate_between_flankers()
    b.truncate_between_flankers()
    c.truncate_between_flankers(target=6)

    plot_diagram(root, .14, .2, A, a, "S", "syntenic")
    plot_diagram(root, .37, .2, A, b, "F", "missing, with both flankers")
    plot_diagram(root, .6, .2, A, c, "G", "missing, with one flanker")

    labels = ((.04, .95, 'A'), (.04, .75, 'B'), (.04, .4, 'C'))
    panel_labels(root, labels)

    # Descriptions in the right-hand column, paired with their y positions
    xt = .85
    captions = (
        (.88, "Extract neighborhood"),
        (.84, "of *window* size"),
        (.64, "Count gene pairs within *window*"),
        (.6, "Find regions above *score* cutoff"),
        (.3, "Identify flankers"),
        (.26, "Annotate syntelog class"),
    )
    for yt, caption in captions:
        root.text(xt, yt, markup(caption), ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "cartoon"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.

    Reads `<prefix>.lifted.bed` and `<prefix>.agp`, where the prefix is the
    input bed name without its extension. Only linkage groups with at least
    `opts.links` markers on `seqid` are drawn. The figure is saved as
    `<seqid>.<format>` and then closed.
    """
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
        set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
        HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep only linkage groups supported by enough markers
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    # Size of each linkage group = largest marker distance within it
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])  # left half: parallel axes
    ax2 = fig.add_axes([.5, 0, .5, 1])  # right half: scatter plots

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    # First one color per map, then re-keyed per linkage group using the
    # map name found before the first "-" of the group name
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        # NOTE(review): unpacking fails if no marker of this group lies on seqid
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        flip = rho < 0  # negative correlation: draw the map upside down

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Label goes on the outer side of the map, away from the center
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Maps a base position to a y coordinate; reads the current `ystart` and
    # `ratio` when called, so it must be used before they are changed below
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # Horizontal counterpart of the mapping above; the patch positions are
    # computed right away, before `ratio` is reassigned further down
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # From here on `ratio` converts map distance to panel height
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height  # panels are stacked downwards from the top
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        # NOTE(review): unpacking fails if this group has no plotted markers
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(
                ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Linkage group names along the center line; with more than four groups
    # the alignment alternates between "left" and "right"
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                  ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
def likelihood(args):
    """
    %prog likelihood

    Plot likelihood surface. Look for two files in the current folder:
    - 100_100.log, haploid model
    - 100_20.log, diploid model
    """
    p = OptionParser(likelihood.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x5",
                                            style="white", cmap="coolwarm")

    if len(args) != 0:
        sys.exit(not p.print_help())

    fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1,
                                   figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=4)

    def loglik(point):
        # Each point is (h, log-likelihood); rank by the likelihood
        return point[-1]

    # Haploid model
    LL, CI_h1, CI_h2, MLE = parse_log("100_100.log")
    points = sorted((key[0], value) for key, value in LL.items())
    x, y = zip(*points)
    x = np.array(x)
    curve, = ax1.plot(x, y, "-", color=lsg, lw=2)
    ax1.set_title("Simulated haploid ($h^{truth}=100$)")

    h_hat, max_LL = max(points, key=loglik)
    _, min_LL = min(points, key=loglik)
    ymin, ymax = ax1.get_ylim()
    ax1.set_ylim([ymin, ymax + 30])

    LL_label = "log(Likelihood)"
    ax1.plot([h_hat, h_hat], [ymin, max_LL], ":", color=lsg, lw=2)
    # The annotation text is fixed; only its position follows the data
    ax1.text(h_hat, max_LL + 10, r"$\hat{h}=93$", color=lsg)
    ax1.set_xlabel(r"$h$")
    ax1.set_ylabel(LL_label)

    a, b = CI_h1
    inside_ci = (x >= a) & (x <= b)
    ci = ax1.fill_between(x, [ymin] * len(x), y, where=inside_ci,
                          color=lsg, alpha=.5)
    ax1.legend([curve, ci], ["Likelihood curve", r'95$\%$ CI'], loc='best')

    # Diploid model
    LL, CI_h1, CI_h2, MLE = parse_log("100_20.log")
    # NOTE(review): the extremes below are still taken from the haploid
    # curve, so the surface is pre-filled with the haploid minimum. Confirm
    # whether the diploid values were intended.
    h_hat, max_LL = max(points, key=loglik)
    _, min_LL = min(points, key=loglik)
    surface = np.ones((301, 301)) * min_LL
    for (a, b), value in LL.items():
        # Store the value at both (a, b) and (b, a)
        surface[a, b] = value
        surface[b, a] = value

    surface = mask_upper_triangle(surface)
    ax_imshow(ax2, surface, opts.cmap, LL_label, 20, 104)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .04
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B")))
    normalize_axes(root)

    image_name = "likelihood." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def phylogeny(args):
    """
    %prog phylogeny treefile ks.layout

    Create a composite figure with (A) tree and (B) ks.

    Besides the two positional arguments, `leafinfo.csv` and `wgdinfo.csv`
    are read from the current directory. The figure is always written to
    `phylogeny.pdf`.
    """
    from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree

    p = OptionParser(phylogeny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x12")

    # Same usage guard as the sibling commands: show the help text instead of
    # failing with an unpacking error when the argument count is wrong.
    if len(args) != 2:
        sys.exit(not p.print_help())

    (datafile, layoutfile) = args

    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0.4, 1, 0.6])
    ax2 = fig.add_axes([0.12, 0.065, 0.8, 0.3])

    margin, rmargin = 0.1, 0.2  # Left and right margin
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    outgroup = "ginkgo"

    # Panel A
    draw_tree(
        ax1,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        reroot=False,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
    )

    from jcvi.apps.ks import Layout, KsPlot, KsFile

    # Panel B
    ks_min = 0.0
    ks_max = 3.0
    bins = 60
    fill = False
    layout = Layout(layoutfile)
    print(layout, file=sys.stderr)

    kp = KsPlot(ax2, ks_max, bins, legendp="upper right")
    for lo in layout:
        # Keep only the values inside [ks_min, ks_max]
        data = [x.ng_ks for x in KsFile(lo.ksfile)]
        data = [x for x in data if ks_min <= x <= ks_max]
        kp.add_data(
            data,
            lo.components,
            label=lo.label,
            color=lo.color,
            marker=lo.marker,
            fill=fill,
            fitted=False,
            kde=True,
        )

    kp.draw(filename=None)

    normalize_axes([root, ax1])
    labels = ((0.05, 0.95, "A"), (0.05, 0.4, "B"))
    panel_labels(root, labels)

    # NOTE(review): the output name ignores iopts.format, unlike the sibling
    # commands. Left unchanged to keep the output path stable.
    image_name = "phylogeny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.

    Panel A shows jittered markers with a reversed middle stretch and
    highlights the increasing (red) and decreasing (green) runs. Panels B
    and C show three scaffolds before and after reordering. The figure is
    saved as `lms.<format>`.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    highlight = dict(lw=12, alpha=.3, solid_capstyle="round",
                     solid_joinstyle="round")

    # Panel A
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() is a one-shot iterator on Python 3; materialize it so the
    # slicing and indexing below work on both Python 2 and 3.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2, .55, "Chromosome location (bp)",
              ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B
    w = .37
    p = (0, 45, 75, 110)  # scaffold boundaries (rebinds the parser name)
    ax = fig.add_axes([.1, .12, w, h])
    xdata = list(range(10, 110, 10))
    ydata = ydata_orig = list(range(10, 110, 10))
    # Builds a new list, so ydata_orig keeps the untouched sequence for panel C
    ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    patch = [.1 + w * x / 110. for x in p]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch,
                         height=.02, lw=2)

    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in p]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(p, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch,
                         height=.02, lw=2)

    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def composite_qc(df_orig, size=(16, 12)):
    """Plot composite QC figures

    Six panels on a 2x7 grid: age, gender and ethnicity on the top row;
    mean coverage, sequencing chemistry and cohort on the bottom row.
    Cohort tick labels are anonymized before display.
    """
    display_names = {
        "hli_calc_age_sample_taken": "Age",
        "hli_calc_gender": "Gender",
        "eth7_max": "Ethnicity",
        "MeanCoverage": "Mean coverage",
        "Chemistry": "Sequencing chemistry",
        "Release Client": "Cohort",
    }
    df = df_orig.rename(columns=display_names)

    fig = plt.figure(1, size)
    grid = (2, 7)
    ax1 = plt.subplot2grid(grid, (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid(grid, (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid(grid, (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid(grid, (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid(grid, (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid(grid, (1, 4), rowspan=1, colspan=3)
    panels = (ax1, ax2, ax3, ax4, ax5, ax6)

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    ethnicity_order = df["Ethnicity"].value_counts().index
    sns.countplot(x="Ethnicity", data=df, ax=ax3, order=ethnicity_order)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    cohort_order = df["Cohort"].value_counts().index
    sns.countplot(x="Cohort", data=df, ax=ax6, order=cohort_order)

    # Anonymize the cohorts
    def _alias(position, tick):
        name = tick.get_text()
        if name == "Spector":
            return "TwinsUK"
        if name == "Health Nucleus":
            return tick  # kept as the original tick label object
        return "C{}".format(position + 1)

    ax6.set_xticklabels(
        [_alias(i, tick) for i, tick in enumerate(ax6.get_xticklabels())])

    # Slant the cohort labels
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Move each x-axis label up into the panel title
    for ax in panels:
        ax.set_title(ax.get_xlabel())
        ax.set_xlabel("")

    plt.tight_layout()
    root = fig.add_axes((0, 0, 1, 1))
    labels = tuple(
        (x, y, tag)
        for y, tags in ((0.96, "ABC"), (0.52, "DEF"))
        for x, tag in zip((0.02, 0.3, 0.6), tags)
    )
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_ccn(df, size=(12, 8)):
    """
    Plot composite ccn figure

    Draws a 2x2 grid of scatter plots on figure 1, each against the
    `hli_calc_age_sample_taken` column and each with a fit line added by
    plot_fit_line():

    A. `ccn.chrX`, rows with `hli_calc_gender == "Male"` only
    B. `ccn.chrY`, rows with `hli_calc_gender == "Male"` only
    C. `ccn.chr1`, all rows
    D. `ccn.chrM`, all rows

    df: table holding the columns above (presumably a pandas DataFrame).
    size: passed to plt.figure() as the figure size.

    The figure is only drawn here; nothing is saved or returned.
    """
    # NOTE: an unused chemistry palette and legend were built here through
    # `zip(chemistry, colors)[:3]`, which raises TypeError on Python 3 (zip
    # objects are not subscriptable). Nothing consumed them, so they are gone.
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    mf = df[df["hli_calc_gender"] == "Male"]

    age_label = "Chronological age (yr)"
    ax1.scatter(mf["hli_calc_age_sample_taken"], mf["ccn.chrX"],
                s=10, marker='.', color='lightslategray')
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf["hli_calc_age_sample_taken"], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    ax2.scatter(mf["hli_calc_age_sample_taken"], mf["ccn.chrY"],
                s=10, marker='.', color='lightslategray')
    plot_fit_line(ax2, mf["hli_calc_age_sample_taken"], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    ax3.scatter(df["hli_calc_age_sample_taken"], df["ccn.chr1"],
                s=10, marker='.', color='lightslategray')
    plot_fit_line(ax3, df["hli_calc_age_sample_taken"], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    ax4.scatter(df["hli_calc_age_sample_taken"], df["ccn.chrM"],
                s=10, marker='.', color='lightslategray')
    plot_fit_line(ax4, df["hli_calc_age_sample_taken"], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Invisible full-figure axes that only carries the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so further documentation lives in comments.
    #
    # args: command-line tokens -- file prefix, sequence id, linkage group.
    # Reads <prefix>.lifted.bed and <prefix>.agp and writes
    # estimategaps.<format>. Panel C is hard-wired to one example
    # (scaffold_1076 / scaffold_861); its annotations are literal strings,
    # not values computed from the input.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    # assumes spearmanr() returns a scalar coefficient -- TODO confirm
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)")
    if rho < 0:
        # Negative correlation: flip the axis
        ax.invert_yaxis()

    # Panel B
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict(
        (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation))
        for x in g.agp
        if not x.is_gap
    )
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    # The two scaffolds plus the gap between them span 0.6 of the canvas
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so further documentation lives in comments.
    #
    # args: command-line tokens -- input bed file and sequence id, plus the
    # options registered by add_allmaps_plot_options() / set_image_options().
    # Reads <prefix>.lifted.bed and <prefix>.agp and writes <seqid>.<format>.
    # Returns None; returns early without drawing when no linkage group of
    # the scaffold carries any marker.
    from jcvi.graphics.base import (plt, savefig, normalize_axes,
                                    set2, panel_labels)
    from jcvi.graphics.chromosome import (Chromosome, GeneticMap,
                                          HorizontalChromosome)

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)

    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    while not mlgs:
        if links <= 0:
            # A zero threshold already admits every linkage group, so
            # halving again can never help: stop instead of looping forever
            logging.error("No markers to plot for {0}".format(seqid))
            return
        # Floor division keeps the threshold an integer on Python 3
        links //= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        # assumes spearmanr() returns a scalar coefficient -- TODO confirm
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # f reads ystart/ratio when called; both are rebound further down, after
    # the last use of this version of f
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # From here on `ratio` scales map units to axes height, not bp to canvas
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            # Alternate the alignment once there are more than four labels
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                  ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer hisotograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so further documentation lives in comments.
    #
    # args: command-line tokens -- one or more histogram files followed by the
    # species name. Writes <species>.multiK.pdf with two panels: (A) the
    # K-mer spectra, (B) the genome size estimate per K with a quadratic fit.
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram AND the species name are needed; a lone argument
    # would leave no histogram to plot and fail further down
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K comes from the file name: the part before the first "." and
        # after the last "-"
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        (line,) = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K, method="allpaths")
        genomesizes.append((K, ks.genomesize / 1e6))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    # Quadratic trend through the per-K estimates (kept apart from parser `p`)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer hisotograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so further documentation lives in comments.
    #
    # args: command-line tokens -- one or more histogram files followed by the
    # species name. Writes <species>.multiK.pdf with two panels: (A) the
    # K-mer spectra, (B) the genome size estimate per K with a quadratic fit.
    import sys  # local import: this block did not previously rely on sys

    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # Without this guard an empty argument list dies on args[-1] and a lone
    # argument leaves no histogram to plot
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K comes from the file name: the part before the first "." and
        # after the last "-"
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        line, = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    # Quadratic trend through the per-K estimates (kept apart from parser `p`)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)