def composite_correlation(df, size=(12, 8)):
    """Plot a 2x2 composite of four traits against chronological age.

    Each panel is a scatter plot of one column of ``df`` against the
    ``hli_calc_age_sample_taken`` column, with points colored by the
    ``Chemistry`` column. Panels A-D show ``teloLength``, ``ccn.chrX``,
    ``TRA.PPM`` and ``ccn.chrY`` respectively.

    Args:
        df: Table providing the columns named above; its ``Chemistry``
            column must support ``.map`` (presumably a pandas DataFrame --
            confirm against callers).
        size: Figure size forwarded to ``plt.figure``.

    Returns:
        None. The figure is drawn on matplotlib figure number 1.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    # The trailing NaN entry receives a palette color in the map but is
    # deliberately left out of the legend built below.
    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))

    age_label = "Chronological age (yr)"
    age = df["hli_calc_age_sample_taken"]
    # Same per-point colors for every panel, so compute the mapping once.
    point_colors = df["Chemistry"].map(color_map)

    # (axes, column, y-limits, y-label)
    panels = (
        (ax1, "teloLength", (0, 15), "Telomere length (Kb)"),
        (ax2, "ccn.chrX", (1.8, 2.1), "ChrX copy number"),
        (ax4, "ccn.chrY", (0.8, 1.1), "ChrY copy number"),
        (ax3, "TRA.PPM", (0, 250),
         "$TCR-\\alpha$ deletions (count per million reads)"),
    )
    for ax, column, ylim, ylabel in panels:
        ax.scatter(age, df[column], s=10, marker=".", color=point_colors)
        ax.set_ylim(*ylim)
        ax.set_ylabel(ylabel)

    # Proxy artists for the legend: one dot per named chemistry.
    # Slice before zipping -- on Python 3 ``zip`` returns an iterator, so
    # the former ``zip(...)[:3]`` raised TypeError.
    legend_elements = [
        Line2D([0], [0], marker=".", color="w", label=chem,
               markerfacecolor=color, markersize=16)
        for chem, color in zip(chemistry[:3], colors)
    ]

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()

    # Full-figure invisible axes used only to place the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"), (.52, .98, "B"),
              (.02, .5, "C"), (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Embed both absdiff figures and heritability figures.

    The top band holds four narrow absolute-difference panels (A-D); the two
    bands below hold four wide paired-value panels (E-H). Traits, in order:
    telomere length, ChrX copy number, ChrY copy number, TCR-alpha deletions.

    Args:
        df: Table passed to ``extract_trait`` together with the
            ``"Sample name"`` column and each trait column.
        sameGenderMZ: Twin collection passed to ``extract_twin_values`` for
            the MZ series.
        sameGenderDZ: Twin collection passed to ``extract_twin_values`` for
            the DZ series.
        size: Figure size forwarded to ``plt.figure``.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)

    # Row 0: one-column-wide panels for the absolute differences.
    diff_axes = [
        plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
        for col in range(4)
    ]
    # Rows 2 and 4: two-column-wide panels for the paired values.
    pair_axes = [
        plt.subplot2grid(grid, (row, col), rowspan=2, colspan=2)
        for row in (2, 4)
        for col in (0, 2)
    ]

    # (trait column, extra kwargs for extract_twin_values,
    #  DZ cutoff for filter_low_values or None,
    #  extra kwargs for plot_paired_values, display label)
    traits = (
        ("telomeres.Length", {}, None, {}, "Telomere length"),
        ("ccn.chrX", {"gender": "Female"}, 1.75,
         {"gender": "Female only"}, "ChrX copy number"),
        ("ccn.chrY", {"gender": "Male"}, .75,
         {"gender": "Male only"}, "ChrY copy number"),
        ("TRA.PPM", {}, None, {}, "TCR-$\\alpha$ deletions"),
    )

    for spec, ax_diff, ax_pair in zip(traits, diff_axes, pair_axes):
        column, twin_kwargs, dz_cutoff, paired_kwargs, label = spec
        values = extract_trait(df, "Sample name", column)
        mz_values = extract_twin_values(sameGenderMZ, values, **twin_kwargs)
        dz_values = extract_twin_values(sameGenderDZ, values, **twin_kwargs)
        if dz_cutoff is not None:
            # Only the DZ series is filtered, and only for the copy-number traits.
            dz_values = filter_low_values(dz_values, dz_cutoff)
        plot_paired_values(ax_pair, mz_values, dz_values,
                           label=label, **paired_kwargs)
        plot_abs_diff(ax_diff, mz_values, dz_values, label=label)

    plt.tight_layout()

    # Full-figure overlay axes that carries the panel letters.
    # ABCD absdiff, EFGH heritability
    root = fig.add_axes((0, 0, 1, 1))
    labels = (
        (.03, .99, 'A'), (.27, .99, 'B'), (.53, .99, 'C'), (.77, .99, 'D'),
        (.03, .67, 'E'), (.53, .67, 'F'),
        (.03, .34, 'G'), (.53, .34, 'H'),
    )
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_qc(df_orig, size=(16, 12)):
    """Plot composite QC figures.

    Draws a 2x3 arrangement of panels (A-F): age histogram, gender counts,
    ethnicity counts, mean-coverage histogram, sequencing-chemistry counts
    and cohort counts. Cohort tick labels are anonymized before display.

    Args:
        df_orig: Table with the raw column names listed in the rename map
            below; it is renamed into a copy for display purposes.
        size: Figure size forwarded to ``plt.figure``.
    """
    display_names = {
        "hli_calc_age_sample_taken": "Age",
        "hli_calc_gender": "Gender",
        "eth7_max": "Ethnicity",
        "MeanCoverage": "Mean coverage",
        "Chemistry": "Sequencing chemistry",
        "Release Client": "Cohort",
    }
    df = df_orig.rename(columns=display_names)

    fig = plt.figure(1, size)
    layout = (2, 7)
    ax1 = plt.subplot2grid(layout, (0, 0), rowspan=1, colspan=2)
    ax2 = plt.subplot2grid(layout, (0, 2), rowspan=1, colspan=2)
    ax3 = plt.subplot2grid(layout, (0, 4), rowspan=1, colspan=3)
    ax4 = plt.subplot2grid(layout, (1, 0), rowspan=1, colspan=2)
    ax5 = plt.subplot2grid(layout, (1, 2), rowspan=1, colspan=2)
    ax6 = plt.subplot2grid(layout, (1, 4), rowspan=1, colspan=3)

    # Top row
    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    ethnicity_order = df["Ethnicity"].value_counts().index
    sns.countplot(x="Ethnicity", data=df, ax=ax3, order=ethnicity_order)

    # Bottom row
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    cohort_order = df["Cohort"].value_counts().index
    sns.countplot(x="Cohort", data=df, ax=ax6, order=cohort_order)

    def _anonymized(position, tick):
        # "Spector" is shown as "TwinsUK"; "Health Nucleus" keeps its tick
        # label object untouched; every other cohort becomes "C<position+1>".
        name = tick.get_text()
        if name == "Spector":
            return "TwinsUK"
        if name != "Health Nucleus":
            return "C{}".format(position + 1)
        return tick

    # Anonymize the cohorts
    ax6.set_xticklabels(
        [_anonymized(i, tick) for i, tick in enumerate(ax6.get_xticklabels())]
    )
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Promote each x-axis label to the panel title.
    for ax in (ax1, ax2, ax3, ax4, ax5, ax6):
        title = ax.get_xlabel()
        ax.set_title(title)
        ax.set_xlabel("")

    plt.tight_layout()

    # Full-figure overlay axes that carries the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = (
        (.02, .96, "A"), (.3, .96, "B"), (.6, .96, "C"),
        (.02, .52, "D"), (.3, .52, "E"), (.6, .52, "F"),
    )
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Embed both absdiff figures and heritability figures.

    Panels A-D (top band) are absolute-difference plots; panels E-H (two
    lower bands) are paired-value plots. The four traits are telomere length,
    ChrX copy number, ChrY copy number and TCR-alpha deletions.

    Args:
        df: Table handed to ``extract_trait`` with the ``"Sample name"``
            column and each trait column.
        sameGenderMZ: Twin collection used for the MZ series.
        sameGenderDZ: Twin collection used for the DZ series.
        size: Figure size forwarded to ``plt.figure``.
    """
    fig = plt.figure(1, size)
    shape = (6, 4)

    def _panel(row, col, width):
        # Every panel is two grid rows tall; only the width varies.
        return plt.subplot2grid(shape, (row, col), rowspan=2, colspan=width)

    absdiff_telo = _panel(0, 0, 1)
    absdiff_chrx = _panel(0, 1, 1)
    absdiff_chry = _panel(0, 2, 1)
    absdiff_tra = _panel(0, 3, 1)
    paired_telo = _panel(2, 0, 2)
    paired_chrx = _panel(2, 2, 2)
    paired_chry = _panel(4, 0, 2)
    paired_tra = _panel(4, 2, 2)

    # Telomere length
    trait = extract_trait(df, "Sample name", "telomeres.Length")
    mz = extract_twin_values(sameGenderMZ, trait)
    dz = extract_twin_values(sameGenderDZ, trait)
    plot_paired_values(paired_telo, mz, dz, label="Telomere length")
    plot_abs_diff(absdiff_telo, mz, dz, label="Telomere length")

    # ChrX copy number (female pairs; DZ series filtered at 1.75)
    trait = extract_trait(df, "Sample name", "ccn.chrX")
    mz = extract_twin_values(sameGenderMZ, trait, gender="Female")
    dz = extract_twin_values(sameGenderDZ, trait, gender="Female")
    dz = filter_low_values(dz, 1.75)
    plot_paired_values(
        paired_chrx, mz, dz, gender="Female only", label="ChrX copy number"
    )
    plot_abs_diff(absdiff_chrx, mz, dz, label="ChrX copy number")

    # ChrY copy number (male pairs; DZ series filtered at 0.75)
    trait = extract_trait(df, "Sample name", "ccn.chrY")
    mz = extract_twin_values(sameGenderMZ, trait, gender="Male")
    dz = extract_twin_values(sameGenderDZ, trait, gender="Male")
    dz = filter_low_values(dz, 0.75)
    plot_paired_values(
        paired_chry, mz, dz, gender="Male only", label="ChrY copy number"
    )
    plot_abs_diff(absdiff_chry, mz, dz, label="ChrY copy number")

    # TCR-alpha deletions
    trait = extract_trait(df, "Sample name", "TRA.PPM")
    mz = extract_twin_values(sameGenderMZ, trait)
    dz = extract_twin_values(sameGenderDZ, trait)
    plot_paired_values(paired_tra, mz, dz, label="TCR-$\\alpha$ deletions")
    plot_abs_diff(absdiff_tra, mz, dz, label="TCR-$\\alpha$ deletions")

    plt.tight_layout()

    # Overlay axes spanning the whole figure, used for the panel letters.
    # ABCD absdiff, EFGH heritability
    root = fig.add_axes((0, 0, 1, 1))
    top_row = [(x, 0.99, letter)
               for x, letter in zip((0.03, 0.27, 0.53, 0.77), "ABCD")]
    lower_rows = [
        (0.03, 0.67, "E"),
        (0.53, 0.67, "F"),
        (0.03, 0.34, "G"),
        (0.53, 0.34, "H"),
    ]
    labels = tuple(top_row + lower_rows)
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_qc(df_orig, size=(16, 12)):
    """Plot composite QC figures.

    Six panels (A-F) in two rows: age histogram, gender counts, ethnicity
    counts, mean-coverage histogram, sequencing-chemistry counts and cohort
    counts. Cohort names on the last panel are anonymized.

    Args:
        df_orig: Table with the raw column names that are renamed below.
        size: Figure size forwarded to ``plt.figure``.
    """
    df = df_orig.rename(
        columns={
            "hli_calc_age_sample_taken": "Age",
            "hli_calc_gender": "Gender",
            "eth7_max": "Ethnicity",
            "MeanCoverage": "Mean coverage",
            "Chemistry": "Sequencing chemistry",
            "Release Client": "Cohort",
        }
    )

    fig = plt.figure(1, size)
    # (row, first column, column span) on a 2x7 grid, in panel order A-F.
    slots = ((0, 0, 2), (0, 2, 2), (0, 4, 3), (1, 0, 2), (1, 2, 2), (1, 4, 3))
    panels = [
        plt.subplot2grid((2, 7), (row, col), rowspan=1, colspan=span)
        for row, col, span in slots
    ]
    ax_age, ax_gender, ax_eth, ax_cov, ax_chem, ax_cohort = panels

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax_age)
    sns.countplot(x="Gender", data=df, ax=ax_gender)
    sns.countplot(
        x="Ethnicity",
        data=df,
        ax=ax_eth,
        order=df["Ethnicity"].value_counts().index,
    )
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax_cov)
    ax_cov.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax_chem)
    sns.countplot(
        x="Cohort",
        data=df,
        ax=ax_cohort,
        order=df["Cohort"].value_counts().index,
    )

    # Anonymize the cohorts: "Spector" is displayed as "TwinsUK",
    # "Health Nucleus" keeps its existing tick label, and every other cohort
    # is replaced by "C<n>" where n is its 1-based tick position.
    relabelled = []
    for position, tick in enumerate(ax_cohort.get_xticklabels(), start=1):
        text = tick.get_text()
        if text == "Spector":
            relabelled.append("TwinsUK")
        elif text != "Health Nucleus":
            relabelled.append("C{}".format(position))
        else:
            relabelled.append(tick)
    ax_cohort.set_xticklabels(relabelled)

    # Slant the cohort labels.
    ax_cohort.set_xticklabels(
        ax_cohort.get_xticklabels(), ha="right", rotation=30
    )

    # Use each x-axis label as the panel title and blank the label itself.
    for panel in panels:
        panel.set_title(panel.get_xlabel())
        panel.set_xlabel("")

    plt.tight_layout()

    # Overlay axes spanning the whole figure, used for the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = (
        (0.02, 0.96, "A"),
        (0.3, 0.96, "B"),
        (0.6, 0.96, "C"),
        (0.02, 0.52, "D"),
        (0.3, 0.52, "E"),
        (0.6, 0.52, "F"),
    )
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_ccn(df, size=(12, 8)):
    """Plot composite ccn figure.

    Draws a 2x2 grid of copy-number scatter plots against the
    ``hli_calc_age_sample_taken`` column, each passed to ``plot_fit_line``
    with the same x/y data:

    * A: ``ccn.chrX`` for rows where ``hli_calc_gender == "Male"``
    * B: ``ccn.chrY`` for rows where ``hli_calc_gender == "Male"``
    * C: ``ccn.chr1`` for all rows
    * D: ``ccn.chrM`` for all rows

    Args:
        df: Table providing the columns named above and supporting boolean
            row selection (presumably a pandas DataFrame -- confirm against
            callers).
        size: Figure size forwarded to ``plt.figure``.

    Returns:
        None. The figure is drawn on matplotlib figure number 1.
    """
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    mf = df[df["hli_calc_gender"] == "Male"]
    male_age = mf["hli_calc_age_sample_taken"]
    all_age = df["hli_calc_age_sample_taken"]
    age_label = "Chronological age (yr)"
    dot_style = dict(s=10, marker=".", color="lightslategray")

    # NOTE: the per-panel statement order (y-limits before or after the fit
    # line) is kept exactly as it was, since plot_fit_line is defined
    # elsewhere and may depend on the current axis limits.
    ax1.scatter(male_age, mf["ccn.chrX"], **dot_style)
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, male_age, mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    ax2.scatter(male_age, mf["ccn.chrY"], **dot_style)
    plot_fit_line(ax2, male_age, mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    ax3.scatter(all_age, df["ccn.chr1"], **dot_style)
    plot_fit_line(ax3, all_age, df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    ax4.scatter(all_age, df["ccn.chrM"], **dot_style)
    plot_fit_line(ax4, all_age, df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    # The unused function-local Line2D import was dropped: no legend is
    # built in this figure.
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Full-figure invisible axes used only to place the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"),
              (0.02, 0.5, "C"), (0.52, 0.5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite_ccn(df, size=(12, 8)):
    """Plot composite ccn figure.

    Draws a 2x2 grid of copy-number scatter plots against the
    ``hli_calc_age_sample_taken`` column; every panel is also passed to
    ``plot_fit_line`` with the same x/y data:

    * A: ``ccn.chrX`` for rows where ``hli_calc_gender == "Male"``
    * B: ``ccn.chrY`` for rows where ``hli_calc_gender == "Male"``
    * C: ``ccn.chr1`` for all rows
    * D: ``ccn.chrM`` for all rows

    All points use a single fixed color, so no chemistry palette or legend
    is needed here.

    Args:
        df: Table providing the columns named above and supporting boolean
            row selection (presumably a pandas DataFrame -- confirm against
            callers).
        size: Figure size forwarded to ``plt.figure``.

    Returns:
        None. The figure is drawn on matplotlib figure number 1.
    """
    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    # The chemistry palette and the legend proxy list that used to be built
    # here were never used, and the ``zip(...)[:3]`` expression raised
    # TypeError on Python 3 (zip objects are not subscriptable), so that
    # dead code has been removed.

    mf = df[df["hli_calc_gender"] == "Male"]
    age_column = "hli_calc_age_sample_taken"
    age_label = "Chronological age (yr)"

    def _dots(ax, frame, trait):
        # Uniform marker styling shared by all four panels.
        ax.scatter(frame[age_column], frame[trait],
                   s=10, marker='.', color='lightslategray')

    # NOTE: the per-panel statement order (y-limits before or after the fit
    # line) is kept exactly as it was, since plot_fit_line is defined
    # elsewhere and may depend on the current axis limits.
    _dots(ax1, mf, "ccn.chrX")
    ax1.set_ylim(0.8, 1.1)
    plot_fit_line(ax1, mf[age_column], mf["ccn.chrX"])
    ax1.set_ylabel("ChrX copy number")
    ax1.set_title("ChrX copy number in Male")

    _dots(ax2, mf, "ccn.chrY")
    plot_fit_line(ax2, mf[age_column], mf["ccn.chrY"])
    ax2.set_ylim(0.8, 1.1)
    ax2.set_ylabel("ChrY copy number")
    ax2.set_title("ChrY copy number in Male")

    _dots(ax3, df, "ccn.chr1")
    plot_fit_line(ax3, df[age_column], df["ccn.chr1"])
    ax3.set_ylim(1.8, 2.1)
    ax3.set_ylabel("Chr1 copy number")
    ax3.set_title("Chr1 copy number")

    _dots(ax4, df, "ccn.chrM")
    plot_fit_line(ax4, df[age_column], df["ccn.chrM"])
    ax4.set_ylim(0, 400)
    ax4.set_ylabel("Mitochondria copy number")
    ax4.set_title("Mitochondria copy number")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)

    plt.tight_layout()

    # Full-figure invisible axes used only to place the panel letters.
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"), (.52, .98, "B"),
              (.02, .5, "C"), (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()