def plot(self, filename, panel_list, ancestries_df):
    """Draw one ternary ancestry plot per panel and save them as one image.

    The figure has a single row with one subplot per entry of
    ``panel_list``. Each subplot scatters the ("EUR", "AFR", "AMR")
    values of every population, in the order given by
    ``Dataset.used_populations()``.

    Parameters
    ----------
    filename : str
        Name of the image file, written inside ``self.PLOTS_DIR`` (the
        directory is created if needed).
    panel_list : sequence
        Panel objects. ``panel.label`` selects the rows of the panel from
        the "panel" index level and ``panel.name`` is shown in the title.
    ancestries_df : pandas.DataFrame
        Must have a "panel" index level, a "population" column and the
        "EUR", "AFR" and "AMR" columns.

    Notes
    -----
    The figure is saved but not closed, as in the previous implementation.
    """
    dataset_label, _ = self._unique_dataset_and_K_check(ancestries_df)
    dataset = Dataset(dataset_label)
    population_order = Dataset.used_populations()

    rows, cols = 1, len(panel_list)
    width, height = self.PLOT_SIZE
    fig = plt.figure(figsize=(cols * width, rows * height), dpi=30)
    fig.set_size_inches(cols * width, rows * height)

    # One subplot per panel; matplotlib subplot ids start at 1.
    for ax_id, panel in enumerate(panel_list, start=1):
        df_lite = ancestries_df.xs(panel.label, level="panel")
        df_lite = df_lite.reset_index(drop=True).set_index("population")
        plot_title = "Dataset: {}\n{}".format(dataset.name, panel.name)

        ax = fig.add_subplot(rows, cols, ax_id)
        _, tax = ternary.figure(scale=1, ax=ax)

        # Only ask .loc for populations that exist in this panel: recent
        # pandas versions raise KeyError when a listed label is missing,
        # whereas older ones returned NaN rows that dropna() discarded.
        present = [pop for pop in population_order if pop in df_lite.index]
        df_lite = df_lite.loc[present]
        df_lite = df_lite[["EUR", "AFR", "AMR"]].dropna()

        # sort=False keeps the population order established just above.
        df_grouped = df_lite.groupby(level="population", sort=False)
        for population, df_pop_group in df_grouped:
            tax.scatter(
                df_pop_group.values,
                label=population,
                s=45,
                alpha=0.75,
                color=population_colors(population),
                marker=population_markers(population),
            )

        self._ternary_plot_aesthetics(tax, plot_title, df_lite)

    makedirs(self.PLOTS_DIR, exist_ok=True)
    # Save the figure built here explicitly rather than pyplot's
    # "current figure", which a helper could have changed.
    fig.savefig(join(self.PLOTS_DIR, filename), bbox_inches="tight")
def draw_ax(self, ax, components_to_compare, components_df,
            explained_variance, reference_population, title):
    """Scatter two components per population on ``ax`` and label the axes.

    Every population found in the "population" index level of
    ``components_df`` is drawn with its own marker and color. The axes
    are then flipped, if needed, so that the reference population sits in
    the upper left of the plot; a flipped axis gets a "–" label prefix.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    components_to_compare : sequence of two column labels
        The x and y components, in that order.
    components_df : pandas.DataFrame
        Component values, with a "population" index level.
    explained_variance : pandas.Series
        Looked up by component label; the value is printed followed by
        "%" in the axis label (presumably already a percentage - confirm
        against the caller).
    reference_population : population code
        Population used to decide the orientation of both axes. If it is
        not present in ``components_df`` no axis is flipped.
    title : str or None
        Optional bold title for the axes.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    xcomponent, ycomponent = components_to_compare
    filled_markers = ('o', '.', 'D', 's', '^', '<', '>', '*')

    reference_x = None
    reference_y = None

    for pop_code, components in components_df.groupby(level="population"):
        marker = plot_helpers.population_markers(pop_code)
        color = plot_helpers.population_colors(pop_code)
        # Filled markers need no outline; the others are drawn with one.
        linewidth = 0 if marker in filled_markers else 1
        # 'o' markers go on top of the rest (the original note says these
        # are the American populations).
        zorder = 1 if marker == 'o' else 0

        x = components[xcomponent]
        y = components[ycomponent]
        ax.scatter(x, y, lw=linewidth, label=pop_code, marker=marker,
                   c=color, zorder=zorder, s=30, alpha=0.65)

        if pop_code == reference_population:
            reference_x, reference_y = x, y

    xlabel_prefix = ""
    ylabel_prefix = ""

    # Decide the orientation only after every population has been drawn:
    # the axis limits must cover the whole scatter for their midpoint to
    # be meaningful. The goal is to align components across plots by
    # keeping the reference population in the upper left.
    if reference_x is not None:
        xaxis_mean = np.mean(ax.get_xlim())
        yaxis_mean = np.mean(ax.get_ylim())

        # The median tells where most of the reference cloud lies.
        reference_in_the_left = np.median(reference_x) < xaxis_mean
        reference_in_the_top = np.median(reference_y) > yaxis_mean

        if not reference_in_the_left:
            ax.invert_xaxis()
            xlabel_prefix = "–"
        if not reference_in_the_top:
            ax.invert_yaxis()
            ylabel_prefix = "–"

    # Label-based lookup; Series.ix no longer exists in current pandas.
    xvariance = explained_variance.loc[xcomponent]
    ax.set_xlabel("{}{}: {}%".format(xlabel_prefix, xcomponent, xvariance))

    yvariance = explained_variance.loc[ycomponent]
    ax.set_ylabel("{}{}: {}%".format(ylabel_prefix, ycomponent, yvariance))

    if title:
        ax.set_title(title, fontweight="bold")

    self._pca_plot_aesthetics(ax)

    return ax