def plot_GAM(gams, X, Y, size=4, dpi=300, ext='png', filename=None):
    """Plot the terms of several fitted GAMs as a grid of panels.

    Each entry of ``gams`` fills one row of the grid and each term of its
    model fills one column.

    Args:
        gams: dict mapping a target name to a dict with the keys 'model'
            (object exposing ``statistics_['p_values']``) and 'scores_cv'.
        X: predictor table; ``X.shape[1]`` sets the number of grid columns
            and ``X.columns`` supplies the panel titles.
        Y: target table; only ``Y.shape[1]`` is used (number of grid rows).
        size: panel edge length in inches; font sizes scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension appended to ``filename``.
        filename: output path without extension. When None the figure is
            neither saved nor closed.
    """
    cols = X.shape[1]
    rows = Y.shape[1]
    colors = sns.color_palette(n_colors=rows)
    # NOTE: this sets the process-wide default figure size, which also
    # affects figures created after this call
    plt.rcParams['figure.figsize'] = (cols*size, rows*size)
    # squeeze=False always yields a 2D axes array, so selecting a row below
    # also works when there is a single target or a single predictor
    fig, mat_axs = plt.subplots(rows, cols, squeeze=False)
    titles = X.columns
    for j, (name, out) in enumerate(gams.items()):
        axs = mat_axs[j]
        gam = out['model']
        R2 = get_avg_score(out['scores_cv'])
        p_vals = gam.statistics_['p_values']
        for i, ax in enumerate(axs):
            plot_term(gam, i, ax, colors[j], size=size)
            ax.set_xlabel('')
            ax.text(.5, .95, 'p< %s' % format_num(p_vals[i]), va='center',
                    fontsize=size*3, transform=ax.transAxes)
            # column titles are repeated on every other row
            if j % 2 == 0:
                ax.set_title(titles[i], fontsize=size*4)
            # only the first column carries the row label
            if i == 0:
                ax.set_ylabel(name + ' (%s)' % format_num(R2),
                              fontsize=size*4)
            else:
                ax.set_ylabel('')
    plt.subplots_adjust(hspace=.4)
    if filename is not None:
        save_figure(fig, '%s.%s' % (filename, ext),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def calc_EFA_retest(results, rotate='oblimin', verbose=True):
    """Compare EFA factor scores with scores derived from the retest data.

    Args:
        results: results object providing ``ID``, ``dataset``, ``data`` and
            ``EFA``.
        rotate: rotation passed on to the score and reorder helpers.
        verbose: when True, print the mean and the per-factor correlations.

    Returns:
        Tuple ``(combined, cross_diag, ICCs)``: reference and retest scores
        side by side, the diagonal of the retest-by-reference block of
        their correlation matrix, and one value per reference column taken
        from the ``psych.ICC`` output.
    """
    name = results.ID.split('_')[0].title()
    retest_data_raw = get_behav_data(
        dataset=results.dataset.replace('Complete', 'Retest'),
        file='meaningful_variables.csv')
    # Keep the subjects present in both datasets. pandas >= 2.0 refuses a
    # set as an indexer, so build an ordered, duplicate-free list instead.
    reference_ids = set(results.data.index)
    shared_ids = [i for i in dict.fromkeys(retest_data_raw.index)
                  if i in reference_ids]
    retest_data_raw = retest_data_raw.loc[shared_ids, :]
    retest_scores = transfer_scores(retest_data_raw, results, rotate=rotate)
    retest_scores.columns = [str(i)+' Retest' for i in retest_scores.columns]
    # scale and perform the factor score transformation
    EFA = results.EFA
    c = EFA.get_c()
    ref_scores = EFA.get_scores(c=c, rotate=rotate).loc[retest_data_raw.index, :]

    # reorder scores
    if rotate == 'oblimin':
        reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
        ref_scores = ref_scores.iloc[:, reorder_vec]
        retest_scores = retest_scores.iloc[:, reorder_vec]
    combined = pd.concat([ref_scores, retest_scores], axis=1)
    # rows c.. are the retest columns, columns ..c the reference columns
    cross_diag = np.diag(combined.corr().iloc[c:, :c])
    # get ICCs
    ICCs = []
    for col in ref_scores.columns:
        # NOTE(review): the column name is used as a regex, so a name that
        # is a substring of another one would select extra columns
        tmp = combined.filter(regex=str(col))
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    if verbose:
        print('%s, Avg Correlation: %s\n' % (name, format_num(np.mean(cross_diag))))
        for factor, num in zip(ref_scores.columns, cross_diag):
            print('%s: %s' % (factor, format_num(num)))
    return combined, cross_diag, ICCs
def plot_EFA_retest(combined, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Draw the correlation matrix of ``combined`` as a heatmap.

    Args:
        combined: table whose column-wise correlations are plotted
            (presumably the first output of ``calc_EFA_retest`` - confirm
            against the caller).
        size: figure edge length in inches; fonts and lines scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: directory to save into. When None the figure is neither
            saved nor closed.
    """
    corr = combined.corr()
    max_val = abs(corr).max().max()

    fig = plt.figure(figsize=(size, size))
    ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    sns.heatmap(corr, square=True, ax=ax, cbar_ax=cbar_ax,
                vmin=-1, vmax=1,
                cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True),
                cbar_kws={'orientation': 'vertical',
                          'ticks': [-1, 0, 1]})
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    ax.tick_params(labelsize=size/len(corr)*40)

    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(labelsize=size, length=0, pad=size/2)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90,
                       fontsize=size, labelpad=size/2)

    # set divider lines through the middle of the matrix; the extent
    # arguments of axvline/axhline are axes fractions in [0, 1], so the
    # defaults (full span) are used instead of passing the matrix size
    n = corr.shape[1]
    ax.axvline(n//2, color='k', linewidth=size/3)
    ax.axhline(n//2, color='k', linewidth=size/3)

    if plot_dir is not None:
        save_figure(fig, path.join(plot_dir, 'EFA_test_retest_heatmap.%s' % ext),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def calc_EFA_retest(results, rotate='oblimin', verbose=True):
    """Compare EFA factor scores with scores derived from the retest data.

    Args:
        results: results object providing ``ID``, ``dataset``, ``data`` and
            ``EFA``.
        rotate: rotation passed on to the score and reorder helpers.
        verbose: when True, print the mean and the per-factor correlations.

    Returns:
        Tuple ``(combined, cross_diag, ICCs)``: reference and retest scores
        side by side, the diagonal of the retest-by-reference block of
        their correlation matrix, and one value per reference column taken
        from the ``psych.ICC`` output.
    """
    name = results.ID.split('_')[0].title()
    retest_data_raw = get_behav_data(
        dataset=results.dataset.replace('Complete', 'Retest'),
        file='meaningful_variables.csv')
    # Keep the subjects present in both datasets. pandas >= 2.0 refuses a
    # set as an indexer, so build an ordered, duplicate-free list instead.
    reference_ids = set(results.data.index)
    shared_ids = [i for i in dict.fromkeys(retest_data_raw.index)
                  if i in reference_ids]
    retest_data_raw = retest_data_raw.loc[shared_ids, :]
    retest_scores = transfer_scores(retest_data_raw, results, rotate=rotate)
    retest_scores.columns = [str(i) + ' Retest' for i in retest_scores.columns]
    # scale and perform the factor score transformation
    EFA = results.EFA
    c = EFA.get_c()
    ref_scores = EFA.get_scores(c=c,
                                rotate=rotate).loc[retest_data_raw.index, :]

    # reorder scores
    if rotate == 'oblimin':
        reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
        ref_scores = ref_scores.iloc[:, reorder_vec]
        retest_scores = retest_scores.iloc[:, reorder_vec]
    combined = pd.concat([ref_scores, retest_scores], axis=1)
    # rows c.. are the retest columns, columns ..c the reference columns
    cross_diag = np.diag(combined.corr().iloc[c:, :c])
    # get ICCs
    ICCs = []
    for col in ref_scores.columns:
        # NOTE(review): the column name is used as a regex, so a name that
        # is a substring of another one would select extra columns
        tmp = combined.filter(regex=str(col))
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    if verbose:
        print('%s, Avg Correlation: %s\n' %
              (name, format_num(np.mean(cross_diag))))
        for factor, num in zip(ref_scores.columns, cross_diag):
            print('%s: %s' % (factor, format_num(num)))
    return combined, cross_diag, ICCs
def plot_prediction_scatter(results, target_order=None, EFA=True, change=False,
                            classifier='ridge', rotate='oblimin',
                            normalize=False, metric='R2', size=4.6,
                            dpi=300, ext='png', plot_dir=None):
    """Scatter predicted against target scores, one panel per target.

    Args:
        results: results object providing ``load_prediction_object``,
            ``EFA``, ``DA``, ``data`` and ``dataset``.
        target_order: accepted for interface compatibility; not used here.
        EFA: use the EFA scores as predictors when True, else
            ``results.data``.
        change: when True, targets come from ``results.DA.get_change`` and
            the predictors are restricted to the targets' index.
        classifier: classifier name used to load predictions and to build
            the output file name.
        rotate: rotation forwarded to ``load_prediction_object``.
        normalize: accepted for interface compatibility; not used here.
        metric: accepted for interface compatibility; not used here.
        size: figure width in inches; fonts and markers scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: directory to save into. When None the figure is neither
            saved nor closed.
    """
    predictions = results.load_prediction_object(EFA=EFA,
                                                 change=change,
                                                 classifier=classifier,
                                                 rotate=rotate)
    if predictions is None:
        print('No prediction object found!')
        return
    else:
        predictions = predictions['data']
    if EFA:
        predictors = results.EFA.get_scores()
    else:
        predictors = results.data
    if change:
        target_factors, _ = results.DA.get_change(
            results.dataset.replace('Complete', 'Retest'))
        predictors = predictors.loc[target_factors.index]
    else:
        target_factors = results.DA.get_scores()

    sns.set_style('whitegrid')
    n_cols = 2
    n_rows = math.ceil(len(target_factors.columns)/n_cols)
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(size, size/n_cols*n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(target_factors.columns):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        axes[i].set_title('%s: R2: %s, MAE: %s' % (v, R2, MAE),
                          fontweight='bold', fontsize=size*1.5)
        clf = predictions[v]['clf']
        axes[i].scatter(target_factors[v], clf.predict(predictors), s=size*3)
        axes[i].tick_params(length=0, labelsize=0)
        if i % 2 == 0:
            axes[i].set_ylabel('Predicted Factor Score', fontsize=size*1.5)
    # label the x axis of the last two used panels
    axes[i].set_xlabel('Target Factor Score', fontsize=size*1.5)
    axes[i-1].set_xlabel('Target Factor Score', fontsize=size*1.5)

    # Hide unused trailing panels. The guard matters: with a full grid the
    # slice axes[-0:] would select, and hide, every panel.
    empty_plots = n_cols*n_rows - len(target_factors.columns)
    if empty_plots > 0:
        for ax in axes[-empty_plots:]:
            ax.set_visible(False)
    plt.subplots_adjust(hspace=.4, wspace=.3)

    if plot_dir is not None:
        changestr = '_change' if change else ''
        if EFA:
            filename = 'EFA%s_%s_prediction_scatter.%s' % (changestr, classifier, ext)
        else:
            filename = 'IDM%s_%s_prediction_scatter.%s' % (changestr, classifier, ext)
        save_figure(fig, path.join(plot_dir, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def plot_prediction_scatter(predictions, predictors, targets,
                            target_order=None, metric='R2', size=4.6,
                            dpi=300, filename=None):
    """Scatter predicted against target scores, one panel per target.

    Args:
        predictions: dict keyed by target name; each value holds 'clf' (a
            fitted estimator with ``predict``) and 'scores_cv'.
        predictors: predictor table; restricted to the index of ``targets``.
        targets: table of target scores with one column per target name.
        target_order: names of the targets to plot, in order. Defaults to
            the keys of ``predictions``.
        metric: accepted for interface compatibility; not used here.
        size: figure width in inches; fonts and markers scale with it.
        dpi: resolution handed to ``save_figure``.
        filename: output path. When None the figure is neither saved nor
            closed.
    """
    # subset predictors
    predictors = predictors.loc[targets.index]
    if target_order is None:
        target_order = predictions.keys()
    target_order = list(target_order)
    sns.set_style('white')
    n_cols = 4
    n_rows = math.ceil(len(target_order) / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(size, size / n_cols * n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(target_order):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        axes[i].set_title('%s\nR2: %s, MAE: %s' %
                          ('\n'.join(v.split()), R2, MAE),
                          fontweight='bold', fontsize=size * 1)
        clf = predictions[v]['clf']
        axes[i].scatter(targets[v], clf.predict(predictors),
                        s=size * 2.5, edgecolor='white',
                        linewidth=size / 30)
        axes[i].tick_params(length=0, labelsize=0)
        # add diagonal from corner to corner of the current limits, then
        # restore the limits so the line does not rescale the panel
        xlim = axes[i].get_xlim()
        ylim = axes[i].get_ylim()
        axes[i].plot(xlim, ylim, ls="-", c=".5", zorder=-1)
        axes[i].set_xlim(xlim)
        axes[i].set_ylim(ylim)
        for spine in ['top', 'right']:
            axes[i].spines[spine].set_visible(False)
        if i % n_cols == 0:
            axes[i].set_ylabel('Predicted Score', fontsize=size * 1.2)
    for ax in axes[-(len(target_order) + 1):]:
        ax.set_xlabel('Target Score', fontsize=size * 1.2)

    # The grid is laid out from target_order, so the number of spare panels
    # must be derived from it as well (not from targets.columns, which may
    # hold a different number of entries).
    empty_plots = n_cols * n_rows - len(target_order)
    if empty_plots > 0:
        for ax in axes[-empty_plots:]:
            ax.set_visible(False)
    plt.subplots_adjust(hspace=.6, wspace=.3)

    if filename is not None:
        save_figure(fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def plot_predictors_comparison(R2_df, size=2, dpi=300, filename=None):
    """Pair-plot the rows of ``R2_df`` whose index label matches 'CV'.

    The lower-triangle panels are annotated with the Spearman correlation
    of the two columns involved; the upper-triangle panels are hidden.

    Args:
        R2_df: table with a 'Target_Cat' column (used as hue) and rows
            whose index labels contain 'CV'.
        size: height in inches of each pair-plot panel; the annotation
            font scales with it.
        dpi: resolution handed to ``save_figure``.
        filename: output path. When None the grid is returned instead of
            being saved.

    Returns:
        The seaborn pair grid when ``filename`` is None, otherwise None.
    """
    CV_df = R2_df.filter(regex='CV', axis=0)
    CV_corr = CV_df.corr(method='spearman')
    max_R2 = round(CV_df.max(numeric_only=True).max(), 1)
    # the caller-supplied size is honoured (it used to be overwritten by a
    # hard-coded 2, which is still the default)
    grid = sns.pairplot(CV_df, hue='Target_Cat', height=size)
    for i, row in enumerate(grid.axes):
        for j, ax in enumerate(row):
            ax.set_xlim([0, max_R2])
            ax.set_ylim([0, max_R2])
            # dotted corner-to-corner reference line; limits are restored
            # afterwards so the line cannot rescale the panel
            xlim = ax.get_xlim()
            ylim = ax.get_ylim()
            ax.plot(xlim, ylim, ls=":", c=".5", zorder=-1)
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            if j < i:
                ax.text(.5, 1, r'$\rho$ = %s' % format_num(CV_corr.iloc[i, j]),
                        ha='center', fontsize=size * 7, fontweight='bold',
                        transform=ax.transAxes)
            if j > i:
                ax.set_visible(False)
    if filename is not None:
        save_figure(grid.fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
    else:
        return grid
def plot_EFA_retest(combined, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Draw the correlation matrix of ``combined`` as a heatmap.

    Args:
        combined: table whose column-wise correlations are plotted
            (presumably the first output of ``calc_EFA_retest`` - confirm
            against the caller).
        size: figure edge length in inches; fonts and lines scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: directory to save into. When None the figure is neither
            saved nor closed.
    """
    corr = combined.corr()
    max_val = abs(corr).max().max()

    fig = plt.figure(figsize=(size, size))
    ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    sns.heatmap(corr,
                square=True,
                ax=ax,
                cbar_ax=cbar_ax,
                vmin=-1,
                vmax=1,
                cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True),
                cbar_kws={
                    'orientation': 'vertical',
                    'ticks': [-1, 0, 1]
                })
    # leftover debugging prints of the tick labels were removed here
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    ax.tick_params(labelsize=size / len(corr) * 40)

    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(labelsize=size, length=0, pad=size / 2)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=size,
                       labelpad=size / 2)

    # set divider lines through the middle of the matrix; the extent
    # arguments of axvline/axhline are axes fractions in [0, 1], so the
    # defaults (full span) are used instead of passing the matrix size
    n = corr.shape[1]
    ax.axvline(n // 2, color='k', linewidth=size / 3)
    ax.axhline(n // 2, color='k', linewidth=size / 3)

    if plot_dir is not None:
        save_figure(fig,
                    path.join(plot_dir, 'EFA_test_retest_heatmap.%s' % ext),
                    {
                        'bbox_inches': 'tight',
                        'dpi': dpi
                    })
        plt.close()
def plot_vars(tasks, contrasts, axes=None, xlabel='Value', standardize=False):
    """Draw one strip plot per task showing that task's contrast columns.

    Args:
        tasks: task name prefixes; columns of ``contrasts`` are matched
            with the regex '^' + task.
        contrasts: table whose column names look like '<task>.<variable>'.
        axes: indexable collection of axes, one per task (same position as
            in ``tasks``).
        xlabel: label for the x axis of every panel.
        standardize: divide each column by its standard deviation first.
    """
    palette = sns.hls_palette(4)
    muted_palette = [sns.desaturate(shade, .5) for shade in palette]
    for task_idx, task in enumerate(tasks):
        subset = contrasts.filter(regex='^' + task)
        # tasks without any matching column are skipped entirely
        if subset.shape[1] == 0:
            continue
        if standardize:
            subset = subset / subset.std()
        subset.columns = [col.split('.')[1] for col in subset.columns]
        subset.columns = format_variable_names(subset.columns)
        # add mean value to columns
        means = subset.mean()
        n_vars = len(means)
        subset.columns = [
            subset.columns[k] + ': %s' % format_num(means.iloc[k])
            for k in range(n_vars)
        ]
        subset = subset.melt(var_name='Variable', value_name='Value')
        ax = axes[task_idx]
        sns.stripplot(x='Value', y='Variable', hue='Variable', ax=ax,
                      data=subset, palette=muted_palette,
                      jitter=True, alpha=.75)
        # plot central tendency: one large marker per variable at its mean
        ax.scatter(means, range(n_vars), s=200, c=palette[:n_vars],
                   edgecolors='white', linewidths=2, zorder=3)
        # add legend
        legend = ax.get_legend()
        legend.set_title('')
        beautify_legend(legend, colors=palette, fontsize=14)
        # change axes: symmetric x range around zero
        bound = subset.Value.abs().max()
        ax.set_xlim(-bound, bound)
        ax.set_xlabel(xlabel, fontsize=16)
        ax.set_ylabel('')
        ax.set_yticklabels('')
        ax.set_title(format_variable_names([task])[0].title(), fontsize=20)
    plt.subplots_adjust(hspace=.3)
def plot_GAM(gams, X, Y, size=4, dpi=300, ext='png', filename=None):
    """Plot the terms of several fitted GAMs as a grid of panels.

    Each entry of ``gams`` fills one row of the grid and each term of its
    model fills one column.

    Args:
        gams: dict mapping a target name to a dict with the keys 'model'
            (object exposing ``statistics_['p_values']``) and 'scores_cv'.
        X: predictor table; ``X.shape[1]`` sets the number of grid columns
            and ``X.columns`` supplies the panel titles.
        Y: target table; only ``Y.shape[1]`` is used (number of grid rows).
        size: panel edge length in inches; font sizes scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension appended to ``filename``.
        filename: output path without extension. When None the figure is
            neither saved nor closed.
    """
    cols = X.shape[1]
    rows = Y.shape[1]
    colors = sns.color_palette(n_colors=rows)
    # NOTE: this sets the process-wide default figure size, which also
    # affects figures created after this call
    plt.rcParams['figure.figsize'] = (cols * size, rows * size)
    # squeeze=False always yields a 2D axes array, so selecting a row below
    # also works when there is a single target or a single predictor
    fig, mat_axs = plt.subplots(rows, cols, squeeze=False)
    titles = X.columns
    for j, (name, out) in enumerate(gams.items()):
        axs = mat_axs[j]
        gam = out['model']
        R2 = get_avg_score(out['scores_cv'])
        p_vals = gam.statistics_['p_values']
        for i, ax in enumerate(axs):
            plot_term(gam, i, ax, colors[j], size=size)
            ax.set_xlabel('')
            ax.text(.5,
                    .95,
                    'p< %s' % format_num(p_vals[i]),
                    va='center',
                    fontsize=size * 3,
                    transform=ax.transAxes)
            # column titles are repeated on every other row
            if j % 2 == 0:
                ax.set_title(titles[i], fontsize=size * 4)
            # only the first column carries the row label
            if i == 0:
                ax.set_ylabel(name + ' (%s)' % format_num(R2),
                              fontsize=size * 4)
            else:
                ax.set_ylabel('')
    plt.subplots_adjust(hspace=.4)
    if filename is not None:
        save_figure(fig, '%s.%s' % (filename, ext), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        plt.close()
# ******************************************************** # Inspect # ******************************************************** gams = GAM_results['task'] X = results['task'].EFA.get_scores() ridge_prediction = results['task'].load_prediction_object(classifier='ridge')['data'] for k,v in gams.items(): ridge_r2cv = ridge_prediction[k]['scores_cv'][0]['R2'] ridge_r2in = ridge_prediction[k]['scores_insample'][0]['R2'] print('*'*79) print(k) print('GAM CV', get_avg_score(v['scores_cv'])) print('GAM Insample', get_avg_score(v['scores_insample'])) print('*') print('Ridge CV', format_num(ridge_r2cv, 3)) print('Ridge insample', format_num(ridge_r2in, 3)) print('*'*79) # plot full matrix plot_dir = path.dirname(results['task'].get_plot_dir()) plot_GAM(GAM_results['task'], results['task'].EFA.get_scores(), Y, filename=path.join(plot_dir, 'task_GAM')) plot_GAM(GAM_results['survey'], results['survey'].EFA.get_scores(), Y, filename=path.join(plot_dir, 'survey_GAM'))
def plot_prediction_scatter(results,
                            target_order=None,
                            EFA=True,
                            change=False,
                            classifier='ridge',
                            rotate='oblimin',
                            normalize=False,
                            metric='R2',
                            size=4.6,
                            dpi=300,
                            ext='png',
                            plot_dir=None):
    """Scatter predicted against target scores, one panel per target.

    Args:
        results: results object providing ``load_prediction_object``,
            ``EFA``, ``DA``, ``data`` and ``dataset``.
        target_order: accepted for interface compatibility; not used here.
        EFA: use the EFA scores as predictors when True, else
            ``results.data``.
        change: when True, targets come from ``results.DA.get_change`` and
            the predictors are restricted to the targets' index.
        classifier: classifier name used to load predictions and to build
            the output file name.
        rotate: rotation forwarded to ``load_prediction_object``.
        normalize: accepted for interface compatibility; not used here.
        metric: accepted for interface compatibility; not used here.
        size: figure width in inches; fonts and markers scale with it.
        dpi: resolution handed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: directory to save into. When None the figure is neither
            saved nor closed.
    """
    predictions = results.load_prediction_object(EFA=EFA,
                                                 change=change,
                                                 classifier=classifier,
                                                 rotate=rotate)
    if predictions is None:
        print('No prediction object found!')
        return
    else:
        predictions = predictions['data']
    if EFA:
        predictors = results.EFA.get_scores()
    else:
        predictors = results.data
    if change:
        target_factors, _ = results.DA.get_change(
            results.dataset.replace('Complete', 'Retest'))
        predictors = predictors.loc[target_factors.index]
    else:
        target_factors = results.DA.get_scores()

    sns.set_style('whitegrid')
    n_cols = 2
    n_rows = math.ceil(len(target_factors.columns) / n_cols)
    fig, axes = plt.subplots(n_rows,
                             n_cols,
                             figsize=(size, size / n_cols * n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(target_factors.columns):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        axes[i].set_title('%s: R2: %s, MAE: %s' % (v, R2, MAE),
                          fontweight='bold',
                          fontsize=size * 1.5)
        clf = predictions[v]['clf']
        axes[i].scatter(target_factors[v],
                        clf.predict(predictors),
                        s=size * 3)
        axes[i].tick_params(length=0, labelsize=0)
        if i % 2 == 0:
            axes[i].set_ylabel('Predicted Factor Score', fontsize=size * 1.5)
    # label the x axis of the last two used panels
    axes[i].set_xlabel('Target Factor Score', fontsize=size * 1.5)
    axes[i - 1].set_xlabel('Target Factor Score', fontsize=size * 1.5)

    # Hide unused trailing panels. The guard matters: with a full grid the
    # slice axes[-0:] would select, and hide, every panel.
    empty_plots = n_cols * n_rows - len(target_factors.columns)
    if empty_plots > 0:
        for ax in axes[-empty_plots:]:
            ax.set_visible(False)
    plt.subplots_adjust(hspace=.4, wspace=.3)

    if plot_dir is not None:
        changestr = '_change' if change else ''
        if EFA:
            filename = 'EFA%s_%s_prediction_scatter.%s' % (changestr,
                                                           classifier, ext)
        else:
            filename = 'IDM%s_%s_prediction_scatter.%s' % (changestr,
                                                           classifier, ext)
        save_figure(fig, path.join(plot_dir, filename), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        plt.close()
def plot_dendrogram(loading,
                    clustering,
                    title=None,
                    break_lines=True,
                    drop_list=None,
                    double_drop_list=None,
                    absolute_loading=False,
                    size=4.6,
                    dpi=300,
                    filename=None):
    """ Plots HCA results as dendrogram with loadings underneath

    Args:
        loading: pandas df, a results EFA loading matrix
        clustering: dict-like HCA clustering result; the keys 'linkage',
            'clustered_df', 'labels' and 'reorder_vec' are read, and
            'cluster_names' is used when present
        title (optional): str, title to plot
        break_lines: whether to separate EFA heatmap based on clusters,
            default=True
        drop_list (optional): list of cluster indices whose cluster label
            is dropped (drawn lower)
        double_drop_list (optional): list of cluster indices whose cluster
            label is dropped twice (drawn lower still)
        absolute_loading: whether to plot the absolute loading value,
            default False
        size: figure width in inches; fonts and line widths scale with it
        dpi: resolution passed to save_figure
        filename: if set, where to save the plot

    Returns:
        The figure when filename is None; otherwise the figure is saved
        and closed and nothing is returned.
    """

    c = loading.shape[1]
    # extract cluster vars
    link = clustering['linkage']
    DVs = clustering['clustered_df'].columns
    ordered_loading = loading.loc[DVs]
    if absolute_loading:
        ordered_loading = abs(ordered_loading)
    # get cluster sizes
    labels = clustering['labels']
    cluster_sizes = [np.sum(labels == (i + 1)) for i in range(max(labels))]
    link_function, colors = get_dendrogram_color_fun(link,
                                                     clustering['reorder_vec'],
                                                     labels)

    # set figure properties
    figsize = (size, size * .6)
    # set up axes' size
    heatmap_height = ordered_loading.shape[1] * .035
    heat_size = [.1, heatmap_height]
    dendro_size = [np.sum(heat_size), .3]
    # set up plot axes
    # rectangles are [left, bottom, width, height] in figure fractions; the
    # dendrogram sits directly on top of the heatmap
    dendro_size = [.15, dendro_size[0], .78, dendro_size[1]]
    heatmap_size = [.15, heat_size[0], .78, heat_size[1]]
    cbar_size = [.935, heat_size[0], .015, heat_size[1]]
    # transpose so that variables run along x, under the dendrogram leaves
    ordered_loading = ordered_loading.T

    with sns.axes_style('white'):
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes(dendro_size)
        # **********************************
        # plot dendrogram
        # **********************************
        with plt.rc_context({'lines.linewidth': size * .125}):
            dendrogram(link,
                       ax=ax1,
                       link_color_func=link_function,
                       orientation='top')
        # change axis properties
        ax1.tick_params(axis='x',
                        which='major',
                        labelsize=14,
                        labelbottom=False)
        ax1.get_yaxis().set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.spines['right'].set_visible(False)
        ax1.spines['bottom'].set_visible(False)
        ax1.spines['left'].set_visible(False)
        # **********************************
        # plot loadings as heatmap below
        # **********************************
        ax2 = fig.add_axes(heatmap_size)
        cbar_ax = fig.add_axes(cbar_size)
        max_val = np.max(abs(loading.values))
        # bring to closest .25
        max_val = ceil(max_val * 4) / 4
        sns.heatmap(ordered_loading,
                    ax=ax2,
                    cbar=True,
                    cbar_ax=cbar_ax,
                    yticklabels=True,
                    xticklabels=True,
                    vmax=max_val,
                    vmin=-max_val,
                    cbar_kws={
                        'orientation': 'vertical',
                        'ticks': [-max_val, 0, max_val]
                    },
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
        ax2.set_yticklabels(ax2.get_yticklabels(), rotation=0)
        ax2.tick_params(axis='y',
                        labelsize=size * heat_size[1] * 30 / c,
                        pad=size / 4,
                        length=0)
        # format cbar axis
        cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
        cbar_ax.tick_params(labelsize=size * heat_size[1] * 25 / c,
                            length=0,
                            pad=size / 2)
        cbar_ax.set_ylabel('Factor Loading',
                           rotation=-90,
                           fontsize=size * heat_size[1] * 30 / c,
                           labelpad=size * 2)
        # add lines to heatmap to distinguish clusters
        if break_lines == True:
            xlim = ax2.get_xlim()
            ylim = ax2.get_ylim()
            step = xlim[1] / len(labels)
            cluster_breaks = [i * step for i in np.cumsum(cluster_sizes)]
            # the last break coincides with the right edge, so it is skipped
            ax2.vlines(cluster_breaks[:-1],
                       ylim[0],
                       ylim[1],
                       linestyles='dashed',
                       linewidth=size * .1,
                       colors=[.5, .5, .5],
                       zorder=10)
        # **********************************
        # plot cluster names
        # **********************************
        # first column index and (approximate) center of every cluster
        beginnings = np.hstack([[0], np.cumsum(cluster_sizes)[:-1]])
        centers = beginnings + np.array(cluster_sizes) // 2 + .5
        offset = .07
        if 'cluster_names' in clustering.keys():
            ax2.tick_params(axis='x',
                            reset=True,
                            top=False,
                            bottom=False,
                            width=size / 8,
                            length=0)
            names = [transform_name(i) for i in clustering['cluster_names']]
            ax2.set_xticks(centers)
            ax2.set_xticklabels(names,
                                rotation=0,
                                ha='center',
                                fontsize=heatmap_size[2] * size * 1)
            ticks = ax2.xaxis.get_ticklines()[::2]
            for i, label in enumerate(ax2.get_xticklabels()):
                # clusters with an empty name get no bracket or label styling
                if label.get_text() != '':
                    # colored bar just below the heatmap spanning the cluster
                    ax2.hlines(c + offset,
                               beginnings[i] + .5,
                               beginnings[i] + cluster_sizes[i] - .5,
                               clip_on=False,
                               color=colors[i],
                               linewidth=size / 5)
                    label.set_color(colors[i])
                    ticks[i].set_color(colors[i])
                    # default label position; lowered for clusters listed in
                    # drop_list and lowered further for double_drop_list
                    y_drop = .005
                    line_drop = .3
                    if drop_list and i in drop_list:
                        y_drop = .05
                        line_drop = 1.6
                    if double_drop_list and i in double_drop_list:
                        y_drop = .1
                        line_drop = 2.9
                    label.set_y(-(y_drop / heatmap_height +
                                  heatmap_height / c * offset))
                    # vertical connector from the cluster bar to its label
                    ax2.vlines(beginnings[i] + cluster_sizes[i] / 2,
                               c + offset,
                               c + offset + line_drop,
                               clip_on=False,
                               color=colors[i],
                               linewidth=size / 7.5)

        # add title
        if title:
            ax1.set_title(title, fontsize=size * 2, y=1.05)
    if filename is not None:
        save_figure(fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return fig
def plot_subbranch(target_color,
                   cluster_i,
                   tree,
                   loading,
                   cluster_sizes,
                   title=None,
                   size=2.3,
                   dpi=300,
                   plot_loc=None):
    """Plot one colored branch of a dendrogram with its loadings heatmap.

    The branch is located by scanning ``tree['color_list']`` for the run
    of entries colored ``target_color``. Nothing is plotted when no such
    run can be delimited or when its length disagrees with
    ``cluster_sizes[cluster_i]``.

    Args:
        target_color: color of the branch to plot (compared via ``to_hex``).
        cluster_i: position of the cluster in ``cluster_sizes``.
        tree: dict with a 'color_list' entry, as accepted by ``plot_tree``.
        loading: loading table; transposed so its rows become columns.
        cluster_sizes: number of variables in every cluster, in plot order.
        title: optional title drawn above the dendrogram.
        size: base size; figure size, fonts and line widths scale with it.
        dpi: resolution handed to ``save_figure``.
        plot_loc: output path. When None the figure is returned instead.

    Returns:
        The figure when ``plot_loc`` is None and a plot was made,
        otherwise None.
    """
    sns.set_style('white')
    colormap = sns.diverging_palette(220, 15, n=100, as_cmap=True)
    # get variables in subbranch based on coloring
    curr_color = tree['color_list'][0]
    start = 0
    # stays None when the color never changes along the list; previously
    # that case crashed with a NameError in the size check below
    end = None
    for i, color in enumerate(tree['color_list']):
        if color != curr_color:
            end = i
            if curr_color == to_hex(target_color):
                break
            if color != "#808080":
                start = i
            curr_color = color
    if end is None or (end - start) + 1 != cluster_sizes[cluster_i]:
        return None

    # get subset of loading
    cumsizes = np.cumsum(cluster_sizes)
    if cluster_i == 0:
        loading_start = 0
    else:
        loading_start = cumsizes[cluster_i - 1]
    subset_loading = loading.T.iloc[:, loading_start:cumsizes[cluster_i]]

    # plotting
    N = subset_loading.shape[1]
    length = N * .05
    dendro_size = [0, .746, length, .12]
    heatmap_size = [0, .5, length, .25]
    fig = plt.figure(figsize=(size * 2, size * 4))
    dendro_ax = fig.add_axes(dendro_size)
    heatmap_ax = fig.add_axes(heatmap_size)
    cbar_size = [length + .22, .5, .05, .25]
    factor_avg_size = [length + .01, .5, .2, .25]
    factor_avg_ax = fig.add_axes(factor_avg_size)
    cbar_ax = fig.add_axes(cbar_size)
    plot_tree(tree, range(start, end), dendro_ax, linewidth=size / 2)
    dendro_ax.set_xticklabels('')

    # NOTE(review): uses the largest signed loading, not the largest
    # absolute one, as the symmetric color limit - confirm this is intended
    max_val = np.max(loading.values)
    # if max_val is high, just make it 1
    if max_val > .9:
        max_val = 1
    sns.heatmap(
        subset_loading,
        ax=heatmap_ax,
        cbar=True,
        cbar_ax=cbar_ax,
        cbar_kws={'ticks': [-max_val, 0, max_val]},
        yticklabels=True,
        vmin=-max_val,
        vmax=max_val,
        cmap=colormap,
    )
    yn, xn = subset_loading.shape
    tick_label_size = size * 30 / max(yn, 8)
    heatmap_ax.tick_params(labelsize=tick_label_size,
                           length=size * .5,
                           width=size / 5,
                           pad=size)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    # columns are labeled 1..N; the names are listed in the text panel below
    heatmap_ax.set_xticks([i + .5 for i in range(0, subset_loading.shape[1])])
    heatmap_ax.set_xticklabels(
        [str(i) for i in range(1, subset_loading.shape[1] + 1)],
        size=size * 2,
        rotation=0,
        ha='center')

    avg_factors = abs(subset_loading).mean(1)
    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=size * 3)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=size * 3,
                       labelpad=size * 2)
    # numbered list of the variable names in a frameless axes
    text_ax = fig.add_axes([-.22, .44 - .02 * N, .4, .02 * N])
    for spine in ['top', 'right', 'bottom', 'left']:
        text_ax.spines[spine].set_visible(False)
    for i, label in enumerate(subset_loading.columns):
        text_ax.text(0,
                     1 - i / N,
                     str(i + 1) + '.',
                     fontsize=size * 2.8,
                     ha='right')
        text_ax.text(.1, 1 - i / N, label, fontsize=size * 3)
    text_ax.tick_params(which='both',
                        labelbottom=False,
                        labelleft=False,
                        bottom=False,
                        left=False)
    # average factor bar (mean absolute loading per row of the heatmap)
    avg_factors[::-1].plot(kind='barh',
                           ax=factor_avg_ax,
                           width=.7,
                           color=tree['color_list'][start])
    factor_avg_ax.set_xlim(0, max_val)
    factor_avg_ax.set_xticklabels('')
    factor_avg_ax.set_yticklabels('')
    factor_avg_ax.tick_params(length=0)
    factor_avg_ax.spines['top'].set_visible(False)
    factor_avg_ax.spines['bottom'].set_visible(False)
    factor_avg_ax.spines['left'].set_visible(False)
    factor_avg_ax.spines['right'].set_visible(False)

    # title and axes styling of dendrogram
    if title:
        dendro_ax.set_title(title,
                            fontsize=size * 3,
                            y=1.05,
                            fontweight='bold')
    dendro_ax.get_yaxis().set_visible(False)
    dendro_ax.spines['top'].set_visible(False)
    dendro_ax.spines['right'].set_visible(False)
    dendro_ax.spines['bottom'].set_visible(False)
    dendro_ax.spines['left'].set_visible(False)

    if plot_loc is None:
        return fig
    try:
        save_figure(fig, plot_loc, {'bbox_inches': 'tight', 'dpi': dpi})
    except ValueError:
        # saving is best effort: report the failure and carry on
        print('something when wrong with that plot')
    finally:
        # always release the figure, whether or not saving succeeded
        plt.close()
    return None
# ******************************************************** # Inspect # ******************************************************** gams = GAM_results['task'] X = results['task'].EFA.get_scores() ridge_prediction = results['task'].load_prediction_object( classifier='ridge')['data'] for k, v in gams.items(): ridge_r2cv = ridge_prediction[k]['scores_cv'][0]['R2'] ridge_r2in = ridge_prediction[k]['scores_insample'][0]['R2'] print('*' * 79) print(k) print('GAM CV', get_avg_score(v['scores_cv'])) print('GAM Insample', get_avg_score(v['scores_insample'])) print('*') print('Ridge CV', format_num(ridge_r2cv, 3)) print('Ridge insample', format_num(ridge_r2in, 3)) print('*' * 79) # plot full matrix plot_dir = path.dirname(results['task'].get_plot_dir()) plot_GAM(GAM_results['task'], results['task'].EFA.get_scores(), Y, filename=path.join(plot_dir, 'task_GAM')) plot_GAM(GAM_results['survey'], results['survey'].EFA.get_scores(), Y, filename=path.join(plot_dir, 'survey_GAM'))
# ************************************************************************* max_val = round(abs(task_subset).max().max(),1) loading_data = task_subset.filter(regex=tasks[task_i], axis=0) # for visualization purposes remove "reflections" from loading matrix # by multiplying by -1 reflects = [-1 if 'ReflogTr' in i else 1 for i in loading_data.index] loading_data = loading_data.multiply(reflects, axis=0) # plot loadings sns.heatmap(loading_data.iloc[::-1,:], ax=loading_axes[task_i], yticklabels=False, xticklabels=False, linecolor='white', linewidth=basewidth, cbar_ax=cbar_ax, vmax = max_val, vmin = -max_val, cbar_kws={'ticks': [-max_val, 0, max_val]}, cmap=sns.diverging_palette(220,16,n=100, as_cmap=True)) # format cbar cbar_ax.set_yticklabels([format_num(-max_val, 1), 0, format_num(max_val, 1)]) cbar_ax.tick_params(axis='y', length=0) cbar_ax.tick_params(labelsize=basefont) for i in range(1,loading_data.shape[0]+1): #loading_axes[task_i].hlines(i, -.2, 6.1, color='white', linewidth=basewidth*3) loading_axes[task_i].add_patch(Rectangle([-.1,i-.2], width=loading_data.shape[1]+.2, height=.2, zorder=100, facecolor='white', edgecolor='white', linewidth=basewidth, clip_on=False)) # add boxes for i in range(len(tick_names)): box_color = tick_colors[len(tick_names)-(i+1)] box_pos = [-.15, i+.2] loading_axes[task_i].add_patch(Rectangle(box_pos, width=.15, height=.4, zorder=100, facecolor=box_color, edgecolor=box_color,
def plot_subbranch(target_color, cluster_i, tree, loading, cluster_sizes,
                   title=None, size=2.3, dpi=300, plot_loc=None):
    """Plot one colored branch of a dendrogram with its loadings heatmap.

    The branch is located by scanning ``tree['color_list']`` for the run
    of entries colored ``target_color``. Nothing is plotted when no such
    run can be delimited or when its length disagrees with
    ``cluster_sizes[cluster_i]``.

    Args:
        target_color: color of the branch to plot (compared via ``to_hex``).
        cluster_i: position of the cluster in ``cluster_sizes``.
        tree: dict with a 'color_list' entry, as accepted by ``plot_tree``.
        loading: loading table; transposed so its rows become columns.
        cluster_sizes: number of variables in every cluster, in plot order.
        title: optional title drawn above the dendrogram.
        size: base size; figure size, fonts and line widths scale with it.
        dpi: resolution handed to ``save_figure``.
        plot_loc: output path. When None the figure is returned instead.

    Returns:
        The figure when ``plot_loc`` is None and a plot was made,
        otherwise None.
    """
    sns.set_style('white')
    colormap = sns.diverging_palette(220,15,n=100,as_cmap=True)
    # get variables in subbranch based on coloring
    curr_color = tree['color_list'][0]
    start = 0
    # stays None when the color never changes along the list; previously
    # that case crashed with a NameError in the size check below
    end = None
    for i, color in enumerate(tree['color_list']):
        if color != curr_color:
            end = i
            if curr_color == to_hex(target_color):
                break
            if color != "#808080":
                start = i
            curr_color = color
    if end is None or (end-start)+1 != cluster_sizes[cluster_i]:
        return None

    # get subset of loading
    cumsizes = np.cumsum(cluster_sizes)
    if cluster_i == 0:
        loading_start = 0
    else:
        loading_start = cumsizes[cluster_i-1]
    subset_loading = loading.T.iloc[:,loading_start:cumsizes[cluster_i]]

    # plotting
    N = subset_loading.shape[1]
    length = N*.05
    dendro_size = [0,.746,length,.12]
    heatmap_size = [0,.5,length,.25]
    fig = plt.figure(figsize=(size,size*2))
    dendro_ax = fig.add_axes(dendro_size)
    heatmap_ax = fig.add_axes(heatmap_size)
    cbar_size = [length+.22, .5, .05, .25]
    factor_avg_size = [length+.01,.5,.2,.25]
    factor_avg_ax = fig.add_axes(factor_avg_size)
    cbar_ax = fig.add_axes(cbar_size)
    plot_tree(tree, range(start, end), dendro_ax, linewidth=size/2)
    dendro_ax.set_xticklabels('')

    # NOTE(review): uses the largest signed loading, not the largest
    # absolute one, as the symmetric color limit - confirm this is intended
    max_val = np.max(loading.values)
    # if max_val is high, just make it 1
    if max_val > .9:
        max_val = 1
    sns.heatmap(subset_loading, ax=heatmap_ax,
                cbar=True, cbar_ax=cbar_ax,
                cbar_kws={'ticks': [-max_val, 0, max_val]},
                yticklabels=True,
                vmin=-max_val, vmax=max_val,
                cmap=colormap,)
    yn, xn = subset_loading.shape
    tick_label_size = size*30/max(yn, 8)
    heatmap_ax.tick_params(labelsize=tick_label_size, length=size*.5,
                           width=size/5, pad=size)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    # columns are labeled 1..N; the names are listed in the text panel below
    heatmap_ax.set_xticks([i+.5 for i in range(0,subset_loading.shape[1])])
    heatmap_ax.set_xticklabels([str(i) for i in range(1,subset_loading.shape[1]+1)],
                               size=size*2, rotation=0, ha='center')

    avg_factors = abs(subset_loading).mean(1)
    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=size*3)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90, fontsize=size*3,
                       labelpad=size*2)
    # numbered list of the variable names in a frameless axes
    text_ax = fig.add_axes([-.22,.44-.02*N,.4,.02*N])
    for spine in ['top','right','bottom','left']:
        text_ax.spines[spine].set_visible(False)
    for i, label in enumerate(subset_loading.columns):
        text_ax.text(0, 1-i/N, str(i+1)+'.', fontsize=size*2.8, ha='right')
        text_ax.text(.1, 1-i/N, label, fontsize=size*3)
    text_ax.tick_params(which='both', labelbottom=False, labelleft=False,
                        bottom=False, left=False)
    # average factor bar (mean absolute loading per row of the heatmap)
    avg_factors[::-1].plot(kind='barh', ax = factor_avg_ax, width=.7,
                           color= tree['color_list'][start])
    factor_avg_ax.set_xlim(0, max_val)
    factor_avg_ax.set_xticklabels('')
    factor_avg_ax.set_yticklabels('')
    factor_avg_ax.tick_params(length=0)
    factor_avg_ax.spines['top'].set_visible(False)
    factor_avg_ax.spines['bottom'].set_visible(False)
    factor_avg_ax.spines['left'].set_visible(False)
    factor_avg_ax.spines['right'].set_visible(False)

    # title and axes styling of dendrogram
    if title:
        dendro_ax.set_title(title, fontsize=size*3, y=1.05, fontweight='bold')
    dendro_ax.get_yaxis().set_visible(False)
    dendro_ax.spines['top'].set_visible(False)
    dendro_ax.spines['right'].set_visible(False)
    dendro_ax.spines['bottom'].set_visible(False)
    dendro_ax.spines['left'].set_visible(False)
    if plot_loc is not None:
        save_figure(fig, plot_loc, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return fig
def plot_communality(results, c, rotate='oblimin', retest_threshold=.2,
                     size=4.6, dpi=300, ext='png', plot_dir=None):
    """ Plots communality as bars and, with retest data, as densities

    Args:
        results: a results object; its EFA and dataset attributes are used
        c: the number of components, passed to get_communality and used in
            the output file names
        rotate: rotation passed to get_communality
        retest_threshold: passed to get_adjusted_communality
        size: scalar used to scale figure sizes, line widths and fonts
        dpi: the dpi for saved images
        ext: the extension for saved images
        plot_dir: directory to save the figures. If not set, nothing is
            saved or closed
    """
    communality = get_communality(results.EFA, rotate, c)
    # load retest data
    retest_name = results.dataset.replace('Complete', 'Retest')
    retest_data = get_retest_data(dataset=retest_name)
    if retest_data is None:
        print('No retest data found for datafile: %s' % results.dataset)
        return

    # reorder data in line with communality, then reformat variable names
    retest_data = retest_data.loc[communality.index]
    communality.index = format_variable_names(communality.index)
    retest_data.index = format_variable_names(retest_data.index)
    has_retest = len(retest_data) > 0

    # communality bars: three panels with retest data, one panel without
    if not has_retest:
        bar_fig = plot_bar_factor(communality, label_rows=True,
                                  width=size / 3, height=size * 2,
                                  title='Communality')
    else:
        adjusted_communality, correlation, noise_ceiling = \
            get_adjusted_communality(communality, retest_data,
                                     retest_threshold)
        bar_fig, axes = plt.subplots(1, 3, figsize=(3 * (size / 10), size))
        panels = [(communality, True, 'Communality'),
                  (noise_ceiling, False, 'Test-Retest'),
                  (adjusted_communality, False, 'Adjusted Communality')]
        for panel_ax, (values, with_labels, heading) in zip(axes, panels):
            plot_bar_factor(values, panel_ax, width=size / 10, height=size,
                            label_rows=with_labels, title=heading)
    if plot_dir:
        bar_name = 'communality_bars-EFA%s.%s' % (c, ext)
        save_figure(bar_fig, path.join(plot_dir, bar_name),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()

    # the density plot needs the adjusted communality
    if not has_retest:
        return

    # communality densities
    with sns.axes_style('white'):
        palette = sns.color_palette(n_colors=2, desat=.75)
        dist_fig, ax = plt.subplots(1, 1, figsize=(size, size))
        curves = [(communality, 'Communality', palette[0]),
                  (adjusted_communality, 'Adjusted Communality', palette[1])]
        for values, curve_label, curve_color in curves:
            sns.kdeplot(values, linewidth=size / 4, shade=True,
                        label=curve_label, color=curve_color)
        # dashed vertical line at the mean of each distribution
        y_low, y_high = ax.get_ylim()
        for values, _, curve_color in curves:
            ax.vlines(np.mean(values), y_low, y_high, color=curve_color,
                      linewidth=size / 4, linestyle='--')
        legend = ax.legend(fontsize=size * 2, loc='upper right')
        beautify_legend(legend, palette)
        plt.xlabel('Communality', fontsize=size * 2)
        plt.ylabel('Normalized Density', fontsize=size * 2)
        ax.set_yticks([])
        ax.tick_params(labelsize=size)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        # add correlation
        corr_text = format_num(np.mean(correlation))
        ax.text(1.1, 1.25,
                'Correlation Between Communality \nand Test-Retest: %s'
                % corr_text,
                size=size * 2)
    if plot_dir:
        dist_name = 'communality_dist-EFA%s.%s' % (c, ext)
        save_figure(dist_fig, path.join(plot_dir, dist_name),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def plot_heatmap_factors(results, c, size=4.6, thresh=75, rotate='oblimin',
                         DA=False, dpi=300, ext='png', plot_dir=None):
    """ Plots factor analytic loadings as a heatmap

    Args:
        results: a dimensional structure results object
        c: the number of components to use
        size: scalar - the width of the plot. The height is twice the width
        thresh: percentile (0-100) of the absolute loadings. Loadings at or
            below it are set to 0, and variables whose masked loadings
            average to 0 are dropped. If not positive, nothing is masked
        rotate: rotation passed to get_loading and reorder_factors
        DA: if True use results.DA instead of results.EFA
        dpi: the final dpi for the image
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If none, do not save
    """
    EFA = results.DA if DA else results.EFA
    loadings = EFA.get_loading(c, rotate=rotate)
    loadings = EFA.reorder_factors(loadings, rotate=rotate)
    grouping = get_factor_groups(loadings)
    # order the variables by group (second item of every grouping entry)
    flattened_factor_order = [var for entry in grouping for var in entry[1]]
    loadings = loadings.loc[flattened_factor_order]
    # get threshold for loadings
    if thresh > 0:
        thresh_val = np.percentile(abs(loadings).values, thresh)
        print('Thresholding all loadings less than %s'
              % np.round(thresh_val, 3))
        loadings = loadings.mask(abs(loadings) <= thresh_val, 0)
        # remove variables that don't cross the threshold for any factor
        kept_vars = list(loadings.index[loadings.mean(1) != 0])
        print('%s Variables out of %s are kept after threshold'
              % (len(kept_vars), loadings.shape[0]))
        loadings = loadings.loc[kept_vars]
        # remove masked variables from grouping (set gives O(1) membership)
        kept_lookup = set(kept_vars)
        grouping = [[factor, [x for x in group if x in kept_lookup]]
                    for factor, group in grouping]
    # change variable names to make them more readable
    loadings.index = format_variable_names(loadings.index)

    # set up plot variables
    # max(..., 1) avoids a division by zero when fewer than two variables
    # are left to plot
    DV_fontsize = size * 2 / max(loadings.shape[0] // 2, 1) * 30
    figsize = (size, size * 2)

    f = plt.figure(figsize=figsize)
    ax = f.add_axes([0, 0, .08 * loadings.shape[1], 1])
    cbar_ax = f.add_axes([.08 * loadings.shape[1] + .02, 0, .04, 1])

    max_val = abs(loadings).max().max()
    cbar_ticks = [-max_val, -max_val / 2, 0, max_val / 2, max_val]
    sns.heatmap(loadings, ax=ax, cbar_ax=cbar_ax,
                vmax=max_val, vmin=-max_val,
                cbar_kws={'ticks': cbar_ticks},
                linecolor='white', linewidth=.01,
                cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
    ax.set_yticks(np.arange(.5, loadings.shape[0] + .5, 1))
    ax.set_yticklabels(loadings.index, fontsize=DV_fontsize, rotation=0)
    ax.set_xticklabels(loadings.columns,
                       fontsize=min(size * 3, DV_fontsize * 1.5),
                       ha='center',
                       rotation=90)
    ax.tick_params(length=size * .5, width=size / 10)
    # format cbar: one label per tick; the upper half-tick is +max_val/2
    # (it was previously labelled with the negative value)
    cbar_ax.set_yticklabels([format_num(-max_val, 2),
                             format_num(-max_val / 2, 2),
                             0,
                             format_num(max_val / 2, 2),
                             format_num(max_val, 2)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=DV_fontsize * 1.5)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90,
                       fontsize=DV_fontsize * 2)

    # draw lines separating groups
    if grouping is not None:
        factor_breaks = np.cumsum([len(i[1]) for i in grouping])[:-1]
        for y_val in factor_breaks:
            ax.hlines(y_val, 0, loadings.shape[1], lw=size / 5,
                      color='grey', linestyle='dashed')

    if plot_dir:
        filename = 'factor_heatmap_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
# In[ ]: tmp = [] for i, group in all_reconstructions.groupby(['approach', 'pop_size']): group = group.loc[:, ['var', 'mean']].set_index('var') group.columns = [i] tmp.append(group) approach_compare = pd.concat(tmp, axis=1) approach_compare.columns = [ i + ': ' + str(int(j)) for i, j in approach_compare.columns ] # correlation of reconstructions corr = approach_compare.corr(method='spearman') overall_correlation = np.mean(corr.values[np.tril_indices_from(corr, -1)]) print('DV reconstruction score correlates %s across approaches' % format_num(overall_correlation)) # Model reconstruction success as a function of DV characteristics, approach and subpopulation size # In[ ]: all_reconstructions.loc[:, 'z_mean'] = np.arctanh(all_reconstructions['mean']) md = smf.mixedlm( "z_mean ~ (pop_size + Q('icc3.k') + communality)*C(approach, Sum)", all_reconstructions, groups=all_reconstructions["var"]) mdf = md.fit() mdf.summary() # other way to do it # endog, exog = patsy.dmatrices("z_mean ~ (pop_size + icc + avg_correlation)*C(approach, Sum)", all_reconstructions, return_type='dataframe')
def importance_polar_plots(predictions, target_order=None, show_sign=True,
                           colorbar=True, size=5, dpi=300, filename=None):
    """ Plots a grid of polar importance plots, one row per prediction set

    Args:
        predictions: dict mapping a name to a dict of per-target results.
            Each result is indexed with 'predvars', 'importances' and
            'scores_cv' (whose first entry provides 'R2')
        target_order (optional): targets to plot, one column each. Defaults
            to the keys of the first entry of predictions
        show_sign: passed on to visualize_importance
        colorbar: if truthy, add a horizontal R2 colorbar below the grid
        size: scalar used to scale the figure and the font sizes
        dpi: resolution used when the figure is saved
        filename (optional): path handed to save_figure. If None, the figure
            is returned instead of being saved

    Returns:
        The figure when filename is None, otherwise None.
    """
    # set up color styling
    palette = sns.color_palette('Blues_d', 100)
    # plot
    if target_order is None:
        target_order = list(predictions.values())[0].keys()
    # materialize once: the order is iterated several times and measured
    target_order = list(target_order)
    N = len(target_order)
    f = plt.figure(figsize=(size, size))
    background_ax = f.add_axes([0, 0, 1, 1])
    polar_axes = []
    subplot_size = 1 / N

    # get max r2 (never below 0 because the running maximum starts at 0)
    max_r2 = 0
    for prediction in predictions.values():
        max_r2 = max(max_r2,
                     max(prediction[target]['scores_cv'][0]['R2']
                         for target in target_order))

    for row_i, prediction in enumerate(predictions.values()):
        # get importances
        vals = [prediction[target] for target in target_order]
        importances = [(v['predvars'], v['importances'][0]) for v in vals]
        r2s = [v['scores_cv'][0]['R2'] for v in vals]
        for i in range(N):
            polar_ax = f.add_axes([
                subplot_size * i * 1.3, row_i * 1.4 * subplot_size,
                subplot_size, subplot_size
            ], projection='polar')
            polar_axes.append(polar_ax)
            # Palette position scales with R2 relative to the best R2. When
            # no R2 is positive there is nothing to scale by, so fall back
            # to the first color instead of dividing by zero.
            if max_r2 > 0:
                color_i = max(int(r2s[i] / max_r2 * len(palette)) - 1, 0)
            else:
                color_i = 0
            visualize_importance(importances[i],
                                 polar_ax,
                                 yticklabels=False,
                                 xticklabels=True,
                                 label_size=size * 1.5,
                                 color=palette[color_i],
                                 outline_color='k',
                                 axes_linewidth=size / 20,
                                 label_scale=.25,
                                 show_sign=show_sign)
            polar_ax.text(.5, -.2,
                          'R2: ' + format_num(r2s[i]),
                          zorder=5,
                          fontsize=size * 1.5,
                          fontweight='bold',
                          ha='center',
                          transform=polar_ax.transAxes)
            # change axis color
            polar_ax.grid(color=[.6, .6, .6])
            polar_ax.set_facecolor((0.91, 0.91, 0.94, 1.0))

    # add column labels above the last row that was added. Indexing with
    # i - N picks that row for any number of targets (this used to be a
    # hard-coded i - 3, which is only right for three targets).
    for i, label in enumerate(target_order):
        pos = polar_axes[i - N].get_position().bounds
        x_pos = pos[0] + pos[2] * .5
        y_pos = pos[1] + pos[3]
        background_ax.text(x_pos, y_pos + .05,
                           '\n'.join(label.split()),
                           fontsize=size * 2,
                           fontweight='bold',
                           ha='center')
    # add row labels next to the first plot of every row
    for i, key in enumerate(predictions.keys()):
        pos = polar_axes[i * N].get_position().bounds
        x_pos = pos[0]
        y_pos = pos[1] + pos[3] * .5
        background_ax.text(x_pos - .1, y_pos,
                           ' '.join(key.title().split('_')),
                           fontsize=size * 2,
                           fontweight='bold',
                           va='center',
                           rotation=90)
    # make background ax invisible
    background_ax.tick_params(bottom=False, left=False,
                              labelbottom=False, labelleft=False)
    # add colorbar
    if colorbar:
        # get x position of center plots
        if N % 2 == 1:
            center = polar_axes[N // 2].get_position().bounds
            x_pos = center[0] + center[2] * .5
        else:
            pos1 = polar_axes[N // 2 - 1].get_position().bounds
            pos2 = polar_axes[N // 2].get_position().bounds
            # midpoint between the right edge of the left-center plot and
            # the left edge of the right-center plot
            x_pos = (pos1[0] + pos1[2] + pos2[0]) / 2
        color_ax = f.add_axes([x_pos - .3, -.2, .6, .025])
        cbar = mpl.colorbar.ColorbarBase(ax=color_ax,
                                         cmap=ListedColormap(palette),
                                         orientation='horizontal')
        cbar.set_ticks([0, 1])
        cbar.set_ticklabels([0, format_num(max_r2)])
        color_ax.tick_params(labelsize=size)
        cbar.set_label('R2', fontsize=size * 1.5)
    for spine in background_ax.spines.values():
        spine.set_visible(False)

    if filename is not None:
        save_figure(f, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return f
def plot_dendrogram(loading, clustering, title=None,
                    break_lines=True, drop_list=None, double_drop_list=None,
                    absolute_loading=False, size=4.6, dpi=300,
                    filename=None):
    """ Plots HCA results as dendrogram with loadings underneath

    Args:
        loading: pandas df, a results EFA loading matrix
        clustering: a results HCA clustering. It is indexed with 'linkage',
            'clustered_df', 'labels' and 'reorder_vec', and optionally
            'cluster_names'
        title (optional): str, title to plot
        break_lines: whether to separate EFA heatmap based on clusters, default=True
        drop_list (optional): list of cluster indices whose cluster label
            is drawn lower than the default position
        double_drop_list (optional): list of cluster indices whose cluster
            label is drawn lower still
        absolute_loading: whether to plot the absolute loading value, default False
        size: scalar used to scale the figure, line widths and font sizes
        dpi: resolution used when the figure is saved
        filename: if set, where to save the plot. If None, the figure is
            returned instead

    Returns:
        The figure when filename is None, otherwise None.
    """
    # number of loading columns; these become the heatmap rows after the
    # transpose below
    c = loading.shape[1]
    # extract cluster vars
    link = clustering['linkage']
    DVs = clustering['clustered_df'].columns
    ordered_loading = loading.loc[DVs]
    if absolute_loading:
        ordered_loading = abs(ordered_loading)
    # get cluster sizes (labels are compared against 1..max(labels))
    labels=clustering['labels']
    cluster_sizes = [np.sum(labels==(i+1)) for i in range(max(labels))]
    link_function, colors = get_dendrogram_color_fun(link, clustering['reorder_vec'],
                                                     labels)

    # set figure properties
    figsize = (size, size*.6)
    # set up axes' size
    heatmap_height = ordered_loading.shape[1]*.035
    # [bottom, height] of the heatmap in figure fractions
    heat_size = [.1, heatmap_height]
    # [bottom, height] of the dendrogram: it starts where the heatmap ends
    dendro_size=[np.sum(heat_size), .3]
    # set up plot axes as [left, bottom, width, height]
    dendro_size = [.15,dendro_size[0], .78, dendro_size[1]]
    heatmap_size = [.15,heat_size[0],.78,heat_size[1]]
    cbar_size = [.935,heat_size[0],.015,heat_size[1]]
    ordered_loading = ordered_loading.T

    with sns.axes_style('white'):
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes(dendro_size)
        # **********************************
        # plot dendrogram
        # **********************************
        with plt.rc_context({'lines.linewidth': size*.125}):
            dendrogram(link, ax=ax1, link_color_func=link_function,
                       orientation='top')
        # change axis properties
        ax1.tick_params(axis='x', which='major', labelsize=14,
                        labelbottom=False)
        ax1.get_yaxis().set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.spines['right'].set_visible(False)
        ax1.spines['bottom'].set_visible(False)
        ax1.spines['left'].set_visible(False)
        # **********************************
        # plot loadings as heatmap below
        # **********************************
        ax2 = fig.add_axes(heatmap_size)
        cbar_ax = fig.add_axes(cbar_size)
        max_val = np.max(abs(loading.values))
        # bring to closest .25
        max_val = ceil(max_val*4)/4
        sns.heatmap(ordered_loading, ax=ax2,
                    cbar=True, cbar_ax=cbar_ax,
                    yticklabels=True,
                    xticklabels=True,
                    vmax = max_val, vmin = -max_val,
                    cbar_kws={'orientation': 'vertical',
                              'ticks': [-max_val, 0, max_val]},
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
        ax2.set_yticklabels(ax2.get_yticklabels(), rotation=0)
        ax2.tick_params(axis='y', labelsize=size*heat_size[1]*30/c, pad=size/4, length=0)
        # format cbar axis
        cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
        cbar_ax.tick_params(labelsize=size*heat_size[1]*25/c, length=0, pad=size/2)
        cbar_ax.set_ylabel('Factor Loading', rotation=-90,
                           fontsize=size*heat_size[1]*30/c, labelpad=size*2)
        # add lines to heatmap to distinguish clusters
        if break_lines == True:
            xlim = ax2.get_xlim(); ylim = ax2.get_ylim()
            # x-axis width of one variable
            step = xlim[1]/len(labels)
            cluster_breaks = [i*step for i in np.cumsum(cluster_sizes)]
            # the last break is the right edge of the heatmap, so skip it
            ax2.vlines(cluster_breaks[:-1], ylim[0], ylim[1], linestyles='dashed',
                       linewidth=size*.1, colors=[.5,.5,.5], zorder=10)
        # **********************************
        # plot cluster names
        # **********************************
        # first column index and center position of every cluster
        beginnings = np.hstack([[0],np.cumsum(cluster_sizes)[:-1]])
        centers = beginnings+np.array(cluster_sizes)//2+.5
        offset = .07
        if 'cluster_names' in clustering.keys():
            ax2.tick_params(axis='x', reset=True, top=False, bottom=False, width=size/8, length=0)
            names = [transform_name(i) for i in clustering['cluster_names']]
            ax2.set_xticks(centers)
            ax2.set_xticklabels(names, rotation=0, ha='center',
                                fontsize=heatmap_size[2]*size*1)
            # every second tick line object, indexed per cluster below
            ticks = ax2.xaxis.get_ticklines()[::2]
            for i, label in enumerate(ax2.get_xticklabels()):
                if label.get_text() != '':
                    # colored bar spanning the cluster, drawn outside the
                    # axes limits (clip_on=False)
                    ax2.hlines(c+offset,beginnings[i]+.5,beginnings[i]+cluster_sizes[i]-.5,
                               clip_on=False, color=colors[i], linewidth=size/5)
                    label.set_color(colors[i])
                    ticks[i].set_color(colors[i])
                    # default label drop; larger for clusters listed in
                    # drop_list and larger again for double_drop_list
                    y_drop = .005
                    line_drop = .3
                    if drop_list and i in drop_list:
                        y_drop = .05
                        line_drop = 1.6
                    if double_drop_list and i in double_drop_list:
                        y_drop = .1
                        line_drop = 2.9
                    label.set_y(-(y_drop/heatmap_height+heatmap_height/c*offset))
                    # connector from the cluster bar down towards its label
                    ax2.vlines(beginnings[i]+cluster_sizes[i]/2,
                               c+offset, c+offset+line_drop,
                               clip_on=False, color=colors[i],
                               linewidth=size/7.5)

        # add title
        if title:
            ax1.set_title(title, fontsize=size*2, y=1.05)

    if filename is not None:
        save_figure(fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return fig
def plot_communality(results, c, rotate='oblimin', retest_threshold=.2,
                     size=4.6, dpi=300, ext='png', plot_dir=None):
    """ Plots communality as bars and, with retest data, as densities

    Args:
        results: a results object; its EFA and dataset attributes are used
        c: the number of components, passed to get_communality and used in
            the output file names
        rotate: rotation passed to get_communality
        retest_threshold: passed to get_adjusted_communality
        size: scalar used to scale figure sizes, line widths and fonts
        dpi: the dpi for saved images
        ext: the extension for saved images
        plot_dir: directory to save the figures. If not set, nothing is
            saved or closed
    """
    communality = get_communality(results.EFA, rotate, c)
    # load retest data
    retest_name = results.dataset.replace('Complete', 'Retest')
    retest_data = get_retest_data(dataset=retest_name)
    if retest_data is None:
        print('No retest data found for datafile: %s' % results.dataset)
        return

    # reorder data in line with communality, then reformat variable names
    retest_data = retest_data.loc[communality.index]
    communality.index = format_variable_names(communality.index)
    retest_data.index = format_variable_names(retest_data.index)
    has_retest = len(retest_data) > 0

    # communality bars: three panels with retest data, one panel without
    if not has_retest:
        bar_fig = plot_bar_factor(communality, label_rows=True,
                                  width=size / 3, height=size * 2,
                                  title='Communality')
    else:
        adjusted_communality, correlation, noise_ceiling = \
            get_adjusted_communality(communality, retest_data,
                                     retest_threshold)
        bar_fig, axes = plt.subplots(1, 3, figsize=(3 * (size / 10), size))
        panels = [(communality, True, 'Communality'),
                  (noise_ceiling, False, 'Test-Retest'),
                  (adjusted_communality, False, 'Adjusted Communality')]
        for panel_ax, (values, with_labels, heading) in zip(axes, panels):
            plot_bar_factor(values, panel_ax, width=size / 10, height=size,
                            label_rows=with_labels, title=heading)
    if plot_dir:
        bar_name = 'communality_bars-EFA%s.%s' % (c, ext)
        save_figure(bar_fig, path.join(plot_dir, bar_name),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()

    # the density plot needs the adjusted communality
    if not has_retest:
        return

    # communality densities
    with sns.axes_style('white'):
        palette = sns.color_palette(n_colors=2, desat=.75)
        dist_fig, ax = plt.subplots(1, 1, figsize=(size, size))
        curves = [(communality, 'Communality', palette[0]),
                  (adjusted_communality, 'Adjusted Communality', palette[1])]
        for values, curve_label, curve_color in curves:
            sns.kdeplot(values, linewidth=size / 4, shade=True,
                        label=curve_label, color=curve_color)
        # dashed vertical line at the mean of each distribution
        y_low, y_high = ax.get_ylim()
        for values, _, curve_color in curves:
            ax.vlines(np.mean(values), y_low, y_high, color=curve_color,
                      linewidth=size / 4, linestyle='--')
        legend = ax.legend(fontsize=size * 2, loc='upper right')
        beautify_legend(legend, palette)
        plt.xlabel('Communality', fontsize=size * 2)
        plt.ylabel('Normalized Density', fontsize=size * 2)
        ax.set_yticks([])
        ax.tick_params(labelsize=size)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        # add correlation
        corr_text = format_num(np.mean(correlation))
        ax.text(1.1, 1.25,
                'Correlation Between Communality \nand Test-Retest: %s'
                % corr_text,
                size=size * 2)
    if plot_dir:
        dist_name = 'communality_dist-EFA%s.%s' % (c, ext)
        save_figure(dist_fig, path.join(plot_dir, dist_name),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
loading_data = loading_data.multiply(reflects, axis=0) # plot loadings sns.heatmap(loading_data.iloc[::-1, :], ax=loading_axes[task_i], yticklabels=False, xticklabels=False, linecolor='white', linewidth=basewidth, cbar_ax=cbar_ax, vmax=max_val, vmin=-max_val, cbar_kws={'ticks': [-max_val, 0, max_val]}, cmap=sns.diverging_palette(220, 16, n=100, as_cmap=True)) # format cbar cbar_ax.set_yticklabels( [format_num(-max_val, 1), 0, format_num(max_val, 1)]) cbar_ax.tick_params(axis='y', length=0) cbar_ax.tick_params(labelsize=basefont) for i in range(1, loading_data.shape[0] + 1): #loading_axes[task_i].hlines(i, -.2, 6.1, color='white', linewidth=basewidth*3) loading_axes[task_i].add_patch( Rectangle([-.1, i - .2], width=loading_data.shape[1] + .2, height=.2, zorder=100, facecolor='white', edgecolor='white', linewidth=basewidth, clip_on=False)) # add boxes
def plot_heatmap_factors(results, c, size=4.6, thresh=75, rotate='oblimin',
                         DA=False, dpi=300, ext='png', plot_dir=None):
    """ Plots factor analytic loadings as a heatmap

    Args:
        results: a dimensional structure results object
        c: the number of components to use
        size: scalar - the width of the plot. The height is twice the width
        thresh: percentile (0-100) of the absolute loadings. Loadings at or
            below it are set to 0, and variables whose masked loadings
            average to 0 are dropped. If not positive, nothing is masked
        rotate: rotation passed to get_loading and reorder_factors
        DA: if True use results.DA instead of results.EFA
        dpi: the final dpi for the image
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If none, do not save
    """
    EFA = results.DA if DA else results.EFA
    loadings = EFA.get_loading(c, rotate=rotate)
    loadings = EFA.reorder_factors(loadings, rotate=rotate)
    grouping = get_factor_groups(loadings)
    # order the variables by group (second item of every grouping entry)
    flattened_factor_order = [var for entry in grouping for var in entry[1]]
    loadings = loadings.loc[flattened_factor_order]
    # get threshold for loadings
    if thresh > 0:
        thresh_val = np.percentile(abs(loadings).values, thresh)
        print('Thresholding all loadings less than %s'
              % np.round(thresh_val, 3))
        loadings = loadings.mask(abs(loadings) <= thresh_val, 0)
        # remove variables that don't cross the threshold for any factor
        kept_vars = list(loadings.index[loadings.mean(1) != 0])
        print('%s Variables out of %s are kept after threshold'
              % (len(kept_vars), loadings.shape[0]))
        loadings = loadings.loc[kept_vars]
        # remove masked variables from grouping (set gives O(1) membership)
        kept_lookup = set(kept_vars)
        grouping = [[factor, [x for x in group if x in kept_lookup]]
                    for factor, group in grouping]
    # change variable names to make them more readable
    loadings.index = format_variable_names(loadings.index)

    # set up plot variables
    # max(..., 1) avoids a division by zero when fewer than two variables
    # are left to plot
    DV_fontsize = size * 2 / max(loadings.shape[0] // 2, 1) * 30
    figsize = (size, size * 2)

    f = plt.figure(figsize=figsize)
    ax = f.add_axes([0, 0, .08 * loadings.shape[1], 1])
    cbar_ax = f.add_axes([.08 * loadings.shape[1] + .02, 0, .04, 1])

    max_val = abs(loadings).max().max()
    cbar_ticks = [-max_val, -max_val / 2, 0, max_val / 2, max_val]
    sns.heatmap(loadings, ax=ax, cbar_ax=cbar_ax,
                vmax=max_val, vmin=-max_val,
                cbar_kws={'ticks': cbar_ticks},
                linecolor='white', linewidth=.01,
                cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
    ax.set_yticks(np.arange(.5, loadings.shape[0] + .5, 1))
    ax.set_yticklabels(loadings.index, fontsize=DV_fontsize, rotation=0)
    ax.set_xticklabels(loadings.columns,
                       fontsize=min(size * 3, DV_fontsize * 1.5),
                       ha='center',
                       rotation=90)
    ax.tick_params(length=size * .5, width=size / 10)
    # format cbar: one label per tick; the upper half-tick is +max_val/2
    # (it was previously labelled with the negative value)
    cbar_ax.set_yticklabels([format_num(-max_val, 2),
                             format_num(-max_val / 2, 2),
                             0,
                             format_num(max_val / 2, 2),
                             format_num(max_val, 2)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=DV_fontsize * 1.5)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90,
                       fontsize=DV_fontsize * 2)

    # draw lines separating groups
    if grouping is not None:
        factor_breaks = np.cumsum([len(i[1]) for i in grouping])[:-1]
        for y_val in factor_breaks:
            ax.hlines(y_val, 0, loadings.shape[1], lw=size / 5,
                      color='grey', linestyle='dashed')

    if plot_dir:
        filename = 'factor_heatmap_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()