def get_task_representations(self, tasks, c=None, rotate='oblimin'):
    """Reconstruct factor scores from a subset of tasks.

    Args:
        tasks: list of tasks, forwarded unchanged to
            ``get_scores_from_subset``.
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        The two values returned by ``get_scores_from_subset`` (presumably
        the reconstructed scores and their R^2 values -- confirm against
        that helper).
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')
    fit = self.results['factor_tree_Rout_%s' % rotate][c]
    # only the weights and scores of the stored fit are handed downstream
    reference = {field: get_attr(fit, field) for field in ('weights', 'scores')}
    reconstructed, r2 = get_scores_from_subset(self.data, reference, tasks)
    return reconstructed, r2
def get_task_representations(self, tasks, c=None, rotate='oblimin'):
    """Reconstruct factor scores from a subset of tasks.

    Args:
        tasks: list of tasks, forwarded unchanged to
            ``get_scores_from_subset``.
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        The two values returned by ``get_scores_from_subset`` (presumably
        the reconstructed scores and their R^2 values -- confirm against
        that helper).
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')
    stored_fit = self.results['factor_tree_Rout_%s' % rotate][c]
    # only the weights and scores of the stored fit are handed downstream
    reference = {}
    for field in ('weights', 'scores'):
        reference[field] = get_attr(stored_fit, field)
    reconstructed, r2 = get_scores_from_subset(self.data, reference, tasks)
    return reconstructed, r2
def get_change(self, retest_dataset):
    """Compute retest-minus-baseline change, raw and in factor space.

    The baseline is ``self.data``; the retest table is built with
    ``get_demographics`` and ``residualize_baseline``. Both tables are
    restricted to the rows and columns they share before differencing.

    Args:
        retest_dataset: dataset identifier passed to ``get_demographics``.

    Returns:
        ``(factor_change, raw_change)``: the difference of the projected
        scores (columns suffixed with ' Change') and the variable-wise
        difference of the aligned tables.
    """
    baseline = self.data
    followup = residualize_baseline(get_demographics(retest_dataset),
                                    self.residualize_vars)
    if 'BMI' in followup.columns:
        followup.drop(['WeightPounds', 'HeightInches'], axis=1, inplace=True)

    # restrict both tables to what they have in common
    shared_rows = sorted(set(baseline.index) & set(followup.index))
    shared_cols = sorted(set(baseline.columns) & set(followup.columns))
    baseline = baseline.loc[shared_rows, shared_cols]
    followup = followup.loc[shared_rows, shared_cols]
    raw_change = followup - baseline

    # project both time points with the same stored oblimin weights
    c = self.get_c()
    weights = get_attr(self.results['factor_tree_Rout_oblimin'][c], 'weights')
    baseline_scores = scale(baseline).dot(weights)
    followup_scores = scale(followup).dot(weights)
    factor_change = pd.DataFrame(followup_scores - baseline_scores,
                                 index=shared_rows,
                                 columns=self.get_scores().columns)
    factor_change = self.reorder_factors(factor_change)
    factor_change.columns = [label + ' Change'
                             for label in factor_change.columns]
    return factor_change, raw_change
def get_factor_reorder(self, c, rotate='oblimin'):
    """Return a factor ordering derived from clustering the 'Phi' matrix.

    Args:
        c: number of factors selecting the stored solution.
        rotate: rotation name used to build the results key.

    Returns:
        A list of factor positions. When the stored fit has no 'Phi'
        attribute (``get_attr`` returns None) the identity order
        ``[0, ..., c-1]`` is returned.
    """
    factor_corr = get_attr(self.results['factor_tree_Rout_%s' % rotate][c],
                           'Phi')
    if factor_corr is None:
        return list(range(c))
    # turn correlations into rounded distances and cluster them
    distances = np.round(1 - factor_corr, 3)
    tree = linkage(squareform(distances))
    leaf_order = list(leaves_list(tree))
    # reversing because it works better for task EFA
    leaf_order.reverse()
    return leaf_order
def transfer_scores(data, results, rotate='oblimin'):
    """Calculate factor scores in a new dataset from a reference results object.

    The new data are reduced to the reference variables, passed through
    ``transform_remove_skew`` (with ``drop_failed=False``),
    ``remove_outliers`` and ``missForest``, scaled, and projected with the
    reference solution's stored weights.

    Args:
        data: DataFrame holding (at least) the untransformed reference
            variables as columns.
        results: reference results object exposing ``data`` and ``EFA``.
        rotate: rotation name selecting the stored solution.

    Returns:
        DataFrame of scores indexed like the imputed data, with the
        reference loading columns as column labels.
    """
    reference = results.data
    EFA = results.EFA
    c = EFA.results['num_factors']
    loadings = EFA.get_loading(c=c, rotate=rotate)

    # recover untransformed variable names and which transform each one had
    positive_skewed, negative_skewed, dv_names = [], [], []
    for column in reference.columns:
        if ".logTr" in column:
            positive_skewed.append(column.replace('.logTr', ''))
        if ".ReflogTr" in column:
            negative_skewed.append(column.replace('.ReflogTr', ''))
        dv_names.append(column.replace('.logTr', '').replace('.ReflogTr', ''))

    data = data.loc[:, dv_names]
    print('using correct transfer_scores')
    data = transform_remove_skew(data,
                                 positive_skewed=positive_skewed,
                                 negative_skewed=negative_skewed,
                                 drop_failed=False)
    data = remove_outliers(data)
    data_imputed, _ = missForest(data)
    scaled = scale(data_imputed.loc[:, loadings.index])

    # calculate scores
    weights = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],
                       'weights')
    return pd.DataFrame(scaled.dot(weights),
                        index=data_imputed.index,
                        columns=loadings.columns)
def compute_higher_order_factors(self, c=None, rotate='oblimin'):
    """Fit a higher-order EFA on the factor-score correlations and store it.

    Nothing is returned. On success the loadings are written to
    ``self.results['factor2_tree_<rotate>'][c]`` and the raw fit to
    ``self.results['factor2_tree_Rout_<rotate>'][c]``.

    Args:
        c: number of first-order factors selecting the stored solution.
            When None, ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results keys.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')

    loading_key = 'factor_tree_%s' % rotate
    rout_key = 'factor_tree_Rout_%s' % rotate
    # Both trees are read below, so both must hold the c-factor solution.
    # Checking only one of them would surface later as a KeyError.
    solution_ready = (loading_key in self.results
                      and rout_key in self.results
                      and c in self.results[loading_key]
                      and c in self.results[rout_key])
    if not solution_ready:
        print('No %s factor solution computed yet!' % c)
        return

    # get factor correlation matrix
    scores = get_attr(self.results[rout_key][c], 'scores')
    phi = pd.DataFrame(np.corrcoef(scores.T))
    # check for correlations
    # NOTE(review): np.tril keeps the zeroed diagonal and upper triangle in
    # the mean, and a negative mean also takes this silent exit -- confirm
    # that both are intended.
    if np.mean(np.tril(phi, -1)) < 10E-5:
        return

    n_obs = self.data.shape[0]
    labels = list(self.results[loading_key][c].columns)
    BIC_c, BICs = find_optimal_components(phi, metric='BIC', nobs=n_obs)
    if BIC_c == 0:
        print('Higher order factors could not be calculated')
        return

    # create each container independently so a half-initialised results
    # dict cannot cause a KeyError on assignment
    higher_loadings = self.results.setdefault('factor2_tree_%s' % rotate, {})
    higher_rout = self.results.setdefault('factor2_tree_Rout_%s' % rotate, {})
    Rout, higher_order_out = psychFA(phi, BIC_c, nobs=n_obs)
    higher_loadings[c] = get_loadings(higher_order_out, labels)
    higher_rout[c] = Rout
def get_factor_reorder(self, c, rotate='oblimin'):
    """Return a factor ordering derived from clustering the 'Phi' matrix.

    Args:
        c: number of factors selecting the stored solution.
        rotate: rotation name used to build the results key.

    Returns:
        A list of factor positions. When the stored fit has no 'Phi'
        attribute (``get_attr`` returns None) the identity order
        ``[0, ..., c-1]`` is returned.
    """
    stored_fit = self.results['factor_tree_Rout_%s' % rotate][c]
    factor_corr = get_attr(stored_fit, 'Phi')
    if factor_corr is None:
        return list(range(c))
    # turn correlations into rounded distances and cluster them
    condensed = squareform(np.round(1 - factor_corr, 3))
    leaf_order = list(leaves_list(linkage(condensed)))
    # reversing because it works better for task EFA
    return list(reversed(leaf_order))
def get_change(self, retest_dataset):
    """Compute retest-minus-baseline change, raw and in factor space.

    The baseline is ``self.data``; the retest table is built with
    ``get_demographics`` and ``residualize_baseline``. Both tables are
    restricted to the rows and columns they share before differencing.

    Args:
        retest_dataset: dataset identifier passed to ``get_demographics``.

    Returns:
        ``(factor_change, raw_change)``: the difference of the projected
        scores (columns suffixed with ' Change') and the variable-wise
        difference of the aligned tables.
    """
    baseline = self.data
    followup = get_demographics(retest_dataset)
    followup = residualize_baseline(followup, self.residualize_vars)
    if 'BMI' in followup.columns:
        followup.drop(['WeightPounds', 'HeightInches'], axis=1, inplace=True)

    # restrict both tables to what they have in common
    shared_rows = sorted(set(baseline.index).intersection(followup.index))
    shared_cols = sorted(set(baseline.columns).intersection(followup.columns))
    baseline = baseline.loc[shared_rows, shared_cols]
    followup = followup.loc[shared_rows, shared_cols]
    raw_change = followup - baseline

    # project both time points with the same stored oblimin weights
    c = self.get_c()
    weights = get_attr(self.results['factor_tree_Rout_oblimin'][c], 'weights')
    baseline_scores = scale(baseline).dot(weights)
    followup_scores = scale(followup).dot(weights)
    score_delta = followup_scores - baseline_scores
    factor_change = pd.DataFrame(score_delta,
                                 index=shared_rows,
                                 columns=self.get_scores().columns)
    factor_change = self.reorder_factors(factor_change)
    factor_change.columns = [label + ' Change'
                             for label in factor_change.columns]
    return factor_change, raw_change
def compute_higher_order_factors(self, c=None, rotate='oblimin'):
    """Fit a higher-order EFA on the factor-score correlations and store it.

    Nothing is returned. On success the loadings are written to
    ``self.results['factor2_tree_<rotate>'][c]`` and the raw fit to
    ``self.results['factor2_tree_Rout_<rotate>'][c]``.

    Args:
        c: number of first-order factors selecting the stored solution.
            When None, ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results keys.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')

    loading_key = 'factor_tree_%s' % rotate
    rout_key = 'factor_tree_Rout_%s' % rotate
    # Both trees are read below, so both must hold the c-factor solution.
    # Checking only one of them would surface later as a KeyError.
    solution_ready = (loading_key in self.results
                      and rout_key in self.results
                      and c in self.results[loading_key]
                      and c in self.results[rout_key])
    if not solution_ready:
        print('No %s factor solution computed yet!' % c)
        return

    # get factor correlation matrix
    scores = get_attr(self.results[rout_key][c], 'scores')
    phi = pd.DataFrame(np.corrcoef(scores.T))
    # check for correlations
    # NOTE(review): np.tril keeps the zeroed diagonal and upper triangle in
    # the mean, and a negative mean also takes this silent exit -- confirm
    # that both are intended.
    if np.mean(np.tril(phi, -1)) < 10E-5:
        return

    n_obs = self.data.shape[0]
    labels = list(self.results[loading_key][c].columns)
    BIC_c, BICs = find_optimal_components(phi, metric='BIC', nobs=n_obs)
    if BIC_c == 0:
        print('Higher order factors could not be calculated')
        return

    # create each container independently so a half-initialised results
    # dict cannot cause a KeyError on assignment
    higher_loadings = self.results.setdefault('factor2_tree_%s' % rotate, {})
    higher_rout = self.results.setdefault('factor2_tree_Rout_%s' % rotate, {})
    Rout, higher_order_out = psychFA(phi, BIC_c, nobs=n_obs)
    higher_loadings[c] = get_loadings(higher_order_out, labels)
    higher_rout[c] = Rout
def get_boot_stats(self, c=None, rotate='oblimin'):
    """Return the bootstrap statistics stored under 'cis' for a solution.

    Args:
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        ``{'means': DataFrame, 'sds': DataFrame}`` labelled like the
        loadings, or None (after printing a message) when the solution
        does not exist or carries no 'cis' entry.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')

    fits = self.results['factor_tree_Rout_%s' % rotate]
    if c not in fits:
        print("EFA hasn't been run for %s factors" % c)
        return None

    fit = fits[c]
    if 'cis' not in fit.names:
        print('No bootstrap has been run for EFA with %s factors' % c)
        return None

    loadings = self.get_loading(c, rotate=rotate)
    cis = get_attr(fit, 'cis')
    # wrap each statistic with the loading matrix's labels
    return {stat: pd.DataFrame(get_attr(cis, stat),
                               index=loadings.index,
                               columns=loadings.columns)
            for stat in ('means', 'sds')}
def get_communality(EFA, rotate='oblimin', c=None):
    """Return the stored communalities as a labelled Series.

    Args:
        EFA: analysis object exposing ``get_c``, ``get_loading`` and
            ``results``.
        rotate: rotation name selecting the stored solution.
        c: number of factors; defaults to ``EFA.get_c()``.

    Returns:
        Series named "communality", indexed by the loading row labels with
        any '.logTr' / '.ReflogTr' suffix removed.
    """
    if c is None:
        c = EFA.get_c()
    loading = EFA.get_loading(c, rotate=rotate)
    # get communality from psych out
    stored_fit = EFA.results['factor_tree_Rout_%s' % rotate][c]
    series = pd.Series(get_attr(stored_fit, 'communalities'),
                       index=loading.index)
    # strip the transformation suffixes from the variable names
    cleaned = []
    for label in series.index:
        cleaned.append(label.replace('.logTr', '').replace('.ReflogTr', ''))
    series.index = cleaned
    series.name = "communality"
    return series
def get_boot_stats(self, c=None, rotate='oblimin'):
    """Return the bootstrap statistics stored under 'cis' for a solution.

    Args:
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        ``{'means': DataFrame, 'sds': DataFrame}`` labelled like the
        loadings, or None (after printing a message) when the solution
        does not exist or carries no 'cis' entry.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')

    fits = self.results['factor_tree_Rout_%s' % rotate]
    if c not in fits:
        print("EFA hasn't been run for %s factors" % c)
        return None

    fit = fits[c]
    if 'cis' not in fit.names:
        print('No bootstrap has been run for EFA with %s factors' % c)
        return None

    loadings = self.get_loading(c, rotate=rotate)
    cis = get_attr(fit, 'cis')
    # wrap each statistic with the loading matrix's labels
    stats = {}
    for stat in ('means', 'sds'):
        stats[stat] = pd.DataFrame(get_attr(cis, stat),
                                   index=loadings.index,
                                   columns=loadings.columns)
    return stats
def calc_EFA_retest_held_out(results, rotate='oblimin', verbose=True):
    """Score test/retest subjects with an EFA fit on held-out subjects.

    Subjects present in both the original and the retest dataset are
    removed from the fitting sample. The factor model is re-fit on the
    remaining subjects and its weights are applied to the cleaned T1 and
    T2 data of the shared subjects.

    Args:
        results: results object exposing ``data``, ``dataset`` and ``EFA``.
        rotate: rotation passed to ``psychFA`` and ``get_scores``.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        ``(combined, cross_diag, ICCs, (fa, output))``: the T1 and T2
        scores side by side, the T1/T2 correlation of each factor, the
        value extracted from ``psych.ICC`` per factor, and the pair
        returned by ``psychFA``.
    """
    orig_data = results.data
    columns = orig_data.columns
    positive_skewed = [i.replace('.logTr', '') for i in columns
                       if ".logTr" in i]
    negative_skewed = [i.replace('.ReflogTr', '') for i in columns
                       if ".ReflogTr" in i]
    DVs = [i.replace('.logTr', '').replace('.ReflogTr', '') for i in columns]
    orig_scores = results.EFA.get_scores(rotate=rotate)
    factor_names = list(orig_scores.columns)

    # load and clean retest data exactly like original data
    data_raw = get_behav_data(dataset=results.dataset,
                              file='meaningful_variables.csv')
    retest_data_raw = get_behav_data(
        dataset=results.dataset.replace('Complete', 'Retest'),
        file='meaningful_variables.csv')
    # A sorted list rather than a set: pandas no longer accepts sets as
    # indexers or as an index, and the row order must match the index
    # attached to the score tables below.
    shared_ids = sorted(set(retest_data_raw.index) & set(data_raw.index))
    raw_data = {'T1': data_raw.loc[shared_ids, :],
                'T2': retest_data_raw.loc[shared_ids, :]}

    imputed_data = {}
    for timepoint, data in raw_data.items():
        tmp_data = data.loc[:, DVs]
        tmp_data = transform_remove_skew(tmp_data,
                                         positive_skewed=positive_skewed,
                                         negative_skewed=negative_skewed)
        tmp_data = remove_outliers(tmp_data)
        tmp_data_imputed, _ = missForest(tmp_data)
        imputed_data[timepoint] = scale(tmp_data_imputed)

    # get subjects not in the retest set
    ind_data = orig_data.loc[~orig_data.index.isin(shared_ids)]
    fa, output = psychFA(ind_data, results.EFA.results['num_factors'],
                         method='ml', rotate=rotate)
    weights = get_attr(fa, 'weights')

    scores = {}
    for timepoint, data in imputed_data.items():
        suffix = 'T2' if timepoint == 'T2' else ''
        scores[timepoint] = pd.DataFrame(
            data.dot(weights), index=shared_ids,
            columns=[i + ' ' + suffix for i in factor_names])
    combined = pd.concat([scores['T1'], scores['T2']], axis=1)

    # T1 columns come first, so factor i at T2 sits n_factors columns later
    corr = combined.corr()
    n_factors = len(factor_names)
    cross_diag = [corr.iloc[i, i + n_factors] for i in range(n_factors)]

    # get ICCs
    ICCs = []
    for col in scores['T1'].columns:
        # The T2 label is the T1 label plus 'T2'. Selecting the pair
        # directly avoids treating factor names as regular expressions.
        tmp = combined[[col, col + 'T2']]
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    return combined, cross_diag, ICCs, (fa, output)
def get_communality(EFA, rotate='oblimin', c=None):
    """Return the stored communalities as a labelled Series.

    Args:
        EFA: analysis object exposing ``get_c``, ``get_loading`` and
            ``results``.
        rotate: rotation name selecting the stored solution.
        c: number of factors; defaults to ``EFA.get_c()``.

    Returns:
        Series named "communality", indexed by the loading row labels with
        any '.logTr' / '.ReflogTr' suffix removed.
    """
    if c is None:
        c = EFA.get_c()
    loading = EFA.get_loading(c, rotate=rotate)
    # get communality from psych out
    stored_fit = EFA.results['factor_tree_Rout_%s' % rotate][c]
    values = get_attr(stored_fit, 'communalities')
    series = pd.Series(values, index=loading.index)
    # strip the transformation suffixes from the variable names
    series.index = [
        label.replace('.logTr', '').replace('.ReflogTr', '')
        for label in series.index
    ]
    series.name = "communality"
    return series
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png',
                            plot_dir=None):
    """Plot the factor correlation ('Phi') matrix of a stored solution.

    The full matrix is drawn as a heatmap and drawn again with
    annotations and a mask on the strict lower triangle.

    Args:
        results: results object exposing ``EFA``, ``DA`` and ``ID``.
        c: number of factors selecting the stored solution.
        rotate: rotation name selecting the stored solution.
        title: when equal to True, add a title built from ``results.ID``.
        DA: use ``results.DA`` instead of ``results.EFA``.
        size: figure height; also scales fonts and paddings.
        dpi: resolution passed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: output directory. When falsy nothing is saved and the
            figure is left open.
    """
    EFA = results.DA if DA else results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    # the ordering must come from the same rotation as the plotted matrix
    reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c], 'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    # `with a and b:` would only enter `b`; list both context managers so
    # the plotting context is applied together with the axes style
    with sns.plotting_context('notebook', font_scale=2), \
            sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0, 0, .9, .9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        cmap = sns.diverging_palette(220, 15, n=100, as_cmap=True)
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, cmap=cmap)
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True,
                    annot_kws={"size": size/c*15},
                    cmap=cmap, mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        # kept as an equality test so truthy non-boolean values (e.g. a
        # string) still do not switch the title on
        if title == True:
            ax1.set_title('%s Factor Correlations'
                          % results.ID.split('_')[0].title(),
                          weight='bold', y=1.05, fontsize=size*3)
        ax1.tick_params(labelsize=size*3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size*2)
        cbar_ax.set_ylabel('Pearson Correlation', rotation=-90,
                           labelpad=size*4, fontsize=size*3)

        if plot_dir:
            filename = 'factor_correlations_EFA%s.%s' % (c, ext)
            save_figure(f, path.join(plot_dir, filename),
                        {'bbox_inches': 'tight', 'dpi': dpi})
            plt.close(f)
def transfer_scores(data, results, rotate='oblimin'):
    """Calculate factor scores in a new dataset from a reference results object.

    The new data are reduced to the reference variables, passed through
    ``transform_remove_skew``, ``remove_outliers`` and ``missForest``,
    scaled, and projected with the reference solution's stored weights.

    Args:
        data: DataFrame holding (at least) the untransformed reference
            variables as columns.
        results: reference results object exposing ``data`` and ``EFA``.
        rotate: rotation name selecting the stored solution.

    Returns:
        DataFrame of scores indexed like the imputed data, with the
        reference loading columns as column labels.
    """
    reference = results.data
    EFA = results.EFA
    c = EFA.results['num_factors']
    loadings = EFA.get_loading(c=c, rotate=rotate)

    # recover untransformed variable names and which transform each one had
    positive_skewed, negative_skewed, dv_names = [], [], []
    for column in reference.columns:
        if ".logTr" in column:
            positive_skewed.append(column.replace('.logTr', ''))
        if ".ReflogTr" in column:
            negative_skewed.append(column.replace('.ReflogTr', ''))
        dv_names.append(column.replace('.logTr', '').replace('.ReflogTr', ''))

    data = data.loc[:, dv_names]
    data = transform_remove_skew(data,
                                 positive_skewed=positive_skewed,
                                 negative_skewed=negative_skewed)
    data = remove_outliers(data)
    data_imputed, _ = missForest(data)
    scaled = scale(data_imputed.loc[:, loadings.index])

    # calculate scores
    weights = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],
                       'weights')
    return pd.DataFrame(scaled.dot(weights),
                        index=data_imputed.index,
                        columns=loadings.columns)
def calc_EFA_retest_held_out(results, rotate='oblimin', verbose=True):
    """Score test/retest subjects with an EFA fit on held-out subjects.

    Subjects present in both the original and the retest dataset are
    removed from the fitting sample. The factor model is re-fit on the
    remaining subjects and its weights are applied to the cleaned T1 and
    T2 data of the shared subjects.

    Args:
        results: results object exposing ``data``, ``dataset`` and ``EFA``.
        rotate: rotation passed to ``psychFA`` and ``get_scores``.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        ``(combined, cross_diag, ICCs, (fa, output))``: the T1 and T2
        scores side by side, the T1/T2 correlation of each factor, the
        value extracted from ``psych.ICC`` per factor, and the pair
        returned by ``psychFA``.
    """
    orig_data = results.data
    columns = orig_data.columns
    positive_skewed = [
        i.replace('.logTr', '') for i in columns if ".logTr" in i
    ]
    negative_skewed = [
        i.replace('.ReflogTr', '') for i in columns if ".ReflogTr" in i
    ]
    DVs = [i.replace('.logTr', '').replace('.ReflogTr', '') for i in columns]
    orig_scores = results.EFA.get_scores(rotate=rotate)
    factor_names = list(orig_scores.columns)

    # load and clean retest data exactly like original data
    data_raw = get_behav_data(dataset=results.dataset,
                              file='meaningful_variables.csv')
    retest_data_raw = get_behav_data(
        dataset=results.dataset.replace('Complete', 'Retest'),
        file='meaningful_variables.csv')
    # A sorted list rather than a set: pandas no longer accepts sets as
    # indexers or as an index, and the row order must match the index
    # attached to the score tables below.
    shared_ids = sorted(set(retest_data_raw.index) & set(data_raw.index))
    raw_data = {
        'T1': data_raw.loc[shared_ids, :],
        'T2': retest_data_raw.loc[shared_ids, :],
    }

    imputed_data = {}
    for timepoint, data in raw_data.items():
        tmp_data = data.loc[:, DVs]
        tmp_data = transform_remove_skew(tmp_data,
                                         positive_skewed=positive_skewed,
                                         negative_skewed=negative_skewed)
        tmp_data = remove_outliers(tmp_data)
        tmp_data_imputed, _ = missForest(tmp_data)
        imputed_data[timepoint] = scale(tmp_data_imputed)

    # get subjects not in the retest set
    ind_data = orig_data.loc[~orig_data.index.isin(shared_ids)]
    fa, output = psychFA(ind_data,
                         results.EFA.results['num_factors'],
                         method='ml',
                         rotate=rotate)
    weights = get_attr(fa, 'weights')

    scores = {}
    for timepoint, data in imputed_data.items():
        suffix = 'T2' if timepoint == 'T2' else ''
        scores[timepoint] = pd.DataFrame(
            data.dot(weights),
            index=shared_ids,
            columns=[i + ' ' + suffix for i in factor_names])
    combined = pd.concat([scores['T1'], scores['T2']], axis=1)

    # T1 columns come first, so factor i at T2 sits n_factors columns later
    corr = combined.corr()
    n_factors = len(factor_names)
    cross_diag = [corr.iloc[i, i + n_factors] for i in range(n_factors)]

    # get ICCs
    ICCs = []
    for col in scores['T1'].columns:
        # The T2 label is the T1 label plus 'T2'. Selecting the pair
        # directly avoids treating factor names as regular expressions.
        tmp = combined[[col, col + 'T2']]
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    return combined, cross_diag, ICCs, (fa, output)
def plot_cross_communality(all_results, rotate='oblimin',
                           retest_threshold=.2, size=4.6, dpi=300,
                           ext='png', plot_dir=None):
    """Plot communality and reliability-adjusted communality per result set.

    One panel is drawn per entry of ``all_results`` on a two-column grid.
    Each panel shows kernel density estimates of the stored communalities
    and of the communalities divided by the retest ``pearson`` column,
    with dashed lines at the two means.

    Args:
        all_results: mapping of panel name to results object (exposing
            ``dataset`` and ``EFA``).
        rotate: rotation name selecting the stored solution; also used as
            a sub-directory of ``plot_dir`` when saving.
        retest_threshold: retest values below this are replaced by NaN
            before adjusting; a falsy value disables the filter.
        size: figure width; also scales fonts and line widths.
        dpi: resolution passed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: output directory. When falsy nothing is saved and the
            figure is left open.
    """
    retest_data = None
    num_cols = 2
    num_rows = math.ceil(len(all_results.keys())/2)
    with sns.axes_style('white'):
        f, axes = plt.subplots(num_rows, num_cols,
                               figsize=(size, size/2*num_rows))
    # plt.subplots returns a 2-D array when there is more than one row;
    # flatten it so panel i maps to one Axes in row-major order
    axes = np.ravel(axes)
    max_y = 0
    for i, (name, results) in enumerate(all_results.items()):
        if retest_data is None:
            # load retest data
            retest_data = get_retest_data(
                dataset=results.dataset.replace('Complete', 'Retest'))
            if retest_data is None:
                print('No retest data found for datafile: %s'
                      % results.dataset)
                # nothing to adjust with; skip instead of failing on None
                continue
        EFA = results.EFA
        c = EFA.get_c()
        loading = EFA.get_loading(c, rotate=rotate)
        # get communality from psych out
        fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
        communality = pd.Series(get_attr(fa, 'communalities'),
                                index=loading.index)
        # strip both transformation suffixes so every variable can be
        # looked up in the retest table
        communality.index = [
            label.replace('.logTr', '').replace('.ReflogTr', '')
            for label in communality.index
        ]
        # reorder data in line with communality
        retest_subset = retest_data.loc[communality.index]
        # reformat variable names
        communality.index = format_variable_names(communality.index)
        retest_subset.index = format_variable_names(retest_subset.index)
        if len(retest_subset) == 0:
            continue

        # noise ceiling (copied so thresholding never writes through to
        # the retest table)
        noise_ceiling = retest_subset.pearson.copy()
        # remove very low reliabilities
        if retest_threshold:
            noise_ceiling[noise_ceiling < retest_threshold] = np.nan
        # adjust
        adjusted_communality = communality/noise_ceiling

        # plot communality histogram
        ax = axes[i]
        ax.set_title(name.title(), fontweight='bold', fontsize=size*2)
        colors = sns.color_palette(n_colors=2, desat=.75)
        sns.kdeplot(communality, linewidth=size/4, ax=ax, vertical=True,
                    shade=True, label='Communality', color=colors[0])
        sns.kdeplot(adjusted_communality, linewidth=size/4, ax=ax,
                    vertical=True, shade=True,
                    label='Adjusted Communality', color=colors[1])
        xlim = ax.get_xlim()
        ax.hlines(np.mean(communality), xlim[0], xlim[1],
                  color=colors[0], linewidth=size/4, linestyle='--')
        ax.hlines(np.mean(adjusted_communality), xlim[0], xlim[1],
                  color=colors[1], linewidth=size/4, linestyle='--')
        ax.set_xticks([])
        ax.tick_params(labelsize=size*1.2)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        if (i+1) == len(all_results):
            # last panel carries the legend
            ax.set_xlabel('Normalized Density', fontsize=size*2)
            leg = ax.legend(fontsize=size*1.5, loc='upper right',
                            bbox_to_anchor=(1.2, 1.0),
                            handlelength=0, handletextpad=0)
            beautify_legend(leg, colors)
        elif i >= len(all_results)-2:
            ax.set_xlabel('Normalized Density', fontsize=size*2)
            ax.legend().set_visible(False)
        else:
            ax.legend().set_visible(False)
        if i % 2 == 0:
            # left column shows the y axis label and ticks
            ax.set_ylabel('Communality', fontsize=size*2)
            ax.tick_params(labelleft=True, left=True,
                           length=size/4, width=size/8)
        else:
            ax.tick_params(labelleft=False, left=True,
                           length=0, width=size/8)
        # update max_y
        max_y = max(max_y, ax.get_ylim()[1])
        ax.grid(False)
        for spine in ax.spines.values():
            spine.set_linewidth(size*.1)

    # share one y range across all panels
    for ax in axes:
        ax.set_ylim((0, max_y))
    plt.subplots_adjust(wspace=0)

    if plot_dir:
        filename = 'communality_adjustment.%s' % ext
        save_figure(f, path.join(plot_dir, rotate, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close(f)
def _get_attr(self, attribute, c=None, rotate='oblimin'):
    """Fetch one attribute of a stored fit via ``get_attr``.

    Args:
        attribute: name of the attribute to extract.
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        Whatever ``get_attr`` returns for the selected fit.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')
    stored_fit = self.results['factor_tree_Rout_%s' % rotate][c]
    return get_attr(stored_fit, attribute)
def _get_attr(self, attribute, c=None, rotate='oblimin'):
    """Fetch one attribute of a stored fit via ``get_attr``.

    Args:
        attribute: name of the attribute to extract.
        c: number of factors selecting the stored solution. When None,
            ``self.get_c()`` is used and a notice is printed.
        rotate: rotation name used to build the results key.

    Returns:
        Whatever ``get_attr`` returns for the selected fit.
    """
    if c is None:
        c = self.get_c()
        print('# of components not specified, using BIC determined #')
    fits_for_rotation = self.results['factor_tree_Rout_%s' % rotate]
    return get_attr(fits_for_rotation[c], attribute)
def plot_cross_communality(all_results,
                           rotate='oblimin',
                           retest_threshold=.2,
                           size=4.6,
                           dpi=300,
                           ext='png',
                           plot_dir=None):
    """Plot communality and reliability-adjusted communality per result set.

    One panel is drawn per entry of ``all_results`` on a two-column grid.
    Each panel shows kernel density estimates of the stored communalities
    and of the communalities divided by the retest ``pearson`` column,
    with dashed lines at the two means.

    Args:
        all_results: mapping of panel name to results object (exposing
            ``dataset`` and ``EFA``).
        rotate: rotation name selecting the stored solution; also used as
            a sub-directory of ``plot_dir`` when saving.
        retest_threshold: retest values below this are replaced by NaN
            before adjusting; a falsy value disables the filter.
        size: figure width; also scales fonts and line widths.
        dpi: resolution passed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: output directory. When falsy nothing is saved and the
            figure is left open.
    """
    retest_data = None
    num_cols = 2
    num_rows = math.ceil(len(all_results.keys()) / 2)
    with sns.axes_style('white'):
        f, axes = plt.subplots(num_rows,
                               num_cols,
                               figsize=(size, size / 2 * num_rows))
    # plt.subplots returns a 2-D array when there is more than one row;
    # flatten it so panel i maps to one Axes in row-major order
    axes = np.ravel(axes)
    max_y = 0
    for i, (name, results) in enumerate(all_results.items()):
        if retest_data is None:
            # load retest data
            retest_data = get_retest_data(
                dataset=results.dataset.replace('Complete', 'Retest'))
            if retest_data is None:
                print('No retest data found for datafile: %s' %
                      results.dataset)
                # nothing to adjust with; skip instead of failing on None
                continue
        EFA = results.EFA
        c = EFA.get_c()
        loading = EFA.get_loading(c, rotate=rotate)
        # get communality from psych out
        fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
        communality = pd.Series(get_attr(fa, 'communalities'),
                                index=loading.index)
        # strip both transformation suffixes so every variable can be
        # looked up in the retest table
        communality.index = [
            label.replace('.logTr', '').replace('.ReflogTr', '')
            for label in communality.index
        ]
        # reorder data in line with communality
        retest_subset = retest_data.loc[communality.index]
        # reformat variable names
        communality.index = format_variable_names(communality.index)
        retest_subset.index = format_variable_names(retest_subset.index)
        if len(retest_subset) == 0:
            continue

        # noise ceiling (copied so thresholding never writes through to
        # the retest table)
        noise_ceiling = retest_subset.pearson.copy()
        # remove very low reliabilities
        if retest_threshold:
            noise_ceiling[noise_ceiling < retest_threshold] = np.nan
        # adjust
        adjusted_communality = communality / noise_ceiling

        # plot communality histogram
        ax = axes[i]
        ax.set_title(name.title(), fontweight='bold', fontsize=size * 2)
        colors = sns.color_palette(n_colors=2, desat=.75)
        sns.kdeplot(communality,
                    linewidth=size / 4,
                    ax=ax,
                    vertical=True,
                    shade=True,
                    label='Communality',
                    color=colors[0])
        sns.kdeplot(adjusted_communality,
                    linewidth=size / 4,
                    ax=ax,
                    vertical=True,
                    shade=True,
                    label='Adjusted Communality',
                    color=colors[1])
        xlim = ax.get_xlim()
        ax.hlines(np.mean(communality),
                  xlim[0],
                  xlim[1],
                  color=colors[0],
                  linewidth=size / 4,
                  linestyle='--')
        ax.hlines(np.mean(adjusted_communality),
                  xlim[0],
                  xlim[1],
                  color=colors[1],
                  linewidth=size / 4,
                  linestyle='--')
        ax.set_xticks([])
        ax.tick_params(labelsize=size * 1.2)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        if (i + 1) == len(all_results):
            # last panel carries the legend
            ax.set_xlabel('Normalized Density', fontsize=size * 2)
            leg = ax.legend(fontsize=size * 1.5,
                            loc='upper right',
                            bbox_to_anchor=(1.2, 1.0),
                            handlelength=0,
                            handletextpad=0)
            beautify_legend(leg, colors)
        elif i >= len(all_results) - 2:
            ax.set_xlabel('Normalized Density', fontsize=size * 2)
            ax.legend().set_visible(False)
        else:
            ax.legend().set_visible(False)
        if i % 2 == 0:
            # left column shows the y axis label and ticks
            ax.set_ylabel('Communality', fontsize=size * 2)
            ax.tick_params(labelleft=True,
                           left=True,
                           length=size / 4,
                           width=size / 8)
        else:
            ax.tick_params(labelleft=False,
                           left=True,
                           length=0,
                           width=size / 8)
        # update max_y
        max_y = max(max_y, ax.get_ylim()[1])
        ax.grid(False)
        for spine in ax.spines.values():
            spine.set_linewidth(size * .1)

    # share one y range across all panels
    for ax in axes:
        ax.set_ylim((0, max_y))
    plt.subplots_adjust(wspace=0)

    if plot_dir:
        filename = 'communality_adjustment.%s' % ext
        save_figure(f, path.join(plot_dir, rotate, filename), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        plt.close(f)
def plot_factor_correlation(results,
                            c,
                            rotate='oblimin',
                            title=True,
                            DA=False,
                            size=4.6,
                            dpi=300,
                            ext='png',
                            plot_dir=None):
    """Plot the factor correlation ('Phi') matrix of a stored solution.

    The full matrix is drawn as a heatmap and drawn again with
    annotations and a mask on the strict lower triangle.

    Args:
        results: results object exposing ``EFA``, ``DA`` and ``ID``.
        c: number of factors selecting the stored solution.
        rotate: rotation name selecting the stored solution.
        title: when equal to True, add a title built from ``results.ID``.
        DA: use ``results.DA`` instead of ``results.EFA``.
        size: figure height; also scales fonts and paddings.
        dpi: resolution passed to ``save_figure``.
        ext: file extension of the saved figure.
        plot_dir: output directory. When falsy nothing is saved and the
            figure is left open.
    """
    EFA = results.DA if DA else results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    # the ordering must come from the same rotation as the plotted matrix
    reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c], 'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    # `with a and b:` would only enter `b`; list both context managers so
    # the plotting context is applied together with the axes style
    with sns.plotting_context('notebook', font_scale=2), \
            sns.axes_style('white'):
        f = plt.figure(figsize=(size * 5 / 4, size))
        ax1 = f.add_axes([0, 0, .9, .9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        cmap = sns.diverging_palette(220, 15, n=100, as_cmap=True)
        sns.heatmap(phi,
                    ax=ax1,
                    square=True,
                    vmax=1,
                    vmin=-1,
                    cbar_ax=cbar_ax,
                    cmap=cmap)
        sns.heatmap(phi,
                    ax=ax1,
                    square=True,
                    vmax=1,
                    vmin=-1,
                    cbar_ax=cbar_ax,
                    annot=True,
                    annot_kws={"size": size / c * 15},
                    cmap=cmap,
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        # kept as an equality test so truthy non-boolean values (e.g. a
        # string) still do not switch the title on
        if title == True:
            ax1.set_title('%s Factor Correlations' %
                          results.ID.split('_')[0].title(),
                          weight='bold',
                          y=1.05,
                          fontsize=size * 3)
        ax1.tick_params(labelsize=size * 3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size * 2)
        cbar_ax.set_ylabel('Pearson Correlation',
                           rotation=-90,
                           labelpad=size * 4,
                           fontsize=size * 3)

        if plot_dir:
            filename = 'factor_correlations_EFA%s.%s' % (c, ext)
            save_figure(f, path.join(plot_dir, filename), {
                'bbox_inches': 'tight',
                'dpi': dpi
            })
            plt.close(f)