def explore_statistics(n_clusters_dir, items, ylabels, colors, val_fmt=''):
    """Save one bar chart per requested column of ``statistics.csv``.

    The CSV found in `n_clusters_dir` is split on commas by hand: its first
    line supplies the column names and every following line contributes one
    bar. The file must contain a 'label' column, which is used for the
    x tick labels of every chart.

    Args:
        n_clusters_dir: Directory holding ``statistics.csv``; the figures are
            written there as ``<item>.png``.
        items: Names of the columns to plot, one figure each.
        ylabels: y-axis label for each entry of `items` (same order).
        colors: Bar color for each entry of `items` (same order).
        val_fmt: Format spec forwarded to `show_bar_value`.
    """
    with open(pjoin(n_clusters_dir, 'statistics.csv')) as stats_file:
        lines = stats_file.read().splitlines()

    header = lines[0].split(',')
    # Transpose the data rows so that each tuple holds one whole column.
    columns = list(zip(*(row.split(',') for row in lines[1:])))
    column_by_name = {name: columns[pos] for pos, name in enumerate(header)}

    positions = np.arange(len(lines) - 1)
    bar_width = auto_bar_width(positions)

    for pos, item in enumerate(items):
        plt.figure()
        heights = [float(cell) for cell in column_by_name[item]]
        bars = plt.bar(positions, heights, bar_width, color=colors[pos])
        show_bar_value(bars, val_fmt)

        plt.xlabel('subgroup label')
        plt.ylabel(ylabels[pos])
        plt.title(item)
        plt.xticks(positions, column_by_name['label'])

        # Drop the top/right frame lines for a cleaner look.
        axes = plt.gca()
        for side in ('top', 'right'):
            axes.spines[side].set_visible(False)

        plt.savefig(pjoin(n_clusters_dir, '{}.png'.format(item)))
def compare_plot_bar():
    """Show the significant p-values of the hard-coded comparison file as bars.

    The 'p' column of the comparison CSV is optionally corrected for multiple
    testing (Benjamini/Hochberg FDR via statsmodels); only samples whose
    p-value is below `alpha` are printed and plotted.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from os.path import join as pjoin
    # https://www.statsmodels.org/dev/_modules/statsmodels/stats/multitest.html
    from statsmodels.stats.multitest import multipletests
    from commontool.io.io import CsvReader
    from commontool.algorithm.plot import auto_bar_width, show_bar_value

    # --- settings ---
    stru_name = 'myelin'
    multi_test_corrected = True
    alpha = 0.001
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    stru_dir = pjoin(project_dir, 's2_25_zscore/HAC_ward_euclidean/2clusters/structure')
    compare_file = pjoin(pjoin(stru_dir, 'compare'), stru_name)

    # --- load and (optionally) correct the p-values ---
    # NOTE(review): to_dict(1) presumably maps column names to lists of
    # strings -- confirm against CsvReader.
    compare_dict = CsvReader(compare_file).to_dict(1)
    ps = np.array([float(raw) for raw in compare_dict['p']])
    if multi_test_corrected:
        # Element 1 of the returned tuple holds the corrected p-values.
        ps = multipletests(ps, 0.05, 'fdr_bh')[1]

    # --- keep only the samples below the threshold ---
    sample_names = [
        sample
        for position, sample in enumerate(compare_dict['sample_name'])
        if ps[position] < alpha
    ]
    ps = [value for value in ps if value < alpha]
    print('\n'.join(str(pair) for pair in zip(sample_names, ps)))

    # --- plot ---
    _, ax = plt.subplots()
    x = np.arange(len(sample_names))
    bars = ax.bar(x, ps, auto_bar_width(x), color='g', alpha=0.5)
    show_bar_value(bars, '.2f')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_title(stru_name)
    ax.set_ylabel('p', color='g')
    ax.tick_params('y', colors='g')
    # Reference lines at the usual significance levels.
    for level in (0.05, 0.01, 0.001):
        ax.axhline(level)
    ax.set_xticks(x)
    ax.set_xticklabels(sample_names)
    plt.setp(ax.get_xticklabels(), rotation=25, ha='right',
             rotation_mode='anchor')

    plt.tight_layout()
    plt.show()
def test_bar_plot():
    """Draw two small demo bar charts exercising the bar helpers.

    The first figure has three red bars with custom tick labels and default
    value formatting; the second has a single green bar whose value is
    rendered with the '.3f' format.
    """
    # Figure 1: three bars, default value format.
    positions = np.arange(3)
    bar_width = auto_bar_width(positions)
    plt.figure()
    bars = plt.bar(positions, [1, 2, 3], bar_width, color='r')
    show_bar_value(bars)
    plt.xticks(positions, ['11', '22', '33'])

    # Figure 2: a single bar, explicit value format.
    positions = np.arange(1)
    bar_width = auto_bar_width(positions)
    plt.figure()
    bars = plt.bar(positions, [1.3241], bar_width, color='g')
    show_bar_value(bars, '.3f')
def plot_age_distribution(proj_name='HCPD'):
    """Plot how many subjects of a project have each distinct age.

    Reads ``<proj_name>_SubjInfo.csv`` from the module-level `work_dir`,
    counts the occurrences of every distinct value in its 'age in years'
    column and shows the counts as a bar chart.

    Args:
        proj_name: Prefix of the subject-info CSV file name.
    """
    import pandas as pd
    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value

    age_type = 'age in years'
    fname = f'{proj_name}_SubjInfo.csv'
    data = pd.read_csv(pjoin(work_dir, fname))

    # A single pass yields both the sorted distinct ages and how often each
    # occurs, instead of rescanning the whole list once per distinct age.
    ages_uniq, n_subjects = np.unique(data[age_type].to_list(),
                                      return_counts=True)

    rects = plt.bar(ages_uniq, n_subjects, edgecolor='k', facecolor='w')
    show_bar_value(rects)
    plt.xlabel(age_type)
    plt.xticks(ages_uniq, ages_uniq, rotation=45)
    plt.ylabel('#subjects')
    plt.title(fname)
    plt.tight_layout()
    plt.show()
def plot_reliability():
    """Show a histogram of the stored activation-pattern reliability values.

    The 'corr' entries are read from ``reliability_<hemi>.pkl`` in the
    hard-coded target directory (both hemispheres combined when `hemi` is
    'both'), optionally filtered by `thr`, and binned into a histogram
    whose bar counts are annotated.
    """
    import numpy as np
    import pickle as pkl
    from os.path import join as pjoin
    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value

    hemi = 'both'  # lh, rh, or both
    thr = None  # keep only correlations strictly above this; None = keep all
    trg_dir = '/nfs/t3/workingshop/chenxiayu/study/FFA_pattern/analysis/s4_reliability'

    def load_corrs(hemi_name):
        # Use a context manager so the pickle file is closed right after
        # loading rather than whenever the garbage collector gets to it.
        with open(pjoin(trg_dir, f'reliability_{hemi_name}.pkl'), 'rb') as rf:
            return pkl.load(rf)['corr']

    if hemi == 'both':
        # NOTE(review): `+` pools the two hemispheres only if 'corr' holds
        # lists; numpy arrays would be added element-wise -- confirm.
        corrs = load_corrs('lh') + load_corrs('rh')
    else:
        corrs = load_corrs(hemi)

    if thr is not None:
        # Filter by the configured threshold itself, not a fixed constant.
        corrs = [i for i in corrs if i > thr]
    print('#correlation:', len(corrs))

    # 30 edges spanning the observed range, i.e. 29 equally wide bins.
    bins = np.linspace(min(corrs), max(corrs), 30)
    _, _, patches = plt.hist(corrs, bins, color='white', edgecolor='black')
    plt.xlabel('correlation')
    plt.title(f'{hemi} activation pattern reliability')
    show_bar_value(patches, '.0f')
    plt.tight_layout()
    plt.show()
def roi_mean_plot(roi_mean_file, ROIitems, colors, xticklabels,
                  ylabel=None, title=None, plot_style='violin'):
    """Compare per-ROI values with pairwise t-tests and a violin or bar plot.

    Every pair of ROI items is compared with `ttest_ind` and the result is
    printed; afterwards the values are drawn in a new figure.

    Args:
        roi_mean_file: File handed to `CsvReader`; its columns are looked up
            by the names in `ROIitems` and converted to float.
        ROIitems: Column names to compare and plot.
        colors: For 'violin', one color per violin body; for 'bar', only the
            first entry is used (as the bar edge color).
        xticklabels: Tick labels, one per ROI item.
        ylabel: Optional y-axis label.
        title: Optional figure title.
        plot_style: Either 'violin' or 'bar'.

    Raises:
        RuntimeError: If `plot_style` is neither 'violin' nor 'bar'.
    """
    columns = CsvReader(roi_mean_file).to_dict(axis=1)
    values_per_roi = [[float(cell) for cell in columns[name]]
                      for name in ROIitems]
    n_items = len(ROIitems)

    # Independent-samples t-test for every unordered pair of ROI items.
    for first in range(n_items):
        for second in range(first + 1, n_items):
            print('{} vs. {}'.format(ROIitems[first], ROIitems[second]),
                  ttest_ind(values_per_roi[first], values_per_roi[second]))

    plt.figure()
    if plot_style == 'bar':
        positions = np.arange(n_items)
        heights = [np.mean(values) for values in values_per_roi]
        errors = [sem(values) for values in values_per_roi]
        bar_width = auto_bar_width(positions)
        bars = plt.bar(positions, heights, bar_width, edgecolor=colors[0],
                       yerr=errors, facecolor='white')
        show_bar_value(bars, '.2f')
        plt.xticks(positions, xticklabels)
    elif plot_style == 'violin':
        parts = plt.violinplot(values_per_roi, showmeans=True)
        # https://stackoverflow.com/questions/26291479/changing-the-color-of-matplotlibs-violin-plots
        for position, body in enumerate(parts['bodies']):
            body.set_color(colors[position])
        # Violin bodies sit at x = 1..n, not 0..n-1.
        plt.xticks(range(1, n_items + 1), xticklabels)
    else:
        raise RuntimeError("Invalid plot style: {}".format(plot_style))

    if ylabel is not None:
        plt.ylabel(ylabel)
    if title is not None:
        plt.title(title)

    axes = plt.gca()
    for side in ('top', 'right'):
        axes.spines[side].set_visible(False)
    plt.tight_layout()
def plot_roi_info():
    """Visualise the ROI summary stored in the hard-coded ``rois_info.pkl``.

    Produces one bar chart of 'n_group' per ROI, one bar chart of
    'n_subject' per ROI, and one 50-edge histogram of 'sizes' for each ROI,
    then shows all figures.
    """
    import numpy as np
    import pickle as pkl
    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value, auto_bar_width

    roi_info_file = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_pattern/analysis/s2_rh/zscore/' \
                    'HAC_ward_euclidean/100clusters/activation/ROIs/v3/rois_info.pkl'
    # Close the file as soon as the pickle is loaded.
    with open(roi_info_file, 'rb') as rf:
        roi_infos = pkl.load(rf)

    # -plot n_group and n_subject-
    x_labels = list(roi_infos.keys())
    x = np.arange(len(x_labels))
    width = auto_bar_width(x)
    for key, ylabel in (('n_group', '#group'), ('n_subject', '#subject')):
        # Each chart gets its own figure so nothing is drawn onto a figure
        # that happened to be current before this function was called.
        plt.figure()
        counts = [info[key] for info in roi_infos.values()]
        rects = plt.bar(x, counts, width, facecolor='white', edgecolor='black')
        show_bar_value(rects)
        plt.xticks(x, x_labels)
        plt.ylabel(ylabel)

    # -plot sizes-
    for roi, info in roi_infos.items():
        plt.figure()
        sizes = info['sizes']
        bins = np.linspace(min(sizes), max(sizes), 50)
        _, _, patches = plt.hist(sizes, bins, color='white', edgecolor='black')
        plt.xlabel('#vertex')
        plt.title(f'distribution of {roi} sizes')
        show_bar_value(patches, '.0f')
        # tight_layout only affects the current figure, so apply it to every
        # histogram rather than just the last one.
        plt.tight_layout()

    plt.show()
def explore_label_dice(n_clusters_dir):
    """Plot pairwise overlaps between cluster ROIs, thresholded two ways.

    For clusters 1-3 (one ROI label file each) and clusters 4-6 (two ROI
    label files each, concatenated), `calc_overlap` is evaluated for every
    pair of clusters, first on the ``*_z2.3.label`` ROIs and then on the
    matching maps of ``top_acti_ROIs_percent10.0.nii.gz``. Each group of
    three clusters gets its own bar chart with six bars.

    Args:
        n_clusters_dir: Directory containing the label files and the NIfTI
            file described above.
    """
    from itertools import combinations

    import nibabel as nib
    from commontool.algorithm.tool import calc_overlap

    def read_roi(stem):
        # Load one FreeSurfer label file named ``<stem>_z2.3.label``.
        return nib.freesurfer.read_label(
            pjoin(n_clusters_dir, f'{stem}_z2.3.label'))

    def pairwise_dice(z_rois, top_rois):
        # Overlap of every unordered pair: z-thresholded ROIs first, then
        # the top-activation maps, matching the tick label order below.
        dice = [calc_overlap(a, b) for a, b in combinations(z_rois, 2)]
        # NOTE(review): the extra `1, 1` presumably selects label value 1 in
        # each map -- confirm against calc_overlap's signature.
        dice += [calc_overlap(a, b, 1, 1)
                 for a, b in combinations(top_rois, 2)]
        return dice

    def pair_ticks(cluster_names):
        # e.g. ['c1_c2_z2.3', 'c1_c3_z2.3', ..., 'c2_c3_top10']
        return [f'{a}_{b}_{suffix}'
                for suffix in ('z2.3', 'top10')
                for a, b in combinations(cluster_names, 2)]

    def plot_dice(dice, xticks):
        x = np.arange(len(dice))
        width = auto_bar_width(x)
        plt.figure()
        rects = plt.bar(x, dice, width, color='b')
        show_bar_value(rects, '.2%')
        plt.ylabel('dice')
        plt.xticks(x, xticks)
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    # Clusters 1-3 have a single ROI; clusters 4-6 have two ROIs whose
    # label arrays are merged into one per cluster.
    c123_r_z = [read_roi(f'cluster{c}_ROI') for c in (1, 2, 3)]
    c456_r_z = [
        np.concatenate((read_roi(f'cluster{c}_ROI1'),
                        read_roi(f'cluster{c}_ROI2')))
        for c in (4, 5, 6)
    ]

    # `get_data()` is deprecated/removed in nibabel; reading `dataobj`
    # through numpy returns the stored array with its on-disk dtype.
    top_img = nib.load(
        pjoin(n_clusters_dir, 'top_acti_ROIs_percent10.0.nii.gz'))
    c1_6_acti_top10 = np.asanyarray(top_img.dataobj)

    # The first three maps along axis 0 go with clusters 1-3, the rest
    # with clusters 4-6.
    plot_dice(pairwise_dice(c123_r_z, c1_6_acti_top10[:3]),
              pair_ticks(('c1', 'c2', 'c3')))
    plot_dice(pairwise_dice(c456_r_z, c1_6_acti_top10[3:]),
              pair_ticks(('c4', 'c5', 'c6')))
def _save_count_bar(counts, out_path):
    """Draw the label -> count mapping `counts` as blue bars and save it."""
    # Materialise the dict views: plain lists are accepted by every
    # matplotlib version, dict views are not guaranteed to be.
    labels = list(counts.keys())
    values = list(counts.values())

    plt.figure()
    x = np.arange(len(labels))
    width = auto_bar_width(x)
    rects = plt.bar(x, values, width, color='b')
    show_bar_value(rects)
    plt.ylabel('#subjects')
    plt.xticks(x, labels)
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.savefig(out_path)


def explore_roi_stats(n_clusters_dir):
    """Count subjects by number and type of ROI labels and plot the counts.

    Each map in ``mean_map_ROIs.nii.gz`` is paired (by position) with the
    '#subjects' entry of ``statistics.csv``. A map must contain 0 plus either
    one label (1 -> 'r_pFFA', 2 -> 'r_mFFA', 3 -> 'unknown') or exactly the
    two labels 1 and 2 ('both'). The subject counts are accumulated per
    number of ROIs and per type and saved as ``numb_count.png`` and
    ``type_count.png`` in `n_clusters_dir`.

    Args:
        n_clusters_dir: Directory holding the two input files; the figures
            are written there as well.

    Raises:
        RuntimeError: If a map has no zero value, or its set of labels does
            not match one of the accepted combinations.
    """
    roi_maps = read_nifti(pjoin(n_clusters_dir, 'mean_map_ROIs.nii.gz'))
    stats_reader = CsvReader(pjoin(n_clusters_dir, 'statistics.csv'))
    row_dict = stats_reader.to_dict(keys=['#subjects'])

    # Ordered so the bars appear in this fixed order.
    numb_dict = OrderedDict((item, 0) for item in ('1', '2'))
    type_dict = OrderedDict(
        (item, 0) for item in ('r_pFFA', 'r_mFFA', 'both', 'unknown'))

    for idx, roi_map in enumerate(roi_maps):
        # NOTE(review): set() needs hashable scalars, so each roi_map is
        # presumably 1-D -- confirm against read_nifti.
        map_set = set(roi_map)
        subjects_num = int(row_dict['#subjects'][idx])

        if 0 not in map_set:
            raise RuntimeError('Be careful! There is no zero in one roi_map')

        if len(map_set) == 2:
            # Zero plus a single ROI label.
            numb_dict['1'] += subjects_num
            if 1 in map_set:
                type_dict['r_pFFA'] += subjects_num
            elif 2 in map_set:
                type_dict['r_mFFA'] += subjects_num
            elif 3 in map_set:
                type_dict['unknown'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the only one ROI label is not in (1, 2, 3)')
        elif len(map_set) == 3:
            # Zero plus two ROI labels, which must be 1 and 2.
            numb_dict['2'] += subjects_num
            if 1 in map_set and 2 in map_set:
                type_dict['both'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the two ROI labels are not 1 and 2')
        else:
            raise RuntimeError(
                'Be careful! the number of ROI labels is not 1 or 2')

    _save_count_bar(numb_dict, pjoin(n_clusters_dir, 'numb_count.png'))
    _save_count_bar(type_dict, pjoin(n_clusters_dir, 'type_count.png'))
male_num, female_num = explore_Gender(behavior_dict, label) male_nums.append(male_num) female_nums.append(female_num) for item in float_items: float_data_dict[item].append( explore_float_data(behavior_dict, item, label)) # plot x = np.arange(label_num) width = auto_bar_width(x, 2) plt.figure() ax = plt.gca() rects1 = ax.bar(x, male_nums, width, color='b') rects2 = ax.bar(x + width, female_nums, width, color='r') show_bar_value(rects1) show_bar_value(rects2) ax.legend((rects1[0], rects2[1]), ('male', 'female')) ax.set_xticks(x + width / 2.0) ax.set_xticklabels(labels) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.set_title('Gender') ax.set_xlabel('subgroup label') ax.set_ylabel('count') # plt.savefig(pjoin(cluster_num_dir, 'Gender.png')) width = auto_bar_width(x) for item in float_items: plt.figure() ax = plt.gca()
def plot_mean_sem():
    """Plot grouped mean +/- SEM bars for the intra/inter-group correlations.

    Reads the 'names', 'means' and 'sems' columns of the hard-coded
    ``mean_sem/<stru_name>`` CSV and draws, for each of four intra/inter
    name pairs, two adjacent bars with error bars and value annotations.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from os.path import join as pjoin
    from commontool.algorithm.plot import auto_bar_width, show_bar_value

    stru_name = 'myelin'
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    stru_dir = pjoin(project_dir, 's2_25_zscore/HAC_ward_euclidean/2clusters/structure')
    table = pd.read_csv(pjoin(stru_dir, 'mean_sem/{}'.format(stru_name)))

    # Each row is (intra-group name, inter-group name) for one group/FFA.
    pair_templates = (
        ('G1_acti_corr_G1_{}_lFFA', 'G1_acti_corr_G2_{}_lFFA'),
        ('G2_acti_corr_G2_{}_lFFA', 'G2_acti_corr_G1_{}_lFFA'),
        ('G1_acti_corr_G1_{}_rFFA', 'G1_acti_corr_G2_{}_rFFA'),
        ('G2_acti_corr_G2_{}_rFFA', 'G2_acti_corr_G1_{}_rFFA'),
    )
    intra_inter_pairs = np.array(
        [[template.format(stru_name) for template in pair]
         for pair in pair_templates])

    names = table['names'].to_list()
    means = [float(value) for value in table['means']]
    sems = [float(value) for value in table['sems']]

    _, ax = plt.subplots()
    n_pairs, item_num = intra_inter_pairs.shape
    x = np.arange(n_pairs)
    width = auto_bar_width(x, item_num)

    for col in range(item_num):
        # Locate each name of this column in the table once.
        rows = [names.index(name) for name in intra_inter_pairs[:, col]]
        col_means = [means[row] for row in rows]
        col_sems = [sems[row] for row in rows]
        # Later columns are drawn progressively more transparent.
        opacity = 1. / ((col + 1) / 2 + 0.5)
        bars = ax.bar(x + width * col, col_means, width, color='k',
                      alpha=opacity, yerr=col_sems)
        show_bar_value(bars, '.3f')

    # Ticks under the first and the last bar of every group, labelled with
    # the corresponding names.
    xticklabels = (list(intra_inter_pairs[:, 0])
                   + list(intra_inter_pairs[:, 1]))
    ax.set_xticks(np.r_[x, x + width * (item_num - 1)])
    ax.set_xticklabels(xticklabels)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylabel('pearsonr')
    plt.setp(ax.get_xticklabels(), rotation=25, ha='right',
             rotation_mode='anchor')

    plt.tight_layout()
    plt.show()