def explore_statistics(n_clusters_dir, items, ylabels, colors, val_fmt=''):
    """Draw and save one bar chart per requested column of ``statistics.csv``.

    ``statistics.csv`` inside *n_clusters_dir* is split on commas: its first
    line supplies the column names and each following line supplies one row.
    For every name in *items* the matching column is converted to floats,
    drawn as a bar chart and written to ``<n_clusters_dir>/<item>.png``.
    The ``label`` column of the file provides the x tick labels.

    Parameters
    ----------
    n_clusters_dir : str
        Directory holding ``statistics.csv``; the PNG files are saved here.
    items : sequence of str
        Column names to plot; each must appear in the CSV header.
    ylabels : sequence of str
        Y-axis label for each entry of *items* (same order).
    colors : sequence
        Bar color for each entry of *items* (same order).
    val_fmt : str, optional
        Forwarded to ``show_bar_value`` together with the drawn bars.
    """
    with open(pjoin(n_clusters_dir, 'statistics.csv')) as csv_file:
        lines = csv_file.read().splitlines()

    header = lines[0].split(',')
    # Transpose the row-wise cells so that each tuple holds one whole column.
    columns = list(zip(*(row.split(',') for row in lines[1:])))
    table = {name: columns[pos] for pos, name in enumerate(header)}

    # One bar per data row (every line except the header).
    positions = np.arange(len(lines) - 1)
    bar_width = auto_bar_width(positions)
    for pos, column_name in enumerate(items):
        plt.figure()
        heights = list(map(float, table[column_name]))
        bars = plt.bar(positions, heights, bar_width, color=colors[pos])
        show_bar_value(bars, val_fmt)
        plt.xlabel('subgroup label')
        plt.ylabel(ylabels[pos])
        plt.title(column_name)
        plt.xticks(positions, table['label'])
        axes = plt.gca()
        for side in ('top', 'right'):
            axes.spines[side].set_visible(False)
        plt.savefig(pjoin(n_clusters_dir, '{}.png'.format(column_name)))
def compare_plot_bar():
    """Plot the significant p-values of the ``myelin`` structural comparison.

    Loads the comparison table with ``CsvReader(...).to_dict(1)``, optionally
    replaces the raw p-values by their ``'fdr_bh'`` corrected counterparts
    from ``multipletests``, keeps the samples whose p-value is below
    ``alpha``, prints them and shows them as a bar chart with reference
    lines at 0.05, 0.01 and 0.001.

    Takes no arguments and returns nothing; every input is one of the
    hard-coded paths and settings defined in the body.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    from os.path import join as pjoin
    # https://www.statsmodels.org/dev/_modules/statsmodels/stats/multitest.html
    from statsmodels.stats.multitest import multipletests
    from commontool.io.io import CsvReader
    from commontool.algorithm.plot import auto_bar_width, show_bar_value

    stru_name = 'myelin'
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    stru_dir = pjoin(project_dir,
                     's2_25_zscore/HAC_ward_euclidean/2clusters/structure')
    compare_dir = pjoin(stru_dir, 'compare')
    compare_file = pjoin(compare_dir, stru_name)

    multi_test_corrected = True
    alpha = 0.001
    compare_dict = CsvReader(compare_file).to_dict(1)
    ps = np.array(list(map(float, compare_dict['p'])))
    if multi_test_corrected:
        # Only the corrected p-values (second return value) are needed; the
        # selection below applies its own threshold ``alpha``.
        ps = multipletests(ps, 0.05, 'fdr_bh')[1]

    # Filter names and p-values together so they cannot get out of step.
    significant = [(name, p)
                   for name, p in zip(compare_dict['sample_name'], ps)
                   if p < alpha]
    sample_names = [name for name, _ in significant]
    ps = [p for _, p in significant]
    print('\n'.join(map(str, significant)))

    fig, ax = plt.subplots()
    x = np.arange(len(sample_names))
    width = auto_bar_width(x)
    rects_p = ax.bar(x, ps, width, color='g', alpha=0.5)
    # Every plotted value is below ``alpha`` (0.001), so a fixed two-decimal
    # format would label all bars "0.00"; scientific notation keeps the
    # labels informative.
    show_bar_value(rects_p, '.2e')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title(stru_name)
    ax.set_ylabel('p', color='g')
    ax.tick_params('y', colors='g')
    ax.axhline(0.05)
    ax.axhline(0.01)
    ax.axhline(0.001)
    ax.set_xticks(x)
    ax.set_xticklabels(sample_names)
    plt.setp(ax.get_xticklabels(),
             rotation=25,
             ha='right',
             rotation_mode='anchor')

    plt.tight_layout()
    plt.show()
def test_bar_plot():
    """Exercise the bar helpers on a three-bar and a one-bar figure.

    Both figures are drawn with ``plt.bar`` and handed to ``show_bar_value``;
    nothing is asserted, shown or saved here.
    """
    # Figure 1: three red bars with custom tick labels, no explicit format.
    positions = np.arange(3)
    heights = [1, 2, 3]
    tick_labels = ['11', '22', '33']
    bar_width = auto_bar_width(positions)
    plt.figure()
    bars = plt.bar(positions, heights, bar_width, color='r')
    show_bar_value(bars)
    plt.xticks(positions, tick_labels)

    # Figure 2: a single green bar, passing the '.3f' format.
    positions = np.arange(1)
    heights = [1.3241]
    bar_width = auto_bar_width(positions)
    plt.figure()
    bars = plt.bar(positions, heights, bar_width, color='g')
    show_bar_value(bars, '.3f')
Exemple #4
0
def plot_age_distribution(proj_name='HCPD'):
    """Show a bar chart of how many subjects have each distinct age.

    Reads ``<proj_name>_SubjInfo.csv`` from the module-level ``work_dir`` and
    counts the occurrences of every distinct value in its ``'age in years'``
    column. The chart is displayed with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    proj_name : str, optional
        Prefix of the subject-information CSV file name.
    """
    import pandas as pd
    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value

    age_type = 'age in years'
    fname = f'{proj_name}_SubjInfo.csv'
    info_file = pjoin(work_dir, fname)
    data = pd.read_csv(info_file)
    ages = data[age_type].to_list()

    # A single sorted pass yields the distinct ages and their frequencies,
    # instead of rescanning the full list once per distinct age.
    ages_uniq, counts = np.unique(ages, return_counts=True)
    y = counts.tolist()

    rects = plt.bar(ages_uniq, y, edgecolor='k', facecolor='w')
    show_bar_value(rects)
    plt.xlabel(age_type)
    plt.xticks(ages_uniq, ages_uniq, rotation=45)
    plt.ylabel('#subjects')
    plt.title(fname)
    plt.tight_layout()
    plt.show()
def plot_reliability():
    """Show a histogram of activation-pattern reliability correlations.

    Loads the ``'corr'`` entry of ``reliability_<hemi>.pkl`` from the
    hard-coded ``trg_dir`` (both hemispheres when ``hemi == 'both'``),
    optionally keeps only the values above ``thr``, prints how many values
    remain and displays a 30-edge histogram of them.

    Takes no arguments and returns nothing; ``hemi`` and ``thr`` are
    settings defined in the body.
    """
    import numpy as np
    import pickle as pkl

    from os.path import join as pjoin
    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value

    hemi = 'both'  # lh, rh, or both
    thr = None  # keep only correlations above this value; None keeps all
    trg_dir = '/nfs/t3/workingshop/chenxiayu/study/FFA_pattern/analysis/s4_reliability'

    def load_corrs(hemisphere):
        # Load the 'corr' entry for one hemisphere, closing the file promptly.
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        fpath = pjoin(trg_dir, f'reliability_{hemisphere}.pkl')
        with open(fpath, 'rb') as f:
            return pkl.load(f)['corr']

    if hemi == 'both':
        # NOTE(review): assumes 'corr' is a list so that `+` concatenates;
        # for numpy arrays it would add element-wise -- confirm.
        corrs = load_corrs('lh') + load_corrs('rh')
    else:
        corrs = load_corrs(hemi)

    if thr is not None:
        # Use the configured threshold rather than a hard-coded 0.5.
        corrs = [i for i in corrs if i > thr]

    print('#correlation:', len(corrs))

    bins = np.linspace(min(corrs), max(corrs), 30)
    # plt.figure(figsize=(9, 4))
    _, _, patches = plt.hist(corrs, bins, color='white', edgecolor='black')
    plt.xlabel('correlation')
    plt.title(f'{hemi} activation pattern reliability')
    show_bar_value(patches, '.0f')

    plt.tight_layout()
    plt.show()
def roi_mean_plot(roi_mean_file, ROIitems, colors, xticklabels, ylabel=None, title=None, plot_style='violin'):
    """Compare the columns named in *ROIitems* and plot them.

    The columns are read from *roi_mean_file* through
    ``CsvReader(...).to_dict(axis=1)`` and converted to floats. The result of
    ``ttest_ind`` is printed for every pair of items, then one figure is
    drawn: a violin plot (one body per item, colored from *colors*) or a bar
    chart of the means with ``sem`` error bars. The figure is neither shown
    nor saved here.

    Parameters
    ----------
    roi_mean_file : str
        Path of the CSV file to read.
    ROIitems : sequence of str
        Keys of the columns to compare and plot.
    colors : sequence
        One color per violin body; the bar style uses only ``colors[0]`` as
        edge color.
    xticklabels : sequence of str
        Tick labels, one per item.
    ylabel, title : str, optional
        Axis label and figure title; skipped when ``None``.
    plot_style : {'violin', 'bar'}
        Kind of plot to draw.

    Raises
    ------
    RuntimeError
        If *plot_style* is neither ``'violin'`` nor ``'bar'``.
    """
    table = CsvReader(roi_mean_file).to_dict(axis=1)
    roi_means_list = [[float(value) for value in table[name]] for name in ROIitems]

    n_items = len(ROIitems)
    # Every unordered pair (first < second) gets its own t-test printout.
    index_pairs = [(first, second)
                   for first in range(n_items)
                   for second in range(first + 1, n_items)]
    for first, second in index_pairs:
        heading = '{} vs. {}'.format(ROIitems[first], ROIitems[second])
        print(heading, ttest_ind(roi_means_list[first], roi_means_list[second]))

    plt.figure()
    if plot_style == 'bar':
        positions = np.arange(n_items)
        heights = [np.mean(values) for values in roi_means_list]
        errors = [sem(values) for values in roi_means_list]
        bar_width = auto_bar_width(positions)
        bars = plt.bar(positions, heights, bar_width, edgecolor=colors[0],
                       yerr=errors, facecolor='white')
        show_bar_value(bars, '.2f')
        plt.xticks(positions, xticklabels)
    elif plot_style == 'violin':
        parts = plt.violinplot(roi_means_list, showmeans=True)
        # https://stackoverflow.com/questions/26291479/changing-the-color-of-matplotlibs-violin-plots
        for pos, body in enumerate(parts['bodies']):
            body.set_color(colors[pos])
        # Violin bodies sit at positions 1..n, not 0..n-1.
        plt.xticks(range(1, n_items + 1), xticklabels)
    else:
        raise RuntimeError("Invalid plot style: {}".format(plot_style))

    if ylabel is not None:
        plt.ylabel(ylabel)
    if title is not None:
        plt.title(title)

    axes = plt.gca()
    for side in ('top', 'right'):
        axes.spines[side].set_visible(False)

    plt.tight_layout()
def plot_roi_info():
    """Show per-ROI group counts, subject counts and size distributions.

    Loads the ROI information dictionary from the hard-coded pickle file and
    draws, each on its own figure: a bar chart of every ROI's ``'n_group'``,
    a bar chart of every ROI's ``'n_subject'``, and one 50-edge histogram of
    ``'sizes'`` per ROI. All figures are displayed with a single
    ``plt.show()``.

    Takes no arguments and returns nothing.
    """
    import numpy as np
    import pickle as pkl

    from matplotlib import pyplot as plt
    from commontool.algorithm.plot import show_bar_value, auto_bar_width

    roi_info_file = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_pattern/analysis/s2_rh/zscore/' \
                    'HAC_ward_euclidean/100clusters/activation/ROIs/v3/rois_info.pkl'
    # Use a context manager so the file handle is closed after loading.
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(roi_info_file, 'rb') as f:
        roi_infos = pkl.load(f)

    # -plot n_group and n_subject-
    x_labels = list(roi_infos.keys())
    x = np.arange(len(x_labels))
    width = auto_bar_width(x)

    for key, ylabel in (('n_group', '#group'), ('n_subject', '#subject')):
        # Each chart gets an explicit figure of its own so that nothing is
        # drawn onto a figure left over from earlier plotting.
        plt.figure()
        heights = [info[key] for info in roi_infos.values()]
        rects = plt.bar(x,
                        heights,
                        width,
                        facecolor='white',
                        edgecolor='black')
        show_bar_value(rects)
        plt.xticks(x, x_labels)
        plt.ylabel(ylabel)
        # tight_layout only affects the current figure, so apply it per figure.
        plt.tight_layout()

    # -plot sizes-
    for roi, info in roi_infos.items():
        plt.figure()
        sizes = info['sizes']
        bins = np.linspace(min(sizes), max(sizes), 50)
        _, _, patches = plt.hist(sizes, bins, color='white', edgecolor='black')
        plt.xlabel('#vertex')
        plt.title(f'distribution of {roi} sizes')
        show_bar_value(patches, '.0f')
        plt.tight_layout()

    plt.show()
def explore_label_dice(n_clusters_dir):
    """Plot pairwise ROI overlaps for clusters 1-3 and for clusters 4-6.

    Two ROI definitions are compared inside each group of three clusters:
    the ``*_z2.3.label`` FreeSurfer label files (clusters 4-6 have two label
    files each, which are concatenated) and the first / last three entries
    of ``top_acti_ROIs_percent10.0.nii.gz``. ``calc_overlap`` is evaluated
    for every pair within a group, and each group is drawn as a six-bar
    chart with the y-axis labelled 'dice'. The figures are neither shown nor
    saved here.

    Parameters
    ----------
    n_clusters_dir : str
        Directory containing the label files and the NIfTI volume.
    """
    import nibabel as nib

    from commontool.algorithm.tool import calc_overlap

    def read_roi(file_name):
        # Read one FreeSurfer label file from the cluster directory.
        return nib.freesurfer.read_label(pjoin(n_clusters_dir, file_name))

    def pairwise_overlap(rois, *extra_args):
        # calc_overlap for every unordered pair, in (0,1), (0,2), (1,2) order.
        return [calc_overlap(first, second, *extra_args)
                for idx, first in enumerate(rois[:-1])
                for second in rois[idx + 1:]]

    c123_r_z = [read_roi('cluster{}_ROI_z2.3.label'.format(c))
                for c in (1, 2, 3)]
    c456_r_z = [
        np.concatenate((read_roi('cluster{}_ROI1_z2.3.label'.format(c)),
                        read_roi('cluster{}_ROI2_z2.3.label'.format(c))))
        for c in (4, 5, 6)
    ]

    top_img = nib.load(pjoin(n_clusters_dir, 'top_acti_ROIs_percent10.0.nii.gz'))
    # ``get_data()`` is deprecated and removed in nibabel 5.0;
    # ``np.asanyarray(img.dataobj)`` is its documented equivalent.
    c1_6_acti_top10 = np.asanyarray(top_img.dataobj)
    c123_r_top = c1_6_acti_top10[:3]
    c456_r_top = c1_6_acti_top10[3:]

    c123_xticks = [
        'c1_c2_z2.3', 'c1_c3_z2.3', 'c2_c3_z2.3', 'c1_c2_top10', 'c1_c3_top10',
        'c2_c3_top10'
    ]
    c456_xticks = [
        'c4_c5_z2.3', 'c4_c6_z2.3', 'c5_c6_z2.3', 'c4_c5_top10', 'c4_c6_top10',
        'c5_c6_top10'
    ]

    # The label-based overlaps come first, then the top-10% ones, matching the
    # order of the tick labels above.
    # NOTE(review): the extra (1, 1) arguments presumably select the label
    # value to compare inside each volume -- confirm against calc_overlap.
    c123_dice = pairwise_overlap(c123_r_z) + pairwise_overlap(c123_r_top, 1, 1)
    c456_dice = pairwise_overlap(c456_r_z) + pairwise_overlap(c456_r_top, 1, 1)

    x = np.arange(len(c123_xticks))
    width = auto_bar_width(x)
    for dice, xticks in ((c123_dice, c123_xticks), (c456_dice, c456_xticks)):
        plt.figure()
        rects = plt.bar(x, dice, width, color='b')
        show_bar_value(rects, '.2%')
        plt.ylabel('dice')
        plt.xticks(x, xticks)
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
def explore_roi_stats(n_clusters_dir):
    """Tally subjects by ROI count and ROI type, saving two bar charts.

    Reads the ROI maps from ``mean_map_ROIs.nii.gz`` and the ``#subjects``
    entries from ``statistics.csv`` in *n_clusters_dir*. For every map, the
    ``#subjects`` value at the same index is added to the tallies selected
    by the set of labels found in that map:

    * ``{0, 1}`` -> one ROI, type ``r_pFFA``
    * ``{0, 2}`` -> one ROI, type ``r_mFFA``
    * ``{0, 3}`` -> one ROI, type ``unknown``
    * ``{0, 1, 2}`` -> two ROIs, type ``both``

    The tallies are saved as ``numb_count.png`` and ``type_count.png`` in
    *n_clusters_dir*.

    Parameters
    ----------
    n_clusters_dir : str
        Directory containing the input files; the charts are written here.

    Raises
    ------
    RuntimeError
        If a map contains no 0, or its labels match none of the
        combinations listed above.
    """
    roi_path = pjoin(n_clusters_dir, 'mean_map_ROIs.nii.gz')
    stats_path = pjoin(n_clusters_dir, 'statistics.csv')
    roi_maps = read_nifti(roi_path)
    stats_reader = CsvReader(stats_path)
    row_dict = stats_reader.to_dict(keys=['#subjects'])

    numb_items = ['1', '2']
    numb_dict = OrderedDict.fromkeys(numb_items, 0)

    type_items = ['r_pFFA', 'r_mFFA', 'both', 'unknown']
    type_dict = OrderedDict.fromkeys(type_items, 0)

    for idx, roi_map in enumerate(roi_maps):
        # NOTE(review): assumes each roi_map iterates over hashable scalar
        # labels (i.e. is one-dimensional) -- confirm against read_nifti.
        map_set = set(roi_map)
        subjects_num = int(row_dict['#subjects'][idx])

        if 0 not in map_set:
            raise RuntimeError('Be careful! There is no zero in one roi_map')

        if len(map_set) == 2:
            # Background (0) plus exactly one ROI label.
            numb_dict['1'] += subjects_num
            if 1 in map_set:
                type_dict['r_pFFA'] += subjects_num
            elif 2 in map_set:
                type_dict['r_mFFA'] += subjects_num
            elif 3 in map_set:
                type_dict['unknown'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the only one ROI label is not in (1, 2, 3)')
        elif len(map_set) == 3:
            # Background (0) plus two ROI labels, which must be 1 and 2.
            numb_dict['2'] += subjects_num
            if 1 in map_set and 2 in map_set:
                type_dict['both'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the two ROI labels are not 1 and 2')
        else:
            raise RuntimeError(
                'Be careful! the number of ROI labels is not 1 or 2')

    charts = ((numb_dict, 'numb_count.png'), (type_dict, 'type_count.png'))
    for counts, file_name in charts:
        plt.figure()
        x = np.arange(len(counts))
        width = auto_bar_width(x)
        # Dict views are not sequences in Python 3; materialize them so
        # matplotlib receives real lists of heights and tick labels.
        rects = plt.bar(x, list(counts.values()), width, color='b')
        show_bar_value(rects)
        plt.ylabel('#subjects')
        plt.xticks(x, list(counts.keys()))
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.savefig(pjoin(n_clusters_dir, file_name))
Exemple #10
0
        male_num, female_num = explore_Gender(behavior_dict, label)
        male_nums.append(male_num)
        female_nums.append(female_num)

        for item in float_items:
            float_data_dict[item].append(
                explore_float_data(behavior_dict, item, label))

    # plot
    x = np.arange(label_num)
    width = auto_bar_width(x, 2)
    plt.figure()
    ax = plt.gca()
    rects1 = ax.bar(x, male_nums, width, color='b')
    rects2 = ax.bar(x + width, female_nums, width, color='r')
    show_bar_value(rects1)
    show_bar_value(rects2)
    ax.legend((rects1[0], rects2[1]), ('male', 'female'))
    ax.set_xticks(x + width / 2.0)
    ax.set_xticklabels(labels)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title('Gender')
    ax.set_xlabel('subgroup label')
    ax.set_ylabel('count')
    # plt.savefig(pjoin(cluster_num_dir, 'Gender.png'))

    width = auto_bar_width(x)
    for item in float_items:
        plt.figure()
        ax = plt.gca()
def plot_mean_sem():
    """Show grouped bars of mean +/- SEM for intra- vs. inter-group pairs.

    Reads the ``names``, ``means`` and ``sems`` columns of the hard-coded
    ``mean_sem/myelin`` CSV file. Four name pairs (first column: same group,
    second column: other group, for lFFA and rFFA) are looked up by name and
    drawn as two bars per pair with SEM error bars. The figure is displayed
    with ``plt.show()``.

    Takes no arguments and returns nothing.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    from os.path import join as pjoin
    from commontool.algorithm.plot import auto_bar_width, show_bar_value

    stru_name = 'myelin'
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    stru_dir = pjoin(project_dir,
                     's2_25_zscore/HAC_ward_euclidean/2clusters/structure')
    mean_sem_file = pjoin(stru_dir, 'mean_sem/{}'.format(stru_name))

    table = pd.read_csv(mean_sem_file)

    # Each row is one x position; column 0 / 1 are its first / second bar.
    pair_rows = [
        ['G1_acti_corr_G1_{}_lFFA'.format(stru_name),
         'G1_acti_corr_G2_{}_lFFA'.format(stru_name)],
        ['G2_acti_corr_G2_{}_lFFA'.format(stru_name),
         'G2_acti_corr_G1_{}_lFFA'.format(stru_name)],
        ['G1_acti_corr_G1_{}_rFFA'.format(stru_name),
         'G1_acti_corr_G2_{}_rFFA'.format(stru_name)],
        ['G2_acti_corr_G2_{}_rFFA'.format(stru_name),
         'G2_acti_corr_G1_{}_rFFA'.format(stru_name)],
    ]
    intra_inter_pairs = np.array(pair_rows)

    names = table['names'].to_list()
    means = [float(value) for value in table['means']]
    sems = [float(value) for value in table['sems']]

    _, ax = plt.subplots()
    n_positions, item_num = intra_inter_pairs.shape
    x = np.arange(n_positions)
    width = auto_bar_width(x, item_num)
    for idx in range(item_num):
        # Locate each name of this column once, then pick mean and SEM.
        rows = [names.index(name) for name in intra_inter_pairs[:, idx]]
        heights = [means[row] for row in rows]
        errors = [sems[row] for row in rows]
        # Later columns are drawn progressively more transparent.
        opacity = 1. / ((idx + 1) / 2 + 0.5)
        bars = ax.bar(x + width * idx, heights, width,
                      color='k', alpha=opacity, yerr=errors)
        show_bar_value(bars, '.3f')

    # Ticks under the first-column bars, followed by the last-column bars.
    tick_positions = np.concatenate((x, x + width * (item_num - 1)))
    tick_labels = list(intra_inter_pairs[:, 0]) + list(intra_inter_pairs[:, 1])
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylabel('pearsonr')
    plt.setp(ax.get_xticklabels(),
             rotation=25,
             ha='right',
             rotation_mode='anchor')

    plt.tight_layout()
    plt.show()