Example #1
0
def test_csv_io():
    """Read the statistics CSV and print two ``to_dict`` views of it.

    The first view is requested with ``keys=['#subjects']`` on the default
    axis, the second with axis ``1`` and ``keys=['2', '1']``.
    """
    csv_reader = CsvReader('../data/statistics.csv')
    default_axis_view = csv_reader.to_dict(keys=['#subjects'])
    axis1_view = csv_reader.to_dict(1, keys=['2', '1'])

    for view in (default_axis_view, axis1_view):
        print(view)
def two_way_anova():
    """Fit a two-way ANOVA (ROI x cope, with interaction) on z-statistics.

    Every ROI's fingerprint file is read into a dict that maps a cope name
    to its sequence of z-statistics. These are flattened into a long-format
    table with one row per value, an OLS model is fitted, and the type-2
    ANOVA table is printed.
    """
    import pandas as pd
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    columns = ['z_stat', 'roi', 'cope']
    long_table = {column: [] for column in columns}
    for roi in rois:
        fingerprint = CsvReader(fingerprint_files.format(roi)).to_dict()
        for cope, z_stats in fingerprint.items():
            n_values = len(z_stats)
            long_table['roi'] += [roi] * n_values
            long_table['cope'] += [cope] * n_values
            long_table['z_stat'] += [float(z_stat) for z_stat in z_stats]
    data = pd.DataFrame(long_table, columns=columns)

    model = ols('z_stat ~ C(roi) + C(cope) + C(roi):C(cope)', data).fit()
    aov_table = anova_lm(model, typ=2)
    # NOTE(review): the return values are ignored, so these helpers
    # presumably add effect-size columns to aov_table in place - confirm.
    eta_squared(aov_table)
    omega_squared(aov_table)
    print(aov_table)
def compare_plot_bar():
    """Plot the significant entries of each pairwise comparison file.

    For every item pair the file ``<item1>_vs_<item2>`` under
    ``compare_sess2`` is read. Its p-values are optionally FDR-corrected
    ('fdr_bh'), and only the samples whose p-value is below ``alpha`` are
    printed and passed on to ``plot_compare``.
    """
    # https://www.statsmodels.org/dev/_modules/statsmodels/stats/multitest.html
    from statsmodels.stats.multitest import multipletests
    from commontool.algorithm.statistics import plot_compare
    from commontool.io.io import CsvReader

    compare_dir = pjoin(connect_dir, 'compare_sess2')
    item_pairs = [
        ['l1_FFA1', 'l2_FFA1'],
        # ['l1_FFA2', 'l2_FFA2']
    ]

    multi_test_corrected = True
    alpha = 0.01
    for first_item, second_item in item_pairs:
        file_name = '{}_vs_{}'.format(first_item, second_item)
        compare_dict = CsvReader(pjoin(compare_dir, file_name)).to_dict(1)
        ps = np.array([float(p) for p in compare_dict['p']])
        if multi_test_corrected:
            # Only the corrected p-values (second return value) are needed.
            _, ps, _, _ = multipletests(ps, 0.05, 'fdr_bh')
        sample_names = [
            name
            for idx, name in enumerate(compare_dict['sample_name'])
            if ps[idx] < alpha
        ]
        ps = [p for p in ps if p < alpha]
        print('\n'.join(str(pair) for pair in zip(sample_names, ps)))
        plot_compare(ps, sample_names, title=file_name)

    plt.show()
def connect_pattern_plot():
    """Show the Pearson correlation matrix between connectivity patterns.

    Each item's mean/SEM file under ``mean_sem_sess2`` contributes one
    pattern (its list of means). The pairwise Pearson correlations are
    drawn as an annotated heat map with tick labels on the top edge.
    """
    from commontool.io.io import CsvReader

    mean_sem_dir = pjoin(connect_dir, 'mean_sem_sess2')
    items = ['l2_FFA1', 'l2_FFA2', 'l1_FFA1', 'l1_FFA2']
    name2s = [FFA2name2[item] for item in items]
    item_num = len(items)

    connect_patterns = []
    for item in items:
        mean_sem_dict = CsvReader(pjoin(mean_sem_dir, item)).to_dict(1)
        sample_names = mean_sem_dict['sample_name']
        connect_patterns.append([
            float(mean_sem_dict['mean'][sample_names.index(name)])
            for name in sample_names
        ])

    # The matrix is symmetric, so only the upper triangle is computed.
    corr_arr = np.zeros((item_num, item_num))
    for i in range(item_num):
        for j in range(i, item_num):
            corr_arr[i, j] = corr_arr[j, i] = pearsonr(connect_patterns[i], connect_patterns[j])[0]

    # Move the x ticks and their labels from the bottom to the top edge.
    plt.rcParams['xtick.bottom'] = False
    plt.rcParams['xtick.labelbottom'] = False
    plt.rcParams['xtick.top'] = True
    plt.rcParams['xtick.labeltop'] = True
    fig, ax = plt.subplots()
    im = ax.imshow(corr_arr, cmap='jet')
    tick_positions = range(item_num)
    plt.xticks(tick_positions, name2s)
    plt.yticks(tick_positions, name2s)
    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label('pearsonr')
    # Write each coefficient into its cell (row i, column j).
    for (i, j), corr in np.ndenumerate(corr_arr):
        ax.text(j, i, '{:.2f}'.format(corr), ha="center", va="center", color='w', fontsize=16)
    plt.tight_layout()
    plt.show()
def mean_sem_calc():
    """Compute per-sample mean and SEM of the session-2 connectivity data.

    For every seed ROI in ``FFA_rois``, the rows of the connectivity array
    whose group label equals the second character of the ROI name are
    selected. ``get_samples`` turns them into samples, and
    ``calc_mean_sem`` writes the result to ``mean_sem_sess2/<roi>``.
    Sample names that appear in ``FFA2name`` are replaced by their mapped
    name first.
    """
    from commontool.io.io import CsvReader
    from commontool.algorithm.statistics import calc_mean_sem

    connectivity_file = pjoin(connect_dir, 'connectivity_sess2.npy')
    connectivity_data = np.load(connectivity_file)

    npy_info_file = pjoin(connect_dir, 'connectivity_info')
    all_rois = CsvReader(npy_info_file).to_dict(1)['region_name']

    group_labels_file = pjoin(connect_dir, 'group_labels_4run_1200')
    # Use a context manager so the file handle is closed deterministically.
    with open(group_labels_file) as label_file:
        group_labels = np.array(label_file.read().split(' '))

    mean_sem_dir = pjoin(connect_dir, 'mean_sem_sess2')
    if not os.path.exists(mean_sem_dir):
        os.makedirs(mean_sem_dir)

    for item in FFA_rois:
        # item[1] is the second character of the ROI name and is compared
        # with the (string) group labels to pick that group's rows.
        sub_connectivity_data = connectivity_data[group_labels == item[1]]
        samples, sample_names = get_samples(sub_connectivity_data,
                                            item, all_rois, item2exclude[item])
        # Rename in place so the object returned by get_samples is reused.
        for idx, sample_name in enumerate(sample_names):
            if sample_name in FFA2name:
                sample_names[idx] = FFA2name[sample_name]
        calc_mean_sem(samples, pjoin(mean_sem_dir, item), sample_names)
def gender_diff_roi_mean_plot(roi_mean_file, items_m, items_f, xticklabels, ylabel=None, title=None):
    """Compare male and female ROI means with t-tests and a grouped bar plot.

    :param roi_mean_file: path of the CSV read through ``CsvReader``
    :param items_m: keys of the male columns in the file
    :param items_f: keys of the female columns, paired by position with
        ``items_m`` (both must have the same length)
    :param xticklabels: one tick label per male/female pair
    :param ylabel: optional y-axis label
    :param title: optional plot title

    An independent-samples t-test is printed for every pair, then the mean
    of each column is drawn with its SEM as the error bar.
    """
    assert len(items_m) == len(items_f)

    roi_mean_dict = CsvReader(roi_mean_file).to_dict(axis=1)

    def values_of(item):
        return [float(value) for value in roi_mean_dict[item]]

    values_m = [values_of(item) for item in items_m]
    values_f = [values_of(item) for item in items_f]

    for name_m, name_f, group_m, group_f in zip(items_m, items_f, values_m, values_f):
        print('{} vs. {}'.format(name_m, name_f),
              ttest_ind(group_m, group_f))

    fig, ax = plt.subplots()
    x = np.arange(len(items_m))
    width = auto_bar_width(x, 2)
    means_m = [np.mean(group) for group in values_m]
    means_f = [np.mean(group) for group in values_f]
    errs_m = [sem(group) for group in values_m]
    errs_f = [sem(group) for group in values_f]
    # Female bars sit one bar-width to the right of the male bars.
    rects1 = ax.bar(x, means_m, width, color='b', alpha=0.5, yerr=errs_m, ecolor='blue')
    rects2 = ax.bar(x + width, means_f, width, color='r', alpha=0.5, yerr=errs_f, ecolor='red')
    # show_bar_value(rects1, '.3f')
    # show_bar_value(rects2, '.3f')
    ax.legend((rects1, rects2), ('male', 'female'))
    # Centre each tick between the two bars of its pair.
    ax.set_xticks(x + width / 2.0)
    ax.set_xticklabels(xticklabels)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    if ylabel is not None:
        plt.ylabel(ylabel)
    if title is not None:
        plt.title(title)
    plt.tight_layout()
def compare():
    """Run pairwise t-tests between the session-1 connectivity of ROI pairs.

    For each ``(item1, item2)`` pair, the rows whose group label equals the
    second character of the item name are selected from the connectivity
    array and converted to samples by ``get_samples``. The two sample sets
    are compared with ``ttest_ind_pairwise``, which writes to
    ``compare_sess1/<item1>_vs_<item2>``. The sample names of the first
    item, after ``FFA2name`` renaming, label the output.
    """
    from commontool.io.io import CsvReader
    from commontool.algorithm.statistics import ttest_ind_pairwise

    connectivity_file = pjoin(connect_dir, 'connectivity_sess1.npy')
    connectivity_data = np.load(connectivity_file)

    npy_info_file = pjoin(connect_dir, 'connectivity_info')
    all_rois = CsvReader(npy_info_file).to_dict(1)['region_name']

    group_labels_file = pjoin(connect_dir, 'group_labels_4run_1200')
    # Use a context manager so the file handle is closed deterministically.
    with open(group_labels_file) as label_file:
        group_labels = np.array(label_file.read().split(' '))

    compare_dir = pjoin(connect_dir, 'compare_sess1')
    if not os.path.exists(compare_dir):
        os.makedirs(compare_dir)

    item_pairs = [
        ['l1_FFA1', 'l2_FFA1'],
        ['r1_FFA1', 'r2_FFA1']
    ]
    for item1, item2 in item_pairs:
        # item[1] is the second character of the ROI name and is compared
        # with the (string) group labels to pick that group's rows.
        samples1, sample_names1 = get_samples(connectivity_data[group_labels == item1[1]],
                                              item1, all_rois, item2exclude[item1])
        for idx, sample_name1 in enumerate(sample_names1):
            if sample_name1 in FFA2name:
                sample_names1[idx] = FFA2name[sample_name1]
        # The second item's sample names are not used for the output.
        samples2, _ = get_samples(connectivity_data[group_labels == item2[1]],
                                  item2, all_rois, item2exclude[item2])
        output_file = pjoin(compare_dir, '{}_vs_{}'.format(item1, item2))
        ttest_ind_pairwise(samples1, samples2, output_file, sample_names1)
def ttest():
    """Print one-sample and between-ROI t-tests on the fingerprint data.

    For every ROI, each cope column is tested against zero. Afterwards the
    'FACE-AVG' columns of every unordered ROI pair are compared with an
    independent-samples t-test.
    """
    from scipy.stats import ttest_ind, ttest_1samp

    roi2face_avg = dict()
    for roi in rois:
        csv_dict = CsvReader(fingerprint_files.format(roi)).to_dict()
        converted = dict()
        for cope, raw_values in csv_dict.items():
            values = np.array(raw_values, np.float64)
            converted[cope] = values
            print('{}_{}:'.format(roi, cope), ttest_1samp(values, 0))
        roi2face_avg[roi] = converted['FACE-AVG']

    # Visit each unordered pair once; the last ROI has no partners left.
    for i, roi1 in enumerate(rois):
        for roi2 in rois[i + 1:]:
            print('{0}_face vs. {1}_face:'.format(roi1, roi2),
                  ttest_ind(roi2face_avg[roi1], roi2face_avg[roi2]))
Example #9
0
def compare_plot_bar():
    """Plot the significant p-values of the 'myelin' comparison as bars.

    The p-values in the comparison file are optionally FDR-corrected
    ('fdr_bh'). Only samples with p below ``alpha`` are printed and drawn,
    and horizontal reference lines mark 0.05, 0.01 and 0.001.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    from os.path import join as pjoin
    # https://www.statsmodels.org/dev/_modules/statsmodels/stats/multitest.html
    from statsmodels.stats.multitest import multipletests
    from commontool.io.io import CsvReader
    from commontool.algorithm.plot import auto_bar_width, show_bar_value

    stru_name = 'myelin'
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    stru_dir = pjoin(project_dir,
                     's2_25_zscore/HAC_ward_euclidean/2clusters/structure')
    compare_file = pjoin(pjoin(stru_dir, 'compare'), stru_name)

    multi_test_corrected = True
    alpha = 0.001
    compare_dict = CsvReader(compare_file).to_dict(1)
    ps = np.array([float(p) for p in compare_dict['p']])
    if multi_test_corrected:
        # Only the corrected p-values (second return value) are needed.
        _, ps, _, _ = multipletests(ps, 0.05, 'fdr_bh')
    sample_names = [
        name for idx, name in enumerate(compare_dict['sample_name'])
        if ps[idx] < alpha
    ]
    ps = [p for p in ps if p < alpha]
    print('\n'.join(str(pair) for pair in zip(sample_names, ps)))

    fig, ax = plt.subplots()
    x = np.arange(len(sample_names))
    rects_p = ax.bar(x, ps, auto_bar_width(x), color='g', alpha=0.5)
    show_bar_value(rects_p, '.2f')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_title(stru_name)
    ax.set_ylabel('p', color='g')
    ax.tick_params('y', colors='g')
    # Reference lines at the usual significance thresholds.
    for threshold in (0.05, 0.01, 0.001):
        ax.axhline(threshold)
    ax.set_xticks(x)
    ax.set_xticklabels(sample_names)
    plt.setp(ax.get_xticklabels(),
             rotation=25,
             ha='right',
             rotation_mode='anchor')

    plt.tight_layout()
    plt.show()
def mds_plot():
    """Embed the ROI fingerprints with MDS and scatter-plot the result.

    For each ROI the column-wise mean fingerprint is taken. When
    ``only_mean`` is False, the individual fingerprints are appended after
    it. All rows are stacked, embedded with the default ``MDS`` settings,
    and drawn per ROI: the first row (the mean) as a large marker, any
    further rows as small ones.
    """
    from sklearn.manifold import MDS
    from collections import OrderedDict

    only_mean = True

    data = OrderedDict()
    for roi in rois:
        reader = CsvReader(fingerprint_files.format(roi))
        # rows[0] is skipped here; every other row is parsed as floats.
        fingerprints = np.array(reader.rows[1:], dtype=np.float64)
        mean_row = np.atleast_2d(np.mean(fingerprints, axis=0))
        data[roi] = mean_row if only_mean else np.r_[mean_row, fingerprints]

    # Stack every ROI's rows and remember where each ROI's slice lives.
    blocks = list(data.values())
    X = np.zeros((0, blocks[0].shape[1]))
    offsets = []
    counts = []
    for block in blocks:
        offsets.append(X.shape[0])
        counts.append(block.shape[0])
        X = np.concatenate((X, block), axis=0)
    print(counts)

    X_transformed = MDS().fit_transform(X)

    fig, ax = plt.subplots()
    for idx, roi in enumerate(rois):
        start = offsets[idx]
        positions = X_transformed[start:start + counts[idx]]
        color = roi2color[roi]
        mean_point, other_points = positions[0], positions[1:]
        ax.scatter(mean_point[0],
                   mean_point[1],
                   c=color,
                   label=roi + '_mean',
                   s=30)
        if positions.shape[0] > 1:
            ax.scatter(other_points[:, 0],
                       other_points[:, 1],
                       c=color,
                       label=roi,
                       s=1)
    ax.legend()
    ax.tick_params(bottom=False,
                   left=False,
                   labelbottom=False,
                   labelleft=False)
    plt.show()
def curve_plot(show_errbar=False):
    """Plot each ROI's mean fingerprint as a curve on one shared axis.

    :param show_errbar: when True, shade a band of one standard deviation
        above and below every mean curve

    The x tick labels are taken from the first row of the first ROI's
    fingerprint file.
    """
    fig, ax = plt.subplots()
    for loop_idx, roi in enumerate(rois):
        reader = CsvReader(fingerprint_files.format(roi))
        # rows[0] is used for the tick labels; the rest is parsed as floats.
        fingerprints = np.array(reader.rows[1:], dtype=np.float64)

        x = np.arange(fingerprints.shape[1])
        mean_curve = np.mean(fingerprints, axis=0)
        color = roi2color[roi]
        ax.plot(x, mean_curve, '{}.-'.format(color), label=roi)
        if show_errbar:
            spread = np.std(fingerprints, axis=0)
            ax.fill_between(x,
                            mean_curve - spread,
                            mean_curve + spread,
                            alpha=0.5,
                            facecolors=color2facecolor[color])
        ax.legend()

        if loop_idx == 0:
            # Tick labels are set once, from the first ROI's file.
            plt.xticks(x, np.array(reader.rows[0]))
            # plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')

    # fingerprint_file_1080 = '/nfs/t3/workingshop/chenxiayu/study/FFA_clustering/data/HCP_face-avg/label/' \
    #                         'lFFA_2mm_func_fingerprint.csv'
    # reader_1080 = CsvReader(fingerprint_file_1080)
    # fingerprints_1080 = np.array(reader_1080.rows[1:], dtype=np.float64)
    # x_1080 = np.arange(fingerprints_1080.shape[1])
    # fingerprints_1080_mean = np.mean(fingerprints_1080, axis=0)
    # ax.plot(x_1080, fingerprints_1080_mean, 'm*-', label='lFFA_2mm')
    # if show_errbar:
    #     fingerprints_1080_std = np.std(fingerprints_1080, axis=0)
    #     ax.fill_between(x_1080, fingerprints_1080_mean - fingerprints_1080_std,
    #                     fingerprints_1080_mean + fingerprints_1080_std,
    #                     alpha=0.5, facecolors='fuchsia')

    plt.tight_layout()
    plt.show()
def plot_compare(compare_file, label_ids=None, p_thr=None):
    """Draw the t and p values of a comparison file as twin-axis bars.

    :param compare_file: path of the comparison CSV; its base name becomes
        the plot title
    :param label_ids: ids of the labels to show (converted to ``str``);
        every id in the file is used when None
    :param p_thr: when given, only labels whose p-value is below this
        threshold are kept

    t values go on the left axis and p values on the right one, which also
    carries reference lines at 0.05, 0.01 and 0.001.
    """
    file_name = os.path.basename(compare_file)
    compare_dict = CsvReader(compare_file).to_dict(1)

    def lookup(column, label_id):
        # Value of `column` in the row whose label_id matches.
        return compare_dict[column][compare_dict['label_id'].index(label_id)]

    if label_ids is None:
        label_ids = compare_dict['label_id']
    else:
        label_ids = [str(label_id) for label_id in label_ids]

    if p_thr is not None:
        label_ids = [label_id for label_id in label_ids if float(lookup('p', label_id)) < p_thr]

    fig, ax = plt.subplots()
    ax_twin = ax.twinx()
    if len(label_ids) > 0:
        x = np.arange(len(label_ids))
        width = auto_bar_width(x)
        t_values = [float(lookup('t', label_id)) for label_id in label_ids]
        p_values = [float(lookup('p', label_id)) for label_id in label_ids]
        rects_t = ax.bar(x, t_values, width, color='b', alpha=0.5)
        rects_p = ax_twin.bar(x, p_values, width, color='g', alpha=0.5)
        ax.legend([rects_t, rects_p], ['t', 'p'])
        ax.set_xticks(x)
        ax.set_xticklabels([lookup('label_name', label_id) for label_id in label_ids])
        plt.setp(ax.get_xticklabels(), rotation=-90, ha='left', rotation_mode='anchor')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_title(file_name)
    ax.set_ylabel('t', color='b')
    ax.tick_params('y', colors='b')
    ax_twin.set_ylabel('p', color='g')
    ax_twin.tick_params('y', colors='g')
    for threshold in (0.05, 0.01, 0.001):
        ax_twin.axhline(threshold)

    plt.tight_layout()
    plt.show()
def roi_mean_plot(roi_mean_file, ROIitems, colors, xticklabels, ylabel=None, title=None, plot_style='violin'):
    """Print pairwise t-tests between ROI columns and plot them.

    :param roi_mean_file: path of the CSV read through ``CsvReader``
    :param ROIitems: keys of the columns to compare and plot
    :param colors: one colour per violin body; in 'bar' style only
        ``colors[0]`` is used, as the edge colour
    :param xticklabels: one tick label per item
    :param ylabel: optional y-axis label
    :param title: optional plot title
    :param plot_style: 'violin' or 'bar'
    :raises RuntimeError: for any other ``plot_style``
    """
    roi_mean_dict = CsvReader(roi_mean_file).to_dict(axis=1)
    roi_means_list = [[float(value) for value in roi_mean_dict[name]] for name in ROIitems]

    # Independent-samples t-test for every unordered pair of items.
    ROIitem_num = len(ROIitems)
    for i, means_i in enumerate(roi_means_list):
        for j in range(i + 1, ROIitem_num):
            print('{} vs. {}'.format(ROIitems[i], ROIitems[j]),
                  ttest_ind(means_i, roi_means_list[j]))

    plt.figure()
    if plot_style == 'bar':
        x = np.arange(ROIitem_num)
        heights = [np.mean(values) for values in roi_means_list]
        errors = [sem(values) for values in roi_means_list]
        width = auto_bar_width(x)
        rects = plt.bar(x, heights, width, edgecolor=colors[0], yerr=errors, facecolor='white')
        show_bar_value(rects, '.2f')
        plt.xticks(x, xticklabels)
    elif plot_style == 'violin':
        violin_parts = plt.violinplot(roi_means_list, showmeans=True)
        # https://stackoverflow.com/questions/26291479/changing-the-color-of-matplotlibs-violin-plots
        for idx, body in enumerate(violin_parts['bodies']):
            body.set_color(colors[idx])
        # Ticks run from 1 to ROIitem_num, one per violin.
        plt.xticks(range(1, ROIitem_num + 1), xticklabels)
    else:
        raise RuntimeError("Invalid plot style: {}".format(plot_style))
    if ylabel is not None:
        plt.ylabel(ylabel)
    if title is not None:
        plt.title(title)
    ax = plt.gca()
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    plt.tight_layout()
def plot_mean_sem(mean_sem_files, items=None, label_ids=None, xlabel='', ylabel=''):
    """Draw grouped bars of the mean (with SEM error bars) from several files.

    :param mean_sem_files: sequence of mean/SEM file paths, one bar group
        member per file
    :param items: optional legend entries, one per file
    :param label_ids: ids of the labels to show (converted to ``str``);
        defaults to every id in the first file
    :param xlabel: x-axis label
    :param ylabel: y-axis label
    """
    fig, ax = plt.subplots()
    x = width = xticklabels = None
    rects_list = []
    item_num = len(mean_sem_files)
    for idx, mean_sem_file in enumerate(mean_sem_files):
        mean_sem_dict = CsvReader(mean_sem_file).to_dict(1)
        if label_ids is None:
            label_ids = mean_sem_dict['label_id']
        else:
            label_ids = [str(label_id) for label_id in label_ids]
        # Row of every requested label inside this file.
        rows = [mean_sem_dict['label_id'].index(label_id) for label_id in label_ids]
        if x is None:
            # Tick labels and bar geometry are fixed by the first file.
            xticklabels = [mean_sem_dict['label_name'][row] for row in rows]
            x = np.arange(len(label_ids))
            width = auto_bar_width(x, item_num)
        means = [float(mean_sem_dict['mean'][row]) for row in rows]
        sems = [float(mean_sem_dict['sem'][row]) for row in rows]
        # Each later file is shifted one bar-width right and drawn fainter.
        rects_list.append(
            ax.bar(x+width*idx, means, width, color='k', alpha=1./((idx+1)/2+0.5), yerr=sems))
    if items is not None:
        assert item_num == len(items)
        ax.legend(rects_list, items)
    # Centre each tick under its group of bars.
    ax.set_xticks(x+width/2.0*(item_num-1))
    ax.set_xticklabels(xticklabels)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.setp(ax.get_xticklabels(), rotation=-90, ha='left', rotation_mode='anchor')
    # plt.ylim(bottom=5.5)

    plt.tight_layout()
    plt.show()
def mean_sem_plot_radar():
    """Draw the session-1 mean connectivity of two items as a radar chart.

    Each item's means are plotted as a closed polygon on a polar axis. The
    angular tick labels are the sample names of the last file read.
    """
    # https://www.kaggle.com/typewind/draw-a-radar-chart-with-python-in-a-simple-way
    # https://python-graph-gallery.com/390-basic-radar-chart/
    # https://stackoverflow.com/questions/26583620/how-to-plot-error-bars-in-polar-coordinates-in-python
    # https://matplotlib.org/gallery/api/radar_chart.html
    # https://stackoverflow.com/questions/49488018/radar-plot-matplotlib-python-how-to-set-label-alignment
    from commontool.io.io import CsvReader

    mean_sem_dir = pjoin(connect_dir, 'mean_sem_sess1')
    items = ['l2_FFA1', 'l1_FFA1']
    mean_sem_files = [pjoin(mean_sem_dir, item) for item in items]
    name2s = [FFA2name2[item] for item in items]
    ax = plt.subplot(111, polar=True)
    for legend_name, mean_sem_file in zip(name2s, mean_sem_files):
        mean_sem_dict = CsvReader(mean_sem_file).to_dict(1)
        sample_names = mean_sem_dict['sample_name']
        # One spoke per sample; repeat the first point to close the polygon.
        angles = np.linspace(0, 2*np.pi, len(sample_names), endpoint=False)
        angles = np.append(angles, angles[0])
        means = [
            float(mean_sem_dict['mean'][sample_names.index(name)])
            for name in sample_names
        ]
        means.append(means[0])
        ax.plot(angles, means, linewidth=1, linestyle='solid', label=legend_name)

        # sems = [float(mean_sem_dict['sem'][mean_sem_dict['sample_name'].index(i)]) for i in sample_names]
        # sems += [sems[0]]
        # ax.errorbar(angles, means, yerr=sems, capsize=0, linewidth=1, linestyle='solid')

    ax.legend(loc='upper center')
    # Drop the duplicated closing angle when placing the ticks.
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(sample_names)
    # for label, rot in zip(ax.get_xticklabels(), angles[:-1]):
    #     label.set_horizontalalignment("left")
    #     label.set_rotation_mode("anchor")
    #     label.set_rotation(np.rad2deg(rot))

    plt.tight_layout()
    plt.show()
    rects2 = ax.bar(x + width,
                    means_f,
                    width,
                    color='r',
                    alpha=0.5,
                    yerr=sems_f,
                    ecolor='red')
    # show_bar_value(rects1, '.3f')
    # show_bar_value(rects2, '.3f')
    ax.legend((rects1, rects2), ('male', 'female'))
    ax.set_xticks(x + width / 2.0)
    ax.set_xticklabels(rois)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_ylabel('activation (z-stat)')

    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    # Script entry point: load the behaviour table and run one of the
    # gender-difference analyses on it.
    from commontool.io.io import CsvReader

    behavior_file = pjoin(project_dir, 'data/HCP/S1200_behavior.csv')
    csv_reader = CsvReader(behavior_file)
    behavior_dict = csv_reader.to_dict()

    # Alternative analyses, currently disabled:
    # different_activation_gender_1080(behavior_dict)
    # different_activation_gender_intrasubgroup(behavior_dict)
    different_activation_gender_roi2allsubgroup(behavior_dict)
if __name__ == '__main__':
    # Script entry point: draw two histograms from the rFFA patch statistics
    # file, one of patch counts and one of patch sizes.
    import numpy as np

    from matplotlib import pyplot as plt
    from commontool.io.io import CsvReader

    stat_file = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering/data/' \
                'HCP_face-avg/s2/patches_15/crg/rFFA_patch_stats'
    reader = CsvReader(stat_file)
    # NOTE(review): presumably column 1 holds the patch count per row and the
    # columns from index 2 onward hold the individual patch sizes - confirm
    # against the file layout.
    patch_nums = np.array(reader.cols[1], dtype=np.uint16)
    patch_sizes = []
    for row in reader.rows:
        patch_sizes.extend(row[2:])
    patch_sizes = np.array(patch_sizes, dtype=np.uint16)

    # Bin edges at 1, 2, ..., max + 1 give one bin per integer value.
    plt.hist(patch_nums,
             np.arange(1,
                       patch_nums.max() + 2),
             align='left',
             facecolor='white',
             edgecolor='black')
    plt.xlabel('#patch')
    plt.ylabel('count')
    # Second figure: distribution of patch sizes, same binning scheme.
    plt.figure()
    plt.hist(patch_sizes,
             np.arange(1,
                       patch_sizes.max() + 2),
             align='left',
             facecolor='white',
             edgecolor='black')
    plt.xlabel('patch size / #vertices')
Example #18
0
    samples2 = maps[group_labels == pair_labels[1]].T
    sample_names = list(map(str, range(maps.shape[1])))
    compare_file = pjoin(
        compare_dir, '{}_g{}_vs_g{}'.format(hemi, pair_labels[0],
                                            pair_labels[1]))
    ttest_ind_pairwise(samples1, samples2, compare_file, sample_names)
    # ---compare end---

    # ---compare2nifti start---
    compare_file = pjoin(compare_dir, '{}_g1_vs_g2'.format(hemi))
    mask_file = pjoin(
        project_dir,
        'data/HCP_1080/face-avg_s2/label/{}_posterior_brain_mask.label'.format(
            hemi))

    compare_dict = CsvReader(compare_file).to_dict(1)
    valid_idx_mat = np.array(compare_dict['p']) != 'nan'
    if mask_file is not None:
        mask_vertices = nib.freesurfer.read_label(mask_file)
        mask_idx_mat = np.zeros_like(valid_idx_mat, dtype=np.bool)
        mask_idx_mat[mask_vertices] = True
        valid_idx_mat = np.logical_and(valid_idx_mat, mask_idx_mat)

    compare_data = np.zeros((3, maps.shape[1]))
    ps_uncorrected = np.array([
        float(p) for idx, p in enumerate(compare_dict['p'])
        if valid_idx_mat[idx]
    ])
    reject, ps_corrected, alpha_sidak, alpha_bonf = multipletests(
        ps_uncorrected, 0.05, 'fdr_bh')
    ts = [
Example #19
0
    project_dir = '/nfs/s2/userhome/chenxiayu/workingdir/study/FFA_clustering'
    connect_dir = pjoin(project_dir, 's2_25_zscore/HAC_ward_euclidean/2clusters/rfMRI_connectivity/PAM_z165_p025_ROI')
    subject_ids_file = pjoin(connect_dir, 'subject_id_4run_1200')
    tseries_LR_file1 = pjoin(connect_dir, '{subject}/rfMRI_REST1_LR.npy')
    tseries_RL_file1 = pjoin(connect_dir, '{subject}/rfMRI_REST1_RL.npy')
    npy_info_file1 = pjoin(connect_dir, 'npy_info')
    tseries_LR_file2 = pjoin(connect_dir, 'addition/{subject}/rfMRI_REST1_LR.npy')
    tseries_RL_file2 = pjoin(connect_dir, 'addition/{subject}/rfMRI_REST1_RL.npy')
    npy_info_file2 = pjoin(connect_dir, 'addition/npy_info')
    out_conn_file = pjoin(connect_dir, 'connectivity_sess1.npy')
    out_info_file = pjoin(connect_dir, 'connectivity_info')

    subject_ids = np.array(open(subject_ids_file).read().splitlines())

    r_names1 = CsvReader(npy_info_file1).to_dict(1)['region_name']
    invalid_rois_of_1 = ['l2_FFA1', 'r2_FFA1']
    invalid_rows_of_1 = [r_names1.index(roi) for roi in invalid_rois_of_1]
    for roi in invalid_rois_of_1:
        r_names1.remove(roi)

    r_names2 = CsvReader(npy_info_file2).to_dict(1)['region_name']
    new_rois_of_2 = ['l2_FFA1', 'l2_FFA2', 'r2_FFA1', 'r2_FFA2']
    new_rows_of_2 = [r_names2.index(roi) for roi in new_rois_of_2]

    r_names = r_names1 + new_rois_of_2

    connectivity = []
    for subject in subject_ids:
        tseries_LR1 = np.load(tseries_LR_file1.format(subject=subject))
        tseries_RL1 = np.load(tseries_RL_file1.format(subject=subject))
def _save_subject_count_bar(counts, out_path):
    """Draw ``counts`` (label -> number of subjects) as bars and save to ``out_path``."""
    # Materialise the dict views: matplotlib cannot turn a Python 3
    # dict_values / dict_keys object into an array of heights or labels.
    labels = list(counts.keys())
    heights = list(counts.values())

    plt.figure()
    x = np.arange(len(labels))
    width = auto_bar_width(x)
    rects = plt.bar(x, heights, width, color='b')
    show_bar_value(rects)
    plt.ylabel('#subjects')
    plt.xticks(x, labels)
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.savefig(out_path)


def explore_roi_stats(n_clusters_dir):
    """Count subjects by number and type of ROIs and save two bar charts.

    Reads 'mean_map_ROIs.nii.gz' and 'statistics.csv' from
    ``n_clusters_dir``. For each ROI map, the set of distinct values decides
    which counters receive that map's '#subjects' value:

    * background plus one label: one ROI, typed 'r_pFFA' (label 1),
      'r_mFFA' (label 2) or 'unknown' (label 3);
    * background plus two labels: two ROIs, which must be labels 1 and 2
      ('both').

    The counters are saved as 'numb_count.png' and 'type_count.png' in
    ``n_clusters_dir``.

    :param n_clusters_dir: directory holding the inputs and receiving the
        figures
    :raises RuntimeError: if a map has no zero value, or its label set does
        not match one of the cases above
    """
    roi_path = pjoin(n_clusters_dir, 'mean_map_ROIs.nii.gz')
    stats_path = pjoin(n_clusters_dir, 'statistics.csv')
    roi_maps = read_nifti(roi_path)
    stats_reader = CsvReader(stats_path)
    row_dict = stats_reader.to_dict(keys=['#subjects'])

    numb_items = ['1', '2']
    numb_dict = OrderedDict((item, 0) for item in numb_items)

    type_items = ['r_pFFA', 'r_mFFA', 'both', 'unknown']
    type_dict = OrderedDict((item, 0) for item in type_items)

    for idx, roi_map in enumerate(roi_maps):
        # NOTE(review): set() over roi_map assumes each map iterates as
        # scalar values (i.e. is 1-D) - confirm against read_nifti.
        map_set = set(roi_map)
        subjects_num = int(row_dict['#subjects'][idx])

        if 0 not in map_set:
            raise RuntimeError('Be careful! There is no zero in one roi_map')

        if len(map_set) == 2:
            numb_dict['1'] += subjects_num
            if 1 in map_set:
                type_dict['r_pFFA'] += subjects_num
            elif 2 in map_set:
                type_dict['r_mFFA'] += subjects_num
            elif 3 in map_set:
                type_dict['unknown'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the only one ROI label is not in (1, 2, 3)')
        elif len(map_set) == 3:
            numb_dict['2'] += subjects_num
            if 1 in map_set and 2 in map_set:
                type_dict['both'] += subjects_num
            else:
                raise RuntimeError(
                    'Be careful! the two ROI labels are not 1 and 2')
        else:
            raise RuntimeError(
                'Be careful! the number of ROI labels is not 1 or 2')

    _save_subject_count_bar(numb_dict, pjoin(n_clusters_dir, 'numb_count.png'))
    _save_subject_count_bar(type_dict, pjoin(n_clusters_dir, 'type_count.png'))
    group_labels = np.array(open(group_labels_file).read().split(' '),
                            dtype=np.uint16)
    group_labels_uniq = np.unique(group_labels)

    # prepare target regions
    trg_regions_lr = dict()
    mask_l = nib.load(mask_file_l).get_data().ravel()
    mask_r = nib.load(mask_file_r).get_data().ravel()
    trg_regions_lr['lh'] = [
        np.where(mask_l == i)[0] for i in np.unique(mask_l) if i != 0
    ]
    trg_regions_lr['rh'] = [
        np.where(mask_r == i)[0] for i in np.unique(mask_r) if i != 0
    ]
    label_names_lr = dict()
    label_names_lr['lh'] = CsvReader(
        mask_labelconfig_l).to_dict()['label_name']
    label_names_lr['rh'] = CsvReader(
        mask_labelconfig_r).to_dict()['label_name']
    for group_label in group_labels_uniq:
        for hemi in hemis:
            roi_file = roi_files.format(hemi[0], group_label)
            roi_name = roi_file.split('.')[0]
            roi_data = nib.load(roi_file).get_data().ravel()
            trg_regions_lr[hemi].append(np.where(roi_data != 0)[0])
            label_names_lr[hemi].append(roi_name)
            roi_data_uniq = np.unique(roi_data).astype(np.uint8)
            for roi_label in roi_data_uniq:
                if roi_label != 0:
                    trg_regions_lr[hemi].append(
                        np.where(roi_data == roi_label)[0])
                    label_names_lr[hemi].append(roi_name + str(roi_label))
Example #22
0
def plot_mean_sem(mean_sem_files,
                  items=None,
                  sample_names=None,
                  xlabel='',
                  ylabel=''):
    """
    Draw grouped bars of the mean, with SEM error bars, from several files.

    :param mean_sem_files: sequence
        a sequence of file paths which are generated from 'calc_mean_sem'
    :param items: sequence
        a sequence of item names corresponding to the 'mean_sem_files',
        used as legend entries
    :param sample_names: collection
        a collection of sample names of interest; defaults to every
        sample name in the first file
    :param xlabel: str
    :param ylabel: str

    :returns: fig, ax
    """
    fig, ax = plt.subplots()
    x = width = None
    rects_list = []
    item_num = len(mean_sem_files)
    for idx, mean_sem_file in enumerate(mean_sem_files):
        mean_sem_dict = CsvReader(mean_sem_file).to_dict(1)
        if sample_names is None:
            sample_names = mean_sem_dict['sample_name']
        if x is None:
            # Bar positions and width are fixed by the first file.
            x = np.arange(len(sample_names))
            width = auto_bar_width(x, item_num)
        # Row of every requested sample inside this file.
        rows = [
            mean_sem_dict['sample_name'].index(name) for name in sample_names
        ]
        means = [float(mean_sem_dict['mean'][row]) for row in rows]
        sems = [float(mean_sem_dict['sem'][row]) for row in rows]
        # Each later file is shifted one bar-width right and drawn fainter.
        rects_list.append(
            ax.bar(x + width * idx,
                   means,
                   width,
                   color='k',
                   alpha=1. / ((idx + 1) / 2 + 0.5),
                   yerr=sems))
    if items is not None:
        assert item_num == len(items)
        ax.legend(rects_list, items)
    # Centre each tick under its group of bars.
    ax.set_xticks(x + width / 2.0 * (item_num - 1))
    ax.set_xticklabels(sample_names)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.setp(ax.get_xticklabels(),
             rotation=-90,
             ha='left',
             rotation_mode='anchor')

    plt.tight_layout()
    return fig, ax
Example #23
0
    FFA_file_r = pjoin(project_dir,
                       'data/HCP_1080/face-avg_s2/label/rFFA_25.label')
    subFFA_files = pjoin(acti_dir, '{}{}_FFA.nii.gz')
    acti_analysis_dir = pjoin(acti_dir, 'acti_of_FSR')
    if not os.path.exists(acti_analysis_dir):
        os.makedirs(acti_analysis_dir)

    trg_regions_lr = dict()
    label_names_lr = dict()
    FSR_nonFFA_l = nib.load(FSR_nonFFA_file_l).get_data().ravel()
    FSR_nonFFA_r = nib.load(FSR_nonFFA_file_r).get_data().ravel()
    trg_regions_lr['lh'] = [
        np.where(FSR_nonFFA_l == i)[0] for i in np.unique(FSR_nonFFA_l)
        if i != 0
    ]
    label_names_lr['lh'] = CsvReader(
        FSR_nonFFA_config_l).to_dict()['label_name']
    trg_regions_lr['rh'] = [
        np.where(FSR_nonFFA_r == i)[0] for i in np.unique(FSR_nonFFA_r)
        if i != 0
    ]
    label_names_lr['rh'] = CsvReader(
        FSR_nonFFA_config_r).to_dict()['label_name']
    trg_regions_lr['lh'].append(nib.freesurfer.read_label(FFA_file_l))
    label_names_lr['lh'].append('lFFA mask')
    trg_regions_lr['rh'].append(nib.freesurfer.read_label(FFA_file_r))
    label_names_lr['rh'].append('rFFA mask')
    for group_label in group_labels_uniq:
        for hemi in hemis:
            subFFA_file = subFFA_files.format(hemi[0], group_label)
            subFFA_file_name = os.path.basename(subFFA_file)
            subFFA_name = subFFA_file_name.split('.')[0]
def compare_plot_mat():
    """Show the t values of each pairwise comparison as an annotated 7x6 grid.

    For every item pair, the file ``<item1>_vs_<item2>`` in ``compare_dir``
    is read and its p-values are optionally FDR-corrected ('fdr_bh').
    Entries with p below ``alpha`` are kept; with ``alpha = 1.1`` that is
    every entry whose p-value is a number. The first sample whose name
    contains 'FFA' is dropped, and the remaining entries fill a 7x6 matrix
    row by row. Cells with p > 0.05 are zeroed so they render at the bottom
    of the colour map, and each cell is annotated with its sample name.

    ``item_pairs`` and ``compare_dir`` are read from the enclosing scope.
    """
    # https://www.statsmodels.org/dev/_modules/statsmodels/stats/multitest.html
    from statsmodels.stats.multitest import multipletests
    from commontool.algorithm.statistics import plot_compare

    n_rows, n_cols = 7, 6
    multi_test_corrected = True
    alpha = 1.1
    for item1, item2 in item_pairs:
        file_name = '{}_vs_{}'.format(item1, item2)
        compare_file = pjoin(compare_dir, file_name)
        compare_dict = CsvReader(compare_file).to_dict(1)
        ps = np.array(list(map(float, compare_dict['p'])))
        if multi_test_corrected:
            # Only the corrected p-values (second return value) are needed.
            _, ps, _, _ = multipletests(ps, 0.05, 'fdr_bh')

        # Build the mask once from the full p-value array, so names, t
        # values and p-values are all filtered by the same rows. Filtering
        # ts against the already-shortened ps list would misalign indices.
        keep = ps < alpha
        sample_names = [name for idx, name in enumerate(compare_dict['sample_name']) if keep[idx]]
        ts = [float(t) for idx, t in enumerate(compare_dict['t']) if keep[idx]]
        ps = list(ps[keep])
        print('\n'.join(list(map(str, zip(sample_names, ps)))))

        ts_mat = np.zeros((n_rows, n_cols))
        # Builtin `object` replaces the `np.object` alias removed in NumPy 1.24.
        names_mat = np.zeros_like(ts_mat, object)
        ps_mat = np.ones_like(ts_mat)

        # Drop the first FFA entry so the remaining samples fit the grid.
        # NOTE(review): when no name contains 'FFA', index 0 is dropped
        # anyway - confirm this fallback is intended.
        ffa_idx = 0
        for idx, name in enumerate(sample_names):
            if 'FFA' in name:
                ffa_idx = idx
                break
        ts.pop(ffa_idx)
        ps.pop(ffa_idx)
        sample_names.pop(ffa_idx)

        # Fill the grid row by row; this needs at least n_rows * n_cols entries.
        for i in range(n_rows):
            for j in range(n_cols):
                idx = i * n_cols + j
                ts_mat[i, j] = ts[idx]
                names_mat[i, j] = sample_names[idx]
                ps_mat[i, j] = ps[idx]
        ts_mat[ps_mat > 0.05] = 0

        fig, ax = plt.subplots()
        im = ax.imshow(ts_mat, cmap='hot', vmin=0, vmax=7)
        for i in range(n_rows):
            for j in range(n_cols):
                if names_mat[i, j]:
                    # White text on zeroed (dark) cells, black elsewhere.
                    c = 'w' if ts_mat[i, j] == 0 else 'k'
                    ax.text(j, i, names_mat[i, j], ha="center", va="center", color=c, fontsize=8)

        # plt.axis('off')
        from mpl_toolkits.axes_grid1 import make_axes_locatable
        ax.set_xticks([])
        ax.set_yticks([])
        # divider = make_axes_locatable(ax)
        # cax = divider.append_axes("right", size="3%", pad=0.05)
        # cbar = fig.colorbar(im, ax=ax, cax=cax)
        cbar = fig.colorbar(im, ax=ax)
        cbar.set_label('t')
        fig.tight_layout()
        plt.show()