def ttest(self, permutation=False, **kwargs):
    '''Calculate a one-sample t-test across samples.

    Args:
        permutation: (bool) Run the t-test as a permutation test.
            Note this can be very slow.

    Returns:
        out: (dict) contains Adjacency instances of t-values (or means
            if running a permutation test) and an Adjacency instance
            of p-values.
    '''
    if self.is_single_matrix:
        raise ValueError('t-test cannot be run on single matrices.')

    if permutation:
        t = []
        p = []
        for i in range(self.data.shape[1]):
            stats = one_sample_permutation(self.data[:, i], **kwargs)
            t.append(stats['mean'])
            p.append(stats['p'])
        t = Adjacency(np.array(t))
        p = Adjacency(np.array(p))
    else:
        t = self.mean().copy()
        p = deepcopy(t)
        t.data, p.data = ttest_1samp(self.data, 0, axis=0)
    return {'t': t, 'p': p}
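# A minimal standalone sketch of what the parametric branch above computes,
# assuming self.data is an (observations x features) array of vectorized
# matrices. The names and data here are illustrative, not from the source.
import numpy as np
from scipy.stats import ttest_1samp

data = np.random.randn(20, 6)        # 20 samples, 6 vectorized matrix cells
t, p = ttest_1samp(data, 0, axis=0)  # one-sample t-test against 0, per cell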
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = one_sample_permutation(x - y, tail=1)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='pearson', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='spearman', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='kendall', tail=2)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    # Two one-tailed p-values from a symmetric null should sum to the
    # two-tailed p-value.
    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p)
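# The _calc_pvalue check above relies on a standard permutation p-value
# convention. A sketch of that convention (illustrative, not the library code;
# the library may add a small-sample correction):
import numpy as np

null = np.random.normal(0, 1, 10000)            # stand-in null distribution
stat = 1.96
upper = np.mean(null >= stat)                   # one-tailed, upper
lower = np.mean(null <= -stat)                  # one-tailed, lower
two_sided = np.mean(np.abs(null) >= abs(stat))  # two-tailed
# The upper and lower tails are disjoint, so two_sided == upper + lower.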
def ttest(self, **kwargs):
    '''Calculate a one-sample permutation t-test across samples.

    Returns:
        (tuple) Adjacency instances of mean values and p-values.
    '''
    if self.is_single_matrix:
        raise ValueError('t-test cannot be run on single matrices.')
    m = []
    p = []
    for i in range(self.data.shape[1]):
        stats = one_sample_permutation(self.data[:, i], **kwargs)
        m.append(stats['mean'])
        p.append(stats['p'])
    mn = Adjacency(np.array(m))
    pval = Adjacency(np.array(p))
    return (mn, pval)
def _run_permutation(self, data):
    '''Helper function to run a nonparametric one-sample permutation test
    on each cell of a grid.'''
    flattened = data.reshape(self.grid_width * self.grid_width, self.n_subjects)
    stats_all = []
    for i in range(flattened.shape[0]):
        stats = one_sample_permutation(flattened[i, :])
        stats_all.append(stats)
    mean = np.reshape(np.array([x['mean'] for x in stats_all]),
                      (self.grid_width, self.grid_width))
    p = np.reshape(np.array([x['p'] for x in stats_all]),
                   (self.grid_width, self.grid_width))
    return (mean, p)
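# Illustrative use of the same reshape pattern on synthetic data (grid_width
# and n_subjects are taken from the helper above; the data here is made up):
import numpy as np

grid_width, n_subjects = 3, 20
data = np.random.randn(grid_width, grid_width, n_subjects)
flattened = data.reshape(grid_width * grid_width, n_subjects)
# Each row of `flattened` is one grid cell's values across subjects, ready for
# one_sample_permutation; per-cell results are then reshaped back to
# (grid_width, grid_width).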
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1, n_permute=1000)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = one_sample_permutation(x - y, tail=1, n_permute=1000)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='pearson', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='spearman', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='kendall', tail=2)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p)

    # Test matrix_permutation
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190)
    x = squareform(dat[:, 0])
    y = squareform(dat[:, 1])
    stats = matrix_permutation(x, y, n_permute=1000)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # Test jackknife_permutation
    dat = np.random.multivariate_normal(
        [5, 10, 15, 25, 35, 45],
        [[1, .2, .5, .7, .8, .9],
         [.2, 1, .4, .1, .1, .1],
         [.5, .4, 1, .1, .1, .1],
         [.7, .1, .1, 1, .3, .6],
         [.8, .1, .1, .3, 1, .5],
         [.9, .1, .1, .6, .5, 1]], 200)
    dat = dat + np.random.randn(dat.shape[0], dat.shape[1]) * .5
    data1 = pairwise_distances(dat[0:100, :].T, metric='correlation')
    data2 = pairwise_distances(dat[100:, :].T, metric='correlation')
    stats = jackknife_permutation(data1, data2)
    print(stats)
    assert (stats['correlation'] >= .4) & (stats['correlation'] <= .99) & (stats['p'] <= .05)
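# Why the matrix_permutation block above draws 190 samples: squareform() maps
# a condensed vector of n*(n-1)/2 pairwise values to an n x n symmetric
# matrix, and 20*19/2 == 190. A quick illustrative check:
import numpy as np
from scipy.spatial.distance import squareform

v = np.arange(190, dtype=float)
m = squareform(v)
assert m.shape == (20, 20)
assert np.allclose(m, m.T)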
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 100)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y)
    assert (stats['mean'] < -2) & (stats['mean'] > -6)
    assert stats['p'] < .001
    print(stats)

    stats = one_sample_permutation(x - y)
    assert (stats['mean'] < -2) & (stats['mean'] > -6)
    assert stats['p'] < .001
    print(stats)

    stats = correlation_permutation(x, y)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85)
    assert stats['p'] < .001

    stats = correlation_permutation(x, y, metric='kendall')
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85)
    assert stats['p'] < .001
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1, n_permute=1000)
    assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001)

    stats = one_sample_permutation(x - y, tail=1, n_permute=1000)
    assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001)

    for method in ["permute", "circle_shift", "phase_randomize"]:
        for metric in ["spearman", "kendall", "pearson"]:
            stats = correlation_permutation(
                x, y, metric=metric, method=method, n_permute=500, tail=1)
            assert ((stats["correlation"] > 0.4)
                    & (stats["correlation"] < 0.85)
                    & (stats["p"] < 0.05))

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p, decimal=3)

    # Test matrix_permutation
    dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190)
    x = squareform(dat[:, 0])
    y = squareform(dat[:, 1])
    stats = matrix_permutation(x, y, n_permute=1000)
    assert ((stats["correlation"] > 0.4)
            & (stats["correlation"] < 0.85)
            & (stats["p"] < 0.001))
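# A conceptual sketch of the circle_shift null tested above: circularly
# shifting one series preserves its autocorrelation while breaking its pairing
# with the other series. (Illustrative only; not the library's internals.)
import numpy as np

x = np.random.randn(100)
shift = np.random.randint(1, len(x))
x_null = np.roll(x, shift)  # one draw from the circle-shift null for x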
for m in mask_x:
    sub_pattern_similarity = 1 - beta.apply_mask(m).distance(metric='correlation')
    sub_pattern_similarity.labels = conditions
    s = sub_pattern_similarity.similarity(motor, metric='spearman',
                                          n_permute=0)
    sub_pattern.append(sub_pattern_similarity)
    motor_sim_r.append(s['correlation'])

all_sub_similarity[sub] = sub_pattern
all_sub_motor_rsa[sub] = motor_sim_r

all_sub_motor_rsa = pd.DataFrame(all_sub_motor_rsa).T


# Now let's calculate a one-sample t-test on each ROI to see which ROIs are consistently different from zero across our sample of participants. Because these are r-values, we will first perform a [Fisher r-to-z transformation](https://en.wikipedia.org/wiki/Fisher_transformation). We will use a [non-parametric permutation sign test](https://en.wikipedia.org/wiki/Sign_test) for our null hypothesis test. This will take a minute to run, as we will be calculating 5000 permutations for each of 50 ROIs (though these permutations are parallelized across cores).

# In[114]:


rsa_stats = []
for i in all_sub_motor_rsa:
    rsa_stats.append(one_sample_permutation(fisher_r_to_z(all_sub_motor_rsa[i])))


# We can plot a thresholded map using FDR correction as the threshold.

# In[117]:


fdr_p = fdr(np.array([x['p'] for x in rsa_stats]), q=0.05)
print(fdr_p)

rsa_motor_r = Brain_Data([x * y['mean'] for x, y in zip(mask_x, rsa_stats)]).sum()
rsa_motor_p = Brain_Data([x * y['p'] for x, y in zip(mask_x, rsa_stats)]).sum()

thresholded = threshold(rsa_motor_r, rsa_motor_p, thr=fdr_p)
plot_glass_brain(thresholded.to_nifti(), cmap='coolwarm')
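# In[ ]:


# The fdr() call above finds a single p-value threshold. A minimal sketch of
# the standard Benjamini-Hochberg procedure it is based on (bh_threshold is a
# hypothetical name; the library's fdr() may differ in details):
import numpy as np

def bh_threshold(p, q=0.05):
    p = np.sort(np.asarray(p))
    n = len(p)
    below = p <= (np.arange(1, n + 1) / n) * q
    return p[below].max() if below.any() else -1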
def plot_silhouette(distance, labels, ax=None, permutation_test=True,
                    n_permute=5000, **kwargs):
    """Create a silhouette plot of within- relative to between-label distance.

    Args:
        distance: (pandas dataframe) brain_distance matrix
        labels: (pandas series/dataframe) group labels
        ax: axis to plot on (default=None)
        permutation_test: (bool) whether to run a permutation test on each
            label's mean silhouette score
        n_permute: (int) number of samples for the permutation test

    Optional keyword args:
        figsize: (list) dimensions of silhouette plot
        colors: (list) color triplets for silhouettes; length must equal the
            number of unique labels

    Returns:
        outAll: (pandas dataframe) mean silhouette score and permutation
            p-value per label if permutation_test=True, otherwise None
    """
    # Define label set
    labelSet = np.unique(np.array(labels))
    n_clusters = len(labelSet)

    # Set defaults for plot design, falling back when not passed via kwargs
    colors = kwargs.get("colors", sns.color_palette("hls", n_clusters))
    figsize = kwargs.get("figsize", (6, 4))

    # Compute silhouette scores
    out = pd.DataFrame(columns=("Label", "MeanWit", "MeanBet", "Sil"))
    for index in range(len(labels)):
        label = labels.iloc[index]
        sameIndices = [i for i, labelcur in enumerate(labels)
                       if (labelcur == label) & (i != index)]
        within = distance.iloc[index, sameIndices].values.flatten()
        otherIndices = [i for i, labelcur in enumerate(labels)
                        if labelcur != label]
        between = distance.iloc[index, otherIndices].values.flatten()
        silhouetteScore = (np.mean(between) - np.mean(within)) / max(
            np.mean(between), np.mean(within))
        out_tmp = pd.DataFrame(
            {"Label": label, "MeanWit": np.mean(within),
             "MeanBet": np.mean(between), "Sil": silhouetteScore},
            index=[index])
        out = pd.concat([out, out_tmp])
    sample_silhouette_values = out["Sil"]

    # Plot
    with sns.axes_style("white"):
        if ax is None:
            _, ax = plt.subplots(1, figsize=figsize)

    x_lower = 10
    labelX = []
    for labelInd in range(n_clusters):
        label = labelSet[labelInd]
        ith_cluster_silhouette_values = sample_silhouette_values[labels == label]
        ith_cluster_silhouette_values = ith_cluster_silhouette_values.sort_values()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        x_upper = x_lower + size_cluster_i
        color = colors[labelInd]
        with sns.axes_style("white"):
            ax.fill_between(
                np.arange(x_lower, x_upper),
                0,
                ith_cluster_silhouette_values,
                facecolor=color,
                edgecolor=color,
            )
        labelX = np.hstack((labelX, np.mean([x_lower, x_upper])))
        x_lower = x_upper + 3

    # Format plot
    ax.set_xticks(labelX)
    ax.set_xticklabels(labelSet)
    ax.set_title("Silhouettes", fontsize=18)
    ax.set_xlim([5, 10 + len(labels) + n_clusters * 3])

    # Permutation test on mean silhouette score per label
    if permutation_test:
        outAll = pd.DataFrame(columns=["label", "mean", "p"])
        for labelInd in range(n_clusters):
            temp = pd.DataFrame(columns=outAll.columns)
            label = labelSet[labelInd]
            data = sample_silhouette_values[labels == label]
            temp.loc[labelInd, "label"] = label
            temp.loc[labelInd, "mean"] = np.mean(data)
            if np.mean(data) > 0:
                # Only test positive mean silhouette scores
                statsout = one_sample_permutation(data, n_permute=n_permute)
                temp["p"] = statsout["p"]
            else:
                temp["p"] = 999
            outAll = pd.concat([outAll, temp])
        return outAll
    return None
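# Hypothetical usage of plot_silhouette (the distance matrix and labels below
# are synthetic; any pandas distance matrix with matching labels would do):
import numpy as np
import pandas as pd

n = 12
labels = pd.Series(['A'] * 6 + ['B'] * 6)
d = np.random.rand(n, n)
d = (d + d.T) / 2        # symmetrize
np.fill_diagonal(d, 0)   # zero self-distance
distance = pd.DataFrame(d)
stats = plot_silhouette(distance, labels, permutation_test=True, n_permute=500)
print(stats)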
```python
for m in mask_x:
    sub_pattern_similarity = 1 - beta.apply_mask(m).distance(metric='correlation')
    sub_pattern_similarity.labels = conditions
    s = sub_pattern_similarity.similarity(motor, metric='spearman', n_permute=0)
    sub_pattern.append(sub_pattern_similarity)
    motor_sim_r.append(s['correlation'])

all_sub_similarity[sub] = sub_pattern
all_sub_motor_rsa[sub] = motor_sim_r

all_sub_motor_rsa = pd.DataFrame(all_sub_motor_rsa).T
```

Now let's calculate a one-sample t-test on each ROI to see which ROIs are consistently different from zero across our sample of participants. Because these are r-values, we will first perform a [Fisher r-to-z transformation](https://en.wikipedia.org/wiki/Fisher_transformation). We will use a [non-parametric permutation sign test](https://en.wikipedia.org/wiki/Sign_test) for our null hypothesis test. This will take a minute to run, as we will be calculating 5000 permutations for each of 50 ROIs (though these permutations are parallelized across cores).

```python
rsa_stats = []
for i in all_sub_motor_rsa:
    rsa_stats.append(one_sample_permutation(fisher_r_to_z(all_sub_motor_rsa[i])))
```

We can plot a thresholded map using FDR correction as the threshold.

```python
fdr_p = fdr(np.array([x['p'] for x in rsa_stats]), q=0.05)
print(fdr_p)

rsa_motor_r = Brain_Data([x * y['mean'] for x, y in zip(mask_x, rsa_stats)]).sum()
rsa_motor_p = Brain_Data([x * y['p'] for x, y in zip(mask_x, rsa_stats)]).sum()

thresholded = threshold(rsa_motor_r, rsa_motor_p, thr=fdr_p)
plot_glass_brain(thresholded.to_nifti(), cmap='coolwarm')
```

Looks like nothing survives FDR. Let's try a more liberal uncorrected threshold.
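A sketch of what that next step might look like, assuming `threshold()` accepts a fixed p-value cutoff (the `.001` value is an assumption, not from the source):

```python
# Uncorrected threshold at an assumed p < .001 cutoff
thresholded_unc = threshold(rsa_motor_r, rsa_motor_p, thr=.001)
plot_glass_brain(thresholded_unc.to_nifti(), cmap='coolwarm')
```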