def ttest(self, permutation=False, **kwargs):
    '''Calculate a one-sample t-test across samples.

    Args:
        permutation: (bool) Run the t-test as a permutation test.
            Note this can be very slow.

    Returns:
        out: (dict) contains Adjacency instances of t-values (or means
            if running a permutation test) and an Adjacency instance
            of p-values.
    '''
    if self.is_single_matrix:
        raise ValueError('t-test cannot be run on single matrices.')

    if permutation:
        t = []
        p = []
        for i in range(self.data.shape[1]):
            stats = one_sample_permutation(self.data[:, i], **kwargs)
            t.append(stats['mean'])
            p.append(stats['p'])
        t = Adjacency(np.array(t))
        p = Adjacency(np.array(p))
    else:
        t = self.mean().copy()
        p = deepcopy(t)
        t.data, p.data = ttest_1samp(self.data, 0, axis=0)
    return {'t': t, 'p': p}
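# A minimal standalone sketch of what the parametric branch above computes,
# assuming self.data is an (observations x features) array of vectorized
# matrices. The names and data here are illustrative, not from the source.
import numpy as np
from scipy.stats import ttest_1samp

data = np.random.randn(20, 6)        # 20 samples, 6 vectorized matrix cells
t, p = ttest_1samp(data, 0, axis=0)  # one-sample t-test against 0, per cell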
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = one_sample_permutation(x - y, tail=1)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='pearson', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='spearman', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='kendall', tail=2)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    # Two one-tailed p-values from a symmetric null should sum to the
    # two-tailed p-value.
    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p)
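# The _calc_pvalue check above relies on a standard permutation p-value
# convention. A sketch of that convention (illustrative, not the library code;
# the library may add a small-sample correction):
import numpy as np

null = np.random.normal(0, 1, 10000)            # stand-in null distribution
stat = 1.96
upper = np.mean(null >= stat)                   # one-tailed, upper
lower = np.mean(null <= -stat)                  # one-tailed, lower
two_sided = np.mean(np.abs(null) >= abs(stat))  # two-tailed
# The upper and lower tails are disjoint, so two_sided == upper + lower.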
def ttest(self, **kwargs):
    '''Calculate a one-sample permutation t-test across samples.

    Returns:
        (tuple) Adjacency instances of mean values and p-values.
    '''
    if self.is_single_matrix:
        raise ValueError('t-test cannot be run on single matrices.')
    m = []
    p = []
    for i in range(self.data.shape[1]):
        stats = one_sample_permutation(self.data[:, i], **kwargs)
        m.append(stats['mean'])
        p.append(stats['p'])
    mn = Adjacency(np.array(m))
    pval = Adjacency(np.array(p))
    return (mn, pval)
def _run_permutation(self, data):
    '''Helper function to run a nonparametric one-sample permutation test
    on each cell of a grid.'''
    flattened = data.reshape(self.grid_width * self.grid_width, self.n_subjects)
    stats_all = []
    for i in range(flattened.shape[0]):
        stats = one_sample_permutation(flattened[i, :])
        stats_all.append(stats)
    mean = np.reshape(np.array([x['mean'] for x in stats_all]),
                      (self.grid_width, self.grid_width))
    p = np.reshape(np.array([x['p'] for x in stats_all]),
                   (self.grid_width, self.grid_width))
    return (mean, p)
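# Illustrative use of the same reshape pattern on synthetic data (grid_width
# and n_subjects are taken from the helper above; the data here is made up):
import numpy as np

grid_width, n_subjects = 3, 20
data = np.random.randn(grid_width, grid_width, n_subjects)
flattened = data.reshape(grid_width * grid_width, n_subjects)
# Each row of `flattened` is one grid cell's values across subjects, ready for
# one_sample_permutation; per-cell results are then reshaped back to
# (grid_width, grid_width).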
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1, n_permute=1000)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = one_sample_permutation(x - y, tail=1, n_permute=1000)
    assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='pearson', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='spearman', tail=1)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    stats = correlation_permutation(x, y, metric='kendall', tail=2)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p)

    # Test matrix_permutation
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190)
    x = squareform(dat[:, 0])
    y = squareform(dat[:, 1])
    stats = matrix_permutation(x, y, n_permute=1000)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001)

    # Test jackknife_permutation
    dat = np.random.multivariate_normal(
        [5, 10, 15, 25, 35, 45],
        [[1, .2, .5, .7, .8, .9],
         [.2, 1, .4, .1, .1, .1],
         [.5, .4, 1, .1, .1, .1],
         [.7, .1, .1, 1, .3, .6],
         [.8, .1, .1, .3, 1, .5],
         [.9, .1, .1, .6, .5, 1]], 200)
    dat = dat + np.random.randn(dat.shape[0], dat.shape[1]) * .5
    data1 = pairwise_distances(dat[0:100, :].T, metric='correlation')
    data2 = pairwise_distances(dat[100:, :].T, metric='correlation')
    stats = jackknife_permutation(data1, data2)
    print(stats)
    assert (stats['correlation'] >= .4) & (stats['correlation'] <= .99) & (stats['p'] <= .05)
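# Why the matrix_permutation block above draws 190 samples: squareform() maps
# a condensed vector of n*(n-1)/2 pairwise values to an n x n symmetric
# matrix, and 20*19/2 == 190. A quick illustrative check:
import numpy as np
from scipy.spatial.distance import squareform

v = np.arange(190, dtype=float)
m = squareform(v)
assert m.shape == (20, 20)
assert np.allclose(m, m.T)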
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 100)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y)
    assert (stats['mean'] < -2) & (stats['mean'] > -6)
    assert stats['p'] < .001
    print(stats)

    stats = one_sample_permutation(x - y)
    assert (stats['mean'] < -2) & (stats['mean'] > -6)
    assert stats['p'] < .001
    print(stats)

    stats = correlation_permutation(x, y)
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85)
    assert stats['p'] < .001

    stats = correlation_permutation(x, y, metric='kendall')
    assert (stats['correlation'] > .4) & (stats['correlation'] < .85)
    assert stats['p'] < .001
def test_permutation():
    dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 1000)
    x = dat[:, 0]
    y = dat[:, 1]

    stats = two_sample_permutation(x, y, tail=1, n_permute=1000)
    assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001)

    stats = one_sample_permutation(x - y, tail=1, n_permute=1000)
    assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001)

    for method in ["permute", "circle_shift", "phase_randomize"]:
        for metric in ["spearman", "kendall", "pearson"]:
            stats = correlation_permutation(
                x, y, metric=metric, method=method, n_permute=500, tail=1)
            assert ((stats["correlation"] > 0.4)
                    & (stats["correlation"] < 0.85)
                    & (stats["p"] < 0.05))

    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall', tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork', tail=3)

    s = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=s, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=s, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=s, stat=-1.96, tail=1)
    sum_p = upper_p + lower_p
    np.testing.assert_almost_equal(two_sided, sum_p, decimal=3)

    # Test matrix_permutation
    dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190)
    x = squareform(dat[:, 0])
    y = squareform(dat[:, 1])
    stats = matrix_permutation(x, y, n_permute=1000)
    assert ((stats["correlation"] > 0.4)
            & (stats["correlation"] < 0.85)
            & (stats["p"] < 0.001))
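# A conceptual sketch of the circle_shift null tested above: circularly
# shifting one series preserves its autocorrelation while breaking its pairing
# with the other series. (Illustrative only; not the library's internals.)
import numpy as np

x = np.random.randn(100)
shift = np.random.randint(1, len(x))
x_null = np.roll(x, shift)  # one draw from the circle-shift null for x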
for m in mask_x:
    sub_pattern_similarity = 1 - beta.apply_mask(m).distance(metric='correlation')
    sub_pattern_similarity.labels = conditions
    s = sub_pattern_similarity.similarity(motor, metric='spearman',
                                          n_permute=0)
    sub_pattern.append(sub_pattern_similarity)
    motor_sim_r.append(s['correlation'])

all_sub_similarity[sub] = sub_pattern
all_sub_motor_rsa[sub] = motor_sim_r

all_sub_motor_rsa = pd.DataFrame(all_sub_motor_rsa).T


# Now let's calculate a one-sample t-test on each ROI to see which ROIs are consistently different from zero across our sample of participants. Because these are r-values, we will first perform a [Fisher r-to-z transformation](https://en.wikipedia.org/wiki/Fisher_transformation). We will use a [non-parametric permutation sign test](https://en.wikipedia.org/wiki/Sign_test) for our null hypothesis test. This will take a minute to run, as we will be calculating 5000 permutations for each of 50 ROIs (though these permutations are parallelized across cores).

# In[114]:


rsa_stats = []
for i in all_sub_motor_rsa:
    rsa_stats.append(one_sample_permutation(fisher_r_to_z(all_sub_motor_rsa[i])))


# We can plot a thresholded map using FDR correction as the threshold.

# In[117]:


fdr_p = fdr(np.array([x['p'] for x in rsa_stats]), q=0.05)
print(fdr_p)

rsa_motor_r = Brain_Data([x * y['mean'] for x, y in zip(mask_x, rsa_stats)]).sum()
rsa_motor_p = Brain_Data([x * y['p'] for x, y in zip(mask_x, rsa_stats)]).sum()

thresholded = threshold(rsa_motor_r, rsa_motor_p, thr=fdr_p)
plot_glass_brain(thresholded.to_nifti(), cmap='coolwarm')
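# In[ ]:


# The fdr() call above finds a single p-value threshold. A minimal sketch of
# the standard Benjamini-Hochberg procedure it is based on (bh_threshold is a
# hypothetical name; the library's fdr() may differ in details):
import numpy as np

def bh_threshold(p, q=0.05):
    p = np.sort(np.asarray(p))
    n = len(p)
    below = p <= (np.arange(1, n + 1) / n) * q
    return p[below].max() if below.any() else -1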
def plot_silhouette(distance, labels, ax=None, permutation_test=True,
                    n_permute=5000, **kwargs):
    """Create a silhouette plot of within- relative to between-label distance.

    Args:
        distance: (pandas dataframe) brain_distance matrix
        labels: (pandas series/dataframe) group labels
        ax: axis to plot on (default=None)
        permutation_test: (bool) whether to run a permutation test on each
            label's mean silhouette score
        n_permute: (int) number of samples for the permutation test

    Optional keyword args:
        figsize: (list) dimensions of silhouette plot
        colors: (list) color triplets for silhouettes; length must equal the
            number of unique labels

    Returns:
        outAll: (pandas dataframe) mean silhouette score and permutation
            p-value per label if permutation_test=True, otherwise None
    """
    # Define label set
    labelSet = np.unique(np.array(labels))
    n_clusters = len(labelSet)

    # Set defaults for plot design, falling back when not passed via kwargs
    colors = kwargs.get("colors", sns.color_palette("hls", n_clusters))
    figsize = kwargs.get("figsize", (6, 4))

    # Compute silhouette scores
    out = pd.DataFrame(columns=("Label", "MeanWit", "MeanBet", "Sil"))
    for index in range(len(labels)):
        label = labels.iloc[index]
        sameIndices = [i for i, labelcur in enumerate(labels)
                       if (labelcur == label) & (i != index)]
        within = distance.iloc[index, sameIndices].values.flatten()
        otherIndices = [i for i, labelcur in enumerate(labels)
                        if labelcur != label]
        between = distance.iloc[index, otherIndices].values.flatten()
        silhouetteScore = (np.mean(between) - np.mean(within)) / max(
            np.mean(between), np.mean(within))
        out_tmp = pd.DataFrame(
            {"Label": label, "MeanWit": np.mean(within),
             "MeanBet": np.mean(between), "Sil": silhouetteScore},
            index=[index])
        out = pd.concat([out, out_tmp])
    sample_silhouette_values = out["Sil"]

    # Plot
    with sns.axes_style("white"):
        if ax is None:
            _, ax = plt.subplots(1, figsize=figsize)

    x_lower = 10
    labelX = []
    for labelInd in range(n_clusters):
        label = labelSet[labelInd]
        ith_cluster_silhouette_values = sample_silhouette_values[labels == label]
        ith_cluster_silhouette_values = ith_cluster_silhouette_values.sort_values()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        x_upper = x_lower + size_cluster_i
        color = colors[labelInd]
        with sns.axes_style("white"):
            ax.fill_between(
                np.arange(x_lower, x_upper),
                0,
                ith_cluster_silhouette_values,
                facecolor=color,
                edgecolor=color,
            )
        labelX = np.hstack((labelX, np.mean([x_lower, x_upper])))
        x_lower = x_upper + 3

    # Format plot
    ax.set_xticks(labelX)
    ax.set_xticklabels(labelSet)
    ax.set_title("Silhouettes", fontsize=18)
    ax.set_xlim([5, 10 + len(labels) + n_clusters * 3])

    # Permutation test on mean silhouette score per label
    if permutation_test:
        outAll = pd.DataFrame(columns=["label", "mean", "p"])
        for labelInd in range(n_clusters):
            temp = pd.DataFrame(columns=outAll.columns)
            label = labelSet[labelInd]
            data = sample_silhouette_values[labels == label]
            temp.loc[labelInd, "label"] = label
            temp.loc[labelInd, "mean"] = np.mean(data)
            if np.mean(data) > 0:
                # Only test positive mean silhouette scores
                statsout = one_sample_permutation(data, n_permute=n_permute)
                temp["p"] = statsout["p"]
            else:
                temp["p"] = 999
            outAll = pd.concat([outAll, temp])
        return outAll
    return None
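# Hypothetical usage of plot_silhouette (the distance matrix and labels below
# are synthetic; any pandas distance matrix with matching labels would do):
import numpy as np
import pandas as pd

n = 12
labels = pd.Series(['A'] * 6 + ['B'] * 6)
d = np.random.rand(n, n)
d = (d + d.T) / 2        # symmetrize
np.fill_diagonal(d, 0)   # zero self-distance
distance = pd.DataFrame(d)
stats = plot_silhouette(distance, labels, permutation_test=True, n_permute=500)
print(stats)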
```python
for m in mask_x:
    sub_pattern_similarity = 1 - beta.apply_mask(m).distance(metric='correlation')
    sub_pattern_similarity.labels = conditions
    s = sub_pattern_similarity.similarity(motor, metric='spearman', n_permute=0)
    sub_pattern.append(sub_pattern_similarity)
    motor_sim_r.append(s['correlation'])

all_sub_similarity[sub] = sub_pattern
all_sub_motor_rsa[sub] = motor_sim_r

all_sub_motor_rsa = pd.DataFrame(all_sub_motor_rsa).T
```

Now let's calculate a one-sample t-test on each ROI to see which ROIs are consistently different from zero across our sample of participants. Because these are r-values, we will first perform a [Fisher r-to-z transformation](https://en.wikipedia.org/wiki/Fisher_transformation). We will use a [non-parametric permutation sign test](https://en.wikipedia.org/wiki/Sign_test) for our null hypothesis test. This will take a minute to run, as we will be calculating 5000 permutations for each of 50 ROIs (though these permutations are parallelized across cores).

```python
rsa_stats = []
for i in all_sub_motor_rsa:
    rsa_stats.append(one_sample_permutation(fisher_r_to_z(all_sub_motor_rsa[i])))
```

We can plot a thresholded map using FDR correction as the threshold.

```python
fdr_p = fdr(np.array([x['p'] for x in rsa_stats]), q=0.05)
print(fdr_p)

rsa_motor_r = Brain_Data([x * y['mean'] for x, y in zip(mask_x, rsa_stats)]).sum()
rsa_motor_p = Brain_Data([x * y['p'] for x, y in zip(mask_x, rsa_stats)]).sum()

thresholded = threshold(rsa_motor_r, rsa_motor_p, thr=fdr_p)
plot_glass_brain(thresholded.to_nifti(), cmap='coolwarm')
```

Looks like nothing survives FDR. Let's try a more liberal uncorrected threshold.
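A sketch of what that next step might look like, assuming `threshold()` accepts a fixed p-value cutoff (the `.001` value is an assumption, not from the source):

```python
# Uncorrected threshold at an assumed p < .001 cutoff
thresholded_unc = threshold(rsa_motor_r, rsa_motor_p, thr=.001)
plot_glass_brain(thresholded_unc.to_nifti(), cmap='coolwarm')
```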