def p_boot(self):
    """Calculate two-sided p-values from the bootstrap ratios.

    Each bootstrap ratio is treated as a t-value and converted to a
    two-sided tail probability of a Student-t distribution with
    ``len(self._feat_mask) - 1`` degrees of freedom.

    Returns
    -------
    numpy.recarray
        One field per field of ``self.boot_ratio``. Every field has shape
        ``self._feat_shape``; positions whose bootstrap ratio is NaN are
        left at a p-value of 1.
    """
    brs = self.boot_ratio
    names = brs.dtype.names

    # NOTE(review): the degrees of freedom come from len(self._feat_mask),
    # which is assumed to equal the number of subjects -- confirm.
    tdist = dists.t(len(self._feat_mask) - 1)

    pvals = []
    for n in names:
        # only features with a defined bootstrap ratio are tested
        fmask = ~np.isnan(brs[n])
        br = brs[n][fmask]

        # two-sided tail probability; the density (``pdf``) is not a p-value
        bp = np.ones(self._feat_shape)
        bp[fmask] = tdist.cdf(-np.abs(br)) * 2.
        pvals.append(bp)

    # pack the per-field arrays back into a recarray with the same names
    return np.rec.fromarrays(pvals, names=','.join(names))
def fdr_boot(self):
    """Calculate False Discovery Rate q-values from the bootstrap ratios.

    Each bootstrap ratio is treated as a t-value and converted to a
    two-sided p-value using a Student-t distribution with
    ``len(self._feat_mask) - 1`` degrees of freedom. The p-values of the
    non-NaN features are then passed to ``fdr_correction``.

    Returns
    -------
    numpy.recarray
        One field per field of ``self.boot_ratio``. Every field has shape
        ``self._feat_shape``; positions whose bootstrap ratio is NaN are
        left at a q-value of 1.
    """
    brs = self.boot_ratio
    names = brs.dtype.names

    # NOTE(review): the degrees of freedom come from len(self._feat_mask),
    # which is assumed to equal the number of subjects -- confirm.
    tdist = dists.t(len(self._feat_mask) - 1)

    qvals = []
    for n in names:
        # only features with a defined bootstrap ratio are corrected
        fmask = ~np.isnan(brs[n])
        br = brs[n][fmask]

        # two-sided tail probability; the density (``pdf``) is not a p-value
        bp = tdist.cdf(-np.abs(br)) * 2.

        # fdr_correction is defined outside this block; it is unpacked as
        # (reject mask, q-values) and only the q-values are kept
        _, q = fdr_correction(bp)

        qv = np.ones(self._feat_shape)
        qv[fmask] = q
        qvals.append(qv)

    # pack the per-field arrays back into a recarray with the same names
    return np.rec.fromarrays(qvals, names=','.join(names))
def p_boot(self):
    """Calculate two-sided p-values from the bootstrap ratios.

    Each bootstrap ratio is treated as a t-value and converted to a
    two-sided tail probability of a Student-t distribution with
    ``len(self._feat_mask) - 1`` degrees of freedom.

    Returns
    -------
    numpy.recarray
        One field per field of ``self.boot_ratio``. Every field has shape
        ``self._feat_shape``; positions whose bootstrap ratio is NaN are
        left at a p-value of 1.
    """
    brs = self.boot_ratio
    names = brs.dtype.names

    # NOTE(review): the degrees of freedom come from len(self._feat_mask),
    # which is assumed to equal the number of subjects -- confirm.
    tdist = dists.t(len(self._feat_mask) - 1)

    pvals = []
    for n in names:
        # only features with a defined bootstrap ratio are tested
        fmask = ~np.isnan(brs[n])
        br = brs[n][fmask]

        # two-sided tail probability; the density (``pdf``) is not a p-value
        bp = np.ones(self._feat_shape)
        bp[fmask] = tdist.cdf(-np.abs(br)) * 2.
        pvals.append(bp)

    # pack the per-field arrays back into a recarray with the same names
    return np.rec.fromarrays(pvals, names=','.join(names))
def fdr_boot(self):
    """Calculate False Discovery Rate q-values from the bootstrap ratios.

    Each bootstrap ratio is treated as a t-value and converted to a
    two-sided p-value using a Student-t distribution with
    ``len(self._feat_mask) - 1`` degrees of freedom. The p-values of the
    non-NaN features are then passed to ``fdr_correction``.

    Returns
    -------
    numpy.recarray
        One field per field of ``self.boot_ratio``. Every field has shape
        ``self._feat_shape``; positions whose bootstrap ratio is NaN are
        left at a q-value of 1.
    """
    brs = self.boot_ratio
    names = brs.dtype.names

    # NOTE(review): the degrees of freedom come from len(self._feat_mask),
    # which is assumed to equal the number of subjects -- confirm.
    tdist = dists.t(len(self._feat_mask) - 1)

    qvals = []
    for n in names:
        # only features with a defined bootstrap ratio are corrected
        fmask = ~np.isnan(brs[n])
        br = brs[n][fmask]

        # two-sided tail probability; the density (``pdf``) is not a p-value
        bp = tdist.cdf(-np.abs(br)) * 2.

        # fdr_correction is defined outside this block; it is unpacked as
        # (reject mask, q-values) and only the q-values are kept
        _, q = fdr_correction(bp)

        qv = np.ones(self._feat_shape)
        qv[fmask] = q
        qvals.append(qv)

    # pack the per-field arrays back into a recarray with the same names
    return np.rec.fromarrays(qvals, names=','.join(names))
def evaluate(network_factory, data, n, poolsize=DEFAULT_POOL_SIZE,
             average_over=DEFAULT_AVERAGE_OVER):
    """Run multiple accuracy tests and produce a confidence interval result.

    ``average_over`` independent jobs are built, each with a fresh network
    from ``network_factory()``, and mapped over a process pool with
    ``train_and_evaluate``. The returned accuracies are summarised by their
    mean and a two-sided 95% Student-t confidence interval.

    Parameters
    ----------
    network_factory : callable
        Called with no arguments once per run to create a network.
    data : object
        Must expose ``raw_data`` and ``labels`` attributes.
    n : object
        Forwarded unchanged to every job under the key ``'n'``.
    poolsize : int
        Number of worker processes.
    average_over : int
        Number of independent runs.

    Returns
    -------
    dict
        ``{'mean': float, 'confidence_interval': (lower, upper)}``.
    """
    jobs = [
        {
            'network': network_factory(),
            'raw_data': data.raw_data,
            'labels': data.labels,
            'n': n,
        }
        for _ in range(average_over)
    ]

    pool = Pool(poolsize)
    try:
        accuracies = np.array(pool.map(train_and_evaluate, jobs))
    finally:
        # always release the worker processes, even if a job raised
        pool.close()
        pool.join()

    mean = np.mean(accuracies)
    # standard error of the mean: sample std (ddof=1, matching the n - 1
    # degrees of freedom below) over sqrt of the actual number of runs,
    # rather than a hard-coded 10
    std_err = np.std(accuracies, ddof=1) / np.sqrt(average_over)
    t_value = t(average_over - 1).ppf(0.975)

    lower_bound = mean - t_value * std_err
    upper_bound = mean + t_value * std_err
    return {'mean': mean, 'confidence_interval': (lower_bound, upper_bound)}
def pick_stable_features(R, nboot=500, do_tfce=True,
                         connectivity=None, shape=None,
                         dt=.01, E=2 / 3., H=2.0):
    """Use a bootstrap to pick stable features.

    The first axis of ``R`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(R) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    R : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.
    do_tfce : bool
        If True the input is used unchanged (the TFCE step itself is
        currently disabled); if False ``np.arctanh`` is applied first.
    connectivity, shape, dt, E, H
        Only relevant to the disabled TFCE step; currently unused.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature. Features whose ratio is
        undefined (0/0) get a p-value of 1.
    """
    nobs = len(R)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # TFCE is disabled: with do_tfce=True the data pass through untouched,
    # otherwise they are converted with arctanh
    Rt = R if do_tfce else np.arctanh(R)

    # bootstrap ratio: observed mean over the spread of the bootstrap means
    Rtb = np.array([Rt[boot].mean(0) for boot in boots])
    with np.errstate(invalid='ignore'):
        Rtbr = Rt.mean(0) / Rtb.std(0)

    # an undefined ratio (0/0) carries no evidence; treat it as zero
    Rtbr = np.where(np.isnan(Rtbr), 0., Rtbr)

    # two-sided tail probability; the density (``pdf``) is not a p-value
    pvals = dists.t(nobs - 1).cdf(-np.abs(Rtbr)) * 2.
    return np.minimum(pvals, 1.)
def pick_stable_features(Z, nboot=500):
    """Use a bootstrap to pick stable features.

    The first axis of ``Z`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(Z) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    Z : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature, capped at 1.
    """
    nobs = len(Z)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # Accumulate the bootstrap means one at a time. This is presumably
    # equivalent to np.array([Z[b].mean(0) for b in boots]).std(0) without
    # holding every bootstrap mean in memory -- verify against
    # OnlineVariance.
    ov = OnlineVariance(ddof=0)
    for boot in boots:
        ov.include(Z[boot].mean(0))
    Zbr = Z.mean(0) / ov.std

    # ignore any nans
    Zbr[np.isnan(Zbr)] = 0.

    # bootstrap ratios are supposedly t-distributed, so test significance
    Zbr = dists.t(nobs - 1).cdf(-1 * np.abs(Zbr)) * 2.
    Zbr[Zbr > 1] = 1

    return Zbr
def __thpd(self, nu, bbar, sd):
    '''
    Return the central 95% interval of a t-distribution.

    The distribution is frozen with ``nu`` degrees of freedom, location
    ``bbar`` and scale ``sd``; the result is a length-2 array holding its
    2.5% and 97.5% quantiles.
    '''
    frozen = dstn.t(nu, bbar, sd)
    bounds = [frozen.ppf(q) for q in (0.025, 0.975)]
    return np.array(bounds)
def test_pdf():
    """Check TStudentKernel._pdf against scipy's Student-t density.

    Every combination of location, degrees of freedom and scale is
    compared on a grid of 200 points between -100 and 100.
    """
    grid = np.linspace(-100, 100, 200)

    for mu in [0, 1, 10]:
        for v in [1, 10]:
            for std in [1, 10]:
                priors = np.array([mu, v, std], dtype='d')

                kernel = TStudentKernel()
                # 999 is, per the original author's note, "just the max
                # freed" (TODO confirm what that refers to)
                kernel.build(999, 1, priors)

                truth = t(v, loc=mu, scale=std)

                for x in grid:
                    print(mu, v, std, x, truth.pdf(x),
                          kernel._pdf(x, 0, StampLists(1)))
                    assert_almost_equal(truth.pdf(x),
                                        kernel._pdf(x, 0, StampLists(1)))
def pick_stable_features(Z, nboot=500):
    """Use a bootstrap to pick stable features.

    The first axis of ``Z`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(Z) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    Z : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature, capped at 1. Features whose
        ratio is undefined (0/0) get a p-value of 1.
    """
    nobs = len(Z)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # bootstrap ratio: observed mean over the spread of the bootstrap means
    Zb = np.array([Z[boot].mean(0) for boot in boots])
    Zbr = Z.mean(0) / Zb.std(0)

    # ignore any nans
    Zbr[np.isnan(Zbr)] = 0.

    # bootstrap ratios are supposedly t-distributed, so test significance
    Zbr = dists.t(nobs - 1).cdf(-1 * np.abs(Zbr)) * 2.
    Zbr[Zbr > 1] = 1

    return Zbr
def pick_stable_features(Z, nboot=500):
    """Use a bootstrap to pick stable features.

    The first axis of ``Z`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(Z) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    Z : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature, capped at 1. Features whose
        ratio is undefined (0/0) get a p-value of 1.
    """
    nobs = len(Z)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # bootstrap ratio: observed mean over the spread of the bootstrap means
    Zb = np.array([Z[boot].mean(0) for boot in boots])
    Zbr = Z.mean(0) / Zb.std(0)

    # ignore any nans
    Zbr[np.isnan(Zbr)] = 0.

    # bootstrap ratios are supposedly t-distributed, so test significance
    Zbr = dists.t(nobs - 1).cdf(-1 * np.abs(Zbr)) * 2.
    Zbr[Zbr > 1] = 1

    return Zbr
def __plottdist(self, nu, bbar, sd, title):
    '''
    Plot a t-distribution density and shade its two 2.5% tails.

    The distribution is frozen with ``nu`` degrees of freedom, location
    ``bbar`` and scale ``sd``. The curve is drawn between the 0.1% and
    99.9% quantiles on the current matplotlib axes and given ``title``.
    '''
    frozen = dstn.t(nu, bbar, sd)

    # density curve over the central 99.8% of the distribution
    left_edge = frozen.ppf(0.001)
    right_edge = frozen.ppf(0.999)
    grid = np.linspace(left_edge, right_edge, 100)
    plt.plot(grid, frozen.pdf(grid))
    plt.title(title)

    # bounds of the central 95% interval
    lower_q = frozen.ppf(0.025)
    upper_q = frozen.ppf(0.975)

    # lower tail: follow the curve, then drop to the axis at the bound
    lower_x = np.linspace(left_edge, lower_q, 50)
    lower_y = frozen.pdf(lower_x)
    plt.fill(np.r_[lower_x, lower_x[-1]],
             np.r_[lower_y, 0],
             facecolor="blue", alpha=0.5)

    # upper tail: follow the curve, then close the polygon at the bound
    upper_x = np.linspace(upper_q, right_edge, 50)
    upper_y = frozen.pdf(upper_x)
    plt.fill(np.r_[upper_x, upper_x[0]],
             np.r_[upper_y, 0],
             facecolor="blue", alpha=0.5)
def pick_stable_features(R, nboot=500, do_tfce=True,
                         connectivity=None, shape=None,
                         dt=.01, E=2 / 3., H=2.0):
    """Use a bootstrap to pick stable features.

    The first axis of ``R`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(R) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    R : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.
    do_tfce : bool
        If True the input is used unchanged (the TFCE step itself is
        currently disabled); if False ``np.arctanh`` is applied first.
    connectivity, shape, dt, E, H
        Only relevant to the disabled TFCE step; currently unused.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature. Features whose ratio is
        undefined (0/0) get a p-value of 1.
    """
    nobs = len(R)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # TFCE is disabled: with do_tfce=True the data pass through untouched,
    # otherwise they are converted with arctanh
    Rt = R if do_tfce else np.arctanh(R)

    # bootstrap ratio: observed mean over the spread of the bootstrap means
    Rtb = np.array([Rt[boot].mean(0) for boot in boots])
    with np.errstate(invalid='ignore'):
        Rtbr = Rt.mean(0) / Rtb.std(0)

    # an undefined ratio (0/0) carries no evidence; treat it as zero
    Rtbr = np.where(np.isnan(Rtbr), 0., Rtbr)

    # two-sided tail probability; the density (``pdf``) is not a p-value
    pvals = dists.t(nobs - 1).cdf(-np.abs(Rtbr)) * 2.
    return np.minimum(pvals, 1.)
def pick_stable_features(Z, nboot=500):
    """Use a bootstrap to pick stable features.

    The first axis of ``Z`` is resampled with replacement ``nboot`` times.
    The bootstrap ratio (mean over the first axis divided by the standard
    deviation of the bootstrap means) is treated as a t-value with
    ``len(Z) - 1`` degrees of freedom and converted to a two-sided p-value.

    Parameters
    ----------
    Z : numpy.ndarray
        Data with observations along the first axis.
    nboot : int
        Number of bootstrap resamples.

    Returns
    -------
    numpy.ndarray
        Two-sided p-values, one per feature, capped at 1.
    """
    nobs = len(Z)

    # randint's upper bound is exclusive, so (0, nobs) draws the same
    # indices as the deprecated random_integers(0, nobs - 1)
    boots = [np.random.randint(0, nobs, nobs) for _ in range(nboot)]

    # Accumulate the bootstrap means one at a time. This is presumably
    # equivalent to np.array([Z[b].mean(0) for b in boots]).std(0) without
    # holding every bootstrap mean in memory -- verify against
    # OnlineVariance.
    ov = OnlineVariance(ddof=0)
    for boot in boots:
        ov.include(Z[boot].mean(0))
    Zbr = Z.mean(0) / ov.std

    # ignore any nans
    Zbr[np.isnan(Zbr)] = 0.

    # bootstrap ratios are supposedly t-distributed, so test significance
    Zbr = dists.t(nobs - 1).cdf(-1 * np.abs(Zbr)) * 2.
    Zbr[Zbr > 1] = 1

    return Zbr
def tatval(df, mu, sigma, x):
    """Evaluate the standard Student-t density at a standardised point.

    ``x`` is standardised as ``(x - mu) / sigma`` and the density of a
    t-distribution with ``df`` degrees of freedom is evaluated there.

    NOTE(review): the result is not divided by ``sigma``, so it is the
    density of the standardised variable rather than of a location-scale
    t at ``x`` -- confirm this is intended.
    """
    # df is wrapped in a list, exactly as the callers have always had it
    frozen = dist.t([df])
    standardised = (x - mu) / sigma
    return frozen.pdf(standardised)
def tdist(df, mu=0, sd=1):
    """Return a frozen Student-t distribution.

    ``df`` is the degrees of freedom; ``mu`` and ``sd`` are passed
    positionally as the location and scale.
    """
    frozen = D.t(df, mu, sd)
    return frozen
def students_t(mean=0, std=1.0, df=1.0):
    """Return a frozen Student-t distribution.

    ``mean`` is used as the location and ``std`` as the scale of the
    distribution; ``df`` is the degrees of freedom.

    NOTE(review): ``std`` is passed as the scale parameter, which is
    not in general the standard deviation of a t-distribution -- confirm
    the naming matches what callers expect.
    """
    params = dict(df=df, loc=mean, scale=std)
    return dists.t(**params)
import pandas as pd
import numpy as np
from scipy.stats.distributions import expon, gamma, rayleigh, norm, t, uniform

from posteriori import between


def RMSE(predicted, expected):
    """Return the root-mean-square error between two equal-length arrays."""
    residual = predicted - expected
    return np.linalg.norm(residual) / np.sqrt(len(predicted))


# Frozen reference distributions; those built with keyword arguments keep
# them in ``.kwds``, which is used below to label each distribution.
distributions = [
    norm(),
    t(df=5),
    gamma(a=2),
    gamma(a=4),
    gamma(a=8),
    expon(scale=1/0.5),
    expon(scale=1/1),
    expon(scale=1/2),
    rayleigh(),
    uniform(),
]

# Initialised here; nothing in the visible part of the script fills it.
errors = []

for distribution in distributions:
    # human-readable label, e.g. the distribution name followed by its
    # keyword arguments as "key=value" pairs in parentheses
    parameters = [
        '='.join((k, str(v))) for k, v in distribution.kwds.items()
    ]
    name = "{name}({parameters})".format(
        name=distribution.dist.name,
        parameters=', '.join(parameters),
    )
def t_ci(sample, alpha=0.95):
    """Return a two-sided Student-t confidence interval for the mean.

    ``sample`` must expose ``shape`` and ``mean()`` (e.g. a NumPy array);
    ``alpha`` is the coverage level, 0.95 by default. The result is a
    ``(lower, upper)`` tuple.
    """
    n = sample.shape[0]

    # standard error of the mean from the unbiased sample std
    std_err = np.std(sample, ddof=1) / np.sqrt(n)

    # upper quantile leaving (1 - alpha) / 2 in each tail
    tail = (1 - alpha) / 2
    quantile = dist.t(n - 1).ppf(1 - tail)

    centre = sample.mean()
    half_width = quantile * std_err
    return centre - half_width, centre + half_width
def tatval(df, mu, sigma, x):
    """Evaluate the standard Student-t density at a standardised point.

    ``x`` is standardised as ``(x - mu) / sigma`` and the density of a
    t-distribution with ``df`` degrees of freedom is evaluated there.

    NOTE(review): the result is not divided by ``sigma``, so it is the
    density of the standardised variable rather than of a location-scale
    t at ``x`` -- confirm this is intended.
    """
    # df is wrapped in a list, exactly as the callers have always had it
    frozen = dist.t([df])
    standardised = (x - mu) / sigma
    return frozen.pdf(standardised)
def D(self):
    """Return a frozen Student-t distribution built from this object.

    Uses ``self.dof`` as the degrees of freedom, ``self.mean`` as the
    location and ``self.std`` as the scale.
    """
    # ``D`` here must resolve to the module-level distributions alias,
    # not to this method; that holds when this is defined inside a class.
    make_t = D.t
    dof, loc, scale = self.dof, self.mean, self.std
    return make_t(dof, loc=loc, scale=scale)