def __init__(self, Y, X, norm=None, sub=None, ds=None,
             contours={.05: (.8, .2, .0), .01: (1., .6, .0), .001: (1., 1., .0)},
             tp=.1, samples=1000, replacement=False,
             tstart=None, tstop=None, close_time=0, pmax=1):
    """
    Y : ndvar
        Dependent variable.
    X : continuous | None
        The continuous predictor variable.
    norm : None | categorial
        Categories in which to normalize (z-score) Y.
    sub : index
        Apply the analysis to a subset of cases in Y, X.
    ds : dataset
        Dataset from which to retrieve Y, X, norm and sub.
    contours : dict {scalar: tuple}
        Contour levels for plotting, as a {p-value: color} mapping.
    tp : scalar
        Threshold (uncorrected p-value) to use for finding clusters.
    samples : int
        Number of samples used to estimate the cluster-size distribution.
    replacement : bool
        Whether random samples should be drawn with or without replacement.
    tstart, tstop : None | scalar
        Time window for clusters. **None**: use the whole epoch;
        **scalar**: use only a part of the epoch.
    close_time : scalar
        Close gaps in clusters that are smaller than this interval. Assumes
        that Y is a uniform time series.
    pmax : scalar <= 1
        Maximum cluster p-value for keeping a cluster.
    """
    Y = asndvar(Y, sub=sub, ds=ds)
    X = asvar(X, sub=sub, ds=ds)
    self.name = name = "%s corr %s" % (Y.name, X.name)

    # convert the p-threshold to the equivalent r-threshold:
    # r = t / sqrt(df + t**2)
    # http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#Inference
    self.n = n = len(X)
    df = n - 2
    tt = scipy.stats.distributions.t.isf(tp, df)
    tr = tt / np.sqrt(df + tt ** 2)

    cs = _cs.Colorspace(cmap=_cs.cm_xpolar, vmax=1, vmin=-1)
    cdist = cluster_dist(Y, N=samples, t_upper=tr, t_lower=-tr,
                         tstart=tstart, tstop=tstop, close_time=close_time,
                         unit='r', pmax=pmax, name=name, cs=cs)

    # Normalization is done before the permutation because we are interested
    # in the variance associated with each subject for the z-scoring.
    Y = Y.copy()
    Y.x = Y.x.reshape((n, -1))
    if norm is not None:
        for cell in norm.cells:
            idx = (norm == cell)
            Y.x[idx] = scipy.stats.mstats.zscore(Y.x[idx])

    # center the predictor
    x = X.x.reshape((n, -1))
    m_x = np.mean(x)
    if np.isnan(m_x):
        raise ValueError("np.mean(x) is nan")
    self.x = x - m_x

    # estimate the cluster-size distribution from permuted Ys
    for _, Yrs in _resample(Y, replacement=replacement, samples=samples):
        r = self._corr(Yrs)
        cdist.add_perm(r)

    # find clusters in the actual data
    r = self._corr(Y)
    cdist.add_original(r)

    self.r_map = cdist.P
    self.all = [[self.r_map] + cdist.clusters]
    self.clusters = cdist
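
# --- Illustrative sketch (not part of the original module) -----------------
# The threshold conversion in __init__ above maps an uncorrected p-value to
# the equivalent correlation coefficient via r = t / sqrt(df + t**2), which
# follows from the test statistic t = r * sqrt(df / (1 - r**2)). A minimal,
# self-contained demonstration; the function name and the example values
# (n=20, tp=.1) are hypothetical.
def _demo_r_threshold(n=20, tp=.1):
    import numpy as np
    import scipy.stats

    df = n - 2  # degrees of freedom for Pearson's r
    tt = scipy.stats.distributions.t.isf(tp, df)  # critical t-value
    return tt / np.sqrt(df + tt ** 2)  # equivalent critical r-value

# e.g., _demo_r_threshold(20, .1) -> ~0.30; any sample with |r| above this
# value can enter a cluster at the tp = .1 threshold.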
def __init__(self, Y, X, t=.1, samples=1000, replacement=False,
             tstart=None, tstop=None, close_time=0, pmax=1,
             sub=None, ds=None):
    """
    Arguments
    ---------
    Y : ndvar
        Measurements (dependent variable).
    X : categorial
        Model.
    t : scalar
        Threshold (uncorrected p-value) to use for finding clusters.
    samples : int
        Number of samples used to estimate the parameter distributions.
    replacement : bool
        Whether random samples should be drawn with or without replacement.
    tstart, tstop : None | scalar
        Time window for clusters. **None**: use the whole epoch;
        **scalar**: use only a part of the epoch.

        .. Note:: The implementation is not optimal: F-values outside the
            time window are still computed, but ignored.

    close_time : scalar
        Close gaps in clusters that are smaller than this interval. Assumes
        that Y is a uniform time series.
    pmax : scalar <= 1
        Maximum cluster p-value for keeping a cluster.
    sub : index
        Apply the analysis to a subset of cases in Y, X.
    ds : dataset
        Dataset from which to retrieve Y, X and sub.

    .. FIXME:: Connectivity for >2 dimensional data. Currently, adjacent
        samples are connected.
    """
    Y = self.Y = asndvar(Y, sub=sub, ds=ds)
    X = self.X = asmodel(X, sub=sub, ds=ds)
    lm = _glm.lm_fitter(X)

    # convert the p-threshold into F-thresholds, one per effect
    tF = {}
    if lm.full_model:
        for e in lm.E_MS:
            effects_d = lm.E_MS[e]
            if effects_d:
                df_d = sum(ed.df for ed in effects_d)
                tF[e] = scipy.stats.distributions.f.isf(t, e.df, df_d)
    else:
        df_d = X.df_error
        tF = {e: scipy.stats.distributions.f.isf(t, e.df, df_d)
              for e in X.effects}

    # estimate the statistic distributions from permuted Ys
    kwargs = dict(tstart=tstart, tstop=tstop, close_time=close_time,
                  unit='F')
    dists = {e: cluster_dist(Y, samples, tF[e], name=e.name, **kwargs)
             for e in tF}
    self.cluster_dists = dists
    for _, Yrs in _resample(Y, replacement=replacement, samples=samples):
        for e, F in lm.map(Yrs.x, p=False):
            dists[e].add_perm(F)

    # find clusters in the actual data
    test0 = lm.map(Y.x, p=False)
    self.effects = []
    self.clusters = {}
    self.F_maps = {}
    for e, F in test0:
        self.effects.append(e)
        dist = dists[e]
        dist.add_original(F)
        self.clusters[e] = dist
        self.F_maps[e] = dist.P

    self.name = "ANOVA Permutation Cluster Test"
    self.tF = tF
    self.all = [[self.F_maps[e]] + self.clusters[e].clusters
                for e in self.X.effects if e in self.F_maps]
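
# --- Illustrative sketch (not part of the original module) -----------------
# The F-threshold lookup in __init__ above: for each effect, the uncorrected
# p-threshold is converted to a critical F-value given the effect's degrees
# of freedom and the denominator (error term) degrees of freedom. A minimal,
# self-contained demonstration; the function name and the example df values
# are hypothetical.
def _demo_F_threshold(t=.1, df_effect=1, df_error=18):
    import scipy.stats

    # samples with F above this value can enter a cluster for this effect
    return scipy.stats.distributions.f.isf(t, df_effect, df_error)

# e.g., _demo_F_threshold(.1, 1, 18) -> ~3.01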