def test_09():
    """Run ``dummy_test`` on a random triangle-mesh ``slm``.

    Randomised fields: slm['t'], slm['df'], slm['tri'], slm['resl'].
    Special input case: slm['dfs'] and slm['du'] are present as well.
    """
    n_vert = random.randint(1000, 10000)
    n_tri = random.randint(1000, 10000)
    du = random.randint(1, 10)

    # Keys are inserted in the same order as the random draws are made.
    slm = {
        't': np.random.rand(1, n_vert),
        'df': np.array([[n_tri]]),
        'k': 1,
        'du': du,
        'tri': np.random.randint(1, n_vert, size=(n_tri, 3)),
    }

    # One 'resl' row per edge returned for this mesh.
    edges = py_SurfStatEdg(slm)
    slm.update(
        resl=np.random.rand(edges.shape[0], 1),
        dfs=np.ones((1, n_vert)),
    )

    dummy_test(slm)
def py_SurfStatPeakClus(slm, mask, thresh, reselspvert=None, edg=None):
    """Find peaks (local maxima) and clusters for surface data.

    Parameters
    ----------
    slm : dict
        Mandatory keys: 't' and 'tri' (or 'lat'); optional keys 'df', 'k'.

        slm['t'] : numpy array of shape (l, v), v is the number of vertices.
            The first row slm['t'][0, :] is used for the clusters, and the
            other rows are used to calculate cluster resels if slm['k'] > 1.
            See SurfStatF for the precise definition of the extra rows.
        slm['tri'] : numpy array of shape (t, 3), dtype=int, triangle
            indices, values should be between 1 and v,
        or
        slm['lat'] : numpy array of shape (nx, ny, nz), values should be
            either 0 or 1.
        slm['df'] : degrees of freedom. Only its number of elements (1 or 2)
            is used, to determine if slm['t'] is Hotelling's T or T^2 when
            k > 1.
        slm['k'] : int, number of variates (1, 2 or 3), by default 1.
    mask : numpy array of shape (v,), dtype=int, values either 0 or 1.
    thresh : float
        Clusters are vertices where slm['t'][0, mask] >= thresh.
    reselspvert : numpy array of shape (v,), optional
        Resels per vertex, by default all ones.
    edg : numpy array of shape (e, 2), dtype=int, optional
        Edge indices, by default computed with py_SurfStatEdg(slm). They are
        used directly as column indices into slm['t'].

    Returns
    -------
    peak : dict with keys 't', 'vertid', 'clusid'
        peak['t'] : numpy array of shape (np, 1), peaks (local maxima).
        peak['vertid'] : numpy array of shape (np, 1), vertex id's (1-based).
        peak['clusid'] : numpy array of shape (np, 1), id of the cluster
            that contains each peak.
    clus : dict with keys 'clusid', 'nverts', 'resels'
        clus['clusid'] : numpy array of shape (nc, 1), cluster id numbers.
        clus['nverts'] : numpy array of shape (nc, 1), number of vertices in
            each cluster.
        clus['resels'] : numpy array of shape (nc, 1), resels in each cluster.
    clusid : numpy array of shape (1, v), cluster id for each vertex.

    If no vertex reaches ``thresh``, three empty lists are returned instead.

    Raises
    ------
    ValueError
        If slm['k'] is present and is not 1, 2 or 3.
    """
    if edg is None:
        edg = py_SurfStatEdg(slm)

    l, v = np.shape(slm['t'])

    # Work on a copy so the caller's slm['t'] is left untouched.
    slm_t = copy.deepcopy(slm['t'])
    # Masked-out vertices are overwritten with the minimum of the first row.
    slm_t[0, ~mask.astype(bool)] = slm_t[0, :].min()

    # A vertex stops being a local maximum as soon as one of its edges leads
    # to a strictly larger value.
    t1 = slm_t[0, edg[:, 0]]
    t2 = slm_t[0, edg[:, 1]]
    islm = np.ones((1, v))
    islm[0, edg[t1 < t2, 0]] = 0
    islm[0, edg[t2 < t1, 1]] = 0
    lmvox = np.argwhere(islm)[:, 1] + 1  # 1-based ids of local maxima

    excurset = np.array(slm_t[0, :] >= thresh, dtype=int)
    n = excurset.sum()
    if n < 1:
        return [], [], []

    # Renumber the supra-threshold vertices 1..n and keep only the edges
    # whose two end points are both above threshold.
    voxid = np.cumsum(excurset)
    edg = voxid[edg[np.all(excurset[edg], 1), :]]
    nf = np.arange(1, n + 1)

    # Find cluster id's in nf (from Numerical Recipes in C, page 346):
    for a, b in edg:
        while nf[a - 1] != a:
            a = nf[a - 1]
        while nf[b - 1] != b:
            b = nf[b - 1]
        if a != b:
            nf[a - 1] = b

    # Final sweep: make every entry point straight at its root.
    for i in range(n):
        while nf[i] != nf[nf[i] - 1]:
            nf[i] = nf[nf[i] - 1]

    vox = np.argwhere(excurset) + 1  # shape (n, 1), 1-based vertex ids
    # np.isin keeps the input shape, so flatten first to get one index
    # column out of argwhere (this is what the older np.in1d call did).
    ivox = np.argwhere(np.isin(vox.ravel(), lmvox)) + 1
    clmid = nf[ivox - 1]
    # 0-based position of each peak's cluster among the unique cluster ids.
    _, peak_cluster = np.unique(clmid.ravel(), return_inverse=True)

    ucid = np.unique(nf)
    nclus = len(ucid)

    # Equivalent of MATLAB's histc: count the vertices of each cluster.
    bin_edges = np.r_[-np.inf, 0.5 * (ucid[:-1] + ucid[1:]), np.inf]
    ucvol, _ = np.histogram(nf, bin_edges)

    if reselspvert is None:
        reselsvox = np.ones(np.shape(vox))
    else:
        reselsvox = reselspvert[vox - 1]

    # calling matlab-python version for scipy's interp1d
    nf1 = interp1(np.append(0, ucid), np.arange(0, nclus + 1), nf,
                  kind='nearest')

    # if k>1, find volume of cluster in added sphere
    n_variates = slm['k'] if 'k' in slm else 1
    if n_variates == 1:
        ucrsl = np.bincount(nf1.astype(int), reselsvox.flatten())
    elif n_variates == 2:
        if l == 1:
            # Number of elements of 'df', as described in the docstring.
            ndf = np.size(slm['df'])
            r = 2 * np.arccos(
                (thresh / slm_t[0, vox - 1]) ** (float(1) / ndf))
        else:
            r = 2 * np.arccos(np.sqrt(
                (thresh - slm_t[1, vox - 1])
                * (thresh >= slm_t[1, vox - 1])
                / (slm_t[0, vox - 1] - slm_t[1, vox - 1])))
        ucrsl = np.bincount(nf1.astype(int),
                            (r.T * reselsvox.T).flatten())
    elif n_variates == 3:
        if l == 1:
            ndf = np.size(slm['df'])
            r = 2 * math.pi * (
                1 - (thresh / slm_t[0, vox - 1]) ** (float(1) / ndf))
        else:
            nt = 20
            theta = (np.arange(1, nt + 1, 1) - 1 / 2) / nt * math.pi / 2
            s = (np.cos(theta) ** 2 * slm_t[1, vox - 1]).T
            if l == 3:
                s = s + ((np.sin(theta) ** 2) * slm_t[2, vox - 1]).T
            r = 2 * math.pi * (1 - np.sqrt(
                (thresh - s) * (thresh >= s)
                / (np.ones((nt, 1)) * slm_t[0, vox - 1].T - s))).mean(axis=0)
        ucrsl = np.bincount(nf1.astype(int),
                            (r.T * reselsvox.T).flatten())
    else:
        raise ValueError("slm['k'] must be 1, 2 or 3")

    # and their ranks (in ascending order); entry 0 of ucrsl belongs to the
    # padding value 0 and is skipped.
    cluster_resels = ucrsl[1:]
    iucrls = sorted(range(len(cluster_resels)),
                    key=lambda idx: cluster_resels[idx])
    rankrsl = np.zeros((1, nclus))
    rankrsl[0, iucrls] = np.arange(nclus, 0, -1)

    lmid = lmvox[ismember(lmvox, vox)[0]]

    # Peak table: value, vertex id, cluster rank; sorted by decreasing value.
    varA = slm_t[0, (lmid - 1)]
    varB = lmid
    varC = rankrsl[0, peak_cluster]
    varALL = np.concatenate((varA.reshape(len(varA), 1),
                             varB.reshape(len(varB), 1),
                             varC.reshape(len(varC), 1)), axis=1)
    lm = np.flipud(varALL[varALL[:, 0].argsort()])

    # Cluster table: rank, vertex count, resels; sorted by rank.
    varNEW = np.concatenate((rankrsl.T,
                             ucvol.reshape(len(ucvol), 1),
                             ucrsl.reshape(len(ucrsl), 1)[1:]), axis=1)
    cl = varNEW[varNEW[:, 0].argsort()]

    clusid = np.zeros((1, v))
    clusid[0, (vox - 1).T] = interp1(np.append(0, ucid),
                                     np.append(0, rankrsl), nf,
                                     kind='nearest')

    peak = {
        't': lm[:, 0].reshape(len(lm[:, 0]), 1),
        'vertid': lm[:, 1].reshape(len(lm[:, 1]), 1),
        'clusid': lm[:, 2].reshape(len(lm[:, 2]), 1),
    }
    clus = {
        'clusid': cl[:, 0].reshape(len(cl[:, 0]), 1),
        'nverts': cl[:, 1].reshape(len(cl[:, 1]), 1),
        'resels': cl[:, 2].reshape(len(cl[:, 2]), 1),
    }
    return peak, clus, clusid
def test_07():
    """Run ``dummy_test`` on a random 10x10x10 boolean lattice, no mask."""
    lattice = np.random.rand(10, 10, 10) > 0.5
    slm = {'lat': lattice}

    # One 'resl' row per edge returned for this lattice.
    n_edges = py_SurfStatEdg(slm).shape[0]
    slm['resl'] = np.random.rand(n_edges, 1)

    dummy_test(slm)
def test_09():
    """Run ``dummy_test`` on a random boolean lattice with an all-False mask."""
    lattice = np.random.rand(10, 10, 10) > 0.5
    slm = {'lat': lattice}

    # One mask entry per True cell of the lattice, every entry False.
    mask = np.zeros(np.sum(lattice), dtype=bool)

    n_edges = py_SurfStatEdg(slm).shape[0]
    slm['resl'] = np.random.rand(n_edges, 1)

    dummy_test(slm, mask)
def test_08():
    """Run ``dummy_test`` on a random boolean lattice with a random mask."""
    lattice = np.random.rand(10, 10, 10) > 0.5
    slm = {'lat': lattice}

    # One randomly chosen boolean per True cell of the lattice.
    mask = np.random.choice([False, True], np.sum(lattice))

    n_edges = py_SurfStatEdg(slm).shape[0]
    slm['resl'] = np.random.rand(n_edges, 1)

    dummy_test(slm, mask)
def py_SurfStatLinMod(Y, M, surf=None, niter=1, thetalim=0.01, drlim=0.1):
    """Fit linear mixed effects models to surface data and estimate resels.

    The input arrays are not modified: residuals and normalised variance
    bases are written to new arrays.

    Parameters
    ----------
    Y : ndarray, shape = (n_samples, n_verts) or (n_samples, n_verts, n_feats)
        Surface data.
    M : Term or Random
        Design matrix. A plain ndarray is also accepted and used as the
        design matrix directly (a warning is issued if it has more than one
        element).
    surf : dict, optional
        Surface triangles (surf['tri']) or volumetric data (surf['lat']).
        If 'tri', shape = (n_cells, 3). If 'lat', then it is a boolean 3D
        array. Default is None.
    niter : int, optional
        Number of extra iterations of the Fisher scoring algorithm for
        fitting mixed effects models. Default is 1.
    thetalim : float, optional
        Lower limit on variance coefficients, in sd's. Default is 0.01.
    drlim : float, optional
        Step of ratio of variance coefficients, in sd's. Default 0.1.

    Returns
    -------
    slm : dict
        Dictionary with the following keys:

        - 'X' : ndarray, shape = (n_samples, n_pred)
            Design matrix.
        - 'df' : int
            Degrees of freedom.
        - 'coef' : ndarray, shape = (n_pred, n_verts) or
            (n_pred, n_verts, n_feats)
            Model coefficients.
        - 'SSE' : ndarray, shape = (n_feats * (n_feats + 1) / 2, n_verts)
            Sums of squares (and cross products) of the residuals.
        - 'V' : ndarray, shape = (n_samples, n_samples, n_rand)
            Variance matrix bases, normalised to unit mean diagonal.
            Only when a non-identity variance model is given.
        - 'r' : ndarray, shape = (n_rand - 1, n_verts)
            Coefficients of the first (q-1) components of 'V' divided by
            their sum. Coefficients are clamped to a minimum of
            thetalim x sd. Only when mixed effects.
        - 'dr' : ndarray, shape = (n_rand - 1, 1)
            Increments in 'r' = drlim x sd. Only when mixed effects.
        - 'resl' : ndarray, (n_edges, n_feats)
            Sum over observations of squares of differences of normalized
            residuals along each edge. Only when ``surf is not None``.
        - 'tri' : ndarray, (n_cells, 3)
            Cells in surf. Only when ``surf`` has a 'tri' key.
        - 'lat' : ndarray
            Lattice copied from surf. Only when ``surf`` has 'lat' and no
            'tri' key.

    Raises
    ------
    ValueError
        For multivariate data combined with more than one variance component.
    """
    n, v = Y.shape[:2]  # number of samples x number of points
    k = 1 if Y.ndim == 2 else Y.shape[2]  # number of features

    # Get data from term/random
    V = None
    if isinstance(M, Random):
        X, Vl = M.mean.matrix.values, M.variance.matrix.values

        # check that the variance model can reproduce the identity
        # (error term)
        _, q = Vl.shape
        II = np.identity(n).ravel()

        r = II - Vl @ (la.pinv(Vl) @ II)
        if (r ** 2).mean() > np.finfo(float).eps:
            warnings.warn('Did you forget an error term, I? :-)')

        # A single identity component is plain OLS; anything else is kept
        # as a stack of (n, n) variance bases.
        if q > 1 or q == 1 and np.abs(II - Vl.T).sum() > 0:
            V = Vl.reshape(n, n, -1)
    else:  # No random term
        q = 1
        if isinstance(M, Term):
            X = M.matrix.values
        else:
            if M.size > 1:
                warnings.warn("If you don't convert vectors to terms you "
                              "can get unexpected results :-(")
            X = M

        if X.shape[0] == 1:
            X = np.tile(X, (n, 1))

    # check if term (x) contains intercept (constant term)
    pinvX = la.pinv(X)
    r = 1 - X @ pinvX.sum(1)
    if (r ** 2).mean() > np.finfo(float).eps:
        warnings.warn('Did you forget a constant term? :-)')

    p = X.shape[1]  # number of predictors
    df = n - la.matrix_rank(X)  # degrees of freedom

    slm = dict(df=df, X=X)

    if k == 1:  # Univariate
        if q == 1:  # Fixed effects
            if V is None:  # OLS
                coef = pinvX @ Y
                Y = Y - X @ coef
            else:
                # V has shape (n, n, 1) here; normalise it to unit mean
                # diagonal and whiten with the inverse lower Cholesky
                # factor, as the mixed effects path below does.
                V = V / np.diag(V[..., 0]).mean()
                Vmh = la.inv(la.cholesky(V[..., 0]))
                coef = (la.pinv(Vmh @ X) @ Vmh) @ Y
                Y = Vmh @ Y - (Vmh @ X) @ coef

            sse = np.sum(Y ** 2, axis=0)

        else:  # mixed effects
            q1 = q - 1

            # Out-of-place division: V may be a view of the model's
            # variance matrix.
            V = V / np.diagonal(V, axis1=0, axis2=1).mean(-1)
            slm_r = np.zeros((q1, v))

            # start Fisher scoring algorithm
            R = np.eye(n) - X @ la.pinv(X)
            RVV = (V.T @ R.T).T  # RVV[..., j] == R @ V[..., j]
            E = (Y.T @ (R.T @ RVV.T))
            E *= Y.T
            E = E.sum(-1)

            M = np.einsum('ijk,jil->kl', RVV, RVV, optimize='optimal')

            theta = la.pinv(M) @ E
            tlim = np.sqrt(2 * np.diag(la.pinv(M))) * thetalim
            tlim = tlim[:, None] * theta.sum(0)
            m = theta < tlim
            theta[m] = tlim[m]
            r = theta[:q1] / theta.sum(0)

            Vt = 2 * la.pinv(M)
            m1 = np.diag(Vt)
            m2 = 2 * Vt.sum(0)
            # NOTE(review): slm_r is still all zeros here, so the middle
            # term vanishes; confirm against the reference implementation.
            Vr = (m1[:q1] - m2[:q1] * slm_r.mean(1)
                  + Vt.sum() * (r ** 2).mean(-1))
            dr = np.sqrt(Vr) * drlim

            # Extra Fisher scoring iterations
            for _ in range(niter):
                # Vertices whose ratios round to the same multiple of dr
                # are processed together.
                irs = np.round(r.T / dr)
                ur, jr = np.unique(irs, axis=0, return_inverse=True)
                jr = np.ravel(jr)  # keep usable as a 1-D vertex mask
                nr = ur.shape[0]
                for ir in range(nr):
                    iv = jr == ir
                    rv = r[:, iv].mean(1)

                    Vs = (1 - rv.sum()) * V[..., q - 1]
                    Vs += (V[..., :q1] * rv).sum(-1)

                    Vinv = la.inv(Vs)
                    VinvX = Vinv @ X
                    G = la.pinv(X.T @ VinvX) @ VinvX.T
                    R = Vinv - VinvX @ G

                    RVV = (V.T @ R.T).T
                    E = (Y[:, iv].T @ (R.T @ RVV.T))
                    E *= Y[:, iv].T
                    E = E.sum(-1)

                    M = np.einsum('ijk,jil->kl', RVV, RVV,
                                  optimize='optimal')

                    thetav = la.pinv(M) @ E
                    tlim = np.sqrt(2 * np.diag(la.pinv(M))) * thetalim
                    tlim = tlim[:, None] * thetav.sum(0)
                    m = thetav < tlim
                    thetav[m] = tlim[m]
                    theta[:, iv] = thetav

                r = theta[:q1] / theta.sum(0)

            # finish Fisher scoring
            irs = np.round(r.T / dr)
            ur, jr = np.unique(irs, axis=0, return_inverse=True)
            jr = np.ravel(jr)
            nr = ur.shape[0]

            coef = np.zeros((p, v))
            sse = np.zeros(v)
            # Residuals go to a fresh buffer so the caller's Y is kept
            # intact; every vertex belongs to exactly one group, so the
            # buffer is filled completely.
            resid = np.empty((n, v))
            for ir in range(nr):
                iv = jr == ir
                rv = r[:, iv].mean(1)

                Vs = (1 - rv.sum()) * V[..., q - 1]
                Vs += (V[..., :q1] * rv).sum(-1)

                Vmh = la.inv(la.cholesky(Vs))
                VmhX = Vmh @ X
                G = (la.pinv(VmhX.T @ VmhX) @ VmhX.T) @ Vmh

                coef[:, iv] = G @ Y[:, iv]
                R = Vmh - VmhX @ G
                resid[:, iv] = R @ Y[:, iv]
                sse[iv] = (resid[:, iv] ** 2).sum(0)

            Y = resid
            slm.update(dict(r=r, dr=dr[:, None]))

        sse = sse[None]

    else:  # multivariate
        if q > 1:
            raise ValueError('Multivariate mixed effects models not yet '
                             'implemented :-(')

        if V is None:
            X2 = X
        else:
            # Same whitening as the univariate case, applied along the
            # sample axis of every feature.
            V = V / np.diag(V[..., 0]).mean()
            Vmh = la.inv(la.cholesky(V[..., 0]))
            X2 = Vmh @ X
            pinvX = la.pinv(X2)
            Y = np.tensordot(Vmh, Y, axes=1)

        coef = pinvX @ Y.T.swapaxes(-1, -2)
        Y = Y - (X2 @ coef).swapaxes(-1, -2).T
        coef = coef.swapaxes(-1, -2).T

        # One row per pair (j1, j2) with j2 <= j1.
        k2 = k * (k + 1) // 2
        sse = np.zeros((k2, v))
        j = -1
        for j1 in range(k):
            for j2 in range(j1 + 1):
                j = j + 1
                sse[j] = (Y[..., j1] * Y[..., j2]).sum(0)

    slm.update(dict(coef=coef, SSE=sse))
    if V is not None:
        slm['V'] = V

    if surf is not None and ('tri' in surf or 'lat' in surf):
        key = 'tri' if 'tri' in surf else 'lat'
        slm[key] = surf[key]

        # edges are used directly as indices into the vertex axis
        edges = py_SurfStatEdg(surf)

        n_edges = edges.shape[0]
        resl = np.zeros((n_edges, k))
        Y = np.atleast_3d(Y)

        for j in range(k):
            # Row of sse holding the sum of squares of feature j with itself.
            normr = np.sqrt(sse[((j + 1) * (j + 2) // 2) - 1])
            for i in range(n):
                u = Y[i, :, j] / normr
                resl[:, j] += np.diff(u[edges], axis=1).ravel() ** 2

        slm['resl'] = resl

    return slm