def test_09():
    # random slm['t'], slm['df'], slm['tri'], slm['resl'],
    # special input case: slm['dfs'] and slm['du']
    k = random.randint(1000, 10000)
    m = random.randint(1000, 10000)
    n = random.randint(1, 10)

    slm = {}
    slm['t'] = np.random.rand(1, k)
    slm['df'] = np.array([[m]])
    slm['k'] = 1
    slm['du'] = n
    slm['tri'] = np.random.randint(1, k, size=(m, 3))
    edg = py_SurfStatEdg(slm)
    slm['resl'] = np.random.rand(edg.shape[0], 1)
    slm['dfs'] = np.ones((1, k))
    dummy_test(slm)
Example #2
def py_SurfStatPeakClus(slm, mask, thresh, reselspvert=None, edg=None):
    """ Finds peaks (local maxima) and clusters for surface data.
    Parameters
    ----------
    slm : a dictionary, mandatory keys: 't', 'tri' (or 'lat'),
        optional keys 'df', 'k'.
        slm['t'] : numpy array of shape (l,v),
            v is the number of vertices, the first row slm['t'][0,:] is used
            for the clusters, and the other rows are used to calculate cluster
            resels if slm['k']>1. See SurfStatF for the precise definition
            of the extra rows.
        slm['tri'] : numpy array of shape (t,3), dtype=int,
            triangle indices, values should be between 1 and v,
        or,
        slm['lat'] : numpy array of shape (nx,ny,nz),
            values should be either 0 or 1.
            note that [nx,ny,nz]=size(volume).
        slm['df'] : int,
            degrees of freedom, note that only the length (1 or 2) is used
            to determine if slm['t'] is Hotelling's T or T^2 when k>1.
        slm['k'] : int,
            k is the number of variates, by default 1.
    mask : numpy array of shape (v,), dtype=int,
        values should be either 0 or 1.
    thresh : float,
        clusters are vertices where slm['t'][0,mask]>=thresh.
    reselspvert : numpy array of shape (v,),
        resels per vertex, by default np.ones(v).
    edg : numpy array of shape (e,2), dtype=int,
        edge indices, by default computed by the SurfStatEdg function.

    Returns
    -------
    peak : a dictionary with keys 't', 'vertid', 'clusid'.
        peak['t'] : numpy array of shape (np,1),
            array of peaks (local maxima), np is the number of peaks.
        peak['vertid'] : numpy array of shape (np,1),
            array of vertex id's (1-based).
        peak['clusid'] : numpy array of shape (np,1),
            array of cluster id's that contain the peak.
    clus : a dictionary with keys 'clusid', 'nverts', 'resels'.
        clus['clusid'] : numpy array of shape (nc,1),
            array of cluster id numbers.
        clus['nverts'] : numpy array of shape (nc,1),
            array of number of vertices in the cluster.
        clus['resels'] : numpy array of shape (nc,1),
            array of resels in the cluster.
    clusid : numpy array of shape (1,v),
        array of cluster id's for each vertex.
	"""
    if edg is None:
        edg = py_SurfStatEdg(slm)

    l, v = np.shape(slm['t'])
    slm_t = copy.deepcopy(slm['t'])
    slm_t[0, ~mask.astype(bool)] = slm_t[0, :].min()
    # mark local maxima: zero out the smaller endpoint of every edge, so
    # only vertices not exceeded by any neighbour remain
    t1 = slm_t[0, edg[:, 0]]
    t2 = slm_t[0, edg[:, 1]]
    islm = np.ones((1, v))
    islm[0, edg[t1 < t2, 0]] = 0
    islm[0, edg[t2 < t1, 1]] = 0
    lmvox = np.argwhere(islm)[:, 1] + 1
    # excursion set: vertices at or above the threshold
    excurset = np.array(slm_t[0, :] >= thresh, dtype=int)
    n = excurset.sum()

    if n < 1:
        peak = []
        clus = []
        clusid = []
        return peak, clus, clusid

    # renumber the n suprathreshold vertices 1..n and keep only edges with
    # both endpoints inside the excursion set
    voxid = np.cumsum(excurset)
    edg = voxid[edg[np.all(excurset[edg], 1), :]]
    nf = np.arange(1, n + 1)

    # Find cluster id's in nf (union-find, from Numerical Recipes in C,
    # page 346): for each edge, follow both endpoints to their roots and
    # link the roots together
    for el in range(1, edg.shape[0] + 1):
        j = edg[el - 1, 0]
        k = edg[el - 1, 1]
        while nf[j - 1] != j:
            j = nf[j - 1]
        while nf[k - 1] != k:
            k = nf[k - 1]
        if j != k:
            nf[j - 1] = k

    # path compression: point every vertex directly at its cluster root
    for j in range(1, n + 1):
        while nf[j - 1] != nf[nf[j - 1] - 1]:
            nf[j - 1] = nf[nf[j - 1] - 1]

    vox = np.argwhere(excurset) + 1  # 1-based ids of suprathreshold vertices
    ivox = np.argwhere(np.in1d(vox, lmvox)) + 1  # local maxima among them
    clmid = nf[ivox - 1]  # cluster root of each local maximum
    uclmid, iclmid, jclmid = np.unique(clmid,
                                       return_index=True,
                                       return_inverse=True)
    iclmid = iclmid + 1
    jclmid = jclmid + 1
    ucid = np.unique(nf)
    nclus = len(ucid)
    # emulate matlab's histc: count the number of vertices in each cluster
    bin_edges = np.r_[-np.inf, 0.5 * (ucid[:-1] + ucid[1:]), np.inf]
    ucvol, ucvol_edges = np.histogram(nf, bin_edges)

    if reselspvert is None:
        reselsvox = np.ones(np.shape(vox))
    else:
        reselsvox = reselspvert[vox - 1]

    # nearest-neighbour interp1 (matlab-style wrapper around scipy's
    # interp1d) maps each cluster root in ucid to a consecutive cluster index
    nf1 = interp1(np.append(0, ucid),
                  np.arange(0, nclus + 1),
                  nf,
                  kind='nearest')

    # if k>1, find volume of cluster in added sphere
    if 'k' not in slm or slm['k'] == 1:
        ucrsl = np.bincount(nf1.astype(int), reselsvox.flatten())
    if 'k' in slm and slm['k'] == 2:
        if l == 1:
            ndf = len(np.array([slm['df']]))
            r = 2 * np.arccos((thresh / slm_t[0, vox - 1])**(float(1) / ndf))
        else:
            r = 2 * np.arccos(
                np.sqrt((thresh - slm_t[1, vox - 1]) *
                        (thresh >= slm_t[1, vox - 1]) /
                        (slm_t[0, vox - 1] - slm_t[1, vox - 1])))
        ucrsl = np.bincount(nf1.astype(int), (r.T * reselsvox.T).flatten())
    if 'k' in slm and slm['k'] == 3:
        if l == 1:
            ndf = len(np.array([slm['df']]))
            r = 2 * math.pi * (1 -
                               (thresh / slm_t[0, vox - 1])**(float(1) / ndf))
        else:
            nt = 20
            theta = (np.arange(1, nt + 1, 1) - 1 / 2) / nt * math.pi / 2
            s = (np.cos(theta)**2 * slm_t[1, vox - 1]).T
            if l == 3:
                s = s + ((np.sin(theta)**2) * slm_t[2, vox - 1]).T
            r = 2 * math.pi * (1 - np.sqrt(
                (thresh - s) * (thresh >= s) / (np.ones(
                    (nt, 1)) * slm_t[0, vox - 1].T - s))).mean(axis=0)
        ucrsl = np.bincount(nf1.astype(int), (r.T * reselsvox.T).flatten())

    # rank clusters by resels in descending order (largest gets rank 1)
    iucrls = sorted(range(len(ucrsl[1:])), key=lambda k: ucrsl[1:][k])
    rankrsl = np.zeros((1, nclus))
    rankrsl[0, iucrls] = np.arange(nclus, 0, -1)

    lmid = lmvox[ismember(lmvox, vox)[0]]

    # peak table [t, vertex id, cluster id], sorted by t in descending order
    varA = slm_t[0, (lmid - 1)]
    varB = lmid
    varC = rankrsl[0, jclmid - 1]
    varALL = np.concatenate((varA.reshape(
        len(varA), 1), varB.reshape(len(varB), 1), varC.reshape(len(varC), 1)),
                            axis=1)
    lm = np.flipud(varALL[varALL[:, 0].argsort(), ])
    # cluster table [cluster id, nverts, resels], sorted by cluster id
    varNEW = np.concatenate((rankrsl.T, ucvol.reshape(
        len(ucvol), 1), ucrsl.reshape(len(ucrsl), 1)[1:]),
                            axis=1)
    cl = varNEW[varNEW[:, 0].argsort(), ]
    clusid = np.zeros((1, v))
    clusid[0, (vox - 1).T] = interp1(np.append(0, ucid),
                                     np.append(0, rankrsl),
                                     nf,
                                     kind='nearest')
    peak = {}
    peak['t'] = lm[:, 0].reshape(len(lm[:, 0]), 1)
    peak['vertid'] = lm[:, 1].reshape(len(lm[:, 1]), 1)
    peak['clusid'] = lm[:, 2].reshape(len(lm[:, 2]), 1)
    clus = {}
    clus['clusid'] = cl[:, 0].reshape(len(cl[:, 0]), 1)
    clus['nverts'] = cl[:, 1].reshape(len(cl[:, 1]), 1)
    clus['resels'] = cl[:, 2].reshape(len(cl[:, 2]), 1)

    return peak, clus, clusid
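# A minimal usage sketch (not from the original test suite): a single
# tetrahedron with two suprathreshold vertices forming one cluster whose
# peak is at vertex 4. Assumes py_SurfStatPeakClus and its helpers
# (py_SurfStatEdg, interp1, ismember) are available in the same module.
import numpy as np

def example_peakclus_usage():
    slm = {'t': np.array([[0.2, 0.8, 0.3, 0.9]]),   # one t-value per vertex
           'tri': np.array([[1, 2, 3], [1, 2, 4],
                            [1, 3, 4], [2, 3, 4]])}  # 1-based triangles
    mask = np.ones(4, dtype=int)                     # keep all vertices
    peak, clus, clusid = py_SurfStatPeakClus(slm, mask, thresh=0.5)
    # expected: peak['t'] == [[0.9]], peak['vertid'] == [[4]],
    # clus['nverts'] == [[2]], clusid == [[0, 1, 0, 1]]
    return peak, clus, clusid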
def test_07():
    slm = {'lat': np.random.rand(10,10,10) > 0.5}
    edg = py_SurfStatEdg(slm)
    slm['resl'] = np.random.rand(edg.shape[0],1)
    dummy_test(slm)
def test_09():
    slm = {'lat': np.random.rand(10,10,10) > 0.5}
    mask = np.zeros(np.sum(slm['lat']), dtype=bool)
    edg = py_SurfStatEdg(slm)
    slm['resl'] = np.random.rand(edg.shape[0],1)
    dummy_test(slm, mask) 
def test_08():
    slm = {'lat': np.random.rand(10,10,10) > 0.5}
    mask = np.random.choice([False,True],np.sum(slm['lat']))
    edg = py_SurfStatEdg(slm)
    slm['resl'] = np.random.rand(edg.shape[0],1)
    dummy_test(slm, mask) 
Example #6
def py_SurfStatLinMod(Y, M, surf=None, niter=1, thetalim=0.01, drlim=0.1):
    """ Fits linear mixed effects models to surface data and estimates resels.

    Parameters
    ----------
    Y : ndarray, shape = (n_samples, n_verts) or (n_samples, n_verts, n_feats)
        Surface data.
    M : Term or Random
        Design matrix.
    surf : dict, optional
        Surface triangles (surf['tri']) or volumetric data (surf['lat']).
        If 'tri', shape = (n_cells, 3). If 'lat', then it is a boolean 3D
        array. Default is None.
    niter : int, optional
        Number of extra iterations of the Fisher scoring algorithm for fitting
        mixed effects models. Default is 1.
    thetalim : float, optional
        Lower limit on variance coefficients, in sd's. Default is 0.01.
    drlim : float, optional
        Step of ratio of variance coefficients, in sd's. Default 0.1.

    Returns
    -------
    slm : dict
        Dictionary with the following keys:

        - 'X' : ndarray, shape = (n_samples, n_pred)
            Design matrix.
        - 'df' : int
            Degrees of freedom.
        - 'coef' : ndarray, shape = (n_pred, n_verts)
            Model coefficients.
        - 'SSE' : ndarray, shape = (n_feat, n_verts)
            Sum of square errors.
        - 'V' : ndarray, shape = (n_samples, n_samples, n_rand)
            Variance matrix bases. Only when mixed effects.
        - 'r' : ndarray, shape = (n_rand - 1, n_verts)
            Coefficients of the first (q-1) components of 'V' divided by their
            sum. Coefficients are clamped to a minimum of 0.01 x sd.
            Only when mixed effects.
        - 'dr' : ndarray
            Vector of increments in 'r': 0.1 x sd.
        - 'resl' : ndarray, (n_edges, n_feat)
            Sum over observations of squares of differences of normalized
            residuals along each edge. Only when ``surf is not None``.
        - 'tri' : ndarray, (n_cells, 3)
            Cells in surf. Only when ``surf is not None``.
        - 'lat' : ndarray
            Neighbors in lattice. Only when ``surf is not None``.

    """

    n, v = Y.shape[:2]  # number of samples x number of points
    k = 1 if Y.ndim == 2 else Y.shape[2]  # number of features

    # Get data from term/random
    V = None
    if isinstance(M, Random):
        X, Vl = M.mean.matrix.values, M.variance.matrix.values

        # check whether the variance components span the identity, i.e.
        # whether an error term I was included
        n2, q = Vl.shape
        II = np.identity(n).ravel()

        r = II - Vl @ (la.pinv(Vl) @ II)
        if (r ** 2).mean() > np.finfo(float).eps:
            warnings.warn('Did you forget an error term, I? :-)')

        if q > 1 or (q == 1 and np.abs(II - Vl.T).sum() > 0):
            V = Vl.reshape(n, n, -1)

    else:  # No random term
        q = 1
        if isinstance(M, Term):
            X = M.matrix.values
        else:
            if M.size > 1:
                warnings.warn("If you don't convert vectors to terms you "
                              "can get unexpected results :-(")
            X = M

        if X.shape[0] == 1:
            X = np.tile(X, (n, 1))

    # check whether the design matrix X contains an intercept (constant term)
    pinvX = la.pinv(X)
    r = 1 - X @ pinvX.sum(1)
    if (r ** 2).mean() > np.finfo(float).eps:
        warnings.warn('Did you forget an error term, I? :-)')

    p = X.shape[1]  # number of predictors
    df = n - la.matrix_rank(X)  # degrees of freedom

    slm = dict(df=df, X=X)

    if k == 1:  # Univariate

        if q == 1:  # Fixed effects

            if V is None:  # OLS
                coef = pinvX @ Y
                Y = Y - X @ coef

            else:
                V = V / np.diag(V).mean(0)
                Vmh = la.inv(la.cholesky(V).T)

                coef = (la.pinv(Vmh @ X) @ Vmh) @ Y
                Y = Vmh @ Y - (Vmh @ X) @ coef

            sse = np.sum(Y ** 2, axis=0)

        else:  # mixed effects

            q1 = q - 1

            V /= np.diagonal(V, axis1=0, axis2=1).mean(-1)
            slm_r = np.zeros((q1, v))

            # start Fisher scoring algorithm
            R = np.eye(n) - X @ la.pinv(X)
            RVV = (V.T @ R.T).T
            E = (Y.T @ (R.T @ RVV.T))
            E *= Y.T
            E = E.sum(-1)

            RVV2 = np.zeros([n, n, q])
            E2 = np.zeros([q, v])
            for j in range(q):
                RV2 = R @ V[..., j]
                E2[j] = (Y * ((RV2 @ R) @ Y)).sum(0)
                RVV2[..., j] = RV2

            M = np.einsum('ijk,jil->kl', RVV, RVV, optimize='optimal')

            theta = la.pinv(M) @ E
            tlim = np.sqrt(2*np.diag(la.pinv(M))) * thetalim
            tlim = tlim[:, None] * theta.sum(0)
            m = theta < tlim
            theta[m] = tlim[m]
            r = theta[:q1] / theta.sum(0)

            Vt = 2*la.pinv(M)
            m1 = np.diag(Vt)
            m2 = 2 * Vt.sum(0)
            Vr = m1[:q1]-m2[:q1] * slm_r.mean(1) + Vt.sum()*(r**2).mean(-1)
            dr = np.sqrt(Vr) * drlim

            # Extra Fisher scoring iterations
            for it in range(niter):
                irs = np.round(r.T / dr)
                ur, jr = np.unique(irs, axis=0, return_inverse=True)
                nr = ur.shape[0]
                for ir in range(nr):
                    iv = jr == ir
                    rv = r[:, iv].mean(1)

                    Vs = (1-rv.sum()) * V[..., q-1]
                    Vs += (V[..., :q1] * rv).sum(-1)

                    Vinv = la.inv(Vs)
                    VinvX = Vinv @ X
                    G = la.pinv(X.T @ VinvX) @ VinvX.T
                    R = Vinv - VinvX @ G

                    RVV = (V.T @ R.T).T
                    E = (Y[:, iv].T @ (R.T @ RVV.T))
                    E *= Y[:, iv].T
                    E = E.sum(-1)

                    M = np.einsum('ijk,jil->kl', RVV, RVV, optimize='optimal')

                    thetav = la.pinv(M) @ E
                    tlim = np.sqrt(2*np.diag(la.pinv(M))) * thetalim
                    tlim = tlim[:, None] * thetav.sum(0)

                    m = thetav < tlim
                    thetav[m] = tlim[m]
                    theta[:, iv] = thetav

                r = theta[:q1] / theta.sum(0)

            # finish Fisher scoring
            irs = np.round(r.T / dr)
            ur, jr = np.unique(irs, axis=0, return_inverse=True)
            nr = ur.shape[0]

            coef = np.zeros((p, v))
            sse = np.zeros(v)
            for ir in range(nr):
                iv = jr == ir
                rv = r[:, iv].mean(1)

                Vs = (1 - rv.sum()) * V[..., q - 1]
                Vs += (V[..., :q1] * rv).sum(-1)

                # Vmh = la.inv(la.cholesky(Vs).T)
                Vmh = la.inv(la.cholesky(Vs))
                VmhX = Vmh @ X
                G = (la.pinv(VmhX.T @ VmhX) @ VmhX.T) @ Vmh

                coef[:, iv] = G @ Y[:, iv]
                R = Vmh - VmhX @ G
                Y[:, iv] = R @ Y[:, iv]
                sse[iv] = (Y[:, iv]**2).sum(0)

            slm.update(dict(r=r, dr=dr[:, None]))

        sse = sse[None]

    else:  # multivariate
        if q > 1:
            raise ValueError('Multivariate mixed effects models not yet '
                             'implemented :-(')

        if V is None:
            X2 = X
        else:
            V = V / np.diag(V).mean(0)
            Vmh = la.inv(la.cholesky(V)).T
            X2 = Vmh @ X
            pinvX = la.pinv(X2)
            Y = Vmh @ Y

        coef = pinvX @ Y.T.swapaxes(-1, -2)
        Y = Y - (X2 @ coef).swapaxes(-1, -2).T
        coef = coef.swapaxes(-1, -2).T

        k2 = k * (k + 1) // 2
        sse = np.zeros((k2, v))
        j = -1
        for j1 in range(k):
            for j2 in range(j1+1):
                j = j + 1
                sse[j] = (Y[..., j1]*Y[..., j2]).sum(0)

    slm.update(dict(coef=coef, SSE=sse))
    if V is not None:
        slm['V'] = V

    if surf is not None and ('tri' in surf or 'lat' in surf):
        key = 'tri' if 'tri' in surf else 'lat'
        slm[key] = surf[key]

        edges = py_SurfStatEdg(surf)  # should start from 0?

        n_edges = edges.shape[0]

        resl = np.zeros((n_edges, k))
        Y = np.atleast_3d(Y)

        for j in range(k):
            # residual norm for feature j: sqrt of its diagonal SSE entry
            normr = np.sqrt(sse[((j+1) * (j+2) // 2) - 1])
            for i in range(n):
                u = Y[i, :, j] / normr
                resl[:, j] += np.diff(u[edges], axis=1).ravel()**2

        slm['resl'] = resl

    return slm
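# A minimal usage sketch of a fixed effects fit (assumptions: Term is the
# SurfStat/BrainStat Term class with a pandas-backed .matrix attribute; the
# sample sizes and 'age' covariate are purely illustrative).
import numpy as np

def example_linmod_usage():
    n_samples, n_verts = 20, 100
    Y = np.random.rand(n_samples, n_verts)        # surface data
    age = np.random.uniform(20, 60, n_samples)    # hypothetical covariate
    M = Term(1) + Term(age, 'age')                # intercept + age
    slm = py_SurfStatLinMod(Y, M)                 # fixed effects, OLS fit
    # slm['coef'] has one row per predictor; slm['df'] = n - rank(X) = 18
    return slm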