예제 #1
0
def test_gradient():
    """Check the analytic gradient of GBRSA's marginalized log likelihood.

    Data are simulated for three data sets that share one design matrix
    but differ in the number of runs and of data columns. The derivative
    returned by ``GBRSA._sum_loglike_marginalized`` with respect to the
    vectorized Cholesky factor is then compared against a numerical
    directional derivative (``numdifftools.directionaldiff``) taken along
    a random unit direction.

    The random seed is fixed, so the order in which random numbers are
    drawn below is part of the test and must not be rearranged.
    """
    from brainiak.reprsimil.brsa import GBRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)

    # Tile the design (all but its last column) 1, 2 and 1 times,
    # mimicking experiments of different lengths for different
    # participants.
    n_run = [1, 2, 1]
    n_subj = len(n_run)
    n_V = [30, 30, 20]
    design_mat = [np.tile(design.design_task[:, :-1], [runs, 1])
                  for runs in n_run]
    n_T = [runs * design.n_TR for runs in n_run]

    # start simulating some data
    n_C = np.size(design_mat[0], axis=1)

    # The noise level of each data column is drawn uniformly from
    # [noise_bot, noise_top).
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = [np.random.rand(n_col) * (noise_top - noise_bot) + noise_bot
                   for n_col in n_V]

    # Range of the AR(1) coefficients
    rho1_top = 0.8
    rho1_bot = -0.2

    noise = [None] * n_subj
    # baseline
    inten = [None] * n_subj
    for i in range(n_subj):
        rho1 = np.random.rand(n_V[i]) * (rho1_top - rho1_bot) + rho1_bot
        # AR(1) noise; the first sample is scaled by 1 / sqrt(1 - rho1^2).
        noise[i] = np.zeros([n_T[i], n_V[i]])
        noise[i][0, :] = np.random.randn(
            n_V[i]) * noise_level[i] / np.sqrt(1 - rho1**2)
        for i_t in range(1, n_T[i]):
            noise[i][i_t, :] = noise[i][i_t - 1, :] * rho1 + \
                np.random.randn(n_V[i]) * noise_level[i]
        # Add a component built from two random time courses that is
        # shared across all data columns.
        noise[i] = noise[i] + \
            np.dot(np.random.randn(n_T[i], 2), np.random.randn(2, n_V[i]))
        inten[i] = np.random.rand(n_V[i]) * 20.0

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0:4, 0:4] = 0.2
    for cond in range(0, 4):
        ideal_cov[cond, cond] = 2
    ideal_cov[5:9, 5:9] = 0.9
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_top = 5.0  # test with high SNR
    snr_bot = 1.0
    # Notice that accurately speaking this is not snr. the magnitude of signal
    # depends not only on beta but also on x.
    Y = [None] * n_subj
    for i in range(n_subj):
        snr = np.random.rand(n_V[i]) * (snr_top - snr_bot) + snr_bot
        sqrt_v = noise_level[i] * snr
        betas_simulated = np.dot(
            L_full, np.random.randn(n_C, n_V[i])) * sqrt_v
        signal = np.dot(design_mat[i], betas_simulated)
        # Adding noise and baseline to signal as data
        Y[i] = signal + noise[i] + inten[i]

    SNR_bins = 11
    rho_bins = 20
    gbrsa = GBRSA(n_iter=3,
                  rank=n_C,
                  SNR_bins=SNR_bins,
                  rho_bins=rho_bins,
                  logS_range=0.5)

    n_grid = SNR_bins * rho_bins
    half_log_det_X0TAX0 = [np.random.randn(n_grid) for _ in range(n_subj)]
    log_weights = np.random.randn(n_grid)
    log_fixed_terms = [np.random.randn(n_grid) for _ in range(n_subj)]
    l_idx = np.tril_indices(n_C)
    # One free parameter per entry of the lower triangle of an
    # n_C x n_C matrix.
    L_vec = np.random.randn(n_C * (n_C + 1) // 2)
    n_X0 = [2, 2, 2]
    s = np.linspace(1, SNR_bins, n_grid)
    a = np.linspace(0.5, 1, n_grid)
    # The calculations below are quite arbitrary and do not conform
    # to the model. They simply conform to the symmetry property and shape of
    # the matrix indicated by the model
    YTAcorrY_diag = [np.sum(y * y, axis=0) * a[:, None] for y in Y]
    s2XTAcorrX = [
        np.dot(x_mat.T, x_mat) * s[:, None, None]**2 * a[:, None, None]
        for x_mat in design_mat]
    sXTAcorrY = [
        np.dot(x_mat.T, y) * s[:, None, None] * a[:, None, None]
        for x_mat, y in zip(design_mat, Y)]

    def loglike(cholesky_vec):
        # Returns (log likelihood, derivative); only the first argument
        # varies between the analytic and the numerical evaluation.
        return gbrsa._sum_loglike_marginalized(cholesky_vec,
                                               s2XTAcorrX,
                                               YTAcorrY_diag,
                                               sXTAcorrY,
                                               half_log_det_X0TAX0,
                                               log_weights,
                                               log_fixed_terms,
                                               l_idx,
                                               n_C,
                                               n_T,
                                               n_V,
                                               n_X0,
                                               n_grid,
                                               rank=None)

    # test if the gradients are correct
    _, deriv0 = loglike(L_vec)
    # We test the gradient to the Cholesky factor along a random unit vector
    vec = np.random.randn(np.size(L_vec))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(lambda x: loglike(x)[0], L_vec, vec)
    assert np.isclose(dd, np.dot(deriv0, vec), rtol=1e-5), 'gradient incorrect'
예제 #2
0
def test_gradient():
    """Check the analytic gradients of BRSA's log-likelihood functions.

    Data are simulated from a tiled example design with AR(1) noise. The
    test first verifies the shapes of the re-used terms produced by
    ``_prepare_DF``, ``_prepare_data_XY``, ``_prepare_data_XYX0`` and
    ``_calc_sandwidge`` and the keys produced by ``_build_index_param``.
    It then compares the derivatives returned by
    ``_loglike_AR1_singpara``, ``_loglike_AR1_diagV_fitU`` and
    ``_loglike_AR1_diagV_fitV`` (without and with the GP prior) against
    numerical directional derivatives from
    ``numdifftools.directionaldiff``.

    The random seed is fixed, so the order in which random numbers are
    drawn below is part of the test and must not be rearranged.
    """
    from brainiak.reprsimil.brsa import BRSA
    import brainiak.utils.utils as utils
    import scipy.stats
    import numpy as np
    import os.path
    import numdifftools as nd

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)
    n_run = 4
    # concatenate it by 4 times, mimicking 4 runs of identical timing
    design.design_task = np.tile(design.design_task[:, :-1], [n_run, 1])
    design.n_TR = design.n_TR * n_run

    # start simulating some data
    n_V = 200
    n_C = np.size(design.design_task, axis=1)
    n_T = design.n_TR

    noise_bot = 0.5
    noise_top = 1.5
    noise_level = np.random.rand(n_V) * (noise_top - noise_bot) + noise_bot
    # noise level is random.

    # AR(1) coefficient
    rho1_top = 0.8
    rho1_bot = -0.2
    rho1 = np.random.rand(n_V) * (rho1_top - rho1_bot) + rho1_bot

    # generating noise
    noise = np.zeros([n_T, n_V])
    noise[0, :] = np.random.randn(n_V) * noise_level / np.sqrt(1 - rho1**2)
    for i_t in range(1, n_T):
        noise[i_t, :] = noise[i_t - 1, :] * rho1 + \
            np.random.randn(n_V) * noise_level

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0, 0] = 0.2
    ideal_cov[5:9, 5:9] = 0.6
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_level = 5.0  # test with high SNR
    inten = np.random.randn(n_V) * 20.0

    # parameters of Gaussian process to generate pseudo SNR
    tau = 0.8
    smooth_width = 5.0
    inten_kernel = 1.0

    coords = np.arange(0, n_V)[:, None]

    dist2 = np.square(coords - coords.T)

    inten_tile = np.tile(inten, [n_V, 1])
    inten_diff2 = (inten_tile - inten_tile.T)**2

    # Squared-exponential kernel over both position and intensity, plus a
    # small diagonal term.
    K = np.exp(-dist2 / smooth_width**2 / 2.0
               - inten_diff2 / inten_kernel**2 / 2.0) * tau**2 \
        + np.eye(n_V) * tau**2 * 0.001

    L = np.linalg.cholesky(K)
    snr = np.exp(np.dot(L, np.random.randn(n_V))) * snr_level
    # Notice that accurately speaking this is not snr. the magnitude of signal
    # depends not only on beta but also on x.
    sqrt_v = noise_level * snr
    betas_simulated = np.dot(L_full, np.random.randn(n_C, n_V)) * sqrt_v
    signal = np.dot(design.design_task, betas_simulated)

    # Adding noise to signal as data
    Y = signal + noise

    scan_onsets = np.linspace(0, design.n_TR, num=n_run + 1)

    # Test fitting with GP prior.
    brsa = BRSA(GP_space=True, GP_inten=True, verbose=False, n_iter=200,
                rank=n_C)

    # Additionally, we test the generation of re-used terms.
    X0 = np.ones(n_T)[:, None]
    D, F, run_TRs, n_run_returned = brsa._prepare_DF(
        n_T, scan_onsets=scan_onsets)
    assert n_run_returned == n_run, 'There is mistake in counting number of runs'
    assert np.sum(run_TRs) == n_T, 'The segmentation of the total experiment duration is wrong'
    XTY, XTDY, XTFY, YTY_diag, YTDY_diag, YTFY_diag, XTX, \
        XTDX, XTFX = brsa._prepare_data_XY(design.design_task, Y, D, F)
    X0TX0, X0TDX0, X0TFX0, XTX0, XTDX0, XTFX0, \
        X0TY, X0TDY, X0TFY, X0, n_base = brsa._prepare_data_XYX0(
            design.design_task, Y, X0, D, F, run_TRs, no_DC=False)
    assert np.shape(XTY) == (n_C, n_V) and np.shape(XTDY) == (n_C, n_V) \
        and np.shape(XTFY) == (n_C, n_V),\
        'Dimension of XTY etc. returned from _prepare_data is wrong'
    assert np.ndim(YTY_diag) == 1 and np.ndim(YTDY_diag) == 1 and np.ndim(YTFY_diag) == 1,\
        'Dimension of YTY_diag etc. returned from _prepare_data is wrong'
    assert np.ndim(XTX) == 2 and np.ndim(XTDX) == 2 and np.ndim(XTFX) == 2,\
        'Dimension of XTX etc. returned from _prepare_data is wrong'
    assert np.ndim(X0TX0) == 2 and np.ndim(X0TDX0) == 2 and np.ndim(X0TFX0) == 2,\
        'Dimension of X0TX0 etc. returned from _prepare_data is wrong'
    assert np.ndim(XTX0) == 2 and np.ndim(XTDX0) == 2 and np.ndim(XTFX0) == 2,\
        'Dimension of XTX0 etc. returned from _prepare_data is wrong'
    assert np.ndim(X0TY) == 2 and np.ndim(X0TDY) == 2 and np.ndim(X0TFY) == 2,\
        'Dimension of X0TY etc. returned from _prepare_data is wrong'
    l_idx = np.tril_indices(n_C)
    n_l = np.size(l_idx[0])

    # Make sure all the fields are in the indices.
    idx_param_sing, idx_param_fitU, idx_param_fitV = brsa._build_index_param(n_l, n_V, 2)
    assert 'Cholesky' in idx_param_sing and 'a1' in idx_param_sing, \
        'The dictionary for parameter indexing misses some keys'
    assert 'Cholesky' in idx_param_fitU and 'a1' in idx_param_fitU, \
        'The dictionary for parameter indexing misses some keys'
    assert 'log_SNR2' in idx_param_fitV and 'c_space' in idx_param_fitV \
        and 'c_inten' in idx_param_fitV and 'c_both' in idx_param_fitV, \
        'The dictionary for parameter indexing misses some keys'

    # Initial parameters are correct parameters with some perturbation
    param0_fitU = np.random.randn(n_l + n_V) * 0.1
    param0_fitV = np.random.randn(n_V + 1) * 0.1
    param0_sing = np.random.randn(n_l + 1) * 0.1
    param0_sing[idx_param_sing['a1']] += np.mean(np.tan(rho1 * np.pi / 2))
    param0_fitV[idx_param_fitV['log_SNR2']] += np.log(snr[:n_V - 1]) * 2
    param0_fitV[idx_param_fitV['c_space']] += np.log(smooth_width) * 2
    param0_fitV[idx_param_fitV['c_inten']] += np.log(inten_kernel) * 2

    # Pre-computed terms passed, in this order, to both the _singpara and
    # the _fitU log-likelihood functions right after the parameter vector.
    moments = (XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
               XTY, XTDY, XTFY, X0TX0, X0TDX0, X0TFX0,
               XTX0, XTDX0, XTFX0, X0TY, X0TDY, X0TFY)

    def unit_vector(size, index):
        # Direction that is 1 at `index` and 0 elsewhere.
        direction = np.zeros(size)
        direction[index] = 1
        return direction

    def check_gradient(loglike, x0, deriv, direction, message):
        # Compare the analytic directional derivative with the numerical
        # one of the first output (the log likelihood) of `loglike`.
        dd = nd.directionaldiff(lambda x: loglike(x)[0], x0, direction)
        assert np.isclose(dd, np.dot(deriv, direction), rtol=1e-5), message

    def loglike_sing(x):
        return brsa._loglike_AR1_singpara(
            x, *moments, l_idx, n_C, n_T, n_V, n_run, n_base, idx_param_sing)

    def loglike_fitU(x):
        return brsa._loglike_AR1_diagV_fitU(
            x, *moments, np.log(snr) * 2, l_idx, n_C, n_T, n_V, n_run,
            n_base, idx_param_fitU, n_C)

    # test if the gradients are correct
    # log likelihood and derivative of the _singpara function
    _, deriv0 = loglike_sing(param0_sing)
    # We test the gradient wrt the first element of the Cholesky factor
    check_gradient(
        loglike_sing, param0_sing, deriv0,
        unit_vector(np.size(param0_sing), idx_param_sing['Cholesky'][0]),
        'gradient of singpara wrt Cholesky is incorrect')
    # We test the gradient to a1
    check_gradient(
        loglike_sing, param0_sing, deriv0,
        unit_vector(np.size(param0_sing), idx_param_sing['a1']),
        'gradient of singpara wrt a1 is incorrect')

    # log likelihood and derivative of the fitU function.
    _, deriv0 = loglike_fitU(param0_fitU)
    # We test the gradient wrt the reparametrization of AR(1) coefficient
    # of noise.
    check_gradient(
        loglike_fitU, param0_fitU, deriv0,
        unit_vector(np.size(param0_fitU), idx_param_fitU['a1'][0]),
        'gradient of fitU wrt to AR(1) coefficient incorrect')
    # We test if the numerical and analytical gradient wrt to the first
    # element of Cholesky factor is correct
    check_gradient(
        loglike_fitU, param0_fitU, deriv0,
        unit_vector(np.size(param0_fitU), idx_param_fitU['Cholesky'][0]),
        'gradient of fitU wrt Cholesky factor incorrect')
    # Test on a random direction
    vec = np.random.randn(np.size(param0_fitU))
    vec = vec / np.linalg.norm(vec)
    check_gradient(loglike_fitU, param0_fitU, deriv0, vec,
                   'gradient of fitU incorrect')

    # We test the gradient of _fitV wrt to log(SNR^2) assuming no GP prior.
    X0TAX0, XTAX0, X0TAY, X0TAX0_i, \
        XTAcorrX, XTAcorrY, YTAcorrY, LTXTAcorrY, XTAcorrXL, LTXTAcorrXL = \
        brsa._calc_sandwidge(XTY, XTDY, XTFY,
                             YTY_diag, YTDY_diag, YTFY_diag,
                             XTX, XTDX, XTFX,
                             X0TX0, X0TDX0, X0TFX0,
                             XTX0, XTDX0, XTFX0,
                             X0TY, X0TDY, X0TFY,
                             L_full, rho1, n_V, n_base)
    assert np.shape(XTAcorrX) == (n_V, n_C, n_C), 'Dimension of XTAcorrX is wrong by _calc_sandwidge()'
    assert XTAcorrY.shape == XTY.shape, 'Shape of XTAcorrY is wrong by _calc_sandwidge()'
    assert YTAcorrY.shape == YTY_diag.shape, 'Shape of YTAcorrY is wrong by _calc_sandwidge()'
    assert np.shape(X0TAX0) == (n_V, n_base, n_base), 'Dimension of X0TAX0 is wrong by _calc_sandwidge()'
    assert np.shape(XTAX0) == (n_V, n_C, n_base), 'Dimension of XTAX0 is wrong by _calc_sandwidge()'
    assert X0TAY.shape == X0TY.shape, 'Shape of X0TAY is wrong by _calc_sandwidge()'
    assert np.all(np.isfinite(X0TAX0_i)), 'Inverse of X0TAX0 includes NaN or Inf'

    # Outputs of _calc_sandwidge, in the order the _fitV log-likelihood
    # function takes them right after the parameter vector.
    sandwich = (X0TAX0, XTAX0, X0TAY, X0TAX0_i, XTAcorrX, XTAcorrY,
                YTAcorrY, LTXTAcorrY, XTAcorrXL, LTXTAcorrXL)

    def loglike_fitV_no_GP(x):
        return brsa._loglike_AR1_diagV_fitV(
            x, *sandwich, L_full[l_idx], np.tan(rho1 * np.pi / 2),
            l_idx, n_C, n_T, n_V, n_run, n_base,
            idx_param_fitV, n_C, False, False)

    def loglike_fitV_GP(x):
        return brsa._loglike_AR1_diagV_fitV(
            x, *sandwich, L_full[l_idx], np.tan(rho1 * np.pi / 2),
            l_idx, n_C, n_T, n_V, n_run, n_base,
            idx_param_fitV, n_C, True, True,
            dist2, inten_diff2, 100, 100)

    # Without the GP prior only the log(SNR^2) entries are passed in.
    log_SNR2_0 = param0_fitV[idx_param_fitV['log_SNR2']]
    _, deriv0 = loglike_fitV_no_GP(log_SNR2_0)
    check_gradient(
        loglike_fitV_no_GP, log_SNR2_0, deriv0,
        unit_vector(np.size(log_SNR2_0), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model without GP')

    # We test the gradient of _fitV wrt to log(SNR^2) assuming GP prior.
    _, deriv0 = loglike_fitV_GP(param0_fitV)
    check_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        unit_vector(np.size(param0_fitV), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model with GP')

    # We test the gradient wrt spatial length scale parameter of GP prior
    check_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        unit_vector(np.size(param0_fitV), idx_param_fitV['c_space']),
        'gradient of fitV wrt spatial length scale of GP incorrect')

    # We test the gradient wrt intensity length scale parameter of GP prior
    check_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        unit_vector(np.size(param0_fitV), idx_param_fitV['c_inten']),
        'gradient of fitV wrt intensity length scale of GP incorrect')

    # We test the gradient on a random direction
    vec = np.random.randn(np.size(param0_fitV))
    vec = vec / np.linalg.norm(vec)
    check_gradient(loglike_fitV_GP, param0_fitV, deriv0, vec,
                   'gradient of fitV incorrect')
예제 #3
0
def test_gradient():
    """Check the analytic gradient of GBRSA's marginalized log likelihood.

    Data are simulated for three data sets that share one design matrix
    but differ in the number of runs and of data columns. The derivative
    returned by ``GBRSA._sum_loglike_marginalized`` with respect to the
    vectorized Cholesky factor is then compared against a numerical
    directional derivative (``numdifftools.directionaldiff``) taken along
    a random unit direction.

    The random seed is fixed, so the order in which random numbers are
    drawn below is part of the test and must not be rearranged.
    """
    from brainiak.reprsimil.brsa import GBRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)

    # Tile the design (all but its last column) 1, 2 and 1 times,
    # mimicking experiments of different lengths for different
    # participants.
    n_run = [1, 2, 1]
    n_subj = len(n_run)
    n_V = [30, 30, 20]
    design_mat = [np.tile(design.design_task[:, :-1], [runs, 1])
                  for runs in n_run]
    n_T = [runs * design.n_TR for runs in n_run]

    # start simulating some data
    n_C = np.size(design_mat[0], axis=1)

    # The noise level of each data column is drawn uniformly from
    # [noise_bot, noise_top).
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = [np.random.rand(n_col) * (noise_top - noise_bot) + noise_bot
                   for n_col in n_V]

    # Range of the AR(1) coefficients
    rho1_top = 0.8
    rho1_bot = -0.2

    noise = [None] * n_subj
    # baseline
    inten = [None] * n_subj
    for i in range(n_subj):
        rho1 = np.random.rand(n_V[i]) * (rho1_top - rho1_bot) + rho1_bot
        # AR(1) noise; the first sample is scaled by 1 / sqrt(1 - rho1^2).
        noise[i] = np.zeros([n_T[i], n_V[i]])
        noise[i][0, :] = np.random.randn(
            n_V[i]) * noise_level[i] / np.sqrt(1 - rho1**2)
        for i_t in range(1, n_T[i]):
            noise[i][i_t, :] = noise[i][i_t - 1, :] * rho1 + \
                np.random.randn(n_V[i]) * noise_level[i]
        # Add a component built from two random time courses that is
        # shared across all data columns.
        noise[i] = noise[i] + \
            np.dot(np.random.randn(n_T[i], 2), np.random.randn(2, n_V[i]))
        inten[i] = np.random.rand(n_V[i]) * 20.0

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0:4, 0:4] = 0.2
    for cond in range(0, 4):
        ideal_cov[cond, cond] = 2
    ideal_cov[5:9, 5:9] = 0.9
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_top = 5.0  # test with high SNR
    snr_bot = 1.0
    # Notice that accurately speaking this is not snr. the magnitude of signal
    # depends not only on beta but also on x.
    Y = [None] * n_subj
    for i in range(n_subj):
        snr = np.random.rand(n_V[i]) * (snr_top - snr_bot) + snr_bot
        sqrt_v = noise_level[i] * snr
        betas_simulated = np.dot(
            L_full, np.random.randn(n_C, n_V[i])) * sqrt_v
        signal = np.dot(design_mat[i], betas_simulated)
        # Adding noise and baseline to signal as data
        Y[i] = signal + noise[i] + inten[i]

    SNR_bins = 11
    rho_bins = 20
    gbrsa = GBRSA(n_iter=3, rank=n_C, SNR_bins=SNR_bins,
                  rho_bins=rho_bins, logS_range=0.5)

    n_grid = SNR_bins * rho_bins
    half_log_det_X0TAX0 = [np.random.randn(n_grid) for _ in range(n_subj)]
    log_weights = np.random.randn(n_grid)
    log_fixed_terms = [np.random.randn(n_grid) for _ in range(n_subj)]
    l_idx = np.tril_indices(n_C)
    # One free parameter per entry of the lower triangle of an
    # n_C x n_C matrix.
    L_vec = np.random.randn(n_C * (n_C + 1) // 2)
    n_X0 = [2, 2, 2]
    s = np.linspace(1, SNR_bins, n_grid)
    a = np.linspace(0.5, 1, n_grid)
    # The calculations below are quite arbitrary and do not conform
    # to the model. They simply conform to the symmetry property and shape of
    # the matrix indicated by the model
    YTAcorrY_diag = [np.sum(y * y, axis=0) * a[:, None] for y in Y]
    s2XTAcorrX = [
        np.dot(x_mat.T, x_mat) * s[:, None, None]**2 * a[:, None, None]
        for x_mat in design_mat]
    sXTAcorrY = [
        np.dot(x_mat.T, y) * s[:, None, None] * a[:, None, None]
        for x_mat, y in zip(design_mat, Y)]

    def loglike(cholesky_vec):
        # Returns (log likelihood, derivative); only the first argument
        # varies between the analytic and the numerical evaluation.
        return gbrsa._sum_loglike_marginalized(cholesky_vec, s2XTAcorrX,
                                               YTAcorrY_diag, sXTAcorrY,
                                               half_log_det_X0TAX0,
                                               log_weights, log_fixed_terms,
                                               l_idx, n_C, n_T, n_V, n_X0,
                                               n_grid, rank=None)

    # test if the gradients are correct
    _, deriv0 = loglike(L_vec)
    # We test the gradient to the Cholesky factor along a random unit vector
    vec = np.random.randn(np.size(L_vec))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(lambda x: loglike(x)[0], L_vec, vec)
    assert np.isclose(dd, np.dot(deriv0, vec), rtol=1e-5), 'gradient incorrect'
예제 #4
0
def test_gradient():
    """Compare BRSA's analytical log-likelihood gradients with numerical ones.

    The test simulates data from an example design matrix (tiled 4 times)
    plus AR(1) noise, then evaluates ``BRSA._loglike_AR1_diagV_fitU`` and
    ``BRSA._loglike_AR1_diagV_fitV`` at perturbed parameters.  For several
    directions (single-parameter unit vectors and random unit vectors) the
    directional derivative estimated by ``numdifftools.directionaldiff`` must
    agree, within ``rtol=0.01``, with the dot product between that direction
    and the derivative returned by the likelihood function.

    Raises:
        AssertionError: if any analytical gradient disagrees with its
            numerical estimate.
    """
    from brainiak.reprsimil.brsa import BRSA
    import brainiak.utils.utils as utils
    # Not referenced directly below; the import is kept from the original
    # test so the set of loaded modules does not change.
    import scipy.stats  # noqa: F401
    import numpy as np
    import os.path
    import numdifftools as nd

    # NOTE: the order of the np.random calls below determines the simulated
    # data under this fixed seed; do not reorder them.
    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)
    # concatenate it by 4 times, mimicking 4 runs of identical timing
    design.design_used = np.tile(design.design_used[:, 0:17], [4, 1])
    design.n_TR = design.n_TR * 4

    # start simulating some data
    n_V = 200
    n_C = np.size(design.design_used, axis=1)
    n_T = design.n_TR

    noise_bot = 0.5
    noise_top = 1.5
    noise_level = np.random.rand(n_V) * (noise_top - noise_bot) + noise_bot
    # noise level is random.

    # AR(1) coefficient
    rho1_top = 0.8
    rho1_bot = -0.2
    rho1 = np.random.rand(n_V) * (rho1_top - rho1_bot) + rho1_bot

    # generating noise
    noise = np.zeros([n_T, n_V])
    noise[0, :] = np.random.randn(n_V) * noise_level / np.sqrt(1 - rho1**2)
    for i_t in range(1, n_T):
        noise[i_t, :] = (noise[i_t - 1, :] * rho1
                         + np.random.randn(n_V) * noise_level)

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0, 0] = 0.2
    ideal_cov[5:9, 5:9] = 0.6
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_level = 5.0  # test with high SNR
    # Notice that accurately speaking this is not snr. the magnitude of
    # signal depends not only on beta but also on x.
    inten = np.random.randn(n_V) * 20.0

    # parameters of Gaussian process to generate pseudo SNR
    tau = 0.8
    smooth_width = 5.0
    inten_kernel = 1.0

    coords = np.arange(0, n_V)[:, None]

    dist2 = np.square(coords - coords.T)

    inten_tile = np.tile(inten, [n_V, 1])
    inten_diff2 = (inten_tile - inten_tile.T)**2

    K = np.exp(-dist2 / smooth_width**2 / 2.0 - inten_diff2 / inten_kernel**2 /
               2.0) * tau**2 + np.eye(n_V) * tau**2 * 0.001

    L = np.linalg.cholesky(K)
    snr = np.exp(np.dot(L, np.random.randn(n_V))) * snr_level
    sqrt_v = noise_level * snr
    betas_simulated = np.dot(L_full, np.random.randn(n_C, n_V)) * sqrt_v
    signal = np.dot(design.design_used, betas_simulated)

    # Adding noise to signal as data
    Y = signal + noise

    scan_onsets = np.linspace(0, design.n_TR, num=5)

    # Test fitting with GP prior.
    brsa = BRSA(GP_space=True, GP_inten=True, verbose=False, n_iter=200)

    # test if the gradients are correct
    (XTY, XTDY, XTFY, YTY_diag, YTDY_diag, YTFY_diag,
     XTX, XTDX, XTFX) = brsa._prepare_data(
        design.design_used, Y, n_T, n_V, scan_onsets)
    l_idx = np.tril_indices(n_C)
    n_l = np.size(l_idx[0])

    _, idx_param_fitU, idx_param_fitV = brsa._build_index_param(n_l, n_V, 2)

    # Initial parameters are correct parameters with some perturbation
    param0_fitU = np.random.randn(n_l + n_V) * 0.1
    param0_fitV = np.random.randn(n_V + 1) * 0.1
    param0_fitV[:n_V - 1] += np.log(snr[:n_V - 1]) * 2
    param0_fitV[n_V - 1] += np.log(smooth_width) * 2
    param0_fitV[n_V] += np.log(inten_kernel) * 2

    # The helpers below rebuild their array arguments on every call (as the
    # original inline lambdas did) so that the callee always receives fresh
    # arrays.

    def loglike_fitU(param):
        """Return (log likelihood, derivative) of the fitU step at param."""
        return brsa._loglike_AR1_diagV_fitU(
            param, XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
            XTY, XTDY, XTFY, np.log(snr) * 2, l_idx, n_C, n_T, n_V,
            idx_param_fitU, n_C)

    def loglike_fitV_no_gp(param):
        """Return (log likelihood, derivative) of fitV without GP prior."""
        return brsa._loglike_AR1_diagV_fitV(
            param, XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
            XTY, XTDY, XTFY, L_full[l_idx], np.tan(rho1 * np.pi / 2),
            l_idx, n_C, n_T, n_V, idx_param_fitV, n_C, False, False)

    def loglike_fitV_gp(param):
        """Return (log likelihood, derivative) of fitV with GP prior."""
        return brsa._loglike_AR1_diagV_fitV(
            param, XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
            XTY, XTDY, XTFY, L_full[l_idx], np.tan(rho1 * np.pi / 2),
            l_idx, n_C, n_T, n_V, idx_param_fitV, n_C, True, True,
            dist2, inten_diff2, 100, 100)

    def unit_vector(size, index):
        """Return a zero vector of length size with entry index set to 1."""
        vec = np.zeros(size)
        vec[index] = 1
        return vec

    def random_direction(size):
        """Draw a random direction of length size with unit Euclidean norm."""
        vec = np.random.randn(size)
        return vec / np.linalg.norm(vec)

    def check_gradient(loglike, param, deriv, direction, message):
        """Assert numerical and analytical directional derivatives agree."""
        numerical = nd.directionaldiff(
            lambda x: loglike(x)[0], param, direction)
        assert np.isclose(numerical, np.dot(deriv, direction),
                          rtol=0.01), message

    # ---- gradients of the fitU step ----
    # log likelihood derivative at the initial parameters
    _, deriv0 = loglike_fitU(param0_fitU)
    size_U = np.size(param0_fitU)

    # We test if the numerical and analytical gradient wrt the first
    # element of the Cholesky factor is correct
    check_gradient(
        loglike_fitU, param0_fitU, deriv0,
        unit_vector(size_U, idx_param_fitU['Cholesky'][0]),
        'gradient of fitU wrt Cholesky factor incorrect')

    # We test the gradient wrt the reparametrization of AR(1) coefficient
    # of noise.
    check_gradient(
        loglike_fitU, param0_fitU, deriv0,
        unit_vector(size_U, idx_param_fitU['a1'][0]),
        'gradient of fitU wrt to AR(1) coefficient incorrect')

    # Test on a random direction
    check_gradient(
        loglike_fitU, param0_fitU, deriv0,
        random_direction(size_U),
        'gradient of fitU incorrect')

    # ---- gradients of the fitV step, no GP prior ----
    # We test the gradient of _fitV wrt log(SNR^2) assuming no GP prior.
    param0_log_snr2 = param0_fitV[idx_param_fitV['log_SNR2']]
    _, deriv0 = loglike_fitV_no_gp(param0_log_snr2)
    check_gradient(
        loglike_fitV_no_gp, param0_log_snr2, deriv0,
        unit_vector(np.size(param0_log_snr2),
                    idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model without GP')

    # ---- gradients of the fitV step, with GP prior ----
    # The derivative at param0_fitV is computed once and reused for every
    # direction tested below.
    _, deriv0 = loglike_fitV_gp(param0_fitV)
    size_V = np.size(param0_fitV)

    # We test the gradient of _fitV wrt log(SNR^2) assuming GP prior.
    check_gradient(
        loglike_fitV_gp, param0_fitV, deriv0,
        unit_vector(size_V, idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model with GP')

    # We test the gradient wrt spatial length scale parameter of GP prior
    check_gradient(
        loglike_fitV_gp, param0_fitV, deriv0,
        unit_vector(size_V, idx_param_fitV['c_space']),
        'gradient of fitV wrt spatial length scale of GP incorrect')

    # We test the gradient wrt intensity length scale parameter of GP prior
    check_gradient(
        loglike_fitV_gp, param0_fitV, deriv0,
        unit_vector(size_V, idx_param_fitV['c_inten']),
        'gradient of fitV wrt intensity length scale of GP incorrect')

    # We test the gradient on a random direction
    check_gradient(
        loglike_fitV_gp, param0_fitV, deriv0,
        random_direction(size_V),
        'gradient of fitV incorrect')