def test_gradient():
    """Check GBRSA's analytic gradient against a numerical one.

    Data for three simulated participants (with different numbers of runs
    and voxels) are drawn from a seeded random stream.  The statistics
    handed to ``GBRSA._sum_loglike_marginalized`` are deliberately
    arbitrary: they only have the shapes and symmetry used by this call.
    The derivative returned by the method is compared with
    ``numdifftools.directionaldiff`` along a random unit direction.

    NOTE(review): this file defines more than one ``test_gradient``; a later
    module-level definition replaces an earlier one, so confirm which of
    them is actually collected.
    """
    from brainiak.reprsimil.brsa import GBRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)

    # Tile the design once, twice and once, mimicking experiments of
    # different lengths for different participants.
    n_run = [1, 2, 1]
    n_V = [30, 30, 20]
    n_subj = len(n_run)
    design_mat = [np.tile(design.design_task[:, :-1], [runs, 1])
                  for runs in n_run]
    n_T = [runs * design.n_TR for runs in n_run]
    n_C = np.size(design_mat[0], axis=1)

    # The order of the np.random calls below is kept fixed on purpose so
    # that the seeded data stay the same.
    # Noise level is random.
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = [
        np.random.rand(n_V[i]) * (noise_top - noise_bot) + noise_bot
        for i in range(n_subj)]

    # AR(1) noise plus two shared random time courses and a baseline.
    rho1_top = 0.8
    rho1_bot = -0.2
    noise = [None] * n_subj
    inten = [None] * n_subj
    for i in range(n_subj):
        rho1 = np.random.rand(n_V[i]) * (rho1_top - rho1_bot) + rho1_bot
        noise[i] = np.zeros([n_T[i], n_V[i]])
        noise[i][0, :] = np.random.randn(
            n_V[i]) * noise_level[i] / np.sqrt(1 - rho1**2)
        for i_t in range(1, n_T[i]):
            noise[i][i_t, :] = noise[i][i_t - 1, :] * rho1 + \
                np.random.randn(n_V[i]) * noise_level[i]
        noise[i] = noise[i] + \
            np.dot(np.random.randn(n_T[i], 2), np.random.randn(2, n_V[i]))
        inten[i] = np.random.rand(n_V[i]) * 20.0

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0:4, 0:4] = 0.2
    for cond in range(0, 4):
        ideal_cov[cond, cond] = 2
    ideal_cov[5:9, 5:9] = 0.9
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # Generating signal.  Strictly speaking ``snr`` is not the SNR: the
    # magnitude of the signal depends not only on beta but also on x.
    snr_top = 5.0  # test with high SNR
    snr_bot = 1.0
    Y = [None] * n_subj
    for i in range(n_subj):
        snr = np.random.rand(n_V[i]) * (snr_top - snr_bot) + snr_bot
        sqrt_v = noise_level[i] * snr
        betas_simulated = np.dot(
            L_full, np.random.randn(n_C, n_V[i])) * sqrt_v
        signal = np.dot(design_mat[i], betas_simulated)
        # Adding noise and baseline to signal as data
        Y[i] = signal + noise[i] + inten[i]

    SNR_bins = 11
    rho_bins = 20
    gbrsa = GBRSA(n_iter=3, rank=n_C, SNR_bins=SNR_bins, rho_bins=rho_bins,
                  logS_range=0.5)
    n_grid = SNR_bins * rho_bins
    half_log_det_X0TAX0 = [np.random.randn(n_grid) for _ in range(n_subj)]
    log_weights = np.random.randn(n_grid)
    log_fixed_terms = [np.random.randn(n_grid) for _ in range(n_subj)]
    l_idx = np.tril_indices(n_C)
    L_vec = np.random.randn(n_C * (n_C + 1) // 2)
    n_X0 = [2, 2, 2]
    s = np.linspace(1, SNR_bins, n_grid)
    a = np.linspace(0.5, 1, n_grid)

    # The calculations below are quite arbitrary and do not conform to the
    # model.  They simply conform to the symmetry property and shape of the
    # matrices indicated by the model.
    YTAcorrY_diag = [None] * n_subj
    s2XTAcorrX = [None] * n_subj
    sXTAcorrY = [None] * n_subj
    for i in range(n_subj):
        YTAcorrY_diag[i] = np.sum(Y[i] * Y[i], axis=0) * a[:, None]
        s2XTAcorrX[i] = np.dot(design_mat[i].T, design_mat[i]) \
            * s[:, None, None]**2 * a[:, None, None]
        sXTAcorrY[i] = np.dot(design_mat[i].T, Y[i]) \
            * s[:, None, None] * a[:, None, None]

    def loglike(L_param):
        """Return (log likelihood, derivative) for a Cholesky vector."""
        return gbrsa._sum_loglike_marginalized(
            L_param, s2XTAcorrX, YTAcorrY_diag, sXTAcorrY,
            half_log_det_X0TAX0, log_weights, log_fixed_terms,
            l_idx, n_C, n_T, n_V, n_X0, n_grid, rank=None)

    # Test the gradient with respect to the Cholesky factor along a random
    # unit direction.
    _, deriv0 = loglike(L_vec)
    vec = np.random.randn(np.size(L_vec))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(lambda x: loglike(x)[0], L_vec, vec)
    assert np.isclose(dd, np.dot(deriv0, vec), rtol=1e-5), \
        'gradient incorrect'
def test_gradient():
    """Check BRSA's analytic gradients against numerical ones.

    Simulates AR(1) noise plus task-evoked signal for 200 voxels over four
    concatenated runs, checks the shapes of the intermediate terms returned
    by ``_prepare_DF``, ``_prepare_data_XY``, ``_prepare_data_XYX0`` and
    ``_calc_sandwidge``, and then compares the derivatives returned by
    ``_loglike_AR1_singpara``, ``_loglike_AR1_diagV_fitU`` and
    ``_loglike_AR1_diagV_fitV`` with ``numdifftools.directionaldiff`` along
    single-parameter and random directions.

    NOTE(review): this file defines more than one ``test_gradient``; a later
    module-level definition replaces an earlier one, so confirm which of
    them is actually collected.
    """
    from brainiak.reprsimil.brsa import BRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    def one_hot(size, index):
        """Return a zero vector of length ``size`` with 1 at ``index``."""
        direction = np.zeros(size)
        direction[index] = 1
        return direction

    def assert_gradient(loglike, param, deriv, direction, message):
        """Compare ``deriv . direction`` with the numerical derivative."""
        dd = nd.directionaldiff(lambda x: loglike(x)[0], param, direction)
        assert np.isclose(dd, np.dot(deriv, direction), rtol=1e-5), message

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)
    # Concatenate it 4 times, mimicking 4 runs of identical timing
    n_run = 4
    design.design_task = np.tile(design.design_task[:, :-1], [n_run, 1])
    design.n_TR = design.n_TR * n_run

    # Start simulating some data.  The order of the np.random calls is kept
    # fixed on purpose so that the seeded data stay the same.
    n_V = 200
    n_C = np.size(design.design_task, axis=1)
    n_T = design.n_TR

    # Noise level is random.
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = np.random.rand(n_V) * (noise_top - noise_bot) + noise_bot

    # AR(1) coefficient
    rho1_top = 0.8
    rho1_bot = -0.2
    rho1 = np.random.rand(n_V) * (rho1_top - rho1_bot) + rho1_bot

    # generating noise
    noise = np.zeros([n_T, n_V])
    noise[0, :] = np.random.randn(n_V) * noise_level / np.sqrt(1 - rho1**2)
    for i_t in range(1, n_T):
        noise[i_t, :] = noise[i_t - 1, :] * rho1 + \
            np.random.randn(n_V) * noise_level

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0, 0] = 0.2
    ideal_cov[5:9, 5:9] = 0.6
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_level = 5.0  # test with high SNR
    inten = np.random.randn(n_V) * 20.0

    # Parameters of the Gaussian process used to generate pseudo SNR
    tau = 0.8
    smooth_width = 5.0
    inten_kernel = 1.0
    coords = np.arange(0, n_V)[:, None]
    dist2 = np.square(coords - coords.T)
    inten_tile = np.tile(inten, [n_V, 1])
    inten_diff2 = (inten_tile - inten_tile.T)**2
    K = np.exp(-dist2 / smooth_width**2 / 2.0
               - inten_diff2 / inten_kernel**2 / 2.0) * tau**2 \
        + np.eye(n_V) * tau**2 * 0.001
    L = np.linalg.cholesky(K)
    # Strictly speaking this is not the SNR: the magnitude of the signal
    # depends not only on beta but also on x.
    snr = np.exp(np.dot(L, np.random.randn(n_V))) * snr_level
    sqrt_v = noise_level * snr
    betas_simulated = np.dot(L_full, np.random.randn(n_C, n_V)) * sqrt_v
    signal = np.dot(design.design_task, betas_simulated)
    # Adding noise to signal as data
    Y = signal + noise

    scan_onsets = np.linspace(0, design.n_TR, num=n_run + 1)

    # Test fitting with GP prior.
    brsa = BRSA(GP_space=True, GP_inten=True, verbose=False, n_iter=200,
                rank=n_C)

    # Additionally, we test the generation of re-used terms.
    X0 = np.ones(n_T)[:, None]
    D, F, run_TRs, n_run_returned = brsa._prepare_DF(
        n_T, scan_onsets=scan_onsets)
    assert n_run_returned == n_run, \
        'There is mistake in counting number of runs'
    assert np.sum(run_TRs) == n_T, \
        'The segmentation of the total experiment duration is wrong'
    XTY, XTDY, XTFY, YTY_diag, YTDY_diag, YTFY_diag, XTX, \
        XTDX, XTFX = brsa._prepare_data_XY(design.design_task, Y, D, F)
    X0TX0, X0TDX0, X0TFX0, XTX0, XTDX0, XTFX0, \
        X0TY, X0TDY, X0TFY, X0, n_base = brsa._prepare_data_XYX0(
            design.design_task, Y, X0, D, F, run_TRs, no_DC=False)
    assert np.shape(XTY) == (n_C, n_V) and np.shape(XTDY) == (n_C, n_V) \
        and np.shape(XTFY) == (n_C, n_V),\
        'Dimension of XTY etc. returned from _prepare_data is wrong'
    assert np.ndim(YTY_diag) == 1 and np.ndim(YTDY_diag) == 1 \
        and np.ndim(YTFY_diag) == 1,\
        'Dimension of YTY_diag etc. returned from _prepare_data is wrong'
    assert np.ndim(XTX) == 2 and np.ndim(XTDX) == 2 and np.ndim(XTFX) == 2,\
        'Dimension of XTX etc. returned from _prepare_data is wrong'
    assert np.ndim(X0TX0) == 2 and np.ndim(X0TDX0) == 2 \
        and np.ndim(X0TFX0) == 2,\
        'Dimension of X0TX0 etc. returned from _prepare_data is wrong'
    assert np.ndim(XTX0) == 2 and np.ndim(XTDX0) == 2 \
        and np.ndim(XTFX0) == 2,\
        'Dimension of XTX0 etc. returned from _prepare_data is wrong'
    assert np.ndim(X0TY) == 2 and np.ndim(X0TDY) == 2 \
        and np.ndim(X0TFY) == 2,\
        'Dimension of X0TY etc. returned from _prepare_data is wrong'

    l_idx = np.tril_indices(n_C)
    n_l = np.size(l_idx[0])

    # Make sure all the fields are in the indices.
    idx_param_sing, idx_param_fitU, idx_param_fitV = \
        brsa._build_index_param(n_l, n_V, 2)
    assert 'Cholesky' in idx_param_sing and 'a1' in idx_param_sing, \
        'The dictionary for parameter indexing misses some keys'
    assert 'Cholesky' in idx_param_fitU and 'a1' in idx_param_fitU, \
        'The dictionary for parameter indexing misses some keys'
    assert 'log_SNR2' in idx_param_fitV and 'c_space' in idx_param_fitV \
        and 'c_inten' in idx_param_fitV and 'c_both' in idx_param_fitV, \
        'The dictionary for parameter indexing misses some keys'

    # Initial parameters are correct parameters with some perturbation
    param0_fitU = np.random.randn(n_l + n_V) * 0.1
    param0_fitV = np.random.randn(n_V + 1) * 0.1
    param0_sing = np.random.randn(n_l + 1) * 0.1
    param0_sing[idx_param_sing['a1']] += np.mean(np.tan(rho1 * np.pi / 2))
    param0_fitV[idx_param_fitV['log_SNR2']] += np.log(snr[:n_V - 1]) * 2
    param0_fitV[idx_param_fitV['c_space']] += np.log(smooth_width) * 2
    param0_fitV[idx_param_fitV['c_inten']] += np.log(inten_kernel) * 2

    # Terms shared, in this order, by the singpara and fitU likelihoods.
    data_terms = (XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
                  XTY, XTDY, XTFY, X0TX0, X0TDX0, X0TFX0,
                  XTX0, XTDX0, XTFX0, X0TY, X0TDY, X0TFY)

    def loglike_sing(param):
        args = data_terms + (l_idx, n_C, n_T, n_V, n_run, n_base,
                             idx_param_sing)
        return brsa._loglike_AR1_singpara(param, *args)

    def loglike_fitU(param):
        # log(SNR^2) is rebuilt on every call so each evaluation gets a
        # fresh array.
        args = data_terms + (np.log(snr) * 2, l_idx, n_C, n_T, n_V, n_run,
                             n_base, idx_param_fitU, n_C)
        return brsa._loglike_AR1_diagV_fitU(param, *args)

    # Log likelihood and derivative of the _singpara function.
    _, deriv0 = loglike_sing(param0_sing)
    # Gradient wrt the first element of the Cholesky factor
    assert_gradient(
        loglike_sing, param0_sing, deriv0,
        one_hot(np.size(param0_sing), idx_param_sing['Cholesky'][0]),
        'gradient of singpara wrt Cholesky is incorrect')
    # Gradient wrt a1
    assert_gradient(
        loglike_sing, param0_sing, deriv0,
        one_hot(np.size(param0_sing), idx_param_sing['a1']),
        'gradient of singpara wrt a1 is incorrect')

    # Log likelihood and derivative of the fitU function.
    _, deriv0 = loglike_fitU(param0_fitU)
    # Gradient wrt the reparametrization of the AR(1) coefficient of noise
    assert_gradient(
        loglike_fitU, param0_fitU, deriv0,
        one_hot(np.size(param0_fitU), idx_param_fitU['a1'][0]),
        'gradient of fitU wrt to AR(1) coefficient incorrect')
    # Gradient wrt the first element of the Cholesky factor
    assert_gradient(
        loglike_fitU, param0_fitU, deriv0,
        one_hot(np.size(param0_fitU), idx_param_fitU['Cholesky'][0]),
        'gradient of fitU wrt Cholesky factor incorrect')
    # Test on a random direction
    vec = np.random.randn(np.size(param0_fitU))
    vec = vec / np.linalg.norm(vec)
    assert_gradient(loglike_fitU, param0_fitU, deriv0, vec,
                    'gradient of fitU incorrect')

    # Terms needed by the fitV likelihood.
    X0TAX0, XTAX0, X0TAY, X0TAX0_i, \
        XTAcorrX, XTAcorrY, YTAcorrY, LTXTAcorrY, XTAcorrXL, LTXTAcorrXL = \
        brsa._calc_sandwidge(XTY, XTDY, XTFY,
                             YTY_diag, YTDY_diag, YTFY_diag,
                             XTX, XTDX, XTFX, X0TX0, X0TDX0, X0TFX0,
                             XTX0, XTDX0, XTFX0, X0TY, X0TDY, X0TFY,
                             L_full, rho1, n_V, n_base)
    assert np.shape(XTAcorrX) == (n_V, n_C, n_C), \
        'Dimension of XTAcorrX is wrong by _calc_sandwidge()'
    assert XTAcorrY.shape == XTY.shape, \
        'Shape of XTAcorrY is wrong by _calc_sandwidge()'
    assert YTAcorrY.shape == YTY_diag.shape, \
        'Shape of YTAcorrY is wrong by _calc_sandwidge()'
    assert np.shape(X0TAX0) == (n_V, n_base, n_base), \
        'Dimension of X0TAX0 is wrong by _calc_sandwidge()'
    assert np.shape(XTAX0) == (n_V, n_C, n_base), \
        'Dimension of XTAX0 is wrong by _calc_sandwidge()'
    assert X0TAY.shape == X0TY.shape, \
        'Shape of X0TAY is wrong by _calc_sandwidge()'
    assert np.all(np.isfinite(X0TAX0_i)), \
        'Inverse of X0TAX0 includes NaN or Inf'

    def loglike_fitV(param, *gp_args):
        # ``gp_args`` are the trailing GP switches and settings, passed
        # through positionally.
        args = (X0TAX0, XTAX0, X0TAY, X0TAX0_i, XTAcorrX, XTAcorrY,
                YTAcorrY, LTXTAcorrY, XTAcorrXL, LTXTAcorrXL,
                L_full[l_idx], np.tan(rho1 * np.pi / 2),
                l_idx, n_C, n_T, n_V, n_run, n_base,
                idx_param_fitV, n_C) + gp_args
        return brsa._loglike_AR1_diagV_fitV(param, *args)

    def loglike_fitV_noGP(param):
        return loglike_fitV(param, False, False)

    def loglike_fitV_GP(param):
        return loglike_fitV(param, True, True, dist2, inten_diff2, 100, 100)

    # Gradient of _fitV wrt log(SNR^2) assuming no GP prior.
    param0_noGP = param0_fitV[idx_param_fitV['log_SNR2']]
    _, deriv0 = loglike_fitV_noGP(param0_noGP)
    assert_gradient(
        loglike_fitV_noGP, param0_noGP, deriv0,
        one_hot(np.size(param0_noGP), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model without GP')

    # Gradient of _fitV wrt log(SNR^2) assuming GP prior.
    _, deriv0 = loglike_fitV_GP(param0_fitV)
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model with GP')
    # Gradient wrt the spatial length scale parameter of the GP prior
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['c_space']),
        'gradient of fitV wrt spatial length scale of GP incorrect')
    # Gradient wrt the intensity length scale parameter of the GP prior
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['c_inten']),
        'gradient of fitV wrt intensity length scale of GP incorrect')
    # Gradient on a random direction
    vec = np.random.randn(np.size(param0_fitV))
    vec = vec / np.linalg.norm(vec)
    assert_gradient(loglike_fitV_GP, param0_fitV, deriv0, vec,
                    'gradient of fitV incorrect')
def test_gradient():
    """Check GBRSA's analytic gradient against a numerical one.

    Data for three simulated participants (with different numbers of runs
    and voxels) are drawn from a seeded random stream.  The statistics
    handed to ``GBRSA._sum_loglike_marginalized`` are deliberately
    arbitrary: they only have the shapes and symmetry used by this call.
    The derivative returned by the method is compared with
    ``numdifftools.directionaldiff`` along a random unit direction.

    NOTE(review): this file defines more than one ``test_gradient``; a later
    module-level definition replaces an earlier one, so confirm which of
    them is actually collected.
    """
    from brainiak.reprsimil.brsa import GBRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)

    # Tile the design once, twice and once, mimicking experiments of
    # different lengths for different participants.
    n_run = [1, 2, 1]
    n_V = [30, 30, 20]
    n_subj = len(n_run)
    design_mat = [np.tile(design.design_task[:, :-1], [runs, 1])
                  for runs in n_run]
    n_T = [runs * design.n_TR for runs in n_run]
    n_C = np.size(design_mat[0], axis=1)

    # The order of the np.random calls below is kept fixed on purpose so
    # that the seeded data stay the same.
    # Noise level is random.
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = [
        np.random.rand(n_V[i]) * (noise_top - noise_bot) + noise_bot
        for i in range(n_subj)]

    # AR(1) noise plus two shared random time courses and a baseline.
    rho1_top = 0.8
    rho1_bot = -0.2
    noise = [None] * n_subj
    inten = [None] * n_subj
    for i in range(n_subj):
        rho1 = np.random.rand(n_V[i]) * (rho1_top - rho1_bot) + rho1_bot
        noise[i] = np.zeros([n_T[i], n_V[i]])
        noise[i][0, :] = np.random.randn(
            n_V[i]) * noise_level[i] / np.sqrt(1 - rho1**2)
        for i_t in range(1, n_T[i]):
            noise[i][i_t, :] = noise[i][i_t - 1, :] * rho1 + \
                np.random.randn(n_V[i]) * noise_level[i]
        noise[i] = noise[i] + \
            np.dot(np.random.randn(n_T[i], 2), np.random.randn(2, n_V[i]))
        inten[i] = np.random.rand(n_V[i]) * 20.0

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0:4, 0:4] = 0.2
    for cond in range(0, 4):
        ideal_cov[cond, cond] = 2
    ideal_cov[5:9, 5:9] = 0.9
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # Generating signal.  Strictly speaking ``snr`` is not the SNR: the
    # magnitude of the signal depends not only on beta but also on x.
    snr_top = 5.0  # test with high SNR
    snr_bot = 1.0
    Y = [None] * n_subj
    for i in range(n_subj):
        snr = np.random.rand(n_V[i]) * (snr_top - snr_bot) + snr_bot
        sqrt_v = noise_level[i] * snr
        betas_simulated = np.dot(
            L_full, np.random.randn(n_C, n_V[i])) * sqrt_v
        signal = np.dot(design_mat[i], betas_simulated)
        # Adding noise and baseline to signal as data
        Y[i] = signal + noise[i] + inten[i]

    SNR_bins = 11
    rho_bins = 20
    gbrsa = GBRSA(n_iter=3, rank=n_C, SNR_bins=SNR_bins, rho_bins=rho_bins,
                  logS_range=0.5)
    n_grid = SNR_bins * rho_bins
    half_log_det_X0TAX0 = [np.random.randn(n_grid) for _ in range(n_subj)]
    log_weights = np.random.randn(n_grid)
    log_fixed_terms = [np.random.randn(n_grid) for _ in range(n_subj)]
    l_idx = np.tril_indices(n_C)
    L_vec = np.random.randn(n_C * (n_C + 1) // 2)
    n_X0 = [2, 2, 2]
    s = np.linspace(1, SNR_bins, n_grid)
    a = np.linspace(0.5, 1, n_grid)

    # The calculations below are quite arbitrary and do not conform to the
    # model.  They simply conform to the symmetry property and shape of the
    # matrices indicated by the model.
    YTAcorrY_diag = [None] * n_subj
    s2XTAcorrX = [None] * n_subj
    sXTAcorrY = [None] * n_subj
    for i in range(n_subj):
        YTAcorrY_diag[i] = np.sum(Y[i] * Y[i], axis=0) * a[:, None]
        s2XTAcorrX[i] = np.dot(design_mat[i].T, design_mat[i]) \
            * s[:, None, None]**2 * a[:, None, None]
        sXTAcorrY[i] = np.dot(design_mat[i].T, Y[i]) \
            * s[:, None, None] * a[:, None, None]

    def loglike(L_param):
        """Return (log likelihood, derivative) for a Cholesky vector."""
        return gbrsa._sum_loglike_marginalized(
            L_param, s2XTAcorrX, YTAcorrY_diag, sXTAcorrY,
            half_log_det_X0TAX0, log_weights, log_fixed_terms,
            l_idx, n_C, n_T, n_V, n_X0, n_grid, rank=None)

    # Test the gradient with respect to the Cholesky factor along a random
    # unit direction.
    _, deriv0 = loglike(L_vec)
    vec = np.random.randn(np.size(L_vec))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(lambda x: loglike(x)[0], L_vec, vec)
    assert np.isclose(dd, np.dot(deriv0, vec), rtol=1e-5), \
        'gradient incorrect'
def test_gradient():
    """Check BRSA's analytic gradients against numerical ones.

    Simulates AR(1) noise plus task-evoked signal for 200 voxels over four
    concatenated runs, then compares the derivatives returned by
    ``_loglike_AR1_diagV_fitU`` and ``_loglike_AR1_diagV_fitV`` with
    ``numdifftools.directionaldiff`` along single-parameter and random
    directions, using a relative tolerance of 0.01.

    NOTE(review): this file defines more than one ``test_gradient``; a later
    module-level definition replaces an earlier one, so confirm which of
    them is actually collected.
    """
    from brainiak.reprsimil.brsa import BRSA
    import brainiak.utils.utils as utils
    import numpy as np
    import os.path
    import numdifftools as nd

    def one_hot(size, index):
        """Return a zero vector of length ``size`` with 1 at ``index``."""
        direction = np.zeros(size)
        direction[index] = 1
        return direction

    def assert_gradient(loglike, param, deriv, direction, message):
        """Compare ``deriv . direction`` with the numerical derivative."""
        dd = nd.directionaldiff(lambda x: loglike(x)[0], param, direction)
        assert np.isclose(dd, np.dot(deriv, direction), rtol=0.01), message

    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)
    # Concatenate it 4 times, mimicking 4 runs of identical timing
    n_run = 4
    design.design_used = np.tile(design.design_used[:, 0:17], [n_run, 1])
    design.n_TR = design.n_TR * n_run

    # Start simulating some data.  The order of the np.random calls is kept
    # fixed on purpose so that the seeded data stay the same.
    n_V = 200
    n_C = np.size(design.design_used, axis=1)
    n_T = design.n_TR

    # Noise level is random.
    noise_bot = 0.5
    noise_top = 1.5
    noise_level = np.random.rand(n_V) * (noise_top - noise_bot) + noise_bot

    # AR(1) coefficient
    rho1_top = 0.8
    rho1_bot = -0.2
    rho1 = np.random.rand(n_V) * (rho1_top - rho1_bot) + rho1_bot

    # generating noise
    noise = np.zeros([n_T, n_V])
    noise[0, :] = np.random.randn(n_V) * noise_level / np.sqrt(1 - rho1**2)
    for i_t in range(1, n_T):
        noise[i_t, :] = noise[i_t - 1, :] * rho1 + \
            np.random.randn(n_V) * noise_level

    # ideal covariance matrix
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0, 0] = 0.2
    ideal_cov[5:9, 5:9] = 0.6
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    L_full = np.linalg.cholesky(ideal_cov)

    # Generating signal.  Strictly speaking ``snr`` is not the SNR: the
    # magnitude of the signal depends not only on beta but also on x.
    snr_level = 5.0  # test with high SNR
    inten = np.random.randn(n_V) * 20.0

    # Parameters of the Gaussian process used to generate pseudo SNR
    tau = 0.8
    smooth_width = 5.0
    inten_kernel = 1.0
    coords = np.arange(0, n_V)[:, None]
    dist2 = np.square(coords - coords.T)
    inten_tile = np.tile(inten, [n_V, 1])
    inten_diff2 = (inten_tile - inten_tile.T)**2
    K = np.exp(-dist2 / smooth_width**2 / 2.0
               - inten_diff2 / inten_kernel**2 / 2.0) * tau**2 \
        + np.eye(n_V) * tau**2 * 0.001
    L = np.linalg.cholesky(K)
    snr = np.exp(np.dot(L, np.random.randn(n_V))) * snr_level
    sqrt_v = noise_level * snr
    betas_simulated = np.dot(L_full, np.random.randn(n_C, n_V)) * sqrt_v
    signal = np.dot(design.design_used, betas_simulated)
    # Adding noise to signal as data
    Y = signal + noise

    scan_onsets = np.linspace(0, design.n_TR, num=n_run + 1)

    # Test fitting with GP prior.
    brsa = BRSA(GP_space=True, GP_inten=True, verbose=False, n_iter=200)

    XTY, XTDY, XTFY, YTY_diag, YTDY_diag, YTFY_diag, XTX, XTDX, XTFX = \
        brsa._prepare_data(design.design_used, Y, n_T, n_V, scan_onsets)
    l_idx = np.tril_indices(n_C)
    n_l = np.size(l_idx[0])
    idx_param_sing, idx_param_fitU, idx_param_fitV = \
        brsa._build_index_param(n_l, n_V, 2)

    # Initial parameters are correct parameters with some perturbation.
    # NOTE(review): the slices below presumably match the 'log_SNR2',
    # 'c_space' and 'c_inten' entries of idx_param_fitV -- confirm.
    param0_fitU = np.random.randn(n_l + n_V) * 0.1
    param0_fitV = np.random.randn(n_V + 1) * 0.1
    param0_fitV[:n_V - 1] += np.log(snr[:n_V - 1]) * 2
    param0_fitV[n_V - 1] += np.log(smooth_width) * 2
    param0_fitV[n_V] += np.log(inten_kernel) * 2

    # Terms shared, in this order, by the fitU and fitV likelihoods.
    data_terms = (XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag,
                  XTY, XTDY, XTFY)

    def loglike_fitU(param):
        # log(SNR^2) is rebuilt on every call so each evaluation gets a
        # fresh array.
        args = data_terms + (np.log(snr) * 2, l_idx, n_C, n_T, n_V,
                             idx_param_fitU, n_C)
        return brsa._loglike_AR1_diagV_fitU(param, *args)

    def loglike_fitV(param, *gp_args):
        # ``gp_args`` are the trailing GP switches and settings, passed
        # through positionally.
        args = data_terms + (L_full[l_idx], np.tan(rho1 * np.pi / 2),
                             l_idx, n_C, n_T, n_V,
                             idx_param_fitV, n_C) + gp_args
        return brsa._loglike_AR1_diagV_fitV(param, *args)

    def loglike_fitV_noGP(param):
        return loglike_fitV(param, False, False)

    def loglike_fitV_GP(param):
        return loglike_fitV(param, True, True, dist2, inten_diff2, 100, 100)

    # Log likelihood and derivative of fitU at the initial parameters
    _, deriv0 = loglike_fitU(param0_fitU)
    # Gradient wrt the first element of the Cholesky factor
    assert_gradient(
        loglike_fitU, param0_fitU, deriv0,
        one_hot(np.size(param0_fitU), idx_param_fitU['Cholesky'][0]),
        'gradient of fitU wrt Cholesky factor incorrect')
    # Gradient wrt the reparametrization of the AR(1) coefficient of noise
    assert_gradient(
        loglike_fitU, param0_fitU, deriv0,
        one_hot(np.size(param0_fitU), idx_param_fitU['a1'][0]),
        'gradient of fitU wrt to AR(1) coefficient incorrect')
    # Test on a random direction
    vec = np.random.randn(np.size(param0_fitU))
    vec = vec / np.linalg.norm(vec)
    assert_gradient(loglike_fitU, param0_fitU, deriv0, vec,
                    'gradient of fitU incorrect')

    # Gradient of _fitV wrt log(SNR^2) assuming no GP prior.
    param0_noGP = param0_fitV[idx_param_fitV['log_SNR2']]
    _, deriv0 = loglike_fitV_noGP(param0_noGP)
    assert_gradient(
        loglike_fitV_noGP, param0_noGP, deriv0,
        one_hot(np.size(param0_noGP), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model without GP')

    # Gradient of _fitV wrt log(SNR^2) assuming GP prior.
    _, deriv0 = loglike_fitV_GP(param0_fitV)
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['log_SNR2'][0]),
        'gradient of fitV wrt log(SNR2) incorrect for model with GP')
    # Gradient wrt the spatial length scale parameter of the GP prior
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['c_space']),
        'gradient of fitV wrt spatial length scale of GP incorrect')
    # Gradient wrt the intensity length scale parameter of the GP prior
    assert_gradient(
        loglike_fitV_GP, param0_fitV, deriv0,
        one_hot(np.size(param0_fitV), idx_param_fitV['c_inten']),
        'gradient of fitV wrt intensity length scale of GP incorrect')
    # Gradient on a random direction
    vec = np.random.randn(np.size(param0_fitV))
    vec = vec / np.linalg.norm(vec)
    assert_gradient(loglike_fitV_GP, param0_fitV, deriv0, vec,
                    'gradient of fitV incorrect')