def run_ridge(train_stim, train_resp, test_stim, test_resp, alphas, nruns, prefix):
    """Run bootstrap ridge regression and save correlations and alphas to disk.

    The fit is delegated to ``bootstrap_ridge`` with ``single_alpha=True`` and
    ``return_wt=False``; ``alphas`` and ``nruns`` are forwarded positionally
    as its fifth and sixth arguments.

    ``train_stim`` and ``train_resp`` are iterated and the ``.shape`` of each
    element is printed; ``test_stim`` and ``test_resp`` must expose ``.shape``
    directly.

    Two files are written: ``prefix + 'corrs.npy'`` and
    ``prefix + 'alphas.npy'`` (the alphas returned by ``bootstrap_ridge``,
    not the candidates passed in).  ``prefix`` is used as a plain string
    prefix, so it must already end with a separator if it names a directory.

    Returns the correlations returned by ``bootstrap_ridge``.
    """
    print('Training bootstrap ridge regression.')
    for runs in (train_stim, train_resp):
        print([s.shape for s in runs])
    print(test_stim.shape, test_resp.shape)

    # The first return value (weights) is discarded: return_wt=False is
    # requested, so there are no weights to save.
    _, corrs, selected_alphas, _, _ = bootstrap_ridge(
        train_stim, train_resp, test_stim, test_resp, alphas, nruns,
        single_alpha=True, return_wt=False)
    print('Finished training ridge regression, writing to file.')

    # Save the correlations as .npy (the niml.dset export below is disabled).
    np.save(prefix + 'corrs.npy', corrs)
    print(np.min(corrs), np.max(corrs))
    # out = get_full_surface(corrs)
    # mv.niml.write(prefix + 'corrs.{0}.niml.dset'.format(hemi), corrs[None,:])

    # Save the alphas chosen by the fit.
    np.save(prefix + 'alphas.npy', selected_alphas)
    print('\nFinished writing corrs and alphas to {0}'.format(prefix))
    return corrs
snrs = np.linspace(0, 0.2, M) realresponses = np.dot(features, realwt) # shape (TR+TP, M) responses = (realresponses * snrs) + noise Rresp = responses[:TR] Presp = responses[TR:] Rstim = features[:TR] Pstim = features[TR:] if arg == "--benchmark" and rank == 0: f = open("benchmark.log", "a") t0 = time.time() # Run the optimized version of the ridge code wt, corr, valphas, bscorrs, valinds = bootstrap_ridge(Rstim, Rresp, Pstim, Presp, alphas=np.logspace(-2, 2, 20), nboots=15, chunklen=10, nchunks=15, test_bootstrap=True) if arg == "--benchmark" and rank == 0: t1 = time.time() total_runtime = t1 - t0 f.write("Runtime: N=%d, M=%d, TR=%d, TP=%d, Time=%d\n" % (N, M, TR, TP, total_runtime)) f.close() # Run the original ridge code, if we are correctness testing only if arg == "--test-correct": print("Running original ridge code...") wt_test, corr_test, valphas_test, bscorrs_test, valinds_test = bootstrap_ridge_test(Rstim, Rresp, Pstim, Presp, alphas=np.logspace(-2, 2, 20),
# Synthetic regression problem.  N (the number of features) is expected to be
# defined before this point.
M = 1000  # response sources (voxels, whatever)
TR = 1000  # regression timepoints
TP = 200  # prediction timepoints

# Per-response scaling of the true signal, ramping linearly from 0 to 0.2.
snrs = np.linspace(0, 0.2, M)

# NOTE: the three randn draws stay in this order so that a seeded run keeps
# producing the same data.
realwt = np.random.randn(N, M)
features = np.random.randn(TR + TP, N)
realresponses = features.dot(realwt)  # shape (TR+TP, M)
noise = np.random.randn(TR + TP, M)
responses = realresponses * snrs + noise

# The first TR rows form the regression set, the remaining TP rows the
# prediction set.
Rresp, Presp = responses[:TR], responses[TR:]
Rstim, Pstim = features[:TR], features[TR:]

# Run bootstrap ridge
wt, corr, valphas, bscorrs, valinds = bootstrap_ridge(
    Rstim, Rresp, Pstim, Presp,
    alphas=np.logspace(-2, 2, 20),
    nboots=5,
    chunklen=10,
    nchunks=15,
    return_wt=False,
)
# Corr should increase quickly across "voxels". Last corr should be large (>0.9-ish).
# wt should be very similar to realwt for last few voxels.
# NOTE(review): return_wt=False is passed above, so wt may not hold usable
# weights -- confirm before comparing it against realwt.
# Split responses and stimuli: the first TR rows are the regression set, the
# remaining rows the prediction set.
Rresp = responses[:TR]
Presp = responses[TR:]
Rstim = features[:TR]
Pstim = features[TR:]

# Timings are only recorded in benchmark mode, and only by rank 0.
if arg == "--benchmark" and rank == 0:
    t0 = time.time()

# Run the optimized version of the ridge code
wt, corr, valphas, bscorrs, valinds = bootstrap_ridge(
    Rstim, Rresp, Pstim, Presp,
    alphas=np.logspace(-2, 2, 20),
    nboots=15,
    chunklen=10,
    nchunks=15,
    test_bootstrap=True,
)

if arg == "--benchmark" and rank == 0:
    t1 = time.time()
    total_runtime = t1 - t0
    # The log is opened only once there is a result to record, inside a
    # context manager, so the handle cannot be left open if bootstrap_ridge
    # raises part-way through the timed run.
    with open("benchmark.log", "a") as f:
        # %d formats the elapsed time as whole seconds (fraction dropped).
        f.write("Runtime: N=%d, M=%d, TR=%d, TP=%d, Time=%d\n"
                % (N, M, TR, TP, total_runtime))

# Run the original ridge code, if we are correctness testing only
if arg == "--test-correct":
    print("Running original ridge code...")
def fit_models(X_feats, zY_total, bulked_X_feats, Xideal, data_params,
               use_ols=False, use_features="raw", metric="corr",
               ridge_optimize_corr=True, alphas=np.logspace(-3, 3, 10),
               verbose=True, nboots=5, **etc):
    """Fit one model per non-empty combination of three feature spaces.

    For each of the seven combinations of feature spaces 0, 1, 2 (printed as
    A, B, C) a design matrix is assembled, a model is fitted on the first
    ``N_R`` time points and evaluated on the remaining ones.

    Parameters
    ----------
    X_feats, bulked_X_feats : indexable by feature-space index (0..2)
        Each entry is stacked with ``np.vstack``; in ``"same"`` mode entries
        of ``X_feats`` are stacked with an array of ``N_R + N_P`` columns, so
        entries are laid out as (features, time points).
    zY_total : 2-D array, time points on the first axis
        Rows ``[:N_R]`` are used for fitting, rows ``[N_R:]`` for evaluation;
        each column is scored separately.
    Xideal : indexable by combination index
        Design matrix used when ``use_features == "ideal"``.
    data_params : mapping
        Must provide ``'N_R'``, ``'N_P'``, ``'true_variances'``, ``'combs'``
        and ``'P_models'``.
    use_ols : bool
        If true fit with ``np.linalg.lstsq``; otherwise with
        ``ridge.bootstrap_ridge`` (``single_alpha=True``, ``chunklen=1``,
        ``nchunks=int(N_R * 0.2)``).
    use_features : {"raw", "bulked", "same", "ideal"}
        Which design matrix to build.  ``"same"`` pads the raw features with
        random features so every model has ``sum(P_models)`` features.
    metric : {"corr", "rsq"}
        Selects the return value (see Returns).
    ridge_optimize_corr : bool
        Forwarded to ``bootstrap_ridge`` as ``use_corr``.
    alphas, nboots
        Forwarded to ``bootstrap_ridge``; ``alphas`` is also the x-axis of the
        regularization-path plot.
    verbose : bool
        Print progress and, for ridge fits, draw the regularization path.
    **etc
        Accepted and ignored.

    Returns
    -------
    ``rsq_corr(np.array(corr_est))`` when ``metric == "corr"``, otherwise
    ``np.array(rsq_est)``; both have one row per feature combination.

    Raises
    ------
    ValueError
        For an unknown ``metric`` (checked before any fitting) or an unknown
        ``use_features``.
    """
    # Fail fast instead of fitting every model and only then rejecting metric.
    if metric not in ("corr", "rsq"):
        raise ValueError(metric)

    # feature spaces to use in each model: all singles, pairs and the triple
    feature_combs = list(chain(*[combinations(range(3), n) for n in [1, 2, 3]]))

    B_est = []  # estimated weights (not used for anything currently)
    corr_est = []  # estimated r^2 from correlation
    rsq_est = []  # estimated R^2 from sum of squared error

    N_R = data_params['N_R']
    N_P = data_params['N_P']
    true_variances = data_params['true_variances']
    combs = data_params['combs']

    # The regularization path is only drawn for verbose ridge fits.
    show_path = verbose and not use_ols
    if show_path:
        figure()

    Psum = sum(data_params['P_models'])

    for combi, comb in enumerate(feature_combs):
        # print(...) with a single argument behaves the same on Python 2 and 3.
        if verbose:
            print("\nFitting model %s" % ", ".join([['A', 'B', 'C'][c] for c in comb]))

        thisP = np.array(data_params['P_models'])[list(comb)].sum()

        # npp.zs is applied along the transposed layout and transposed back;
        # presumably it z-scores each feature over time -- confirm in npp.
        if use_features == "raw":
            Xcomb = npp.zs(np.vstack([X_feats[c] for c in comb]).T).T
        elif use_features == "bulked":
            # <- bulked gives best results!! ??!!?!
            Xcomb = npp.zs(np.vstack([bulked_X_feats[c] for c in comb]).T).T
        elif use_features == "same":
            # Pad with random features so every model has Psum features.
            Xcomb = npp.zs(np.vstack(
                [X_feats[c] for c in comb]
                + [np.random.randn(Psum - thisP, N_R + N_P)]).T).T
        elif use_features == "ideal":
            Xcomb = npp.zs(Xideal[combi].T).T
        else:
            raise ValueError(use_features)

        if verbose:
            print(Xcomb.shape)

        if use_ols:
            wts, _, _, _ = np.linalg.lstsq(Xcomb.T[:N_R], zY_total[:N_R])
        else:
            wts, _, _, bscorrs, _ = ridge.bootstrap_ridge(
                Xcomb.T[:N_R], zY_total[:N_R],
                Xcomb.T[N_R:], zY_total[N_R:],
                alphas=alphas,
                nboots=nboots,
                chunklen=1,
                nchunks=int(N_R * 0.2),
                use_corr=ridge_optimize_corr,
                single_alpha=True)

        if show_path:
            semilogx(alphas, npp.zs(bscorrs.mean(2).mean(1)))

        B_est.append(np.vstack(wts).T)

        # Score every response column on the held-out time points.
        preds = np.dot(Xcomb.T[N_R:], wts)
        corrs = [np.corrcoef(pred, Y[N_R:])[0, 1]
                 for pred, Y in zip(preds.T, zY_total.T)]
        rsqs = [1 - (Y[N_R:] - pred).var() / Y[N_R:].var()
                for pred, Y in zip(preds.T, zY_total.T)]
        corr_est.append(corrs)
        rsq_est.append(rsqs)

        # Sum of the true variances over the union of combs[c] for this model.
        theoretical_rsq = true_variances[list(
            set.union(*[set(combs[c]) for c in comb]))].sum()
        avg_corr_rsq = rsq_corr(np.array(corrs)).mean()
        avg_rsq = np.array(rsqs).mean()
        if verbose:
            print("Theor. rsq: %0.3f, corr-based: %0.3f, rsq: %0.3f" % (
                theoretical_rsq, avg_corr_rsq, avg_rsq))

    if show_path:
        xlabel("Alpha")
        title("Ridge Regularization Path")

    if metric == "corr":
        return rsq_corr(np.array(corr_est))
    return np.array(rsq_est)
def _plot_actual_vs_predicted(actual, predicted, predicted_label):
    """Plot one voxel's actual (black) and predicted (red) time courses."""
    f = figure(figsize=(15, 5))
    ax = f.add_subplot(1, 1, 1)
    realresp = ax.plot(actual, 'k')[0]
    predresp = ax.plot(predicted, 'r')[0]
    # Span the whole series rather than a hard-coded number of time points.
    ax.set_xlim(0, len(actual))
    ax.set_xlabel("Time (fMRI time points)")
    ax.legend((realresp, predresp), ("Actual response", predicted_label))


def runRegression(delRstim, delPstim, zRresp, zPresp):
    """Fit bootstrap ridge regression and return per-voxel test correlations.

    The model is fitted with ``bootstrap_ridge`` on the Regression dataset
    (``delRstim``, ``zRresp``) and evaluated on the Prediction dataset
    (``delPstim``, ``zPresp``).  Along the way the function prints the shapes
    of the fit results and draws four figures: mean bootstrap correlation per
    alpha, actual vs. predicted response for one example voxel (raw and
    rescaled), and a histogram of the per-voxel correlations.

    Parameters
    ----------
    delRstim, zRresp
        Stimulus and response matrices of the Regression dataset.
    delPstim, zPresp
        Stimulus and response matrices of the Prediction dataset; ``zPresp``
        is indexed as (time points, voxels).

    Returns
    -------
    numpy.ndarray
        1-D array with ``zPresp.shape[1]`` entries: the correlation between
        the predicted and actual response of every voxel.
    """
    # Run regression
    # Equally log-spaced alphas between 10 and 1000. The third number is the
    # number of alphas to test.
    tested_alphas = np.logspace(1, 3, 10)
    nboots = 1  # Number of cross-validation runs.
    chunklen = 40
    nchunks = 20

    wt, corr, alphas, bscorrs, valinds = bootstrap_ridge(
        delRstim, zRresp, delPstim, zPresp,
        tested_alphas, nboots, chunklen, nchunks,
        singcutoff=1e-10, single_alpha=True)

    # Mean bootstrap correlation for each tested alpha (same grid as passed in).
    f = figure()
    ax = f.add_subplot(1, 1, 1)
    ax.semilogx(tested_alphas, bscorrs.mean(2).mean(1), 'o-')

    # wt is the regression weights
    print("wt has shape: ", wt.shape)
    # corr is the correlation between predicted and actual voxel responses in
    # the Prediction dataset
    print("corr has shape: ", corr.shape)
    # alphas is the selected alpha value for each voxel, here it should be the
    # same across voxels
    print("alphas has shape: ", alphas.shape)
    # bscorrs is the correlation between predicted and actual voxel responses
    # for each round of cross-validation within the Regression dataset
    print("bscorrs has shape (num alphas, num voxels, nboots): ", bscorrs.shape)
    # valinds is the indices of the time points in the Regression dataset that
    # were used for each round of cross-validation
    print("valinds has shape: ", np.array(valinds).shape)

    # ### Testing the regression models by predicting responses
    # According to the linear model, the predicted response of each voxel is a
    # weighted sum of the semantic features, i.e. the dot product of stimuli
    # and weights: $$\hat{R} = S \beta$$
    # First let's refresh ourselves on the shapes of these matrices
    print("zPresp has shape: ", zPresp.shape)
    print("wt has shape: ", wt.shape)
    print("delPstim has shape: ", delPstim.shape)

    # Then let's predict responses by taking the dot product of the weights and stim
    pred = np.dot(delPstim, wt)
    print("pred has shape: ", pred.shape)

    # Example voxel: 20710 in the original notes ("a decent voxel"); clamped
    # so smaller datasets select their last voxel instead of raising IndexError.
    selvox = min(20710, zPresp.shape[1] - 1)

    # #### Visualizing predicted and actual responses
    _plot_actual_vs_predicted(zPresp[:, selvox], pred[:, selvox],
                              "Predicted response")

    # The scale of the prediction is relatively arbitrary, so rescale it with
    # zscore and plot again.
    _plot_actual_vs_predicted(zPresp[:, selvox], zscore(pred[:, selvox]),
                              "Predicted response (scaled)")

    # Compute correlation between single predicted and actual response
    # (np.corrcoef returns a correlation matrix; pull out the element [0,1] to
    # get correlation between the two vectors)
    voxcorr = np.corrcoef(zPresp[:, selvox], pred[:, selvox])[0, 1]
    print("Correlation between predicted and actual responses for voxel %d: %f"
          % (selvox, voxcorr))

    # #### Computing correlations for all voxels
    # Written as an explicit loop on purpose so that it is obvious what is
    # being computed.  (Per the original notes this should give the same
    # values as `corr` returned by `bootstrap_ridge`.)
    voxcorrs = np.zeros((zPresp.shape[1],))  # zero-filled array to hold correlations
    for vi in range(zPresp.shape[1]):
        voxcorrs[vi] = np.corrcoef(zPresp[:, vi], pred[:, vi])[0, 1]
    print(voxcorrs)

    # ### Visualizing correlations across the brain
    # The correlation is not expected to be high everywhere; a histogram shows
    # generally whether the model is working well or not.
    f = figure(figsize=(8, 8))
    ax = f.add_subplot(1, 1, 1)
    ax.hist(voxcorrs, 100)  # histogram correlations with 100 bins
    ax.set_xlabel("Correlation")
    ax.set_ylabel("Num. voxels")
    ax.set_title('Histogram of correlations')
    #plt.show()
    return voxcorrs
def fit_models(X_feats, zY_total, bulked_X_feats, Xideal, data_params,
               use_ols=False, use_features="raw", metric="corr",
               ridge_optimize_corr=True, alphas=np.logspace(-3, 3, 10),
               verbose=True, nboots=5, **etc):
    """Fit one model per non-empty combination of three feature spaces.

    For each of the seven combinations of feature spaces 0, 1, 2 (printed as
    A, B, C) a design matrix is assembled, a model is fitted on the first
    ``N_R`` time points and evaluated on the remaining ones.

    Parameters
    ----------
    X_feats, bulked_X_feats : indexable by feature-space index (0..2)
        Each entry is stacked with ``np.vstack``; in ``"same"`` mode entries
        of ``X_feats`` are stacked with an array of ``N_R + N_P`` columns, so
        entries are laid out as (features, time points).
    zY_total : 2-D array, time points on the first axis
        Rows ``[:N_R]`` are used for fitting, rows ``[N_R:]`` for evaluation;
        each column is scored separately.
    Xideal : indexable by combination index
        Design matrix used when ``use_features == "ideal"``.
    data_params : mapping
        Must provide ``'N_R'``, ``'N_P'``, ``'true_variances'``, ``'combs'``
        and ``'P_models'``.
    use_ols : bool
        If true fit with ``np.linalg.lstsq``; otherwise with
        ``ridge.bootstrap_ridge`` (``single_alpha=True``, ``chunklen=1``,
        ``nchunks=int(N_R * 0.2)``).
    use_features : {"raw", "bulked", "same", "ideal"}
        Which design matrix to build.  ``"same"`` pads the raw features with
        random features so every model has ``sum(P_models)`` features.
    metric : {"corr", "rsq"}
        Selects the return value (see Returns).
    ridge_optimize_corr : bool
        Forwarded to ``bootstrap_ridge`` as ``use_corr``.
    alphas, nboots
        Forwarded to ``bootstrap_ridge``; ``alphas`` is also the x-axis of the
        regularization-path plot.
    verbose : bool
        Print progress and, for ridge fits, draw the regularization path.
    **etc
        Accepted and ignored.

    Returns
    -------
    ``rsq_corr(np.array(corr_est))`` when ``metric == "corr"``, otherwise
    ``np.array(rsq_est)``; both have one row per feature combination.

    Raises
    ------
    ValueError
        For an unknown ``metric`` (checked before any fitting) or an unknown
        ``use_features``.
    """
    # Fail fast instead of fitting every model and only then rejecting metric.
    if metric not in ("corr", "rsq"):
        raise ValueError(metric)

    # feature spaces to use in each model: all singles, pairs and the triple
    feature_combs = list(chain(*[combinations(range(3), n) for n in [1, 2, 3]]))

    B_est = []  # estimated weights (not used for anything currently)
    corr_est = []  # estimated r^2 from correlation
    rsq_est = []  # estimated R^2 from sum of squared error

    N_R = data_params['N_R']
    N_P = data_params['N_P']
    true_variances = data_params['true_variances']
    combs = data_params['combs']

    # The regularization path is only drawn for verbose ridge fits.
    show_path = verbose and not use_ols
    if show_path:
        figure()

    Psum = sum(data_params['P_models'])

    for combi, comb in enumerate(feature_combs):
        # print(...) with a single argument behaves the same on Python 2 and 3.
        if verbose:
            print("\nFitting model %s" % ", ".join([['A', 'B', 'C'][c] for c in comb]))

        thisP = np.array(data_params['P_models'])[list(comb)].sum()

        # npp.zs is applied along the transposed layout and transposed back;
        # presumably it z-scores each feature over time -- confirm in npp.
        if use_features == "raw":
            Xcomb = npp.zs(np.vstack([X_feats[c] for c in comb]).T).T
        elif use_features == "bulked":
            # <- bulked gives best results!! ??!!?!
            Xcomb = npp.zs(np.vstack([bulked_X_feats[c] for c in comb]).T).T
        elif use_features == "same":
            # Pad with random features so every model has Psum features.
            Xcomb = npp.zs(np.vstack(
                [X_feats[c] for c in comb]
                + [np.random.randn(Psum - thisP, N_R + N_P)]).T).T
        elif use_features == "ideal":
            Xcomb = npp.zs(Xideal[combi].T).T
        else:
            raise ValueError(use_features)

        if verbose:
            print(Xcomb.shape)

        if use_ols:
            wts, _, _, _ = np.linalg.lstsq(Xcomb.T[:N_R], zY_total[:N_R])
        else:
            wts, _, _, bscorrs, _ = ridge.bootstrap_ridge(
                Xcomb.T[:N_R], zY_total[:N_R],
                Xcomb.T[N_R:], zY_total[N_R:],
                alphas=alphas,
                nboots=nboots,
                chunklen=1,
                nchunks=int(N_R * 0.2),
                use_corr=ridge_optimize_corr,
                single_alpha=True)

        if show_path:
            semilogx(alphas, npp.zs(bscorrs.mean(2).mean(1)))

        B_est.append(np.vstack(wts).T)

        # Score every response column on the held-out time points.
        preds = np.dot(Xcomb.T[N_R:], wts)
        corrs = [np.corrcoef(pred, Y[N_R:])[0, 1]
                 for pred, Y in zip(preds.T, zY_total.T)]
        rsqs = [1 - (Y[N_R:] - pred).var() / Y[N_R:].var()
                for pred, Y in zip(preds.T, zY_total.T)]
        corr_est.append(corrs)
        rsq_est.append(rsqs)

        # Sum of the true variances over the union of combs[c] for this model.
        theoretical_rsq = true_variances[list(
            set.union(*[set(combs[c]) for c in comb]))].sum()
        avg_corr_rsq = rsq_corr(np.array(corrs)).mean()
        avg_rsq = np.array(rsqs).mean()
        if verbose:
            print("Theor. rsq: %0.3f, corr-based: %0.3f, rsq: %0.3f" % (
                theoretical_rsq, avg_corr_rsq, avg_rsq))

    if show_path:
        xlabel("Alpha")
        title("Ridge Regularization Path")

    if metric == "corr":
        return rsq_corr(np.array(corr_est))
    return np.array(rsq_est)
logging.basicConfig(level=logging.DEBUG)

# Create some test data
N = 200  # features
M = 1000  # response sources (voxels, whatever)
TR = 1000  # regression timepoints
TP = 200  # prediction timepoints

# Per-response scaling of the true signal, ramping linearly from 0 to 0.2.
snrs = np.linspace(0, 0.2, M)

# NOTE: the three randn draws stay in this order so that a seeded run keeps
# producing the same data.
realwt = np.random.randn(N, M)
features = np.random.randn(TR + TP, N)
realresponses = features.dot(realwt)  # shape (TR+TP, M)
noise = np.random.randn(TR + TP, M)
responses = realresponses * snrs + noise

# The first TR rows form the regression set, the remaining TP rows the
# prediction set.
Rresp, Presp = responses[:TR], responses[TR:]
Rstim, Pstim = features[:TR], features[TR:]

# Run bootstrap ridge
wt, corr, valphas, bscorrs, valinds = bootstrap_ridge(
    Rstim, Rresp, Pstim, Presp,
    alphas=np.logspace(-2, 2, 20),
    nboots=5,
    chunklen=10,
    nchunks=15,
    return_wt=False,
)
# Corr should increase quickly across "voxels". Last corr should be large (>0.9-ish).
# wt should be very similar to realwt for last few voxels.
# NOTE(review): return_wt=False is passed above, so wt may not hold usable
# weights -- confirm before comparing it against realwt.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

########################################################################
from ridge import bootstrap_ridge
import logging

logging.basicConfig(level=logging.INFO)

# Ten equally log-spaced candidate alphas between 1 and 100 (10**0 .. 10**2).
alphas = np.logspace(0, 2, 10)

# NOTE: `alphas` is rebound here to the third value returned by
# bootstrap_ridge, replacing the candidate grid defined above.
wt, corr, alphas, bscorrs, valinds = bootstrap_ridge(
    x_train, y_train, x_test, y_test, alphas,
    nboots=1,
    chunklen=40,
    nchunks=20,
    singcutoff=1e-10,
    single_alpha=True,
)

#################################################################################
print(wt.shape)

# Predict the test responses from the fitted weights, then score them
# against y_test with npp.mcorr.
pred_test = x_test.dot(wt)

import npp

Ridge_correlations = npp.mcorr(y_test, pred_test)