def fit_srm(data_train, data_test, n_components):
    """Fit the shared response model.

    Parameters
    ----------
    data_train: 3d array (n_subj, n_features, n_examples/tps)
    data_test: 3d array (n_subj, n_features, n_examples/tps)
    n_components: k, the dimensionality of the shared space

    Returns
    -------
    data_train_sr: 3d array (n_subj, n_components, n_examples/tps)
        the transformed training set
    data_test_sr: 3d array (n_subj, n_components, n_examples/tps)
        the transformed test set
    srm: the fitted model
    var_exp_train: variance explained on the training set
    """
    assert len(data_train) == len(data_test)
    n_subjects = len(data_train)

    # fit SRM on the training set
    srm = SRM(features=n_components)
    data_train_sr = srm.fit_transform(data_train)

    # transform the hidden activity (on the test set) to the shared space
    data_test_sr = srm.transform(data_test)

    # calculate variance explained
    var_exp_train = calc_srm_var_exp(data_train, data_train_sr, srm.w_)

    return data_train_sr, data_test_sr, srm, var_exp_train
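# calc_srm_var_exp is not defined in this snippet. The sketch below is an
# assumption about what such a helper could compute, not the original code: it
# reconstructs each subject's training data from that subject's SRM basis and
# shared-space projection, then reports the fraction of variance explained.
import numpy as np

def _var_exp_sketch(data_train, data_train_sr, ws):
    """Hypothetical variance-explained helper (per subject)."""
    var_exp = []
    for x, s, w in zip(data_train, data_train_sr, ws):
        x_hat = w.dot(s)  # reconstruction: (n_features, k) @ (k, n_tps)
        resid = np.sum((x - x_hat) ** 2)
        total = np.sum((x - x.mean(axis=1, keepdims=True)) ** 2)
        var_exp.append(1 - resid / total)
    return np.array(var_exp)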
def SRM_V3(run1, run2, srm_k, n_iter):
    # initialize model
    print('Building Models')
    srm_train = SRM(n_iter=n_iter, features=srm_k)

    # concatenate run1 and run2 within subject before fitting SRM
    runs = []
    for i in range(len(run1)):
        runs.append(np.concatenate((run1[i], run2[i]), axis=1))

    # fit model to training data
    print('Training Models')
    srm_train.fit(runs)

    print('Testing Models')
    shared_data_run1 = srm_train.transform(run1)
    shared_data_run2 = srm_train.transform(run2)

    # average transformed data across subjects
    run1 = sum(shared_data_run1) / len(shared_data_run1)
    run2 = sum(shared_data_run2) / len(shared_data_run2)

    return run1, run2
def srm(run1, run2):
    # initialize model
    print('Building Models')
    n_iter = 50
    srm_k = 30
    srm_train_run1 = SRM(n_iter=n_iter, features=srm_k)
    srm_train_run2 = SRM(n_iter=n_iter, features=srm_k)

    # fit model to training data
    print('Training Models')
    srm_train_run1.fit(run1)
    srm_train_run2.fit(run2)

    print('Testing Models')
    shared_data_run1 = stats.zscore(np.dstack(srm_train_run2.transform(run1)),
                                    axis=1, ddof=1)
    shared_data_run2 = stats.zscore(np.dstack(srm_train_run1.transform(run2)),
                                    axis=1, ddof=1)

    # average test data across subjects
    run1 = np.mean(shared_data_run1, axis=2)
    run2 = np.mean(shared_data_run2, axis=2)

    return run1, run2
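# Minimal usage sketch for the cross-run srm() function above, on synthetic data.
# The subject/voxel/TR counts and seed are arbitrary assumptions; it assumes
# numpy as np, scipy.stats as stats, and brainiak's SRM are imported as in the
# surrounding snippets.
import numpy as np

rng = np.random.RandomState(0)
n_subj, n_vox, n_tr = 5, 200, 300
run1_demo = [rng.randn(n_vox, n_tr) for _ in range(n_subj)]
run2_demo = [rng.randn(n_vox, n_tr) for _ in range(n_subj)]

# cross-run scheme: the model trained on one run transforms the other run
avg_run1, avg_run2 = srm(run1_demo, run2_demo)
print(avg_run1.shape)  # expected: (30 features, n_tr)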
def sfn(l, msk, myrad, bcast_var):
    # Arguments:
    # l -- a list of 4D arrays, containing data from a single searchlight
    # msk -- a 3D binary array, mask of this searchlight
    # myrad -- an integer, sl_rad
    # bcast_var -- whatever is broadcasted

    # extract training and testing data
    train_data = []
    test_data = []
    d1, d2, d3, ntr = l[0].shape
    nvx = d1 * d2 * d3
    for s in l:
        train_data.append(np.reshape(s[:, :, :, :int(ntr / 2)],
                                     (nvx, int(ntr / 2))))
        test_data.append(np.reshape(s[:, :, :, int(ntr / 2):],
                                    (nvx, ntr - int(ntr / 2))))

    # train an srm model
    srm = SRM(bcast_var[0], bcast_var[1])
    srm.fit(train_data)

    # transform test data
    shared_data = srm.transform(test_data)
    for s in range(len(l)):
        shared_data[s] = np.nan_to_num(
            stats.zscore(shared_data[s], axis=1, ddof=1))

    # run experiment
    accu = time_segment_matching_accuracy(shared_data)

    # return: can also return several values. In that case, the final output
    # will be a 3D array of tuples
    return np.mean(accu)
def corr2_coeff(A, msk, myrad, bcast_var):
    # if not np.all(msk):
    #     return None
    print('Assigning Masked Data')
    run1 = [A[i][msk == 1] for i in np.arange(0, int(len(A) / 2))]
    run2 = [A[i][msk == 1] for i in np.arange(int(len(A) / 2), len(A))]

    print('Building Model')
    srm = SRM(bcast_var[0], bcast_var[1])
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)

    # NOTE: loo_idx is not a parameter here; it must be defined in the
    # enclosing (e.g., global) scope before this function is called.
    others = np.mean(
        shared_data[:, :, np.arange(shared_data.shape[-1]) != loo_idx], axis=2)
    loo = shared_data[:, :, loo_idx]

    corrAB = np.corrcoef(loo.T, others.T)[16:, :16]
    corr_eye = np.identity(16)
    same_songs = corrAB[corr_eye == 1]
    diff_songs = corrAB[corr_eye == 0]
    avg_same_songs = np.mean(same_songs)
    avg_diff_songs = np.mean(diff_songs)
    same_song_minus_diff_song = avg_same_songs - avg_diff_songs

    # Compute difference score for permuted matrices
    np.random.seed(0)
    diff_perm_holder = np.zeros((1000, 1))
    for i in range(1000):
        corrAB_perm = corrAB[np.random.permutation(16), :]
        same_songs_perm = corrAB_perm[corr_eye == 1]
        diff_songs_perm = corrAB_perm[corr_eye == 0]
        diff_perm_holder[i] = (np.mean(same_songs_perm)
                               - np.mean(diff_songs_perm))

    z = ((same_song_minus_diff_song - np.mean(diff_perm_holder))
         / np.std(diff_perm_holder))
    return z
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """fit hidden markov model

    Fit HMM to average data and cross-validate with leftout subject using
    within-song and between-song average correlations

    Parameters
    ----------
    X: list of voxel by time ndarrays (2D), one per subject (both runs)
    K: # of events for HMM (scalar)
    loo_idx: index of the left-out subject (scalar)
    song_idx: index of the song to analyze (scalar)
    song_bounds: 1D array of song boundary TRs

    Returns
    -------
    within_across: within- minus across-boundary correlation for the real
        event boundaries and for each permutation
    """

    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(
        shared_data[:, :, np.arange(shared_data.shape[-1]) != loo_idx], axis=2)
    loo = shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1],
                      loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song_bounds[song_idx]:song_bounds[song_idx + 1]].T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
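# Hedged usage note: within_across[0] corresponds to the real event boundaries
# and within_across[1:] to the permuted boundaries, so a summary z-score against
# the permutation null can be computed as in the later HMM variant below
# (inputs X, K, loo_idx, song_idx, song_bounds supplied by the caller):
# wa = HMM(X, K, loo_idx, song_idx, song_bounds)
# z = (wa[0] - np.mean(wa[1:])) / np.std(wa[1:])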
def HMM(X, human_bounds, song_idx, song_bounds, srm_k, hrf):
    """fit hidden markov model

    Fit HMM to the average SRM-transformed data and compare model-derived
    event boundaries to human-annotated boundaries with a permutation test

    Parameters
    ----------
    X: list of voxel by time ndarrays (2D), one per subject (both runs)
    human_bounds: 1D array of human-annotated event boundaries
    song_idx: index of the song to analyze (scalar)
    song_bounds: 1D array of song boundary TRs
    srm_k: # of SRM features (scalar)
    hrf: hemodynamic lag in TRs (scalar; unused here)

    Returns
    -------
    match: proportion of human boundaries matched by the model, for the real
        event boundaries and for each permutation
    """

    w = 3
    nPerm = 1000
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run2)
    print('Testing Model')
    shared_data = srm.transform(run1)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    data = np.mean(
        shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1]],
        axis=2)
    nTR = data.shape[1]

    # Fit event segmentation to the average of all subjects
    K = len(human_bounds) + 1
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(data.T)
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0] + 1

    match = np.zeros(nPerm + 1)
    events = np.argmax(ev.segments_[0], axis=1)
    _, event_lengths = np.unique(events, return_counts=True)
    perm_bounds = bounds.copy()

    for p in range(nPerm + 1):
        for hb in human_bounds:
            if np.any(np.abs(perm_bounds - hb) <= w):
                match[p] += 1
        match[p] /= len(human_bounds)

        np.random.seed(p)
        perm_bounds = np.cumsum(np.random.permutation(event_lengths))[:-1]

    return match
def SRM_V1(train, test, srm_k, n_iter):
    # initialize model
    print('Building Models')
    srm_train_data = SRM(n_iter=n_iter, features=srm_k)

    # fit model to training data
    print('Training Models')
    srm_train_data.fit(train)

    print('Testing Models')
    shared_data = srm_train_data.transform(test)

    return shared_data
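# Minimal usage sketch for SRM_V1 with synthetic data; the sizes and seed below
# are arbitrary assumptions, not values from the original analyses. Each subject
# is a voxels-by-TRs array, and train and test share the voxel dimension.
import numpy as np
from brainiak.funcalign.srm import SRM

rng = np.random.RandomState(0)
train_demo = [rng.randn(500, 300) for _ in range(10)]  # 10 subjects, voxels x TRs
test_demo = [rng.randn(500, 200) for _ in range(10)]   # same voxels, held-out TRs

shared_test = SRM_V1(train_demo, test_demo, srm_k=20, n_iter=10)
print(len(shared_test), shared_test[0].shape)          # 10 subjects, (20, 200) each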
def srm(train_data, test_data, n_features):
    # Z-score the data
    n = len(train_data)
    for subject in range(n):
        train_data[subject] = stats.zscore(train_data[subject], axis=1, ddof=1)
        test_data[subject] = stats.zscore(test_data[subject], axis=1, ddof=1)

    model = SRM(n_iter=10, features=n_features)
    model.fit(train_data)
    projected_data = model.transform(test_data)

    for subject in range(n):
        projected_data[subject] = stats.zscore(projected_data[subject],
                                               axis=1, ddof=1)

    return projected_data
def train_srm(training_data, use_robust_srm=True, n_comps=50, n_iters=20,
              printruntime=True):
    """
    Fit srm on training data
    """
    # TODO: delete after test
    from mpi4py import MPI
    mpicomm = MPI.COMM_WORLD

    if use_robust_srm:
        srm = RSRM(n_iter=n_iters, features=n_comps, comm=mpicomm)
    else:
        srm = SRM(n_iter=n_iters, features=n_comps, comm=mpicomm)

    # fit
    if printruntime:
        start = time.time()
    srm.fit(training_data)
    if printruntime:
        elapsed = time.time() - start
        print('fitting srm took: ', elapsed)

    return srm
def srm(train_data, test_data, n_features):
    # Z-score the data
    n = len(train_data)
    for subject in range(n):
        train_data[subject] = stats.zscore(train_data[subject], axis=1, ddof=1)
    test_data = stats.zscore(test_data, axis=1, ddof=1)

    model = SRM(n_iter=10, features=n_features)
    model.fit(train_data)

    # now we do the m-step of srm to get its W for the new X
    a_new = test_data.dot(model.s_.T)
    perturbation = np.zeros(a_new.shape)
    np.fill_diagonal(perturbation, 0.001)
    u, s, v = np.linalg.svd(a_new + perturbation, full_matrices=False)
    w_new = u.dot(v)

    return (rmse(w_new.dot(model.s_), test_data),
            relative_mse(w_new.dot(model.s_), test_data))
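# Hedged note: the W update above has the orthogonal-Procrustes form. Given the
# learned shared response S (model.s_) and new data X, the SVD of X S^T = U diag(s) V^T
# yields W = U V^T, the orthonormal-column basis minimizing ||X - W S||_F.
# A quick self-contained check of the orthonormality property (sizes and names
# below are illustrative only):
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 300)          # new subject: voxels x TRs
S = rng.randn(20, 300)           # shared response: features x TRs
u, _, v = np.linalg.svd(X.dot(S.T), full_matrices=False)
W = u.dot(v)                     # voxels x features
assert np.allclose(W.T.dot(W), np.eye(20), atol=1e-8)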
def isc_srm(X, set_srm):
    """perform isc on srm searchlights

    Parameters
    ----------
    X: list of searchlights where searchlights are voxels by time
    set_srm: flag; if 0, run ISC on the concatenated raw runs; if 1, run ISC
        on run 2 data transformed by an SRM trained on run 1

    Returns
    -------
    r: correlations for each searchlight's timecourse correlated with all
       other timecourses
    """

    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    data = np.zeros((run1[0].shape[0], run1[0].shape[1] * 2, len(run1)))

    if set_srm == 0:
        for i in range(data.shape[2]):
            data[:, :, i] = np.hstack((run1[i], run2[i]))
        # run isc
        isc_output = brainiak.isfc.isfc(data)
    elif set_srm == 1:
        # train on run 1 and test on run 2
        print('Building Model')
        srm = SRM(n_iter=10, features=5)
        print('Training Model')
        srm.fit(run1)
        print('Testing Model')
        shared_data = srm.transform(run2)
        shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
        # run isc
        isc_output = brainiak.isfc.isfc(shared_data)
        print(isc_output)

    # average isc results
    mean_isc = np.mean(isc_output)

    return mean_isc, isc_output
def SRM_V2(run1, run2, srm_k, n_iter):
    # initialize model
    print('Building Models')
    srm_train_run1 = SRM(n_iter=n_iter, features=srm_k)
    srm_train_run2 = SRM(n_iter=n_iter, features=srm_k)

    # fit model to training data
    print('Training Models')
    srm_train_run1.fit(run1)
    srm_train_run2.fit(run2)

    print('Testing Models')
    shared_data_run1 = srm_train_run2.transform(run1)
    shared_data_run2 = srm_train_run1.transform(run2)

    # average test data across subjects
    run1 = sum(shared_data_run1) / len(shared_data_run1)
    run2 = sum(shared_data_run2) / len(shared_data_run2)

    return run1, run2
def sfn(l, msk, myrad, bcast_var):
    # extract training and testing data
    train_data = []
    test_data = []
    d1, d2, d3, ntr = l[0].shape
    nvx = d1 * d2 * d3
    for s in l:
        train_data.append(
            np.reshape(s[:, :, :, :int(ntr / 2)], (nvx, int(ntr / 2))))
        test_data.append(
            np.reshape(s[:, :, :, int(ntr / 2):], (nvx, ntr - int(ntr / 2))))

    # train an srm model
    srm = SRM(bcast_var[0], bcast_var[1])
    srm.fit(train_data)

    # transform test data
    shared_data = srm.transform(test_data)
    for s in range(len(l)):
        shared_data[s] = np.nan_to_num(
            stats.zscore(shared_data[s], axis=1, ddof=1))

    # experiment
    accu = timesegmentmatching_accuracy(shared_data, 6)

    # multiple outputs will be saved as tuples
    return np.mean(accu), stats.sem(accu)
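# Hedged usage sketch: how a voxel function like sfn above is typically wired
# into a BrainIAK searchlight. The radius, block size, and data shapes are
# placeholder assumptions, and bcast_var is assumed to be (n_iter, features) to
# match the positional SRM(bcast_var[0], bcast_var[1]) call. Assumes the helpers
# sfn relies on (SRM, scipy.stats as stats, timesegmentmatching_accuracy) are
# importable in this session.
import numpy as np
from brainiak.searchlight.searchlight import Searchlight

n_subj, dim, ntr = 4, 6, 50
data = [np.random.randn(dim, dim, dim, ntr) for _ in range(n_subj)]
mask = np.ones((dim, dim, dim), dtype=bool)

sl = Searchlight(sl_rad=1, max_blk_edge=5)
sl.distribute(data, mask)
sl.broadcast((10, 5))               # (n_iter, features) for SRM
result = sl.run_searchlight(sfn)    # 3D array of per-searchlight outputs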
def srm(train_data, n_features):
    # Z-score the data (train_data is indexed with subject arrays below,
    # so it is expected to be a 3D array: subjects x voxels x TRs)
    n = len(train_data)
    for subject in range(n):
        train_data[subject] = stats.zscore(train_data[subject], axis=1, ddof=1)

    # randomly split subjects into two halves and fit a separate SRM to each
    all_subj = np.arange(n)
    np.random.shuffle(all_subj)
    model1 = SRM(n_iter=10, features=n_features)
    model2 = SRM(n_iter=10, features=n_features)
    dset1 = train_data[all_subj[:n // 2], :, :]
    dset2 = train_data[all_subj[n // 2:], :, :]
    model1.fit(dset1)
    model2.fit(dset2)

    return norm_from_ortho(model1.s_, model2.s_)
def train_srm(training_data, use_robust_srm=True, n_comps=10, n_iters=4,
              printruntime=True):
    """
    Fit srm on training data
    """
    if use_robust_srm:
        srm = RSRM(n_iter=n_iters, features=n_comps)
    else:
        srm = SRM(n_iter=n_iters, features=n_comps)

    # fit
    if printruntime:
        start = time.time()
    srm.fit(training_data)
    if printruntime:
        elapsed = time.time() - start
        print('fitting srm took: ', elapsed)

    return srm
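# Minimal usage sketch for train_srm (assumes brainiak is installed; the imports,
# synthetic data sizes, and seed below are assumptions, not from the original code).
import time
import numpy as np
from brainiak.funcalign.srm import SRM
from brainiak.funcalign.rsrm import RSRM

rng = np.random.RandomState(0)
training_demo = [rng.randn(300, 200) for _ in range(8)]   # 8 subjects, voxels x TRs
model = train_srm(training_demo, use_robust_srm=False, n_comps=10, n_iters=4)
# For SRM the per-subject bases are in model.w_ and the shared response in
# model.s_; the robust variant (RSRM) additionally estimates a subject-specific term.
print(model.s_.shape)   # (10, 200)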
test = np.nan_to_num(
    stats.zscore(np.load(datadir + 'angular_gyrus_raw_hmm_searchlight_run2_n25.npy'),
                 axis=1, ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0, train.shape[2]):
    train_list.append(train[:, :, i])
    test_list.append(test[:, :, i])

# Initialize model
print('Building Model')
srm = SRM(n_iter=10, features=25)

# Fit model to training data (run 1)
print('Training Model')
srm.fit(train_list)

# Test model on testing data to produce shared response
print('Testing Model')
shared_data = srm.transform(test_list)

avg_response = sum(shared_data) / len(shared_data)

human_bounds = np.cumsum(np.load(datadir + 'songs2Dur.npy'))[:-1]

human_bounds2 = np.array([
    0, 90, 270, 449, 538, 672, 851, 1031, 1255, 1480, 1614, 1704, 1839, 2063,
    2288, 2377, 2511
])
train_list_A1 = []
test_list_A1 = []
train_list_vmPFC = []
test_list_vmPFC = []

for i in range(0, train_A1.shape[2]):
    train_list_A1.append(train_A1[:, :, i])
    test_list_A1.append(test_A1[:, :, i])

for i in range(0, train_vmPFC.shape[2]):
    train_list_vmPFC.append(train_vmPFC[:, :, i])
    test_list_vmPFC.append(test_vmPFC[:, :, i])

# Initialize models
print('Building Model')
srm_A1 = SRM(n_iter=10, features=50)
srm_vmPFC = SRM(n_iter=10, features=10)

# Fit model to training data (run 1)
print('Training Model')
srm_A1.fit(train_list_A1)
srm_vmPFC.fit(train_list_vmPFC)

# Test model on testing data to produce shared response
print('Testing Model')
shared_data_A1 = srm_A1.transform(test_list_A1)
shared_data_vmPFC = srm_vmPFC.transform(test_list_vmPFC)

avg_response_A1 = sum(shared_data_A1) / len(shared_data_A1)
avg_response_vmPFC = sum(shared_data_vmPFC) / len(shared_data_vmPFC)
# extract max target activation as the metric for recall
tma[cond] = np.array([
    np.max(ma_dlist[cond][i_s]['targ'], axis=-1)
    for i_s in range(n_subjs)
]).transpose((0, 2, 1))
print(f'np.shape(tma[{cond}]) = {np.shape(tma[cond])}')

# organize brain activity
CMs_darray, DAs_darray = {}, {}
for cond in all_conds:
    CMs_darray[cond] = np.array(CMs_dlist[cond]).transpose((0, 3, 2, 1))
    DAs_darray[cond] = np.array(DAs_dlist[cond]).transpose((0, 3, 2, 1))
    print(f'np.shape(CMs_darray[{cond}]) = {np.shape(CMs_darray[cond])}')
    print(f'np.shape(DAs_darray[{cond}]) = {np.shape(DAs_darray[cond])}')

'''isc'''
dim_srm = 16
srm = SRM(features=dim_srm)

test_prop = .5
n_examples_tr = int(n_examples * (1 - test_prop))
n_examples_te = n_examples - n_examples_tr

data = DAs_darray
_, nH, _, _ = np.shape(data[cond])

# split data
data_tr = {cond: [] for cond in all_conds}
data_te = {cond: [] for cond in all_conds}
for cond in all_conds:
    data_tr_cond = data[cond][:, :, :, :n_examples_tr]
    data_te_cond = data[cond][:, :, :, n_examples_tr:]
for story in train_stories:
    roi_data = roi_datasets[story]['data']
    if story in test_stories:
        roi_trs = roi_data.shape[0]
        train_roi[story] = np.nan_to_num(
            zscore(roi_data[:roi_trs // 2, ...], axis=0))
        test_roi[story] = np.nan_to_num(
            zscore(roi_data[roi_trs // 2:, ...], axis=0))
    else:
        train_roi[story] = np.nan_to_num(zscore(roi_data, axis=0))

# Time-series SRM on ROI data
if timeseries_srm:
    test_shared = {}
    for story in stories:
        srm = SRM(n_iter=n_iter, features=n_features)

        # Change subjects to list for SRM
        train_list = [train.T for train in np.moveaxis(train_roi[story], 2, 0)]
        test_list = [test.T for test in np.moveaxis(test_roi[story], 2, 0)]

        # Train SRM and apply
        srm.fit(train_list)
        test_transformed = srm.transform(test_list)
        test_shared[story] = np.dstack([test.T for test in test_transformed])

# Load in the whole-brain surface data
train_target, test_target = {}, {}
for story in train_stories:
# (signature of this cross-validated z-scoring helper is inferred from the
# calls below; it standardizes test data using the training set's statistics)
def zscore_cv(train, test, axis=0):
    train_mean = np.mean(train, axis=axis)
    train_std = np.std(train, axis=axis)
    train_z = (train - train_mean) / train_std
    test_z = (test - train_mean) / train_std
    return train_z, test_z

# Initialize 10-fold cross-validation splitter
cv = KFold(n_splits=10)

# Initialize SRM with fixed number of features
adaptive_k = False
n_features = 50
srm = SRM(n_iter=10, features=n_features)

# Loop through all pairs of embedding types and get cross-validated SRM
srm_pairs = {}
for pair in emb_pairs:

    # Cross-validation split indices
    srm_tests = {pair[0]: [], pair[1]: []}
    for cv_i, (train_ids, test_ids) in enumerate(cv.split(np.arange(n_embs))):

        # Split and z-score embeddings
        emb0_train, emb0_test = zscore_cv(embeddings[pair[0]][train_ids],
                                          embeddings[pair[0]][test_ids])
        emb1_train, emb1_test = zscore_cv(embeddings[pair[1]][train_ids],
                                          embeddings[pair[1]][test_ids])
include_list.append(ints)
include_list = np.hstack(include_list)

# grab first 4 songs from run 1 to train which is equal to first 628 seconds
for i in subjs:
    run1 = datadir + 'subjects/' + i + '/analysis/run1.feat/trans_filtered_func_data.nii'
    run2 = datadir + 'subjects/' + i + '/analysis/run2.feat/trans_filtered_func_data.nii'
    run1 = nib.load(run1).get_data()[:, :, :, 0:2511]
    run2 = nib.load(run2).get_data()[:, :, :, 0:2511]
    run2 = run2[mask == 1, :]
    train.append(run1[mask == 1, 0:628])
    test.append(run2[:, omit_mask == 1])

print('Building Model')
srm = SRM(n_iter=10, features=50)

print('Training Model')
srm.fit(train)

print('Testing Model')
shared_data = srm.transform(test)
shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)

corrAB_holder = []
correct_songs = np.zeros((25, 12))
for i in range(len(subjs)):
    loo = shared_data[:, :, i]
    others = np.mean(shared_data[:, :, np.arange(shared_data.shape[-1]) != i],
                     axis=2)
    corrAB = np.corrcoef(loo.T, others.T)[12:, :12]
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10, half=1, save_prefix=None):

    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:

                # If subject is not already there, make new dict for them
                if subject not in subject_fcs:
                    subject_fcs[subject] = {}

                # If hemispheres aren't in there, add them
                if hemi not in subject_fcs[subject]:
                    subject_fcs[subject][hemi] = []

                # Finally, make list of connectivity matrices per subject
                subject_fcs[subject][hemi].append(
                    target_fcs[story][subject][hemi])

    # Stack FCs in connectivity space (for all subjects across stories!)
    all_subjects = list(subject_fcs.keys())
    for subject in all_subjects:
        for hemi in hemis:

            # If more than one connectivity per subject, take average
            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(
                    subject_fcs[subject][hemi], axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for SRM (grab the shared space too)
    transforms, shared_space, srms = {}, {}, {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_ids, subject_stack = [], []
        for subject in all_subjects:
            subject_ids.append(subject)
            subject_stack.append(subject_fcs[subject][hemi])

            if subject not in transforms:
                transforms[subject] = {}

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, srm.w_):
            transforms[subject_id][hemi] = transform

        shared_space[hemi] = srm.s_
        srms[hemi] = srm

    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_w.npy', transforms)
        np.save(f'data/half-{half}_{save_prefix}_s.npy', shared_space)

    return transforms, srms
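# Hedged usage note: srm_fit saves Python dicts with np.save, which pickles them,
# so reading the transforms back requires allow_pickle and .item() to recover the
# dict. The file path below is illustrative only.
# transforms = np.load('data/half-1_myprefix_w.npy', allow_pickle=True).item()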
test_list_roi_1 = []
train_list_roi_2 = []
test_list_roi_2 = []

for i in range(0, train_roi_1.shape[2]):
    train_list_roi_1.append(train_roi_1[:, :, i])
    test_list_roi_1.append(test_roi_1[:, :, i])

for i in range(0, train_roi_2.shape[2]):
    train_list_roi_2.append(train_roi_2[:, :, i])
    test_list_roi_2.append(test_roi_2[:, :, i])

# Initialize models
print('Building Model')
srm_roi_1 = SRM(n_iter=50, features=5)
srm_roi_2 = SRM(n_iter=50, features=5)

# Fit model to training data (run 1)
print('Training Model')
srm_roi_1.fit(train_list_roi_1)
srm_roi_2.fit(train_list_roi_2)

# Test model on testing data to produce shared response
print('Testing Model')
shared_data_roi_1 = srm_roi_1.transform(test_list_roi_1)
shared_data_roi_2 = srm_roi_2.transform(test_list_roi_2)

avg_response_roi_1 = sum(shared_data_roi_1) / len(shared_data_roi_1)
avg_response_roi_2 = sum(shared_data_roi_2) / len(shared_data_roi_2)
def parcel_srm(data, atlas, k=3, parcel_labels=None,
               stories=None, subjects=None):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    # First compute mean time-series for all target parcels
    targets = parcel_means(data, atlas, parcel_labels=parcel_labels,
                           stories=stories, subjects=subjects)

    # Compute ISFCs with targets for all vertices
    target_fcs = target_isfc(data, targets, stories=stories,
                             subjects=subjects)

    parcels = {}
    for story in stories:
        parcels[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        # Loop through both hemispheres
        hemi_stack = []
        for hemi in ['lh', 'rh']:

            # Loop through parcels
            parcel_tss = []
            for parcel_label in parcel_labels[hemi]:

                # Resort parcel FCs into list of subject parcels
                fc_stack = []
                ts_stack = []
                for subject in subject_list:

                    # Grab the connectivities for this parcel
                    parcel_fcs = target_fcs[story][subject][hemi][
                        atlas[hemi] == parcel_label, :]
                    fc_stack.append(parcel_fcs)

                    ts_stack.append(data[story][subject][hemi]
                                    [:, atlas[hemi] == parcel_label])

                # Set up fresh SRM
                srm = SRM(features=k)

                # Train SRM on parcel connectivities
                srm.fit(np.nan_to_num(fc_stack))

                # Apply transformations to time series
                transformed_stack = [ts.dot(w) for ts, w
                                     in zip(ts_stack, srm.w_)]
                transformed_stack = np.dstack(transformed_stack)
                parcel_tss.append(transformed_stack)
                print(f"Finished SRM for {hemi} parcel "
                      f"{parcel_label} in '{story}'")

            # Stack parcel means
            parcel_tss = np.hstack(parcel_tss)
            hemi_stack.append(parcel_tss)

        # Stack hemispheres
        hemi_stack = np.hstack(hemi_stack)
        assert hemi_stack.shape[1] == (len(parcel_labels['lh'])
                                       + len(parcel_labels['rh'])) * k
        assert hemi_stack.shape[2] == len(subject_list)

        # Unstack subjects
        hemi_stack = np.dsplit(hemi_stack, hemi_stack.shape[2])
        for subject, ts in zip(subject_list, hemi_stack):
            parcels[story][subject] = np.squeeze(ts)

        print(f"Finished applying cSRM to parcels for '{story}'")

    return parcels
test_list.append(test[:, :, i])

songs = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
         'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird', 'Island',
         'Allegro_Moderato', 'Finlandia', 'Early_Summer',
         'Capriccio_Espagnole', 'Symphony_Fantastique', 'Boogie_Stop_Shuffle',
         'My_Favorite_Things', 'Blue_Monk', 'All_Blues']

durs = np.array([90, 180, 180, 90, 135, 180, 180, 225, 225, 135, 90, 135,
                 225, 225, 90, 135])

song_bounds = np.array([0, 90, 270, 449, 538, 672, 851, 1031, 1255, 1480,
                        1614, 1704, 1839, 2063, 2288, 2377, 2511])

features = np.array([5, 10, 15, 20, 25, 30, 35, 40, 45, 50])
n_iter = 50
hrf = 5
nPerm = 1000
w = 3

z_scores = np.zeros((len(features), len(songs)))
t_scores = np.zeros((len(features)))

for k in range(len(features)):
    srm = SRM(n_iter=n_iter, features=features[k])

    # Fit model to training data (run 1)
    srm.fit(train_list)

    # Test model on testing data to produce shared response
    shared_data = srm.transform(test_list)
    avg_response = sum(shared_data) / len(shared_data)

    for s in range(len(songs)):

        # Get start and end of chosen song
        start = song_bounds[s] + hrf
        end = song_bounds[s + 1] + hrf
        nTR = shared_data[0][:, start:end].shape[1]
np.random.seed(bootNum)
for b in range(nboot):
    resamp_subjs = np.random.choice(nSubj, size=nSubj, replace=True)

    run1_list = []
    run2_list = []
    for r in range(len(resamp_subjs)):
        run1_list.append(run1_list_orig[resamp_subjs[r]])
        run2_list.append(run2_list_orig[resamp_subjs[r]])

    n_iter = 50
    features = 10

    # Initialize model
    print('Building Model')
    srm_train_run1 = SRM(n_iter=n_iter, features=features)
    srm_train_run2 = SRM(n_iter=n_iter, features=features)

    # Fit model to training data
    print('Training Model')
    srm_train_run1.fit(run1_list)
    srm_train_run2.fit(run2_list)

    # Test model on testing data to produce shared response
    print('Testing Model')
    shared_data_train_run1 = srm_train_run1.transform(run2_list)
    shared_data_train_run2 = srm_train_run2.transform(run1_list)

    avg_response_train_run1 = sum(shared_data_train_run1) / len(
        shared_data_train_run1)
    avg_response_train_run2 = sum(shared_data_train_run2) / len(
        shared_data_train_run2)
subjects = movie_data.shape[0]
nVoxel, nTR = movie_data[0].shape

train_data = []
test_data = []
for s in range(subjects):
    train_data.append(movie_data[s, :, :nTR // 2])
    test_data.append(movie_data[s, :, nTR // 2:])

for subject in range(subjects):
    train_data[subject] = stats.zscore(train_data[subject], axis=1, ddof=1)
for subject in range(subjects):
    test_data[subject] = stats.zscore(test_data[subject], axis=1, ddof=1)

srm = SRM(n_iter=10, features=50)
srm.fit(train_data)
data_shared = srm.transform(test_data)
timesegmentmatching_accuracy_evaluation_loo_cv(data_shared, win_size=6,
                                               method="SRM")

dpsrm = DPSRM(n_features=50)
dpsrm.fit(np.array(train_data))
data_shared_dpsrm = dpsrm.transform(test_data)
timesegmentmatching_accuracy_evaluation_loo_cv(data_shared_dpsrm, win_size=6,
                                               method="DPSRM")
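# Hedged sketch of leave-one-out time-segment matching, the evaluation called
# above (timesegmentmatching_accuracy_evaluation_loo_cv is not defined in this
# snippet). This is a simplified reconstruction, not the original: unlike the
# usual implementation, overlapping segments are not excluded from competition.
import numpy as np

def _tsm_accuracy_sketch(data, win_size=6):
    """data: list of (features x TRs) arrays, one per subject."""
    data = np.asarray(data)
    n_subj, _, n_tr = data.shape
    n_seg = n_tr - win_size
    accuracies = []
    for loo in range(n_subj):
        # average all other subjects in the shared space
        others = np.mean(np.delete(data, loo, axis=0), axis=0)
        # unroll each time segment into a single vector
        loo_segs = np.array([data[loo][:, t:t + win_size].ravel()
                             for t in range(n_seg)])
        other_segs = np.array([others[:, t:t + win_size].ravel()
                               for t in range(n_seg)])
        corr = np.corrcoef(loo_segs, other_segs)[:n_seg, n_seg:]
        # a segment is correct if its best match is the same time point
        accuracies.append(np.mean(np.argmax(corr, axis=1) == np.arange(n_seg)))
    return np.array(accuracies)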
def HMM(X, human_bounds, song_idx, song_bounds, hrf, srm_k):
    """fit hidden markov model

    Fit HMM to average data and cross-validate with leftout subjects using
    within-song and between-song average correlations

    Parameters
    ----------
    X: list of 50 2D (voxels x full time course) arrays
       (contains 2 runs per subject)
    human_bounds: 1D array of human-annotated event boundaries
    song_idx: song index (scalar)
    song_bounds: 1D array of song boundaries
    hrf: hemodynamic lag in TRs (scalar)
    srm_k: # of SRM features (scalar)

    Returns
    -------
    within_across: wVa score (within- minus across-boundary correlation) for
        the real event boundaries and for each permutation
    """

    w = 6
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(
        shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1], :13],
        axis=2)
    loo = np.mean(
        shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1], 13:],
        axis=2)
    nTR = loo.shape[1]

    # Fit event segmentation to the training group of subjects (others)
    K = len(human_bounds) + 1
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others.T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    print((within_across[0] - np.mean(within_across[1:]))
          / np.std(within_across[1:]))

    return within_across
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10):

    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:
                if subject not in subject_fcs:
                    subject_fcs[subject] = {}
                if hemi not in subject_fcs[subject]:
                    subject_fcs[subject][hemi] = []
                subject_fcs[subject][hemi].append(
                    target_fcs[story][subject][hemi])

    # Stack FCs across stories in connectivity space
    for subject in subject_list:
        for hemi in hemis:
            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(
                    subject_fcs[subject][hemi], axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for SRM
    transforms = {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_ids, subject_stack = [], []
        for subject in subject_list:
            subject_ids.append(subject)
            subject_stack.append(subject_fcs[subject][hemi])

            if subject not in transforms:
                transforms[subject] = {}

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, srm.w_):
            transforms[subject_id][hemi] = transform

    return transforms
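# Hedged usage note: the per-subject transforms returned by srm_fit are SRM bases
# (vertices x k) learned in connectivity space. As in parcel_srm above, they can
# be applied to a subject's time series by projection, assuming the time series
# are stored TRs x vertices; the variable names below are illustrative only.
# timeseries = data[story][subject][hemi]                  # TRs x vertices
# shared_ts = timeseries.dot(transforms[subject][hemi])    # TRs x k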