def sfn(l, msk, myrad, bcast_var):
    """Searchlight kernel: fit an SRM on the first half of the TRs, score the rest.

    Arguments:
      l -- a list of 4D arrays, containing data from a single searchlight
      msk -- a 3D binary array, mask of this searchlight (not used here)
      myrad -- an integer, sl_rad (not used here)
      bcast_var -- whatever is broadcasted; its first two items are passed
                   positionally to ``SRM``

    Returns the mean of what ``time_segment_matching_accuracy`` produces.
    Several values could be returned instead; in that case the final
    searchlight output will be a 3D array of tuples.
    """
    dim_x, dim_y, dim_z, n_tr = l[0].shape
    n_voxels = dim_x * dim_y * dim_z
    split = int(n_tr / 2)

    # Flatten the three spatial axes and cut the time axis at ``split``.
    train_data = [
        np.reshape(vol[:, :, :, :split], (n_voxels, split)) for vol in l
    ]
    test_data = [
        np.reshape(vol[:, :, :, split:], (n_voxels, n_tr - split)) for vol in l
    ]

    # Train an SRM model on the first half.
    model = SRM(bcast_var[0], bcast_var[1])
    model.fit(train_data)

    # Project the held-out half, then z-score along axis 1; NaNs produced
    # by the z-scoring are replaced through nan_to_num.
    shared_data = model.transform(test_data)
    for idx in range(len(l)):
        normalised = stats.zscore(shared_data[idx], axis=1, ddof=1)
        shared_data[idx] = np.nan_to_num(normalised)

    # Run the experiment.
    accuracy = time_segment_matching_accuracy(shared_data)
    return np.mean(accuracy)
def srm(run1, run2):
    """Fit one SRM per run and project each run through the other run's model.

    ``run1`` and ``run2`` are passed straight to ``SRM.fit`` /
    ``SRM.transform`` (presumably one array per subject -- confirm with the
    caller).  Each projected run is stacked along a third axis, z-scored
    along axis 1 and averaged over that third axis.

    Returns a ``(run1, run2)`` tuple of the averaged projections.
    """
    print('Building Models')
    iterations = 50
    n_features = 30
    model_run1 = SRM(n_iter=iterations, features=n_features)
    model_run2 = SRM(n_iter=iterations, features=n_features)

    # Fit each model to its own run.
    print('Training Models')
    model_run1.fit(run1)
    model_run2.fit(run2)

    print('Testing Models')
    # Run 1 goes through the model trained on run 2 ...
    projected_run1 = np.dstack(model_run2.transform(run1))
    z_run1 = stats.zscore(projected_run1, axis=1, ddof=1)
    # ... and run 2 goes through the model trained on run 1.
    projected_run2 = np.dstack(model_run1.transform(run2))
    z_run2 = stats.zscore(projected_run2, axis=1, ddof=1)

    # Average the test data over the stacking axis.
    return np.mean(z_run1, axis=2), np.mean(z_run2, axis=2)
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """Fit an event-segmentation model and score it on a left-out entry.

    The first half of ``X`` is used to train an SRM and the second half is
    projected through it.  An ``EventSegment`` model with ``K`` events is
    fit to the average of all projected entries except ``loo_idx`` within
    one song, and the left-out entry is scored by comparing correlations
    of time points ``w`` apart that fall within an event against those
    that span an event boundary.

    Parameters
    ----------
    X : sequence
        First half is treated as run 1, second half as run 2.  Entries are
        presumably voxel-by-time 2D arrays, one per subject and run --
        confirm with the caller.
    K : int
        Number of events for the event-segmentation model.
    loo_idx : int
        Index (within the projected run-2 stack) of the left-out entry.
    song_idx : int
        Index into ``song_bounds`` selecting the song.
    song_bounds : sequence of int
        ``song_bounds[song_idx]:song_bounds[song_idx + 1]`` is the slice
        taken along axis 1 for the selected song.

    Returns
    -------
    within_across : ndarray, shape (nPerm + 1,)
        Within-minus-across correlation difference.  Element 0 uses the
        fitted boundaries; the remaining elements use random boundaries.

    Notes
    -----
    The global NumPy random state is re-seeded with ``p`` on every
    permutation.
    """
    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)

    half = int(len(X) / 2)
    run1 = [X[i] for i in range(half)]
    run2 = [X[i] for i in range(half, len(X))]

    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    # Stack the projected entries on a third axis and z-score along axis 1.
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)

    song = slice(song_bounds[song_idx], song_bounds[song_idx + 1])
    keep = np.arange(shared_data.shape[-1]) != loo_idx
    others = np.mean(shared_data[:, :, keep], axis=2)
    loo = shared_data[:, song, loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song].T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and
    # permuted bounds
    for p in range(nPerm + 1):
        same_event = events[:-w] == events[w:]
        within = corrs[same_event].mean()
        across = corrs[~same_event].mean()
        within_across[p] = within - across

        # Draw K - 1 random boundary positions for the next iteration.
        np.random.seed(p)
        # The ``np.int`` alias was removed in NumPy 1.24; the builtin
        # ``int`` is the exact equivalent.
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
def HMM(X, human_bounds, song_idx, song_bounds, srm_k, hrf):
    """Score how well fitted event boundaries match human-annotated ones.

    An SRM with ``srm_k`` features is trained on the second half of ``X``
    and applied to the first half.  The projected data are z-scored,
    restricted to one song and averaged over the stacking axis; an
    ``EventSegment`` model with ``len(human_bounds) + 1`` events is fit to
    that average.  A human boundary counts as matched when some model
    boundary lies within ``w`` (3) time points of it.

    Parameters
    ----------
    X : sequence
        First half is treated as run 1, second half as run 2.
    human_bounds : sequence
        Human-annotated boundary positions.
    song_idx : int
        Index into ``song_bounds`` selecting the song.
    song_bounds : sequence of int
        Slice limits per song along axis 1.
    srm_k : int
        Number of SRM features.
    hrf : unused
        Accepted but never read in this function.

    Returns
    -------
    match : ndarray, shape (nPerm + 1,)
        Fraction of human boundaries matched.  Element 0 uses the fitted
        boundaries, the rest use permuted event lengths.  The global NumPy
        random state is re-seeded with ``p`` on every iteration.
    """
    w = 3
    nPerm = 1000

    midpoint = int(len(X) / 2)
    run1 = [X[i] for i in range(midpoint)]
    run2 = [X[i] for i in range(midpoint, len(X))]

    print('Building Model')
    model = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    model.fit(run2)
    print('Testing Model')
    projected = model.transform(run1)
    projected = stats.zscore(np.dstack(projected), axis=1, ddof=1)

    song_start = song_bounds[song_idx]
    song_stop = song_bounds[song_idx + 1]
    data = np.mean(projected[:, song_start:song_stop], axis=2)

    # Fit the segmentation with one more event than there are human bounds.
    K = len(human_bounds) + 1
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(data.T)

    events = np.argmax(ev.segments_[0], axis=1)
    bounds = np.where(np.diff(events))[0] + 1
    _, event_lengths = np.unique(events, return_counts=True)

    n_human = len(human_bounds)
    match = np.zeros(nPerm + 1)
    perm_bounds = bounds.copy()
    for p in range(nPerm + 1):
        hits = sum(
            1 for hb in human_bounds
            if np.any(np.abs(perm_bounds - hb) <= w)
        )
        match[p] = hits
        match[p] /= n_human

        # Shuffle the fitted event lengths to get the next null boundaries.
        np.random.seed(p)
        shuffled_lengths = np.random.permutation(event_lengths)
        perm_bounds = np.cumsum(shuffled_lengths)[:-1]

    return match
def srm(train_data, test_data, n_features):
    """Fit an SRM on z-scored training data and project z-scored test data.

    Parameters
    ----------
    train_data : sequence of arrays
        One array per subject, used to fit the model.
    test_data : sequence of arrays
        One array per subject, projected through the fitted model.
    n_features : int
        Number of SRM features.

    Returns
    -------
    list
        The projected test data, one entry per subject, each z-scored
        along axis 1.

    Notes
    -----
    The inputs are z-scored into new lists; the caller's ``train_data``
    and ``test_data`` are left untouched.  (They were previously
    overwritten in place.)
    """
    # Z-score the data (axis 1, sample standard deviation).
    train_z = [stats.zscore(subj, axis=1, ddof=1) for subj in train_data]
    test_z = [stats.zscore(subj, axis=1, ddof=1) for subj in test_data]

    model = SRM(n_iter=10, features=n_features)
    model.fit(train_z)
    projected_data = model.transform(test_z)

    # Normalise the projected data the same way before returning it.
    return [stats.zscore(subj, axis=1, ddof=1) for subj in projected_data]
def isc_srm(X, set_srm):
    """Run ``brainiak.isfc.isfc`` on searchlight data, with or without SRM.

    Parameters
    ----------
    X : sequence of 2D arrays
        List of searchlights where searchlights are voxels by time.  The
        first half of the list is treated as run 1, the second half as
        run 2.
    set_srm : {0, 1}
        0 -- concatenate run 1 and run 2 of each entry along axis 1 and run
        the analysis on the raw data.
        1 -- train an SRM (10 iterations, 5 features) on run 1, project
        run 2 through it, z-score along axis 1 and run the analysis on
        the projected data.

    Returns
    -------
    mean_isc : float
        Mean of ``isc_output``.
    isc_output
        Whatever ``brainiak.isfc.isfc`` returned.

    Raises
    ------
    ValueError
        If ``set_srm`` is neither 0 nor 1.  (Previously this fell through
        both branches and failed on an unbound ``isc_output``.)
    """
    if set_srm not in (0, 1):
        raise ValueError(f"set_srm must be 0 or 1, got {set_srm!r}")

    half = int(len(X) / 2)
    run1 = [X[i] for i in range(half)]
    run2 = [X[i] for i in range(half, len(X))]

    if set_srm == 0:
        # Both runs side by side along axis 1, one slab per entry.
        data = np.zeros(
            (run1[0].shape[0], run1[0].shape[1] * 2, len(run1)))
        for i in range(data.shape[2]):
            data[:, :, i] = np.hstack((run1[i], run2[i]))
        # run isc
        isc_output = brainiak.isfc.isfc(data)
    else:
        # train on run 1 and test on run 2
        print('Building Model')
        srm = SRM(n_iter=10, features=5)
        print('Training Model')
        srm.fit(run1)
        print('Testing Model')
        shared_data = srm.transform(run2)
        shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
        # run isc
        isc_output = brainiak.isfc.isfc(shared_data)
        print(isc_output)

    # average isc results
    mean_isc = np.mean(isc_output)
    return mean_isc, isc_output
def sfn(l, msk, myrad, bcast_var):
    """Searchlight kernel returning mean and standard error of the accuracy.

    ``l`` is a list of 4D arrays.  The first half of the last axis trains
    an SRM built from ``bcast_var[0]`` and ``bcast_var[1]``; the second
    half is projected, z-scored along axis 1 and handed to
    ``timesegmentmatching_accuracy`` together with the constant 6.
    ``msk`` and ``myrad`` are accepted but not used.

    Returns ``(np.mean(accu), stats.sem(accu))``; multiple outputs will be
    saved as tuples.
    """
    n_x, n_y, n_z, n_tr = l[0].shape
    n_vox = n_x * n_y * n_z
    n_train = int(n_tr / 2)
    n_test = n_tr - n_train

    # extract training and testing data
    train_data, test_data = [], []
    for subject_volume in l:
        first_half = subject_volume[:, :, :, :n_train]
        second_half = subject_volume[:, :, :, n_train:]
        train_data.append(np.reshape(first_half, (n_vox, n_train)))
        test_data.append(np.reshape(second_half, (n_vox, n_test)))

    # train an srm model
    model = SRM(bcast_var[0], bcast_var[1])
    model.fit(train_data)

    # transform test data
    shared_data = model.transform(test_data)
    for subject_index in range(len(l)):
        z_scored = stats.zscore(shared_data[subject_index], axis=1, ddof=1)
        shared_data[subject_index] = np.nan_to_num(z_scored)

    # experiment
    accu = timesegmentmatching_accuracy(shared_data, 6)
    mean_accuracy = np.mean(accu)
    sem_accuracy = stats.sem(accu)
    return mean_accuracy, sem_accuracy
train_list_roi_1.append(train_roi_1[:,:,i]) test_list_roi_1.append(test_roi_1[:,:,i]) for i in range(0,train_roi_2.shape[2]): train_list_roi_2.append(train_roi_2[:,:,i]) test_list_roi_2.append(test_roi_2[:,:,i]) # Initialize models print('Building Model') srm_roi_1 = SRM(n_iter=50, features=5) srm_roi_2 = SRM(n_iter=50, features=5) # Fit model to training data (run 1) print('Training Model') srm_roi_1.fit(train_list_roi_1) srm_roi_2.fit(train_list_roi_2) # Test model on testing data to produce shared response print('Testing Model') shared_data_roi_1 = srm_roi_1.transform(test_list_roi_1) shared_data_roi_2 = srm_roi_2.transform(test_list_roi_2) avg_response_roi_1 = sum(shared_data_roi_1)/len(shared_data_roi_1) avg_response_roi_2 = sum(shared_data_roi_2)/len(shared_data_roi_2) # Fit to ROI 1 ev1 = brainiak.eventseg.event.EventSegment(len(human_bounds) - 1) ev1.fit(avg_response_roi_1[:,start:end].T) bounds1 = np.where(np.diff(np.argmax(ev1.segments_[0], axis=1)))[0]
def parcel_srm(data, atlas, k=3, parcel_labels=None, stories=None,
               subjects=None):
    """Fit a k-feature SRM per parcel on connectivity and project time series.

    For every story, hemisphere ('lh', 'rh') and parcel label, the rows of
    each subject's target ISFC matrix that belong to the parcel are used to
    fit a fresh ``SRM(features=k)``.  The learned per-subject ``w_``
    matrices are then applied to the parcel's columns of that subject's
    time series.

    Parameters
    ----------
    data : dict
        Indexed as ``data[story][subject][hemi]``; columns are selected
        with the parcel mask.
    atlas : dict
        ``atlas[hemi]`` is compared against each parcel label to build the
        parcel mask.
    k : int
        Number of SRM features per parcel.
    parcel_labels : dict
        ``parcel_labels[hemi]`` is iterated over; despite the ``None``
        default, a value is required by the loop below.
    stories, subjects
        Passed to ``check_keys``; by default all are used.

    Returns
    -------
    dict
        ``parcels[story][subject]`` holds the squeezed per-subject slice of
        the stacked projections (all 'lh' parcels, then all 'rh' parcels).
    """
    # By default grab all stories
    stories = check_keys(data, keys=stories)

    # First compute mean time-series for all target parcels
    targets = parcel_means(data, atlas,
                           parcel_labels=parcel_labels,
                           stories=stories, subjects=subjects)

    # Compute ISFCs with targets for all vertices
    target_fcs = target_isfc(data, targets, stories=stories,
                             subjects=subjects)

    parcels = {}
    for story in stories:
        parcels[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        # Loop through both hemispheres
        per_hemi = []
        for hemi in ['lh', 'rh']:

            # Loop through parcels
            per_parcel = []
            for parcel_label in parcel_labels[hemi]:
                in_parcel = atlas[hemi] == parcel_label

                # Per-subject connectivities and time series of this parcel
                fc_stack = [target_fcs[story][subject][hemi][in_parcel, :]
                            for subject in subject_list]
                ts_stack = [data[story][subject][hemi][:, in_parcel]
                            for subject in subject_list]

                # Fresh SRM trained on the parcel connectivities
                srm = SRM(features=k)
                srm.fit(np.nan_to_num(fc_stack))

                # Apply the learned transformations to the time series
                projected = np.dstack(
                    [ts.dot(w) for ts, w in zip(ts_stack, srm.w_)])
                per_parcel.append(projected)
                print(f"Finished SRM for {hemi} parcel "
                      f"{parcel_label} in '{story}'")

            # Stack parcels side by side
            per_hemi.append(np.hstack(per_parcel))

        # Stack hemispheres
        stacked = np.hstack(per_hemi)
        n_parcels = len(parcel_labels['lh']) + len(parcel_labels['rh'])
        assert stacked.shape[1] == n_parcels * k
        assert stacked.shape[2] == len(subject_list)

        # Unstack subjects
        per_subject = np.dsplit(stacked, stacked.shape[2])
        for subject, ts in zip(subject_list, per_subject):
            parcels[story][subject] = np.squeeze(ts)

        print(f"Finished applying cSRM to parcels for '{story}'")

    return parcels
run1_list = [] run2_list = [] for r in range(len(resamp_subjs)): run1_list.append(run1_list_orig[resamp_subjs[r]]) run2_list.append(run2_list_orig[resamp_subjs[r]]) n_iter = 50 features = 10 # Initialize model print('Building Model') srm_train_run1 = SRM(n_iter=n_iter, features=features) srm_train_run2 = SRM(n_iter=n_iter, features=features) # Fit model to training data print('Training Model') srm_train_run1.fit(run1_list) srm_train_run2.fit(run2_list) # Test model on testing data to produce shared response print('Testing Model') shared_data_train_run1 = srm_train_run1.transform(run2_list) shared_data_train_run2 = srm_train_run2.transform(run1_list) avg_response_train_run1 = sum(shared_data_train_run1) / len( shared_data_train_run1) avg_response_train_run2 = sum(shared_data_train_run2) / len( shared_data_train_run2) ################################################################################## for i in range(16):
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10, half=1, save_prefix=None):
    """Fit one SRM per hemisphere on per-subject connectivity matrices.

    Connectivity matrices are gathered per subject and hemisphere across
    all requested stories; a subject with more than one matrix gets their
    element-wise mean.  One ``SRM(n_iter=n_iter, features=k)`` is then fit
    per hemisphere across all collected subjects.

    Parameters
    ----------
    target_fcs : dict
        Indexed as ``target_fcs[story][subject][hemi]``.
    stories, subjects, hemisphere
        Passed to ``check_keys``; by default all are used.
    k : int
        Number of SRM features.
    n_iter : int
        Number of SRM iterations.
    half : int
        Only used in the output file names.
    save_prefix : str, optional
        When truthy, the transforms and shared spaces are written with
        ``np.save`` to ``data/half-{half}_{save_prefix}_w.npy`` and
        ``data/half-{half}_{save_prefix}_s.npy``.

    Returns
    -------
    transforms : dict
        ``transforms[subject][hemi]`` is that subject's entry of ``srm.w_``.
    srms : dict
        ``srms[hemi]`` is the fitted SRM object.
    """
    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        story_subjects = check_keys(target_fcs[story], keys=subjects,
                                    subkey=story)

        for subject in story_subjects:

            # For simplicity we just assume same hemis across
            # stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:
                # Create the nested containers on first use, then collect
                # this story's connectivity matrix
                per_hemi = subject_fcs.setdefault(subject, {})
                per_hemi.setdefault(hemi, []).append(
                    target_fcs[story][subject][hemi])

    # Stack FCs in connectivity space (for all subjects across stories!)
    all_subjects = list(subject_fcs.keys())
    for subject in all_subjects:
        for hemi in hemis:
            collected = subject_fcs[subject][hemi]

            # If more than one connectivity per subject, take average
            if len(collected) > 1:
                subject_fcs[subject][hemi] = np.mean(collected, axis=0)
            else:
                subject_fcs[subject][hemi] = collected[0]

    # Convert FCs to list for SRM (grab the shared space too)
    transforms, shared_space, srms = {}, {}, {}
    for hemi in hemis:

        # Declare SRM for this hemi
        model = SRM(n_iter=n_iter, features=k)

        subject_ids = list(all_subjects)
        subject_stack = [subject_fcs[subject][hemi]
                         for subject in all_subjects]
        for subject in all_subjects:
            transforms.setdefault(subject, {})

        # Train SRM and apply
        start = time()
        model.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, model.w_):
            transforms[subject_id][hemi] = transform

        shared_space[hemi] = model.s_
        srms[hemi] = model

    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_w.npy', transforms)
        np.save(f'data/half-{half}_{save_prefix}_s.npy', shared_space)

    return transforms, srms
# Split each array along its third axis into a list of 2D slices, the input
# form handed to SRM.fit / SRM.transform below.
# NOTE(review): presumably the third axis indexes subjects -- confirm where
# train_A1 / test_A1 are built.  The *_list_* containers are created before
# this span.
for i in range(0, train_A1.shape[2]):
    train_list_A1.append(train_A1[:, :, i])
    test_list_A1.append(test_A1[:, :, i])
for i in range(0, train_vmPFC.shape[2]):
    train_list_vmPFC.append(train_vmPFC[:, :, i])
    test_list_vmPFC.append(test_vmPFC[:, :, i])
# Initialize models (one per ROI; A1 uses 50 features, vmPFC uses 10)
print('Building Model')
srm_A1 = SRM(n_iter=10, features=50)
srm_vmPFC = SRM(n_iter=10, features=10)
# Fit model to training data (run 1)
print('Training Model')
srm_A1.fit(train_list_A1)
srm_vmPFC.fit(train_list_vmPFC)
# Test model on testing data to produce shared response
print('Testing Model')
shared_data_A1 = srm_A1.transform(test_list_A1)
shared_data_vmPFC = srm_vmPFC.transform(test_list_vmPFC)
# Element-wise average of the projected test data over the list entries
avg_response_A1 = sum(shared_data_A1) / len(shared_data_A1)
avg_response_vmPFC = sum(shared_data_vmPFC) / len(shared_data_vmPFC)
# Baseline without SRM: mean of the raw test arrays over the third axis
A1_no_srm = np.mean(test_A1, axis=2)
vmPFC_no_srm = np.mean(test_vmPFC, axis=2)
# Limits spanning the first dimension of spect_corr (defined before this
# span); presumably axis limits for a later plot -- confirm against usage.
X_MIN = 0
X_MAX = spect_corr.shape[0]
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10):
    """Fit one SRM per hemisphere on per-subject connectivity matrices.

    Connectivity matrices are gathered per subject and hemisphere across
    all requested stories; a subject with more than one matrix gets their
    element-wise mean.  One ``SRM(n_iter=n_iter, features=k)`` is then fit
    per hemisphere.

    Parameters
    ----------
    target_fcs : dict
        Indexed as ``target_fcs[story][subject][hemi]``.
    stories, subjects, hemisphere
        Passed to ``check_keys``; by default all are used.
    k : int
        Number of SRM features.
    n_iter : int
        Number of SRM iterations.

    Returns
    -------
    transforms : dict
        ``transforms[subject][hemi]`` is that subject's entry of ``srm.w_``.

    Notes
    -----
    Averaging and fitting run over every subject collected from any story.
    Earlier, the subject list of the *last* story was reused, so subjects
    that only appeared in other stories were left as un-averaged lists and
    excluded from the fit.
    """
    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    hemis = []  # stays empty (and the result is {}) when nothing is selected
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across
            # stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:
                subject_fcs.setdefault(subject, {}).setdefault(
                    hemi, []).append(target_fcs[story][subject][hemi])

    # Stack FCs across stories in connectivity space, for every subject
    # seen in any story
    all_subjects = list(subject_fcs)
    for subject in all_subjects:
        for hemi in hemis:
            collected = subject_fcs[subject][hemi]
            if len(collected) > 1:
                subject_fcs[subject][hemi] = np.mean(collected, axis=0)
            else:
                subject_fcs[subject][hemi] = collected[0]

    # Convert FCs to list for SRM
    transforms = {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_ids, subject_stack = [], []
        for subject in all_subjects:
            subject_ids.append(subject)
            subject_stack.append(subject_fcs[subject][hemi])
            transforms.setdefault(subject, {})

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, srm.w_):
            transforms[subject_id][hemi] = transform

    return transforms
def HMM(X, human_bounds, song_idx, song_bounds, hrf, srm_k):
    """Fit an event-segmentation model and score it on held-out entries.

    The first half of ``X`` trains an SRM with ``srm_k`` features and the
    second half is projected through it.  Within one song, an
    ``EventSegment`` model with ``len(human_bounds) + 1`` events is fit to
    the average of the first 13 projected entries; the average of the
    remaining entries is scored by comparing correlations of time points
    ``w`` apart that fall within an event against those that span an event
    boundary.

    Parameters
    ----------
    X : sequence
        First half is treated as run 1, second half as run 2.  Entries are
        presumably voxel-by-time 2D arrays, one per subject and run --
        confirm with the caller.
    human_bounds : sequence
        Human-annotated boundaries; only their count is used here.
    song_idx : int
        Index into ``song_bounds`` selecting the song.
    song_bounds : sequence of int
        ``song_bounds[song_idx]:song_bounds[song_idx + 1]`` is the slice
        taken along axis 1 for the selected song.
    hrf : unused
        Accepted but never read in this function.
    srm_k : int
        Number of SRM features.

    Returns
    -------
    within_across : ndarray, shape (nPerm + 1,)
        Within-minus-across correlation difference.  Element 0 uses the
        fitted boundaries; the remaining elements use random boundaries.

    Notes
    -----
    The z-score of element 0 against the permuted values is printed before
    returning.  The global NumPy random state is re-seeded with ``p`` on
    every permutation.
    """
    w = 6
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)

    half = int(len(X) / 2)
    run1 = [X[i] for i in range(half)]
    run2 = [X[i] for i in range(half, len(X))]

    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    # Stack the projected entries on a third axis and z-score along axis 1.
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)

    song = slice(song_bounds[song_idx], song_bounds[song_idx + 1])
    # Hard-coded split: entries 0-12 fit the model, the rest are held out.
    others = np.mean(shared_data[:, song, :13], axis=2)
    loo = np.mean(shared_data[:, song, 13:], axis=2)
    nTR = loo.shape[1]

    # Fit to the first group
    K = len(human_bounds) + 1
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others.T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and
    # permuted bounds
    for p in range(nPerm + 1):
        same_event = events[:-w] == events[w:]
        within = corrs[same_event].mean()
        across = corrs[~same_event].mean()
        within_across[p] = within - across

        # Draw K - 1 random boundary positions for the next iteration.
        np.random.seed(p)
        # The ``np.int`` alias was removed in NumPy 1.24; the builtin
        # ``int`` is the exact equivalent.
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    print((within_across[0] - np.mean(within_across[1:]))
          / np.std(within_across[1:]))
    return within_across
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """Fit an event-segmentation model, plot it and score a left-out entry.

    The first half of ``X`` trains an SRM (45 features) and the second half
    is projected through it.  An ``EventSegment`` model with ``K`` events
    is fit to the average of all projected entries except ``loo_idx``
    within one song.  A correlation matrix with the fitted event blocks
    outlined is saved as a figure, and the left-out entry is scored by
    comparing correlations of time points ``w`` apart that fall within an
    event against those that span an event boundary.

    Parameters
    ----------
    X : sequence
        First half is treated as run 1, second half as run 2.  Entries are
        presumably voxel-by-time 2D arrays, one per subject and run --
        confirm with the caller.
    K : int
        Number of events for the event-segmentation model.
    loo_idx : int
        Index (within the projected run-2 stack) of the left-out entry.
    song_idx : int
        Index into ``song_bounds`` selecting the song.
    song_bounds : sequence of int
        ``song_bounds[song_idx]:song_bounds[song_idx + 1]`` is the slice
        taken along axis 1 for the selected song.

    Returns
    -------
    within_across : ndarray, shape (nPerm + 1,)
        Within-minus-across correlation difference.  Element 0 uses the
        fitted boundaries; the remaining elements use random boundaries.

    Notes
    -----
    Side effects: writes ``'plots/St_Pauls SRM K = <srm_k>'`` through
    ``plt.savefig`` and re-seeds the global NumPy random state with ``p``
    on every permutation.  The figure is not closed here.
    """
    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)

    half = int(len(X) / 2)
    run1 = [X[i] for i in range(half)]
    run2 = [X[i] for i in range(half, len(X))]

    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    # Stack the projected entries on a third axis and z-score along axis 1.
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)

    song = slice(song_bounds[song_idx], song_bounds[song_idx + 1])
    keep = np.arange(shared_data.shape[-1]) != loo_idx
    others = np.mean(shared_data[:, :, keep], axis=2)
    loo = shared_data[:, song, loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song].T)
    events = np.argmax(ev.segments_[0], axis=1)

    ####
    # plot searchlights
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    # The plot uses the un-normalised projection, averaged over entries,
    # and a hard-coded 0:89 column range.
    shared_data = srm.transform(run2)
    avg_response = sum(shared_data) / len(shared_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(np.corrcoef(avg_response[:, 0:89].T))
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0]
    ax = plt.gca()
    bounds_aug = np.concatenate(([0], bounds, [nTR]))
    # One white square per fitted event along the diagonal.
    for i in range(len(bounds_aug) - 1):
        rect1 = patches.Rectangle(
            (bounds_aug[i], bounds_aug[i]),
            bounds_aug[i + 1] - bounds_aug[i],
            bounds_aug[i + 1] - bounds_aug[i],
            linewidth=3, edgecolor='w', facecolor='none',
            label='Model Fit')
        ax.add_patch(rect1)
    plt.title('HMM Fit to A1 SRM K = ' + str(srm_k), fontsize=18,
              fontweight='bold')
    plt.savefig('plots/St_Pauls SRM K = ' + str(srm_k))
    ####

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and
    # permuted bounds
    for p in range(nPerm + 1):
        same_event = events[:-w] == events[w:]
        within = corrs[same_event].mean()
        across = corrs[~same_event].mean()
        within_across[p] = within - across

        # Draw K - 1 random boundary positions for the next iteration.
        np.random.seed(p)
        # The ``np.int`` alias was removed in NumPy 1.24; the builtin
        # ``int`` is the exact equivalent.
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across