# human_parcels_sl.py
import sys
import numpy as np
import nibabel as nib
import scipy.stats as stats
from srm import SRM_V1
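
# Usage (two positional arguments): python human_parcels_sl.py <song_idx> <run_num>
# e.g. `python human_parcels_sl.py 3 0` analyzes the 4th song using run 1's timing.
# Paths below assume the original /tigress/jamalw/MES directory layout.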
idx = int(sys.argv[1])     # index of the song to analyze
runNum = int(sys.argv[2])  # which run's song boundaries/order to use (0 = run 1, 1 = run 2)
parcelNum = 300            # number of parcels in the Schaefer atlas
def save_nifti(data, affine, savedir):
    """Save a 3D map as a NIfTI image, recording its intensity range in the header."""
    minval = np.min(data)
    maxval = np.max(data)
    img = nib.Nifti1Image(data, affine)
    img.header["cal_min"] = minval
    img.header["cal_max"] = maxval
    img.header.set_data_dtype(np.float64)
    nib.save(img, savedir)
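
# Example call (hypothetical names): save_nifti(map3d, affine, "map.nii")
# Note that nib.save infers the output format from the filename extension.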
# Custom mean estimator with Fisher z transformation for correlations
def fisher_mean(correlation, axis=None):
    return np.tanh(np.mean(np.arctanh(correlation), axis=axis))
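
# Worked example: fisher_mean(np.array([0.2, 0.8])) ~= 0.57, slightly above the
# arithmetic mean of 0.5, because averaging in z-space respects the bounded,
# non-linear scale of correlation coefficients.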
def HMM(X, Y, human_bounds):
    """Test event structure against human annotations.

    Note: despite the name, no hidden Markov model is fit here. The
    human-annotated boundaries define the events directly, and the test
    compares average timepoint correlations within events to those across
    events, building a null distribution by permuting boundary locations.

    Parameters
    ----------
    X : voxel by time ndarray (2D); group-average training data (only its
        time dimension is used here)
    Y : voxel by time ndarray (2D); group-average left-out data
    human_bounds : 1D array of human-annotated boundary TRs

    Returns
    -------
    within_across : 1D array of length nPerm + 1; entry 0 is the true
        within-minus-across score, the rest are permutation scores
    """
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    K = len(human_bounds) + 1
    nTR = X.shape[1]

    # create events vector out of human bounds
    add_zero = np.append([0], human_bounds)
    full_human_bounds = np.append(add_zero, [nTR])
    diff_bounds = np.diff(full_human_bounds)
    events = np.zeros((nTR))
    for l in range(len(diff_bounds)):
        events[full_human_bounds[l]:full_human_bounds[l + 1]] = l
    # duration (in TRs) of the longest annotated event
    max_event_length = stats.mode(events)[1][0]

    # compute timepoint by timepoint correlation matrix
    cc = np.corrcoef(Y.T)  # time by time correlation matrix

    # mask restricting comparisons to timepoint pairs closer than max_event_length
    local_mask = np.zeros(cc.shape, dtype=bool)
    for k in range(1, max_event_length):
        local_mask[np.diag(np.ones(cc.shape[0] - k, dtype=bool), k)] = True

    # iteration 0 scores the true boundaries; each later iteration scores a
    # random placement of K - 1 boundaries generated at the end of the
    # previous pass
    for p in range(nPerm + 1):
        same_event = events[:, np.newaxis] == events
        within = fisher_mean(cc[same_event * local_mask])
        across = fisher_mean(cc[(~same_event) * local_mask])
        within_across[p] = within - across
        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)  # np.int is removed in modern numpy
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
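
# A minimal sketch of the call pattern with toy inputs (hypothetical numbers):
#   rng = np.random.RandomState(0)
#   toy = rng.randn(10, 100)                 # 10 features x 100 TRs
#   wva = HMM(toy, toy, np.array([30, 60]))  # two annotated boundaries (K = 3)
#   wva[0]   -> true within-minus-across score
#   wva[1:]  -> 1000 permutation scores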
if runNum == 0:
    # run 1 song boundary times (TRs)
    song_bounds = np.array([0, 225, 314, 494, 628, 718, 898, 1032, 1122, 1301,
                            1436, 1660, 1749, 1973, 2198, 2377, 2511])
    songs = ['Finlandia', 'Blue_Monk', 'I_Love_Music', 'Waltz_of_Flowers',
             'Capriccio_Espagnole', 'Island', 'All_Blues', 'St_Pauls_Suite',
             'Moonlight_Sonata', 'Symphony_Fantastique', 'Allegro_Moderato',
             'Change_of_the_Guard', 'Boogie_Stop_Shuffle', 'My_Favorite_Things',
             'The_Bird', 'Early_Summer']
    # get song start time and end time for run 1
    start_idx = song_bounds[idx]
    end_idx = song_bounds[idx + 1]
elif runNum == 1:
    # run 2 song boundary times (TRs)
    song_bounds = np.array([0, 90, 270, 449, 538, 672, 851, 1031, 1255, 1480,
                            1614, 1704, 1839, 2063, 2288, 2377, 2511])
    songs = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
             'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird', 'Island',
             'Allegro_Moderato', 'Finlandia', 'Early_Summer',
             'Capriccio_Espagnole', 'Symphony_Fantastique',
             'Boogie_Stop_Shuffle', 'My_Favorite_Things', 'Blue_Monk',
             'All_Blues']
    # get song start time and end time for run 2
    start_idx = song_bounds[idx]
    end_idx = song_bounds[idx + 1]
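
# e.g. idx = 0 with runNum == 0 selects 'Finlandia' and the TR window [0, 225)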
datadir = '/tigress/jamalw/MES/'
mask_img = nib.load(datadir + 'data/mask_nonan.nii')
mask = mask_img.get_fdata()  # get_data() was removed in nibabel 5
n_iter = 50          # SRM training iterations
initial_srm_k = 30   # default number of SRM shared features

# load human boundaries
human_bounds = np.load(datadir + 'prototype/link/scripts/data/beh/annotations/' + songs[idx] + '_beh_seg.npy')

parcels = nib.load(datadir + "data/CBIG/stable_projects/brain_parcellation/Schaefer2018_LocalGlobal/Parcellations/MNI/Schaefer2018_" + str(parcelNum) + "Parcels_17Networks_order_FSLMNI152_2mm.nii.gz").get_fdata()

# create brain-like objects to store results
pvals = np.zeros_like(mask, dtype=float)
zscores = np.zeros_like(mask, dtype=float)
rawWvA = np.zeros_like(mask, dtype=float)

parcel_dir = datadir + "prototype/link/scripts/data/searchlight_input/Schaefer" + str(parcelNum) + "/"
savedir = datadir + "prototype/link/scripts/data/searchlight_output/parcels/Schaefer" + str(parcelNum) + "/" + songs[idx]

n_folds = 5
WvA = np.zeros((n_folds, 1001))  # 1 true score + 1000 permutation scores per fold
for i in range(int(np.max(parcels))):
    print("Parcel Num: ", str(i + 1))

    # get indices where mask and parcel overlap
    indices = np.where((mask > 0) & (parcels == i + 1))

    # load this parcel's preprocessed data for both runs
    run1 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run1.npy")
    run2 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run2.npy")

    # cap the SRM dimensionality at the number of voxels in the parcel
    if len(indices[0]) < initial_srm_k:
        srm_k = len(indices[0])
    else:
        srm_k = initial_srm_k
    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
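
    # Note (assumption): SRM_V1 lives in the local srm module; from how
    # shared_data is used below, it is taken to return one srm_k x time
    # array per subject, projected into the shared space. The exact
    # train/test convention is defined in that module.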
    # perform cross-validation style HMM analysis over n_folds random splits
    for n in range(n_folds):
        np.random.seed(n)
        subj_list_shuffle = np.random.permutation(shared_data)

        # convert data from list to numpy array and z-score in time
        shared_data_stack = stats.zscore(np.dstack(subj_list_shuffle), axis=1, ddof=1)

        # split subjects into two groups (first 13 shuffled subjects vs. the
        # rest) and average within each group over this song's TR window
        others = np.mean(shared_data_stack[:, start_idx:end_idx, :13], axis=2)
        loo = np.mean(shared_data_stack[:, start_idx:end_idx, 13:], axis=2)

        # score this song: entry 0 is the true within-vs-across score,
        # the rest are permutation scores
        print("Fitting HMM")
        WvA[n, :] = HMM(others, loo, human_bounds)
    # average WvA scores over folds (in Fisher z-space)
    avgWvA = fisher_mean(WvA, axis=0)

    # z-score the true score against the permutation null
    match_z = (avgWvA[0] - fisher_mean(avgWvA[1:])) / (np.std(avgWvA[1:]))

    # convert z-score to a one-tailed p-value
    match_p = stats.norm.sf(match_z)
    # non-parametric alternative:
    # match_p = (np.sum(avgWvA[1:] >= avgWvA[0]) + 1) / len(avgWvA)

    # store wva score, z-score, and p-value in the brain maps
    pvals[indices] = match_p
    zscores[indices] = match_z
    rawWvA[indices] = avgWvA[0]
# output filenames (.nii extension added so nib.save can infer the format)
if runNum == 0:
    pfn = savedir + "/pvals_srm_v1_test_run2_split_merge_human.nii"
    zfn = savedir + "/zscores_srm_v1_test_run2_split_merge_human.nii"
    rfn = savedir + "/rawWva_srm_v1_test_run2_split_merge_human.nii"
elif runNum == 1:
    pfn = savedir + "/pvals_srm_v1_test_run1_split_merge_human.nii"
    zfn = savedir + "/zscores_srm_v1_test_run1_split_merge_human.nii"
    rfn = savedir + "/rawWva_srm_v1_test_run1_split_merge_human.nii"

save_nifti(pvals, mask_img.affine, pfn)
save_nifti(zscores, mask_img.affine, zfn)
save_nifti(rawWvA, mask_img.affine, rfn)