def test_load(tmpdir):
    sim = Simulator()
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    # if MNI_Template["resolution"] == '2mm':
    #     shape_3d = (91, 109, 91)
    #     shape_2d = (6, 238955)
    # elif MNI_Template["resolution"] == '3mm':
    #     shape_3d = (60, 72, 60)
    #     shape_2d = (6, 71020)

    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None)
    # holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None)

    # Test load list of 4D images
    file_list = [str(tmpdir.join('data.nii.gz')), str(tmpdir.join('data.nii.gz'))]
    dat = Brain_Data(file_list)
    dat = Brain_Data([nb.load(x) for x in file_list])

    # Test load list
    dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y)

    # Test Write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))
def _run_interface(self, runtime):
    from nltools.data import Brain_Data
    import os

    in_file = self.inputs.in_file
    mask = self.inputs.mask
    low_pass = self.inputs.low_pass_cutoff
    high_pass = self.inputs.high_pass_cutoff
    TR = self.inputs.sampling_rate

    if low_pass == 0:
        low_pass = None
    if high_pass == 0:
        high_pass = None

    dat = Brain_Data(in_file, mask=mask)

    # Handle no filtering
    if low_pass or high_pass:
        dat = dat.filter(sampling_rate=TR, low_pass=low_pass, high_pass=high_pass)

    # Generate output file name
    out_file = os.path.split(in_file)[-1].split('.nii.gz')[0] + '_filtered.nii.gz'
    dat.write(out_file)

    self._out_file = out_file

    runtime.returncode = 0
    return runtime
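# For orientation, a minimal standalone sketch of the same filtering step outside of
# the interface (a hedged example, not part of the original module): the file names and
# cutoff values below are hypothetical, and the keyword names simply mirror the
# dat.filter(...) call used in _run_interface above.
from nltools.data import Brain_Data

dat = Brain_Data('sub-01_task-rest_bold.nii.gz', mask='brain_mask.nii.gz')

# TR (in seconds) plays the role of the interface's sampling_rate input;
# low_pass/high_pass are cutoff frequencies, with None meaning "no filtering".
filtered = dat.filter(sampling_rate=2.0, low_pass=0.1, high_pass=0.008)
filtered.write('sub-01_task-rest_bold_filtered.nii.gz')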
def test_load(tmpdir):
    sim = Simulator()
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    # if MNI_Template["resolution"] == '2mm':
    #     shape_3d = (91, 109, 91)
    #     shape_2d = (6, 238955)
    # elif MNI_Template["resolution"] == '3mm':
    #     shape_3d = (60, 72, 60)
    #     shape_2d = (6, 71020)

    y = pd.read_csv(os.path.join(str(tmpdir.join("y.csv"))), header=None, index_col=None)
    # holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None)

    # Test load list of 4D images
    file_list = [str(tmpdir.join("data.nii.gz")), str(tmpdir.join("data.nii.gz"))]
    dat = Brain_Data(file_list)
    dat = Brain_Data([nb.load(x) for x in file_list])

    # Test load string and path
    dat = Brain_Data(data=str(tmpdir.join("data.nii.gz")), Y=y)
    dat = Brain_Data(data=Path(tmpdir.join("data.nii.gz")), Y=y)

    # Test Write
    dat.write(os.path.join(str(tmpdir.join("test_write.nii"))))
    assert Brain_Data(os.path.join(str(tmpdir.join("test_write.nii"))))

    # Test i/o for hdf5
    dat.write(os.path.join(str(tmpdir.join("test_write.h5"))))
    b = Brain_Data(os.path.join(tmpdir.join("test_write.h5")))
    for k in ["X", "Y", "mask", "nifti_masker", "file_name", "data"]:
        if k == "data":
            assert np.allclose(b.__dict__[k], dat.__dict__[k])
        elif k in ["X", "Y"]:
            assert all(b.__dict__[k].eq(dat.__dict__[k]).values)
        elif k == "mask":
            assert np.allclose(b.__dict__[k].affine, dat.__dict__[k].affine)
            assert np.allclose(b.__dict__[k].get_fdata(), dat.__dict__[k].get_fdata())
            assert b.__dict__[k].get_filename() == dat.__dict__[k].get_filename()
        elif k == "nifti_masker":
            assert np.allclose(b.__dict__[k].affine_, dat.__dict__[k].affine_)
            assert np.allclose(b.__dict__[k].mask_img.get_fdata(), dat.__dict__[k].mask_img.get_fdata())
        else:
            assert b.__dict__[k] == dat.__dict__[k]
def create_data(self, levels, sigma, radius=5, center=None, reps=1, output_dir=None):
    """ Create simulated data with integers

    Args:
        levels: vector of intensities or class labels
        sigma: amount of noise to add
        radius: vector of radius. Will create multiple spheres if len(radius) > 1
        center: center(s) of sphere(s) of the form [px, py, pz] or [[px1, py1, pz1], ..., [pxn, pyn, pzn]]
        reps: number of data repetitions useful for trials or subjects
        output_dir: string path of directory to output data. If None, no data will be written
        **kwargs: Additional keyword arguments to pass to the prediction algorithm

    """

    # Create reps
    nlevels = len(levels)
    y = levels
    rep_id = [1] * len(levels)
    for i in range(reps - 1):
        y = y + levels
        rep_id.extend([i + 2] * nlevels)

    # Initialize Spheres with options for multiple radii and centers of the spheres
    # (or just an int and a 3D list)
    A = self.n_spheres(radius, center)

    # For each intensity
    A_list = []
    for i in y:
        A_list.append(np.multiply(A, i))

    # Generate a different gaussian noise profile for each mask
    mu = 0  # values centered around 0
    N_list = []
    for i in range(len(y)):
        N_list.append(self.normal_noise(mu, sigma))

    # Add noise and signal together, then convert to nifti files
    NF_list = []
    for i in range(len(y)):
        NF_list.append(self.to_nifti(np.add(N_list[i], A_list[i])))
    NF_list = Brain_Data(NF_list)

    # Assign variables to object
    self.data = NF_list
    self.y = pd.DataFrame(data=y)
    self.rep_id = pd.DataFrame(data=rep_id)

    dat = self.data
    dat.Y = self.y

    # Write Data to files if requested
    if output_dir is not None and isinstance(output_dir, six.string_types):
        NF_list.write(os.path.join(output_dir, 'data.nii.gz'))
        self.y.to_csv(os.path.join(output_dir, 'y.csv'), index=None, header=False)
        self.rep_id.to_csv(os.path.join(output_dir, 'rep_id.csv'), index=None, header=False)

    return dat
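# A quick usage sketch for create_data, following how the tests in this section call it.
# The import path and the output directory are assumptions for illustration.
from nltools.simulator import Simulator

sim = Simulator()
# Two class labels repeated over 3 reps with noise sigma=1; with output_dir set,
# data.nii.gz, y.csv, and rep_id.csv are written there (output_dir=None skips writing).
dat = sim.create_data([0, 1], sigma=1, reps=3, output_dir='/tmp/sim_out')
print(len(dat), dat.Y.shape)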
def get_trialtype_pain_regressors(self, nifti_data, onset_file):
    print("importing nifti")
    # Import the nifti
    if os.path.isfile(nifti_data + "nltoolstandard.nii.gz"):
        msmrl1 = Brain_Data(nifti_data + "nltoolstandard.nii.gz")
    else:
        msmrl1 = Brain_Data(nifti_data + ".nii.gz")
        msmrl1.write(nifti_data + "nltoolstandard.nii.gz")

    # preprocess the nifti?
    print("importing onsets")
    # Import the onsets and build a design matrix from them
    onsets = onsets_to_dm(onset_file, TR=2, runLength=msmrl1.shape()[0])
    # onsets.sampling_rate = 2
    onsets_convolved = onsets.convolve()

    # Drop regressors with no events (all-zero columns after convolution)
    for c in onsets_convolved.columns:
        if sum(onsets_convolved.loc[:, c]) <= 0:
            print('deleting ' + str(c))
            del onsets_convolved[c]

    onsets_convolved['linearterm'] = range(1, 361)
    onsets_convolved['quadraticterm'] = [pow(x, 2) for x in onsets_convolved['linearterm']]
    onsets_convolved['cubicterm'] = [pow(x, 3) for x in onsets_convolved['linearterm']]
    onsets_convolved['ones'] = [1] * 360

    msmrl1.X = onsets_convolved
    print("convolved onsets; regressing...")
    # Regress
    regression = msmrl1.regress()
    print("Regressing; calculating similarity...")
    msm_predicted_pain = regression['beta'].similarity(self.stats['weight_map'], 'dot_product')

    onset_colnames = onsets_convolved.columns.tolist()
    msm_predicted_pain_dict = {}
    for i, b in enumerate(msm_predicted_pain):
        msm_predicted_pain_dict[onset_colnames[i]] = b
    return msm_predicted_pain_dict
def test_data(tmpdir):
    sim = Simulator()
    r = 10
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    shape_3d = (91, 109, 91)
    shape_2d = (6, 238955)
    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None).T
    flist = glob.glob(str(tmpdir.join('centered*.nii.gz')))

    # Test load list
    dat = Brain_Data(data=flist, Y=y)

    # Test load file
    assert Brain_Data(flist[0])

    # Test to_nifti
    d = dat.to_nifti()
    assert d.shape[0:3] == shape_3d

    # Test load nibabel
    assert Brain_Data(d)

    # Test shape
    assert dat.shape() == shape_2d

    # Test Mean
    assert dat.mean().shape()[0] == shape_2d[1]

    # Test Std
    assert dat.std().shape()[0] == shape_2d[1]

    # Test add
    new = dat + dat
    assert new.shape() == shape_2d

    # Test subtract
    new = dat - dat
    assert new.shape() == shape_2d

    # Test multiply
    new = dat * dat
    assert new.shape() == shape_2d

    # Test Iterator
    x = [x for x in dat]
    assert len(x) == len(dat)
    assert len(x[0].data.shape) == 1

    # Test T-test
    out = dat.ttest()
    assert out['t'].shape()[0] == shape_2d[1]

    # # Test T-test - permutation method
    # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
    # assert out['t'].shape()[0]==shape_2d[1]

    # Test Regress
    dat.X = pd.DataFrame({'Intercept': np.ones(len(dat.Y)),
                          'X1': np.array(dat.Y).flatten()}, index=None)
    out = dat.regress()
    assert out['beta'].shape() == (2, shape_2d[1])

    # Test indexing
    assert out['t'][1].shape()[0] == shape_2d[1]

    # Test threshold
    i = 1
    tt = threshold(out['t'][i], out['p'][i], .05)
    assert isinstance(tt, Brain_Data)

    # Test write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

    # Test append
    assert dat.append(dat).shape()[0] == shape_2d[0] * 2

    # Test distance
    distance = dat.distance(method='euclidean')
    assert distance.shape == (shape_2d[0], shape_2d[0])

    # Test predict
    stats = dat.predict(algorithm='svm',
                        cv_dict={'type': 'kfolds', 'n_folds': 2, 'n': len(dat.Y)},
                        plot=False, **{'kernel': "linear"})

    # Support Vector Regression, with 5 fold cross-validation with Platt Scaling
    # This will output probabilities of each class
    stats = dat.predict(algorithm='svm', cv_dict=None, plot=False,
                        **{'kernel': 'linear', 'probability': True})
    assert isinstance(stats['weight_map'], Brain_Data)

    # Logistic classification, with 5 fold stratified cross-validation.
    stats = dat.predict(algorithm='logistic',
                        cv_dict={'type': 'kfolds', 'n_folds': 5, 'n': len(dat.Y)},
                        plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Ridge classification, with 5 fold between-subject cross-validation,
    # where data for each subject is held out together.
    stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None, plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Test Similarity
    r = dat.similarity(stats['weight_map'])
    assert len(r) == shape_2d[0]
    r2 = dat.similarity(stats['weight_map'].to_nifti())
    assert len(r2) == shape_2d[0]

    # Test apply_mask - might move part of this to test mask suite
    s1 = create_sphere([41, 64, 55], radius=10)
    assert isinstance(s1, nb.Nifti1Image)
    s2 = Brain_Data(s1)
    masked_dat = dat.apply_mask(s1)
    assert masked_dat.shape()[1] == np.sum(s2.data != 0)

    # Test extract_roi
    mask = create_sphere([41, 64, 55], radius=10)
    assert len(dat.extract_roi(mask)) == shape_2d[0]

    # Test r_to_z
    z = dat.r_to_z()
    assert z.shape() == dat.shape()

    # Test copy
    d_copy = dat.copy()
    assert d_copy.shape() == dat.shape()

    # Test detrend
    detrend = dat.detrend()
    assert detrend.shape() == dat.shape()
def get_trialtype_pain_regressors(self, nifti_data, onset_file):
    print("importing nifti")
    # Import the nifti
    # Load the nltools prepped file if it's available.
    if os.path.isfile(nifti_data + "nltoolstandard.nii.gz"):
        msmrl1 = Brain_Data(nifti_data + "nltoolstandard.nii.gz")
    else:
        # But if it's not, no worries; just load the original one.
        msmrl1 = Brain_Data(nifti_data + ".nii.gz")
        msmrl1.write(nifti_data + "nltoolstandard.nii.gz")

    # I want to standardize globally; this will preserve the relative strengths of each
    # time point and preserve the relative activity at each voxel,
    # and let's use the mean standard deviation across all the images.
    # msmrl1.data = msmrl1.data - np.tile(msmrl1.mean().mean(), msmrl1.data.shape)
    # msmrl1.data = msmrl1.data / np.tile(np.std(msmrl1.data, axis=1).mean(), msmrl1.data.shape)
    # OR we could apply the standardization to the OUTPUT.
    # grand_mean = msmrl1.mean().mean()
    # grand_sd = np.std(msmrl1.data, axis=1).mean()

    # preprocess the nifti?
    print("importing onsets")
    # Import the onsets and build a design matrix from them
    onsets = onsets_to_dm(onset_file, TR=2, runLength=msmrl1.shape()[0])
    # onsets.sampling_rate = 2
    onsets_convolved = onsets.convolve()

    # Delete columns with no information in them.
    for c in onsets_convolved.columns:
        if sum(onsets_convolved.loc[:, c]) <= 0:
            print('deleting ' + str(c))
            del onsets_convolved[c]

    rowcount = len(onsets_convolved)
    if rowcount != 360:
        warnings.warn("Just a friendly FYI: expected number of rows is 360 but this subject had "
                      + str(rowcount) + ". Probably this subject got cut off the task half-way through.")

    onsets_convolved['linearterm'] = range(1, rowcount + 1)
    onsets_convolved['quadraticterm'] = [pow(x, 2) for x in onsets_convolved['linearterm']]
    onsets_convolved['cubicterm'] = [pow(x, 3) for x in onsets_convolved['linearterm']]
    onsets_convolved['ones'] = [1] * rowcount

    msmrl1.X = onsets_convolved
    print("convolved onsets; regressing...")
    # Regress the data on each of the onset regressors. So then, when we compute similarity
    # against the regression, we compare the response to each event, not to each TR.
    regression = msmrl1.regress()
    print("Regressing; calculating similarity to the pain map from " + self.decoder_origin + "...")
    msm_predicted_pain = regression['beta'].similarity(self.decoder, 'dot_product')
    msm_predicted_pain_scaled = msm_predicted_pain - msmrl1.data.std()

    onset_colnames = onsets_convolved.columns.tolist()
    msm_predicted_pain_dict = {}
    for i, b in enumerate(msm_predicted_pain_scaled):
        msm_predicted_pain_dict[onset_colnames[i]] = b
    return msm_predicted_pain_dict
def test_brain_data(tmpdir):
    # Add 3mm to list to test that resolution as well
    for resolution in ['2mm']:
        MNI_Template["resolution"] = resolution

        sim = Simulator()
        r = 10
        sigma = 1
        y = [0, 1]
        n_reps = 3
        output_dir = str(tmpdir)
        dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

        if MNI_Template["resolution"] == '2mm':
            shape_3d = (91, 109, 91)
            shape_2d = (6, 238955)
        elif MNI_Template["resolution"] == '3mm':
            shape_3d = (60, 72, 60)
            shape_2d = (6, 71020)

        y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None)
        holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None)

        # Test load list of 4D images
        file_list = [str(tmpdir.join('data.nii.gz')), str(tmpdir.join('data.nii.gz'))]
        dat = Brain_Data(file_list)
        dat = Brain_Data([nb.load(x) for x in file_list])

        # Test load list
        dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y)

        # Test concatenate
        out = Brain_Data([x for x in dat])
        assert isinstance(out, Brain_Data)
        assert len(out) == len(dat)

        # Test to_nifti
        d = dat.to_nifti()
        assert d.shape[0:3] == shape_3d

        # Test load nibabel
        assert Brain_Data(d)

        # Test shape
        assert dat.shape() == shape_2d

        # Test Mean
        assert dat.mean().shape()[0] == shape_2d[1]

        # Test Std
        assert dat.std().shape()[0] == shape_2d[1]

        # Test add
        new = dat + dat
        assert new.shape() == shape_2d

        # Test subtract
        new = dat - dat
        assert new.shape() == shape_2d

        # Test multiply
        new = dat * dat
        assert new.shape() == shape_2d

        # Test Indexing
        index = [0, 3, 1]
        assert len(dat[index]) == len(index)
        index = range(4)
        assert len(dat[index]) == len(index)
        index = dat.Y == 1
        assert len(dat[index.values.flatten()]) == index.values.sum()
        assert len(dat[index]) == index.values.sum()
        assert len(dat[:3]) == 3

        # Test Iterator
        x = [x for x in dat]
        assert len(x) == len(dat)
        assert len(x[0].data.shape) == 1

        # Test T-test
        out = dat.ttest()
        assert out['t'].shape()[0] == shape_2d[1]

        # # Test T-test - permutation method
        # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
        # assert out['t'].shape()[0]==shape_2d[1]

        # Test Regress
        dat.X = pd.DataFrame({'Intercept': np.ones(len(dat.Y)),
                              'X1': np.array(dat.Y).flatten()}, index=None)

        # Standard OLS
        out = dat.regress()
        assert type(out['beta'].data) == np.ndarray
        assert type(out['t'].data) == np.ndarray
        assert type(out['p'].data) == np.ndarray
        assert type(out['residual'].data) == np.ndarray
        assert type(out['df'].data) == np.ndarray
        assert out['beta'].shape() == (2, shape_2d[1])
        assert out['t'][1].shape()[0] == shape_2d[1]

        # Robust OLS
        out = dat.regress(mode='robust')
        assert type(out['beta'].data) == np.ndarray
        assert type(out['t'].data) == np.ndarray
        assert type(out['p'].data) == np.ndarray
        assert type(out['residual'].data) == np.ndarray
        assert type(out['df'].data) == np.ndarray
        assert out['beta'].shape() == (2, shape_2d[1])
        assert out['t'][1].shape()[0] == shape_2d[1]

        # Test threshold
        i = 1
        tt = threshold(out['t'][i], out['p'][i], .05)
        assert isinstance(tt, Brain_Data)

        # Test write
        dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
        assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

        # Test append
        assert dat.append(dat).shape()[0] == shape_2d[0] * 2

        # Test distance
        distance = dat.distance(method='euclidean')
        assert isinstance(distance, Adjacency)
        assert distance.square_shape()[0] == shape_2d[0]

        # Test predict
        stats = dat.predict(algorithm='svm',
                            cv_dict={'type': 'kfolds', 'n_folds': 2},
                            plot=False, **{'kernel': "linear"})

        # Support Vector Regression, with 5 fold cross-validation with Platt Scaling
        # This will output probabilities of each class
        stats = dat.predict(algorithm='svm', cv_dict=None, plot=False,
                            **{'kernel': 'linear', 'probability': True})
        assert isinstance(stats['weight_map'], Brain_Data)

        # Logistic classification, with 2 fold cross-validation.
        stats = dat.predict(algorithm='logistic',
                            cv_dict={'type': 'kfolds', 'n_folds': 2},
                            plot=False)
        assert isinstance(stats['weight_map'], Brain_Data)

        # Ridge classification
        stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None, plot=False)
        assert isinstance(stats['weight_map'], Brain_Data)

        # Ridge
        stats = dat.predict(algorithm='ridge',
                            cv_dict={'type': 'kfolds', 'n_folds': 2, 'subject_id': holdout},
                            plot=False, **{'alpha': .1})

        # Lasso
        stats = dat.predict(algorithm='lasso',
                            cv_dict={'type': 'kfolds', 'n_folds': 2, 'stratified': dat.Y},
                            plot=False, **{'alpha': .1})

        # PCR
        stats = dat.predict(algorithm='pcr', cv_dict=None, plot=False)

        # Test Similarity
        r = dat.similarity(stats['weight_map'])
        assert len(r) == shape_2d[0]
        r2 = dat.similarity(stats['weight_map'].to_nifti())
        assert len(r2) == shape_2d[0]
        r = dat.similarity(stats['weight_map'], method='dot_product')
        assert len(r) == shape_2d[0]
        r = dat.similarity(stats['weight_map'], method='cosine')
        assert len(r) == shape_2d[0]
        r = dat.similarity(dat, method='correlation')
        assert r.shape == (dat.shape()[0], dat.shape()[0])
        r = dat.similarity(dat, method='dot_product')
        assert r.shape == (dat.shape()[0], dat.shape()[0])
        r = dat.similarity(dat, method='cosine')
        assert r.shape == (dat.shape()[0], dat.shape()[0])

        # Test apply_mask - might move part of this to test mask suite
        s1 = create_sphere([12, 10, -8], radius=10)
        assert isinstance(s1, nb.Nifti1Image)
        masked_dat = dat.apply_mask(s1)
        assert masked_dat.shape()[1] == np.sum(s1.get_data() != 0)

        # Test extract_roi
        mask = create_sphere([12, 10, -8], radius=10)
        assert len(dat.extract_roi(mask)) == shape_2d[0]

        # Test r_to_z
        z = dat.r_to_z()
        assert z.shape() == dat.shape()

        # Test copy
        d_copy = dat.copy()
        assert d_copy.shape() == dat.shape()

        # Test detrend
        detrend = dat.detrend()
        assert detrend.shape() == dat.shape()

        # Test standardize
        s = dat.standardize()
        assert s.shape() == dat.shape()
        assert np.isclose(np.sum(s.mean().data), 0, atol=.1)
        s = dat.standardize(method='zscore')
        assert s.shape() == dat.shape()
        assert np.isclose(np.sum(s.mean().data), 0, atol=.1)

        # Test Sum
        s = dat.sum()
        assert s.shape() == dat[1].shape()

        # Test Groupby
        s1 = create_sphere([12, 10, -8], radius=10)
        s2 = create_sphere([22, -2, -22], radius=10)
        mask = Brain_Data([s1, s2])
        d = dat.groupby(mask)
        assert isinstance(d, Groupby)

        # Test Aggregate
        mn = dat.aggregate(mask, 'mean')
        assert isinstance(mn, Brain_Data)
        assert len(mn.shape()) == 1

        # Test Threshold
        s1 = create_sphere([12, 10, -8], radius=10)
        s2 = create_sphere([22, -2, -22], radius=10)
        mask = Brain_Data(s1) * 5
        mask = mask + Brain_Data(s2)
        m1 = mask.threshold(upper=.5)
        m2 = mask.threshold(upper=3)
        m3 = mask.threshold(upper='98%')
        m4 = Brain_Data(s1) * 5 + Brain_Data(s2) * -.5
        m4 = mask.threshold(upper=.5, lower=-.3)
        assert np.sum(m1.data > 0) > np.sum(m2.data > 0)
        assert np.sum(m1.data > 0) == np.sum(m3.data > 0)
        assert np.sum(m4.data[(m4.data > -.3) & (m4.data < .5)]) == 0
        assert np.sum(m4.data[(m4.data < -.3) | (m4.data > .5)]) > 0

        # Test Regions
        r = mask.regions(min_region_size=10)
        m1 = Brain_Data(s1)
        m2 = r.threshold(1, binarize=True)
        # assert len(r)==2
        assert len(np.unique(r.to_nifti().get_data())) == 2
        diff = m2 - m1
        assert np.sum(diff.data) == 0

        # Test Bootstrap
        masked = dat.apply_mask(create_sphere(radius=10, coordinates=[0, 0, 0]))
        n_samples = 3
        b = masked.bootstrap('mean', n_samples=n_samples)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('std', n_samples=n_samples)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples, plot=False)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples, plot=False,
                             cv_dict={'type': 'kfolds', 'n_folds': 3})
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples, save_weights=True, plot=False)
        assert len(b['samples']) == n_samples

        # Test decompose
        n_components = 3
        stats = dat.decompose(algorithm='pca', axis='voxels', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='ica', axis='voxels', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        dat.data = dat.data + 2
        dat.data[dat.data < 0] = 0
        stats = dat.decompose(algorithm='nnmf', axis='voxels', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='fa', axis='voxels', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='pca', axis='images', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='ica', axis='images', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        dat.data = dat.data + 2
        dat.data[dat.data < 0] = 0
        stats = dat.decompose(algorithm='nnmf', axis='images', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='fa', axis='images', n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        # Test Hyperalignment Method
        sim = Simulator()
        y = [0, 1]
        n_reps = 10
        s1 = create_sphere([0, 0, 0], radius=3)
        d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1)
        d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1)
        d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1)

        # Test procrustes using align
        data = [d1, d2, d3]
        out = align(data, method='procrustes')
        assert len(data) == len(out['transformed'])
        assert len(data) == len(out['transformation_matrix'])
        assert data[0].shape() == out['common_model'].shape()
        transformed = np.dot(d1.data, out['transformation_matrix'][0])
        centered = d1.data - np.mean(d1.data, 0)
        transformed = (np.dot(centered / np.linalg.norm(centered),
                              out['transformation_matrix'][0]) * out['scale'][0])
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - transformed),
                                       decimal=5)

        # Test deterministic srm on brain_data
        bout = d1.align(out['common_model'], method='deterministic_srm')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed))

        # Test probabilistic srm on brain_data
        bout = d1.align(out['common_model'], method='probabilistic_srm')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed))

        # Test procrustes on brain_data
        bout = d1.align(out['common_model'], method='procrustes')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        centered = d1.data - np.mean(d1.data, 0)
        btransformed = (np.dot(centered / np.linalg.norm(centered),
                               bout['transformation_matrix']) * bout['scale'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed),
                                       decimal=5)
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - bout['transformed'].data))

        # Test hyperalignment on Brain_Data over time (axis=1)
        sim = Simulator()
        y = [0, 1]
        n_reps = 10
        s1 = create_sphere([0, 0, 0], radius=5)
        d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1)
        d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1)
        d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1)
        data = [d1, d2, d3]

        out = align(data, method='procrustes', axis=1)
        assert len(data) == len(out['transformed'])
        assert len(data) == len(out['transformation_matrix'])
        assert data[0].shape() == out['common_model'].shape()
        centered = data[0].data.T - np.mean(data[0].data.T, 0)
        transformed = (np.dot(centered / np.linalg.norm(centered),
                              out['transformation_matrix'][0]) * out['scale'][0])
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - transformed.T),
                                       decimal=5)

        bout = d1.align(out['common_model'], method='deterministic_srm', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data.T, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed.T))

        bout = d1.align(out['common_model'], method='probabilistic_srm', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data.T, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed.T))

        bout = d1.align(out['common_model'], method='procrustes', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        centered = d1.data.T - np.mean(d1.data.T, 0)
        btransformed = (np.dot(centered / np.linalg.norm(centered),
                               bout['transformation_matrix']) * bout['scale'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed.T),
                                       decimal=5)
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - bout['transformed'].data))
new = new.append(data[4])


# Lists of `Brain_Data` instances can also be concatenated by recasting as a `Brain_Data` object.

# In[60]:

print(type([x for x in data[:4]]))
type(Brain_Data([x for x in data[:4]]))


# Any Brain_Data object can be written out to a nifti file.

# In[203]:

data.write('Tmp_Data.nii.gz')


# Images within a Brain_Data() instance are iterable. Here we use a list comprehension to calculate the overall mean across all voxels within an image.

# In[61]:

[x.mean() for x in data]


# Though, we could also do this with the `mean` method by setting `axis=1`.

# In[62]:

data.mean(axis=1)


# Let's plot the mean to see how the global signal changes over time.
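# A minimal sketch of that plot (the matplotlib import and figure size are assumptions;
# `data.mean(axis=1)` is the per-image mean shown in the previous cell).

# In[ ]:

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 3))
plt.plot(data.mean(axis=1))
plt.xlabel('Image')
plt.ylabel('Mean voxel value')
plt.show()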
def import_and_convolve(self, nifti_data, onset_file, data_mask=None, motion_regressors=None):
    print("importing nifti")
    # Import the nifti
    # Load the nltools prepped file if it's available.
    if os.path.isfile(nifti_data + self.data_fmri_space + ".nii.gz"):
        msmrl1 = Brain_Data(nifti_data + self.data_fmri_space + ".nii.gz", mask=data_mask)
    else:
        # But if it's not, no worries; just load the original one.
        msmrl1 = Brain_Data(nifti_data + ".nii.gz", mask=data_mask)
        msmrl1.write(nifti_data + self.data_fmri_space + ".nii.gz")

    # I want to standardize globally; this will preserve the relative strengths of each
    # time point and preserve the relative activity at each voxel,
    # and let's use the mean standard deviation across all the images.
    # msmrl1.data = msmrl1.data - np.tile(msmrl1.mean().mean(), msmrl1.data.shape)
    # msmrl1.data = msmrl1.data / np.tile(np.std(msmrl1.data, axis=1).mean(), msmrl1.data.shape)
    # OR we could apply the standardization to the OUTPUT.
    # grand_mean = msmrl1.mean().mean()
    # grand_sd = np.std(msmrl1.data, axis=1).mean()

    # preprocess the nifti?
    print("importing onsets")
    # Import the onsets and build a design matrix from them
    onsets = onsets_to_dm(onset_file, TR=2, runLength=msmrl1.shape()[0])
    # onsets.sampling_rate = 2
    onsets_convolved = onsets.convolve()

    # Delete columns with no information in them.
    for c in onsets_convolved.columns:
        if sum(onsets_convolved.loc[:, c]) <= 0:
            print('deleting ' + str(c))
            del onsets_convolved[c]

    rowcount = len(onsets_convolved)
    if rowcount != 360:
        warnings.warn("Just a friendly FYI: expected number of rows is 360 but this subject had "
                      + str(rowcount) + ". Probably this subject got cut off the task half-way through.")

    onsets_convolved['linearterm'] = range(1, rowcount + 1)
    onsets_convolved['quadraticterm'] = [pow(x, 2) for x in onsets_convolved['linearterm']]
    onsets_convolved['cubicterm'] = [pow(x, 3) for x in onsets_convolved['linearterm']]
    onsets_convolved['ones'] = [1] * rowcount

    if motion_regressors is not None:
        onsets_convolved = pandas.concat([onsets_convolved, motion_regressors], axis=1)

    msmrl1.X = onsets_convolved
    return msmrl1
def test_brain_data(tmpdir):
    sim = Simulator()
    r = 10
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    shape_3d = (91, 109, 91)
    shape_2d = (6, 238955)
    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None).T
    holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None).T
    flist = glob.glob(str(tmpdir.join('centered*.nii.gz')))

    # Test load list
    dat = Brain_Data(data=flist, Y=y)

    # Test load file
    assert Brain_Data(flist[0])

    # Test to_nifti
    d = dat.to_nifti()
    assert d.shape[0:3] == shape_3d

    # Test load nibabel
    assert Brain_Data(d)

    # Test shape
    assert dat.shape() == shape_2d

    # Test Mean
    assert dat.mean().shape()[0] == shape_2d[1]

    # Test Std
    assert dat.std().shape()[0] == shape_2d[1]

    # Test add
    new = dat + dat
    assert new.shape() == shape_2d

    # Test subtract
    new = dat - dat
    assert new.shape() == shape_2d

    # Test multiply
    new = dat * dat
    assert new.shape() == shape_2d

    # Test Iterator
    x = [x for x in dat]
    assert len(x) == len(dat)
    assert len(x[0].data.shape) == 1

    # Test T-test
    out = dat.ttest()
    assert out['t'].shape()[0] == shape_2d[1]

    # # Test T-test - permutation method
    # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
    # assert out['t'].shape()[0]==shape_2d[1]

    # Test Regress
    dat.X = pd.DataFrame({'Intercept': np.ones(len(dat.Y)),
                          'X1': np.array(dat.Y).flatten()}, index=None)
    out = dat.regress()
    assert out['beta'].shape() == (2, shape_2d[1])

    # Test indexing
    assert out['t'][1].shape()[0] == shape_2d[1]

    # Test threshold
    i = 1
    tt = threshold(out['t'][i], out['p'][i], .05)
    assert isinstance(tt, Brain_Data)

    # Test write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

    # Test append
    assert dat.append(dat).shape()[0] == shape_2d[0] * 2

    # Test distance
    distance = dat.distance(method='euclidean')
    assert isinstance(distance, Adjacency)
    assert distance.square_shape()[0] == shape_2d[0]

    # Test predict
    stats = dat.predict(algorithm='svm',
                        cv_dict={'type': 'kfolds', 'n_folds': 2},
                        plot=False, **{'kernel': "linear"})

    # Support Vector Regression, with 5 fold cross-validation with Platt Scaling
    # This will output probabilities of each class
    stats = dat.predict(algorithm='svm', cv_dict=None, plot=False,
                        **{'kernel': 'linear', 'probability': True})
    assert isinstance(stats['weight_map'], Brain_Data)

    # Logistic classification, with 2 fold cross-validation.
    stats = dat.predict(algorithm='logistic',
                        cv_dict={'type': 'kfolds', 'n_folds': 2}, plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Ridge classification
    stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None, plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Ridge
    stats = dat.predict(algorithm='ridge',
                        cv_dict={'type': 'kfolds', 'n_folds': 2, 'subject_id': holdout},
                        plot=False, **{'alpha': .1})

    # Lasso
    stats = dat.predict(algorithm='lasso',
                        cv_dict={'type': 'kfolds', 'n_folds': 2, 'stratified': dat.Y},
                        plot=False, **{'alpha': .1})

    # PCR
    stats = dat.predict(algorithm='pcr', cv_dict=None, plot=False)

    # Test Similarity
    r = dat.similarity(stats['weight_map'])
    assert len(r) == shape_2d[0]
    r2 = dat.similarity(stats['weight_map'].to_nifti())
    assert len(r2) == shape_2d[0]

    # Test apply_mask - might move part of this to test mask suite
    s1 = create_sphere([12, 10, -8], radius=10)
    assert isinstance(s1, nb.Nifti1Image)
    s2 = Brain_Data(s1)
    masked_dat = dat.apply_mask(s1)
    assert masked_dat.shape()[1] == np.sum(s2.data != 0)

    # Test extract_roi
    mask = create_sphere([12, 10, -8], radius=10)
    assert len(dat.extract_roi(mask)) == shape_2d[0]

    # Test r_to_z
    z = dat.r_to_z()
    assert z.shape() == dat.shape()

    # Test copy
    d_copy = dat.copy()
    assert d_copy.shape() == dat.shape()

    # Test detrend
    detrend = dat.detrend()
    assert detrend.shape() == dat.shape()

    # Test standardize
    s = dat.standardize()
    assert s.shape() == dat.shape()
    assert np.isclose(np.sum(s.mean().data), 0, atol=.1)
    s = dat.standardize(method='zscore')
    assert s.shape() == dat.shape()
    assert np.isclose(np.sum(s.mean().data), 0, atol=.1)

    # Test Sum
    s = dat.sum()
    assert s.shape() == dat[1].shape()

    # Test Groupby
    s1 = create_sphere([12, 10, -8], radius=10)
    s2 = create_sphere([22, -2, -22], radius=10)
    mask = Brain_Data([s1, s2])
    d = dat.groupby(mask)
    assert isinstance(d, Groupby)

    # Test Aggregate
    mn = dat.aggregate(mask, 'mean')
    assert isinstance(mn, Brain_Data)
    assert len(mn.shape()) == 1

    # Test Threshold
    s1 = create_sphere([12, 10, -8], radius=10)
    s2 = create_sphere([22, -2, -22], radius=10)
    mask = Brain_Data(s1) * 5
    mask = mask + Brain_Data(s2)
    m1 = mask.threshold(thresh=.5)
    m2 = mask.threshold(thresh=3)
    m3 = mask.threshold(thresh='98%')
    assert np.sum(m1.data > 0) > np.sum(m2.data > 0)
    assert np.sum(m1.data > 0) == np.sum(m3.data > 0)

    # Test Regions
    r = mask.regions(min_region_size=10)
    m1 = Brain_Data(s1)
    m2 = r.threshold(1, binarize=True)
    # assert len(r)==2
    assert len(np.unique(r.to_nifti().get_data())) == 2
    # JC edit: I think this is what you were trying to do
    diff = m2 - m1
    assert np.sum(diff.data) == 0
def load_data(self,
              sub,
              return_type="Brain_Data",
              write="all",
              force="none",
              verbose=True,
              reload=True,
              **processes) -> Brain_Data:
    """Load data from pipeline.root/derivatives/pipeline.name and/or apply processes
    from pipeline.processes to it.

    By default, first checks whether the processes have been applied and saved before,
    and then loads them. By default, saves all the intermediate steps.

    Parameters
    ----------
    sub : str
        Name of the subject to load the process from.
    return_type : str, optional
        Type of the return value. Must be one of "path", "Brain_Data". If "path" and
        write="none" and the file does not exist, throws an error, as the path does
        not exist. By default "Brain_Data"
    write : str, optional
        Whether to save the intermediate and the last step when applying processes.
        Must be one of "none" (no step is saved), "main" (only the end result is saved),
        or "all" (all intermediate steps are saved). By default "all"
    force : str, optional
        Whether to apply processes even though a file of this already exists.
        Must be one of "none", "main", "all" (see above). By default "none"
    verbose : bool, optional
        Whether to be verbose, by default True
    reload : bool, optional
        Whether to reload the pipeline.layout after writing a file. Disabling this is only
        recommended when computing multiple independent processes; afterwards, the layout
        should be reloaded by hand (call `pipeline.layout = BIDSLayout(pipeline.root)`).
        By default True

    Returns
    -------
    Brain_Data, str
        (Un)Processed data or path to where the data is stored

    Raises
    ------
    TypeError
        If wrong return_type is supplied
    FileNotFoundError
        If subject is not found
    KeyError
        If an unknown process is supplied
    """

    # We'll use this function to print only when being verbose
    def v_print(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    return_types = ["Brain_Data", "path"]
    if return_type not in return_types:
        raise TypeError(
            f"Return type {return_type} not recognised. Must be in {return_types}.")

    path = join(self.root, "derivatives", self.name, f"sub-{sub}")
    if not isdir(path):
        raise FileNotFoundError(f"Did not find subject {sub} in directory {path}.")

    for key in processes.keys():
        if key not in self.processes.keys():
            raise KeyError(
                f"{key} is not a known process. Known processes are {self.processes.keys()}")

    if len(processes) == 0:
        v_print(f"...loading the unprocessed file of subject {sub}")
        path = self.original_path(layout=self.layout, sub=sub)
        if return_type == "path":
            return path
        if return_type == "Brain_Data":
            return Brain_Data(path)

    # This is the most important part: build the output file name from the applied processes
    name = "_".join([
        f"sub-{sub}",
        "_".join([f"{self.processes[key].readable(args)}" for key, args in processes.items()]),
        "bold.nii.gz",
    ])

    if isfile(join(path, name)):
        v_print(f"...found {name}")
        if return_type == "path":
            return join(path, name)
        data = Brain_Data(join(path, name))
    else:
        last_process, last_key = processes.popitem()
        v_print(f"...{name} does not exist yet")
        yet_to_process = self.load_data(
            sub=sub,
            return_type="Brain_Data",
            write=("all" if write == "all" else "none"),
            **processes
        )
        v_print(f"Applying process {last_process}")
        data = self.processes[last_process].process(
            self,
            sub,
            yet_to_process,
            **last_key if isinstance(last_key, dict) else last_key
        )
        if write in ["all", "main"]:
            v_print(f"...writing {name}")
            data.write(join(path, name))
            if reload:
                self.layout = BIDSLayout(self.root, derivatives=True)

    if return_type == "Brain_Data":
        return data
    if return_type == "path":
        return join(path, name)
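# A hypothetical usage sketch of load_data (the pipeline instance, the subject label,
# and the "smooth" process name/arguments are assumptions for illustration only).

# Load the fully processed file if it already exists, otherwise compute it and save every step.
data = pipeline.load_data("01", return_type="Brain_Data", write="all", smooth={"fwhm": 6})

# Only ask where the unprocessed run lives on disk.
raw_path = pipeline.load_data("01", return_type="path")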
smoothed.X = dm
stats = smoothed.regress()
stats['residual'].data = np.float32(stats['residual'].data)  # cast as float32 to reduce storage space
stats['residual'].write(os.path.join(base_dir, sub, 'func', f'{sub}_denoise_smooth{fwhm}mm_task-sherlockPart1_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz'))

We also saved the cropped, denoised viewing data as an hdf5 file to speed up loading times when using nltools.

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'

for scan in ['Part1', 'Part2']:
    file_list = glob.glob(os.path.join(data_dir, '*', 'func', f'*crop*{scan}*nii.gz'))
    for f in file_list:
        data = Brain_Data(f)
        data.write(f"{f.split('.nii.gz')[0]}.hdf5")

Finally, we have also precomputed average activations within a whole-brain parcellation (n=50) for some of the tutorials.

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'
mask = Brain_Data('http://neurovault.org/media/images/2099/Neurosynth%20Parcellation_0.nii.gz')

for scan in ['Part1', 'Part2']:
    file_list = glob.glob(os.path.join(data_dir, '*', 'func', f'*crop*{scan}*hdf5'))
    for f in file_list:
        sub = os.path.basename(f).split('_')[0]
        print(sub)
        data = Brain_Data(f)
        roi = data.extract_roi(mask)
        pd.DataFrame(roi.T).to_csv(os.path.join(os.path.dirname(f), f"{sub}_{scan}_Average_ROI_n50.csv"), index=False)
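To reuse the precomputed parcel averages in a tutorial, they can simply be read back with pandas. The following is a minimal sketch; the subject label and scan are hypothetical, and the path just follows the naming pattern written above.

import os
import pandas as pd

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'
sub, scan = 'sub-01', 'Part1'

# Rows are time points and columns are the 50 parcels, given the transpose used when saving.
roi = pd.read_csv(os.path.join(data_dir, sub, 'func', f"{sub}_{scan}_Average_ROI_n50.csv"))
print(roi.shape)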