dm_with_nuissance = dm.add_poly(order=2, include_lower=True)
dm_with_nuissance.heatmap()

#########################################################################
# We can see that 3 new columns were added to the design matrix. We can
# also inspect the change to the meta-data. Notice that the Design Matrix
# is now aware of the existence of three polynomial terms.

print(dm_with_nuissance.details())

#########################################################################
# Discrete Cosine Basis Functions
# *******************************
#
# Polynomial variables are not the only type of nuisance covariates that
# can be generated for you. Design Matrix also supports the creation of
# discrete-cosine basis functions in the style of SPM. This will create a
# series of filters added as new columns based on a specified duration,
# defaulting to 180s. Let's create DCT filters for 20s durations in our
# toy data.

# Short filter duration for our simple example
dm_with_cosine = dm.add_dct_basis(duration=20)
dm_with_cosine.heatmap()

#########################################################################
# Data operations
# ---------------
#
# Performing convolution
# **********************
#
# Design Matrix makes it easy to perform convolution and will auto-ignore
# all columns that are considered polynomials. The default convolution
# kernel is the Glover (1999) HRF parameterized by the glover_hrf
# implementation in nipy (see nltools.externals.hrf for details). However,
# any arbitrary kernel can be passed as a 1d numpy array, or multiple
# kernels can be passed as a 2d numpy array for highly flexible
# convolution across many types of data (e.g. SCR).

dm_with_nuissance_c = dm_with_nuissance.convolve()
print(dm_with_nuissance_c.details())
dm_with_nuissance_c.heatmap()
def test_designmat(tmpdir):
    """Exercise the main Design_Matrix operations on small matrices.

    Covers append (horizontal and vertical), convolve, vif, add_poly,
    zscore, clean, replace_data, add_dct_basis, upsample and downsample.

    Args:
        tmpdir: presumably the pytest ``tmpdir`` fixture; it is not used by
            this test and is kept only so the signature stays unchanged.
    """
    mat1 = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    }, sampling_rate=2.0, polys=['intercept'])

    # mat2 is only used for its shape (same as mat1) in the first assertion.
    mat2 = Design_Matrix({
        'X': [9, 9, 2, 7, 5, 0, 1, 1, 1, 2],
        'Y': [3, 3, 3, 6, 9, 0, 1, 10, 1, 10],
        'Z': [2, 6, 3, 2, 7, 0, 1, 7, 8, 8],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    }, sampling_rate=2.0, polys=['intercept'])

    # Appending
    # Basic horz cat
    new_mat = mat1.append(mat1, axis=1)
    assert new_mat.shape == (mat1.shape[0], mat1.shape[1] + mat2.shape[1])
    both_cols = list(mat1.columns) + list(mat1.columns)
    assert all(new_mat.columns == both_cols)

    # Basic vert cat
    new_mat = mat1.append(mat1, axis=0)
    assert new_mat.shape == (mat1.shape[0] * 2, mat1.shape[1] + 1)

    # Advanced vert cat
    new_mat = mat1.append(mat1, axis=0, keep_separate=False)
    assert new_mat.shape == (mat1.shape[0] * 2, mat1.shape[1])

    # More advanced vert cat
    new_mat = mat1.append(mat1, axis=0, add_poly=2)
    assert new_mat.shape == (mat1.shape[0] * 2, 9)

    # convolution doesn't affect intercept
    assert all(mat1.convolve().iloc[:, -1] == mat1.iloc[:, -1])
    # but it still works
    assert (mat1.convolve().iloc[:, :3].values != mat1.iloc[:, :3].values).any()

    # Test vifs
    expectedVifs = np.array([1.03984251, 1.02889877, 1.02261945])
    assert np.allclose(expectedVifs, mat1.vif())

    # poly
    # These comparisons were previously bare expressions whose result was
    # discarded, so they could never fail; assert them explicitly.
    assert mat1.add_poly(order=4).shape[1] == mat1.shape[1] + 4
    assert mat1.add_poly(order=4, include_lower=False).shape[1] == mat1.shape[1] + 1

    # zscore: only X and Z are standardized, so Y must be untouched.
    z = mat1.zscore(columns=['X', 'Z'])
    assert (z['Y'] == mat1['Y']).all()
    assert z.shape == mat1.shape

    # clean: A and C hold the same data as X and are expected to be dropped.
    mat = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'A': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'C': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    }, sampling_rate=2.0, polys=['intercept'])
    # Fix the column order explicitly before cleaning.
    mat = mat[['X', 'A', 'Y', 'Z', 'C', 'intercept']]
    assert all(mat.clean().columns == ['X', 'Y', 'Z', 'intercept'])

    # replace data
    mat = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'A': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'C': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
    }, sampling_rate=2.0)
    # Replacement data has one column fewer than the original matrix.
    mat = mat.replace_data(
        np.ones((mat.shape[0], mat.shape[1] - 1)),
        column_names=['a', 'b', 'c', 'd'],
    )
    assert np.allclose(mat.values, 1)
    assert all(mat.columns == ['a', 'b', 'c', 'd'])

    # DCT basis_mat (random data is fine: only counts/shapes are checked)
    mat = Design_Matrix(np.random.randint(2, size=(500, 3)), sampling_rate=2.0)
    mat = mat.add_dct_basis()
    assert len(mat.polys) == 11
    assert mat.shape[1] == 14

    # Up and down sampling
    mat = Design_Matrix(
        np.random.randint(2, size=(500, 4)),
        sampling_rate=2.0,
        columns=['a', 'b', 'c', 'd'],
    )
    target = 1
    assert mat.upsample(target).shape[0] == mat.shape[0] * 2 - target * 2
    target = 4
    assert mat.downsample(target).shape[0] == mat.shape[0] / 2
def get_terms(confound_file, noise_transforms, noise_regressors, TR, options):
    '''
    Gathers confounds (and transformations) into an nltools Design_Matrix.

    Input [Mandatory]:
        confound_file [string]: path to confound.tsv file, given by fmriprep.
        noise_transforms [list of strings]:
            noise transforms to be applied to select noise_regressors above.
            Possible values are 'quad', 'tderiv', and 'quadtderiv', standing
            for quadratic function of value, temporal derivative of value,
            and quadratic function of temporal derivative. 'quadtderiv' may
            be requested with or without 'tderiv'.
            e.g. model_wf.inputs.inputspec.noise_transforms = ['quad', 'tderiv', 'quadtderiv']
        noise_regressors [list of strings]:
            column names in confounds.tsv, specifying desired noise
            regressors for model. IF noise_transforms are to be applied to a
            regressor, add '*' to the name. The list is read only; it is not
            modified by this function.
            e.g. model_wf.inputs.inputspec.noise_regressors = ['CSF', 'WhiteMatter', 'GlobalSignal', 'X*', 'Y*', 'Z*', 'RotX*', 'RotY*', 'RotZ*']
        TR [float]: Scanner TR value in seconds.
        options: dictionary with the following entries
            remove_steadystateoutlier [boolean]:
                Should always be True. Adds the non-steady-state outlier
                columns found in the fmriprep confounds file.
            ICA_AROMA [boolean]:
                Use AROMA error components, from fmriprep confounds file.
            poly_trend [integer. Use None to skip]:
                If given, polynomial trends will be added to run confounds,
                up to the order of the integer
                e.g. "0", gives an intercept, "1" gives intercept + linear
                trend, "2" gives intercept + linear trend + quadratic.
            dct_basis [integer. Use None to skip]:
                If given, adds a discrete cosine transform, with a length
                (in seconds) of the integer specified.
                Adds unit scaled cosine basis functions to Design_Matrix
                columns, based on spm-style discrete cosine transform for
                use in high-pass filtering. Does not add intercept/constant.

    Output:
        confounds [Design_Matrix]: the selected regressors followed by the
            requested transforms (suffixed '_quad', '_tderiv',
            '_quadtderiv'), the optional outlier / AROMA columns and the
            optional polynomial / DCT terms, built with sampling_freq 1/TR.
    '''
    # Imports are kept inside the function so it stays self-contained.
    import numpy as np
    import pandas as pd
    from nltools.data import Design_Matrix

    df_cf = pd.read_csv(confound_file, sep='\t', parse_dates=False)

    def _matching_columns(patterns):
        # Return the columns for the first pattern that matches any, else None.
        for pattern in patterns:
            subset = df_cf.filter(regex=pattern)
            if not subset.empty:
                return subset
        return None

    # Entries marked with '*' are the ones to be transformed. Build new lists
    # instead of rewriting noise_regressors in place, so the caller's list
    # keeps its markers and the function can be called again with it.
    regressor_names = [entry.replace('*', '') for entry in noise_regressors]
    transfrm_list = [entry.replace('*', '')
                     for entry in noise_regressors if '*' in entry]

    confounds = df_cf[regressor_names]
    transfrmd_cnfds = df_cf[transfrm_list]  # for transforms

    if 'quad' in noise_transforms:
        confounds = confounds.join(np.square(transfrmd_cnfds), rsuffix='_quad')

    want_tderiv = 'tderiv' in noise_transforms
    want_quadtderiv = 'quadtderiv' in noise_transforms
    if want_tderiv or want_quadtderiv:
        # Per-sample gradient of each transformed column, aligned to the
        # confounds index. The previous float np.arange(0, TR*n, TR) time
        # index is dropped: it was overwritten right after construction and
        # a float step can produce n+1 points, raising a length mismatch.
        tderiv = pd.DataFrame(
            {col: np.gradient(transfrmd_cnfds[col].values)
             for col in transfrmd_cnfds},
            index=confounds.index,
            columns=transfrmd_cnfds.columns,
        )
        if want_tderiv:
            confounds = confounds.join(tderiv, rsuffix='_tderiv')
        if want_quadtderiv:
            # The derivative is computed above even when 'tderiv' itself was
            # not requested, which used to raise a NameError.
            confounds = confounds.join(np.square(tderiv),
                                       rsuffix='_quadtderiv')

    if options['remove_steadystateoutlier']:
        # Current column naming first, then the old syntax.
        outliers = _matching_columns(('^non_steady_state_outlier',
                                      '^NonSteadyStateOutlier'))
        if outliers is not None:
            confounds = confounds.join(outliers)

    if options['ICA_AROMA']:
        # Current column naming first, then the old syntax.
        aroma = _matching_columns(('^aroma_motion', '^AROMAAggrComp'))
        if aroma is not None:
            confounds = confounds.join(aroma)

    # 1.0 / TR avoids integer division if TR is passed as an int on Python 2.
    confounds = Design_Matrix(confounds, sampling_freq=1.0 / TR)

    if isinstance(options['poly_trend'], int):
        # these do not play nice with high pass filters.
        confounds = confounds.add_poly(order=options['poly_trend'])
    if isinstance(options['dct_basis'], int):
        # these do not play nice with high pass filters.
        confounds = confounds.add_dct_basis(duration=options['dct_basis'])

    return confounds