def test_append(sim_design_matrix):
    mats = sim_design_matrix.append(sim_design_matrix)
    assert mats.shape[0] == sim_design_matrix.shape[0] * 2
    # Keep polys separate by default
    assert (mats.shape[1] - 4) == (sim_design_matrix.shape[1] - 4) * 2
    # Otherwise stack them
    mats = sim_design_matrix.append(sim_design_matrix, keep_separate=False)
    assert mats.shape[1] == sim_design_matrix.shape[1]
    assert mats.shape[0] == sim_design_matrix.shape[0] * 2

    # Keep a single stimulus column separate
    assert (sim_design_matrix.append(sim_design_matrix,
                                     unique_cols=["face_A"]).shape[1] == 5)

    # Keep a common stimulus class separate
    assert (sim_design_matrix.append(sim_design_matrix,
                                     unique_cols=["face*"]).shape[1] == 6)
    # Keep a common stimulus class and a different single stim separate
    assert (sim_design_matrix.append(sim_design_matrix,
                                     unique_cols=["face*",
                                                  "house_A"]).shape[1] == 7)
    # Keep multiple stimulus classes separate
    assert (sim_design_matrix.append(sim_design_matrix,
                                     unique_cols=["face*",
                                                  "house*"]).shape[1] == 8)

    # Growing a multi-run design matrix; keeping things separate
    num_runs = 4
    all_runs = Design_Matrix(sampling_freq=0.5)
    for i in range(num_runs):
        run = Design_Matrix(
            np.array([
                [1, 0, 0, 0],
                [1, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 0],
                [0, 0, 0, 1],
                [0, 0, 0, 1],
            ]),
            sampling_freq=0.5,
            columns=["stim_A", "stim_B", "cond_C", "cond_D"],
        )
        run = run.add_poly(2)
        all_runs = all_runs.append(run, unique_cols=["stim*", "cond*"])
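    # 4 runs × 11 TRs = 44 rows; columns: 4 unique task regressors + 3 unique
    # poly terms per run = 7 columns × 4 runs = 28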
    assert all_runs.shape == (44, 28)
Example #2
def make_motion_covariates(mc, tr):
    z_mc = zscore(mc)  # z-score the realignment parameters before expanding
    all_mc = pd.concat([z_mc, z_mc**2, z_mc.diff(), z_mc.diff()**2], axis=1)
    all_mc.fillna(value=0, inplace=True)
    return Design_Matrix(all_mc, sampling_freq=1/tr)


vmpfc = zscore(pd.DataFrame(vmpfc, columns=['vmpfc']))

csf_mask = Brain_Data(os.path.join(base_dir, 'masks', 'csf.nii.gz'))
csf = zscore(pd.DataFrame(smoothed.extract_roi(mask=csf_mask).T, columns=['csf']))

spikes = smoothed.find_spikes(global_spike_cutoff=3, diff_spike_cutoff=3)
covariates = pd.read_csv(layout.get(subject=sub, scope='derivatives', extension='.tsv')[0].path, sep='\t')
mc = covariates[['trans_x','trans_y','trans_z','rot_x', 'rot_y', 'rot_z']]
mc_cov = make_motion_covariates(mc, tr)
dm = Design_Matrix(pd.concat([vmpfc, csf, mc_cov, spikes.drop(labels='TR', axis=1)], axis=1), sampling_freq=1/tr)
dm = dm.add_poly(order=2, include_lower=True)

smoothed.X = dm
stats = smoothed.regress()

vmpfc_conn = stats['beta'][0]

vmpfc_conn.threshold(upper=25, lower=-25).plot()

Notice how this analysis identifies the default mode network? This approach is very similar to the [original papers](https://www.pnas.org/content/102/27/9673/) that identified the default mode network using resting-state data.

For an actual analysis, we would need to repeat this procedure over all of the participants in our sample and then perform a second level group analysis to identify which voxels are consistently coactive with the vmPFC. We will explore group level analyses in the exercises.
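As a rough sketch of what that second-level step could look like (hypothetical: `run_subject` stands in for the first-level analysis above and returns each participant's vmPFC connectivity beta map, and `subjects` is a list of participant IDs):

conn_maps = Brain_Data()
for sub in subjects:  # subjects is a hypothetical list of participant IDs
    conn_maps = conn_maps.append(run_subject(sub))  # stack subject-level beta maps
group_stats = conn_maps.ttest()  # one-sample t-test at every voxel
group_stats['t'].plot()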

### Psychophysiological Interactions

Suppose we were interested in seeing whether the vmPFC is connected to other regions differently when performing a finger-tapping task compared to all other conditions. To compute this analysis, we will need to create a new design matrix that combines the motor regressors, and then calculate an interaction term between the seed region activity (e.g., vmPFC) and the condition of interest (e.g., motor).
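A minimal sketch of that interaction regressor (assuming a hypothetical task design matrix `task_dm` with motor columns; the column names below are placeholders):

ppi_dm = dm.copy()
ppi_dm['motor'] = task_dm[['finger_tap_left', 'finger_tap_right']].sum(axis=1)  # hypothetical motor regressors
ppi_dm['vmpfc_x_motor'] = ppi_dm['motor'] * ppi_dm['vmpfc']  # the psychophysiological interaction term
smoothed.X = Design_Matrix(ppi_dm, sampling_freq=1/tr)
ppi_stats = smoothed.regress()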
Example #3
def test_designmat(tmpdir):

    mat1 = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    }, sampling_rate=2.0, polys=['intercept'])

    mat2 = Design_Matrix({
        'X': [9, 9, 2, 7, 5, 0, 1, 1, 1, 2],
        'Y': [3, 3, 3, 6, 9, 0, 1, 10, 1, 10],
        'Z': [2, 6, 3, 2, 7, 0, 1, 7, 8, 8],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    }, sampling_rate=2.0, polys=['intercept'])

    # Appending
    # Basic horz cat
    new_mat = mat1.append(mat2, axis=1)
    assert new_mat.shape == (mat1.shape[0], mat1.shape[1] + mat2.shape[1])
    both_cols = list(mat1.columns) + list(mat2.columns)
    assert all(new_mat.columns == both_cols)
    # Basic vert cat
    new_mat = mat1.append(mat1, axis=0)
    assert new_mat.shape == (mat1.shape[0] * 2, mat1.shape[1] + 1)
    # Advanced vert cat
    new_mat = mat1.append(mat1, axis=0, keep_separate=False)
    assert new_mat.shape == (mat1.shape[0] * 2, mat1.shape[1])
    # More advanced vert cat
    new_mat = mat1.append(mat1, axis=0, add_poly=2)
    assert new_mat.shape == (mat1.shape[0] * 2, 9)

    # Convolution doesn't affect the intercept
    assert all(mat1.convolve().iloc[:, -1] == mat1.iloc[:, -1])
    # but it does change the other columns
    assert (mat1.convolve().iloc[:, :3].values != mat1.iloc[:, :3].values).any()

    # Test variance inflation factors
    expected_vifs = np.array([1.03984251, 1.02889877, 1.02261945])
    assert np.allclose(expected_vifs, mat1.vif())

    # Polynomial terms
    assert mat1.add_poly(order=4).shape[1] == mat1.shape[1] + 4
    assert mat1.add_poly(order=4, include_lower=False).shape[1] == mat1.shape[1] + 1

    # Z-score only a subset of columns
    z = mat1.zscore(columns=['X', 'Z'])
    assert (z['Y'] == mat1['Y']).all()
    assert z.shape == mat1.shape

    # Clean redundant columns
    mat = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'A': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'C': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'intercept': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    }, sampling_rate=2.0, polys=['intercept'])
    mat = mat[['X', 'A', 'Y', 'Z', 'C', 'intercept']]
    assert all(mat.clean().columns == ['X', 'Y', 'Z', 'intercept'])

    # Replace data
    mat = Design_Matrix({
        'X': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'A': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2],
        'Y': [3, 0, 0, 6, 9, 9, 10, 10, 1, 10],
        'Z': [2, 2, 2, 2, 7, 0, 1, 3, 3, 2],
        'C': [1, 4, 2, 7, 5, 9, 2, 1, 3, 2]
    }, sampling_rate=2.0)

    mat = mat.replace_data(np.ones((mat.shape[0], mat.shape[1] - 1)),
                           column_names=['a', 'b', 'c', 'd'])

    assert np.allclose(mat.values, 1)
    assert all(mat.columns == ['a', 'b', 'c', 'd'])

    # DCT basis
    mat = Design_Matrix(np.random.randint(2, size=(500, 3)), sampling_rate=2.0)
    mat = mat.add_dct_basis()
    assert len(mat.polys) == 11
    assert mat.shape[1] == 14

    # Up- and down-sampling
    mat = Design_Matrix(np.random.randint(2, size=(500, 4)), sampling_rate=2.0, columns=['a', 'b', 'c', 'd'])
    target = 1
    assert mat.upsample(target).shape[0] == mat.shape[0] * 2 - target * 2
    target = 4
    assert mat.downsample(target).shape[0] == mat.shape[0] / 2
Example #4
#########################################################################
# We can also easily visualize the design matrix using an SPM/AFNI/FSL style heatmap

dm.heatmap()

#########################################################################
# Adding nuisance covariates
# ---------------------------
#
# Legendre Polynomials
# ********************
#
# A common operation is adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Consistent with other software packages, these are orthogonal Legendre polynomials on the scale -1 to 1.

# with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend
dm_with_nuisance = dm.add_poly(2, include_lower=True)
dm_with_nuisance.heatmap()

#########################################################################
# We can see that 3 new columns were added to the design matrix. We can also inspect the change to the meta-data. Notice that the Design_Matrix is now aware of the three polynomial terms.

print(dm_with_nuisance.details())

#########################################################################
# Discrete Cosine Basis Functions
# *******************************
#
# Polynomial variables are not the only type of nuisance covariate that can be generated for you. Design_Matrix also supports the creation of discrete-cosine basis functions à la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s. Let's create DCT filters for 20s durations in our toy data.

# Short filter duration for our simple example
dm_with_cosine = dm.add_dct_basis(duration=20)
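#########################################################################
# As with the polynomial terms, the meta-data now tracks the added cosine
# basis columns (a small completion for symmetry with the example below;
# `details()` is the same method used there).

print(dm_with_cosine.details())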
Example #5
    def get_terms(confound_file, noise_transforms, noise_regressors, TR, options):
        '''
        Gathers confounds (and transformations) into a pandas dataframe.
        Input [Mandatory]:
            confound_file [string]: path to confound.tsv file, given by fmriprep.
            noise_transforms [list of strings]:
                noise transforms to be applied to select noise_regressors above. Possible values are 'quad', 'tderiv', and 'quadtderiv', standing for quadratic function of value, temporal derivative of value, and quadratic function of temporal derivative.
                e.g. model_wf.inputs.inputspec.noise_transforms = ['quad', 'tderiv', 'quadtderiv']
            noise_regressors [list of strings]:
                column names in confounds.tsv, specifying desired noise regressors for model.
                IF noise_transforms are to be applied to a regressor, add '*' to the name.
                e.g. model_wf.inputs.inputspec.noise_regressors = ['CSF', 'WhiteMatter', 'GlobalSignal', 'X*', 'Y*', 'Z*', 'RotX*', 'RotY*', 'RotZ*']
            TR [float]:
                Scanner TR value in seconds.
            options: dictionary with the following entries
                remove_steadystateoutlier [boolean]:
                    Should always be True. Remove steady state outliers from bold timecourse, specified in fmriprep confounds file.
                ICA_AROMA [boolean]:
                    Use AROMA error components, from fmriprep confounds file.
                poly_trend [integer. Use None to skip]:
                    If given, polynomial trends will be added to run confounds, up to the order of the integer
                    e.g. "0", gives an intercept, "1" gives intercept + linear trend,
                    "2" gives intercept + linear trend + quadratic.
                dct_basis [integer. Use None to skip]:
                    If given, adds a discrete cosine transform basis with a length (in seconds) of the integer specified.
                        Adds unit-scaled cosine basis functions to Design_Matrix columns,
                        based on the SPM-style discrete cosine transform, for use in
                        high-pass filtering. Does not add an intercept/constant.
        '''
        import numpy as np
        import pandas as pd
        from nltools.data import Design_Matrix

        df_cf = pd.DataFrame(pd.read_csv(confound_file, sep='\t', parse_dates=False))
        transfrm_list = []
        for idx, entry in enumerate(noise_regressors): # get entries marked with *, indicating they should be transformed.
            if '*' in entry:
                transfrm_list.append(entry.replace('*', '')) # add entry to transformation list if it has *.
                noise_regressors[idx] = entry.replace('*', '')

        confounds = df_cf[noise_regressors]
        transfrmd_cnfds = df_cf[transfrm_list] # for transforms
        TR_time = pd.Series(np.arange(0.0, TR*transfrmd_cnfds.shape[0], TR)) # time series for derivatives.
        if 'quad' in noise_transforms:
            quad = np.square(transfrmd_cnfds)
            confounds = confounds.join(quad, rsuffix='_quad')
        if 'tderiv' in noise_transforms:
            tderiv = pd.DataFrame(pd.Series(np.gradient(transfrmd_cnfds[col]), TR_time)
                                  for col in transfrmd_cnfds).T
            tderiv.columns = transfrmd_cnfds.columns
            tderiv.index = confounds.index
            confounds = confounds.join(tderiv, rsuffix='_tderiv')
        if 'quadtderiv' in noise_transforms:
            if 'tderiv' not in noise_transforms:  # compute the temporal derivative if it wasn't already
                tderiv = pd.DataFrame(pd.Series(np.gradient(transfrmd_cnfds[col]), TR_time)
                                      for col in transfrmd_cnfds).T
                tderiv.columns = transfrmd_cnfds.columns
                tderiv.index = confounds.index
            quadtderiv = np.square(tderiv)
            confounds = confounds.join(quadtderiv, rsuffix='_quadtderiv')
        if options['remove_steadystateoutlier']:
            if not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^non_steady_state_outlier')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^non_steady_state_outlier')]])
            elif not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^NonSteadyStateOutlier')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^NonSteadyStateOutlier')]]) # old syntax
        if options['ICA_AROMA']:
            if not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^aroma_motion')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^aroma_motion')]])
            elif not df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^AROMAAggrComp')]].empty:
                confounds = confounds.join(df_cf[df_cf.columns[df_cf.columns.to_series().str.contains('^AROMAAggrComp')]]) # old syntax
        confounds = Design_Matrix(confounds, sampling_freq=1/TR)
        if isinstance(options['poly_trend'], int):
            confounds = confounds.add_poly(order=options['poly_trend'])  # these do not play nice with high-pass filters
        if isinstance(options['dct_basis'], int):
            confounds = confounds.add_dct_basis(duration=options['dct_basis'])  # these do not play nice with high-pass filters
        return confounds
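A hedged sketch of calling this helper directly (the confound file path and regressor names below are hypothetical; in the original workflow these arrive via model_wf.inputs.inputspec, as the docstring shows):

    confounds_dm = get_terms(
        confound_file='sub-01_task-rest_desc-confounds_timeseries.tsv',  # hypothetical fmriprep output
        noise_transforms=['quad', 'tderiv', 'quadtderiv'],
        noise_regressors=['csf', 'white_matter', 'trans_x*', 'trans_y*', 'trans_z*'],
        TR=2.0,
        options={'remove_steadystateoutlier': True, 'ICA_AROMA': False,
                 'poly_trend': 2, 'dct_basis': None},
    )
    print(confounds_dm.details())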
Example #6
#########################################################################
# Let's take a look at some of that meta-data. We can see that no columns have been convolved yet and this design matrix has no polynomial terms (e.g. an intercept or linear trend).

print(dm.details())

#########################################################################
# We can also easily visualize the design matrix using an SPM/AFNI/FSL style heatmap

dm.heatmap()

#########################################################################
# A common operation might include adding an intercept and polynomial trend terms (e.g. linear and quadratic) as nuisance regressors. This is easy to do. Note that polynomial terms are normalized to unit variance (i.e. mean = 0, std = 1) before inclusion to keep values on approximately the same scale.

# with include_lower = True (default), 2 here means: 0-intercept, 1-linear-trend, 2-quadratic-trend
dm_with_nuisance = dm.add_poly(2, include_lower=True)
dm_with_nuisance.heatmap()

#########################################################################
# We can see that 3 new columns were added to the design matrix. We can also inspect the change to the meta-data. Notice that the Design_Matrix is now aware of the three polynomial terms.

print(dm_with_nuisance.details())

#########################################################################
# Polynomial variables are not the only type of nuisance covariate that can be generated for you. Design_Matrix also supports the creation of discrete-cosine basis functions à la SPM. This will create a series of filters added as new columns based on a specified duration, defaulting to 180s.

# Short filter duration for our simple example
dm_with_cosine = dm.add_dct_basis(duration=5)
print(dm_with_cosine.details())

#########################################################################
outlier_cutoff = 3

file_list = [x for x in glob.glob(os.path.join(base_dir, '*/func/*preproc*gz')) if 'denoised' not in x] 
for f in file_list:
    sub = os.path.basename(f).split('_')[0]

    data = Brain_Data(f)
    smoothed = data.smooth(fwhm=fwhm)

    spikes = smoothed.find_spikes(global_spike_cutoff=outlier_cutoff, diff_spike_cutoff=outlier_cutoff)
    covariates = pd.read_csv(glob.glob(os.path.join(base_dir, sub, 'func', '*tsv'))[0], sep='\t')
    mc = covariates[['trans_x','trans_y','trans_z','rot_x', 'rot_y', 'rot_z']]
    mc_cov = make_motion_covariates(mc, tr)
    csf = covariates['csf'] # Use CSF from fmriprep output
    dm = Design_Matrix(pd.concat([csf, mc_cov, spikes.drop(labels='TR', axis=1)], axis=1), sampling_freq=1/tr)
    dm = dm.add_poly(order=2, include_lower=True) # Add Intercept, Linear and Quadratic Trends

    smoothed.X = dm
    stats = smoothed.regress()
    stats['residual'].data = np.float32(stats['residual'].data) # cast as float32 to reduce storage space
    stats['residual'].write(os.path.join(base_dir, sub, 'func', f'{sub}_denoise_smooth{fwhm}mm_task-sherlockPart1_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz'))


We also saved the cropped denoised viewing data as an hdf5 file to speed up loading times when using nltools.

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'

for scan in ['Part1', 'Part2']:
    file_list = glob.glob(os.path.join(data_dir, '*', 'func', f'*crop*{scan}*nii.gz'))
    for f in file_list:
        data = Brain_Data(f)
        data.write(f"{f.split('.nii.gz')[0]}.hdf5")  # save as HDF5; nltools infers the format from the extension