def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
    """Fit a GLM to an fMRI data matrix.

    Parameters
    ----------
    Y : array of shape (n_time_points, n_voxels)
        The fMRI data.

    X : array of shape (n_time_points, n_regressors)
        The design matrix.

    noise_model : {'ar(N)', 'ols'}, optional
        The temporal variance model.
        To specify the order of an autoregressive model place the
        order after the characters `ar`, for example to specify a third order
        model use `ar3`.
        Default='ar1'.

    bins : int, optional
        Maximum number of discrete bins for the AR coef histogram.
        For an AR(1) model the coefficients are truncated to multiples of
        ``1 / bins``. If an autoregressive model with order greater than one
        is specified then adaptive quantification is performed and the
        coefficients will be clustered via K-means with `bins` number of
        clusters (capped at the number of voxels).
        Default=100.

    n_jobs : int, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'. Default=1.

    verbose : int, optional
        The verbosity level. Default=0.

    Returns
    -------
    labels : array of shape (n_voxels,)
        A map of values on voxels used to identify the corresponding model.
        For AR models the labels are strings; for 'ols' they are all 0.0.

    results : dict
        Keys correspond to the different label values, values are the
        regression results fitted on the voxels carrying that label.

    Raises
    ------
    ValueError
        If `noise_model` is neither 'ols' nor of the form 'arN', if N is not
        a positive integer, or if `Y` and `X` do not have the same number of
        rows.

    """
    acceptable_noise_models = ['ols', 'arN']
    is_ar = noise_model[:2] == 'ar'
    if not is_ar and noise_model != 'ols':
        raise ValueError(
            "Acceptable noise models are {0}. You provided "
            "'noise_model={1}'".format(acceptable_noise_models,
                                       noise_model)
        )
    if Y.shape[0] != X.shape[0]:
        raise ValueError('The number of rows of Y '
                         'should match the number of rows of X.'
                         ' You provided X with shape {0} '
                         'and Y with shape {1}'.
                         format(X.shape, Y.shape))

    # Validate the AR order *before* the (potentially expensive) OLS fit so
    # that a malformed `noise_model` fails fast with a helpful message.
    ar_order = None
    if is_ar:
        err_msg = ('AR order must be a positive integer specified as arN, '
                   'where N is an integer. E.g. ar3. \n'
                   'You provided {0}.'.format(noise_model))
        try:
            ar_order = int(noise_model[2:])
        except ValueError:
            raise ValueError(err_msg)
        # int() happily parses '0' and '-2'; neither is a valid AR order.
        if ar_order < 1:
            raise ValueError(err_msg)

    # Create the model
    ols_result = OLSModel(X).fit(Y)

    if not is_ar:
        # Plain OLS: a single model shared by every voxel.
        return np.zeros(Y.shape[1]), {0.0: ols_result}

    # Compute the AR coefficients from the OLS residuals
    ar_coef_ = _yule_walker(ols_result.residuals.T, ar_order)
    del ols_result  # release the OLS fit before the per-label AR fits
    if len(ar_coef_[0]) == 1:
        ar_coef_ = ar_coef_[:, 0]

    # Either bin the AR1 coefs or cluster ARN coefs
    if ar_order == 1:
        # Truncate each coefficient to a multiple of 1 / bins (in place, so
        # the array keeps its dtype).
        ar_coef_[:] = (ar_coef_ * bins).astype(int) * 1. / bins
        labels = np.array([str(val) for val in ar_coef_])
    else:  # AR(N>1) case
        n_clusters = np.min([bins, Y.shape[1]])
        kmeans = KMeans(n_clusters=n_clusters).fit(ar_coef_)
        # Replace every voxel's coefficients by its cluster centre
        ar_coef_ = kmeans.cluster_centers_[kmeans.labels_]

        # Build one string label per cluster: the centre's coefficients
        # rounded to two decimals and joined with '_'.
        # NOTE: because of the rounding, two distinct centres can end up with
        # the same label; such voxels are then fitted together below.
        cluster_labels = np.array(['_'.join(map(str, np.round(center, 2)))
                                   for center in kmeans.cluster_centers_])

        # Per-voxel label
        labels = cluster_labels[kmeans.labels_]

    unique_labels = np.unique(labels)

    # Fit one AR model per label, using the coefficients of the first voxel
    # carrying that label and the data of all voxels carrying it.
    ar_result = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_ar_model_fit)(X, ar_coef_[labels == val][0],
                               Y[:, labels == val])
        for val in unique_labels)

    # Keys are the (string) labels
    results = dict(zip(unique_labels, ar_result))
    return labels, results
def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
    """Fit a GLM to an fMRI data matrix.

    Parameters
    ----------
    Y : array of shape (n_time_points, n_voxels)
        The fMRI data.

    X : array of shape (n_time_points, n_regressors)
        The design matrix.

    noise_model : {'ar1', 'ols'}, optional
        The temporal variance model. Defaults to 'ar1'.

    bins : int, optional
        Maximum number of discrete bins for the AR(1) coef histogram.
        The AR(1) coefficients are truncated to multiples of ``1 / bins``.

    n_jobs : int, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose : int, optional
        The verbosity level. Default is 0.

    Returns
    -------
    labels : array of shape (n_voxels,)
        A map of values on voxels used to identify the corresponding model.
        For 'ar1' these are the discretized AR(1) coefficients; for 'ols'
        they are all 0.0.

    results : dict
        Keys correspond to the different label values, values are the
        regression results fitted on the voxels carrying that label.

    Raises
    ------
    ValueError
        If `noise_model` is not one of 'ar1' / 'ols', or if `Y` and `X` do
        not have the same number of rows.

    """
    acceptable_noise_models = ['ar1', 'ols']
    if noise_model not in acceptable_noise_models:
        raise ValueError(
            "Acceptable noise models are {0}. You provided "
            "'noise_model={1}'".format(acceptable_noise_models,
                                       noise_model)
        )

    if Y.shape[0] != X.shape[0]:
        raise ValueError('The number of rows of Y '
                         'should match the number of rows of X.'
                         ' You provided X with shape {0} '
                         'and Y with shape {1}'.
                         format(X.shape, Y.shape))

    # Create the model
    ols_result = OLSModel(X).fit(Y)

    if noise_model != 'ar1':
        # Plain OLS: a single model shared by every voxel.
        return np.zeros(Y.shape[1]), {0.0: ols_result}

    # Lag-1 autocorrelation of the OLS residuals, one value per voxel
    residuals = ols_result.residuals
    ar1 = ((residuals[1:] * residuals[:-1]).sum(axis=0)
           / (residuals ** 2).sum(axis=0))
    del ols_result, residuals  # release the OLS fit before the AR fits

    # Discretize to multiples of 1 / bins. The builtin `int` is used because
    # the `np.int` alias was removed from NumPy.
    ar1 = (ar1 * bins).astype(int) * 1. / bins

    # Fit the AR model according to current AR(1) estimates,
    # parallelized by creating one job per distinct coefficient value.
    labels = ar1
    vals = np.unique(ar1)
    ar_result = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_ar_model_fit)(X, val, Y[:, labels == val])
        for val in vals)

    results = dict(zip(vals, ar_result))
    return labels, results
for i in range(ds_.shape[1])) df = pd.DataFrame(np.vstack([ ds_.samples[:, 0], ds_.sa.task, ds_.sa.subject, ds_.sa.dexterity1, ]).T, columns=['y', 'task', 'subject', 'dexterity']) df['y'] = np.float_(df['y']) dm = patsy.dmatrix("y ~ task + dexterity - 1", df) X = np.asarray(dm) model = OLSModel(X) y = ds_.samples y_ = (y - np.mean(y, axis=0)) / np.std(y, axis=0) results = model.fit(y_) x = ds_.sa.dexterity2 model = OLSModel(x) betas = np.zeros((3, 7, ds.shape[1])) rsquared = np.zeros((3, ds.shape[1])) ttask = np.zeros((3, 7, ds.shape[1])) for b, result in enumerate(results):