if site_correction == 'MGLM':
    # pull out site variable and convert to one-hot
    connectivity_data = data
    enc = OneHotEncoder(sparse=False)
    enc.fit(site.reshape(-1, 1))
    site_one_hot = enc.transform(site.reshape(-1, 1))
    site_one_hot = np.transpose(site_one_hot)

    # transpose and reshape the connectivity data
    data_perm = np.reshape(connectivity_data, (n_subjects, n_regions, n_regions))
    data_perm = np.transpose(data_perm, (1, 2, 0))

    # generate MGLM regressed data
    p, V, E, Y_hat, gnorm = mglm_spd(site_one_hot, data_perm, 100)

    # calculate matrix 'residuals'
    for i in range(n_subjects):
        mglm_corrected_matrix = Y_hat[:, :, i]
        mglm_residuals_matrix = logmap_spd(mglm_corrected_matrix, data_perm[:, :, i])
        mglm_residuals_vector = np.reshape(mglm_residuals_matrix, (n_regions * n_regions,))
        connectivity_data[i, :] = mglm_residuals_vector

# set up MCCV
n_repeats = 200
test_fraction = 0.2
train_inds_all, test_inds_all, test_size = ShuffleSplitFixed(
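The residual step above assumes `logmap_spd(P, X)` returns the Riemannian log map of the observed matrix X at the fitted base point P, so each subject's "residual" is a tangent-space matrix at its site-predicted value. A minimal sketch of the standard affine-invariant log map on SPD matrices (an assumption about what the helper computes; the pipeline's own `logmap_spd` may differ in detail):

import numpy as np
from scipy.linalg import logm, sqrtm

def logmap_spd_sketch(base, x):
    # affine-invariant log map: Log_P(X) = P^{1/2} logm(P^{-1/2} X P^{-1/2}) P^{1/2}
    base_sqrt = sqrtm(base)
    base_inv_sqrt = np.linalg.inv(base_sqrt)
    tangent = base_sqrt @ logm(base_inv_sqrt @ x @ base_inv_sqrt) @ base_sqrt
    # symmetrise and drop negligible imaginary parts introduced by the matrix functions
    return np.real((tangent + tangent.T) / 2)

Under that reading, the loop computes the tangent vector at `Y_hat[:, :, i]` (the site-predicted matrix) pointing towards `data_perm[:, :, i]` (the observed matrix), flattens it, and stores it as the site-corrected feature vector for subject i.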
# join metadata and data
connectivity_metadata_data = pd.merge(definitive_subject_metadata, connectivity_subjects_data,
                                      left_on=0, right_on='subject')

# get number of subjects
n_subjects = len(connectivity_metadata_data)

# pull out connectivity data
connectivity_data = connectivity_metadata_data.iloc[:, 8:].to_numpy()

# get demographics
demographics = connectivity_metadata_data.iloc[:, 2:4].to_numpy().astype(float)
demographics = np.flip(demographics, 1)

# normalise predictors - optional
if normalise_demographics:
    demographics = demographics - np.min(demographics, axis=0)
    demographics = demographics / np.max(demographics, axis=0)

# transpose and reshape for mglm: predictors become (n_covariates, n_subjects),
# responses become (n_regions, n_regions, n_subjects) = (290, 290, 108)
demographics = np.transpose(demographics)
connectivity_data = np.transpose(connectivity_data)
connectivity_data = np.reshape(connectivity_data, (290, 290, 108))

# try to run MGLM...
p, V, E, Y_hat, gnorm = mglm_spd(demographics, connectivity_data, 10)
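mglm_spd fits the regression on the manifold of symmetric positive definite (SPD) matrices, so every (290, 290) slice passed to it needs to be SPD; a sample covariance or correlation matrix can fail this (for example, it is only positive semidefinite when regions outnumber time points). A small sanity check along these lines may be worth running before the fit (the helper name and tolerance are illustrative, not part of the original pipeline):

import numpy as np

def find_non_spd_slices(matrices, tol=1e-10):
    # matrices: (n_regions, n_regions, n_subjects) stack, as passed to mglm_spd
    bad = []
    for k in range(matrices.shape[2]):
        m = matrices[:, :, k]
        symmetric = np.allclose(m, m.T, atol=1e-8)
        positive_definite = np.all(np.linalg.eigvalsh((m + m.T) / 2) > tol)
        if not (symmetric and positive_definite):
            bad.append(k)
    return bad

# expect an empty list, e.g. find_non_spd_slices(connectivity_data) == []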