def test_create_residuals_df_covars_none(subject_array, subject_data):
    """With no covariates, create_residuals_df should match calling
    residuals directly on each region column with an empty covariate array.
    """
    names = ['noggin_left', 'noggin_right']
    covars = []

    # The 2:2 slice selects no columns, so the covariate array passed to
    # residuals has zero rows.
    no_covariates = subject_array[:, 2:2].T
    expected = np.array([
        residuals(no_covariates, subject_array[:, column])
        for column in (0, 1)
    ]).T

    observed = np.array(
        create_residuals_df(subject_data, names, covars)[names])

    np.testing.assert_almost_equal(observed, expected)
def test_create_residuals_df_covars_singular(subject_array, subject_data):
    """With a single covariate, create_residuals_df should match calling
    residuals directly on each region column with that one covariate.

    `subject_array` and `subject_data` are requested as pytest fixtures
    (as the other tests in this file do) rather than being called directly;
    pytest refuses direct calls to fixture functions.
    """
    names, covars = ['noggin_left', 'noggin_right'], ['day of week']

    # The 2:3 slice selects the single column used as the covariate;
    # presumably this is the 'day of week' column of subject_data --
    # confirm against the fixture definitions.
    covariate = subject_array[:, 2:3].T
    array_resids = [
        residuals(covariate, subject_array[:, i])
        for i in [0, 1]
    ]

    np.testing.assert_almost_equal(
        np.array(create_residuals_df(subject_data, names, covars)[names]),
        np.array(array_resids).T)
def create_residuals_df(df, names, covars=None):
    '''
    Calculate residuals of columns specified by `names`, correcting for the
    columns in `covars`.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A pandas data frame with subjects as rows and columns including
        brain regions and covariates. Should be numeric for the columns in
        `names` and `covars`.
    names : list
        A list of the brain regions you wish to correlate.
    covars: list, optional
        A list of covariates to correct for before correlating
        the regional measures. Each element should correspond to a
        column heading in `df`.
        Default is ``None``, which is treated as an empty list
        (no covariates).

    Returns
    -------
    :class:`pandas.DataFrame`
        A copy of the `names` and `covars` columns of `df` in which each
        column in `names` has been replaced by its residuals after
        correcting for `covars`. The covariate columns are returned
        unchanged.

    Raises
    ------
    TypeError
        if there are non numeric entries in the columns specified by
        `names` or `covars`
    '''
    # Use a None sentinel rather than a mutable list as the default value.
    if covars is None:
        covars = []

    relevant_cols = names + covars

    # Raise TypeError if any of the relevant columns are nonnumeric
    non_numeric_cols = get_non_numeric_cols(df[relevant_cols])
    if non_numeric_cols:
        raise TypeError('DataFrame columns {} are non numeric'.format(
            ', '.join(non_numeric_cols)))

    # Make a new data frame that will contain
    # the residuals for each column after correcting for
    # the covariates in covars
    df_res = df[relevant_cols].copy()

    # Create your covariates array
    if len(covars) > 1:
        x = np.vstack([df[covars]])
    elif len(covars) == 1:
        x = df[covars]
    else:
        # No covariates: use a constant regressor with one entry per row.
        # Built from the row count rather than from the first column of df,
        # because that column is not covered by the numeric check above and
        # may have a non numeric dtype (e.g. a subject ID column).
        x = np.ones(len(df))

    # Calculate the residuals
    for name in names:
        df_res.loc[:, name] = residuals(x.T, df.loc[:, name])

    # Return the residuals data frame
    return df_res