def test_create_residuals_df_covars_none(subject_array, subject_data):
    '''
    With an empty covariate list, the `names` columns returned by
    create_residuals_df should match residuals() computed directly on the
    corresponding columns (0 and 1) of `subject_array`.
    '''
    names = ['noggin_left', 'noggin_right']
    covars = []
    # The 2:2 slice selects zero columns, i.e. "no covariates" for residuals()
    no_covariates = subject_array[:, 2:2].T
    expected = np.array([residuals(no_covariates, subject_array[:, column])
                         for column in (0, 1)]).T
    observed = np.array(create_residuals_df(subject_data, names, covars)[names])
    np.testing.assert_almost_equal(observed, expected)
def test_create_residuals_df_covars_singular(subject_array, subject_data):
    '''
    With a single covariate, the `names` columns returned by
    create_residuals_df should match residuals() computed directly on the
    corresponding columns of `subject_array`.
    '''
    # `subject_array` and `subject_data` are requested as pytest fixtures,
    # the same way test_create_residuals_df_covars_none receives them.
    # Calling fixture functions directly is rejected by pytest >= 4.
    names, covars = ['noggin_left', 'noggin_right'], ['day of week']
    # Column 2 of subject_array is taken to be the 'day of week' covariate;
    # the 2:3 slice keeps it two dimensional (one row after transposing).
    covariate = subject_array[:, 2:3].T
    array_resids = [residuals(covariate, subject_array[:, i]) for i in [0, 1]]
    np.testing.assert_almost_equal(
        np.array(create_residuals_df(subject_data, names, covars)[names]),
        np.array(array_resids).T)
Beispiel #3
0
def create_residuals_df(df, names, covars=None):
    '''
    Calculate residuals of columns specified by names, correcting for the
    columns in covars.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A pandas data frame with subjects as rows and columns including
        brain regions and covariates. Should be numeric for the columns in
        `names` and `covars`.
    names : list
        A list of the brain regions you wish to correlate.
    covars: list, optional
        A list of covariates to correct for before correlating
        the regional measures. Each element should correspond to a
        column heading in `df`.
        Default is None, which is treated as an empty list (no covariates).

    Returns
    -------
    :class:`pandas.DataFrame`
        A copy of the `names` and `covars` columns of `df` in which every
        column in `names` is replaced by its residuals after correcting
        for `covars`. The `covars` columns are returned unchanged.

    Raises
    ------
    TypeError
        if there are non numeric entries in the columns specified by `names` or
        `covars`
    '''
    # A None default avoids sharing one mutable list between calls
    if covars is None:
        covars = []

    # Raise TypeError if any of the relevant columns are nonnumeric
    relevant = df[names + covars]
    non_numeric_cols = get_non_numeric_cols(relevant)
    if non_numeric_cols:
        raise TypeError('DataFrame columns {} are non numeric'.format(
            ', '.join(non_numeric_cols)))

    # Make a new data frame that will contain
    # the residuals for each column after correcting for
    # the covariates in covars
    df_res = relevant.copy()

    # Create the covariates array once, already oriented with one row per
    # covariate, which is the orientation handed to residuals()
    if covars:
        x = np.asarray(df[covars]).T
    else:
        # No covariates: use a constant regressor. Build it from the number
        # of rows rather than from the first column of df, because that
        # column is not validated above and may be non numeric.
        x = np.ones(len(df))

    # Calculate the residuals. Each column is replaced as a whole (rather
    # than written into in place) so that an integer column can hold
    # floating point residuals.
    for name in names:
        df_res[name] = residuals(x, df.loc[:, name])

    # Return the residuals data frame
    return df_res