def impute_data(training_data: DataFrame, testing_data: DataFrame) -> Tuple[DataFrame, DataFrame]:
    """Impute missing values in both frames using training-data statistics.

    Every fill value is derived from ``training_data`` only and then applied
    to both ``training_data`` and ``testing_data``, so nothing computed from
    the testing data is used for imputation.

    Columns filled with ``get_mode`` of the non-null training values:
    ``CODE_GENDER``, ``NAME_TYPE_SUITE``.

    Columns filled with ``np.median`` of the non-null training values:
    ``OBS_30_CNT_SOCIAL_CIRCLE``, ``DEF_30_CNT_SOCIAL_CIRCLE``,
    ``OBS_60_CNT_SOCIAL_CIRCLE``, ``DEF_60_CNT_SOCIAL_CIRCLE``,
    ``AMT_ANNUITY``, ``ANNUITY_INCOME_RATIO``, ``DAYS_LAST_PHONE_CHANGE``.

    Note:
        Both input frames are modified in place (their columns are
        reassigned); the returned objects are the same frames passed in.

    Args:
        training_data: Training data; source of all imputation statistics.
        testing_data: Testing data; filled with the training statistics.

    Returns:
        The ``(training_data, testing_data)`` pair after imputation.

    Raises:
        KeyError: If either frame lacks one of the imputed columns.
    """
    mode_columns = ("CODE_GENDER", "NAME_TYPE_SUITE")
    median_columns = (
        "OBS_30_CNT_SOCIAL_CIRCLE",
        "DEF_30_CNT_SOCIAL_CIRCLE",
        "OBS_60_CNT_SOCIAL_CIRCLE",
        "DEF_60_CNT_SOCIAL_CIRCLE",
        "AMT_ANNUITY",
        "ANNUITY_INCOME_RATIO",
        "DAYS_LAST_PHONE_CHANGE",
    )

    # Compute each statistic exactly once, from the observed (non-null)
    # training values, before any column is filled. The same value is then
    # reused for both frames instead of being recomputed from the
    # already-imputed training column.
    fill_values = {}
    for column in mode_columns:
        # get_mode is a helper defined elsewhere in this module.
        fill_values[column] = get_mode(training_data[column].dropna())
    for column in median_columns:
        fill_values[column] = np.median(training_data[column].dropna())

    # Training frame first, then testing frame, mirroring the original order.
    for frame in (training_data, testing_data):
        for column, value in fill_values.items():
            frame[column] = frame[column].fillna(value)

    return training_data, testing_data