def get_beta_draws(self, scale):
    '''
    Build beta draws (and standard beta draws) across ALL submodels.

    :param scale: (int) multiplier for the number of draws -- each submodel
                  is asked for (n_draws * scale) draws
    :return df: data frame stacking the per-submodel results
    '''
    summary = self.covariate_summary_df().sort_values(
        ['submodel_version_id'])
    # Only the ordered model names are used here; the beta frame is dropped.
    _, model_names = self.get_covar_df()
    # The name list is repeated twice and assigned positionally onto the
    # summary sorted by submodel_version_id.
    summary['model'] = model_names + model_names
    summary.rename(columns={'Type': 'type'}, inplace=True)

    linear_json = import_json("FILEPATH.txt")
    spacetime_json = import_json("FILEPATH.txt")
    json_by_type = {'mixed': linear_json, 'spacetime': spacetime_json}

    # Submodels that received no draws contribute nothing.
    with_draws = summary.loc[summary.n_draws > 0]
    ratios = self.make_ratio_dict(self.get_covlist(linear_json))

    frames = []
    for _, submodel in with_draws.iterrows():
        frames.append(
            self.get_full_df(json_by_type,
                             submodel['type'],
                             submodel['model'],
                             submodel['Dependent_Variable'],
                             submodel['n_draws'],
                             ratios,
                             scale))
    return pd.concat(frames)
def reload_data(self):
    """
    Reload a codem model after it has been completed.

    Re-reads the space-time model json and the stored draw data, refreshes
    the model inputs from the database, rebuilds the last space-time model
    set without predictions, and merges the draw data onto the input frame.
    """
    # NOTE(review): sexn is not referenced again in the visible code.
    sexn = "female"
    if self.mod_inputs.sex_id == 1:
        sexn = "male"

    st_model_file = FILEPATH
    json_dict = space.import_json(st_model_file)
    draw_data = pd.read_hdf(FILEPATH, "data")

    data_frame, covariates, priors = Q.getCodemInputData(
        self.model_version_id, self.db_connection)
    self.mod_inputs.data_frame = data_frame
    self.mod_inputs.covariates = covariates
    self.mod_inputs.priors = priors
    self.adjust_input_data(save=False)

    design = pd.concat(
        [self.mod_inputs.data_frame, self.mod_inputs.covariates], axis=1)
    st_models_temp = All_Models(design,
                                self.mod_inputs.knockouts,
                                self.mod_inputs.linear_floor_rate,
                                json_dict,
                                self.db_connection,
                                make_preds=False)
    # Swap the freshly built models into the last entry of the existing set.
    rebuilt_models = st_models_temp.all_models[-1].models
    self.st_models.all_models[-1].models = rebuilt_models
    self.mod_inputs.bare_necessities()

    # Align the draw columns with the input frame's column names before merging.
    column_names = {"age_group_id": "age", "year_id": "year", "sex_id": "sex"}
    draw_data = draw_data.rename(columns=column_names)
    self.mod_inputs.data_frame = pd.merge(
        self.mod_inputs.data_frame, draw_data, how='inner')
    self.mod_inputs.data_frame.drop_duplicates(
        ["year", "location_id", "age"], inplace=True)
def get_covar_df(self):
    '''
    Build the covariate data frame for all submodels, holding their fixed
    effects and standard errors.

    :return df: covariate data frame for all submodels, with the 'values'
                column renamed to 'beta'
    :return keys: sorted model names (e.g. ln_rate_model001)
    '''
    linear_json = import_json("FILEPATH.txt")
    spacetime_json = import_json("FILEPATH.txt")
    json_by_type = {'mixed': linear_json, 'spacetime': spacetime_json}
    model_names = sorted(linear_json.keys())

    # Collect every per-model frame first (all 'mixed', then all
    # 'spacetime'), and only afterwards stack them.
    frames_by_type = []
    for model_type in ['mixed', 'spacetime']:
        type_frames = []
        for name in model_names:
            type_frames.append(
                self.get_fe_and_var(json_by_type, name, model_type))
        frames_by_type.append(type_frames)

    stacked = pd.concat([pd.concat(group) for group in frames_by_type])
    stacked.rename(columns={'values': 'beta'}, inplace=True)
    return stacked, model_names
def read_linear_models(self):
    """
    Read linear models created in R.

    Loads "linear_model_json.txt" and stores the resulting All_Models
    object on self.linear_models.
    """
    model_json = space.import_json("linear_model_json.txt")
    # Input data and covariates are placed side by side, column-wise.
    design = pd.concat(
        [self.mod_inputs.data_frame, self.mod_inputs.covariates], axis=1)
    knockouts = self.mod_inputs.knockouts
    floor_rate = self.mod_inputs.linear_floor_rate
    self.linear_models = All_Models(design, knockouts, floor_rate, model_json)
def read_linear_models(self):
    """
    Read linear models created in R.

    Stores the resulting All_Models object on self.linear_models. In debug
    mode a copy of the last model set's pred_mat is also kept on it as
    pred_mat_lin.
    """
    model_json = space.import_json("FILEPATH")
    # Input data and covariates are placed side by side, column-wise.
    design = pd.concat(
        [self.mod_inputs.data_frame, self.mod_inputs.covariates], axis=1)
    knockouts = self.mod_inputs.knockouts
    floor_rate = self.mod_inputs.linear_floor_rate
    self.linear_models = All_Models(
        design, knockouts, floor_rate, model_json, self.db_connection)

    if not self.debug_mode:
        return
    last_model_set = self.linear_models.all_models[-1]
    self.linear_models.pred_mat_lin = last_model_set.pred_mat.copy()
def run_linear_model_builds(self):
    """
    Run the linear model portions of all codem models.

    Calls the R build script, loads "space_time_json.txt" into
    self.json_dict, and derives self.mod_inputs.response_list from its keys.
    """
    if self.log_results:
        self.warnings.time_stamp("Running Linear Model Builds")

    # The R script runs the linear models and the linear portion of the
    # space-time models.
    r_script = self.rSource + "lm_model_prototype.R"
    subprocess.call("/share/local/codem/R-3.1.2/bin/Rscript " + r_script,
                    shell=True)

    # A missing json file suggests covariate selection picked nothing. Only a
    # message is written; the import below is still attempted.
    json_file = "space_time_json.txt"
    if not os.path.isfile(json_file):
        sys.stderr.write("No covariates seem to have been selected.")

    # Import the json file from the R build and keep the model list portion.
    self.json_dict = space.import_json(json_file)
    responses = []
    for model_name in sorted(self.json_dict.keys()):
        # Drop everything after the last underscore in the key.
        responses.append(model_name.rsplit("_", 1)[0])
    self.mod_inputs.response_list = responses
def run_linear_model_builds(self):
    """
    Run the linear model portions of all codem models.

    Calls the R build script with the model version id and model directory
    as arguments, loads the resulting json into self.json_dict, and derives
    self.mod_inputs.response_list from its keys.
    """
    if self.log_results:
        self.warnings.time_stamp("Running Linear Model Builds")

    # The R script runs the linear models and the linear portion of the
    # space-time models.
    r_script = self.rSource + "FILEPATH "
    script_args = str(self.model_version_id) + " " + self.model_dir
    subprocess.call("R_PATH " + r_script + script_args, shell=True)

    # Only a message is written when the file is missing; the import below
    # is still attempted.
    json_file = "FILEPATH"
    if not os.path.isfile(json_file):
        sys.stderr.write("No covariates seem to have been selected.")

    # Import the json file from the R build and keep the model list portion.
    self.json_dict = space.import_json(json_file)
    responses = []
    for model_name in sorted(self.json_dict.keys()):
        # Drop everything after the last underscore in the key.
        responses.append(model_name.rsplit("_", 1)[0])
    self.mod_inputs.response_list = responses
def merge_submodels_with_keys(self):
    '''
    IMPORTANT: Merge the submodel metadata (predictive validity / n_draws /
    rank) onto the covariate data frame, USING the keys from the submodel.

    The sort order is what makes this work -- the keys are sorted and the
    submodel table is sorted by submodel version ID, so they line up row by
    row.

    Note: This is the only way to link the json holding covariate betas/SE
    with the submodel metadata. Do not unsort by submodel version ID
    whatever you do! If you do, the keys will align with the wrong models.

    :return df: merged data frame, with 'mtype' and 'standard_beta' columns
                added and missing n_draws set to 0.0
    '''
    submodels = self.covariate_summary_df().sort_values(
        ['submodel_version_id'])
    beta_df, keys = self.get_covar_df()
    # Positional assignment: the key list is repeated twice and relies on
    # the sort above to match each row.
    submodels['model'] = keys + keys
    submodels.rename(columns={'Type': 'type'}, inplace=True)
    df = beta_df.merge(submodels, on=['model', 'type'])

    lm_dict = import_json("FILEPATH.txt")
    covlist = self.get_covlist(lm_dict)
    ratio_dict = self.make_ratio_dict(covlist)

    df.loc[df.n_draws.isnull(), 'n_draws'] = 0.0
    df.sort_values(['covariate', 'n_draws'], inplace=True)

    # Tag each row with the model family found in its model name.
    for mtype in ['ln_rate', 'lt_cf']:
        df.loc[df['model'].str.contains(mtype), 'mtype'] = mtype

    df["standard_beta"] = 0.
    for index, row in df.iterrows():
        standard = self.standardize_beta(row["covariate"],
                                         row["Dependent_Variable"],
                                         row["beta"], ratio_dict)
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # label-based scalar setter that replaces it.
        df.at[index, 'standard_beta'] = standard
    return df