Ejemplo n.º 1
0
    def get_beta_draws(self, scale):
        """
        Build beta draws (and standard beta draws) across ALL submodels.

        Only submodels with a positive ``n_draws`` contribute; one frame is
        produced per such submodel via ``get_full_df`` and the frames are
        stacked with ``pd.concat``.

        :param scale: (int) how much to scale up the draws by -- will take (n_draws * scale) number of draws
        :return df: data frame
        """
        summary = self.covariate_summary_df().sort_values(
            ['submodel_version_id'])
        # Only the model names are needed here; the covariate frame is discarded.
        _, model_names = self.get_covar_df()

        # The name list is repeated twice so that it has one entry per row of
        # the sorted summary (presumably one pass per model type -- confirm).
        summary['model'] = model_names + model_names
        summary = summary.rename(columns={'Type': 'type'})

        linear_json = import_json("FILEPATH.txt")
        spacetime_json = import_json("FILEPATH.txt")
        json_by_type = {'mixed': linear_json, 'spacetime': spacetime_json}

        # Submodels that were allotted no draws are dropped.
        with_draws = summary.loc[summary.n_draws > 0]

        ratios = self.make_ratio_dict(self.get_covlist(linear_json))

        frames = []
        for _, submodel in with_draws.iterrows():
            frames.append(
                self.get_full_df(json_by_type, submodel['type'],
                                 submodel['model'],
                                 submodel['Dependent_Variable'],
                                 submodel['n_draws'], ratios, scale))

        return pd.concat(frames)
Ejemplo n.º 2
0
 def reload_data(self):
     """
     Reload a codem model after it has been completed.

     Re-reads the space-time model json and the stored draw data, refreshes
     the model inputs from the database, swaps the rebuilt models into the
     last entry of ``self.st_models.all_models``, and finally inner-merges the
     draw data onto the input data frame, keeping one row per
     (year, location_id, age).
     """
     # NOTE(review): sexn is not referenced by the code visible below;
     # presumably it feeds the file paths that were redacted -- confirm.
     if self.mod_inputs.sex_id == 1:
         sexn = "male"
     else:
         sexn = "female"

     spacetime_json = space.import_json(FILEPATH)
     # Draw columns are renamed to the short names used for the merge below.
     draws = pd.read_hdf(FILEPATH, "data").rename(columns={
         "age_group_id": "age",
         "year_id": "year",
         "sex_id": "sex"
     })

     frame, covariates, priors = Q.getCodemInputData(self.model_version_id,
                                                     self.db_connection)
     self.mod_inputs.data_frame = frame
     self.mod_inputs.covariates = covariates
     self.mod_inputs.priors = priors
     self.adjust_input_data(save=False)

     # Rebuild the model objects without predictions and keep only the
     # models of the last entry.
     design = pd.concat(
         [self.mod_inputs.data_frame, self.mod_inputs.covariates], axis=1)
     rebuilt = All_Models(design,
                          self.mod_inputs.knockouts,
                          self.mod_inputs.linear_floor_rate,
                          spacetime_json,
                          self.db_connection,
                          make_preds=False)
     self.st_models.all_models[-1].models = rebuilt.all_models[-1].models

     self.mod_inputs.bare_necessities()

     merged = pd.merge(self.mod_inputs.data_frame, draws, how='inner')
     self.mod_inputs.data_frame = merged.drop_duplicates(
         ["year", "location_id", "age"])
Ejemplo n.º 3
0
    def get_covar_df(self):
        """
        Get covariate data frame for all submodels. Gives their fixed effects and their standard errors.

        The per-model frames come from ``get_fe_and_var``; they are stacked
        first within each model type ('mixed', then 'spacetime') and then
        across the two types.

        :return df: covariate data frame for all submodels
        :return keys: model names (e.g. ln_rate_model001) in order
        """
        linear_json = import_json("FILEPATH.txt")
        spacetime_json = import_json("FILEPATH.txt")
        json_by_type = {'mixed': linear_json, 'spacetime': spacetime_json}

        # Sorted names of the linear-model json drive both model types.
        model_names = sorted(linear_json.keys())

        frames_by_type = []
        for mtype in ['mixed', 'spacetime']:
            frames_by_type.append([
                self.get_fe_and_var(json_by_type, name, mtype)
                for name in model_names
            ])

        stacked = pd.concat([pd.concat(frames) for frames in frames_by_type])
        stacked = stacked.rename(columns={'values': 'beta'})
        return stacked, model_names
Ejemplo n.º 4
0
 def read_linear_models(self):
     """
     Read linear models created in R.

     Loads "linear_model_json.txt" and stores an ``All_Models`` instance,
     built from the input data frame joined column-wise with the covariates,
     on ``self.linear_models``.
     """
     model_json = space.import_json("linear_model_json.txt")
     inputs = self.mod_inputs
     design = pd.concat([inputs.data_frame, inputs.covariates], axis=1)
     self.linear_models = All_Models(design, inputs.knockouts,
                                     inputs.linear_floor_rate, model_json)
Ejemplo n.º 5
0
 def read_linear_models(self):
     """
     Read linear models created in R.

     Stores an ``All_Models`` instance, built from the input data frame
     joined column-wise with the covariates, on ``self.linear_models``. In
     debug mode a copy of the last model's ``pred_mat`` is additionally kept
     as ``pred_mat_lin``.
     """
     model_json = space.import_json("FILEPATH")
     inputs = self.mod_inputs
     design = pd.concat([inputs.data_frame, inputs.covariates], axis=1)
     self.linear_models = All_Models(design, inputs.knockouts,
                                     inputs.linear_floor_rate, model_json,
                                     self.db_connection)

     if not self.debug_mode:
         return

     # Copy so the stored matrix is a separate object from the model's own.
     final_model = self.linear_models.all_models[-1]
     self.linear_models.pred_mat_lin = final_model.pred_mat.copy()
Ejemplo n.º 6
0
    def run_linear_model_builds(self):
        """
        Run the linear model portions of all codem models.

        Shells out to an R script, then loads "space_time_json.txt" into
        ``self.json_dict`` and derives ``self.mod_inputs.response_list`` from
        its sorted keys (each key with its final "_"-separated piece removed).
        """
        if self.log_results:
            self.warnings.time_stamp("Running Linear Model Builds")

        # run linear models and linear portion of space time models
        r_script = self.rSource + "lm_model_prototype.R"
        command = "/share/local/codem/R-3.1.2/bin/Rscript " + r_script
        # NOTE(review): the exit status of the R process is discarded.
        subprocess.call(command, shell=True)

        # check to make sure covariate selection picked something
        # NOTE(review): only a warning is written; execution continues and the
        # import below presumably fails if the file is absent -- confirm.
        json_exists = os.path.isfile("space_time_json.txt")
        if not json_exists:
            sys.stderr.write("No covariates seem to have been selected.")

        # import json file from R build and save the model list portion
        self.json_dict = space.import_json("space_time_json.txt")
        responses = []
        for model_name in sorted(self.json_dict.keys()):
            responses.append(model_name.rsplit("_", 1)[0])
        self.mod_inputs.response_list = responses
Ejemplo n.º 7
0
    def run_linear_model_builds(self):
        """
        Run the linear model portions of all codem models.

        Shells out to an R script, passing the model version id and the model
        directory as arguments, then loads the resulting json into
        ``self.json_dict`` and derives ``self.mod_inputs.response_list`` from
        its sorted keys (each key with its final "_"-separated piece removed).
        """
        if self.log_results:
            self.warnings.time_stamp("Running Linear Model Builds")

        # run linear models and linear portion of space time models
        command = "".join([
            "R_PATH ",
            self.rSource,
            "FILEPATH ",
            str(self.model_version_id),
            " ",
            self.model_dir,
        ])
        # NOTE(review): the exit status of the R process is discarded.
        subprocess.call(command, shell=True)

        # NOTE(review): only a warning is written; execution continues and the
        # import below presumably fails if the file is absent -- confirm.
        json_exists = os.path.isfile("FILEPATH")
        if not json_exists:
            sys.stderr.write("No covariates seem to have been selected.")

        # import json file from R build and save the model list portion
        self.json_dict = space.import_json("FILEPATH")
        responses = []
        for model_name in sorted(self.json_dict.keys()):
            responses.append(model_name.rsplit("_", 1)[0])
        self.mod_inputs.response_list = responses
Ejemplo n.º 8
0
    def merge_submodels_with_keys(self):
        '''
        IMPORTANT: Merge the submodel metadata with predictive validity/n_draws/rank onto the covariate dataframe.
        USES the keys from the submodel. The sorted values are key -- the keys are sorted and the submodel table
        is sorted, and they line up row by row.

        Note: This is the only way to link the json with covariate betas/SE with the submodel metadata.  Do not
        unsort by submodel version ID whatever you do! If you do the keys will align with the wrong models.

        After the merge, missing ``n_draws`` are set to 0, rows are sorted by
        covariate and n_draws, an ``mtype`` column ('ln_rate' / 'lt_cf') is
        derived from the model name, and a ``standard_beta`` column is filled
        row by row from ``standardize_beta``.

        :return df: data frame
        '''
        submodels = self.covariate_summary_df().sort_values(
            ['submodel_version_id'])
        beta_df, keys = self.get_covar_df()

        # The key list is repeated twice so it has one entry per sorted
        # submodel row (presumably one pass per model type -- confirm).
        submodels['model'] = keys + keys
        submodels.rename(columns={'Type': 'type'}, inplace=True)

        df = beta_df.merge(submodels, on=['model', 'type'])

        lm_dict = import_json("FILEPATH.txt")
        covlist = self.get_covlist(lm_dict)
        ratio_dict = self.make_ratio_dict(covlist)

        df.loc[df.n_draws.isnull(), 'n_draws'] = 0.0

        df.sort_values(['covariate', 'n_draws'], inplace=True)
        for mtype in ['ln_rate', 'lt_cf']:
            df.loc[df['model'].str.contains(mtype), 'mtype'] = mtype

        df["standard_beta"] = 0.
        for index, row in df.iterrows():
            standard = self.standardize_beta(row["covariate"],
                                             row["Dependent_Variable"],
                                             row["beta"], ratio_dict)
            # DataFrame.set_value was removed in pandas 1.0; .at is the
            # label-based scalar setter that replaces it.
            df.at[index, 'standard_beta'] = standard
        return df