Esempio n. 1
0
 def interpolate_country_covariate_values(
         self,
         df: pd.DataFrame,
         cov_dict: Dict[Union[float, str], pd.DataFrame],
 ):
     """
     Interpolate country covariate values onto the rows of a data frame.

     The data may carry non-standard age and year ranges; the work of
     matching those up to covariate values is delegated to
     ``get_interpolated_covariate_values``, using the population obtained
     from ``self.population.configure_for_dismod()``.

     :param df: data frame to interpolate the covariates onto
     :param cov_dict: covariate data frames, keyed by a float or string
     :return: the result of ``get_interpolated_covariate_values``
     """
     LOG.info(f"Interpolating and merging the country covariates.")
     population = self.population.configure_for_dismod()
     return get_interpolated_covariate_values(
         data_df=df,
         covariate_dict=cov_dict,
         population_df=population,
     )
Esempio n. 2
0
    def calculate_country_covariate_reference_values(
            self, parent_location_id: int, sex_id: int) -> CovariateSpecs:
        """
        Sets the reference value and maximum difference on every covariate
        spec for a parent location ID and sex ID.

        Study covariates (``s_sex`` and ``s_one``) take both values from
        ``StudyCovConstants``. For a country covariate, the reference value
        is obtained from ``get_interpolated_covariate_values`` for a single
        row spanning the full age and time range of ``self.dismod_data``,
        using only the parent location's covariate rows. The maximum
        difference is the largest absolute difference between that reference
        and the ``mean_value`` of the parent's and its children's covariate
        rows, plus ``CascadeConstants.PRECISION_FOR_REFERENCE_VALUES``.

        If the covariate has no rows for the parent location, the reference
        value is set to 0 and the maximum difference to NaN.

        Run this when you're going to make a DisMod AT database for a specific
        parent location and sex ID.

        :param parent_location_id: (int) location whose covariate rows define
            the reference value
        :param sex_id: (int) sex used for the ``s_sex`` study covariate and
            for the interpolation row
        :return: CovariateSpecs copied from ``self.covariate_specs``, with
            the reference values and max differences filled in.
        :raises ValueError: if a study covariate is named anything other
            than ``s_sex`` or ``s_one``.
        """
        # NOTE(review): if `copy` is the shallow `copy.copy`, the individual
        # spec objects are shared with `self.covariate_specs` and are mutated
        # in place below -- confirm that this is intended.
        covariate_specs = copy(self.covariate_specs)

        # Full age/time envelope of the data; used as the single interval
        # over which the reference value is interpolated.
        age_min = self.dismod_data.age_lower.min()
        age_max = self.dismod_data.age_upper.max()
        time_min = self.dismod_data.time_lower.min()
        time_max = self.dismod_data.time_upper.max()

        children = self.location_dag.children(parent_location_id)

        for c in covariate_specs.covariate_specs:
            if c.study_country == 'study':
                if c.name == 's_sex':
                    c.reference = StudyCovConstants.SEX_COV_VALUE_MAP[
                        SEX_ID_TO_NAME[sex_id]]
                    c.max_difference = StudyCovConstants.MAX_DIFFERENCE_SEX_COV
                elif c.name == 's_one':
                    c.reference = StudyCovConstants.ONE_COV_VALUE
                    c.max_difference = StudyCovConstants.MAX_DIFFERENCE_ONE_COV
                else:
                    raise ValueError(f"The only two study covariates allowed are sex and one, you tried {c.name}.")
            elif c.study_country == 'country':
                LOG.info(f"Calculating the reference and max difference for country covariate {c.covariate_id}.")

                cov_df = self.country_covariate_data[c.covariate_id]
                parent_df = (
                    cov_df.loc[cov_df.location_id == parent_location_id].copy()
                )

                # If there is no data for the parent location at all (which
                # there should be, provided by Central Comp), set the
                # reference value to 0. The check is on the parent's rows,
                # not the whole covariate table: the interpolation below only
                # ever sees `parent_df`, so data for other locations cannot
                # supply a reference value.
                if parent_df.empty:
                    reference_value = 0
                    max_difference = np.nan
                else:
                    pop_df = self.population.configure_for_dismod()
                    pop_df = (
                        pop_df.loc[pop_df.location_id == parent_location_id].copy()
                    )

                    # One row covering the whole age/time envelope for this
                    # parent location and sex.
                    df_to_interp = pd.DataFrame({
                        'location_id': parent_location_id,
                        'sex_id': [sex_id],
                        'age_lower': [age_min], 'age_upper': [age_max],
                        'time_lower': [time_min], 'time_upper': [time_max]
                    })
                    reference_value = get_interpolated_covariate_values(
                        data_df=df_to_interp,
                        covariate_dict={c.name: parent_df},
                        population_df=pop_df
                    )[c.name].iloc[0]

                    # The max difference is measured over the parent and its
                    # children together.
                    child_df = cov_df.loc[
                        cov_df.location_id.isin(children)].copy()
                    all_loc_df = pd.concat([child_df, parent_df], axis=0)
                    max_difference = np.max(
                        np.abs(all_loc_df.mean_value - reference_value)
                    ) + CascadeConstants.PRECISION_FOR_REFERENCE_VALUES

                c.reference = reference_value
                c.max_difference = max_difference
            # NOTE(review): specs whose study_country is neither 'study' nor
            # 'country' are left untouched -- confirm that is intended.
        covariate_specs.create_covariate_list()
        return covariate_specs