Example #1
0
 def replace_poisoning_and_suicide(self, df):
     """Collapse poisoning/suicide sub-causes and drop two injury causes.

     Rows whose ``acause`` starts with ``"inj_poison"`` receive the
     ``cause_id`` that ``self.cause_meta_df`` lists for ``"inj_poisoning"``;
     rows whose ``acause`` starts with ``"inj_suicide"`` receive the
     ``cause_id`` listed for ``"inj_suicide"``. Rows whose ``acause`` is
     ``"inj_homicide"`` or ``"inj_trans_road"`` are then removed.

     Args:
         df: DataFrame with a ``cause_id`` column. An ``acause`` column is
             expected to be present after ``add_cause_metadata`` runs
             (presumably added by that helper -- confirm against it).

     Returns:
         The filtered DataFrame with the ``acause`` column dropped.

     Raises:
         IndexError: if ``self.cause_meta_df`` has no row for
             ``"inj_poisoning"`` or ``"inj_suicide"``.
     """
     df = add_cause_metadata(df, 'acause', cause_meta_df=self.cause_meta_df)

     # Look up the target cause_ids once from the cause metadata.
     meta = self.cause_meta_df
     poisoning_id = meta.loc[meta.acause == "inj_poisoning",
                             'cause_id'].unique()[0]
     suicide_id = meta.loc[meta.acause == "inj_suicide",
                           'cause_id'].unique()[0]

     # Re-point every matching acause prefix at its aggregate cause_id.
     for prefix, target_id in (("inj_poison", poisoning_id),
                               ("inj_suicide", suicide_id)):
         matches_prefix = df['acause'].str.startswith(prefix)
         df.loc[matches_prefix, 'cause_id'] = target_id

     excluded = df['acause'].isin(['inj_homicide', 'inj_trans_road'])
     return df[~excluded].drop("acause", axis=1)
Example #2
0
 def prep_injury_proportions_file(self):
     """Load the injury proportions resource as long-format, normalised data.

     Reads the CSV located by ``self.conf.get_resource('injury_proportions')``,
     keeps rows with ``most_detailed == 1``, attaches ``cause_id`` through
     ``add_cause_metadata`` (merging on ``acause``), discards rows without a
     ``cause_id``, and melts the ``rdp2`` / ``rdp1`` columns into one row per
     ``(cause_id, sex_id)``. ``sex_id`` is the fourth character of the
     melted column name converted to int (``rdp2`` -> 2, ``rdp1`` -> 1).
     ``prop`` is rescaled so that it sums to 1 within each ``sex_id``.

     Returns:
         DataFrame with columns ``cause_id``, ``sex_id`` and ``prop``.
     """
     filepath = self.conf.get_resource('injury_proportions')
     inj_props = pd.read_csv(filepath)
     inj_props = inj_props[inj_props['most_detailed'] == 1]
     inj_props = inj_props[['acause', 'rdp2', 'rdp1']]
     inj_props = add_cause_metadata(inj_props,
                                    'cause_id',
                                    merge_col='acause',
                                    cause_meta_df=self.cause_meta_df)
     # Rows whose acause found no cause_id in the metadata are dropped.
     inj_props = inj_props.loc[inj_props['cause_id'].notnull()]
     inj_props = inj_props.drop('acause', axis=1)
     inj_props = pd.melt(inj_props,
                         id_vars=['cause_id'],
                         var_name='sex_id',
                         value_name='prop')
     # Column names are 'rdp<digit>'; the digit at position 3 is the sex_id.
     inj_props['sex_id'] = inj_props['sex_id'].str[3].astype(int)
     # Use the string alias rather than the builtin ``sum``: passing the
     # builtin to transform is deprecated in recent pandas, while 'sum'
     # always dispatches to the groupby sum.
     inj_props['total_prop'] = inj_props.groupby(
         'sex_id')['prop'].transform('sum')
     inj_props['prop'] = inj_props['prop'] / inj_props['total_prop']
     inj_props = inj_props.drop('total_prop', axis=1)
     return inj_props
Example #3
0
    def conform_secret_causes(self, df):
        """Replace secret causes with their parent cause.

        ``secret_cause`` and ``parent_id`` are attached via
        ``add_cause_metadata``. Causes 722, 720 and 719 have their parent
        forced to 723. Every row flagged ``secret_cause == 1`` whose
        ``cause_id`` is not 919 then has its ``cause_id`` overwritten with
        its ``parent_id``.

        Args:
            df: DataFrame with a ``cause_id`` column.

        Returns:
            The DataFrame with ``cause_id`` conformed and the helper columns
            ``parent_id`` and ``secret_cause`` removed.

        Raises:
            AssertionError: if any row (secret or not) has a null
                ``parent_id``, or if the row count changes during the
                replacement.
        """
        df = add_cause_metadata(df,
                                add_cols=['secret_cause', 'parent_id'],
                                cause_meta_df=self.cause_meta_df,
                                **self.cache_options)

        # These injury causes are re-parented to 723 before the swap.
        reparented = df['cause_id'].isin([722, 720, 719])
        df.loc[reparented, 'parent_id'] = 723

        # Masks are computed before cause_id is modified below.
        is_secret = df['secret_cause'].eq(1)
        # 919 is exempt from replacement (presumably the "cc code" cause --
        # confirm against the cause hierarchy).
        is_not_cc = df['cause_id'].ne(919)
        rows_before = len(df)

        if df['parent_id'].isnull().any():
            raise AssertionError('There are missing parent cause_ids')

        to_replace = is_secret & is_not_cc
        df.loc[to_replace, 'cause_id'] = df['parent_id']

        rows_after = len(df)
        if rows_before != rows_after:
            raise AssertionError(
                'The length of the dataframe has changed from {} to {}'.format(
                    rows_before, rows_after))

        df.drop(['parent_id', 'secret_cause'], axis=1, inplace=True)
        return df