def replace_poisoning_and_suicide(self, df):
    """Collapse poisoning/suicide sub-causes and drop homicide/road injuries.

    An ``acause`` column is attached via ``add_cause_metadata`` and used to:

    * set ``cause_id`` to the id of ``inj_poisoning`` on every row whose
      acause starts with ``"inj_poison"``;
    * set ``cause_id`` to the id of ``inj_suicide`` on every row whose
      acause starts with ``"inj_suicide"``;
    * remove rows whose acause is ``inj_homicide`` or ``inj_trans_road``.

    The ``acause`` column is dropped again before returning.

    Args:
        df: dataframe with a ``cause_id`` column.
            NOTE(review): assumes ``add_cause_metadata`` merges on
            ``cause_id`` by default -- confirm against the helper.

    Returns:
        The filtered dataframe, without the ``acause`` column.

    Raises:
        IndexError: if ``self.cause_meta_df`` has no row for
            ``inj_poisoning`` or ``inj_suicide``.
    """
    df = add_cause_metadata(df, 'acause', cause_meta_df=self.cause_meta_df)

    # Look up the target cause_ids from the cause hierarchy metadata.
    meta = self.cause_meta_df
    poisoning_id = meta.loc[
        meta.acause == "inj_poisoning", 'cause_id'].unique()[0]
    suicide_id = meta.loc[
        meta.acause == "inj_suicide", 'cause_id'].unique()[0]

    # Prefix match so that any sub-cause is re-labelled to its parent id.
    prefix_targets = (
        ("inj_poison", poisoning_id),
        ("inj_suicide", suicide_id),
    )
    for prefix, target_id in prefix_targets:
        matches_prefix = df['acause'].str.startswith(prefix)
        df.loc[matches_prefix, 'cause_id'] = target_id

    excluded_acauses = ['inj_homicide', 'inj_trans_road']
    is_excluded = df['acause'].isin(excluded_acauses)
    kept = df[~is_excluded]
    return kept.drop("acause", axis=1)
def prep_injury_proportions_file(self):
    """Load the injury proportions resource and normalise it within sex.

    Reads the CSV registered under the ``'injury_proportions'`` resource,
    keeps rows flagged ``most_detailed == 1``, maps ``acause`` to
    ``cause_id`` through ``add_cause_metadata``, and reshapes the ``rdp1``
    and ``rdp2`` columns to long format. The fourth character of each
    melted column name becomes ``sex_id`` (``'rdp1'`` -> 1, ``'rdp2'`` -> 2).
    Proportions are then rescaled so they sum to 1 within each ``sex_id``.

    Returns:
        DataFrame with columns ``cause_id``, ``sex_id`` and ``prop``.
    """
    filepath = self.conf.get_resource('injury_proportions')
    inj_props = pd.read_csv(filepath)

    inj_props = inj_props[inj_props['most_detailed'] == 1]
    inj_props = inj_props[['acause', 'rdp2', 'rdp1']]

    # Attach cause_id by acause; rows whose acause has no match in the
    # cause metadata are discarded.
    inj_props = add_cause_metadata(
        inj_props, 'cause_id', merge_col='acause',
        cause_meta_df=self.cause_meta_df)
    inj_props = inj_props.loc[inj_props['cause_id'].notnull()]
    inj_props = inj_props.drop('acause', axis=1)

    # Wide -> long: one row per (cause_id, source column).
    # NOTE(review): every non-id column is melted, so this assumes the
    # helper adds nothing besides cause_id -- confirm.
    inj_props = pd.melt(
        inj_props, id_vars=['cause_id'], var_name='sex_id',
        value_name='prop')
    inj_props['sex_id'] = inj_props['sex_id'].str[3].astype(int)

    # Use the string alias rather than the builtin ``sum``: passing the
    # builtin to groupby.transform is deprecated in recent pandas and is
    # slated to change NaN handling; 'sum' pins the groupby sum.
    sex_totals = inj_props.groupby('sex_id')['prop'].transform('sum')
    inj_props['prop'] = inj_props['prop'] / sex_totals
    return inj_props
def conform_secret_causes(self, df):
    """Replace secret causes with their parent cause.

    Attaches ``secret_cause`` and ``parent_id`` to ``df`` through
    ``add_cause_metadata``, then overwrites ``cause_id`` with ``parent_id``
    on every row where ``secret_cause == 1`` and ``cause_id != 919``.
    Causes 719, 720 and 722 have their parent forced to 723 first. The two
    helper columns are removed before returning.

    Args:
        df: dataframe with a ``cause_id`` column.

    Returns:
        The dataframe with conformed ``cause_id`` values and without the
        ``parent_id`` / ``secret_cause`` columns.

    Raises:
        AssertionError: if attaching the metadata changes the number of
            rows, or if any row has a missing ``parent_id``.
    """
    # Snapshot the row count BEFORE attaching metadata: that call is the
    # only step here that can add or drop rows, so the guard has to
    # bracket it to be meaningful.
    # NOTE(review): assumes add_cause_metadata is meant to preserve the
    # row count -- confirm against the helper.
    len_before = len(df)
    df = add_cause_metadata(
        df, add_cols=['secret_cause', 'parent_id'],
        cause_meta_df=self.cause_meta_df, **self.cache_options)
    len_after = len(df)
    if len_before != len_after:
        raise AssertionError(
            'The length of the dataframe has changed from {} to {}'.format(
                len_before, len_after))

    # These injury causes are re-parented to 723 instead of the parent
    # given in the cause metadata.
    injuries_replace_parents = [722, 720, 719]
    replaced_injuries = df['cause_id'].isin(injuries_replace_parents)
    df.loc[replaced_injuries, 'parent_id'] = 723

    secret_causes = df['secret_cause'] == 1
    # cause_id 919 is never replaced by its parent.
    # NOTE(review): the variable name suggests 919 is the "cc_code"
    # cause -- confirm.
    not_cc_code = df['cause_id'] != 919

    if df['parent_id'].isnull().values.any():
        raise AssertionError('There are missing parent cause_ids')

    # Each selected row takes its own parent_id (assignment aligns on the
    # dataframe index).
    df.loc[secret_causes & not_cc_code, 'cause_id'] = df['parent_id']

    df.drop(['parent_id', 'secret_cause'], axis=1, inplace=True)
    return df