def get_births(data, with_ntds=False):
    """Build the live-birth table, optionally for births with NTDs only.

    Selects the result columns for the ``'births'`` (or ``'born_with_ntds'``)
    key plus ``GROUPBY_COLUMNS``, passes them through ``pivot_data`` and then
    parses the resulting ``process`` column, which is expected to look like
    ``..._in_<year>_among_<sex>_folic_acid_<group>_vitamin_a_<...>``.

    Args:
        data: DataFrame holding the result columns and ``GROUPBY_COLUMNS``.
        with_ntds: When true, use the ``'born_with_ntds'`` columns and label
            rows ``'live_births_with_ntds'``; otherwise use ``'births'`` and
            label rows ``'live_births'``.

    Returns:
        The output of ``sort_data`` for the parsed frame, with the raw
        ``process`` column dropped and ``measure``, ``year``, ``sex`` and
        ``folic_acid_fortification_group`` columns added.
    """
    key = 'born_with_ntds' if with_ntds else 'births'
    data = pivot_data(data[project_globals.RESULT_COLUMNS(key) + GROUPBY_COLUMNS])
    data['measure'] = 'live_births_with_ntds' if with_ntds else 'live_births'

    # Pick split pieces by position with ``.str[i]`` rather than
    # tuple-unpacking the ``.str`` accessor: iterating the accessor is
    # deprecated and no longer supported by recent pandas releases, and it
    # also failed on an empty frame. Pieces beyond the second are ignored.
    year_and_rest = data['process'].str.split('_in_').str[1].str.split('_among_')
    data['year'] = year_and_rest.str[0]

    sex_and_rest = year_and_rest.str[1].str.split('_folic_acid_')
    data['sex'] = sex_and_rest.str[0]

    # ignore the vitamin A portion, it is not relevant to birth data
    fortification = sex_and_rest.str[1].str.split('_vitamin_a_')
    data['folic_acid_fortification_group'] = fortification.str[0]

    return sort_data(data.drop(columns='process'))
def aggregate_over_seed(data):
    """Collapse rows that share the same ``GROUPBY_COLUMNS`` values.

    Columns returned by ``project_globals.RESULT_COLUMNS`` for each template
    in ``project_globals.NON_COUNT_TEMPLATES`` are averaged within each
    group; every remaining non-group-by column is summed.

    Args:
        data: DataFrame containing ``GROUPBY_COLUMNS``, the non-count result
            columns, and any number of count columns.

    Returns:
        DataFrame with the summed columns first and the averaged columns
        after them, with the group-by keys restored as ordinary columns.
    """
    non_count_columns = [
        column
        for template in project_globals.NON_COUNT_TEMPLATES
        for column in project_globals.RESULT_COLUMNS(template)
    ]

    # Build the exclusion set once; the previous form re-concatenated and
    # linearly scanned the two lists for every column in ``data``.
    excluded = set(non_count_columns).union(GROUPBY_COLUMNS)
    count_columns = [column for column in data.columns if column not in excluded]

    non_count_data = (
        data[non_count_columns + GROUPBY_COLUMNS].groupby(GROUPBY_COLUMNS).mean()
    )
    count_data = data[count_columns + GROUPBY_COLUMNS].groupby(GROUPBY_COLUMNS).sum()

    # Both frames are indexed by the group-by keys, so they align row-wise.
    return pd.concat([count_data, non_count_data], axis=1).reset_index()
def get_measure_anemia_split(data, measure):
    """Return sorted data for ``measure`` with its anemia fields split out.

    Selects the result columns for ``measure`` plus ``GROUPBY_COLUMNS``, runs
    them through ``pivot_data``, then ``split_anemia_processing_column``, and
    finally ``sort_data``. Those helpers are defined elsewhere in the module.

    Args:
        data: DataFrame holding the result columns and ``GROUPBY_COLUMNS``.
        measure: Key passed to ``project_globals.RESULT_COLUMNS``.

    Returns:
        The output of ``sort_data`` for the pivoted and split frame.
    """
    wanted_columns = project_globals.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS
    pivoted = pivot_data(data[wanted_columns])
    return sort_data(split_anemia_processing_column(pivoted))
def get_measure_birthweight_split(data, measure):
    """Return sorted data for ``measure`` with birthweight fields split out.

    Selects the result columns for ``measure`` plus ``GROUPBY_COLUMNS``, runs
    them through ``pivot_data`` and ``split_birthweight_processing_column``,
    renames the ``process`` column to ``measure``, and sorts the result with
    ``sort_data``.

    Args:
        data: DataFrame holding the result columns and ``GROUPBY_COLUMNS``.
        measure: Key passed to ``project_globals.RESULT_COLUMNS``.

    Returns:
        The output of ``sort_data`` for the renamed frame.
    """
    wanted_columns = project_globals.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS
    pivoted = pivot_data(data[wanted_columns])
    split = split_birthweight_processing_column(pivoted)
    relabelled = split.rename(columns={'process': 'measure'})
    return sort_data(relabelled)
def get_measure_no_split(data, measure):
    """Return sorted data for ``measure`` without splitting ``process``.

    Selects the result columns for ``measure`` plus ``GROUPBY_COLUMNS``, runs
    them through ``pivot_data``, renames the ``process`` column to
    ``measure``, and sorts the result with ``sort_data``.

    Args:
        data: DataFrame holding the result columns and ``GROUPBY_COLUMNS``.
        measure: Key passed to ``project_globals.RESULT_COLUMNS``.

    Returns:
        The output of ``sort_data`` for the renamed frame.
    """
    wanted_columns = project_globals.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS
    pivoted = pivot_data(data[wanted_columns])
    relabelled = pivoted.rename(columns={'process': 'measure'})
    return sort_data(relabelled)
def get_measure_data(data, measure, with_cause=True):
    """Return sorted data for ``measure`` with its ``process`` column split.

    Selects the result columns for ``measure`` plus ``GROUPBY_COLUMNS``, runs
    them through ``pivot_data`` and ``split_processing_column``, and sorts
    the result with ``sort_data``.

    Args:
        data: DataFrame holding the result columns and ``GROUPBY_COLUMNS``.
        measure: Key passed to ``project_globals.RESULT_COLUMNS``.
        with_cause: Forwarded unchanged to ``split_processing_column``; its
            effect is defined by that helper.

    Returns:
        The output of ``sort_data`` for the pivoted and split frame.
    """
    wanted_columns = project_globals.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS
    pivoted = pivot_data(data[wanted_columns])
    return sort_data(split_processing_column(pivoted, with_cause))
def get_population_data(data):
    """Return the sorted population table.

    Selects ``project_globals.TOTAL_POPULATION_COLUMN``, the ``'population'``
    result columns and ``GROUPBY_COLUMNS``, runs them through ``pivot_data``,
    renames the ``process`` column to ``measure``, and sorts the result with
    ``sort_data``.

    Args:
        data: DataFrame holding the population columns and
            ``GROUPBY_COLUMNS``.

    Returns:
        The output of ``sort_data`` for the renamed frame.
    """
    wanted_columns = (
        [project_globals.TOTAL_POPULATION_COLUMN]
        + project_globals.RESULT_COLUMNS('population')
        + GROUPBY_COLUMNS
    )
    pivoted = pivot_data(data[wanted_columns])
    return sort_data(pivoted.rename(columns={'process': 'measure'}))