def get_death_rates_by_agegroup(age_breakpoints: List[float], country_iso_code: str):
    """
    Find death rates from UN data that are specific to the age groups provided.

    Returns a tuple of:
      - a dict mapping each age breakpoint to a list of death rates (one per year)
      - the list of years those rates correspond to
    """
    age_breakpoints = _check_age_breakpoints(age_breakpoints)
    # NB: the underlying data is fetched by _get_death_rates; no direct
    # database handle is needed here (a previously unused local was removed).
    rate_df = _get_death_rates(country_iso_code)
    years = rate_df["mean_year"].unique().tolist()
    orig_ages = rate_df["start_age"].unique().tolist()
    year_step = 5  # UN death data comes in 5-year age bands.
    year_rates = {}
    for year in years:
        orig_rates = rate_df[rate_df["mean_year"] == year]["death_rate"].tolist()
        new_rates = downsample_rate(orig_rates, orig_ages, year_step, age_breakpoints)
        year_rates[year] = new_rates

    # Pivot from per-year lists to per-age-group time series.
    death_rates_by_agegroup = {}
    for i, age in enumerate(age_breakpoints):
        death_rates_by_agegroup[age] = [year_rates[y][i] for y in years]

    return death_rates_by_agegroup, years
def get_population_by_agegroup(age_breakpoints: List[int], country_iso_code: str, region: str = None, year: int = 2020):
    """
    Find population for age bins.
    Returns a list of ints, each item being the population for that age bracket.
    """
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    age_breakpoints = _check_age_breakpoints(age_breakpoints)
    input_db = get_input_db()
    pop_df = input_db.query(
        "population",
        conditions={"iso3": country_iso_code, "year": year, "region": region or None},
    ).sort_values(["start_age"])

    # Rows without population figures would corrupt the downsampling, so drop them.
    with_data = pop_df.dropna(subset=["population"])
    source_ages = with_data["start_age"].tolist()
    source_pops = with_data["population"].tolist()
    assert len(source_ages) == len(source_pops)

    binned = downsample_quantity(source_pops, source_ages, age_breakpoints)
    return [int(p) for p in binned]
def get_mobility_data(country_iso_code: str, region: str, base_date: datetime, location_map: dict):
    """
    Get daily Google mobility data for locations, for a given country.
    Times are in days since a given base date.
    The location map parameter transforms Google Mobility locations
    into Autumn-friendly locations.

    Google mobility provides us with:
        - workplaces
        - retail_and_recreation
        - grocery_and_pharmacy
        - parks
        - transit_stations
        - residential

    An example mapping would be
    {
        "work": ["workplaces"],
        "other_locations": [
            "retail_and_recreation",
            "grocery_and_pharmacy",
            "parks",
            "transit_stations",
        ],
    }
    """
    input_db = get_input_db()
    mob_df = input_db.query(
        "mobility",
        conditions={"iso3": country_iso_code, "region": region or None},
    )

    # Drop mappings with no Google locations, then average each group of
    # Google columns into a single Autumn-friendly location column.
    revised_location_map = {key: value for key, value in location_map.items() if value}
    for autumn_loc, google_locs in revised_location_map.items():
        mob_df[autumn_loc] = 0
        for google_loc in google_locs:
            mob_df[autumn_loc] += mob_df[google_loc]
        mob_df[autumn_loc] /= len(google_locs)

    # Restrict to dates on/after the base date, expressed as day offsets.
    mob_df["date"] = pd.to_datetime(mob_df["date"], format="%Y-%m-%d")
    mob_df = mob_df.sort_values(["date"])
    mob_df = mob_df[mob_df["date"] >= base_date]
    days = [(d - base_date).days for d in mob_df["date"]]
    loc_mobility_values = {loc: mob_df[loc].tolist() for loc in revised_location_map}
    return loc_mobility_values, days
def get_crude_birth_rate(country_iso_code: str):
    """
    Gets crude birth rate over time for a given country.
    Returns a list of birth rates and a list of years.
    """
    input_db = get_input_db()
    df = input_db.query("birth_rates", conditions=[f"iso3='{country_iso_code}'"])
    # Return series ordered chronologically by the bracket midpoint year.
    df = df.sort_values(["mean_year"])
    return df["birth_rate"].tolist(), df["mean_year"].tolist()
def _get_life_expectancy(country_iso_code: str):
    """
    Load UN life expectancy data for a country, keyed by mean year and start age.
    Returns a dataframe with columns mean_year, start_age, life_expectancy.
    """
    input_db = get_input_db()
    df = input_db.query("life_expectancy", conditions=[f"iso3='{country_iso_code}'"],)

    # Each record spans [start_year, end_year]; summarise it by its midpoint.
    df["mean_year"] = (df["start_year"] + df["end_year"]) / 2

    keep = ["mean_year", "start_age", "life_expectancy"]
    df = df.drop(columns=[c for c in df.columns if c not in keep])
    return df.sort_values(["mean_year", "start_age"])
def get_iso3_from_country_name(country_name: str):
    """
    Return the iso3 code matching with a given country name.
    """
    input_db = get_input_db()
    matches = input_db.query("countries", conditions=[f"country='{country_name}'"])["iso3"].tolist()
    # Guard clause: fail loudly when the name is unknown.
    if not matches:
        raise ValueError(f"Country name {country_name} not found")
    return matches[0]
def get_crude_birth_rate(country_iso_code: str):
    """
    Gets crude birth rate over time for a given country.
    Returns a list of birth rates and a list of years.
    """
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    df = input_db.query("birth_rates", conditions={"iso3": country_iso_code})
    df = df.sort_values(["mean_year"])
    return df["birth_rate"].tolist(), df["mean_year"].tolist()
def get_vic_testing_numbers():
    """
    Returns 7-day moving average of number of tests administered in Victoria.
    """
    input_db = get_input_db()
    df = input_db.query("covid_au", columns=["date", "tests"], conditions={"state_abbrev": "VIC"})

    def to_day_index(date_str):
        # Days elapsed since the COVID reference date.
        return (datetime.strptime(date_str, "%Y-%m-%d") - COVID_BASE_DATETIME).days

    test_dates = df.date.apply(to_day_index).to_numpy()
    test_values = df.tests.to_numpy()
    # Add a tiny offset so downstream consumers never see an exact zero.
    smoothed = np.array(apply_moving_average(test_values, 7)) + 1e-6
    return test_dates, smoothed
def get_international_testing_numbers(iso3):
    """
    Returns daily numbers of tests administered for a given country (OWID data).

    Note: unlike the Victorian/DHHS helpers, no moving average is applied here —
    the raw daily `new_tests` values are returned. (The previous docstring
    incorrectly described this as a 7-day moving average for Victoria.)

    Returns a tuple of (day indices since the COVID base date, test counts).
    """
    input_db = get_input_db()
    df = input_db.query("owid", columns=["date", "new_tests"], conditions={"iso_code": iso3})
    # dropna default behaviour is to drop the entire row if any value is NaN.
    df_with_data = df.dropna()

    def to_day_index(date_str):
        return (datetime.strptime(date_str, "%Y-%m-%d") - COVID_BASE_DATETIME).days

    test_dates = list(df_with_data.date.apply(to_day_index).to_numpy())
    test_numbers = list(df_with_data.loc[:, "new_tests"])
    return test_dates, test_numbers
def get_phl_subregion_testing_numbers(region):
    """
    Returns 7-day moving average of number of tests administered in Philippines & sub regions.
    """
    input_db = get_input_db()
    df = input_db.query(
        "covid_phl",
        columns=["date_index", "daily_output_unique_individuals"],
        conditions={"facility_name": region},
    )
    test_dates = df.date_index.to_numpy()
    test_values = df.daily_output_unique_individuals.to_numpy()
    # Tiny positive offset so the smoothed series never contains an exact zero.
    epsilon = 1e-6
    smoothed = np.array(apply_moving_average(test_values, 7)) + epsilon
    return test_dates, smoothed
def get_country_mixing_matrix(mixing_location: str, country_iso_code: str):
    """
    Load a mixing matrix for a given country and mixing location.
    The rows and columns indices of each matrix represent a 5 year age bracket
    from 0-80, giving us a 16x16 matrix.
    """
    assert mixing_location in LOCATIONS, f"Invalid mixing location {mixing_location}"
    input_db = get_input_db()
    age_cols = [f"X{n}" for n in range(1, 17)]
    # NOTE(review): this query uses `column=` with a comma-joined string, unlike
    # the `columns=[...]` list used elsewhere — presumably an older query API;
    # kept as-is to preserve behavior. Verify against the input_db implementation.
    mix_df = input_db.query(
        "social_mixing",
        column=",".join(age_cols),
        conditions=[
            f"iso3='{country_iso_code}'",
            f"location='{mixing_location}'",
        ],
    )
    matrix = np.array(mix_df)
    assert matrix.shape == (16, 16), "Mixing matrix is not 16x16"
    return matrix
def get_dhhs_testing_numbers(cluster: str = None):
    """
    Returns 7-day moving average of number of tests administered in Victoria.

    Args:
        cluster: optional DHHS cluster name; when None, test counts are
            summed across all clusters for each date.

    Returns a tuple of (day indices since 31 Dec 2019, smoothed test counts).
    """
    input_db = get_input_db()
    if cluster is None:
        df = input_db.query("covid_dhhs_test", columns=["date", "test"])
        df = df.groupby("date", as_index=False).sum()
    else:
        df = input_db.query(
            "covid_dhhs_test", columns=["date", "test"], conditions={"cluster_name": cluster}
        )

    # Days since 31 Dec 2019. The `pandas.datetime` alias was deprecated in
    # pandas 1.0 and later removed, so use the stdlib datetime instead.
    # (An unused date-parsing lambda was also removed from this function.)
    test_dates = (pd.to_datetime(df.date) - datetime(2019, 12, 31)).dt.days.to_numpy()
    test_values = df.test.to_numpy()
    epsilon = 1e-6  # A really tiny number to avoid having any zeros
    avg_vals = np.array(apply_moving_average(test_values, 7)) + epsilon
    return test_dates, avg_vals
def _get_death_rates(country_iso_code: str):
    """
    Build per-year, per-age-bracket death rates for a country from UN deaths
    and population data. Returns a dataframe with columns
    mean_year, start_age, death_rate.
    """
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    death_df = input_db.query("deaths", conditions={"iso3": country_iso_code})
    pop_df = input_db.query(
        "population",
        conditions={"iso3": country_iso_code, "region": None},
    )

    # Each death record spans [start_year, end_year]: summarise as midpoint + length.
    death_df["mean_year"] = (death_df["start_year"] + death_df["end_year"]) / 2
    death_df["period"] = death_df["end_year"] - death_df["start_year"]

    # Combine population and total death data so we can calculate death rate.
    # Throws away data for population over 100 y.o.
    rate_df = pd.merge(
        death_df, pop_df, left_on=["start_year", "start_age"], right_on=["year", "start_age"]
    )
    # Treat empty population brackets as 1 to avoid dividing by zero.
    rate_df["population"] = rate_df["population"].where(rate_df["population"] > 0.0, 1.0)
    rate_df["death_rate"] = rate_df["death_count"] / (rate_df["population"] * rate_df["period"])

    keep_cols = ["mean_year", "start_age", "death_rate"]
    rate_df = rate_df.drop(columns=[c for c in rate_df.columns if c not in keep_cols])
    return rate_df.sort_values(["mean_year", "start_age"])
def get_country_mixing_matrix(
    mixing_location: str, country_iso_code: str, mix_matrix="social_mixing"
):
    """
    Load a mixing matrix for a given country and mixing location.
    The rows and columns indices of each matrix represent a 5 year age bracket
    from 0-80, giving us a 16x16 matrix.
    """
    assert mixing_location in LOCATIONS, f"Invalid mixing location {mixing_location}"
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    age_columns = [f"X{n}" for n in range(1, 17)]
    mix_df = input_db.query(
        mix_matrix,
        columns=age_columns,
        conditions={"iso3": country_iso_code, "location": mixing_location},
    )
    matrix = np.array(mix_df)
    assert matrix.shape == (16, 16), "Mixing matrix is not 16x16"
    return matrix
def get_population_by_agegroup(
    age_breakpoints: List[float], country_iso_code: str, region: str = None, year: int = 2020
):
    """
    Find population for age bins.
    Returns a list of ints, each item being the population for that age bracket.
    """
    # Breakpoints must be ascending and anchored at age zero.
    assert age_breakpoints == sorted(age_breakpoints)
    assert age_breakpoints[0] == 0

    input_db = get_input_db()
    query_conditions = [f"iso3='{country_iso_code}'", f"year={year}"]
    query_conditions.append(f"region='{region}'" if region else "region IS NULL")
    pop_df = input_db.query("population", conditions=query_conditions).sort_values(["start_age"])

    source_ages = pop_df["start_age"].tolist()
    source_pops = pop_df["population"].tolist()
    assert len(source_ages) == len(source_pops)

    binned = downsample_quantity(source_pops, source_ages, age_breakpoints)
    return [int(p) for p in binned]
def _get_death_rates(country_iso_code: str):
    """
    Build per-year, per-age-bracket death rates for a country from UN deaths
    and population data. Returns a dataframe with columns
    mean_year, start_age, death_rate.
    """
    input_db = get_input_db()
    death_df = input_db.query("deaths", conditions=[f"iso3='{country_iso_code}'"],)
    pop_df = input_db.query(
        "population",
        conditions=[f"iso3='{country_iso_code}'", "region IS NULL",],
    )

    # Calculate mean year and time period
    death_df["mean_year"] = (death_df["start_year"] + death_df["end_year"]) / 2
    death_df["period"] = death_df["end_year"] - death_df["start_year"]

    # Combine population and total death data so we can calculate death rate.
    # Throws away data for population over 100 y.o.
    rate_df = pd.merge(
        death_df, pop_df, left_on=["start_year", "start_age"], right_on=["year", "start_age"]
    )
    # Treat empty population brackets as 1 so the division below cannot
    # produce infinities (matches the guard used in the newer variant of
    # this function elsewhere in the codebase).
    rate_df["population"] = rate_df["population"].where(rate_df["population"] > 0.0, 1.0)

    # Calculate death rate.
    rate_df["death_rate"] = rate_df["death_count"] / (rate_df["population"] * rate_df["period"])

    cols = ["mean_year", "start_age", "death_rate"]
    rate_df = rate_df.drop(columns=[c for c in rate_df.columns if c not in cols])
    rate_df = rate_df.sort_values(["mean_year", "start_age"])
    return rate_df
import os from autumn.inputs.database import get_input_db db = get_input_db() def test_database__with_read_table__expect_table_df(): """ Ensure we can read a table from the input db as a dataframe. """ result_df = db.query(table_name="countries") assert len(result_df.columns) == 3 # Number of columns eth_df = result_df[result_df["country"] == "Ethiopia"] assert eth_df["iso3"].iloc[0] == "ETH" def test_database__with_conditions__expect_filtered_df(): """ Ensure we can read a filtered table from the input db as a dataframe. """ result_df = db.query( table_name="countries", conditions={"country": "Ethiopia"}, ) assert len(result_df) == 1 # Number of rows assert len(result_df.columns) == 3 # Number of columns assert result_df["iso3"].iloc[0] == "ETH" def test_database__with_conditions_and_column__expect_filtered_df():