def get_death_rates_by_agegroup(age_breakpoints: List[float], country_iso_code: str):
    """
    Find death rates from UN data that are specific to the age groups provided.

    Returns a tuple of:
      - a dict mapping each age breakpoint to a list of death rates (one per year)
      - the list of years those rates correspond to
    """
    age_breakpoints = _check_age_breakpoints(age_breakpoints)
    # NB: the underlying data is fetched by _get_death_rates; no direct
    # database handle is needed here (a previously unused local was removed).
    rate_df = _get_death_rates(country_iso_code)
    years = rate_df["mean_year"].unique().tolist()
    orig_ages = rate_df["start_age"].unique().tolist()
    year_step = 5  # UN death data comes in 5-year age bands.
    year_rates = {}
    for year in years:
        orig_rates = rate_df[rate_df["mean_year"] == year]["death_rate"].tolist()
        new_rates = downsample_rate(orig_rates, orig_ages, year_step, age_breakpoints)
        year_rates[year] = new_rates

    # Pivot from per-year lists to per-age-group time series.
    death_rates_by_agegroup = {}
    for i, age in enumerate(age_breakpoints):
        death_rates_by_agegroup[age] = [year_rates[y][i] for y in years]

    return death_rates_by_agegroup, years
def get_population_by_agegroup(age_breakpoints: List[int], country_iso_code: str, region: str = None, year: int = 2020):
    """
    Find population for age bins.
    Returns a list of ints, each item being the population for that age bracket.
    """
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    age_breakpoints = _check_age_breakpoints(age_breakpoints)
    input_db = get_input_db()
    pop_df = input_db.query(
        "population",
        conditions={"iso3": country_iso_code, "year": year, "region": region or None},
    ).sort_values(["start_age"])

    # Rows without population figures would corrupt the downsampling, so drop them.
    with_data = pop_df.dropna(subset=["population"])
    source_ages = with_data["start_age"].tolist()
    source_pops = with_data["population"].tolist()
    assert len(source_ages) == len(source_pops)

    binned = downsample_quantity(source_pops, source_ages, age_breakpoints)
    return [int(p) for p in binned]
def get_mobility_data(country_iso_code: str, region: str, base_date: datetime, location_map: dict):
    """
    Get daily Google mobility data for locations, for a given country.
    Times are in days since a given base date.
    The location map parameter transforms Google Mobility locations
    into Autumn-friendly locations.

    Google mobility provides us with:
        - workplaces
        - retail_and_recreation
        - grocery_and_pharmacy
        - parks
        - transit_stations
        - residential

    An example mapping would be
    {
        "work": ["workplaces"],
        "other_locations": [
            "retail_and_recreation",
            "grocery_and_pharmacy",
            "parks",
            "transit_stations",
        ],
    }
    """
    input_db = get_input_db()
    mob_df = input_db.query(
        "mobility",
        conditions={"iso3": country_iso_code, "region": region or None},
    )

    # Drop mappings with no Google locations, then average each group of
    # Google columns into a single Autumn-friendly location column.
    revised_location_map = {key: value for key, value in location_map.items() if value}
    for autumn_loc, google_locs in revised_location_map.items():
        mob_df[autumn_loc] = 0
        for google_loc in google_locs:
            mob_df[autumn_loc] += mob_df[google_loc]
        mob_df[autumn_loc] /= len(google_locs)

    # Restrict to dates on/after the base date, expressed as day offsets.
    mob_df["date"] = pd.to_datetime(mob_df["date"], format="%Y-%m-%d")
    mob_df = mob_df.sort_values(["date"])
    mob_df = mob_df[mob_df["date"] >= base_date]
    days = [(d - base_date).days for d in mob_df["date"]]
    loc_mobility_values = {loc: mob_df[loc].tolist() for loc in revised_location_map}
    return loc_mobility_values, days
def get_crude_birth_rate(country_iso_code: str):
    """
    Gets crude birth rate over time for a given country.
    Returns a list of birth rates and a list of years.
    """
    input_db = get_input_db()
    df = input_db.query("birth_rates", conditions=[f"iso3='{country_iso_code}'"])
    # Return series ordered chronologically by the bracket midpoint year.
    df = df.sort_values(["mean_year"])
    return df["birth_rate"].tolist(), df["mean_year"].tolist()
def _get_life_expectancy(country_iso_code: str):
    """
    Load UN life expectancy data for a country, keyed by mean year and start age.
    Returns a dataframe with columns mean_year, start_age, life_expectancy.
    """
    input_db = get_input_db()
    df = input_db.query("life_expectancy", conditions=[f"iso3='{country_iso_code}'"],)

    # Each record spans [start_year, end_year]; summarise it by its midpoint.
    df["mean_year"] = (df["start_year"] + df["end_year"]) / 2

    keep = ["mean_year", "start_age", "life_expectancy"]
    df = df.drop(columns=[c for c in df.columns if c not in keep])
    return df.sort_values(["mean_year", "start_age"])
def get_iso3_from_country_name(country_name: str):
    """
    Return the iso3 code matching with a given country name.
    """
    input_db = get_input_db()
    matches = input_db.query("countries", conditions=[f"country='{country_name}'"])["iso3"].tolist()
    # Guard clause: fail loudly when the name is unknown.
    if not matches:
        raise ValueError(f"Country name {country_name} not found")
    return matches[0]
def get_crude_birth_rate(country_iso_code: str):
    """
    Gets crude birth rate over time for a given country.
    Returns a list of birth rates and a list of years.
    """
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    df = input_db.query("birth_rates", conditions={"iso3": country_iso_code})
    df = df.sort_values(["mean_year"])
    return df["birth_rate"].tolist(), df["mean_year"].tolist()
def get_vic_testing_numbers():
    """
    Returns 7-day moving average of number of tests administered in Victoria.
    """
    input_db = get_input_db()
    df = input_db.query("covid_au", columns=["date", "tests"], conditions={"state_abbrev": "VIC"})

    def to_day_index(date_str):
        # Days elapsed since the COVID reference date.
        return (datetime.strptime(date_str, "%Y-%m-%d") - COVID_BASE_DATETIME).days

    test_dates = df.date.apply(to_day_index).to_numpy()
    test_values = df.tests.to_numpy()
    # Add a tiny offset so downstream consumers never see an exact zero.
    smoothed = np.array(apply_moving_average(test_values, 7)) + 1e-6
    return test_dates, smoothed
def get_international_testing_numbers(iso3):
    """
    Returns daily numbers of tests administered for a given country (OWID data).

    Note: unlike the Victorian/DHHS helpers, no moving average is applied here —
    the raw daily `new_tests` values are returned. (The previous docstring
    incorrectly described this as a 7-day moving average for Victoria.)

    Returns a tuple of (day indices since the COVID base date, test counts).
    """
    input_db = get_input_db()
    df = input_db.query("owid", columns=["date", "new_tests"], conditions={"iso_code": iso3})
    # dropna default behaviour is to drop the entire row if any value is NaN.
    df_with_data = df.dropna()

    def to_day_index(date_str):
        return (datetime.strptime(date_str, "%Y-%m-%d") - COVID_BASE_DATETIME).days

    test_dates = list(df_with_data.date.apply(to_day_index).to_numpy())
    test_numbers = list(df_with_data.loc[:, "new_tests"])
    return test_dates, test_numbers
def get_phl_subregion_testing_numbers(region):
    """
    Returns 7-day moving average of number of tests administered in Philippines & sub regions.
    """
    input_db = get_input_db()
    df = input_db.query(
        "covid_phl",
        columns=["date_index", "daily_output_unique_individuals"],
        conditions={"facility_name": region},
    )
    test_dates = df.date_index.to_numpy()
    test_values = df.daily_output_unique_individuals.to_numpy()
    # Tiny positive offset so the smoothed series never contains an exact zero.
    epsilon = 1e-6
    smoothed = np.array(apply_moving_average(test_values, 7)) + epsilon
    return test_dates, smoothed
def get_country_mixing_matrix(mixing_location: str, country_iso_code: str):
    """
    Load a mixing matrix for a given country and mixing location.
    The rows and columns indices of each matrix represent a 5 year age bracket
    from 0-80, giving us a 16x16 matrix.
    """
    assert mixing_location in LOCATIONS, f"Invalid mixing location {mixing_location}"
    input_db = get_input_db()
    age_cols = [f"X{n}" for n in range(1, 17)]
    # NOTE(review): this query uses `column=` with a comma-joined string, unlike
    # the `columns=[...]` list used elsewhere — presumably an older query API;
    # kept as-is to preserve behavior. Verify against the input_db implementation.
    mix_df = input_db.query(
        "social_mixing",
        column=",".join(age_cols),
        conditions=[
            f"iso3='{country_iso_code}'",
            f"location='{mixing_location}'",
        ],
    )
    matrix = np.array(mix_df)
    assert matrix.shape == (16, 16), "Mixing matrix is not 16x16"
    return matrix
def get_dhhs_testing_numbers(cluster: str = None):
    """
    Returns 7-day moving average of number of tests administered in Victoria.

    Args:
        cluster: optional DHHS cluster name; when None, test counts are
            summed across all clusters for each date.

    Returns a tuple of (day indices since 31 Dec 2019, smoothed test counts).
    """
    input_db = get_input_db()
    if cluster is None:
        df = input_db.query("covid_dhhs_test", columns=["date", "test"])
        df = df.groupby("date", as_index=False).sum()
    else:
        df = input_db.query(
            "covid_dhhs_test", columns=["date", "test"], conditions={"cluster_name": cluster}
        )

    # Days since 31 Dec 2019. The `pandas.datetime` alias was deprecated in
    # pandas 1.0 and later removed, so use the stdlib datetime instead.
    # (An unused date-parsing lambda was also removed from this function.)
    test_dates = (pd.to_datetime(df.date) - datetime(2019, 12, 31)).dt.days.to_numpy()
    test_values = df.test.to_numpy()
    epsilon = 1e-6  # A really tiny number to avoid having any zeros
    avg_vals = np.array(apply_moving_average(test_values, 7)) + epsilon
    return test_dates, avg_vals
def _get_death_rates(country_iso_code: str):
    """
    Build per-year, per-age-bracket death rates for a country from UN deaths
    and population data. Returns a dataframe with columns
    mean_year, start_age, death_rate.
    """
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    death_df = input_db.query("deaths", conditions={"iso3": country_iso_code})
    pop_df = input_db.query(
        "population",
        conditions={"iso3": country_iso_code, "region": None},
    )

    # Each death record spans [start_year, end_year]: summarise as midpoint + length.
    death_df["mean_year"] = (death_df["start_year"] + death_df["end_year"]) / 2
    death_df["period"] = death_df["end_year"] - death_df["start_year"]

    # Combine population and total death data so we can calculate death rate.
    # Throws away data for population over 100 y.o.
    rate_df = pd.merge(
        death_df, pop_df, left_on=["start_year", "start_age"], right_on=["year", "start_age"]
    )
    # Treat empty population brackets as 1 to avoid dividing by zero.
    rate_df["population"] = rate_df["population"].where(rate_df["population"] > 0.0, 1.0)
    rate_df["death_rate"] = rate_df["death_count"] / (rate_df["population"] * rate_df["period"])

    keep_cols = ["mean_year", "start_age", "death_rate"]
    rate_df = rate_df.drop(columns=[c for c in rate_df.columns if c not in keep_cols])
    return rate_df.sort_values(["mean_year", "start_age"])
def get_country_mixing_matrix(
    mixing_location: str, country_iso_code: str, mix_matrix="social_mixing"
):
    """
    Load a mixing matrix for a given country and mixing location.
    The rows and columns indices of each matrix represent a 5 year age bracket
    from 0-80, giving us a 16x16 matrix.
    """
    assert mixing_location in LOCATIONS, f"Invalid mixing location {mixing_location}"
    # Some ISO codes are remapped to a substitute country's data.
    if country_iso_code in MAPPING_ISO_CODE:
        country_iso_code = MAPPING_ISO_CODE[country_iso_code]

    input_db = get_input_db()
    age_columns = [f"X{n}" for n in range(1, 17)]
    mix_df = input_db.query(
        mix_matrix,
        columns=age_columns,
        conditions={"iso3": country_iso_code, "location": mixing_location},
    )
    matrix = np.array(mix_df)
    assert matrix.shape == (16, 16), "Mixing matrix is not 16x16"
    return matrix
def get_population_by_agegroup(
    age_breakpoints: List[float], country_iso_code: str, region: str = None, year: int = 2020
):
    """
    Find population for age bins.
    Returns a list of ints, each item being the population for that age bracket.
    """
    # Breakpoints must be ascending and anchored at age zero.
    assert age_breakpoints == sorted(age_breakpoints)
    assert age_breakpoints[0] == 0

    input_db = get_input_db()
    query_conditions = [f"iso3='{country_iso_code}'", f"year={year}"]
    query_conditions.append(f"region='{region}'" if region else "region IS NULL")
    pop_df = input_db.query("population", conditions=query_conditions).sort_values(["start_age"])

    source_ages = pop_df["start_age"].tolist()
    source_pops = pop_df["population"].tolist()
    assert len(source_ages) == len(source_pops)

    binned = downsample_quantity(source_pops, source_ages, age_breakpoints)
    return [int(p) for p in binned]
def _get_death_rates(country_iso_code: str):
    """
    Build per-year, per-age-bracket death rates for a country from UN deaths
    and population data. Returns a dataframe with columns
    mean_year, start_age, death_rate.
    """
    input_db = get_input_db()
    death_df = input_db.query("deaths", conditions=[f"iso3='{country_iso_code}'"],)
    pop_df = input_db.query(
        "population",
        conditions=[f"iso3='{country_iso_code}'", "region IS NULL",],
    )

    # Calculate mean year and time period
    death_df["mean_year"] = (death_df["start_year"] + death_df["end_year"]) / 2
    death_df["period"] = death_df["end_year"] - death_df["start_year"]

    # Combine population and total death data so we can calculate death rate.
    # Throws away data for population over 100 y.o.
    rate_df = pd.merge(
        death_df, pop_df, left_on=["start_year", "start_age"], right_on=["year", "start_age"]
    )
    # Treat empty population brackets as 1 so the division below cannot
    # produce infinities (matches the guard used in the newer variant of
    # this function elsewhere in the codebase).
    rate_df["population"] = rate_df["population"].where(rate_df["population"] > 0.0, 1.0)

    # Calculate death rate.
    rate_df["death_rate"] = rate_df["death_count"] / (rate_df["population"] * rate_df["period"])

    cols = ["mean_year", "start_age", "death_rate"]
    rate_df = rate_df.drop(columns=[c for c in rate_df.columns if c not in cols])
    rate_df = rate_df.sort_values(["mean_year", "start_age"])
    return rate_df
import os from autumn.inputs.database import get_input_db db = get_input_db() def test_database__with_read_table__expect_table_df(): """ Ensure we can read a table from the input db as a dataframe. """ result_df = db.query(table_name="countries") assert len(result_df.columns) == 3 # Number of columns eth_df = result_df[result_df["country"] == "Ethiopia"] assert eth_df["iso3"].iloc[0] == "ETH" def test_database__with_conditions__expect_filtered_df(): """ Ensure we can read a filtered table from the input db as a dataframe. """ result_df = db.query( table_name="countries", conditions={"country": "Ethiopia"}, ) assert len(result_df) == 1 # Number of rows assert len(result_df.columns) == 3 # Number of columns assert result_df["iso3"].iloc[0] == "ETH" def test_database__with_conditions_and_column__expect_filtered_df():