Esempio n. 1
0
def normalize_age_and_years(
        data: pd.DataFrame,
        fill_value: Real = None,
        cols_to_fill: List[str] = vi_globals.DRAW_COLUMNS,
        gbd_round_id: int = GBD_2020_ROUND_ID,
        age_group_ids: List[int] = AGE_GROUP.GBD_2020) -> pd.DataFrame:
    """Normalize the sex, year, and age dimensions of ``data``.

    The frame is first passed through ``vi_utils.normalize_sex``. Its year
    coverage is then expanded to every year between the smallest and the
    largest estimation year reported by ``get_gbd_estimation_years`` for
    ``gbd_round_id``:

    * no ``year_id`` column: the frame is replicated once per annual year;
    * ``year_id`` values equal to the estimation years exactly: the frame
      is handed to ``vi_utils.interpolate_year``;
    * anything else: the year values are left as they are.

    Rows whose ``year_id`` lies outside the annual range are dropped, and
    the result is passed to ``_normalize_age``.

    Args:
        data: Frame to normalize.
        fill_value: Forwarded to the sex and age normalizers.
        cols_to_fill: Forwarded to the sex and age normalizers.
        gbd_round_id: Round used to look up the estimation years.
        age_group_ids: Forwarded to ``_normalize_age``.

    Returns:
        Whatever ``_normalize_age`` returns for the year-normalized frame.
    """
    data = vi_utils.normalize_sex(data, fill_value, cols_to_fill)

    estimation_years = get_gbd_estimation_years(gbd_round_id)
    annual_years = list(
        range(min(estimation_years), max(estimation_years) + 1)
    )

    if 'year_id' not in data:
        # Data doesn't vary by year, so stamp one copy per annual year.
        data = pd.concat(
            [data.assign(year_id=year) for year in annual_years],
            ignore_index=True,
        )
    elif set(data.year_id) == set(estimation_years):
        data = vi_utils.interpolate_year(data)
    # Otherwise the year values are kept untouched (presumably the data is
    # already annual -- this is not checked here).

    # Dump any rows outside the annual range.
    within_range = data.year_id.isin(annual_years)
    data = data[within_range]

    return _normalize_age(data, fill_value, cols_to_fill, age_group_ids)
Esempio n. 2
0
def test_normalize_sex_copy_3():
    """Rows with ``sex_id`` 3 must show up under both sex 1 and sex 2."""
    expected = [1, 2, 3, 4]
    frame = pd.DataFrame({
        "sex_id": [3] * len(expected),
        "value": expected,
    })

    result = utilities.normalize_sex(
        frame, fill_value=0.0, cols_to_fill=["value"]
    )

    # Both sexes are expected to carry the original values unchanged.
    for sex_id in (1, 2):
        sex_values = result.loc[result.sex_id == sex_id, "value"]
        assert (sex_values == expected).all()
Esempio n. 3
0
def test_normalize_sex_fill_value():
    """A sex absent from the input must be created with the fill value."""
    expected = [1, 2, 3, 4]
    filler = 0.0

    # Each pair is (sex present in the input, sex that has to be filled in).
    for present, absent in ((1, 2), (2, 1)):
        frame = pd.DataFrame({
            "sex_id": [present] * len(expected),
            "value": expected,
        })
        result = utilities.normalize_sex(
            frame, fill_value=filler, cols_to_fill=["value"]
        )

        kept = result.loc[result.sex_id == present, "value"]
        assert (kept == expected).all()

        filled = result.loc[result.sex_id == absent, "value"]
        assert (filled == [filler] * len(expected)).all()
Esempio n. 4
0
def load_location_specific_life_expectancy(key: EntityKey, location: str):
    """Load life expectancy data for ``location`` and put it in standard form.

    The extracted data has its ``age`` column renamed to ``age_start`` and
    gains an ``age_end`` column taken from the next row's ``age_start``.
    The rows for year 2025 are then replicated for every year from
    ``project_globals.MIN_YEAR`` up to (but excluding) 2025 and prepended
    to the extracted data before it runs through the sex/year
    normalization, reshape, GBD-convention scrub, year-interval split, and
    hierarchical sort.

    Args:
        key: Not used by this function.
        location: Location name, resolved with ``extract.get_location_id``.

    Returns:
        The result of ``utilities.sort_hierarchical_data`` on the processed
        data.
    """
    # NOTE(review): 2025 looks like the first year present in the extracted
    # data -- confirm against the extract.
    anchor_year = 2025

    location_id = extract.get_location_id(location)
    raw = extract.get_location_specific_life_expectancy(location_id)
    data = raw.rename(columns={'age': 'age_start'})

    # The last row has no successor, so its age_end falls back to 5.01.
    next_age_start = data.age_start.shift(-1)
    data['age_end'] = next_age_start.fillna(5.01)

    # Back-fill the earlier years with copies of the anchor year's rows.
    anchor_rows = data[data.year_id == anchor_year]
    backfilled = [
        anchor_rows.assign(year_id=year)
        for year in range(project_globals.MIN_YEAR, anchor_year)
    ]
    data = pd.concat([*backfilled, data], ignore_index=True)

    data = utilities.normalize_sex(data, None, ['value'])
    data = standardize.normalize_year(data)
    data = utilities.reshape(data, value_cols=['value'])
    data = utilities.scrub_gbd_conventions(data, location)
    data = utilities.split_interval(
        data, interval_column='year', split_column_prefix='year'
    )
    return utilities.sort_hierarchical_data(data)
Esempio n. 5
0
def test_normalize_sex_no_sex_id():
    """A frame without a ``sex_id`` column must come back unchanged."""
    original = pd.DataFrame({"ColumnA": [1, 2, 3], "ColumnB": [1, 2, 3]})

    result = utilities.normalize_sex(
        original, fill_value=0.0, cols_to_fill=["value"]
    )

    pd.testing.assert_frame_equal(original, result)
Esempio n. 6
0
def test_normalize_sex(sex_ids):
    """Whatever sex ids go in, the output must hold exactly sexes 1 and 2."""
    frame = pd.DataFrame({
        "sex_id": sex_ids,
        "value": [1] * len(sex_ids),
    })

    result = utilities.normalize_sex(
        frame, fill_value=0.0, cols_to_fill=["value"]
    )

    assert set(result.sex_id) == {1, 2}
def normalize(data: pd.DataFrame, fill_value: Real = None,
              cols_to_fill: List[str] = vi_globals.DRAW_COLUMNS) -> pd.DataFrame:
    """Run ``data`` through the sex, year, and age normalizers, in that order.

    Args:
        data: Frame to normalize.
        fill_value: Forwarded to the sex and age normalizers.
        cols_to_fill: Forwarded to the sex and age normalizers.

    Returns:
        The frame returned by ``normalize_age``.
    """
    by_sex = utilities.normalize_sex(data, fill_value, cols_to_fill)
    by_year = normalize_year(by_sex)
    return normalize_age(by_year, fill_value, cols_to_fill)