Esempio n. 1
0
def test_scipy_optimizer(seed):
    """
    Check that ``scipy_optimize`` matches the closed-form weighted
    least-squares solution on random data.

    The model has a single parameter ``mu`` driven by one plain variable
    (``cov0``) and one cubic spline variable (``cov1``).
    """
    np.random.seed(seed)
    n_rows = 20
    # Columns are drawn in the order obs, cov0, cov1 so that the random
    # stream is consumed deterministically for a given seed.
    frame = pd.DataFrame({
        column: np.random.randn(n_rows)
        for column in ("obs", "cov0", "cov1")
    })
    data = Data(col_obs="obs", col_covs=["cov0", "cov1"], df=frame)

    cubic_specs = SplineSpecs(
        knots=np.linspace(0.0, 1.0, 5),
        degree=3,
        knots_type="rel_domain",
    )
    mu_variables = [
        Variable(name="cov0"),
        SplineVariable(name="cov1", spline_specs=cubic_specs),
    ]
    model = GaussianModel(data,
                          param_specs={"mu": {"variables": mu_variables}})

    fitted = scipy_optimize(model)

    # Reference answer: solve the weighted normal equations
    # (X^T W X) beta = X^T W y, with X taken from model.mat[0].
    design = model.mat[0]
    weighted_design_t = design.T * model.data.weights
    expected = np.linalg.solve(weighted_design_t.dot(design),
                               weighted_design_t.dot(model.data.obs))

    assert np.allclose(fitted, expected)
Esempio n. 2
0
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots: np.ndarray = np.arange(2010, 2021),
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """
    Build a mortality pattern model with a seasonal and a time component.

    Parameters
    ----------
    df
        Input data. The ``time`` column is read (its min and max are passed
        to ``get_time_knots``) independently of ``col_time``.
    col_time
        Column used for the seasonality spline variable.
    units_per_year
        Accepted for interface compatibility; not used in this body.
    knots
        Forwarded to ``get_time_knots`` together with the time range.
    smooth_order
        Forwarded to ``SeasonalityModelVariables``.

    Returns
    -------
    ExcessMortalityModel
        Model built from ``df`` and the two variable groups.
    """
    # Seasonality: cubic spline with knots given relative to the domain.
    seasonal_specs = SplineSpecs(
        knots=np.linspace(0.0, 1.0, 5),
        degree=3,
        knots_type="rel_domain",
    )

    # Time trend: linear spline with absolute knot positions.
    time_column = df.time
    trend_knots = get_time_knots(time_column.min(), time_column.max(), knots)
    trend_specs = SplineSpecs(knots=trend_knots, degree=1, knots_type="abs")

    seasonal_var = SplineVariable(col_time, spline_specs=seasonal_specs)
    trend_var = SplineVariable("time", spline_specs=trend_specs)

    seasonal_group = SeasonalityModelVariables([seasonal_var], col_time,
                                               smooth_order)
    trend_group = TimeModelVariables([trend_var])
    return ExcessMortalityModel(df, [seasonal_group, trend_group])
Esempio n. 3
0
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots_per_year: float = 0.5,
                                tail_size: int = 18,
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """
    Define one mortality pattern model.

    Parameters
    ----------
    df
        Input data. The ``time`` column is read (its min and max are passed
        to ``get_time_knots``) independently of ``col_time``.
    col_time
        Column used for the seasonality spline variable.
    units_per_year, knots_per_year, tail_size
        Forwarded, in this order, to ``get_time_knots`` after the time range.
    smooth_order
        Forwarded to ``SeasonalityModelVariables``.

    Returns
    -------
    ExcessMortalityModel
        Model built from ``df`` and the two variable groups.
    """
    # Seasonality: cubic spline with knots given relative to the domain.
    seasonal_specs = SplineSpecs(
        knots=np.linspace(0.0, 1.0, 5),
        degree=3,
        knots_type="rel_domain",
    )

    # Time trend: linear spline with absolute knot positions.
    time_column = df.time
    trend_knots = get_time_knots(
        time_column.min(),
        time_column.max(),
        units_per_year,
        knots_per_year,
        tail_size,
    )
    trend_specs = SplineSpecs(knots=trend_knots, degree=1, knots_type="abs")

    seasonal_var = SplineVariable(col_time, spline_specs=seasonal_specs)
    trend_var = SplineVariable("time", spline_specs=trend_specs)

    seasonal_group = SeasonalityModelVariables([seasonal_var], col_time,
                                               smooth_order)
    trend_group = TimeModelVariables([trend_var])
    return ExcessMortalityModel(df, [seasonal_group, trend_group])
Esempio n. 4
0
def var_cov1(spline_gprior, spline_uprior, spline_specs):
    """Return the ``cov1`` spline variable carrying both supplied priors."""
    # The priors are passed in the order: Gaussian first, then uniform.
    priors = [spline_gprior, spline_uprior]
    return SplineVariable(name="cov1",
                          spline_specs=spline_specs,
                          priors=priors)
Esempio n. 5
0
def main():
    """
    Fit one mortality pattern model per location, then predict, save and plot.

    Steps:

    1. Read ``data_path`` and stack its 2019 and 2020 columns into long
       format with the shared columns ``year``, ``population`` and
       ``death_rate``; derive ``deaths`` from rate and population.
    2. For every location, add a ``time`` column via ``add_time`` and cut
       the data at the upper bound of each entry in ``time_end``.
    3. Fit an ``ExcessMortalityModel`` on the ``"fit"`` data and predict on
       the ``"pred"`` data.
    4. Write ``prediction.csv`` and one PDF figure per location into
       ``results_folder``.

    Relies on module-level settings defined outside this function:
    ``data_path``, ``time_start``, ``time_end`` (must contain the keys
    ``"fit"`` and ``"pred"``), ``time_unit``, ``seas_spline_specs``,
    ``time_spline_specs`` and ``results_folder``.

    Returns
    -------
    tuple
        ``(models, results)``: two dicts keyed by location name, holding the
        fitted models and the prediction data frames respectively.
    """
    # process input data: stack the per-year columns into long format,
    # 2019 rows first and 2020 rows second
    df = pd.read_csv(data_path)
    df_by_year = []
    for year, col_year in ((2019, "year_y"), (2020, "year_x")):
        col_pop = f"pop_{year}"
        col_rate = f"death_rate_{year}"
        df_year = df[["location_name", col_year, "week", col_pop, col_rate]]
        df_by_year.append(
            df_year.rename(columns={
                col_year: "year",
                col_pop: "population",
                col_rate: "death_rate",
            }))
    df = pd.concat(df_by_year)
    # NOTE(review): death_rate is presumably per 100,000 population -- confirm
    df["deaths"] = df["death_rate"] * df["population"] / 100000

    # exclusive upper bound on the ``time`` column for each data split
    time_ub = {k: v - time_start + 1 for k, v in time_end.items()}

    locations = df.location_name.unique()
    data = {k: {} for k in time_end}
    for location in locations:
        df_loc = df.loc[df.location_name == location]
        df_loc = df_loc.sort_values(["year", "week"])
        df_loc = add_time(df_loc, "year", "week", time_start)

        for k, ub in time_ub.items():
            df_split = df_loc.loc[df_loc.time < ub].copy()
            df_split["offset_0"] = np.log(df_split.population)
            data[k][location] = df_split

    # create models
    models = {}
    for location, d in data["fit"].items():
        seas_var = SplineVariable(time_unit, spline_specs=seas_spline_specs)
        time_var = SplineVariable("time", spline_specs=time_spline_specs)
        variables = [
            SeasonalityModelVariables([seas_var], time_unit, smooth_order=1),
            TimeModelVariables([time_var])
        ]
        models[location] = ExcessMortalityModel(d, variables)

    # run models
    results = {}
    for location, model in models.items():
        model.run_models()
        results[location] = model.predict(data["pred"][location],
                                          col_pred="mortality_pattern")

    # save results
    # Create the folder in one call: no exists()/mkdir() race, and missing
    # parent directories are created as well.
    # NOTE(review): assumes results_folder is a pathlib.Path -- confirm
    results_folder.mkdir(parents=True, exist_ok=True)
    df_result = pd.concat(results.values())
    df_result.to_csv(results_folder / "prediction.csv", index=False)

    # plot results
    for location, result in results.items():
        ax, axs = plot_data(result, "week", "year")
        # drop the second axes returned by plot_data; only ``ax`` is drawn on
        plt.delaxes(axs[1])
        ax = plot_model(ax, result, "mortality_pattern", color="#008080")
        ax.set_title(location, loc="left")
        ax.legend()
        plt.savefig(results_folder / f"{location}.pdf", bbox_inches="tight")
        plt.close("all")

    return models, results
Esempio n. 6
0
def spline_variable():
    """Return a degree-3 spline variable on the first entry of ``COL_COVS``."""
    col_name = COL_COVS[0]
    knots = np.linspace(0.0, 1.0, 5)
    specs = SplineSpecs(knots=knots, degree=3)
    return SplineVariable(name=col_name, spline_specs=specs)
Esempio n. 7
0
# Folder that receives the results of this script.
results_path = Path("./examples/results_debug")

# Model variables.
intercept_variable = Variable("intercept")

# Quadratic spline on "idr_lagged": five evenly spaced knots given relative
# to the covariate domain, with the first basis function excluded.
idr_spline_specs = SplineSpecs(
    knots=np.linspace(0.0, 1.0, 5),
    degree=2,
    knots_type="rel_domain",
    include_first_basis=False,
)
# NOTE(review): order=1 with ub=0.0 presumably bounds the first derivative
# from above by zero (non-increasing curve) -- confirm against the library.
_idr_uniform_prior = SplineUniformPrior(order=1, lb=-np.inf, ub=0.0)
# NOTE(review): presumably pins the first derivative near zero on the part
# of the domain between domain_lb and domain_ub -- confirm.
_idr_gaussian_prior = SplineGaussianPrior(
    order=1,
    mean=0.0,
    sd=1e-4,
    domain_lb=0.4,
    domain_ub=1.0,
)
idr_variable = SplineVariable(
    "idr_lagged",
    spline_specs=idr_spline_specs,
    priors=[_idr_uniform_prior, _idr_gaussian_prior],
)

# Quadratic spline for the time variable: ten evenly spaced relative knots,
# first basis function excluded, r_linear enabled.
time_spline_specs = SplineSpecs(
    knots=np.linspace(0.0, 1.0, 10),
    degree=2,
    knots_type="rel_domain",
    include_first_basis=False,
    r_linear=True,
)
time_variable = SplineVariable("time_id",
                               spline_specs=time_spline_specs,
                               priors=[
                                   SplineGaussianPrior(order=1,