def test_scipy_optimizer(seed):
    """Compare ``scipy_optimize`` against the weighted normal-equation solution.

    A Gaussian model with one plain covariate and one spline covariate is fit
    on 20 random observations; the optimizer output must match the solution of
    ``(X^T W X) b = X^T W y`` built from the model's first design matrix.
    """
    np.random.seed(seed)
    num_obs = 20

    # Draw order (obs, cov0, cov1) determines the data for a given seed.
    obs = np.random.randn(num_obs)
    cov0 = np.random.randn(num_obs)
    cov1 = np.random.randn(num_obs)
    df = pd.DataFrame({"obs": obs, "cov0": cov0, "cov1": cov1})
    data = Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df)

    spline_specs = SplineSpecs(
        knots=np.linspace(0.0, 1.0, 5),
        degree=3,
        knots_type="rel_domain",
    )
    mu_variables = [
        Variable(name="cov0"),
        SplineVariable(name="cov1", spline_specs=spline_specs),
    ]
    model = GaussianModel(data, param_specs={"mu": {"variables": mu_variables}})

    coefs = scipy_optimize(model)

    # Closed-form reference: weighted least squares on the design matrix.
    weighted_design_t = model.mat[0].T * model.data.weights
    lhs = weighted_design_t.dot(model.mat[0])
    rhs = weighted_design_t.dot(model.data.obs)
    tr_coef = np.linalg.solve(lhs, rhs)

    assert np.allclose(coefs, tr_coef)
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots: np.ndarray = np.arange(2010, 2021),
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """
    Define one mortality pattern model

    The model combines a seasonality component (degree-3 spline on
    ``col_time`` with five evenly spaced relative-domain knots) and a time
    component (degree-1 spline on the ``"time"`` column with absolute knots
    returned by ``get_time_knots``).

    ``units_per_year`` is accepted but not used by this implementation.
    """
    # Seasonality component.
    seasonality = SeasonalityModelVariables(
        [
            SplineVariable(
                col_time,
                spline_specs=SplineSpecs(
                    knots=np.linspace(0.0, 1.0, 5),
                    degree=3,
                    knots_type="rel_domain",
                ),
            )
        ],
        col_time,
        smooth_order,
    )

    # Time component; knots are derived from the observed range of df.time.
    trend_knots = get_time_knots(df.time.min(), df.time.max(), knots)
    trend = TimeModelVariables(
        [
            SplineVariable(
                "time",
                spline_specs=SplineSpecs(
                    knots=trend_knots,
                    degree=1,
                    knots_type="abs",
                ),
            )
        ]
    )

    return ExcessMortalityModel(df, [seasonality, trend])
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots_per_year: float = 0.5,
                                tail_size: int = 18,
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """
    Define one mortality pattern model

    The model combines a seasonality component (degree-3 spline on
    ``col_time`` with five evenly spaced relative-domain knots) and a time
    component (degree-1 spline on the ``"time"`` column). The absolute time
    knots come from ``get_time_knots``, called with the min and max of
    ``df.time`` plus ``units_per_year``, ``knots_per_year`` and ``tail_size``.
    """
    # Seasonality component.
    seasonality = SeasonalityModelVariables(
        [
            SplineVariable(
                col_time,
                spline_specs=SplineSpecs(
                    knots=np.linspace(0.0, 1.0, 5),
                    degree=3,
                    knots_type="rel_domain",
                ),
            )
        ],
        col_time,
        smooth_order,
    )

    # Time component.
    trend_knots = get_time_knots(
        df.time.min(),
        df.time.max(),
        units_per_year,
        knots_per_year,
        tail_size,
    )
    trend = TimeModelVariables(
        [
            SplineVariable(
                "time",
                spline_specs=SplineSpecs(
                    knots=trend_knots,
                    degree=1,
                    knots_type="abs",
                ),
            )
        ]
    )

    return ExcessMortalityModel(df, [seasonality, trend])
def var_cov1(spline_gprior, spline_uprior, spline_specs):
    """Build the ``"cov1"`` spline variable with the two given priors attached.

    The priors are passed in the order ``[spline_gprior, spline_uprior]``.
    """
    priors = [spline_gprior, spline_uprior]
    return SplineVariable(name="cov1", spline_specs=spline_specs, priors=priors)
def main():
    """Fit one mortality pattern model per location, then save predictions and plots.

    Relies on module-level configuration defined elsewhere in the file:
    ``data_path``, ``time_start``, ``time_end`` (must contain the keys
    ``"fit"`` and ``"pred"``), ``time_unit``, ``seas_spline_specs``,
    ``time_spline_specs`` and ``results_folder``.

    Side effects: writes ``prediction.csv`` and one ``<location>.pdf`` per
    location into ``results_folder``, creating the folder if needed.

    Returns a tuple ``(models, results)`` of dicts keyed by location name:
    the fitted models and the prediction data frames.
    """
    # process input data: the CSV is wide (one column set per year), so
    # stack the 2019 and 2020 columns into one long frame, 2019 first
    df_wide = pd.read_csv(data_path)
    yearly_frames = []
    for year_col, year in (("year_y", "2019"), ("year_x", "2020")):
        pop_col = f"pop_{year}"
        rate_col = f"death_rate_{year}"
        yearly_frames.append(
            df_wide[["location_name", year_col, "week", pop_col, rate_col]].rename(
                columns={
                    year_col: "year",
                    pop_col: "population",
                    rate_col: "death_rate",
                }
            )
        )
    df = pd.concat(yearly_frames)
    # NOTE(review): the 100000 divisor suggests death_rate is per 100,000
    # population - confirm against the data source.
    df["deaths"] = df["death_rate"] * df["population"] / 100000

    # exclusive upper bound on the `time` column for each data split
    time_ub = {k: v - time_start + 1 for k, v in time_end.items()}
    data = {k: {} for k in time_end}
    for location in df.location_name.unique():
        df_loc = df.loc[df.location_name == location].sort_values(["year", "week"])
        df_loc = add_time(df_loc, "year", "week", time_start)
        for k, ub in time_ub.items():
            df_split = df_loc.loc[df_loc.time < ub].copy()
            df_split["offset_0"] = np.log(df_split.population)
            data[k][location] = df_split

    # create models
    models = {}
    for location, d in data["fit"].items():
        seas_var = SplineVariable(time_unit, spline_specs=seas_spline_specs)
        time_var = SplineVariable("time", spline_specs=time_spline_specs)
        variables = [
            SeasonalityModelVariables([seas_var], time_unit, smooth_order=1),
            TimeModelVariables([time_var]),
        ]
        models[location] = ExcessMortalityModel(d, variables)

    # run models
    results = {}
    for location, model in models.items():
        model.run_models()
        results[location] = model.predict(data["pred"][location],
                                          col_pred="mortality_pattern")

    # save results; mkdir with exist_ok avoids the check-then-create race
    # and parents=True lets a nested results folder be created in one call
    results_folder.mkdir(parents=True, exist_ok=True)
    df_result = pd.concat(results.values())
    df_result.to_csv(results_folder / "prediction.csv", index=False)

    # plot results
    for location, result in results.items():
        ax, axs = plot_data(result, "week", "year")
        # drop the second axes returned by plot_data; only `ax` is drawn on
        plt.delaxes(axs[1])
        ax = plot_model(ax, result, "mortality_pattern", color="#008080")
        ax.set_title(location, loc="left")
        ax.legend()
        plt.savefig(results_folder / f"{location}.pdf", bbox_inches="tight")
        # close every figure so they do not accumulate across locations
        plt.close("all")

    return models, results
def spline_variable():
    """Build a spline variable on the first column named in ``COL_COVS``.

    The spline is degree 3 with five evenly spaced knots from 0.0 to 1.0.
    """
    knots = np.linspace(0.0, 1.0, 5)
    specs = SplineSpecs(knots=knots, degree=3)
    return SplineVariable(name=COL_COVS[0], spline_specs=specs)
# result folder results_path = Path("./examples/results_debug") # define all variables intercept_variable = Variable("intercept") idr_spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 5), degree=2, knots_type="rel_domain", include_first_basis=False) idr_variable = SplineVariable("idr_lagged", spline_specs=idr_spline_specs, priors=[ SplineUniformPrior(order=1, lb=-np.inf, ub=0.0), SplineGaussianPrior(order=1, mean=0.0, sd=1e-4, domain_lb=0.4, domain_ub=1.0) ]) time_spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 10), degree=2, knots_type="rel_domain", include_first_basis=False, r_linear=True) time_variable = SplineVariable("time_id", spline_specs=time_spline_specs, priors=[ SplineGaussianPrior(order=1,