def test_smart_initialization(self, curve_fun):
    """Fit a SmartInitialization solver on simulated three-group data.

    The SmartInitialization solver wraps a MultipleInitializations solver
    whose sampling function always returns the same pre-drawn matrix of
    starting points.  After fitting, the prediction for each group must be
    within 5% relative error (2-norm) of that group's observations.
    """
    np.random.seed(100)
    num_groups = 3
    params_set, params_true, x_true = simulate_params(num_groups)
    data = simulate_data(curve_fun, params_true)
    core_model = CoreModel(params_set, curve_fun, normal_loss)

    # One row per initialization; every entry lies in (-3, 0].
    num_init = 3
    num_columns = x_true.shape[1] * (num_groups + 1)
    xs_init = -np.random.rand(num_init, num_columns) * 3

    def sample_fun(x):
        # The argument is ignored: the same starting points are always used.
        return xs_init

    solver_inner = MultipleInitializations(sample_fun)
    solver = SmartInitialization()
    solver.set_solver(solver_inner)
    solver.set_model_instance(core_model)

    fit_options = {
        'maxiter': 500,
        'ftol': 1e-16,
        'gtol': 1e-16
    }
    solver.fit(data=data, options=fit_options)

    observations = data[0]['obs'].to_numpy()
    times = data[0]['t'].to_numpy()

    # Walk through the observations group by group, using the group sizes
    # recorded on the model to locate each group's contiguous window.
    offset = 0
    for group_idx, size in enumerate(core_model.data_inputs.group_sizes):
        window = slice(offset, offset + size)
        y_true = observations[window]
        y_pred = solver.predict(t=times[window],
                                is_multi_groups=True)[group_idx]
        rel_error = np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true)
        assert rel_error < 5e-2
        offset += size
def fake_alpha_beta_solver(parameter_set):
    """Return a ``Solver`` attached to a ``CoreModel`` with placeholder inputs.

    The model is created from ``parameter_set`` with identity lambdas as
    curve and loss functions, and its ``data_inputs`` are replaced by a
    hand-built ``DataInputs`` object holding small constant arrays.

    Args:
        parameter_set: forwarded to ``CoreModel`` as ``param_set``.

    Returns:
        The ``Solver`` instance after ``set_model_instance`` was called
        with the placeholder model.
    """
    def _ramp():
        # A fresh array for each field, so no two fields share storage.
        return np.array([0., 1., 2.])

    # Fake arguments
    fake_model = CoreModel(
        param_set=parameter_set,
        curve_fun=lambda x: x,
        loss_fun=lambda x: x,
    )
    fake_model.data_inputs = DataInputs(
        t=_ramp(),
        obs=_ramp(),
        obs_se=_ramp(),
        group_sizes=[1],
        covariates_matrices=[np.array([[1.]]), np.array([[1.]])],
    )
    fake_model.data_inputs.var_link_fun = [lambda x: x, lambda x: x]
    fake_model.data_inputs.num_groups = 1

    stub_solver = Solver()
    stub_solver.set_model_instance(fake_model)
    return stub_solver
def test_multi_init_core_model(self, curve_fun):
    """Fit MultipleInitializations directly on a CoreModel (single group).

    Two things are checked after the fit: the prediction is within 2%
    relative error (2-norm) of the observations, and the objective at every
    candidate starting point is no smaller than the solver's reported
    optimal value.
    """
    params_set, params_true, x_true = simulate_params(1)
    data = simulate_data(curve_fun, params_true)
    core_model = CoreModel(params_set, curve_fun, normal_loss)

    # Candidate starting points, one per row.
    num_init = 5
    xs_init = np.random.randn(num_init, x_true.shape[1] * 2)

    def sample_fun(x):
        # The argument is ignored: the same candidates are always returned.
        return xs_init

    solver = MultipleInitializations(sample_fun)
    solver.set_model_instance(core_model)
    solver.fit(data=data, options={'maxiter': 200})

    times = data[0]['t'].to_numpy()
    y_pred = solver.predict(t=times)
    y_true = data[0]['obs'].to_numpy()
    rel_error = np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true)
    assert rel_error < 2e-2

    for candidate in xs_init:
        assert core_model.objective(candidate, data) >= solver.fun_val_opt
def test_scipyopt_core_model(self, curve_fun, seed):
    """Fit ScipyOpt on a single-group CoreModel and check the prediction.

    The NumPy global random state is seeded with ``seed`` before the data
    are simulated.  The fitted prediction must be within 2% relative error
    (2-norm) of the observations.
    """
    np.random.seed(seed)
    params_set, params_true, _ = simulate_params(1)
    data = simulate_data(curve_fun, params_true)

    core_model = CoreModel(params_set, curve_fun, normal_loss)
    solver = ScipyOpt(core_model)
    solver.fit(data=data, options={'maxiter': 200})

    times = data[0]['t'].to_numpy()
    y_pred = solver.predict(t=times)
    y_true = data[0]['obs'].to_numpy()

    rel_error = np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true)
    assert rel_error < 2e-2
def test_gaussian_mixture_integration(self, curve_fun):
    """Compare GaussianMixturesIntegration against a plain ScipyOpt fit.

    Both solvers are fitted on the same simulated single-group data with
    the same iteration limit.  The comparison is only made when the curve
    function is named ``gaussian_pdf``; there the mixture prediction must be
    strictly closer (2-norm) to the observations than the baseline.
    """
    gm_model = GaussianMixtures(stride=1.0, size=3)
    params_set, params_true, _ = simulate_params(1)
    data = simulate_data(curve_fun, params_true)
    core_model = CoreModel(params_set, curve_fun, normal_loss)
    y_true = data[0]['obs'].to_numpy()

    fit_options = {'maxiter': 20}

    solver = GaussianMixturesIntegration(gm_model)
    solver.set_model_instance(core_model)
    solver.fit(data=data, options=fit_options)
    y_pred = solver.predict(t=data[0]['t'].to_numpy())

    # Drop the data held by the model before the baseline solver uses it.
    core_model.erase_data()
    if curve_fun.__name__ == 'gaussian_pdf':
        solver_base = ScipyOpt(core_model)
        solver_base.fit(data=data, options={'maxiter': 20})
        y_pred_base = solver_base.predict(t=data[0]['t'].to_numpy())

        mixture_error = np.linalg.norm(y_pred - y_true)
        baseline_error = np.linalg.norm(y_pred_base - y_true)
        assert mixture_error < baseline_error
def test_multi_init_outside_gaussian_mixture(self, curve_fun):
    """Run MultipleInitializations around a GaussianMixturesIntegration solver.

    For the curve function named ``gaussian_pdf`` the fitted prediction must
    be within 2% relative error (2-norm) of the observations.  For every
    curve function, the objective at each candidate starting point must be
    no smaller than the solver's reported optimal value.
    """
    params_set, params_true, x_true = simulate_params(1)
    data = simulate_data(curve_fun, params_true)
    core_model = CoreModel(params_set, curve_fun, normal_loss)

    gm_model = GaussianMixtures(stride=1.0, size=3)
    solver_inner = GaussianMixturesIntegration(gm_model)

    # Candidate starting points, one per row.
    num_init = 5
    xs_init = np.random.randn(num_init, x_true.shape[1] * 2)

    def sample_fun(x):
        # The argument is ignored: the same candidates are always returned.
        return xs_init

    solver = MultipleInitializations(sample_fun)
    solver.set_solver(solver_inner)
    solver.set_model_instance(core_model)
    solver.fit(data=data, options={'maxiter': 200})

    if curve_fun.__name__ == 'gaussian_pdf':
        y_pred = solver.predict(t=data[0]['t'].to_numpy())
        y_true = data[0]['obs'].to_numpy()
        rel_error = np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true)
        assert rel_error < 2e-2

    # The objective is evaluated with ``None`` in place of the data here.
    for candidate in xs_init:
        assert core_model.objective(candidate, None) >= solver.fun_val_opt
link_fun=exp_fun, variables=[a_intercept]) beta = Parameter(param_name='beta', link_fun=identity_fun, variables=[b_intercept, b_social_distance]) p = Parameter(param_name='p', link_fun=exp_fun, variables=[phi_intercept]) parameters = ParameterSet([alpha, beta, p]) optimizer_options = { 'ftol': 1e-12, 'gtol': 1e-12, } model = CoreModel(param_set=parameters, curve_fun=gaussian_cdf, loss_fun=normal_loss) solver = ScipyOpt(model) solver.fit(data=data._get_df(copy=True, return_specs=True), options=optimizer_options) params_estimate = model.get_params(solver.x_opt, expand=True) # ------------------------------------------------------------------------- # check result for i in range(3): assert numpy.allclose(params_estimate[i], params_value[i], rtol=rel_tol) print('covariate.py: OK') """ ``` {end_markdown covariate_xam}
re_gprior=re_gprior) alpha = Parameter('alpha', link_fun=np.exp, variables=[alpha_fe]) beta = Parameter('beta', link_fun=lambda x: x, variables=[beta_fe]) p = Parameter('p', link_fun=lambda x: x, variables=[p_fe]) params = ParameterSet([alpha, beta, p]) """ ``` ### Models and Solvers We now need a `Model` that contains our parameter set, the curve function we want to fit, and the loss function for the optimization. We also need to define a `Solver` that will actually perform the optimization. Finally, we will fit the solver to our simulated data. ```python """ model = CoreModel(param_set=params, curve_fun=ln_gaussian_pdf, loss_fun=normal_loss) solver = ScipyOpt() """ ``` ### Use the Prior Initializer The purpose of the prior initializer is to obtain better-informed priors for later model fits. There are many different things that one could do to get good priors based on subsets of the data. Each of these approaches is implemented in a separate [`PriorInitializerComponent`](PriorInitializerComponent.md). Here we will use the `BetaPrior` prior initializer component; the goal is to estimate the mean and variance of the random effects and then set that as the fixed effects prior variance for a **new** parameter set that can be used in later model fits. Even though we only have 10 groups, our random effect variance should get pretty close to our simulated value. The `BetaPrior()` will update the fixed effects Gaussian prior for the `beta` parameter to what is estimated based on a joint model fit with random effects.
variables=[a_intercept]) beta = Parameter(param_name='beta', link_fun=lambda x: x, variables=[b_intercept]) p = Parameter(param_name='p', link_fun=numpy.exp, variables=[phi_intercept]) parameters = ParameterSet([alpha, beta, p]) optimizer_options = { 'disp': 0, 'maxiter': 300, 'ftol': 1e-8, 'gtol': 1e-8, } model = CoreModel(param_set=parameters, curve_fun=expit, loss_fun=normal_loss) solver = ScipyOpt(model) solver.fit(data=data._get_df(copy=True, return_specs=True), options=optimizer_options) params_estimate = model.get_params(solver.x_opt, expand=False) if not solver.success: print(solver.status) params_true = [alpha_true, beta_true, p_true] for k in range(3): est = params_estimate[k] truth = params_true[k] rel_error = est / truth - 1.0 # for i in range(n_group) : # print(est[i], truth[i], rel_error[i]) assert numpy.allclose(params_estimate[i], params_true[i], rtol=rel_tol)
def test_core_model_run(data, param_set, curve_fun, loss_fun):
    """Exercise a CoreModel end to end and check the state it derives.

    After one objective evaluation the test inspects the model's data
    inputs (covariate matrix widths, group sizes, link functions), its
    bounds and its initial point.  It then calls gradient and predict, and
    finally checks that a bound modified on detached data inputs is visible
    on the model once the objective has been evaluated with those inputs.
    """
    core = CoreModel(param_set, curve_fun, loss_fun)
    num_fe = param_set.num_fe
    x0 = np.array([0.0] * num_fe * 4)

    # The first objective call is what populates ``core.data_inputs``.
    core.objective(x0, data)
    inputs = core.data_inputs
    num_groups = inputs.num_groups

    # Column counts of the first three covariate matrices.
    covs_mat = inputs.covariates_matrices
    for idx, width in enumerate((1, 1, 2)):
        assert covs_mat[idx].shape[1] == width

    assert core.data_inputs.group_sizes == [n_B, n_A, n_C]
    assert len(core.data_inputs.var_link_fun) == num_fe

    # Bounds: one row for each entry of the initial point checked below.
    assert core.bounds.shape == (num_fe * (num_groups + 1), 2)
    upper = [bound[1] for bound in core.bounds]
    assert upper[:4] == [np.inf] * param_set.num_fe
    expected_upper = np.array([1.0, 2.0, 3.0, 3.0] * num_groups)
    assert np.linalg.norm(np.array(upper[4:]) - expected_upper) < 1e-10

    # Initial point: the first four entries, then a repeating per-group part.
    assert len(core.x_init) == num_fe * (num_groups + 1)
    head_gap = core.x_init[:4] - np.array([0.0, 0.1, 0.2, 0.2])
    assert np.linalg.norm(head_gap) < 1e-10
    tail_gap = core.x_init[4:] - [0.3, 0.4, 0.5, 0.5] * num_groups
    assert np.linalg.norm(tail_gap) < 1e-10

    core.gradient(x0, data)
    core.predict(x0, np.arange(10, 16))

    # Change a bound on the detached inputs, pass them back through the
    # objective, and confirm the model exposes the modified value.
    data_inputs = core.detach_data()
    data_inputs.bounds[0][0] = -2.0
    core.objective(x0, data_inputs)
    assert core.data_inputs.bounds[0][0] == -2.0