Example #1
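    # Three-group simulation: a MultipleInitializations solver is wrapped in
    # SmartInitialization, and each group's prediction is checked for a
    # relative error below 5%.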
    def test_smart_initialization(self, curve_fun):
        np.random.seed(100)
        num_groups = 3
        params_set, params_true, x_true = simulate_params(num_groups)
        data = simulate_data(curve_fun, params_true)
        core_model = CoreModel(params_set, curve_fun, normal_loss)

        num_init = 3
        xs_init = -np.random.rand(num_init, x_true.shape[1] *
                                  (num_groups + 1)) * 3
        sample_fun = lambda x: xs_init
        solver_inner = MultipleInitializations(sample_fun)

        solver = SmartInitialization()
        solver.set_solver(solver_inner)
        solver.set_model_instance(core_model)
        solver.fit(data=data,
                   options={
                       'maxiter': 500,
                       'ftol': 1e-16,
                       'gtol': 1e-16
                   })

        ys = data[0]['obs'].to_numpy()
        ts = data[0]['t'].to_numpy()
        start = 0
        for i, s in enumerate(core_model.data_inputs.group_sizes):
            y_true = ys[start:start + s]
            y_pred = solver.predict(t=ts[start:start + s],
                                    is_multi_groups=True)[i]
            assert np.linalg.norm(y_pred -
                                  y_true) / np.linalg.norm(y_true) < 5e-2
            start += s
Example #2
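# Build a Solver around a stub CoreModel with hand-filled DataInputs,
# so tests can exercise solver plumbing without running a real fit.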
def fake_alpha_beta_solver(parameter_set):
    solver = Solver()
    # Fake arguments
    model = CoreModel(param_set=parameter_set,
                      curve_fun=lambda x: x,
                      loss_fun=lambda x: x)
    model.data_inputs = DataInputs(
        t=np.array([0., 1., 2.]),
        obs=np.array([0., 1., 2.]),
        obs_se=np.array([0., 1., 2.]),
        group_sizes=[1],
        covariates_matrices=[np.array([[1.]]),
                             np.array([[1.]])])
    model.data_inputs.var_link_fun = [lambda x: x, lambda x: x]
    model.data_inputs.num_groups = 1
    solver.set_model_instance(model)
    return solver
Example #3
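    # Fit with five fixed random initializations and check both the prediction
    # error and that the optimum is at least as good as every starting point.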
    def test_multi_init_core_model(self, curve_fun):
        params_set, params_true, x_true = simulate_params(1)
        data = simulate_data(curve_fun, params_true)
        core_model = CoreModel(params_set, curve_fun, normal_loss)

        num_init = 5
        xs_init = np.random.randn(num_init, x_true.shape[1] * 2)
        sample_fun = lambda x: xs_init
        solver = MultipleInitializations(sample_fun)
        solver.set_model_instance(core_model)
        solver.fit(data=data, options={'maxiter': 200})
        y_pred = solver.predict(t=data[0]['t'].to_numpy())
        y_true = data[0]['obs'].to_numpy()
        assert np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true) < 2e-2

        for x in xs_init:
            assert core_model.objective(x, data) >= solver.fun_val_opt
Example #4
    def test_scipyopt_core_model(self, curve_fun, seed):
        # Fit a single-group model with the plain ScipyOpt solver and check
        # that the relative prediction error is below 2%.
        np.random.seed(seed)
        params_set, params_true, _ = simulate_params(1)
        data = simulate_data(curve_fun, params_true)
        core_model = CoreModel(params_set, curve_fun, normal_loss)
        solver = ScipyOpt(core_model)
        solver.fit(data=data, options={'maxiter': 200})
        y_pred = solver.predict(t=data[0]['t'].to_numpy())
        y_true = data[0]['obs'].to_numpy()
        assert np.linalg.norm(y_pred - y_true) / np.linalg.norm(y_true) < 2e-2
Example #5
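    # Compare GaussianMixturesIntegration against a plain ScipyOpt fit; for the
    # gaussian_pdf curve the mixture solver should give a smaller residual.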
    def test_gaussian_mixture_integration(self, curve_fun):
        gm_model = GaussianMixtures(stride=1.0, size=3)
        params_set, params_true, _ = simulate_params(1)
        data = simulate_data(curve_fun, params_true)
        core_model = CoreModel(params_set, curve_fun, normal_loss)
        y_true = data[0]['obs'].to_numpy()

        solver = GaussianMixturesIntegration(gm_model)
        solver.set_model_instance(core_model)
        solver.fit(data=data, options={'maxiter': 20})
        y_pred = solver.predict(t=data[0]['t'].to_numpy())
        core_model.erase_data()

        if curve_fun.__name__ == 'gaussian_pdf':
            solver_base = ScipyOpt(core_model)
            solver_base.fit(data=data, options={'maxiter': 20})
            y_pred_base = solver_base.predict(t=data[0]['t'].to_numpy())

            assert np.linalg.norm(y_pred -
                                  y_true) < np.linalg.norm(y_pred_base -
                                                           y_true)
Example #6
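    # Combine MultipleInitializations (outer) with GaussianMixturesIntegration
    # (inner) and check both the prediction error and the optimality of the
    # returned solution.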
    def test_multi_init_outside_gaussian_mixture(self, curve_fun):
        params_set, params_true, x_true = simulate_params(1)
        data = simulate_data(curve_fun, params_true)
        core_model = CoreModel(params_set, curve_fun, normal_loss)

        gm_model = GaussianMixtures(stride=1.0, size=3)
        solver_inner = GaussianMixturesIntegration(gm_model)

        num_init = 5
        xs_init = np.random.randn(num_init, x_true.shape[1] * 2)
        sample_fun = lambda x: xs_init
        solver = MultipleInitializations(sample_fun)
        solver.set_solver(solver_inner)
        solver.set_model_instance(core_model)
        solver.fit(data=data, options={'maxiter': 200})

        if curve_fun.__name__ == 'gaussian_pdf':
            y_pred = solver.predict(t=data[0]['t'].to_numpy())
            y_true = data[0]['obs'].to_numpy()
            assert np.linalg.norm(y_pred -
                                  y_true) / np.linalg.norm(y_true) < 2e-2

        for x in xs_init:
            assert core_model.objective(x, None) >= solver.fun_val_opt
Example #7
alpha = Parameter(param_name='alpha',
                  link_fun=exp_fun,
                  variables=[a_intercept])
beta = Parameter(param_name='beta',
                 link_fun=identity_fun,
                 variables=[b_intercept, b_social_distance])
p = Parameter(param_name='p', link_fun=exp_fun, variables=[phi_intercept])

parameters = ParameterSet([alpha, beta, p])

optimizer_options = {
    'ftol': 1e-12,
    'gtol': 1e-12,
}

model = CoreModel(param_set=parameters,
                  curve_fun=gaussian_cdf,
                  loss_fun=normal_loss)
solver = ScipyOpt(model)
solver.fit(data=data._get_df(copy=True, return_specs=True),
           options=optimizer_options)
params_estimate = model.get_params(solver.x_opt, expand=True)

# -------------------------------------------------------------------------
# check result

for i in range(3):
    assert numpy.allclose(params_estimate[i], params_value[i], rtol=rel_tol)

print('covariate.py: OK')
""" ```
{end_markdown covariate_xam}
Example #8
                re_gprior=re_gprior)
alpha = Parameter('alpha', link_fun=np.exp, variables=[alpha_fe])
beta = Parameter('beta', link_fun=lambda x: x, variables=[beta_fe])
p = Parameter('p', link_fun=lambda x: x, variables=[p_fe])

params = ParameterSet([alpha, beta, p])
""" ```

### Models and Solvers
We now need a `Model` that holds our parameter set, the curve function we want to fit, and the loss
function for the optimization. We also need a `Solver` that will actually perform the optimization.
Finally, we will fit the solver to our simulated data.

```python """
model = CoreModel(param_set=params,
                  curve_fun=ln_gaussian_pdf,
                  loss_fun=normal_loss)
solver = ScipyOpt()
""" ```

### Use the Prior Initializer
The purpose of using the prior initializer is that it gets smarter priors for later on. Therefore, there are many
different things that one could do to get good priors based on subsets of the data. Each of these types
is implemented in a separate [`PriorInitializerComponent`](PriorInitializerComponent.md). Here we will use
the `BetaPrior` prior initializer component; the goal is to estimate the mean and variance of the random effects
and then set that as the fixed effects prior variance for a **new** parameter set that can be used in later model fits.

Even though we only have 10 groups, the estimated random effect variance should get fairly close to the simulated value.
The `BetaPrior()` component will update the fixed effects Gaussian prior for the `beta` parameter to the value
estimated from a joint model fit with random effects, as in the sketch below.
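A minimal sketch of how this step might look, assuming a `PriorInitializer` class that accepts a list of
`PriorInitializerComponent`s and whose `fit` method returns a new `ParameterSet`; the constructor and `fit`
signature here are assumptions, not confirmed by this excerpt.

```python """
# Hypothetical usage: the PriorInitializer and BetaPrior names follow the docs
# above, but the exact constructor arguments and fit signature are assumed.
prior_initializer = PriorInitializer([BetaPrior()])
new_params = prior_initializer.fit(
    data=data,                # the simulated data from earlier
    model_prototype=model,    # the CoreModel defined above
    solver_prototype=solver   # the ScipyOpt solver defined above
)
# new_params is a new ParameterSet whose beta fixed effects Gaussian prior
# has been updated from the joint fit with random effects.
""" ```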
Example #9
                  variables=[a_intercept])
beta = Parameter(param_name='beta',
                 link_fun=lambda x: x,
                 variables=[b_intercept])
p = Parameter(param_name='p', link_fun=numpy.exp, variables=[phi_intercept])

parameters = ParameterSet([alpha, beta, p])

optimizer_options = {
    'disp': 0,
    'maxiter': 300,
    'ftol': 1e-8,
    'gtol': 1e-8,
}

model = CoreModel(param_set=parameters, curve_fun=expit, loss_fun=normal_loss)
solver = ScipyOpt(model)
solver.fit(data=data._get_df(copy=True, return_specs=True),
           options=optimizer_options)
params_estimate = model.get_params(solver.x_opt, expand=False)

if not solver.success:
    print(solver.status)
params_true = [alpha_true, beta_true, p_true]
for k in range(3):
    est = params_estimate[k]
    truth = params_true[k]
    rel_error = est / truth - 1.0  # per-group relative error, useful for debugging
    assert numpy.allclose(est, truth, rtol=rel_tol)
Example #10
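# Smoke test of CoreModel: checks covariate matrix shapes, group sizes,
# variable link functions, bounds, the initial point, and detach/re-attach
# of the cached DataInputs.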
def test_core_model_run(data, param_set, curve_fun, loss_fun):
    model = CoreModel(param_set, curve_fun, loss_fun)
    num_fe = param_set.num_fe
    x0 = np.array([0.0] * num_fe * 4)
    model.objective(x0, data)
    num_groups = model.data_inputs.num_groups

    covs_mat = model.data_inputs.covariates_matrices
    assert covs_mat[0].shape[1] == 1
    assert covs_mat[1].shape[1] == 1
    assert covs_mat[2].shape[1] == 2

    assert model.data_inputs.group_sizes == [n_B, n_A, n_C]
    assert len(model.data_inputs.var_link_fun) == num_fe

    assert model.bounds.shape == (num_fe * (num_groups + 1), 2)
    ub = [b[1] for b in model.bounds]
    assert ub[:4] == [np.inf] * param_set.num_fe
    assert np.linalg.norm(
        np.array(ub[4:]) - np.array([1.0, 2.0, 3.0, 3.0] * num_groups)) < 1e-10

    assert len(model.x_init) == num_fe * (num_groups + 1)
    assert np.linalg.norm(model.x_init[:4] -
                          np.array([0.0, 0.1, 0.2, 0.2])) < 1e-10
    assert np.linalg.norm(model.x_init[4:] -
                          [0.3, 0.4, 0.5, 0.5] * num_groups) < 1e-10

    model.gradient(x0, data)
    model.predict(x0, np.arange(10, 16))

    data_inputs = model.detach_data()
    data_inputs.bounds[0][0] = -2.0
    model.objective(x0, data_inputs)
    assert model.data_inputs.bounds[0][0] == -2.0