Example #1
def test_LFM_gradient(artificial_data, models):
    reg_truth = Regressor(ss=models[0])
    reg_truth._use_penalty = False
    reg_truth._use_jacobian = True
    dt, u, u1, y, *_ = reg_truth._prepare_data(artificial_data, ['To', 'Qh'],
                                               'Ti')

    reg_lfm = Regressor(ss=models[1])
    reg_lfm._use_penalty = False
    reg_lfm._use_jacobian = True

    eta_truth = deepcopy(reg_truth.ss.parameters.eta_free)
    eta_lfm = deepcopy(reg_lfm.ss.parameters.eta_free)

    grad_truth = reg_truth._eval_dlog_posterior(eta_truth, dt, u, u1, y)[1]
    grad_lfm = reg_lfm._eval_dlog_posterior(eta_lfm, dt, u, u1, y)[1]

    fct = nd.Gradient(reg_truth._eval_log_posterior)
    grad_truth_approx = fct(eta_truth, dt, u, u1, y)

    assert np.all(eta_truth == eta_lfm)
    assert ned(grad_truth, grad_lfm) < 1e-7
    assert ned(grad_truth, grad_truth_approx) < 1e-7
    assert np.all(np.sign(grad_truth) == np.sign(grad_truth_approx))
    assert np.all(np.sign(grad_truth) == np.sign(grad_lfm))
    assert grad_truth == pytest.approx(grad_truth_approx, rel=1e-6)
    assert grad_truth == pytest.approx(grad_lfm, rel=1e-6)
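The ned helper used in the assertions is not shown in this listing; a minimal sketch, assuming it computes the normalized Euclidean distance between two gradient vectors:

import numpy as np

def ned(a, b):
    # Hypothetical reconstruction of the test helper: 0 when the two
    # gradients coincide, bounded above by 1.
    return np.linalg.norm(a - b) / (np.linalg.norm(a) + np.linalg.norm(b))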
Example #2
def test_gp_product_creation(data_Periodic, models):
    m, qp = models
    period, mscale, lscale, decay, sigv = np.random.uniform(0.3, 3.0, 5)

    par_Periodic = [
        dict(name='period', value=period, transform='log'),
        dict(name='mscale', value=mscale, transform='log'),
        dict(name='lscale', value=lscale, transform='log'),
        dict(name='sigv', value=sigv, transform='log'),
    ]

    par_Matern = [
        dict(name='mscale', value=1.0, transform='fixed'),
        dict(name='lscale', value=decay, transform='log'),
        dict(name='sigv', value=0.0, transform='fixed'),
    ]

    par_QuasiPeriodic = [
        dict(name='period', value=period, transform='log'),
        dict(name='mscale', value=mscale, transform='log'),
        dict(name='lscale', value=lscale, transform='log'),
        dict(name='sigv', value=sigv, transform='log'),
        dict(name='decay', value=decay, transform='log'),
    ]

    reg1 = Regressor(ss=qp(par_QuasiPeriodic))
    reg2 = Regressor(ss=Periodic(par_Periodic) * m(par_Matern))
    check_grad(data_Periodic, reg1, reg2)
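check_grad is defined elsewhere in the test suite; a plausible sketch, assuming it compares the analytical gradients of two equivalent models on the same data (the 'y' output name, the None inputs, and the ned helper sketched above are assumptions):

def check_grad(df, reg1, reg2, tol=1e-7):
    # Hypothetical helper: algebraically equivalent state-space forms must
    # yield the same log-posterior gradient on the same data (cf. Example #1).
    dt, u, u1, y, *_ = reg1._prepare_data(df, None, 'y')
    grad1 = reg1._eval_dlog_posterior(reg1.ss.parameters.eta_free, dt, u, u1, y)[1]
    grad2 = reg2._eval_dlog_posterior(reg2.ss.parameters.eta_free, dt, u, u1, y)[1]
    assert ned(grad1, grad2) < tol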
Example #3
def test_save_version_number():
    from pysip import __version__

    save_model('test', Regressor(None))
    load_reg = load_model('test')

    assert load_reg.__version__ == __version__
Example #4
def test_save_model_to_pickle(reg):
    reg = Regressor(reg)
    reg.ss.parameters.theta = np.random.uniform(1e-1, 1,
                                                len(reg.ss.parameters.theta))
    reg.ss.update_continuous_dssm()
    dA = reg.ss.dA
    dB = reg.ss.dB
    dC = reg.ss.dC
    dD = reg.ss.dD
    dQ = reg.ss.dQ
    dR = reg.ss.dR
    dx0 = reg.ss.dx0
    dP0 = reg.ss.dP0

    save_model('test', reg)
    load_reg = load_model('test')

    for k in dA.keys():
        assert np.allclose(dA[k], load_reg.ss.dA[k])
        assert np.allclose(dB[k], load_reg.ss.dB[k])
        assert np.allclose(dC[k], load_reg.ss.dC[k])
        assert np.allclose(dD[k], load_reg.ss.dD[k])
        assert np.allclose(dQ[k], load_reg.ss.dQ[k])
        assert np.allclose(dR[k], load_reg.ss.dR[k])
        assert np.allclose(dx0[k], load_reg.ss.dx0[k])
        assert np.allclose(dP0[k], load_reg.ss.dP0[k])

    assert id(reg) != id(load_reg)
    for a, b in zip(reg.ss.parameters, load_reg.ss.parameters):
        assert a == b

    os.remove('test.pickle')
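save_model and load_model behave as thin pickle wrappers in these tests; a minimal sketch consistent with Examples #3 and #4 (stamping the package version is an assumption inferred from the __version__ assertion in Example #3):

import pickle
from pysip import __version__

def save_model(name, reg):
    # Record the package version so it can be checked after loading.
    reg.__version__ = __version__
    with open(f'{name}.pickle', 'wb') as f:
        pickle.dump(reg, f)

def load_model(name):
    with open(f'{name}.pickle', 'rb') as f:
        return pickle.load(f)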
Example #5
def test_fit_hmc_m32():
    """Generate samples from the posterior distribution"""
    n_cpu = 1
    np.random.seed(1)
    N = 50
    t = np.linspace(0, 1, N)
    y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(N) * 0.1
    df = pd.DataFrame(index=t, data=y, columns=['y'])

    par = [
        dict(name='mscale', value=9.313e-01, bounds=(0, None), prior=Gamma(4, 4)),
        dict(name='lscale', value=1.291e-01, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
        dict(name='sigv', value=9.241e-02, bounds=(0, None), prior=InverseGamma(3.5, 0.5)),
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(df=df, outputs='y', options={'init': 'fixed', 'n_cpu': n_cpu})
    # return df, reg, fit
    diagnostic = fit.diagnostic
    assert isinstance(diagnostic, pd.DataFrame)
    assert np.all(diagnostic['ebfmi'] > 0.8)
    assert np.all(diagnostic['mean accept_prob'] > 0.7)
    assert np.sum(diagnostic['sum diverging']) == 0
    assert np.sum(diagnostic['sum max_tree_depth']) == 0

    summary = az.summary(fit.posterior, round_to='none')
    assert isinstance(summary, pd.DataFrame)
    assert np.all(summary['r_hat'] < 1.01)
    assert np.all(summary[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    # mcse for ess_mean = 1000
    assert summary['mean']['mscale'] == pytest.approx(1.107023, abs=3 * 0.009261)
    assert summary['mean']['lscale'] == pytest.approx(0.146614, abs=3 * 0.001074)
    assert summary['mean']['sigv'] == pytest.approx(0.096477, abs=3 * 0.000515)
    assert summary['mean']['lp_'] == pytest.approx(2.919439, abs=3 * 0.038186)

    xm, xsd = reg.posterior_state_distribution(
        trace=fit.posterior, df=df, outputs='y', smooth=True, n_cpu=n_cpu
    )
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    assert xm.shape == (4000, len(df), reg.ss.nx)
    assert xsd.shape == (4000, len(df), reg.ss.nx)
    assert np.mean(np.mean((df['y'].values - xm[:, :, 0]) ** 2, axis=1) ** 0.5) == pytest.approx(
        5.839e-2, abs=1e-2
    )

    ym, ysd = reg.posterior_predictive(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    assert ym.shape == (4000, len(df))
    assert ysd.shape == (4000, len(df))
    assert np.mean(np.mean((df['y'].values - ym) ** 2, axis=1) ** 0.5) == pytest.approx(
        3.728e-2, abs=1e-2
    )

    pw_loglik = reg.pointwise_log_likelihood(trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(pw_loglik, dict)
    assert pw_loglik['log_likelihood'].shape == (4, 1000, len(df))
    # 0.026 ~ pw_loglik['log_likelihood'].sum(axis=2).std() / np.sqrt(1000)
    assert pw_loglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(-1.394, abs=3.256e-2)
Example #6
def gen_regressor(statespaces):
    """State-space generator for gradient test"""
    for ssm in statespaces:
        p = random_parameters(ssm)
        inputs = get_inputs(ssm)
        outputs = get_outputs(ssm)
        h = int(np.round(np.random.rand()))  # zero- or first-order hold, at random

        yield Regressor(ss=ssm(parameters=p, hold_order=h)), inputs, outputs
Example #7
def generate_regressor(*statespaces):
    """State-space generator for gradient test"""
    values = np.random.uniform(0.3, 2.0, 3)
    p = [
        dict(name='mscale', value=values[0], transform='log'),
        dict(name='lscale', value=values[1], transform='log'),
        dict(name='sigv', value=values[2], transform='log'),
    ]
    for ssm in statespaces:
        yield Regressor(ss=ssm(parameters=p))
Example #8
def test_Periodic(data_Periodic):
    p = [
        dict(name='period', transform='log'),
        dict(name='mscale', transform='log'),
        dict(name='lscale', transform='log'),
        dict(name='sigv', transform='log'),
    ]
    regressor = Regressor(ss=Periodic(parameters=p))
    regressor.ss.parameters.theta = np.random.uniform(0.3, 3.0, 4)
    check_grad_fd(data_Periodic, regressor)
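check_grad_fd presumably checks the analytical gradient against a finite-difference approximation, as Example #1 does inline with numdifftools; a sketch under that assumption (the 'y' output name and None inputs are assumptions):

import numdifftools as nd
import pytest

def check_grad_fd(df, regressor, rtol=1e-6):
    # Hypothetical helper: analytical vs. numerical gradient of the log-posterior.
    dt, u, u1, y, *_ = regressor._prepare_data(df, None, 'y')
    eta = regressor.ss.parameters.eta_free
    grad = regressor._eval_dlog_posterior(eta, dt, u, u1, y)[1]
    grad_fd = nd.Gradient(regressor._eval_log_posterior)(eta, dt, u, u1, y)
    assert grad == pytest.approx(grad_fd, rel=rtol)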
Example #9
def test_model_pickle_file_size_limit(ss, size_kb):
    model = Regressor(ss)
    model.ss.parameters.theta = np.random.uniform(
        1e-1, 1, len(model.ss.parameters.theta))
    model.ss.update_continuous_dssm()

    save_model('big', model)
    size = os.path.getsize('big.pickle')
    os.remove('big.pickle')

    assert size <= size_kb * 1000
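The ss and size_kb arguments suggest this test is parametrized; one plausible way the pairs could be supplied (the models and size limits below are illustrative, not the suite's actual values):

@pytest.mark.parametrize('ss, size_kb', [
    (Matern32(parameters), 100),
    (Periodic(parameters), 100),
])
def test_model_pickle_file_size_limit(ss, size_kb):
    ...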
Example #10
def generate_regressor_sum(*statespaces):
    """State-space generator for gradient test"""
    values = np.random.uniform(0.3, 3.0, 6)

    par_Periodic = [
        dict(name="period", value=values[0], bounds=(0.0, None)),
        dict(name='mscale', value=values[1], bounds=(0.0, None)),
        dict(name='lscale', value=values[2], bounds=(0.0, None)),
        dict(name='sigv', value=values[3], bounds=(0.0, None)),
    ]

    par_Matern = [
        dict(name="mscale", value=values[4], bounds=(0.0, None)),
        dict(name="lscale", value=values[5], bounds=(0.0, None)),
        dict(name="sigv", value=0.0, transform='fixed'),
    ]

    for ssm in statespaces:
        yield Regressor(Periodic(par_Periodic) + ssm(par_Matern))
Example #11
def generate_regressor_product(*statespaces):
    """State-space generator for gradient test"""
    values = np.random.uniform(1.0, 2.0, 5)

    par_Periodic = [
        dict(name='period', value=values[0], bounds=(0.0, None)),
        dict(name='mscale', value=values[1], bounds=(0.0, None)),
        dict(name='lscale', value=values[2], bounds=(0.0, None)),
        dict(name='sigv', value=values[3], bounds=(0.0, None)),
    ]

    par_Matern = [
        dict(name='mscale', value=1.0, transform='fixed'),
        dict(name='lscale', value=values[4], bounds=(0.0, None)),
        dict(name='sigv', value=0.0, transform='fixed'),
    ]

    for ssm in statespaces:
        yield Regressor(ss=Periodic(parameters=par_Periodic) *
                        ssm(parameters=par_Matern))
Example #12
# Generate artificial data
np.random.seed(1)
N = 20
t = np.sort(np.random.rand(1, N), axis=1).flatten()
y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(1, N) * 0.01
y = y.flatten()
df = pd.DataFrame(index=t, data=y, columns=['y'])

# Parameter settings for the Matérn covariance function with smoothness = 3/2
parameters = [
    dict(name='mscale', value=0.5, transform='log'),
    dict(name='lscale', value=0.5, transform='log'),
    dict(name='sigv', value=0.1, transform='log'),
]

reg = Regressor(Matern32(parameters))
fit_summary, corr_matrix, opt_summary = reg.fit(df=df, outputs='y')

# Fit results
print(f'\n{fit_summary}')

# Predict on test data
tnew = np.linspace(-0.1, 1.1, 500)
ym, ysd = reg.predict(df=df, outputs='y', tnew=tnew, smooth=True)

# Plot output mean and 95% credible intervals
sns.set_style('darkgrid')
sns.set_context('talk')
plt.plot(t, y, linestyle='', marker='+', mew=2, label='data', color='darkred')
plt.plot(tnew, ym, color='navy', label='mean')
plt.fill_between(tnew, ym - 1.96 * ysd, ym + 1.96 * ysd,
                 color='navy', alpha=0.2, label='95% CI')

Example #13
# t, n, period, alim and xlim are assumed defined earlier in the original script.
amplitude = alim[0] + (alim[1] - alim[0]) * np.random.random()
noise = 0.2 * np.random.randn(n)
y = amplitude * np.sin(2.0 * np.pi / period * t) + noise
y[y <= 0] = 0.0
data = pd.DataFrame(index=t, data=y, columns=['y'])

# Parameter settings for the Periodic covariance function
parameters = [
    dict(name='period', value=1.0, transform='fixed'),
    dict(name='mscale', value=1.0, transform='log'),
    dict(name='lscale', value=1.0, transform='log'),
    dict(name='sigv', value=0.1, transform='log'),
]

# Instantiate regressor with the Periodic covariance function
reg = Regressor(Periodic(parameters))

fit_summary, corr_matrix, opt_summary = reg.fit(df=data, outputs='y')

# Fit results
print(f'\n{fit_summary}')

# Predict on test data
tnew = np.linspace(xlim[0], xlim[1] + 1, 500)
ym, ysd = reg.predict(df=data, outputs='y', tnew=tnew, smooth=True)

# Plot output mean and 95% credible intervals
sns.set_style('darkgrid')
sns.set_context('talk')
plt.plot(t, y, linestyle='', marker='+', mew=2, label='data', color='darkred')
plt.plot(tnew, ym, color='navy', label='mean')
Example #14
# Matérn 3/2 for short-term effects
p4 = [
    dict(name='mscale', value=4.595e-01, transform='log'),
    dict(name='lscale', value=6.359e-01, transform='log'),
    dict(name='sigv', value=0.0, transform='fixed'),
]

# p1, p2, p3 and CO2_mean are assumed defined earlier in the original script.
k1 = Matern52(p1, name='k1')
k2 = Periodic(p2, name='k2')
k3 = Matern32(p3, name='k3')
k4 = Matern32(p4, name='k4')

# Compose covariance function
K = k1 + k2 * k3 + k4
reg = Regressor(K)

fit_summary, corr_matrix, opt_summary = reg.fit(df=df, outputs='CO2_fit')
print(f'\n{fit_summary}')

# generate new prediction time instants
tnew = np.arange(1958, 2030, 0.01)
ym, ysd = reg.predict(df=df, outputs='CO2_pred', smooth=True, tnew=tnew)

# Plot output mean and 95% credible intervals
sns.set_style('darkgrid')
sns.set_context('talk')
plt.plot(df.index, df['CO2'], color='darkred', label='data')
plt.plot(tnew, CO2_mean + ym, color='navy', label='mean')
plt.fill_between(
    tnew,
    CO2_mean + ym - 1.96 * ysd,
    CO2_mean + ym + 1.96 * ysd,
    color='navy',
    alpha=0.2,
    label='95% CI',
)
Example #15
def test_fit_hmc_m32():
    """Generate samples from the posterior distribution"""
    n_cpu = 1
    np.random.seed(1)
    N = 50
    t = np.linspace(0, 1, N)
    y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(N) * 0.1
    df = pd.DataFrame(index=t, data=y, columns=['y'])

    par = [
        dict(name='mscale', value=1.11, bounds=(0, None), prior=Gamma(4, 4)),
        dict(name='lscale',
             value=0.15,
             bounds=(0, None),
             prior=InverseGamma(3.5, 0.5)),
        dict(name='sigv',
             value=0.1,
             bounds=(0, None),
             prior=InverseGamma(3.5, 0.5)),
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(
        df=df,
        outputs='y',
        options={
            'init': 'fixed',
            'n_cpu': n_cpu,
            'dense_mass_matrix': True
        },
    )
    # return df, reg, fit
    diag_ = fit.diagnostic
    assert isinstance(diag_, pd.DataFrame)
    assert np.all(diag_['ebfmi'] > 0.9)
    assert np.all(diag_['mean accept_prob'] > 0.7)
    assert np.sum(diag_['sum diverging']) == 0
    assert np.sum(diag_['sum max_tree_depth']) == 0

    sum_ = az.summary(fit.posterior, round_to='none')
    assert isinstance(sum_, pd.DataFrame)
    assert np.all(sum_['r_hat'] < 1.01)
    assert np.all(sum_[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    assert sum_['mean']['mscale'] == pytest.approx(1.1, rel=5e-2)
    assert sum_['mean']['lscale'] == pytest.approx(1.5e-1, rel=5e-2)
    assert sum_['mean']['sigv'] == pytest.approx(9.6e-2, rel=5e-2)
    assert sum_['mean']['lp_'] == pytest.approx(2.9, rel=5e-2)

    xm, xsd = reg.posterior_state_distribution(trace=fit.posterior,
                                               df=df,
                                               outputs='y',
                                               smooth=True,
                                               n_cpu=n_cpu)
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    assert xm.shape == (4000, len(df), reg.ss.nx)
    assert xsd.shape == (4000, len(df), reg.ss.nx)
    assert np.mean(np.mean((df['y'].values - xm[:, :, 0])**2,
                           axis=1)**0.5) == pytest.approx(5.8e-2, rel=5e-2)

    ym, ysd = reg.posterior_predictive(trace=fit.posterior,
                                       df=df,
                                       outputs='y',
                                       n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    assert ym.shape == (4000, len(df))
    assert ysd.shape == (4000, len(df))
    assert np.mean(np.mean((df['y'].values - ym)**2,
                           axis=1)**0.5) == pytest.approx(3.7e-2, rel=5e-2)

    pwloglik = reg.pointwise_log_likelihood(trace=fit.posterior,
                                            df=df,
                                            outputs='y',
                                            n_cpu=n_cpu)
    assert isinstance(pwloglik, dict)
    assert pwloglik['log_likelihood'].shape == (4, 1000, len(df))
    assert pwloglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(
        -1.35, rel=5e-2)
Example #16
# Prepare data (as in Example #17)
df = pd.read_csv('data/armadillo/armadillo_data_H2.csv').set_index('Time')
df.drop(df.index[-1], axis=0, inplace=True)
inputs = ['T_ext', 'P_hea']
y0 = df['T_int'][0]
sT = 3600.0 * 24.0
df.index /= sT

# Parameter settings for second order dynamic thermal model
parameters = [
    dict(name='Ro', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='Ri', scale=1e-3, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='Cw', scale=1e7 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='Ci', scale=1e6 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='sigw_w', scale=1e-2 * sT ** 0.5, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='sigw_i', value=0, transform='fixed'),
    dict(name='sigv', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
    dict(name='x0_w', loc=25, scale=7, prior=Normal(0, 1)),
    dict(name='x0_i', value=y0, transform='fixed'),
    dict(name='sigx0_w', value=0.1, transform='fixed'),
    dict(name='sigx0_i', value=0.1, transform='fixed'),
]

# Instantiate the model and use the first order hold approximation
model = TwTi_RoRi(parameters, hold_order=1)
reg = Regressor(model)

# Dynamic Hamiltonian Monte Carlo with multinomial sampling
fit = reg.fit(df=df, inputs=inputs, outputs='T_int')

# Compute the posterior predictive distribution
ym = reg.posterior_predictive(trace=fit.posterior, df=df, inputs=inputs)[0]

sns.set_style('darkgrid')
sns.set_context('talk')
percentile_plot(
    df.index,
    ym,
    n=10,
    percentile_min=0,
    percentile_max=100,
)
Example #17
def test_armadillo_hmc(dense_mass_matrix):
    n_cpu = 1
    np.random.seed(4567)

    # Prepare data
    df = pd.read_csv('data/armadillo/armadillo_data_H2.csv').set_index('Time')
    df.drop(df.index[-1], axis=0, inplace=True)
    inputs = ['T_ext', 'P_hea']
    y0 = df['T_int'][0]
    sT = 3600.0 * 24.0
    df.index /= sT

    # Parameter settings for second order dynamic thermal model
    parameters = [
        dict(name='Ro', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Ri', scale=1e-3, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Cw', scale=1e7 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='Ci', scale=1e6 / sT, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='sigw_w', scale=1e-2 * sT ** 0.5, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='sigw_i', value=0, transform='fixed'),
        dict(name='sigv', scale=1e-2, bounds=(0, None), prior=Gamma(2, 0.1)),
        dict(name='x0_w', loc=25, scale=7, prior=Normal(0, 1)),
        dict(name='x0_i', value=y0, transform='fixed'),
        dict(name='sigx0_w', value=0.1, transform='fixed'),
        dict(name='sigx0_i', value=0.1, transform='fixed'),
    ]

    reg = Regressor(TwTi_RoRi(parameters, hold_order=1))

    fit = reg.fit(
        df=df,
        inputs=inputs,
        outputs='T_int',
        options={'n_cpu': n_cpu, 'dense_mass_matrix': dense_mass_matrix},
    )

    ym = reg.posterior_predictive(trace=fit.posterior, df=df, inputs=inputs, n_cpu=n_cpu)[0]

    pwloglik = reg.pointwise_log_likelihood(
        trace=fit.posterior, df=df, inputs=inputs, outputs='T_int', n_cpu=n_cpu
    )

    # Inference diagnosis
    diag_ = fit.diagnostic
    assert np.all(diag_['ebfmi'] > 0.9)
    assert np.all((diag_['mean accept_prob'] > 0.7) & (diag_['mean accept_prob'] < 0.9))
    assert np.sum(diag_['sum diverging']) == 0
    assert np.sum(diag_['sum max_tree_depth']) == 0

    # Convergence diagnosis
    sum_ = az.summary(fit.posterior, round_to='none')
    assert np.all(sum_['r_hat'] < 1.01)
    assert np.all(sum_[['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']] > 1000)
    assert sum_['mean']['Ro'] == pytest.approx(1.778e-02, rel=1e-2)
    assert sum_['mean']['Ri'] == pytest.approx(2.001e-03, rel=1e-2)
    assert sum_['mean']['Cw'] == pytest.approx(1.714e02, rel=1e-2)
    assert sum_['mean']['Ci'] == pytest.approx(1.908e01, rel=1e-2)
    assert sum_['mean']['sigw_w'] == pytest.approx(5.503e-01, rel=1e-2)
    assert sum_['mean']['sigv'] == pytest.approx(3.470e-02, rel=1e-2)
    assert sum_['mean']['x0_w'] == pytest.approx(2.659e01, rel=1e-2)
    assert sum_['mean']['lp_'] == pytest.approx(-3.012e02, rel=1e-2)

    # Predictions tests
    assert np.mean(np.mean((df['T_int'].values - ym) ** 2, axis=1) ** 0.5) == pytest.approx(
        8.358e-01, rel=1e-2
    )

    # Point-wise log-likelihood tests
    assert pwloglik['log_likelihood'].sum(axis=2).mean() == pytest.approx(3.274e02, rel=1e-2)
Example #18
def regressor(matern_statespace):
    return Regressor(ss=matern_statespace)
Example #19
def regressor_armadillo(statespace_armadillo):
    return Regressor(ss=statespace_armadillo)
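Examples #18 and #19 are pytest fixtures rather than tests; in the original suite they would plausibly carry the fixture decorator:

import pytest

@pytest.fixture
def regressor(matern_statespace):
    return Regressor(ss=matern_statespace)

@pytest.fixture
def regressor_armadillo(statespace_armadillo):
    return Regressor(ss=statespace_armadillo)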