def test_RandomForestRegressionLolo_2():
    """Non-trivial test case, including standard deviation."""

    n, m, xlen = 100, 600, 10
    train_inputs = np.reshape(np.linspace(-xlen / 2, +xlen / 2, n), (n, 1))
    train_labels = (train_inputs * 2 + 1).flatten()
    train_data = smlb.TabularData(data=train_inputs, labels=train_labels)
    train_data = smlb.LabelNoise(noise=smlb.NormalNoise(rng=0)).fit(train_data).apply(train_data)

    valid_inputs = np.reshape(np.linspace(-xlen / 2, +xlen / 2, m), (m, 1))
    valid_labels = (valid_inputs * 2 + 1).flatten()
    valid_data = smlb.TabularData(data=valid_inputs, labels=valid_labels)
    valid_data = smlb.LabelNoise(noise=smlb.NormalNoise(rng=1)).fit(valid_data).apply(valid_data)

    # jackknife uncertainty estimates require at least 12 trees; the defaults used here satisfy this
    rf = RandomForestRegressionLolo()
    preds = rf.fit(train_data).apply(valid_data)
    mae = smlb.MeanAbsoluteError().evaluate(valid_data.labels(), preds)

    # even a model that reproduces the (noisy) training labels exactly has an expected MAE
    # of 2/sqrt(pi) ~= 1.1284 here: the mean absolute difference between two independent
    # unit-normal draws (one from the training noise, one from the validation noise)
    assert np.allclose(mae, 1.13, atol=0.25)
    assert np.allclose(np.median(preds.stddev), 1, atol=0.5)
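    # sanity check of the 2/sqrt(pi) figure quoted above (a standalone sketch using only
    # NumPy, not part of the original test): the absolute difference of two independent
    # unit normals is |N(0, 2)|, whose mean is sqrt(2) * sqrt(2/pi) = 2/sqrt(pi) ~= 1.1284
    z = np.random.default_rng(0).standard_normal((2, 10**6))
    assert np.allclose(np.mean(np.abs(z[0] - z[1])), 2 / np.sqrt(np.pi), atol=0.01)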
Example no. 2
def test_GaussianProcessRegressionSklearn_3():
    """All predictive distributions.

    Linear noisy function, linear kernel + white noise kernel.
    The optimized noise level is expected to go to its true value.
    """

    kernel = skl.gaussian_process.kernels.DotProduct(
        sigma_0=0, sigma_0_bounds="fixed"
    ) + skl.gaussian_process.kernels.WhiteKernel(noise_level=1, noise_level_bounds=(1e-5, 1e5))
    gpr = GaussianProcessRegressionSklearn(kernel=kernel, random_state=1)
    n, nlsd = 100, 0.5
    data = smlb.TabularData(data=np.ones(shape=(n, 1)) * 2, labels=np.ones(shape=n) * 3)
    data = smlb.LabelNoise(noise=smlb.NormalNoise(stddev=nlsd, rng=1)).fit(data).apply(data)
    preds = gpr.fit(data).apply(data)

    assert preds.has_signal_part and preds.has_noise_part
    conf, noise = preds.signal_part, preds.noise_part

    assert np.allclose(conf.mean, np.ones(n) * 3, atol=1e-1)
    assert np.allclose(conf.stddev, np.ones(n) * nlsd, atol=1e-1)

    assert (preds.mean == conf.mean).all()
    assert np.allclose(preds.stddev, np.sqrt(np.square(conf.stddev) + np.square(nlsd)), atol=1e-1)

    assert np.allclose(noise.mean, np.zeros(shape=n))
    assert np.allclose(noise.stddev, nlsd, atol=1e-1)
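    # sanity check of the quadrature rule behind the total-stddev assert above (a plain-NumPy
    # sketch, not an smlb API): for independent Gaussian components, variances add, so
    # standard deviations combine as sqrt(sd_signal**2 + sd_noise**2)
    rng = np.random.default_rng(0)
    total = rng.normal(0.0, 0.3, 10**6) + rng.normal(0.0, nlsd, 10**6)
    assert np.allclose(np.std(total), np.sqrt(0.3**2 + nlsd**2), atol=0.01)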
Example no. 3
def test_NormalNoise():
    """Test Gaussian noise."""

    # fail without specifying pseudo-random number generator seed
    with pytest.raises(smlb.InvalidParameterError):
        smlb.NormalNoise()

    # unit normal
    noise = smlb.NormalNoise(rng=1).noise(100)
    assert sp.stats.normaltest(noise)[1] > 0.05

    # same seed leads to identical noise
    noise2 = smlb.NormalNoise(rng=1).noise(100)
    assert (noise == noise2).all()

    # non-unit normal
    noise = smlb.NormalNoise(mean=10, stddev=0.5, rng=1).noise(100)
    assert sp.stats.normaltest(noise)[1] > 0.05
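    # additional sanity check (a sketch, not part of the original test): the sample mean and
    # standard deviation of the 100 draws above should land near the requested parameters;
    # tolerances are several standard errors wide
    assert np.allclose(np.mean(noise), 10, atol=0.25)
    assert np.allclose(np.std(noise), 0.5, atol=0.2)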
Example no. 4
def test_LabelNoise_NormalNoise(fixture_TabularData_ComputedLabels):
    """Test LabelNoise with NormalNoise."""

    arange = np.arange(0, 100)
    data1 = fixture_TabularData_ComputedLabels(size=100, labelf=lambda arg: arg.flatten())
    data2 = smlb.LabelNoise(noise=smlb.NormalNoise(rng=1)).fit(data1).apply(data1)
    assert sp.stats.normaltest(data2.labels(arange) - arange)[1] > 0.05
    assert sp.stats.normaltest(data2.labels(arange))[1] < 0.05

    # repeated evaluation of labels will yield different values
    assert (data2.labels(arange) != data2.labels(arange)).any()
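    # the same effect illustrated with plain NumPy (a sketch of the analogous behaviour,
    # not of smlb internals): a stateful generator advances between draws, so two
    # consecutive calls return different samples
    rng = np.random.default_rng(1)
    assert (rng.standard_normal(100) != rng.standard_normal(100)).any()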