Example #1
def check_classifier_ratio(clf, method, cv):
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01
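The snippets in this listing omit their imports. A minimal preamble consistent with the calls they make (the carl module paths are assumptions based on the ibab/carl package layout, not part of the listing) could look like:

import types

import numpy as np
import scipy.stats as st
import theano
import theano.tensor as T

from nose.tools import assert_raises
from numpy.testing import assert_array_equal, assert_array_almost_equal
from sklearn.linear_model import ElasticNetCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils import check_random_state

# Assumed carl imports (paths follow the ibab/carl project layout):
from carl.distributions import (Normal, MultivariateNormal, Exponential,
                                Mixture, Join, LinearTransform,
                                Histogram, KernelDensity)
from carl.learning import (CalibratedClassifierCV, ParameterStacker,
                           ParameterizedRegressor)
from carl.ratios import ClassifierRatio, DecomposedRatio, KnownDensityRatio

The notebook-style examples further down additionally rely on import numpy, import corner, import matplotlib.pyplot as plt, and from sklearn.datasets import make_sparse_spd_matrix; CalibratedClassifierRatio in Example #7 comes from the older project revision referenced there.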
Example #2
def test_fit_with_constraints():
    p = Normal()
    X = st.norm(loc=0.05, scale=1.0).rvs(5000, random_state=0).reshape(-1, 1)
    p.fit(X,
          constraints=[{
              "param": p.mu,
              "type": "ineq",
              "fun": lambda mu: mu
          }, {
              "param": p.mu,
              "type": "ineq",
              "fun": lambda mu: 0.1 - mu
          }, {
              "param": p.sigma,
              "type": "ineq",
              "fun": lambda sigma: sigma
          }, {
              "param": (p.mu, p.sigma),
              "type": "ineq",
              "fun": lambda mu, sigma: mu * sigma
          }])

    assert p.mu.get_value() >= 0.0
    assert p.mu.get_value() <= 0.1
    assert p.sigma.get_value() >= 0.0
    assert p.mu.get_value() * p.sigma.get_value() >= 0.0
Example #3
def test_mixin_composition():
    # Check composed expressions as parameters
    a = theano.shared(0.0)
    b = theano.shared(-1.0)
    mu = a + b - 1.0
    sigma = T.abs_(a * b)
    p = Normal(mu=mu, sigma=sigma)
    assert a in p.parameters_
    assert b in p.parameters_

    # Compose parameters with observed variables
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix(name="y")
    p = Normal(mu=a * y + b)
    assert len(p.parameters_) == 3
    assert a in p.parameters_
    assert b in p.parameters_
    assert p.sigma in p.parameters_
    assert p.mu not in p.parameters_
    assert len(p.observeds_) == 1
    assert y in p.observeds_

    # Check signatures
    data_X = np.random.rand(10, 1)
    data_y = np.random.rand(10, 1)
    p.pdf(X=data_X, y=data_y)
    p.cdf(X=data_X, y=data_y)
    p.rvs(10, y=data_y)

    # Check error
    a = theano.shared(1.0)
    b = theano.shared(0.0)
    y = T.dmatrix()  # y must be named
    assert_raises(ValueError, Normal, mu=a * y + b)
Example #4
def test_mixture_api():
    # Check basic API
    p1 = Normal(mu=0.0, sigma=T.constant(1.0))
    p2 = Normal(mu=1.0, sigma=2.0)
    m = Mixture(components=[p1, p2], weights=[0.25])

    assert len(m.components) == 2
    assert len(m.weights) == 2

    assert len(m.parameters_) == 4
    assert len(m.constants_) == 1
    assert len(m.observeds_) == 0

    assert p1.mu in m.parameters_
    assert p1.sigma in m.constants_
    assert p2.mu in m.parameters_
    assert p2.sigma in m.parameters_
    assert m.X == p1.X
    assert m.X == p2.X
    assert m.ndim == p1.ndim
    assert m.ndim == p2.ndim

    m = Mixture(components=[p1, p2])
    w = m.compute_weights()
    assert_array_equal(w, [0.5, 0.5])

    y = T.dscalar(name="y")
    w1 = T.constant(0.25)
    w2 = y * 2
    m = Mixture(components=[p1, p2], weights=[w1, w2])
    assert y in m.observeds_

    # Check errors
    assert_raises(ValueError, Mixture, components=[p1, p1, p1], weights=[1.0])
Example #5
def test_likelihood_free_mixture():
    p1 = Normal(random_state=1)
    p2 = Normal(mu=2.0, random_state=1)
    h1 = Histogram(bins=50).fit(p1.rvs(10000))
    h2 = Histogram(bins=50).fit(p2.rvs(10000))
    m1 = Mixture(components=[p1, p2])
    m2 = Mixture(components=[h1, h2])

    # Check whether pdf, nnlf and cdf have been overridden
    assert isinstance(m1.pdf, theano.compile.function_module.Function)
    assert isinstance(m1.nnlf, theano.compile.function_module.Function)
    assert isinstance(m1.cdf, theano.compile.function_module.Function)
    assert isinstance(m2.pdf, types.MethodType)
    assert isinstance(m2.nnlf, types.MethodType)
    assert isinstance(m2.cdf, types.MethodType)

    # Compare pdfs
    rng = check_random_state(1)
    X = rng.rand(100, 1) * 10 - 5
    assert np.mean(np.abs(m1.pdf(X) - m2.pdf(X))) < 0.05

    # Test sampling
    X = m2.rvs(10)
    assert X.shape == (10, 1)

    # Check errors
    assert_raises(NotImplementedError, m2.fit, X)
Example #6
def check_fit(mu, sigma):
    p = Normal()
    X = st.norm(loc=mu, scale=sigma).rvs(5000, random_state=0).reshape(-1, 1)
    s0 = p.score(X)
    p.fit(X)
    assert np.abs(p.mu.get_value() - mu) <= 0.1
    assert np.abs(p.sigma.get_value() - sigma) <= 0.1
    assert p.score(X) >= s0
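Helpers named check_* read like nose-style generator tests. A driver along the following lines (the function name and parameter values are illustrative assumptions, not from the source) would exercise check_fit:

def test_fit_normal():
    # Hypothetical nose-style generator test; each yielded tuple calls
    # check_fit(mu, sigma) with illustrative parameter values.
    for mu, sigma in [(0.0, 1.0), (1.0, 2.0), (-1.0, 0.5)]:
        yield check_fit, mu, sigma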
Example #7
File: test_cc.py Project: ibab/carl
def test_calibrated_classifier_ratio_identity():
    p = Normal(mu=0.0)
    ratio = CalibratedClassifierRatio(base_estimator=ElasticNetCV())
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
Example #8
def test_linear_transform_1d():
    p0 = Normal()
    pt = LinearTransform(p0, A=np.array([[0.5]]))

    X0 = p0.rvs(10, random_state=0)
    Xt = pt.rvs(10, random_state=0)

    assert X0.shape == Xt.shape
    assert_array_equal(X0 * 0.5, Xt)
    assert_array_equal(p0.pdf(X0), pt.pdf(Xt))
    assert_array_equal(p0.nll(X0), pt.nll(Xt))
Example #9
def test_join():
    p = Join(components=[Normal(mu=0), Normal(mu=1), Normal(mu=2)])
    assert p.ndim == 3
    assert len(p.parameters_) == 6

    X = p.rvs(10000, random_state=1)
    assert X.shape == (10000, 3)
    assert np.abs(np.mean(X[:, 0]) - 0.) < 0.05
    assert np.abs(np.mean(X[:, 1]) - 1.) < 0.05
    assert np.abs(np.mean(X[:, 2]) - 2.) < 0.05
    assert_array_almost_equal(-np.log(p.pdf(X)), p.nll(X))
Example #10
def test_classifier_ratio_identity():
    p = Normal(mu=0.0)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
Example #11
def test_known_density():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = KnownDensityRatio(numerator=p0, denominator=p1)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.01
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
    assert ratio.nllr(reals) == -ratio.predict(reals, log=True).sum()
Example #12
def check_normal(mu, sigma):
    rng = check_random_state(1)

    p_carl = Normal(mu=mu, sigma=sigma)
    p_scipy = st.norm(loc=mu, scale=sigma)
    X = rng.rand(50, 1)

    assert_array_almost_equal(p_carl.pdf(X),
                              p_scipy.pdf(X.ravel()))
    assert_array_almost_equal(p_carl.cdf(X),
                              p_scipy.cdf(X.ravel()))
    assert_array_almost_equal(-np.log(p_carl.pdf(X)),
                              p_carl.nll(X))
Example #13
def test_decomposed_ratio():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
Example #14
def test_parameterized_regressor():
    mu = theano.shared(0)
    p = Normal(mu=mu)

    X = p.rvs(100)
    y = p.pdf(X).astype(np.float32)

    tf = ParameterStacker(params=[mu])
    clf = ParameterizedRegressor(DecisionTreeRegressor(), params=[mu])
    clf.fit(tf.transform(X), y)

    assert clf.n_features_ == 1
    assert_array_almost_equal(y, clf.predict(tf.transform(X)), decimal=3)
Example #15
def test_decomposed_ratio_identity():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p = Mixture(components=components, weights=[0.45, 0.1, 0.45])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
Example #16
def test_kde():
    # Test API
    p = Normal(random_state=1)
    X = p.rvs(10000)
    k = KernelDensity()
    k.fit(X)

    reals = np.linspace(-3, 3).reshape(-1, 1)
    assert np.mean(np.abs(p.pdf(reals) - k.pdf(reals))) < 0.05
    assert np.mean(np.abs(p.nnlf(reals) - k.nnlf(reals))) < 0.05

    # Test sampling
    X = k.rvs(10000)
    assert np.abs(np.mean(X)) < 0.05
Example #17
def test_parameter_stacker():
    mu = theano.shared(0)
    sigma = theano.shared(1)
    p = Normal(mu=mu, sigma=sigma)
    X = p.rvs(10)

    tf = ParameterStacker(params=[mu, sigma])
    Xt = tf.transform(X)
    assert Xt.shape == (10, 1+2)
    assert_array_almost_equal(Xt[:, 1], np.zeros(10))
    assert_array_almost_equal(Xt[:, 2], np.ones(10))

    mu.set_value(1)
    Xt = tf.transform(X)
    assert_array_almost_equal(Xt[:, 1], np.ones(10))
Example #18
def check_mixture_pdf(w0, w1, mu1, sigma1, mu2, sigma2):
    rng = check_random_state(1)

    p1 = Normal(mu=mu1, sigma=sigma1)
    p2 = Normal(mu=mu2, sigma=sigma2)
    m = Mixture(components=[p1, p2], weights=[w0, w1])
    q1 = st.norm(loc=mu1, scale=sigma1)
    q2 = st.norm(loc=mu2, scale=sigma2)

    X = rng.rand(50, 1)

    assert_array_almost_equal(
        m.pdf(X).ravel(),
        w0 * q1.pdf(X).ravel() + (w1 if w1 is not None else
                                  (1 - w0)) * q2.pdf(X).ravel())
Example #19
def test_fit():
    p1 = Normal(mu=T.constant(0.0), sigma=T.constant(2.0))
    p2 = Normal(mu=T.constant(3.0), sigma=T.constant(2.0))
    p3 = Exponential(inverse_scale=T.constant(0.5))
    g = theano.shared(0.5)
    m = Mixture(components=[p1, p2, p3], weights=[g, g*g])

    X = np.concatenate([st.norm(loc=0.0, scale=2.0).rvs(300, random_state=0),
                        st.norm(loc=3.0, scale=2.0).rvs(100, random_state=1),
                        st.expon(scale=1. / 0.5).rvs(500, random_state=2)])
    X = X.reshape(-1, 1)
    s0 = m.score(X)

    m.fit(X)
    assert np.abs(g.eval() - 1. / 3.) < 0.05
    assert m.score(X) >= s0
Example #20
def test_mixin_external():
    # Check external parameters
    mu = theano.shared(0.0)
    sigma = theano.shared(1.0)
    p = Normal(mu=mu, sigma=sigma)
    assert mu == p.mu
    assert sigma == p.sigma
Example #21
def test_mixin_constants():
    # Check with constants
    mu = T.constant(0.0)
    sigma = T.constant(1.0)
    p = Normal(mu=mu, sigma=sigma)
    assert len(p.parameters_) == 0
    assert len(p.constants_) == 2
    assert mu in p.constants_
    assert sigma in p.constants_
Example #22
def test_mv_mixture():
    p1 = MultivariateNormal(mu=np.array([0.0, 0.0]), sigma=np.eye(2))
    p2 = MultivariateNormal(mu=np.array([2.0, 2.0]), sigma=0.5 * np.eye(2))
    m = Mixture(components=[p1, p2])
    assert m.ndim == 2
    X = m.rvs(100)
    assert X.shape == (100, 2)

    assert_raises(ValueError, Mixture, components=[p1, Normal()])
Example #23
def test_join_non_theano():
    h0 = Histogram(interpolation="linear", bins=30)
    h1 = Histogram(interpolation="linear", bins=30)
    h2 = Histogram(interpolation="linear", bins=30)

    h0.fit(Normal(mu=0).rvs(10000, random_state=0))
    h1.fit(Normal(mu=1).rvs(10000, random_state=1))
    h2.fit(Normal(mu=2).rvs(10000, random_state=2))

    p = Join(components=[h0, h1, h2])
    assert p.ndim == 3
    assert len(p.parameters_) == 0

    X = p.rvs(10000, random_state=1)
    assert X.shape == (10000, 3)
    assert np.abs(np.mean(X[:, 0]) - 0.) < 0.05
    assert np.abs(np.mean(X[:, 1]) - 1.) < 0.05
    assert np.abs(np.mean(X[:, 2]) - 2.) < 0.05
    assert_array_almost_equal(-np.log(p.pdf(X)), p.nll(X))
Example #24
def test_mixin_sklearn_params():
    # get_params
    p = Normal(mu=0.0, sigma=1.0)
    params = p.get_params()
    assert len(params) == 2
    assert "mu" in params
    assert "sigma" in params

    # for parameters, set_params should change the value contained
    old_mu = p.get_params()["mu"]
    p.set_params(mu=42.0)
    new_mu = p.get_params()["mu"]
    assert old_mu is new_mu
    assert new_mu.get_value() == 42.0

    # check errors
    p = Normal(mu=T.constant(0.0), sigma=1.0)
    assert_raises(ValueError, p.set_params, mu=1.0)
Example #25
def test_mixin_base():
    # Check raw parameters
    p = Normal(mu=0.0, sigma=1.0)
    assert isinstance(p, DistributionMixin)
    assert len(p.parameters_) == 2
    assert p.mu in p.parameters_
    assert p.sigma in p.parameters_
    assert isinstance(p.mu, SharedVariable)
    assert isinstance(p.sigma, SharedVariable)
    assert p.mu.get_value() == 0.0
    assert p.sigma.get_value() == 1.0
    assert len(p.observeds_) == 0
    assert isinstance(p.X, TensorVariable)
Example #26
def generate_samples_for_blow_up_demo(n_samples=50000):
    """
    Generate 3 independent Gaussian variables and apply linear transformation to them.
    These Gaussian have different means and different sigmas for target and original distribution.
    
    This is example of samples with regions with high target samples number zero original samples. In this case exact reweighting rule blow up and the same happens for algorithms.
    :param int n_samples: number of generated samples for original/target distributions. For test samples 2*n_samples will be generated
    
    :return: train original, train target, exact weights for train original, test original, test target, exact weights for test original
    """
    p0 = Join(components=[
        Normal(mu=1, sigma=0.7),
        Normal(mu=-1, sigma=0.7),
        Normal(mu=1, sigma=1.5)
    ])

    p1 = Join(components=[
        Normal(mu=0, sigma=0.7),
        Normal(mu=0, sigma=0.7),
        Normal(mu=0, sigma=1.5)
    ])

    R = make_sparse_spd_matrix(3, alpha=0.5, random_state=7)
    p0 = LinearTransform(p0, R)
    p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = numpy.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[numpy.isinf(exact_weights)] = 1.

    # generate samples to test reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = numpy.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[numpy.isinf(exact_weights_roc)] = 1.

    draw_distributions(X0, X1, numpy.ones(len(X0)))
    print "Exact weights are used (inf weights are set to 1)"
    draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
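draw_distributions is not defined in this listing. A minimal sketch of such a plotting helper, inferred only from its call sites (name, signature, and behavior are assumptions), might be:

import matplotlib.pyplot as plt

def draw_distributions(X0, X1, weights):
    # Hypothetical helper: overlay the weighted original marginals
    # against the target marginals, one histogram per feature.
    for i in range(X0.shape[1]):
        plt.hist(X0[:, i], bins=50, weights=weights, alpha=0.5,
                 density=True, label="original (weighted)")
        plt.hist(X1[:, i], bins=50, alpha=0.5, density=True, label="target")
        plt.xlabel("feature %d" % i)
        plt.legend()
        plt.show()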
Example #27
def test_rvs():
    p1 = Normal(mu=0.0, sigma=T.constant(1.0), random_state=0)
    p2 = Normal(mu=2.0, sigma=2.0, random_state=0)
    m = Mixture(components=[p1, p2], weights=[0.25], random_state=0)
    X = m.rvs(2000)
    assert np.abs(
        np.mean(X) - (0.25 * p1.mu.eval() + 0.75 * p2.mu.eval())) < 0.1
Example #28
def check_rvs(mu, sigma, random_state):
    p = Normal(mu=mu, sigma=sigma)
    samples = p.rvs(10000, random_state=random_state)
    assert np.abs(np.mean(samples) - mu) <= 0.05
    assert np.abs(np.std(samples) - sigma) <= 0.05
Example #29
def test_fit_with_bounds():
    p = Normal()
    X = st.norm(loc=0.05, scale=1.0).rvs(5000, random_state=0).reshape(-1, 1)
    p.fit(X, bounds=[{"param": p.sigma, "bounds": (0, None)}])
    assert p.sigma.get_value() >= 0.0
Example #30
def generate_samples(with_linear_transformation=False,
                     add_variation=False,
                     n_samples=50000,
                     verbose=True):
    """
    Generate 5 independent variables: two Gaussian, mixture of Gaussian, two exponents. 
    Two Gaussian have different means for original and target distributions.
    
    if with_linear_transformation is True then add linear transformation of generated 5 variables.
    
    if add_variation is True then add random values in variance to obtain gaussian pdf 
    for orignal and target samples not only with different mean but also with different variance.
    
    :param bool with_linear_transformation: apply or not linear transformation for samples features
    :param bool add_variation: make or not different variance for Gaussian distribution for original and target samples.
    :param int n_samples: number of generated samples for original/target distributions. For test samples 2*n_samples will be generated
    :param bool verbose: print and plot additional info during generation.
    
    :return: train original, train target, exact weights for train original, test original, test target, exact weights for test original
    """
    # define linear transformation matrix
    R = make_sparse_spd_matrix(5, alpha=0.5, random_state=7)

    variation_origin, variation_target = (0, 0)
    if add_variation:
        r = check_random_state(42)
        variation_origin, variation_target = r.uniform() / 3., r.uniform() / 3.

    p0 = Join(components=[
        Normal(mu=.5, sigma=1 + variation_origin),
        Normal(mu=-.5, sigma=3 + variation_origin),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    p1 = Join(components=[
        Normal(mu=0, sigma=1 + variation_target),
        Normal(mu=0, sigma=3 + variation_target),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    if with_linear_transformation:
        p0 = LinearTransform(p0, R)
        p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = numpy.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[numpy.isinf(exact_weights)] = 0.

    # generate samples to test reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = numpy.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[numpy.isinf(exact_weights_roc)] = 0.

    if verbose:
        print "Original distribution"
        fig = corner.corner(X0,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print "Target distribution"
        fig = corner.corner(X1,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print "Exact reweighting"
        # In this example, we know p0(x) and p1(x) exactly,
        #so we can compare the other can compare the approximate reweighting approaches with the exact weights.
        draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
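A typical invocation of the generator above (the parameter choices here are illustrative, not from the source) would be:

X0, X1, w, X0_test, X1_test, w_test = generate_samples(
    with_linear_transformation=True, add_variation=False,
    n_samples=50000, verbose=False)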
Example #31
true_theta = np.array([1.0, -1.0])
make_plots = True

# Simulator

A = theano.shared(true_theta[0], name="A")
B = theano.shared(true_theta[1], name="B")
R = np.array([[1.31229955, 0.10499961, 0.48310515, -0.3249938, -0.26387927],
              [0.10499961, 1.15833058, -0.55865473, 0.25275522, -0.39790775],
              [0.48310515, -0.55865473, 2.25874579, -0.52087938, -0.39271231],
              [-0.3249938, 0.25275522, -0.52087938, 1.4034925, -0.63521059],
              [-0.26387927, -0.39790775, -0.39271231, -0.63521059, 1.]])

p0 = LinearTransform(
    Join(components=[
        Normal(mu=A, sigma=1),
        Normal(mu=B, sigma=3),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ]), R)


def simulator(theta, n_samples, random_state=None):
    A.set_value(theta[0])
    B.set_value(theta[1])
    return p0.rvs(n_samples, random_state=random_state)


X_obs = simulator(true_theta, 20000, random_state=rng)
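Note that rng (and the make_plots flag) are assumed to be defined earlier in the enclosing script; this block reads like an excerpt from a longer example. A plausible setup, purely an assumption, is:

from sklearn.utils import check_random_state
rng = check_random_state(1234)  # the seed value is an assumption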