Example #1
def test_mixture_api():
    # Check basic API
    p1 = Normal(mu=0.0, sigma=T.constant(1.0))
    p2 = Normal(mu=1.0, sigma=2.0)
    m = Mixture(components=[p1, p2], weights=[0.25])

    assert len(m.components) == 2
    assert len(m.weights) == 2

    assert len(m.parameters_) == 4
    assert len(m.constants_) == 1
    assert len(m.observeds_) == 0

    assert p1.mu in m.parameters_
    assert p1.sigma in m.constants_
    assert p2.mu in m.parameters_
    assert p2.sigma in m.parameters_
    assert m.X == p1.X
    assert m.X == p2.X
    assert m.ndim == p1.ndim
    assert m.ndim == p2.ndim

    m = Mixture(components=[p1, p2])
    w = m.compute_weights()
    assert_array_equal(w, [0.5, 0.5])

    y = T.dscalar(name="y")
    w1 = T.constant(0.25)
    w2 = y * 2
    m = Mixture(components=[p1, p2], weights=[w1, w2])
    assert y in m.observeds_

    # Check errors
    assert_raises(ValueError, Mixture,
                  components=[p1, p1, p1], weights=[1.0])
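
The test above exercises the constructor conventions: a single weight of 0.25 is completed so the weights sum to one, and Theano constants are tracked separately from free parameters. A minimal usage sketch, assuming the carl.distributions import path these tests rely on:

import numpy as np
import theano.tensor as T
from carl.distributions import Mixture, Normal  # assumed import path

p1 = Normal(mu=0.0, sigma=T.constant(1.0))  # sigma is a constant, mu a parameter
p2 = Normal(mu=1.0, sigma=2.0)              # both mu and sigma are parameters
m = Mixture(components=[p1, p2], weights=[0.25])  # second weight inferred as 0.75

X = np.linspace(-3.0, 3.0, num=5).reshape(-1, 1)
print(m.pdf(X))  # equals 0.25 * p1.pdf(X) + 0.75 * p2.pdf(X)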
Example #2
def test_likelihood_free_mixture():
    p1 = Normal(random_state=1)
    p2 = Normal(mu=2.0, random_state=1)
    h1 = Histogram(bins=50).fit(p1.rvs(10000))
    h2 = Histogram(bins=50).fit(p2.rvs(10000))
    m1 = Mixture(components=[p1, p2])
    m2 = Mixture(components=[h1, h2])

    # Check whether pdf, nnlf and cdf have been overridden
    assert isinstance(m1.pdf, theano.compile.function_module.Function)
    assert isinstance(m1.nnlf, theano.compile.function_module.Function)
    assert isinstance(m1.cdf, theano.compile.function_module.Function)
    assert isinstance(m2.pdf, types.MethodType)
    assert isinstance(m2.nnlf, types.MethodType)
    assert isinstance(m2.cdf, types.MethodType)

    # Compare pdfs
    rng = check_random_state(1)
    X = rng.rand(100, 1) * 10 - 5
    assert np.mean(np.abs(m1.pdf(X) - m2.pdf(X))) < 0.05

    # Test sampling
    X = m2.rvs(10)
    assert X.shape == (10, 1)

    # Check errors
    assert_raises(NotImplementedError, m2.fit, X)
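
The contrast here is the point of the test: m1 is built from analytic components, so its pdf compiles down to a Theano function, while m2 is likelihood-free, its Histogram components having been fitted on samples alone, so pdf stays a plain Python method and fit raises NotImplementedError. A minimal sketch of building such a surrogate, under the same assumed imports:

import numpy as np
from carl.distributions import Histogram, Normal  # assumed import path

p = Normal(mu=0.0, random_state=0)
h = Histogram(bins=30).fit(p.rvs(5000))  # fitted from samples only
print(h.pdf(np.array([[0.0]])))          # approximate density near the mode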
Example #3
def test_mv_mixture():
    p1 = MultivariateNormal(mu=np.array([0.0, 0.0]), sigma=np.eye(2))
    p2 = MultivariateNormal(mu=np.array([2.0, 2.0]), sigma=0.5 * np.eye(2))
    m = Mixture(components=[p1, p2])
    assert m.ndim == 2
    X = m.rvs(100)
    assert X.shape == (100, 2)

    assert_raises(ValueError, Mixture, components=[p1, Normal()])
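
Note that the ValueError is presumably triggered by the dimensionality mismatch: a 2-dimensional MultivariateNormal cannot be mixed with a 1-dimensional Normal.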
Example #4
def test_decomposed_ratio_identity():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p = Mixture(components=components, weights=[0.45, 0.1, 0.45])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
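
Since the numerator and denominator are the same distribution here, the exact ratio is identically one, which is why the predictions are checked against ones and the log predictions against zeros.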
Example #5
def test_decomposed_ratio():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
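
DecomposedRatio does not learn p0/p1 with a single classifier; it exploits the mixture structure and trains one calibrated classifier per pair of components. Up to notation, the identity behind the decomposition (as in Cranmer, Pavez and Louppe, arXiv:1506.02169) is

    r(x) = \frac{p_0(x)}{p_1(x)} = \sum_i w_i \left[ \sum_j w_j' \, \frac{p_j(x)}{p_i(x)} \right]^{-1}

so each inner component ratio p_j(x)/p_i(x) can be approximated by its own ClassifierRatio.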
Example #6
def check_mixture_pdf(w0, w1, mu1, sigma1, mu2, sigma2):
    rng = check_random_state(1)

    p1 = Normal(mu=mu1, sigma=sigma1)
    p2 = Normal(mu=mu2, sigma=sigma2)
    m = Mixture(components=[p1, p2], weights=[w0, w1])
    q1 = st.norm(loc=mu1, scale=sigma1)
    q2 = st.norm(loc=mu2, scale=sigma2)

    X = rng.rand(50, 1)

    assert_array_almost_equal(
        m.pdf(X).ravel(),
        w0 * q1.pdf(X).ravel() + (w1 if w1 is not None else
                                  (1 - w0)) * q2.pdf(X).ravel())
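
check_mixture_pdf is a parameterized checker rather than a standalone test; hypothetical invocations (the parameter values are illustrative, not taken from the carl test suite):

check_mixture_pdf(0.5, 0.5, 0.0, 1.0, 1.0, 2.0)   # explicit weights
check_mixture_pdf(0.2, 0.8, -1.0, 2.0, 3.0, 0.5)  # another explicit grid point

The w1-is-None branch in the assertion presumably covers grid points where only w0 is supplied and the second weight is inferred as 1 - w0.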
Example #7
def test_fit():
    p1 = Normal(mu=T.constant(0.0), sigma=T.constant(2.0))
    p2 = Normal(mu=T.constant(3.0), sigma=T.constant(2.0))
    p3 = Exponential(inverse_scale=T.constant(0.5))
    g = theano.shared(0.5)
    m = Mixture(components=[p1, p2, p3], weights=[g, g*g])

    X = np.concatenate([st.norm(loc=0.0, scale=2.0).rvs(300, random_state=0),
                        st.norm(loc=3.0, scale=2.0).rvs(100, random_state=1),
                        st.expon(scale=1. / 0.5).rvs(500, random_state=2)])
    X = X.reshape(-1, 1)
    s0 = m.score(X)

    m.fit(X)
    assert np.abs(g.eval() - 1. / 3.) < 0.05
    assert m.score(X) >= s0  # sklearn convention: higher score is better, so fitting should not decrease it
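
The expected value g ≈ 1/3 follows from the data: with weights [g, g*g] the third weight is implicitly 1 - g - g², and g = 1/3 gives weights [1/3, 1/9, 5/9], matching the 300/100/500 composition of the 900 training points.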
Example #8
def test_known_density():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = KnownDensityRatio(numerator=p0, denominator=p1)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.01
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
    assert ratio.nllr(reals) == -ratio.predict(reals, log=True).sum()
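
Unlike the classifier-based ratios above, KnownDensityRatio is built directly from the two known densities, so no training samples or calibration are involved; the last assertion just checks that nllr is the negated sum of the predicted log-ratios.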
Example #9
# Simulator

A = theano.shared(true_theta[0], name="A")
B = theano.shared(true_theta[1], name="B")
R = np.array([[1.31229955, 0.10499961, 0.48310515, -0.3249938, -0.26387927],
              [0.10499961, 1.15833058, -0.55865473, 0.25275522, -0.39790775],
              [0.48310515, -0.55865473, 2.25874579, -0.52087938, -0.39271231],
              [-0.3249938, 0.25275522, -0.52087938, 1.4034925, -0.63521059],
              [-0.26387927, -0.39790775, -0.39271231, -0.63521059, 1.]])

p0 = LinearTransform(
    Join(components=[
        Normal(mu=A, sigma=1),
        Normal(mu=B, sigma=3),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ]), R)


def simulator(theta, n_samples, random_state=None):
    A.set_value(theta[0])
    B.set_value(theta[1])
    return p0.rvs(n_samples, random_state=random_state)


X_obs = simulator(true_theta, 20000, random_state=rng)
n_params = len(true_theta)
n_features = X_obs.shape[1]
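
Because A and B are Theano shared variables, simulator re-parameterizes the same sampling graph in place instead of rebuilding it. A hypothetical call at a shifted parameter point (true_theta, rng and p0 as defined above):

X_shifted = simulator([true_theta[0] + 0.5, true_theta[1]], 5000, random_state=0)
assert X_shifted.shape == (5000, n_features)
A.set_value(true_theta[0])  # restore the original parameter values
B.set_value(true_theta[1])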
Example #10
def test_rvs():
    p1 = Normal(mu=0.0, sigma=T.constant(1.0), random_state=0)
    p2 = Normal(mu=2.0, sigma=2.0, random_state=0)
    m = Mixture(components=[p1, p2], weights=[0.25], random_state=0)
    X = m.rvs(2000)
    assert np.abs(np.mean(X) - (0.25 * p1.mu.eval() + 0.75 * p2.mu.eval())) < 0.1
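
The expected mixture mean is 0.25 * 0.0 + 0.75 * 2.0 = 1.5, so the sample mean over 2000 draws should land within 0.1 of it.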
Example #11
def generate_samples(with_linear_transformation=False,
                     add_variation=False,
                     n_samples=50000,
                     verbose=True):
    """
    Generate 5 independent variables: two Gaussian, mixture of Gaussian, two exponents. 
    Two Gaussian have different means for original and target distributions.
    
    if with_linear_transformation is True then add linear transformation of generated 5 variables.
    
    if add_variation is True then add random values in variance to obtain gaussian pdf 
    for orignal and target samples not only with different mean but also with different variance.
    
    :param bool with_linear_transformation: apply or not linear transformation for samples features
    :param bool add_variation: make or not different variance for Gaussian distribution for original and target samples.
    :param int n_samples: number of generated samples for original/target distributions. For test samples 2*n_samples will be generated
    :param bool verbose: print and plot additional info during generation.
    
    :return: train original, train target, exact weights for train original, test original, test target, exact weights for test original
    """
    # define linear transformation matrix
    R = make_sparse_spd_matrix(5, alpha=0.5, random_state=7)

    variation_origin, variation_target = (0, 0)
    if add_variation:
        r = check_random_state(42)
        variation_origin, variation_target = r.uniform() / 3., r.uniform() / 3.

    p0 = Join(components=[
        Normal(mu=.5, sigma=1 + variation_origin),
        Normal(mu=-.5, sigma=3 + variation_origin),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    p1 = Join(components=[
        Normal(mu=0, sigma=1 + variation_target),
        Normal(mu=0, sigma=3 + variation_target),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    if with_linear_transformation:
        p0 = LinearTransform(p0, R)
        p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = numpy.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[numpy.isinf(exact_weights)] = 0.

    # generate samples to test reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = numpy.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[numpy.isinf(exact_weights_roc)] = 0.

    if verbose:
        print "Original distribution"
        fig = corner.corner(X0,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print "Target distribution"
        fig = corner.corner(X1,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print "Exact reweighting"
        # In this example, we know p0(x) and p1(x) exactly,
        #so we can compare the other can compare the approximate reweighting approaches with the exact weights.
        draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
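
A hypothetical call, with argument values chosen for illustration (the plotting helpers are only needed when verbose=True):

X0, X1, w, X0_test, X1_test, w_test = generate_samples(
    with_linear_transformation=True,
    add_variation=False,
    n_samples=10000,
    verbose=False)
print(X0.shape)  # (10000, 5)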