# Imports assumed by the snippets below (reconstructed; this section
# excerpts several files built on carl, Theano and scikit-learn).
import types

import numpy as np
import scipy.stats as st
import theano
import theano.tensor as T

import corner
import matplotlib.pyplot as plt

from numpy.testing import assert_array_equal
from numpy.testing import assert_array_almost_equal
from numpy.testing import assert_raises
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.linear_model import ElasticNetCV
from sklearn.utils import check_random_state

from carl.distributions import Exponential
from carl.distributions import Histogram
from carl.distributions import Join
from carl.distributions import LinearTransform
from carl.distributions import Mixture
from carl.distributions import MultivariateNormal
from carl.distributions import Normal
from carl.learning import CalibratedClassifierCV
from carl.ratios import ClassifierRatio
from carl.ratios import DecomposedRatio
from carl.ratios import KnownDensityRatio


def test_mixture_api():
    # Check basic API
    p1 = Normal(mu=0.0, sigma=T.constant(1.0))
    p2 = Normal(mu=1.0, sigma=2.0)
    m = Mixture(components=[p1, p2], weights=[0.25])
    assert len(m.components) == 2
    assert len(m.weights) == 2

    assert len(m.parameters_) == 4
    assert len(m.constants_) == 1
    assert len(m.observeds_) == 0

    assert p1.mu in m.parameters_
    assert p1.sigma in m.constants_
    assert p2.mu in m.parameters_
    assert p2.sigma in m.parameters_
    assert m.X == p1.X
    assert m.X == p2.X
    assert m.ndim == p1.ndim
    assert m.ndim == p2.ndim

    m = Mixture(components=[p1, p2])
    w = m.compute_weights()
    assert_array_equal(w, [0.5, 0.5])

    y = T.dscalar(name="y")
    w1 = T.constant(0.25)
    w2 = y * 2
    m = Mixture(components=[p1, p2], weights=[w1, w2])
    assert y in m.observeds_

    # Check errors
    assert_raises(ValueError, Mixture,
                  components=[p1, p1, p1], weights=[1.0])
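
# A small companion sketch (our addition, not part of the original suite):
# when fewer weights than components are given, Mixture completes the
# remainder on the last component, so weights=[0.25] above implies
# compute_weights() == [0.25, 0.75]. This mirrors the 0.25/0.75 split used
# in test_rvs further below.
def check_weight_completion():
    p1 = Normal(mu=0.0, sigma=1.0)
    p2 = Normal(mu=1.0, sigma=2.0)
    m = Mixture(components=[p1, p2], weights=[0.25])
    assert_array_equal(m.compute_weights(), [0.25, 0.75])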
def test_likelihood_free_mixture():
    p1 = Normal(random_state=1)
    p2 = Normal(mu=2.0, random_state=1)
    h1 = Histogram(bins=50).fit(p1.rvs(10000))
    h2 = Histogram(bins=50).fit(p2.rvs(10000))
    m1 = Mixture(components=[p1, p2])
    m2 = Mixture(components=[h1, h2])

    # Check whether pdf, nnlf and cdf have been overridden
    assert isinstance(m1.pdf, theano.compile.function_module.Function)
    assert isinstance(m1.nnlf, theano.compile.function_module.Function)
    assert isinstance(m1.cdf, theano.compile.function_module.Function)
    assert isinstance(m2.pdf, types.MethodType)
    assert isinstance(m2.nnlf, types.MethodType)
    assert isinstance(m2.cdf, types.MethodType)

    # Compare pdfs
    rng = check_random_state(1)
    X = rng.rand(100, 1) * 10 - 5
    assert np.mean(np.abs(m1.pdf(X) - m2.pdf(X))) < 0.05

    # Test sampling
    X = m2.rvs(10)
    assert X.shape == (10, 1)

    # Check errors
    assert_raises(NotImplementedError, m2.fit, X)
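
# Hypothetical companion check (our assumption about the dispatch rule
# exercised above): a single likelihood-free component should be enough to
# disable Theano compilation for the whole mixture.
def check_mixed_components_fallback():
    p = Normal(random_state=1)
    h = Histogram(bins=50).fit(p.rvs(10000))
    m = Mixture(components=[p, h])
    assert isinstance(m.pdf, types.MethodType)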
def test_mv_mixture():
    p1 = MultivariateNormal(mu=np.array([0.0, 0.0]), sigma=np.eye(2))
    p2 = MultivariateNormal(mu=np.array([2.0, 2.0]), sigma=0.5 * np.eye(2))
    m = Mixture(components=[p1, p2])
    assert m.ndim == 2

    X = m.rvs(100)
    assert X.shape == (100, 2)

    # Mixing components of different dimensionality is invalid
    assert_raises(ValueError, Mixture, components=[p1, Normal()])
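
# Extra sanity sketch (our addition): with the default equal weights, the
# sample mean should sit near the average of the component means, (1, 1).
def check_mv_mixture_mean():
    p1 = MultivariateNormal(mu=np.array([0.0, 0.0]), sigma=np.eye(2))
    p2 = MultivariateNormal(mu=np.array([2.0, 2.0]), sigma=0.5 * np.eye(2))
    m = Mixture(components=[p1, p2])
    X = m.rvs(10000, random_state=1)
    assert np.all(np.abs(X.mean(axis=0) - np.array([1.0, 1.0])) < 0.1)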
def test_decomposed_ratio_identity():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p = Mixture(components=components, weights=[0.45, 0.1, 0.45])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
def test_decomposed_ratio():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
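
# Numeric sketch of the identity DecomposedRatio exploits (our derivation,
# not library code): for p0 = sum_i w_i * p_i and p1 = sum_j v_j * q_j,
#   p0(x) / p1(x) = sum_i w_i / (sum_j v_j * q_j(x) / p_i(x)),
# so only pairwise component ratios q_j / p_i are ever needed.
def check_ratio_decomposition():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0], components[2]])

    X = np.linspace(-0.5, 1.0, num=20).reshape(-1, 1)
    direct = (p0.pdf(X) / p1.pdf(X)).ravel()
    decomposed = sum(
        w_i / sum(v_j * (q_j.pdf(X) / p_i.pdf(X)).ravel()
                  for v_j, q_j in zip([0.5, 0.5], p1.components))
        for w_i, p_i in zip([0.45, 0.1, 0.45], p0.components))
    assert_array_almost_equal(direct, decomposed)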
def check_mixture_pdf(w0, w1, mu1, sigma1, mu2, sigma2):
    rng = check_random_state(1)

    p1 = Normal(mu=mu1, sigma=sigma1)
    p2 = Normal(mu=mu2, sigma=sigma2)
    m = Mixture(components=[p1, p2], weights=[w0, w1])
    q1 = st.norm(loc=mu1, scale=sigma1)
    q2 = st.norm(loc=mu2, scale=sigma2)

    X = rng.rand(50, 1)

    assert_array_almost_equal(
        m.pdf(X).ravel(),
        w0 * q1.pdf(X).ravel() +
        (w1 if w1 is not None else (1 - w0)) * q2.pdf(X).ravel())
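
# A plausible nose-style driver for check_mixture_pdf (our reconstruction;
# the parameter grid is illustrative, not from the original suite). The
# w1=None case exercises the implicit-remainder branch in the check above.
def test_mixture_pdf():
    for w0, w1, mu1, sigma1, mu2, sigma2 in [
            (0.5, 0.5, 0.0, 1.0, 1.0, 2.0),
            (0.25, 0.75, -1.0, 2.0, 1.0, 0.5),
            (0.75, None, 0.0, 1.0, 2.0, 1.0)]:
        yield check_mixture_pdf, w0, w1, mu1, sigma1, mu2, sigma2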
def test_fit():
    p1 = Normal(mu=T.constant(0.0), sigma=T.constant(2.0))
    p2 = Normal(mu=T.constant(3.0), sigma=T.constant(2.0))
    p3 = Exponential(inverse_scale=T.constant(0.5))
    g = theano.shared(0.5)
    # The first two weights are tied through g; the third is the implied
    # remainder 1 - g - g**2.
    m = Mixture(components=[p1, p2, p3], weights=[g, g * g])

    # 300:100:500 samples, so the optimum is g = 1/3
    # (weights 1/3, 1/9 and 5/9).
    X = np.concatenate([
        st.norm(loc=0.0, scale=2.0).rvs(300, random_state=0),
        st.norm(loc=3.0, scale=2.0).rvs(100, random_state=1),
        st.expon(scale=1. / 0.5).rvs(500, random_state=2)])
    X = X.reshape(-1, 1)

    s0 = m.score(X)
    m.fit(X)
    assert np.abs(g.eval() - 1. / 3.) < 0.05
    assert m.score(X) >= s0
def test_known_density():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = KnownDensityRatio(numerator=p0, denominator=p1)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.01
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
    assert ratio.nllr(reals) == -ratio.predict(reals, log=True).sum()
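
# Companion sketch (our addition): since both densities are known exactly,
# the predicted ratio should match p0/p1 pointwise, not just on average.
def check_known_density_exact():
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.5)
    ratio = KnownDensityRatio(numerator=p0, denominator=p1)
    X = np.linspace(-1, 1, num=50).reshape(-1, 1)
    assert_array_almost_equal(ratio.predict(X).ravel(),
                              (p0.pdf(X) / p1.pdf(X)).ravel())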
# Simulator
A = theano.shared(true_theta[0], name="A")
B = theano.shared(true_theta[1], name="B")

R = np.array([[1.31229955,  0.10499961,  0.48310515, -0.3249938,  -0.26387927],
              [0.10499961,  1.15833058, -0.55865473,  0.25275522, -0.39790775],
              [0.48310515, -0.55865473,  2.25874579, -0.52087938, -0.39271231],
              [-0.3249938,  0.25275522, -0.52087938,  1.4034925,  -0.63521059],
              [-0.26387927, -0.39790775, -0.39271231, -0.63521059,  1.]])

p0 = LinearTransform(
    Join(components=[
        Normal(mu=A, sigma=1),
        Normal(mu=B, sigma=3),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)]),
    R)


def simulator(theta, n_samples, random_state=None):
    A.set_value(theta[0])
    B.set_value(theta[1])
    return p0.rvs(n_samples, random_state=random_state)


X_obs = simulator(true_theta, 20000, random_state=rng)

n_params = len(true_theta)
n_features = X_obs.shape[1]
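
# Usage sketch (our addition; the theta values below are made up for
# illustration): because A and B are shared variables, the same computation
# graph generates data at any candidate theta without rebuilding p0.
X_alt = simulator([1.2, -0.5], 1000, random_state=0)
assert X_alt.shape == (1000, n_features)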
def test_rvs():
    p1 = Normal(mu=0.0, sigma=T.constant(1.0), random_state=0)
    p2 = Normal(mu=2.0, sigma=2.0, random_state=0)
    m = Mixture(components=[p1, p2], weights=[0.25], random_state=0)
    X = m.rvs(2000)
    # The sample mean should match the weighted mixture mean
    # 0.25 * 0 + 0.75 * 2 = 1.5.
    assert np.abs(np.mean(X) - (0.25 * p1.mu.eval() +
                                0.75 * p2.mu.eval())) < 0.1
def generate_samples(with_linear_transformation=False, add_variation=False,
                     n_samples=50000, verbose=True):
    """Generate 5 independent variables: two Gaussians, a mixture of
    Gaussians, and two exponentials.

    The two Gaussians have different means for the original and target
    distributions. If `with_linear_transformation` is True, a linear
    transformation is applied to the 5 generated variables. If
    `add_variation` is True, random offsets are added to the variances, so
    that the Gaussian pdfs of the original and target samples differ not
    only in mean but also in variance.

    :param bool with_linear_transformation: whether to apply a linear
        transformation to the sample features
    :param bool add_variation: whether to use different variances for the
        Gaussian distributions of the original and target samples
    :param int n_samples: number of generated samples for the original and
        target distributions; 2 * n_samples samples are generated for the
        test distributions
    :param bool verbose: print and plot additional info during generation

    :return: train original, train target, exact weights for train original,
        test original, test target, exact weights for test original
    """
    # Define the linear transformation matrix
    R = make_sparse_spd_matrix(5, alpha=0.5, random_state=7)

    variation_origin, variation_target = (0, 0)
    if add_variation:
        r = check_random_state(42)
        variation_origin, variation_target = r.uniform() / 3., r.uniform() / 3.

    p0 = Join(components=[
        Normal(mu=.5, sigma=1 + variation_origin),
        Normal(mu=-.5, sigma=3 + variation_origin),
        Mixture(components=[Normal(mu=-2, sigma=1), Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)])

    p1 = Join(components=[
        Normal(mu=0, sigma=1 + variation_target),
        Normal(mu=0, sigma=3 + variation_target),
        Mixture(components=[Normal(mu=-2, sigma=1), Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)])

    if with_linear_transformation:
        p0 = LinearTransform(p0, R)
        p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = np.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[np.isinf(exact_weights)] = 0.

    # Generate samples to test the reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = np.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[np.isinf(exact_weights_roc)] = 0.

    if verbose:
        print("Original distribution")
        corner.corner(X0, bins=20, smooth=0.85,
                      labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()

        print("Target distribution")
        corner.corner(X1, bins=20, smooth=0.85,
                      labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()

        print("Exact reweighting")
        # In this example, p0(x) and p1(x) are known exactly, so the
        # approximate reweighting approaches can be compared with the
        # exact weights.
        draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
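
# Minimal usage sketch (our addition, not part of the original script;
# assumes the definitions above are in scope). Plots are skipped by
# passing verbose=False.
X0, X1, w, X0_roc, X1_roc, w_roc = generate_samples(
    with_linear_transformation=True, add_variation=False,
    n_samples=10000, verbose=False)
print("train shapes:", X0.shape, X1.shape, "mean exact weight:", w.mean())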