def test_integral():
    """Compare the model pdf of a multi-region Normal with a scipy reference.

    The reference is the unbounded scipy Normal pdf, set to zero outside the
    allowed regions and divided by the probability mass inside them.
    """
    loc, scale = -0.4, 2
    bounds = [
        (-1, -0.9),
        (-0.65, -0.5),
        (-0.4, -0.32),
        (-0.31, -0.1),
        (0, 0.05),
        (0.2, np.inf),
    ]

    @np.vectorize
    def allowed_point(x):
        # 1 when x lies strictly inside any region, otherwise 0
        return int(any(low < x < high for low, high in bounds))

    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=0)
        X = Normal(mu, sigma, bounds=bounds)

    model.observed(X)
    model.initialize({mu: loc, sigma: scale})

    grid = np.linspace(-1, 1, 1001)

    # Probability mass of the unbounded Normal inside the allowed regions
    mass = sum(
        st.norm.cdf(high, loc, scale) - st.norm.cdf(low, loc, scale)
        for low, high in bounds
    )
    expected = st.norm.pdf(grid, loc, scale) * allowed_point(grid)
    expected /= mass

    actual = model.pdf(grid)
    assert_array_almost_equal(expected, actual, 15)
def test_bounds_contain_none():
    """Build a fake distribution (dimension=2) whose bounds contain ``None``.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_2d = get_fake_distribution(dimension=2)
    bounds_with_none = [
        [1.1, 2.2, 3.3, 4.4],
        [1.1, None, 3.3, 4.4],
        [1.1, 2.2, 3.3, 4.4],
    ]
    with Model():
        fake_2d(bounds=bounds_with_none)
def test_bounds_invalid_shape_2():
    """Pass three rows of four numbers as bounds to a dimension=2 distribution.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_2d = get_fake_distribution(dimension=2)
    # Three independent but equal rows
    rows = [[1.1, 2.2, 3.3, 4.4] for _ in range(3)]
    with Model():
        fake_2d(bounds=rows)
def test_mcmc():
    """Run ``model.mcmc`` for 100 samples on a model with one Normal(0, 1).

    Only checks that the call completes; the returned value is not inspected.
    """
    with Model() as model:
        x = Normal(0, 1)

    np.random.seed(0)
    model.observed()
    start = {x: 0.0}
    model.initialize(start)
    chain = model.mcmc(samples=100)
def test_mix2_fit_with_mix2_input():
    """Fit a Mix2 whose first component is itself a Mix2 and check the result.

    The model mixes a bounded Normal with a bounded Exponential, then mixes
    that with a second Exponential. The data are seeded random draws cut to
    the same regions as the model.

    NOTE(review): a second function with this exact name appears later in
    this source; if both live in one module the later definition replaces
    this one -- confirm which is intended.
    """
    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=1, upper=4)
        a = Parameter(lower=0.06)
        b = Parameter(lower=0)
        f_1 = Parameter(lower=0, upper=1)
        f_2 = Parameter(lower=0, upper=1)

        gauss = Normal(mu, sigma, bounds=[(-np.inf, 21), (22, np.inf)])
        expo_first = Exponential(a, bounds=[(-np.inf, 8), (10, 27), (31, np.inf)])
        inner_mix = Mix2(f_1, gauss, expo_first, bounds=[(6, 17), (18, 36)])
        expo_second = Exponential(b)
        outer_mix = Mix2(f_2, inner_mix, expo_second, bounds=[(6, 17), (18, 36)])

    model.observed(outer_mix)
    start_values = {mu: 23, sigma: 1.2, a: 0.2, b: 0.04, f_1: 0.3, f_2: 0.4}
    model.initialize(start_values)

    def in_mix_window(values):
        # Regions used by both Mix2 layers: (6, 17) and (18, 36)
        return ((6 < values) & (values < 17)) | ((18 < values) & (values < 36))

    # Draw order (exponential, exponential, normal) is fixed by the seed
    np.random.seed(42)

    exp_1_data = np.random.exponential(10, 200000)
    keep_exp_1 = (
        (6 < exp_1_data)
        & ((exp_1_data < 8) | (10 < exp_1_data))
        & ((exp_1_data < 17) | (18 < exp_1_data))
        & ((exp_1_data < 27) | (31 < exp_1_data))
        & (exp_1_data < 36)
    )
    exp_1_data = exp_1_data[keep_exp_1]

    exp_2_data = np.random.exponential(20, 200000)
    exp_2_data = exp_2_data[in_mix_window(exp_2_data)]

    # The Normal sample is cut to its own regions intersected with the Mix2
    # window, because len(norm_data) is used in the expected fractions below
    norm_data = np.random.normal(19, 2, 100000)
    keep_norm = (
        ((6 < norm_data) & (norm_data < 17))
        | ((18 < norm_data) & (norm_data < 21))
        | ((22 < norm_data) & (norm_data < 36))
    )
    norm_data = norm_data[keep_norm]

    data = np.concatenate([exp_1_data, exp_2_data, norm_data])
    data = data[in_mix_window(data)]

    result = model.fit(data)
    assert result.success

    expected_f_1 = len(norm_data) / (len(exp_1_data) + len(norm_data))
    expected_f_2 = (len(exp_1_data) + len(norm_data)) / len(data)

    assert abs(model.state[mu] - 19) < 3e-2
    assert abs(model.state[sigma] - 2) < 1e-3
    assert abs(model.state[a] - 0.1) < 1e-3
    assert abs(model.state[b] - 0.05) < 3e-4
    assert abs(model.state[f_1] - expected_f_1) < 5e-3
    assert abs(model.state[f_2] - expected_f_2) < 5e-4
def test_bounds_1D():
    """Check the ``Region`` lists stored for one-dimensional distributions.

    Covers no limits, only ``lower``, only ``upper``, both, and an explicit
    ``bounds`` list of three regions.
    """
    FakeDistribution = get_fake_distribution()

    with Model() as model:
        A = FakeDistribution()
        B = FakeDistribution(lower=1.1)
        C = FakeDistribution(upper=2.2)
        D = FakeDistribution(lower=3.3, upper=4.4)
        E = FakeDistribution(bounds=[(5.5, 6.6), (7.7, 8.8), (9.9, 11.11)])

    cases = [
        (A, [Region(-np.inf, np.inf)]),
        (B, [Region(1.1, np.inf)]),
        (C, [Region(-np.inf, 2.2)]),
        (D, [Region(3.3, 4.4)]),
        (E, [Region(5.5, 6.6), Region(7.7, 8.8), Region(9.9, 11.11)]),
    ]
    for variable, regions in cases:
        assert model._description[variable].bounds == regions
def test_mix2_fit():
    """Fit a Mix2 of a bounded Normal and a bounded Exponential.

    The data are seeded random draws cut to the same regions as the model;
    the fitted parameters are compared with the generating values.
    """
    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=1)
        a = Parameter(lower=0)
        f = Parameter(lower=0, upper=1)

        gauss = Normal(mu, sigma, bounds=[(-np.inf, 21), (22, np.inf)])
        expo = Exponential(a, bounds=[(-np.inf, 8), (10, np.inf)])
        mixture = Mix2(f, gauss, expo, bounds=[(6, 17), (18, 36)])

    model.observed(mixture)
    start_values = {mu: 23, sigma: 1.2, a: 0.2, f: 0.3}
    model.initialize(start_values)

    # Draw order (exponential, then normal) is fixed by the seed
    np.random.seed(42)

    exp_data = np.random.exponential(10, 200000)
    outside_exp_gap = (exp_data < 8) | (10 < exp_data)
    exp_data = exp_data[outside_exp_gap]

    # The Normal sample is cut to its own regions intersected with the Mix2
    # window, because len(norm_data) is used in the expected fraction below
    norm_data = np.random.normal(19, 2, 100000)
    keep_norm = (
        ((6 < norm_data) & (norm_data < 17))
        | ((18 < norm_data) & (norm_data < 21))
        | ((22 < norm_data) & (norm_data < 36))
    )
    norm_data = norm_data[keep_norm]

    data = np.concatenate([exp_data, norm_data])
    in_window = ((6 < data) & (data < 17)) | ((18 < data) & (data < 36))
    data = data[in_window]

    result = model.fit(data)

    # The optimiser must converge and recover the generating parameters
    assert result.success
    expected_fraction = len(norm_data) / len(data)
    assert abs(model.state[mu] - 19) < 5e-3
    assert abs(model.state[sigma] - 2) < 5e-3
    assert abs(model.state[a] - 0.1) < 5e-4
    assert abs(model.state[f] - expected_fraction) < 5e-4
def test_fisher():
    """Fit a Normal to seeded samples and check one entry of ``fisher(model)``.

    The expected number is a previously recorded result, not an
    independently derived value.
    """
    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=0)
        X = Normal(mu, sigma)

    model.observed(X)
    model.initialize({mu: 2, sigma: 2})

    np.random.seed(0)
    samples = np.random.normal(0, 1, 200)
    model.fit(samples)

    covariance = fisher(model)

    # TODO(ibab) Make this more robust and useful
    # Value recorded from an earlier run of this test
    recorded_mu_variance = 0.00521652579559
    assert np.isclose(covariance[mu][mu], recorded_mu_variance)
def test_mix2():
    """Build a Mix2 of two Normals with Uniform parameters and fit three points.

    Only checks that construction, initialisation and the fit call complete;
    nothing about the fit result is asserted.
    """
    with Model() as model:
        mu1 = Uniform()
        mu2 = Uniform()
        sigma1 = Uniform(lower=0)
        sigma2 = Uniform(lower=0)
        f = Uniform(lower=0, upper=1)

        first_normal = Normal(mu1, sigma1)
        second_normal = Normal(mu2, sigma2)
        X = Mix2(f, first_normal, second_normal)

    model.observed(X)

    start_values = {f: 0.5, mu1: -1, mu2: 1, sigma1: 1, sigma2: 2}
    model.initialize(start_values)

    model.fit([1, 2, 3])
def test_bounds_ND():
    """Check the ``Region`` lists stored for each output of a 3-D distribution.

    With ``lower``/``upper`` or a flat ``bounds`` list, all three outputs are
    expected to carry the same regions; with a nested ``bounds`` list each
    output is expected to carry its own row.
    """
    FakeDistribution3D = get_fake_distribution(dimension=3)

    with Model() as model:
        A1, A2, A3 = FakeDistribution3D()
        B1, B2, B3 = FakeDistribution3D(lower=1.1)
        C1, C2, C3 = FakeDistribution3D(upper=2.2)
        D1, D2, D3 = FakeDistribution3D(lower=3.3, upper=4.4)
        E1, E2, E3 = FakeDistribution3D(bounds=[(5.5, 6.6), (7.7, 8.8), (9.9, 11.1)])
        F1, F2, F3 = FakeDistribution3D(bounds=[
            [(1.1, 2.1), (3.1, 4.1), (5.1, 6.1)],
            [(1.2, 2.2), (3.2, 4.2), (5.2, 6.2)],
            [(1.3, 2.3), (3.3, 4.3), (5.3, 6.3)],
        ])

    # Cases where every output shares one list of regions
    shared_cases = [
        ((A1, A2, A3), [Region(-np.inf, np.inf)]),
        ((B1, B2, B3), [Region(1.1, np.inf)]),
        ((C1, C2, C3), [Region(-np.inf, 2.2)]),
        ((D1, D2, D3), [Region(3.3, 4.4)]),
        ((E1, E2, E3), [Region(5.5, 6.6), Region(7.7, 8.8), Region(9.9, 11.1)]),
    ]
    for outputs, regions in shared_cases:
        for output in outputs:
            assert model._description[output].bounds == regions

    # Nested bounds: one row of regions per output
    per_output_cases = [
        (F1, [Region(1.1, 2.1), Region(3.1, 4.1), Region(5.1, 6.1)]),
        (F2, [Region(1.2, 2.2), Region(3.2, 4.2), Region(5.2, 6.2)]),
        (F3, [Region(1.3, 2.3), Region(3.3, 4.3), Region(5.3, 6.3)]),
    ]
    for output, regions in per_output_cases:
        assert model._description[output].bounds == regions
def test_using_lower_and_bounds():
    """Pass both ``lower`` and ``bounds`` to a fake distribution.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_distribution = get_fake_distribution()
    explicit_bounds = [5, np.inf]
    with Model():
        fake_distribution(lower=5, bounds=explicit_bounds)
def test_mix2_fit_with_mix2_input():
    """Fit a nested Mix2 model and check both the fit and the component pdfs.

    After fitting, the pdf of each component obtained through ``model[...]``
    is compared with a scipy reference: the unbounded pdf, zeroed outside the
    component's effective regions, divided by the mass inside those regions
    and scaled by the component's mixture weight.

    NOTE(review): an earlier function with this exact name appears in this
    source; if both live in one module this definition replaces the earlier
    one -- confirm which is intended.
    """
    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=1, upper=4)
        a = Parameter(lower=0.06)
        b = Parameter(lower=0)
        f_1 = Parameter(lower=0, upper=1)
        f_2 = Parameter(lower=0, upper=1)

        X1 = Normal(mu, sigma, bounds=[(-np.inf, 21), (22, np.inf)])
        X2 = Exponential(a, bounds=[(-np.inf, 8), (10, 27), (31, np.inf)])
        X12 = Mix2(f_1, X1, X2, bounds=[(6, 17), (18, 36)])
        X3 = Exponential(b)
        X123 = Mix2(f_2, X12, X3, bounds=[(6, 17), (18, 36)])

    model.observed(X123)
    start_values = {mu: 23, sigma: 1.2, a: 0.2, b: 0.04, f_1: 0.3, f_2: 0.4}
    model.initialize(start_values)

    def in_mix_window(values):
        # Regions used by both Mix2 layers: (6, 17) and (18, 36)
        return ((6 < values) & (values < 17)) | ((18 < values) & (values < 36))

    # Draw order (exponential, exponential, normal) is fixed by the seed
    np.random.seed(42)

    exp_1_data = np.random.exponential(10, 200000)
    keep_exp_1 = (
        (6 < exp_1_data)
        & ((exp_1_data < 8) | (10 < exp_1_data))
        & ((exp_1_data < 17) | (18 < exp_1_data))
        & ((exp_1_data < 27) | (31 < exp_1_data))
        & (exp_1_data < 36)
    )
    exp_1_data = exp_1_data[keep_exp_1]

    exp_2_data = np.random.exponential(20, 200000)
    exp_2_data = exp_2_data[in_mix_window(exp_2_data)]

    # The Normal sample is cut to its own regions intersected with the Mix2
    # window, because len(norm_data) is used in the expected fractions below
    norm_data = np.random.normal(19, 2, 100000)
    keep_norm = (
        ((6 < norm_data) & (norm_data < 17))
        | ((18 < norm_data) & (norm_data < 21))
        | ((22 < norm_data) & (norm_data < 36))
    )
    norm_data = norm_data[keep_norm]

    data = np.concatenate([exp_1_data, exp_2_data, norm_data])
    data = data[in_mix_window(data)]

    result = model.fit(data)

    # The optimiser must converge and recover the generating parameters
    assert result.success
    expected_f_1 = len(norm_data) / (len(exp_1_data) + len(norm_data))
    expected_f_2 = (len(exp_1_data) + len(norm_data)) / len(data)
    assert abs(model.state[mu] - 19) < 3e-2
    assert abs(model.state[sigma] - 2) < 1e-3
    assert abs(model.state[a] - 0.1) < 1e-3
    assert abs(model.state[b] - 0.05) < 3e-4
    assert abs(model.state[f_1] - expected_f_1) < 5e-3
    assert abs(model.state[f_2] - expected_f_2) < 5e-4

    # Check that the individual components can be accessed
    xs = np.linspace(0, 41, 1001)

    def region_mask(points, regions):
        # 1 where a point lies strictly inside any region, otherwise 0
        def inside(value):
            return int(any(low < value < high for low, high in regions))

        return np.vectorize(inside)(points)

    def reference_pdf(dist, dist_args, regions, weight):
        # Unbounded pdf, masked to the regions, scaled by weight / mass
        mass = sum(
            dist.cdf(high, *dist_args) - dist.cdf(low, *dist_args)
            for low, high in regions
        )
        masked = dist.pdf(xs, *dist_args) * region_mask(xs, regions)
        masked *= weight / mass
        return masked

    # Normal
    expected = reference_pdf(
        st.norm,
        (model.state[mu], model.state[sigma]),
        [(6, 17), (18, 21), (22, 36)],
        model.state[f_1] * model.state[f_2],
    )
    assert_array_almost_equal(expected, model[X1].pdf(xs), 11)

    # Exponential 1
    expected = reference_pdf(
        st.expon,
        (0, 1/model.state[a]),
        [(6, 8), (10, 17), (18, 27), (31, 36)],
        (1-model.state[f_1]) * model.state[f_2],
    )
    assert_array_almost_equal(expected, model[X2].pdf(xs), 11)

    # Exponential 2
    expected = reference_pdf(
        st.expon,
        (0, 1/model.state[b]),
        [(6, 17), (18, 36)],
        (1-model.state[f_2]),
    )
    assert_array_almost_equal(expected, model[X3].pdf(xs), 11)
def test_mix2_extended():
    """Fit a Mix2 whose fraction is ``N1/(N1+N2)`` alongside a Poisson count.

    The yields ``N1`` and ``N2`` are fitted together with the shape
    parameters, then the model pdf is compared with a scipy reference built
    from the fitted values.
    """
    # Draw order (exponential, then normal) is fixed by the seed
    np.random.seed(0)

    exp_data = np.random.exponential(10, 20000)
    exp_data = exp_data[(6 < exp_data) & (exp_data < 36)]

    norm1_data = np.random.normal(19, 2, 10000)
    norm1_data = norm1_data[(6 < norm1_data) & (norm1_data < 36)]

    data = np.concatenate([exp_data, norm1_data])
    data = data[(6 < data) & (data < 36)]

    with Model() as model:
        mu = Parameter()
        sigma = Parameter(lower=1)
        a = Parameter(lower=0)
        N1 = Parameter(lower=0)
        N2 = Parameter(lower=0)
        N = Poisson(N1+N2)

        X1 = Normal(mu, sigma)
        X2 = Exponential(a)
        X12 = Mix2(N1/(N1+N2), X1, X2, bounds=[(6, 36)])

    model.observed(X12, N)
    start_values = {
        mu: 23,
        sigma: 1.2,
        a: 0.2,
        N1: len(data)/5,
        N2: len(data)*4/5,
    }
    model.initialize(start_values)

    # Second observable: the total event count repeated for every data point
    counts = np.ones_like(data)*len(data)
    result = model.fit(data, counts, optimizer=MigradOptimizer())

    assert result.success
    assert abs(model.state[mu] - 19) < 3e-2
    assert abs(model.state[sigma] - 2) < 3e-2
    assert abs(model.state[a] - 0.1) < 1e-3
    assert abs(model.state[N1] - len(norm1_data)) < np.sqrt(len(norm1_data))
    assert abs(model.state[N2] - len(exp_data)) < np.sqrt(len(exp_data))

    # Check that the pdf is correct
    xs = np.linspace(0, 41, 101)

    def region_mask(points, regions):
        # 1 where a point lies strictly inside any region, otherwise 0
        def inside(value):
            return int(any(low < value < high for low, high in regions))

        return np.vectorize(inside)(points)

    # Normal part, weighted by N1 / (N1 + N2); arithmetic order is kept
    # as-is because the comparison below uses 16 decimals
    normal_part = st.norm.pdf(xs, model.state[mu], model.state[sigma]) * region_mask(xs, [(6, 36)])
    mass = st.norm.cdf(36, model.state[mu], model.state[sigma])
    mass -= st.norm.cdf(6, model.state[mu], model.state[sigma])
    normal_part *= model.state[N1] / (model.state[N1]+model.state[N2]) / mass

    # Exponential part, weighted by N2 / (N1 + N2)
    expon_part = st.expon.pdf(xs, 0, 1/model.state[a]) * region_mask(xs, [(6, 36)])
    mass = st.expon.cdf(36, 0, 1/model.state[a]) - st.expon.cdf(6, 0, 1/model.state[a])
    expon_part *= model.state[N2] / (model.state[N1]+model.state[N2]) / mass

    expected = normal_part + expon_part
    actual = model.pdf(xs, None)
    assert_array_almost_equal(expected, actual, 16)
def test_bounds_invalid_shape_1():
    """Pass a single three-element row as bounds to a fake distribution.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_distribution = get_fake_distribution()
    three_element_row = [[1.1, 2.2, 3.3]]
    with Model():
        fake_distribution(bounds=three_element_row)
def test_inside_another_graph():
    """Create a fake distribution while another session's graph is the default.

    The distribution is constructed inside a ``Model`` block, but with the
    graph of a separately created ``tf.Session`` installed as the default.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    FakeDistribution = get_fake_distribution()
    other_session = tf.Session()
    try:
        with Model():
            with other_session.graph.as_default():
                FakeDistribution()
    finally:
        # Release the session's resources even when construction raises;
        # the original never closed it
        other_session.close()
def test_bounds_invalid_odd():
    """Pass a flat bounds list with five entries to a fake distribution.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_distribution = get_fake_distribution()
    five_values = [5.5, 6.6, 7.7, 8.8, 9.9]
    with Model():
        fake_distribution(bounds=five_values)
def test_using_upper_and_bounds():
    """Pass both ``upper`` and ``bounds`` to a fake distribution.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    fake_distribution = get_fake_distribution()
    explicit_bounds = [np.inf, 101]
    with Model():
        fake_distribution(upper=101, bounds=explicit_bounds)
# Example: fit a straight line y = a * X + b with Normal noise, then plot
# the data together with the fitted line.
import matplotlib.pyplot as plt
import numpy as np

from tensorprob import Model, Parameter, Normal

with Model() as model:
    a = Parameter()
    b = Parameter()
    sigma = Parameter(lower=0)
    X = Parameter()
    y = Normal(a * X + b, sigma)

# Both X and y are supplied as data when fitting
model.observed(X, y)
model.assign({
    a: 2,
    b: 2,
    sigma: 10,
})

# Synthetic data: slope 1, intercept 0, Normal noise with standard deviation 0.1
xs = np.linspace(0, 1, 100)
ys = 1 * xs + 0 + np.random.normal(0, .1, len(xs))

print(model.fit(xs, ys))

# Data points in red, fitted line in blue
plt.plot(xs, ys, 'ro')
x_ = np.linspace(0, 1, 200)
plt.plot(x_, model.state[a] * x_ + model.state[b], 'b-')
plt.show()
def test_numeric_integral():
    """Instantiate a fake distribution created with ``integral=None``.

    NOTE(review): no assertion is visible in this function; what outcome is
    expected (success or an exception) cannot be told from here -- confirm.
    """
    without_integral = get_fake_distribution(integral=None)
    with Model():
        without_integral()
def test_requiring_logp():
    """Instantiate a fake distribution created with ``logp=None``.

    NOTE(review): no assertion is visible in this function; presumably the
    constructor is expected to raise -- confirm how that is checked.
    """
    without_logp = get_fake_distribution(logp=None)
    with Model():
        without_logp()