def test_unweighted(self):
    # Define parameters
    num_samples = 1000
    xmax = 5
    bins = 21

    # Generate equal-weighted samples
    samples = np.random.normal(size=num_samples)
    weights = np.ones(num_samples) / num_samples
    empirical_distr = distrs.EmpiricalDistr(particles=samples, weights=weights)

    # Plot a histogram
    plt.hist(empirical_distr.particles, bins, (-xmax, xmax), histtype='stepfilled',
             alpha=.2, density=True, color='k', label='histogram')

    # Construct a KDE and plot it
    pdf = kde.GaussianKDE(empirical_distr)
    x = np.linspace(-xmax, xmax, 200)
    y = pdf(x)
    plt.plot(x, y, label='kde')

    # Plot the samples
    plt.scatter(samples, np.zeros_like(samples), marker='x', color='k', alpha=.1, label='samples')

    # Plot the true pdf
    y = stats.norm().pdf(x)
    plt.plot(x, y, label='true PDF')

    # Boilerplate
    plt.xlabel('Variable')
    plt.ylabel('Density')
    plt.legend(loc='best', frameon=False)
    plt.tight_layout()
    plt.show()
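# Note (illustrative, not part of the library): a weighted Gaussian KDE evaluated at a point x is, in
# essence, sum_i w_i * N(x; particle_i, h^2) with the weights normalised to sum to one. The helper
# below is a minimal sketch of that idea using only numpy/scipy calls that appear elsewhere in this
# file; the helper name and the Scott's-rule bandwidth are assumptions for illustration, not how
# kde.GaussianKDE is actually implemented.
def _reference_weighted_kde_1d(particles, weights, x):
    """Evaluate a weighted 1-d Gaussian KDE at the points x (illustration only)."""
    particles = np.asarray(particles, dtype=float).ravel()
    weights = np.asarray(weights, dtype=float).ravel()
    weights = weights / np.sum(weights)
    # Scott's rule bandwidth (an assumption; the library's bandwidth choice may differ):
    h = np.std(particles) * len(particles) ** (-1. / 5.)
    x = np.asarray(x, dtype=float)[:, np.newaxis]
    # Weighted sum of Gaussian kernels centred on the particles:
    return np.sum(weights * stats.norm(particles, h).pdf(x), axis=1)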
def test_multinomial_resample(self):
    rnd.random_state(np.random.RandomState(seed=42), force=True)

    normal_distr = distrs.NormalDistr(mean=[10., 100.], cov=[[4., -3.], [-3., 9.]])
    particles = normal_distr.sample(size=100000)
    approx_normal_empirical_2d = distrs.EmpiricalDistr(particles=particles, weights=np.ones((100000,)))
    self.assertEqual(approx_normal_empirical_2d.particle_count, 100000)
    npt.assert_almost_equal(approx_normal_empirical_2d.effective_particle_count, 100000.)
    self.assertEqual(approx_normal_empirical_2d.dim, 2)
    npt.assert_almost_equal(approx_normal_empirical_2d.particles, particles)
    npt.assert_almost_equal(approx_normal_empirical_2d.particle(0), npu.col(*particles[0]))
    npt.assert_almost_equal(approx_normal_empirical_2d.weights, npu.col(*np.ones((100000,))))
    npt.assert_almost_equal(approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((100000,))) / 100000.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weight(0), .00001)
    self.assertEqual(approx_normal_empirical_2d.weight_sum, 100000.)
    npt.assert_almost_equal(approx_normal_empirical_2d.mean, [[9.9866994], [100.0141095]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n, [[3.9902435], [9.0362717]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n_minus_1, [[3.9902834], [9.036362]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n, [[3.9902435, -3.011222], [-3.011222, 9.0362717]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n_minus_1, [[3.9902834, -3.0112521], [-3.0112521, 9.036362]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov, [[3.9902435, -3.011222], [-3.011222, 9.0362717]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n, [[1.9975594, 0.], [-1.5074505, 2.6007431]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n_minus_1, [[1.9975694, 0.], [-1.5074581, 2.6007561]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol, [[1.9975594, 0.], [-1.5074505, 2.6007431]])

    rnd.random_state(np.random.RandomState(seed=43), force=True)
    resampled_approx_normal_empirical_2d = distrs.multinomial_resample(approx_normal_empirical_2d)
    self.assertEqual(resampled_approx_normal_empirical_2d.particle_count, 100000)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.effective_particle_count, 100000.)
    self.assertEqual(resampled_approx_normal_empirical_2d.dim, 2)
    # The resampled particles should ("almost certainly") be different from the original ones:
    self.assertFalse(np.sum(resampled_approx_normal_empirical_2d.particles) == np.sum(particles))
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.particle(0), npu.col(*particles[1]))
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.weights, npu.col(*np.ones((100000,))))
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((100000,))) / 100000.)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.normalised_weight(0), .00001)
    self.assertEqual(resampled_approx_normal_empirical_2d.weight_sum, 100000.)
    # But the stats should be pretty close to those of the original empirical distribution, though not
    # to seven decimal places:
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.mean, [[9.9866994], [100.0141095]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.var_n, [[3.9902435], [9.0362717]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.var_n_minus_1, [[3.9902834], [9.036362]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.cov_n, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.cov_n_minus_1, [[3.9902834, -3.0112521], [-3.0112521, 9.036362]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.cov, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.vol_n, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.vol_n_minus_1, [[1.9975694, 0.], [-1.5074581, 2.6007561]], decimal=1)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d.vol, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)

    rnd.random_state(np.random.RandomState(seed=43), force=True)
    resampled_approx_normal_empirical_2d_particles = approx_normal_empirical_2d.sample(size=100000)
    npt.assert_almost_equal(resampled_approx_normal_empirical_2d_particles, resampled_approx_normal_empirical_2d.particles)

    subsampled_approx_normal_empirical_2d = distrs.multinomial_resample(approx_normal_empirical_2d, target_particle_count=40000)
    self.assertEqual(subsampled_approx_normal_empirical_2d.particle_count, 40000)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.effective_particle_count, 40000.)
    self.assertEqual(subsampled_approx_normal_empirical_2d.dim, 2)
    # The resampled particles should ("almost certainly") be different from the original ones:
    self.assertFalse(np.sum(subsampled_approx_normal_empirical_2d.particles) == np.sum(particles))
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.particle(0), npu.col(*particles[1]))
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.weights, npu.col(*np.ones((40000,))))
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((40000,))) / 40000.)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.normalised_weight(0), .000025)
    self.assertEqual(subsampled_approx_normal_empirical_2d.weight_sum, 40000.)
    # But the stats should be pretty close to those of the original empirical distribution, though not
    # to seven decimal places:
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.mean, [[9.9866994], [100.0141095]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.var_n, [[3.9902435], [9.0362717]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.var_n_minus_1, [[3.9902834], [9.036362]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.cov_n, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.cov_n_minus_1, [[3.9902834, -3.0112521], [-3.0112521, 9.036362]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.cov, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.vol_n, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.vol_n_minus_1, [[1.9975694, 0.], [-1.5074581, 2.6007561]], decimal=1)
    npt.assert_almost_equal(subsampled_approx_normal_empirical_2d.vol, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)

    supersampled_approx_normal_empirical_2d = distrs.multinomial_resample(approx_normal_empirical_2d, target_particle_count=300000)
    self.assertEqual(supersampled_approx_normal_empirical_2d.particle_count, 300000)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.effective_particle_count, 300000.)
    self.assertEqual(supersampled_approx_normal_empirical_2d.dim, 2)
    # The resampled particles should ("almost certainly") be different from the original ones:
    self.assertFalse(np.sum(supersampled_approx_normal_empirical_2d.particles) == np.sum(particles))
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.particle(0), npu.col(*particles[0]))
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.weights, npu.col(*np.ones((300000,))))
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((300000,))) / 300000.)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.normalised_weight(0), 3.3333333333333333e-06)
    self.assertEqual(supersampled_approx_normal_empirical_2d.weight_sum, 300000.)
    # But the stats should be pretty close to those of the original empirical distribution, though not
    # to seven decimal places:
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.mean, [[9.9866994], [100.0141095]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.var_n, [[3.9902435], [9.0362717]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.var_n_minus_1, [[3.9902834], [9.036362]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.cov_n, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.cov_n_minus_1, [[3.9902834, -3.0112521], [-3.0112521, 9.036362]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.cov, [[3.9902435, -3.011222], [-3.011222, 9.0362717]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.vol_n, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.vol_n_minus_1, [[1.9975694, 0.], [-1.5074581, 2.6007561]], decimal=1)
    npt.assert_almost_equal(supersampled_approx_normal_empirical_2d.vol, [[1.9975594, 0.], [-1.5074505, 2.6007431]], decimal=1)
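# Note (illustrative, not part of the library): multinomial resampling draws particle indices with
# replacement, with probabilities given by the normalised weights, and gives every surviving particle
# an equal weight -- which is why the effective particle count equals the particle count after
# resampling in the assertions above. The helper below is a minimal sketch of that idea; its name and
# the use of np.random.choice are assumptions about one possible implementation, not the library's
# actual distrs.multinomial_resample code.
def _reference_multinomial_resample(particles, weights, target_particle_count=None):
    """Resample (particles, weights) by multinomial sampling (illustration only)."""
    particles = np.asarray(particles)
    weights = np.asarray(weights, dtype=float).ravel()
    if target_particle_count is None:
        target_particle_count = len(particles)
    probabilities = weights / np.sum(weights)
    # Draw indices with replacement, proportionally to the normalised weights:
    indices = np.random.choice(len(particles), size=target_particle_count, replace=True, p=probabilities)
    # Every resampled particle carries an equal (unit) weight:
    return particles[indices], np.ones(target_particle_count)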
def test_empirical_distr(self):
    rnd.random_state(np.random.RandomState(seed=42), force=True)

    trivial_empirical_1d = distrs.EmpiricalDistr(particles=[[0.]], weights=[1.])
    self.assertEqual(trivial_empirical_1d.particle_count, 1)
    npt.assert_almost_equal(trivial_empirical_1d.effective_particle_count, 1.)
    self.assertEqual(trivial_empirical_1d.dim, 1)
    npt.assert_almost_equal(trivial_empirical_1d.particles, np.array([[0.]]))
    npt.assert_almost_equal(trivial_empirical_1d.particle(0), np.array([[0.]]))
    npt.assert_almost_equal(trivial_empirical_1d.weights, np.array([[1.]]))
    npt.assert_almost_equal(trivial_empirical_1d.weight(0), 1.)
    npt.assert_almost_equal(trivial_empirical_1d.normalised_weights, np.array([[1.]]))
    npt.assert_almost_equal(trivial_empirical_1d.normalised_weight(0), 1.)
    self.assertEqual(trivial_empirical_1d.weight_sum, 1.)
    self.assertEqual(trivial_empirical_1d.mean, 0.)
    self.assertEqual(trivial_empirical_1d.var_n, 0.)
    npt.assert_almost_equal(trivial_empirical_1d.var_n_minus_1, np.nan)
    self.assertEqual(trivial_empirical_1d.var, 0.)
    self.assertEqual(trivial_empirical_1d.cov_n, 0.)
    npt.assert_almost_equal(trivial_empirical_1d.cov_n_minus_1, np.nan)
    self.assertEqual(trivial_empirical_1d.cov, 0.)
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        trivial_empirical_1d.vol_n
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        trivial_empirical_1d.vol_n_minus_1
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        trivial_empirical_1d.vol

    simple_empirical_1d = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[.5, .5])
    self.assertEqual(simple_empirical_1d.particle_count, 2)
    npt.assert_almost_equal(simple_empirical_1d.effective_particle_count, 2.)
    self.assertEqual(simple_empirical_1d.dim, 1)
    npt.assert_almost_equal(simple_empirical_1d.particles, np.array([[-1.], [1.]]))
    npt.assert_almost_equal(simple_empirical_1d.particle(0), np.array([[-1.]]))
    npt.assert_almost_equal(simple_empirical_1d.weights, np.array([[.5], [.5]]))
    npt.assert_almost_equal(simple_empirical_1d.weight(0), .5)
    npt.assert_almost_equal(simple_empirical_1d.normalised_weights, np.array([[.5], [.5]]))
    npt.assert_almost_equal(simple_empirical_1d.normalised_weight(0), .5)
    self.assertEqual(simple_empirical_1d.weight_sum, 1.)
    self.assertEqual(simple_empirical_1d.mean, 0.)
    self.assertEqual(simple_empirical_1d.var_n, 1.)
    # "n minus 1" (unbiased) stats don't make sense as we are not using "repeat"-type weights, meaning
    # that each weight represents the number of occurrences of one observation:
    npt.assert_almost_equal(simple_empirical_1d.var_n_minus_1, np.inf)
    self.assertEqual(simple_empirical_1d.cov_n, 1.)
    # "n minus 1" (unbiased) stats don't make sense as we are not using "repeat"-type weights, meaning
    # that each weight represents the number of occurrences of one observation:
    npt.assert_almost_equal(simple_empirical_1d.cov_n_minus_1, np.inf)
    self.assertEqual(simple_empirical_1d.cov, 1.)
    self.assertEqual(simple_empirical_1d.vol_n, 1.)
    # "n minus 1" (unbiased) stats don't make sense as we are not using "repeat"-type weights, meaning
    # that each weight represents the number of occurrences of one observation:
    self.assertEqual(simple_empirical_1d.vol_n_minus_1, np.inf)
    self.assertEqual(simple_empirical_1d.vol, 1.)

    # The weights can be specified as a one-dimensional array...
    simple_empirical_1d = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[.5, .5])
    self.assertEqual(simple_empirical_1d.particle_count, 2)
    npt.assert_almost_equal(simple_empirical_1d.effective_particle_count, 2.)
    self.assertEqual(simple_empirical_1d.dim, 1)
    npt.assert_almost_equal(simple_empirical_1d.particles, np.array([[-1.], [1.]]))
    # ...but they come back as a (two-dimensional) column vector:
    npt.assert_almost_equal(simple_empirical_1d.weights, np.array([[.5], [.5]]))

    # ...alternatively, the weights can be specified as a (two-dimensional) column vector:
    simple_empirical_1d = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[[.5], [.5]])
    self.assertEqual(simple_empirical_1d.particle_count, 2)
    npt.assert_almost_equal(simple_empirical_1d.effective_particle_count, 2.)
    self.assertEqual(simple_empirical_1d.dim, 1)
    npt.assert_almost_equal(simple_empirical_1d.particles, np.array([[-1.], [1.]]))
    # ...they always come back as a (two-dimensional) column vector:
    npt.assert_almost_equal(simple_empirical_1d.weights, np.array([[.5], [.5]]))

    # If the particles are specified as a one-dimensional array, they are interpreted as...
    simple_empirical_1d = distrs.EmpiricalDistr(particles=[-1., 1.], weights=[.5, .5])
    self.assertEqual(simple_empirical_1d.particle_count, 2)
    npt.assert_almost_equal(simple_empirical_1d.effective_particle_count, 2.)
    self.assertEqual(simple_empirical_1d.dim, 1)
    # ...multiple one-dimensional particles (each row corresponds to a particle, each column to a dimension):
    npt.assert_almost_equal(simple_empirical_1d.particles, np.array([[-1.], [1.]]))
    npt.assert_almost_equal(simple_empirical_1d.weights, np.array([[.5], [.5]]))

    # Now we shall be using "repeat"-type weights:
    repeat_empirical_1d = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[2., 1.])
    self.assertEqual(repeat_empirical_1d.particle_count, 2)
    npt.assert_almost_equal(repeat_empirical_1d.effective_particle_count, 1.7999999999999998)
    self.assertEqual(repeat_empirical_1d.dim, 1)
    npt.assert_almost_equal(repeat_empirical_1d.particles, np.array([[-1.], [1.]]))
    npt.assert_almost_equal(repeat_empirical_1d.particle(0), np.array([[-1.]]))
    npt.assert_almost_equal(repeat_empirical_1d.weights, np.array([[2.], [1.]]))
    npt.assert_almost_equal(repeat_empirical_1d.weight(0), 2.)
    npt.assert_almost_equal(repeat_empirical_1d.normalised_weights, np.array([[0.6666667], [0.3333333]]))
    npt.assert_almost_equal(repeat_empirical_1d.normalised_weight(0), 0.6666667)
    self.assertEqual(repeat_empirical_1d.weight_sum, 3.)
    npt.assert_almost_equal(repeat_empirical_1d.mean, -0.33333333)
    npt.assert_almost_equal(repeat_empirical_1d.var_n, 0.88888889)
    npt.assert_almost_equal(repeat_empirical_1d.var_n_minus_1, 1.3333333)
    npt.assert_almost_equal(repeat_empirical_1d.cov_n, 0.88888889)
    npt.assert_almost_equal(repeat_empirical_1d.cov_n_minus_1, 1.3333333)
    npt.assert_almost_equal(repeat_empirical_1d.cov, 0.88888889)
    npt.assert_almost_equal(repeat_empirical_1d.vol_n, 0.94280904)
    npt.assert_almost_equal(repeat_empirical_1d.vol_n_minus_1, 1.15470054)
    npt.assert_almost_equal(repeat_empirical_1d.vol, 0.94280904)

    # Now we shall be using "repeat"-type weights. There are three two-dimensional particles:
    repeat_empirical_2d = distrs.EmpiricalDistr(particles=[[-2., 2.], [0., 0.], [1., -1.]], weights=[2., 1., 1.])
    self.assertEqual(repeat_empirical_2d.particle_count, 3)
    npt.assert_almost_equal(repeat_empirical_2d.effective_particle_count, 2.6666666666666665)
    self.assertEqual(repeat_empirical_2d.dim, 2)
    npt.assert_almost_equal(repeat_empirical_2d.particles, np.array([[-2., 2.], [0., 0.], [1., -1.]]))
    npt.assert_almost_equal(repeat_empirical_2d.particle(0), np.array([[-2.], [2.]]))
    npt.assert_almost_equal(repeat_empirical_2d.weights, np.array([[2.], [1.], [1.]]))
    npt.assert_almost_equal(repeat_empirical_2d.weight(0), 2.)
    npt.assert_almost_equal(repeat_empirical_2d.normalised_weights, np.array([[0.5], [0.25], [0.25]]))
    npt.assert_almost_equal(repeat_empirical_2d.normalised_weight(0), .5)
    self.assertEqual(repeat_empirical_2d.weight_sum, 4.)
    npt.assert_almost_equal(repeat_empirical_2d.mean, [[-0.75], [0.75]])
    npt.assert_almost_equal(repeat_empirical_2d.var_n, [[1.6875], [1.6875]])
    npt.assert_almost_equal(repeat_empirical_2d.var_n_minus_1, [[2.25], [2.25]])
    npt.assert_almost_equal(repeat_empirical_2d.cov_n, [[1.6875, -1.6875], [-1.6875, 1.6875]])
    npt.assert_almost_equal(repeat_empirical_2d.cov_n_minus_1, [[2.25, -2.25], [-2.25, 2.25]])
    npt.assert_almost_equal(repeat_empirical_2d.cov, [[1.6875, -1.6875], [-1.6875, 1.6875]])
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        repeat_empirical_2d.vol_n
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        repeat_empirical_2d.vol_n_minus_1
    with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
        repeat_empirical_2d.vol

    normal_distr = distrs.NormalDistr(mean=[10., 100.], cov=[[4., -3.], [-3., 9.]])
    particles = normal_distr.sample(size=100)
    approx_normal_empirical_2d = distrs.EmpiricalDistr(particles=particles, weights=np.ones((100,)))
    self.assertEqual(approx_normal_empirical_2d.particle_count, 100)
    npt.assert_almost_equal(approx_normal_empirical_2d.effective_particle_count, 100.)
    self.assertEqual(approx_normal_empirical_2d.dim, 2)
    npt.assert_almost_equal(approx_normal_empirical_2d.particles, particles)
    npt.assert_almost_equal(approx_normal_empirical_2d.particle(0), npu.col(*particles[0]))
    npt.assert_almost_equal(approx_normal_empirical_2d.weights, npu.col(*np.ones((100,))))
    npt.assert_almost_equal(approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((100,))) / 100.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weight(0), .01)
    self.assertEqual(approx_normal_empirical_2d.weight_sum, 100.)
    npt.assert_almost_equal(approx_normal_empirical_2d.mean, [[10.2077457], [99.6856645]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n, [[3.3516275], [6.7649298]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n_minus_1, [[3.3854823], [6.8332624]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n, [[3.3516275, -1.8258307], [-1.8258307, 6.7649298]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n_minus_1, [[3.3854823, -1.8442735], [-1.8442735, 6.8332624]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov, [[3.3516275, -1.8258307], [-1.8258307, 6.7649298]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n, [[1.8307451, 0.], [-0.9973157, 2.4021431]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n_minus_1, [[1.839968, 0.], [-1.00234, 2.4142446]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol, [[1.8307451, 0.], [-0.9973157, 2.4021431]])

    # Using more particles more faithfully approximates the mean and covariance of the normal distribution:
    normal_distr = distrs.NormalDistr(mean=[10., 100.], cov=[[4., -3.], [-3., 9.]])
    particles = normal_distr.sample(size=100000)
    approx_normal_empirical_2d = distrs.EmpiricalDistr(particles=particles, weights=np.ones((100000,)))
    self.assertEqual(approx_normal_empirical_2d.particle_count, 100000)
    npt.assert_almost_equal(approx_normal_empirical_2d.effective_particle_count, 100000.)
    self.assertEqual(approx_normal_empirical_2d.dim, 2)
    npt.assert_almost_equal(approx_normal_empirical_2d.particles, particles)
    npt.assert_almost_equal(approx_normal_empirical_2d.particle(0), npu.col(*particles[0]))
    npt.assert_almost_equal(approx_normal_empirical_2d.weights, npu.col(*np.ones((100000,))))
    npt.assert_almost_equal(approx_normal_empirical_2d.weight(0), 1.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weights, npu.col(*np.ones((100000,))) / 100000.)
    npt.assert_almost_equal(approx_normal_empirical_2d.normalised_weight(0), .00001)
    self.assertEqual(approx_normal_empirical_2d.weight_sum, 100000.)
    npt.assert_almost_equal(approx_normal_empirical_2d.mean, [[9.9863195], [100.0145412]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n, [[3.9901799], [9.0390325]])
    npt.assert_almost_equal(approx_normal_empirical_2d.var_n_minus_1, [[3.9902198], [9.0391229]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n, [[3.9901799, -3.0120428], [-3.0120428, 9.0390325]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov_n_minus_1, [[3.9902198, -3.0120729], [-3.0120729, 9.0391229]])
    npt.assert_almost_equal(approx_normal_empirical_2d.cov, [[3.9901799, -3.0120428], [-3.0120428, 9.0390325]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n, [[1.9975435, 0.], [-1.5078735, 2.6010287]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol_n_minus_1, [[1.9975535, 0.], [-1.507881, 2.6010417]])
    npt.assert_almost_equal(approx_normal_empirical_2d.vol, [[1.9975435, 0.], [-1.5078735, 2.6010287]])
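# Note (illustrative, not part of the library): the numbers asserted for repeat_empirical_1d above are
# consistent with the usual weighted-sample formulas. With particles x = (-1, 1) and weights w = (2, 1):
#     effective particle count = (sum w)^2 / sum(w^2) = 9/5 = 1.8,
#     weighted mean            = sum(w * x) / sum(w) = -1/3,
#     var_n                    = sum(w * (x - mean)^2) / sum(w) = 8/9,
#     var_n_minus_1            = sum(w * (x - mean)^2) / (sum(w) - 1) = 4/3.
# The (sum(w) - 1) divisor is an inference from the asserted values under the "repeat"-weight
# convention, not documented API. A quick numeric check with plain numpy (helper name is local to this
# sketch):
def _check_repeat_weight_stats():
    x = np.array([-1., 1.])
    w = np.array([2., 1.])
    ess = np.sum(w) ** 2 / np.sum(w ** 2)
    mean = np.sum(w * x) / np.sum(w)
    var_n = np.sum(w * (x - mean) ** 2) / np.sum(w)
    var_n_minus_1 = np.sum(w * (x - mean) ** 2) / (np.sum(w) - 1.)
    # Expected: (1.8, -0.3333..., 0.8888..., 1.3333...), matching the assertions above.
    return ess, mean, var_n, var_n_minus_1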
def test_weighted_1d(self):
    bins = 21

    # Define a Gaussian mixture to draw samples from
    num_samples = 10000
    xmin, xmax = -10, 8
    # Weight attributed to each component of the mixture
    gaussian_weights = np.array([2, 1], dtype=float)
    gaussian_weights /= np.sum(gaussian_weights)
    # Mean and std of each mixture component
    gaussian_means = np.array([-1, 1])
    gaussian_std = np.array([2, 1])
    # Observation probability of each mixture component
    gaussian_observation = np.array([1, .5])

    # How many samples belong to each mixture component?
    gaussian_samples = np.random.multinomial(num_samples, gaussian_weights)

    samples = []
    weights = []
    # Generate samples and observed samples for each mixture component
    for n, m, s, o in zip(gaussian_samples, gaussian_means, gaussian_std, gaussian_observation):
        _samples = np.random.normal(m, s, n)
        _samples = _samples[o > np.random.uniform(size=n)]
        samples.extend(_samples)
        # Each observed sample is weighted by the inverse of its observation probability
        weights.extend(np.ones_like(_samples) / o)

    # Renormalise the sample weights
    weights = np.array(weights, float)
    weights /= np.sum(weights)
    samples = np.array(samples)

    # Compute the true pdf
    x = np.linspace(xmin, xmax, 200)
    true_pdf = 0
    for w, m, s in zip(gaussian_weights, gaussian_means, gaussian_std):
        true_pdf = true_pdf + w * stats.norm(m, s).pdf(x)

    # Plot a histogram
    plt.hist(samples, bins, (xmin, xmax), histtype='stepfilled', alpha=.2, density=True,
             color='k', label='histogram', weights=weights)

    # Construct a KDE and plot it
    empirical_distr = distrs.EmpiricalDistr(particles=samples, weights=weights)
    pdf = kde.GaussianKDE(empirical_distr)
    y = pdf(x)
    plt.plot(x, y, label='weighted kde')

    # Compare with a naive kde
    pdf = stats.gaussian_kde(samples)
    y = pdf(x)
    plt.plot(x, y, label='unweighted kde')

    # Plot the samples
    plt.scatter(samples, np.zeros_like(samples), marker='x', color='k', alpha=.02, label='samples')

    # Plot the true pdf
    plt.plot(x, true_pdf, label='true PDF')

    # Boilerplate
    plt.xlabel('Variable')
    plt.ylabel('Density')
    plt.legend(loc='best', frameon=False)
    plt.tight_layout()
    plt.show()
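# Note (illustrative, not part of the library): weighting each observed sample by 1/o in the test above
# is inverse-probability weighting. If a sample from a mixture component is only observed with
# probability o, expectations under the full mixture can still be estimated from the observed samples
# via sum_i(w_i * f(x_i)) / sum_i(w_i) with w_i = 1/o. The helper below is a minimal numeric check that
# the weighted sample mean recovers the true mixture mean; its name and parameters are local to this
# sketch.
def _check_inverse_probability_weighting(num_samples=200000):
    means, stds = np.array([-1., 1.]), np.array([2., 1.])
    mix, obs = np.array([2., 1.]) / 3., np.array([1., .5])
    counts = np.random.multinomial(num_samples, mix)
    samples, weights = [], []
    for n, m, s, o in zip(counts, means, stds, obs):
        x = np.random.normal(m, s, n)
        x = x[np.random.uniform(size=n) < o]   # thin: keep each sample with probability o
        samples.extend(x)
        weights.extend(np.ones_like(x) / o)    # re-weight the survivors by 1/o
    samples, weights = np.array(samples), np.array(weights)
    weighted_mean = np.sum(weights * samples) / np.sum(weights)
    true_mean = np.sum(mix * means)            # = -1/3
    # For large num_samples the two should be close:
    return weighted_mean, true_mean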
def test_weighted_2d(self):
    bins = 21

    # Define a Gaussian mixture to draw samples from
    num_samples = 10000
    xmin, xmax = -10, 8
    # Weight attributed to each component of the mixture
    gaussian_weights = np.array([2, 1], dtype=float)
    gaussian_weights /= np.sum(gaussian_weights)
    # Mean and std of each mixture component
    gaussian_means = np.array([-1, 1])
    gaussian_std = np.array([2, 1])
    # Observation probability of each mixture component
    gaussian_observation = np.array([1, .5])

    # How many samples belong to each mixture component?
    gaussian_samples = np.random.multinomial(num_samples, gaussian_weights)

    samples = []
    weights = []
    # Generate samples and observed samples for each mixture component
    for n, m, s, o in zip(gaussian_samples, gaussian_means, gaussian_std, gaussian_observation):
        _samples = np.random.normal(m, s, (n, 2))
        _samples = _samples[o > np.random.uniform(size=n)]
        samples.extend(_samples)
        weights.extend(np.ones(len(_samples)) / o)

    # Renormalise the sample weights
    weights = np.array(weights, float)
    weights /= np.sum(weights)
    samples = np.transpose(samples)

    # Evaluate the true pdf on a grid
    x = np.linspace(xmin, xmax, 100)
    xx, yy = np.meshgrid(x, x)
    true_pdf = 0
    for w, m, s in zip(gaussian_weights, gaussian_means, gaussian_std):
        true_pdf = true_pdf + w * stats.norm(m, s).pdf(xx) * stats.norm(m, s).pdf(yy)

    # Evaluate the kde on a grid
    empirical_distr = distrs.EmpiricalDistr(particles=samples.T, weights=weights)
    pdf = kde.GaussianKDE(empirical_distr)
    points = (np.ravel(xx), np.ravel(yy))
    points = np.array(points).T
    zz = pdf(points)
    zz = np.reshape(zz, xx.shape)

    kwargs = dict(extent=(xmin, xmax, xmin, xmax), cmap='hot', origin='lower')

    # Plot the true pdf
    plt.subplot(221)
    plt.imshow(true_pdf.T, **kwargs)
    plt.title('true PDF')

    # Plot the kde
    plt.subplot(222)
    plt.imshow(zz.T, **kwargs)
    plt.title('kde')
    plt.tight_layout()

    # Plot a histogram
    ax = plt.subplot(223)
    plt.hist2d(samples[0], samples[1], bins, range=((xmin, xmax), (xmin, xmax)),
               density=True, weights=weights, cmap='hot')
    ax.set_aspect(1)
    plt.title('histogram')
    plt.tight_layout()
    plt.show()