예제 #1
0
 def test_unweighted(self):
     """Plot a Gaussian KDE of equal-weighted samples against the true density.

     Standard normal samples are wrapped in an ``EmpiricalDistr`` with
     uniform weights.  A histogram of its particles, the ``GaussianKDE``
     curve, the raw samples and the standard normal PDF are drawn on one
     figure.  Nothing is asserted; the method finishes with ``plt.show()``.
     """
     sample_count = 1000
     half_width = 5
     bin_count = 21

     # Equal-weighted draws: every sample carries weight 1 / sample_count.
     draws = np.random.normal(size=sample_count)
     uniform_weights = np.ones(sample_count) / sample_count
     distr = distrs.EmpiricalDistr(particles=draws, weights=uniform_weights)

     # Histogram of the particles, normalised to a density.
     plt.hist(
         distr.particles,
         bins=bin_count,
         range=(-half_width, half_width),
         histtype='stepfilled',
         alpha=.2,
         density=True,
         color='k',
         label='histogram',
     )

     # Kernel density estimate evaluated on a regular grid.
     grid = np.linspace(-half_width, half_width, 200)
     density_estimate = kde.GaussianKDE(distr)
     plt.plot(grid, density_estimate(grid), label='kde')

     # Raw samples, drawn as faint markers along the horizontal axis.
     plt.scatter(draws, np.zeros_like(draws), marker='x', color='k', alpha=.1, label='samples')

     # Density of the distribution the samples were drawn from.
     plt.plot(grid, stats.norm().pdf(grid), label='true PDF')

     # Figure decoration.
     plt.xlabel('Variable')
     plt.ylabel('Density')
     plt.legend(loc='best', frameon=False)
     plt.tight_layout()
     plt.show()
예제 #2
0
    def test_multinomial_resample(self):
        """Check ``multinomial_resample`` on a large two-dimensional empirical distribution.

        An ``EmpiricalDistr`` is built from 100000 unit-weighted samples of a
        bivariate normal and its statistics are checked against reference
        values.  It is then resampled to the same size, to 40000 particles and
        to 300000 particles; each result must have unit weights and statistics
        that match the reference values to one decimal place.  The test also
        checks that, under the same seed, ``sample`` on the original
        distribution returns the particles produced by the resampling.
        """
        source_count = 100000

        # Statistics of the original empirical distribution, listed in the
        # order in which they are asserted.
        reference_stats = (
            ('mean', [[   9.9866994], [ 100.0141095]]),
            ('var_n', [[ 3.9902435], [ 9.0362717]]),
            ('var_n_minus_1', [[ 3.9902834], [ 9.036362 ]]),
            ('cov_n', [[ 3.9902435, -3.011222 ], [-3.011222 ,  9.0362717]]),
            ('cov_n_minus_1', [[ 3.9902834, -3.0112521], [-3.0112521,  9.036362 ]]),
            ('cov', [[ 3.9902435, -3.011222 ], [-3.011222 ,  9.0362717]]),
            ('vol_n', [[ 1.9975594,  0.       ], [-1.5074505,  2.6007431]]),
            ('vol_n_minus_1', [[ 1.9975694,  0.       ], [-1.5074581,  2.6007561]]),
            ('vol', [[ 1.9975594,  0.       ], [-1.5074505,  2.6007431]]),
        )

        def check_size(distr, count):
            # Particle count, effective particle count and dimension.
            self.assertEqual(distr.particle_count, count)
            npt.assert_almost_equal(distr.effective_particle_count, float(count))
            self.assertEqual(distr.dim, 2)

        def check_weights_and_stats(distr, count, first_particle, first_normalised_weight, decimal):
            # First particle, unit weights, then every reference statistic to
            # the requested number of decimal places.
            npt.assert_almost_equal(distr.particle(0), npu.col(*first_particle))
            unit_weights = npu.col(*np.ones((count,)))
            npt.assert_almost_equal(distr.weights, unit_weights)
            npt.assert_almost_equal(distr.weight(0), 1.)
            npt.assert_almost_equal(distr.normalised_weights, unit_weights / float(count))
            npt.assert_almost_equal(distr.normalised_weight(0), first_normalised_weight)
            self.assertEqual(distr.weight_sum, float(count))
            for stat_name, expected in reference_stats:
                npt.assert_almost_equal(getattr(distr, stat_name), expected, decimal=decimal)

        rnd.random_state(np.random.RandomState(seed=42), force=True)

        normal_distr = distrs.NormalDistr(mean=[10., 100.], cov=[[4., -3.], [-3., 9.]])
        particles = normal_distr.sample(size=source_count)
        original = distrs.EmpiricalDistr(particles=particles, weights=np.ones((source_count,)))
        check_size(original, source_count)
        npt.assert_almost_equal(original.particles, particles)
        check_weights_and_stats(original, source_count, particles[0], .00001, decimal=7)

        rnd.random_state(np.random.RandomState(seed=43), force=True)

        # Resampling to the same number of particles.
        resampled = distrs.multinomial_resample(original)
        check_size(resampled, source_count)
        # The resampled particles should ("almost certainly") be different from the original ones:
        self.assertFalse(np.sum(resampled.particles) == np.sum(particles))
        # But the stats should be pretty close to those of the original empirical distribution, though not to seven
        # decimal places:
        check_weights_and_stats(resampled, source_count, particles[1], .00001, decimal=1)

        rnd.random_state(np.random.RandomState(seed=43), force=True)

        # With the seed reset, sampling from the original distribution is
        # expected to reproduce the resampled particles.
        redrawn_particles = original.sample(size=source_count)
        npt.assert_almost_equal(redrawn_particles, resampled.particles)

        # Resampling down to fewer particles.
        subsampled = distrs.multinomial_resample(original, target_particle_count=40000)
        check_size(subsampled, 40000)
        # The resampled particles should ("almost certainly") be different from the original ones:
        self.assertFalse(np.sum(subsampled.particles) == np.sum(particles))
        check_weights_and_stats(subsampled, 40000, particles[1], .000025, decimal=1)

        # Resampling up to more particles.
        supersampled = distrs.multinomial_resample(original, target_particle_count=300000)
        check_size(supersampled, 300000)
        # The resampled particles should ("almost certainly") be different from the original ones:
        self.assertFalse(np.sum(supersampled.particles) == np.sum(particles))
        check_weights_and_stats(supersampled, 300000, particles[0], 3.3333333333333333e-06, decimal=1)
예제 #3
0
    def test_empirical_distr(self):
        """Check the properties of ``EmpiricalDistr`` on a range of particle sets.

        Covers a single particle, two equally weighted particles, the accepted
        shapes for ``particles`` and ``weights``, "repeat"-type weights in one
        and two dimensions, and unit-weighted samples (100 and 100000) drawn
        from a bivariate normal under a fixed seed.
        """
        def check_layout(distr, count, effective_count, dim, particles):
            # Particle count, effective particle count, dimension and particles.
            self.assertEqual(distr.particle_count, count)
            npt.assert_almost_equal(distr.effective_particle_count, effective_count)
            self.assertEqual(distr.dim, dim)
            npt.assert_almost_equal(distr.particles, particles)

        def check_weighting(distr, first_particle, weights, first_weight,
                            normalised_weights, first_normalised_weight, weight_sum):
            # First particle followed by raw and normalised weights.
            npt.assert_almost_equal(distr.particle(0), first_particle)
            npt.assert_almost_equal(distr.weights, weights)
            npt.assert_almost_equal(distr.weight(0), first_weight)
            npt.assert_almost_equal(distr.normalised_weights, normalised_weights)
            npt.assert_almost_equal(distr.normalised_weight(0), first_normalised_weight)
            self.assertEqual(distr.weight_sum, weight_sum)

        def check_stats(distr, expected_by_name):
            # Each named property must almost equal its expected value.
            for stat_name, expected in expected_by_name:
                npt.assert_almost_equal(getattr(distr, stat_name), expected)

        def check_vols_raise(distr):
            # Every "vol" property must raise for this distribution.
            for stat_name in ('vol_n', 'vol_n_minus_1', 'vol'):
                with self.assertRaises(np.linalg.LinAlgError):  # Matrix is not positive definite
                    getattr(distr, stat_name)

        def check_normal_sample(count, first_normalised_weight, expected_by_name):
            # Draw `count` unit-weighted particles from the bivariate normal
            # and check the resulting empirical distribution.
            normal_distr = distrs.NormalDistr(mean=[10., 100.], cov=[[4., -3.], [-3., 9.]])
            particles = normal_distr.sample(size=count)
            approx = distrs.EmpiricalDistr(particles=particles, weights=np.ones((count,)))
            check_layout(approx, count, float(count), 2, particles)
            check_weighting(
                approx,
                npu.col(*particles[0]),
                npu.col(*np.ones((count,))),
                1.,
                npu.col(*np.ones((count,))) / float(count),
                first_normalised_weight,
                float(count),
            )
            check_stats(approx, expected_by_name)

        rnd.random_state(np.random.RandomState(seed=42), force=True)

        # A single particle at the origin.
        trivial = distrs.EmpiricalDistr(particles=[[0.]], weights=[1.])
        check_layout(trivial, 1, 1., 1, np.array([[0.]]))
        check_weighting(trivial, np.array([[0.]]), np.array([[1.]]), 1., np.array([[1.]]), 1., 1.)
        self.assertEqual(trivial.mean, 0.)
        self.assertEqual(trivial.var_n, 0.)
        npt.assert_almost_equal(trivial.var_n_minus_1, np.nan)
        self.assertEqual(trivial.var, 0.)
        self.assertEqual(trivial.cov_n, 0.)
        npt.assert_almost_equal(trivial.cov_n_minus_1, np.nan)
        self.assertEqual(trivial.cov, 0.)
        check_vols_raise(trivial)

        # Two particles with equal weights that sum to one.
        two_point_particles = np.array([[-1.], [1.]])
        half_weights = np.array([[.5], [.5]])
        simple = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[.5, .5])
        check_layout(simple, 2, 2., 1, two_point_particles)
        check_weighting(simple, np.array([[-1.]]), half_weights, .5, half_weights, .5, 1.)
        self.assertEqual(simple.mean, 0.)
        self.assertEqual(simple.var_n, 1.)
        # "n minus 1" (unbiased) stats don't make sense as we are not using "repeat"-type weights, meaning that each
        # weight represents the number of occurrences of one observation:
        npt.assert_almost_equal(simple.var_n_minus_1, np.inf)
        self.assertEqual(simple.cov_n, 1.)
        npt.assert_almost_equal(simple.cov_n_minus_1, np.inf)
        self.assertEqual(simple.cov, 1.)
        self.assertEqual(simple.vol_n, 1.)
        self.assertEqual(simple.vol_n_minus_1, np.inf)
        self.assertEqual(simple.vol, 1.)

        accepted_input_shapes = (
            # The weights can be specified as a one-dimensional array...
            ([[-1.], [1.]], [.5, .5]),
            # ...alternatively, the weights can be specified as a (two-dimensional) column vector:
            ([[-1.], [1.]], [[.5], [.5]]),
            # If the particles are specified as a one-dimensional array, they are interpreted as multiple
            # one-dimensional particles (each row corresponds to a particle, each column to a dimension):
            ([-1., 1.], [.5, .5]),
        )
        for particles_in, weights_in in accepted_input_shapes:
            shaped = distrs.EmpiricalDistr(particles=particles_in, weights=weights_in)
            check_layout(shaped, 2, 2., 1, np.array([[-1.], [1.]]))
            # ...the weights always come back as a (two-dimensional) column vector:
            npt.assert_almost_equal(shaped.weights, np.array([[.5], [.5]]))

        # Now we shall be using "repeat"-type weights:
        repeat_1d = distrs.EmpiricalDistr(particles=[[-1.], [1.]], weights=[2., 1.])
        check_layout(repeat_1d, 2, 1.7999999999999998, 1, np.array([[-1.], [1.]]))
        check_weighting(
            repeat_1d,
            np.array([[-1.]]),
            np.array([[2.], [1.]]),
            2.,
            np.array([[ 0.6666667], [ 0.3333333]]),
            0.6666667,
            3.,
        )
        check_stats(repeat_1d, (
            ('mean', -0.33333333),
            ('var_n', 0.88888889),
            ('var_n_minus_1', 1.3333333),
            ('cov_n', 0.88888889),
            ('cov_n_minus_1', 1.3333333),
            ('cov', 0.88888889),
            ('vol_n', 0.94280904),
            ('vol_n_minus_1', 1.15470054),
            ('vol', 0.94280904),
        ))

        # Now we shall be using "repeat"-type weights. There are three two-dimensional particles:
        repeat_2d = distrs.EmpiricalDistr(particles=[[-2., 2.], [0., 0.], [1., -1.]], weights=[2., 1., 1.])
        check_layout(repeat_2d, 3, 2.6666666666666665, 2, np.array([[-2., 2.], [0., 0.], [1., -1.]]))
        check_weighting(
            repeat_2d,
            np.array([[-2.], [2.]]),
            np.array([[2.], [1.], [1.]]),
            2.,
            np.array([[ 0.5 ], [ 0.25], [ 0.25]]),
            .5,
            4.,
        )
        check_stats(repeat_2d, (
            ('mean', [[-0.75], [ 0.75]]),
            ('var_n', [[ 1.6875], [ 1.6875]]),
            ('var_n_minus_1', [[ 2.25], [ 2.25]]),
            ('cov_n', [[ 1.6875, -1.6875], [-1.6875,  1.6875]]),
            ('cov_n_minus_1', [[ 2.25, -2.25], [-2.25,  2.25]]),
            ('cov', [[ 1.6875, -1.6875], [-1.6875,  1.6875]]),
        ))
        check_vols_raise(repeat_2d)

        # One hundred unit-weighted samples of the bivariate normal.
        check_normal_sample(100, .01, (
            ('mean', [[ 10.2077457], [ 99.6856645]]),
            ('var_n', [[ 3.3516275], [ 6.7649298]]),
            ('var_n_minus_1', [[ 3.3854823], [ 6.8332624]]),
            ('cov_n', [[ 3.3516275, -1.8258307], [-1.8258307,  6.7649298]]),
            ('cov_n_minus_1', [[ 3.3854823, -1.8442735], [-1.8442735,  6.8332624]]),
            ('cov', [[ 3.3516275, -1.8258307], [-1.8258307,  6.7649298]]),
            ('vol_n', [[ 1.8307451,  0.       ], [-0.9973157,  2.4021431]]),
            ('vol_n_minus_1', [[ 1.839968 ,  0.       ], [-1.00234  ,  2.4142446]]),
            ('vol', [[ 1.8307451,  0.       ], [-0.9973157,  2.4021431]]),
        ))

        # Using more particles more faithfully approximates the mean and covariance of the normal distribution:
        check_normal_sample(100000, .00001, (
            ('mean', [[ 9.9863195], [ 100.0145412]]),
            ('var_n', [[ 3.9901799], [ 9.0390325]]),
            ('var_n_minus_1', [[ 3.9902198], [ 9.0391229]]),
            ('cov_n', [[ 3.9901799, -3.0120428], [-3.0120428,  9.0390325]]),
            ('cov_n_minus_1', [[ 3.9902198, -3.0120729], [-3.0120729,  9.0391229]]),
            ('cov', [[ 3.9901799, -3.0120428], [-3.0120428,  9.0390325]]),
            ('vol_n', [[ 1.9975435,  0.       ], [-1.5078735,  2.6010287]]),
            ('vol_n_minus_1', [[ 1.9975535,  0.       ], [-1.507881 ,  2.6010417]]),
            ('vol', [[ 1.9975435,  0.       ], [-1.5078735,  2.6010287]]),
        ))
예제 #4
0
 def test_weighted_1d(self):
     """Plot weighted and unweighted KDEs of a one-dimensional Gaussian mixture.

     Samples are drawn from a two-component Gaussian mixture.  Each sample
     is kept with its component's observation probability and given a
     weight equal to the inverse of that probability.  The weighted
     histogram, the weighted ``GaussianKDE``, SciPy's unweighted
     ``gaussian_kde``, the raw samples and the true mixture density are
     drawn on one figure.  Nothing is asserted; the method finishes with
     ``plt.show()``.
     """
     bins = 21

     # Define a Gaussian mixture to draw samples from
     num_samples = 10000
     xmin, xmax = -10, 8
     # Weight attributed to each component of the mixture.  The builtin
     # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
     gaussian_weights = np.array([2, 1], dtype=float)
     gaussian_weights /= np.sum(gaussian_weights)
     # Mean and std of each mixture component
     gaussian_means = np.array([-1, 1])
     gaussian_std = np.array([2, 1])
     # Observation probability of each mixture component
     gaussian_observation = np.array([1, .5])

     # How many samples belong to each mixture component?
     gaussian_samples = np.random.multinomial(num_samples, gaussian_weights)
     samples = []
     weights = []
     # Generate samples and observed samples for each mixture component
     for n, m, s, o in zip(gaussian_samples, gaussian_means, gaussian_std, gaussian_observation):
         _samples = np.random.normal(m, s, n)
         # Keep each sample with probability o...
         _samples = _samples[o > np.random.uniform(size=n)]
         samples.extend(_samples)
         # ...and weight the survivors by 1 / o.
         weights.extend(np.ones_like(_samples) / o)

     # Renormalise the sample weights
     weights = np.array(weights, dtype=float)
     weights /= np.sum(weights)
     samples = np.array(samples)

     # Compute the true pdf
     x = np.linspace(xmin, xmax, 200)
     true_pdf = 0
     for w, m, s in zip(gaussian_weights, gaussian_means, gaussian_std):
         true_pdf = true_pdf + w * stats.norm(m, s).pdf(x)

     # Plot a histogram
     plt.hist(samples, bins=bins, range=(xmin, xmax), histtype='stepfilled', alpha=.2, density=True,
              color='k', label='histogram', weights=weights)

     # Construct a KDE and plot it
     empirical_distr = distrs.EmpiricalDistr(particles=samples, weights=weights)
     pdf = kde.GaussianKDE(empirical_distr)
     y = pdf(x)
     plt.plot(x, y, label='weighted kde')

     # Compare with a naive kde that ignores the weights
     pdf = stats.gaussian_kde(samples)
     y = pdf(x)
     plt.plot(x, y, label='unweighted kde')

     # Plot the samples
     plt.scatter(samples, np.zeros_like(samples), marker='x', color='k', alpha=.02, label='samples')

     # Plot the true pdf
     plt.plot(x, true_pdf, label='true PDF')

     # Boiler plate
     plt.xlabel('Variable')
     plt.ylabel('Density')
     plt.legend(loc='best', frameon=False)
     plt.tight_layout()
     plt.show()
예제 #5
0
 def test_weighted_2d(self):
     """Plot a weighted KDE of a two-dimensional Gaussian mixture.

     Two-dimensional samples are drawn from a two-component Gaussian
     mixture.  Each sample is kept with its component's observation
     probability and given a weight equal to the inverse of that
     probability.  The true density, the weighted ``GaussianKDE`` evaluated
     on a grid and a weighted two-dimensional histogram are drawn as three
     subplots.  Nothing is asserted; the method finishes with
     ``plt.show()``.
     """
     bins = 21

     # Define a Gaussian mixture to draw samples from
     num_samples = 10000
     xmin, xmax = -10, 8
     # Weight attributed to each component of the mixture.  The builtin
     # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
     gaussian_weights = np.array([2, 1], dtype=float)
     gaussian_weights /= np.sum(gaussian_weights)
     # Mean and std of each mixture component
     gaussian_means = np.array([-1, 1])
     gaussian_std = np.array([2, 1])
     # Observation probability of each mixture component
     gaussian_observation = np.array([1, .5])

     # How many samples belong to each mixture component?
     gaussian_samples = np.random.multinomial(num_samples, gaussian_weights)
     samples = []
     weights = []
     # Generate samples and observed samples for each mixture component
     for n, m, s, o in zip(gaussian_samples, gaussian_means, gaussian_std, gaussian_observation):
         _samples = np.random.normal(m, s, (n, 2))
         # Keep each sample (row) with probability o...
         _samples = _samples[o > np.random.uniform(size=n)]
         samples.extend(_samples)
         # ...and weight the survivors by 1 / o.
         weights.extend(np.ones(len(_samples)) / o)

     # Renormalise the sample weights
     weights = np.array(weights, dtype=float)
     weights /= np.sum(weights)
     # After the transpose each row of `samples` holds one coordinate.
     samples = np.transpose(samples)

     # Evaluate the true pdf on a grid
     x = np.linspace(xmin, xmax, 100)
     xx, yy = np.meshgrid(x, x)
     true_pdf = 0
     for w, m, s in zip(gaussian_weights, gaussian_means, gaussian_std):
         true_pdf = true_pdf + w * stats.norm(m, s).pdf(xx) * stats.norm(m, s).pdf(yy)

     # Evaluate the kde on a grid; the particles are passed one per row.
     empirical_distr = distrs.EmpiricalDistr(particles=samples.T, weights=weights)
     pdf = kde.GaussianKDE(empirical_distr)
     points = (np.ravel(xx), np.ravel(yy))
     points = np.array(points).T
     zz = pdf(points)
     zz = np.reshape(zz, xx.shape)
     kwargs = dict(extent=(xmin, xmax, xmin, xmax), cmap='hot', origin='lower')

     # Plot the true pdf
     plt.subplot(221)
     plt.imshow(true_pdf.T, **kwargs)
     plt.title('true PDF')

     # Plot the kde
     plt.subplot(222)
     plt.imshow(zz.T, **kwargs)
     plt.title('kde')
     plt.tight_layout()

     # Plot a histogram.  The arguments are left positional on purpose.
     # NOTE(review): the fifth parameter (the normalisation flag) appears to
     # have been renamed between Matplotlib releases - confirm before
     # switching to keywords.
     ax = plt.subplot(223)
     plt.hist2d(samples[0], samples[1], bins, ((xmin, xmax), (xmin, xmax)), True, weights, cmap='hot')
     ax.set_aspect(1)
     plt.title('histogram')
     plt.tight_layout()
     plt.show()