def test_pdf_boundary_simple(self):
    """Check the Gaussian boundary correction in a few simple settings.

    With a single training point at 0 or at 1 and a small bandwidth, the
    boundary-corrected pdf at ``self.x_test`` must be twice the uncorrected
    one. With the training point at 0.5, the first kernel weight must be
    the reciprocal of the 68/95/99.7% coverage values.
    """
    for boundary_point in (0, 1):
        self.x_train = np.array([boundary_point])
        # NOTE: for larger bandwidths, the pdf also needs to be truncated at
        # the far boundary, which leads to something different than twice
        # the pdf.
        for bandwidth in (1e-3, 1e-2, 1e-1):
            uncorrected = hp_kernels.Gaussian(
                data=self.x_train, bandwidth=bandwidth, fix_boundary=False
            )
            corrected = hp_kernels.Gaussian(
                data=self.x_train, bandwidth=bandwidth, fix_boundary=True
            )
            pdf_uncorrected = uncorrected(self.x_test)
            pdf_corrected = corrected(self.x_test)
            # Third positional argument of np.allclose is the relative tolerance.
            is_doubled = np.allclose(2 * pdf_uncorrected, pdf_corrected, 1e-4)
            self.assertTrue(is_doubled)

    # Simple test based on the 68, 95, 99% rule.
    self.x_train = np.array([0.5])
    coverage_by_bandwidth = ((0.5, 0.6827), (0.25, 0.9545), (1 / 6, 0.9973))
    for bandwidth, coverage in coverage_by_bandwidth:
        kernel = hp_kernels.Gaussian(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=True
        )
        expected_weight = 1 / coverage
        self.assertAlmostEqual(kernel.weights[0], expected_weight, delta=1e-4)
def test_values(self):
    """Compare the Gaussian kernel output with ``sm_kernels.gaussian``.

    For every bandwidth, the kernel built without boundary correction is
    evaluated at ``self.x_test`` and must match the reference values divided
    by the bandwidth, within a relative tolerance of 1e-4.
    """
    for bandwidth in (1e-3, 1e-2, 1e-1, 1):
        # Reference values: training points as a column, test points as a row.
        train_column = self.x_train[:, None]
        test_row = self.x_test[None, :]
        reference = sm_kernels.gaussian(bandwidth, train_column, test_row)

        kernel = hp_kernels.Gaussian(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=False
        )
        actual = kernel(self.x_test)

        # The reference is divided by the bandwidth before comparing.
        matches = np.allclose(actual, reference / bandwidth, 1e-4)
        self.assertTrue(matches)
def test_pdf_boundary_quadrature(self):
    """Check that the boundary-corrected Gaussian pdf integrates to one.

    For every bandwidth, the mean of the kernel output is integrated from
    0 to 1 with ``quadrature``; the first element of the result must equal
    1 within an absolute tolerance of 1e-4.
    """
    for bandwidth in (1e-2, 1e-1, 1):
        kernel = hp_kernels.Gaussian(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=True
        )

        def mean_density(x):
            # The kernel is called with the evaluation point wrapped in an array.
            points = np.array([x])
            return kernel(points).mean()

        integral = quadrature(mean_density, 0, 1)[0]
        self.assertAlmostEqual(integral, 1, delta=1e-4)
def test_sample(self):
    """Check that samples drawn from the Gaussian kernel follow its pdf.

    For every bandwidth, 2**20 samples are drawn from the boundary-corrected
    kernel and binned into a density histogram. The histogram heights are
    compared with the kernel output at the bin centres, averaged over
    axis 0, using an absolute tolerance of 5e-2 per bin.
    """
    num_samples = 2**20
    for bw in [1e-1, 5e-1, 1]:
        hp_kernel = hp_kernels.Gaussian(data=self.x_train, bandwidth=bw, fix_boundary=True)
        samples = hp_kernel.sample(num_samples=num_samples)

        # ``density=True`` replaces the ``normed`` keyword, which current
        # NumPy releases no longer accept. With the default equal-width
        # bins both keywords produce the same normalisation.
        phat1, bin_edges = np.histogram(samples, density=True)

        bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
        phat2 = hp_kernel(bin_centers).mean(axis=0)

        for p1, p2 in zip(phat1, phat2):
            self.assertAlmostEqual(p1, p2, delta=5e-2)
def test_pdf_boundary_simple(self):
    """Check the WangRyzinOrdinal boundary correction at both ends.

    With a single training point at the first (0) or last (3) of four
    values and a small bandwidth, the boundary-corrected values must equal
    the uncorrected ones divided by ``1 - s``, where ``s`` is the sum of the
    uncorrected values at the three other points. A final check compares
    Gaussian kernel weights with the 68/95/99.7% coverage values.
    """
    # Each case: (training value, slice selecting the values at the other points).
    boundary_cases = (
        (0, slice(1, None)),
        (3, slice(None, -1)),
    )
    for train_value, other_points in boundary_cases:
        self.x_train = np.array([train_value])
        self.x_test = np.array([0, 1, 2, 3])
        # NOTE: for larger bandwidths, the pdf also needs to be truncated at
        # the far boundary, which leads to something different than the
        # scaling computed here.
        for bandwidth in (1e-3, 1e-2):
            uncorrected = hp_kernels.WangRyzinOrdinal(
                data=self.x_train, bandwidth=bandwidth, num_values=4, fix_boundary=False
            )
            corrected = hp_kernels.WangRyzinOrdinal(
                data=self.x_train, bandwidth=bandwidth, num_values=4, fix_boundary=True
            )
            values_uncorrected = uncorrected(self.x_test).squeeze()
            values_corrected = corrected(self.x_test).squeeze()

            weight = 1 - values_uncorrected[other_points].sum()
            rescaled = values_uncorrected / weight
            self.assertTrue(np.allclose(rescaled, values_corrected, 1e-4))

    # Simple test based on the 68, 95, 99% rule.
    # NOTE(review): this part builds hp_kernels.Gaussian, not WangRyzinOrdinal;
    # it looks copied from the Gaussian test. Confirm whether it belongs here.
    self.x_train = np.array([0.5])
    coverage_by_bandwidth = ((0.5, 0.6827), (0.25, 0.9545), (1 / 6, 0.9973))
    for bandwidth, coverage in coverage_by_bandwidth:
        kernel = hp_kernels.Gaussian(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=True
        )
        expected_weight = 1 / coverage
        self.assertAlmostEqual(kernel.weights[0], expected_weight, delta=1e-4)