def test_kl_simple():
    """Compare the K=2 'kl' estimate for two tiny 1-d bags with hand-worked values."""
    # verified by hand
    # Dhat(P||Q) = \log m/(n-1) + d / n  \sum_{i=1}^n \log \nu_k(i)/rho_k(i)
    x = np.reshape([0., 1, 3], (3, 1))
    y = np.reshape([.2, 1.2, 3.2, 7.2], (4, 1))
    n, m = x.shape[0], y.shape[0]

    # One nu_k(i) / rho_k(i) ratio per point: distance to the 2nd-nearest
    # neighbour in the other bag over the distance to the 2nd-nearest
    # neighbour among the remaining points of the point's own bag.
    x_ratios = (1.2 / 3, .8 / 2, 1.8 / 3)
    y_ratios = (.8 / 3, 1.2 / 2, 2.2 / 3, 6.2 / 6)

    x_to_y = np.log(m / (n - 1)) + 1 / n * sum(np.log(r) for r in x_ratios)
    y_to_x = np.log(n / (m - 1)) + 1 / m * sum(np.log(r) for r in y_ratios)

    # NOTE: clamping makes this test useless.
    # (Both hand-computed values are negative for this data, so both
    # expectations end up as 0 after the clamp.)
    x_to_y, y_to_x = max(x_to_y, 0), max(y_to_x, 0)

    res = estimate_divs(Features([x, y]), specs=['kl'], Ks=[2]).squeeze()

    # Divergence of a bag from itself must be exactly zero.
    for i in (0, 1):
        assert res[i, i] == 0

    assert np.allclose(res[1, 0], y_to_x), "{} vs {}".format(res[1, 0], y_to_x)
    assert np.allclose(res[0, 1], x_to_y), "{} vs {}".format(res[0, 1], x_to_y)
def generate_gaussians(name, mean_std_nums, dim, min_pts, max_pts):
    """Sample bags of Gaussian points and save them to ``data/<name>.h5``.

    Args:
        name: Base name of the output file (written as ``data/{name}.h5``).
        mean_std_nums: Iterable of ``(mean, std, num)`` triples; ``num`` bags
            are drawn for each triple.
        dim: Number of columns in every bag.
        min_pts: Smallest number of points a bag may contain.
        max_pts: Largest number of points a bag may contain (inclusive).

    Each bag is an ``(n_pts, dim)`` array from ``np.random.normal(mean, std)``
    and is labelled with the category ``'mean{mean}-std{std}'``. Draws use the
    global ``np.random`` state.
    """
    # np.random.randint excludes its upper bound, hence the +1.
    pts_upper = max_pts + 1

    bags, categories = [], []
    for mean, std, num in mean_std_nums:
        label = 'mean{}-std{}'.format(mean, std)
        for _ in range(num):
            n_pts = np.random.randint(min_pts, pts_upper)
            bags.append(np.random.normal(mean, std, size=(n_pts, dim)))
            categories.append(label)

    out_path = 'data/{}.h5'.format(name)
    Features(bags, categories=categories).save_as_hdf5(out_path)
def test_js_simple():
    """Check the K=2 'js' estimate for two tiny 1-d bags against hand-worked values."""
    # verified by hand
    x = np.reshape([0, 1, 3], (3, 1))
    y = np.reshape([.2, 1.2, 3.2, 6.2], (4, 1))

    # Mixture entropy: a shared constant plus one term per bag.
    mix_from_x = (np.log(.2) + np.log(.8) + np.log(1.8)
                  - psi(1) - 2*psi(2)) / 6
    mix_from_y = (np.log(.2) + np.log(2) + np.log(3.2)
                  - psi(1) - 3*psi(2)) / 8
    mix_ent = np.log(2) + np.log(3) + psi(2) + mix_from_x + mix_from_y

    # Per-bag entropies; the log arguments are the distances to each point's
    # 2nd-nearest neighbour among the other points of its own bag.
    x_ent = np.log(2) + sum(np.log(d) for d in (3, 2, 3)) / 3
    y_ent = np.log(3) + sum(np.log(d) for d in (3, 2, 3, 5)) / 4

    right_js = mix_ent - (x_ent + y_ent) / 2

    # Symmetric 2x2 result: zero on the diagonal, right_js off it.
    expected = np.zeros((2, 2))
    expected[0, 1] = expected[1, 0] = right_js

    # TODO: what about clamping???
    est = estimate_divs(Features([x, y]), specs=['js'], Ks=[2],
                        status_fn=None).squeeze()
    assert_close(est, expected, atol=5e-5, msg="JS estimate not as expected")