Beispiel #1
0
def generate_gaussians(name, mean_std_nums, dim, min_pts, max_pts):
    """Sample bags of Gaussian points and save them to ``data/<name>.h5``.

    ``mean_std_nums`` is an iterable of ``(mean, std, num)`` triples.  For
    each triple, ``num`` bags are drawn; every bag gets a random number of
    rows between ``min_pts`` and ``max_pts`` (both inclusive) and ``dim``
    columns, filled from ``np.random.normal(mean, std)``.  Each bag is
    labelled with the category string ``'mean<mean>-std<std>'``.

    The bags and labels are wrapped in a ``Features`` object, which is
    written out through its ``save_as_hdf5`` method.  Nothing is returned.
    """
    bags, categories = [], []
    for mean, std, num in mean_std_nums:
        label = 'mean{}-std{}'.format(mean, std)
        for _ in range(num):
            # randint excludes its upper bound, hence the +1 to allow max_pts.
            bag_size = np.random.randint(min_pts, max_pts + 1)
            bags.append(np.random.normal(mean, std, size=(bag_size, dim)))
            categories.append(label)

    out_path = 'data/{}.h5'.format(name)
    Features(bags, categories=categories).save_as_hdf5(out_path)
Beispiel #2
0
def generate_gaussians(name, mean_std_nums, dim, min_pts, max_pts):
    """Build a Gaussian toy data set and store it as ``data/<name>.h5``.

    Every ``(mean, std, num)`` entry of ``mean_std_nums`` contributes
    ``num`` bags.  A bag is an array with ``dim`` columns and a randomly
    chosen number of rows in ``[min_pts, max_pts]``, sampled with
    ``np.random.normal(mean, std)``.  All bags from one entry share the
    category name ``'mean<mean>-std<std>'``.

    The result is handed to ``Features`` and saved via ``save_as_hdf5``;
    the function itself returns nothing.
    """
    sampled_bags = []
    bag_labels = []

    for mean, std, num in mean_std_nums:
        category = 'mean{}-std{}'.format(mean, std)
        for _ in range(num):
            # Upper bound of randint is exclusive; +1 makes max_pts reachable.
            rows = np.random.randint(min_pts, max_pts + 1)
            points = np.random.normal(mean, std, size=(rows, dim))
            sampled_bags.append(points)
            bag_labels.append(category)

    dataset = Features(sampled_bags, categories=bag_labels)
    dataset.save_as_hdf5('data/{}.h5'.format(name))
Beispiel #3
0
def test_with_and_without_js():
    """Check that requesting 'js' alongside other specs changes nothing.

    Features are loaded from ``data/gaussian-2d-mean0-std1,2.h5`` next to
    this file, and the reference divergences plus ``min_dist`` come from
    the matching ``.divs.h5`` file.  Divergences are estimated once with
    ``['hellinger', 'js']`` and once with ``['js']`` only; the non-JS part
    of the combined run must match the stored reference, and the JS part
    must match the JS-only run (both within ``atol=5e-5``).
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    base_path = os.path.join(data_dir, 'gaussian-2d-mean0-std1,2')
    feats = Features.load_from_hdf5(base_path + '.h5')

    specs = ['hellinger']
    Ks = [3, 5]

    with h5py.File(base_path + '.divs.h5', 'r') as f:
        expected = load_divs(f, specs, Ks)
        min_dist = f.attrs['min_dist']

    def run(which_specs):
        # Same fixed arguments for both runs; only the spec list varies.
        return estimate_divs(feats, specs=which_specs, Ks=Ks,
                             min_dist=min_dist, status_fn=None)

    with capture_output(True, True, merge=False):
        combined = run(specs + ['js'])
        js_only = run(['js'])

    # The third axis is sliced per spec: 'js' was requested last in the
    # combined run and is the sole entry in the JS-only run.
    oth_with = combined[:, :, :-1, :]
    js_with = combined[:, :, -1, :]
    js_without = js_only[:, :, 0, :]

    assert_close(oth_with, expected, atol=5e-5,
                 msg="others ({}) broke with JS".format(', '.join(specs)))
    assert_close(js_with, js_without, atol=5e-5,
                 msg="JS different with/without others")
Beispiel #4
0
def test_divs():
    """Yield divergence checks for the stored Gaussian reference data.

    For each data set name and each feature dtype (float64, float32), the
    features are loaded from ``data/<name>.h5`` and the reference
    divergences and ``min_dist`` from ``data/<name>.divs.h5``.  The checks
    returned by ``check_div`` are collected for every argument combination
    (``cores`` in ``[1, None]``, ``status_fn`` of ``None``) and yielded in
    order of ``t[0].description``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    # TODO: test a custom status_fn also (status_fn=True is left out too)
    argses = []
    for cores in [1, None]:
        for status_fn in [None]:
            argses.append({'cores': cores, 'status_fn': status_fn})

    specs = ['hellinger', 'kl', 'l2', 'linear',
             'renyi:0.5', 'renyi:0.7', 'renyi:0.9', 'renyi:0.99']
    Ks = [1, 3, 5, 10]
    names = ['gaussian-2d-mean0-std1,2', 'gaussian-20d-mean0-std1,2']
    dtypes = [np.float64, np.float32]

    for name in names:
        feats_path = os.path.join(data_dir, name + '.h5')
        divs_path = os.path.join(data_dir, name + '.divs.h5')

        for dtype in dtypes:
            feats = Features.load_from_hdf5(feats_path, features_dtype=dtype)

            with h5py.File(divs_path, 'r') as f:
                expected = load_divs(f, specs, Ks)
                min_dist = f.attrs['min_dist']

            collected = []
            for args in argses:
                checks = check_div(feats, expected, specs, Ks, name,
                                   min_dist=min_dist, **args)
                collected.extend(checks)

            # Emit in a deterministic, description-sorted order.
            collected = sorted(collected, key=lambda t: t[0].description)
            for test in collected:
                yield test
Beispiel #5
0
def test_kl_simple():
    """Compare the 'kl' estimate (K=2) on two tiny 1-d bags to hand values.

    The expected values are clamped at zero before the comparison, and the
    diagonal entries of the result are required to be exactly zero.
    """
    # verified by hand
    # Dhat(P||Q) = \log m/(n-1) + d / n  \sum_{i=1}^n \log \nu_k(i)/rho_k(i)
    x = np.reshape([0., 1, 3], (3, 1))
    y = np.reshape([.2, 1.2, 3.2, 7.2], (4, 1))
    n, m = x.shape[0], y.shape[0]

    # Hand-computed sums of the per-point log ratios from the formula above.
    x_log_ratios = np.log(1.2 / 3) + np.log(.8 / 2) + np.log(1.8 / 3)
    y_log_ratios = (np.log(.8 / 3) + np.log(1.2 / 2) +
                    np.log(2.2 / 3) + np.log(6.2 / 6))

    x_to_y = np.log(m / (n - 1)) + 1 / n * x_log_ratios
    y_to_x = np.log(n / (m - 1)) + 1 / m * y_log_ratios

    # NOTE: clamping makes this test useless.
    x_to_y = max(x_to_y, 0)
    y_to_x = max(y_to_x, 0)

    feats = Features([x, y])
    res = estimate_divs(feats, specs=['kl'], Ks=[2]).squeeze()

    assert res[0, 0] == 0
    assert res[1, 1] == 0

    got_y_to_x = res[1, 0]
    assert np.allclose(got_y_to_x, y_to_x), "{} vs {}".format(got_y_to_x,
                                                              y_to_x)
    got_x_to_y = res[0, 1]
    assert np.allclose(got_x_to_y, x_to_y), "{} vs {}".format(got_x_to_y,
                                                              x_to_y)
Beispiel #6
0
def test_with_and_without_js():
    """Verify that 'js' can be estimated together with other specs.

    Uses the ``gaussian-2d-mean0-std1,2`` data set stored in the ``data``
    directory beside this file, with reference divergences and
    ``min_dist`` read from its ``.divs.h5`` companion.  The estimator is
    run with ``['hellinger', 'js']`` and with ``['js']`` alone; the
    hellinger output must agree with the reference and the two JS outputs
    must agree with each other, each within ``atol=5e-5``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    name = 'gaussian-2d-mean0-std1,2'
    feats_path = os.path.join(data_dir, name + '.h5')
    divs_path = os.path.join(data_dir, name + '.divs.h5')

    feats = Features.load_from_hdf5(feats_path)

    specs = ['hellinger']
    Ks = [3, 5]

    with h5py.File(divs_path, 'r') as f:
        expected = load_divs(f, specs, Ks)
        min_dist = f.attrs['min_dist']

    # Pre-bind everything except the spec list, which differs per run.
    estimate = partial(estimate_divs, feats, Ks=Ks, min_dist=min_dist,
                       status_fn=None)

    with capture_output(True, True, merge=False):
        with_others = estimate(specs=specs + ['js'])
        # 'js' was appended last, so it is the final slice on axis 2.
        oth_with, js_with = with_others[:, :, :-1, :], with_others[:, :, -1, :]

        js_without = estimate(specs=['js'])[:, :, 0, :]

    others_msg = "others ({}) broke with JS".format(', '.join(specs))
    assert_close(oth_with, expected, atol=5e-5, msg=others_msg)

    js_msg = "JS different with/without others"
    assert_close(js_with, js_without, atol=5e-5, msg=js_msg)
Beispiel #7
0
def test_divs():
    """Generate divergence checks against the stored reference results.

    Iterates over two Gaussian data sets and two feature dtypes (float64
    and float32).  For each combination the features are read from
    ``data/<name>.h5`` and the reference divergences and ``min_dist``
    from ``data/<name>.divs.h5``; ``check_div`` is then called once per
    keyword-argument set (``cores`` of 1 or ``None``, ``status_fn`` of
    ``None``).  The resulting tests are yielded sorted by
    ``t[0].description``.
    """
    here = os.path.dirname(__file__)
    data_dir = os.path.join(here, 'data')

    # TODO: test a custom status_fn also (status_fn=True is left out too)
    status_fns = [None]
    argses = []
    for cores in [1, None]:
        for status_fn in status_fns:
            argses.append({'cores': cores, 'status_fn': status_fn})

    specs = [
        'hellinger', 'kl', 'l2', 'linear',
        'renyi:0.5', 'renyi:0.7', 'renyi:0.9', 'renyi:0.99',
    ]
    Ks = [1, 3, 5, 10]

    for name in ['gaussian-2d-mean0-std1,2', 'gaussian-20d-mean0-std1,2']:
        for dtype in [np.float64, np.float32]:
            feats = Features.load_from_hdf5(
                os.path.join(data_dir, name + '.h5'), features_dtype=dtype)

            with h5py.File(os.path.join(data_dir, name + '.divs.h5'),
                           'r') as f:
                expected = load_divs(f, specs, Ks)
                min_dist = f.attrs['min_dist']

            tests = []
            for kwargs in argses:
                tests.extend(check_div(feats, expected, specs, Ks, name,
                                       min_dist=min_dist, **kwargs))

            # Sort so the checks come out in a stable, readable order.
            tests = sorted(tests, key=lambda t: t[0].description)
            for test in tests:
                yield test
Beispiel #8
0
def test_js_simple():
    """Compare the 'js' estimate (K=2) on two tiny 1-d bags to hand values.

    The expected matrix is symmetric with zeros on the diagonal and the
    hand-computed value off the diagonal; agreement is required within
    ``atol=5e-5``.
    """
    # verified by hand
    x = np.reshape([0, 1, 3], (3, 1))
    y = np.reshape([.2, 1.2, 3.2, 6.2], (4, 1))

    # Terms of the mixture entropy built from the points of x and of y.
    mix_from_x = (np.log(.2) + np.log(.8) + np.log(1.8)
                  - psi(1) - 2*psi(2)) / 6
    mix_from_y = (np.log(.2) + np.log(2) + np.log(3.2)
                  - psi(1) - 3*psi(2)) / 8
    mix_ent = np.log(2) + np.log(3) + psi(2) + mix_from_x + mix_from_y

    x_ent = np.log(2) + (np.log(3) + np.log(2) + np.log(3)) / 3
    y_ent = np.log(3) + (np.log(3) + np.log(2) + np.log(3) + np.log(5)) / 4

    right_js = mix_ent - (x_ent + y_ent) / 2
    expected = np.array([[0, right_js],
                         [right_js, 0]])
    # TODO: what about clamping???

    feats = Features([x, y])
    est = estimate_divs(feats, specs=['js'], Ks=[2], status_fn=None)
    est = est.squeeze()
    assert_close(est, expected, atol=5e-5, msg="JS estimate not as expected")