def test_knn_sanity_slow():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])

    # just make sure it runs
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks)
    res = est.fit_transform(bags)
    assert res.shape == (len(div_funcs), len(Ks), n, n)
    assert np.all(np.isfinite(res))

    # test that JS blows up when there's a huge difference in bag sizes
    # (so that K is too low)
    assert_raises(
        ValueError,
        partial(est.fit_transform, bags + [np.random.randn(1000, dim)]))

    # test fit() and then transform() with JS, with different-sized test bags
    est = KNNDivergenceEstimator(div_funcs=('js',), Ks=(5,))
    est.fit(bags, get_rhos=True)
    with LogCapture('skl_groups.divergences.knn', level=logging.WARNING) as l:
        res = est.transform([np.random.randn(300, dim)])
        assert res.shape == (1, 1, 1, len(bags))
        assert len(l.records) == 1
        assert l.records[0].message.startswith('Y_rhos had a lower max_K')

    # test that passing a div func more than once raises
    def blah(df):
        est = KNNDivergenceEstimator(div_funcs=[df, df])
        return est.fit(bags)
    assert_raises(ValueError, lambda: blah('kl'))
    assert_raises(ValueError, lambda: blah('renyi:.8'))
    assert_raises(ValueError, lambda: blah('l2'))
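# The test snippets above and below assume imports and a ``have_flann`` flag
# along these lines. This is only a sketch of those assumptions: the exact
# import paths and the FLANN-detection logic in the original test module may
# differ.
from functools import partial
import logging

import numpy as np
from scipy.special import psi  # digamma, used in test_knn_js
from six.moves import xrange   # on Python 3, plain range() works as well

from nose import SkipTest
from nose.tools import assert_raises
from testfixtures import LogCapture

from skl_groups.divergences import KNNDivergenceEstimator
from skl_groups.features import Features

try:
    # skl-groups can use FLANN (via cyflann) for nearest-neighbour searches;
    # how the original test module detects its presence is assumed here.
    import cyflann  # noqa: F401
    have_flann = True
except ImportError:
    have_flann = False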
def distribution_divergence(X_s, X_l, k=10):
    """
    Computes the L2 and JS divergences between samples of two distributions.

    The implementation uses `skl-groups`, which provides non-parametric
    (k-nearest-neighbour) estimation of divergences.

    Args:
        X_s: a numpy array containing the point cloud in state space
        X_l: a numpy array containing the point cloud in latent space
        k: the number of nearest neighbours used by the estimator
    """
    # We discard cases with too many dimensions
    if X_s.shape[1] > 50:
        return {'l2_divergence': -1., 'js_divergence': -1.}
    # We instantiate the divergence estimator
    div = KNNDivergenceEstimator(div_funcs=['l2', 'js'], Ks=[k],
                                 n_jobs=4, clamp=True)
    # We cast both arrays to float32
    X_s = X_s.astype(np.float32)
    X_l = X_l.astype(np.float32)
    # We wrap each array as a Features object holding a single bag
    f_s = Features(X_s, n_pts=[X_s.shape[0]])
    f_l = Features(X_l, n_pts=[X_l.shape[0]])
    # We build the knn graph on the state-space sample
    div.fit(X=f_s)
    # We compute the divergences against the latent-space sample
    l2, js = div.transform(X=f_l).squeeze()
    # We construct the returned dictionary
    output = {'l2_divergence': l2, 'js_divergence': js}
    return output
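# A hypothetical usage sketch of distribution_divergence: the array shapes and
# the choice of k below are made up for illustration only. The two point
# clouds may have different numbers of samples but must share a dimension.
import numpy as np

rng = np.random.RandomState(0)
X_state = rng.randn(500, 8)    # 500 points in an 8-dimensional state space
X_latent = rng.randn(400, 8)   # 400 points in the latent space

divs = distribution_divergence(X_state, X_latent, k=10)
print(divs['l2_divergence'], divs['js_divergence'])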
def test_knn_js():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    # verified by hand
    x = np.reshape([0., 1, 3, 6], (4, 1))
    n = 4

    y = np.reshape([.2, 1.2, 3.2, 6.2, 10.2], (5, 1))
    m = 5

    M = 2

    right_js = (
        np.log(n + m - 1) + psi(M)
        + 1/(2*n) * (  # x weight is 1/7, y weight is 4/35, quantile 1/4
            np.log(.2) - psi(1)     # 0 => .2(y), 1(x)
            + np.log(.8) - psi(2)   # 1 => 1.2(y), .2(y)
            + np.log(1.8) - psi(2)  # 3 => 3.2(y), 1.2(y)
            + np.log(2.8) - psi(2)  # 6 => 6.2(y), 3.2(y)
        )
        + 1/(2*m) * (  # x weight is 5/36, y weight is 1/9, quantile 1/4
            np.log(.2) - psi(1)     # .2 => 0(x)
            + np.log(1) - psi(2)    # 1.2 => 1(x), .2(y)
            + np.log(2) - psi(2)    # 3.2 => 3(x), 1.2(y)
            + np.log(3) - psi(2)    # 6.2 => 6(x), 3.2(y)
            + np.log(4.2) - psi(2)  # 10.2 => 6.2(y), 6(x)
        )
        - 1/2 * np.log(n - 1)
        - 1/(2*n) * (np.log(3) + np.log(2) + np.log(3) + np.log(5))
        - 1/2 * np.log(m - 1)
        - 1/(2*m) * (np.log(3) + np.log(2) + np.log(3) + np.log(4) + np.log(7))
    )

    msg = "got {}, expected {}"
    est = KNNDivergenceEstimator(div_funcs=['js'], Ks=[2], clamp=False)
    res = est.fit([x]).transform([y])
    assert res.shape == (1, 1, 1, 1)
    res = res[0, 0, 0, 0]
    assert np.allclose(res, right_js, atol=1e-6), msg.format(res, right_js)