def test_mi_correlated_gaussians():
    """Test estimators on correlated Gaussian data."""
    expected_mi, source, source_uncorr, target = _get_gauss_data()

    # Run OpenCL estimator.
    settings = {'debug': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(source, target)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(source, target)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to {2:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl, expected_mi))
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates from the OpenCL and JIDT estimators differ '
        '(error larger 0.0001).')
def test_gauss_data():
    """Test bivariate MI estimation from correlated Gaussians."""
    # Generate data and add a delay of one sample.
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    source = source[1:]
    source_uncorr = source_uncorr[1:]
    target = target[:-1]
    data = Data(np.hstack((source, source_uncorr, target)), dim_order='sp')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 2,
        'min_lag_sources': 1}
    nw = MultivariateMI()
    results = nw.analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    mi = results.get_single_target(2, fdr=False)['mi'][0]
    sources = results.get_target_sources(2, fdr=False)

    # Assert that only the correlated source was detected.
    assert len(sources) == 1, 'Wrong no. inferred sources: {0}.'.format(
        len(sources))
    assert sources[0] == 0, 'Wrong inferred source: {0}.'.format(sources[0])

    # Compare the network-analysis estimate to the JIDT core estimator.
    est = JidtKraskovMI({'lag_mi': 1})
    jidt_mi = est.estimate(var1=source, var2=target)
    print('Estimated MI: {0:0.6f}, estimated MI using JIDT core estimator: '
          '{1:0.6f} (expected: {2:0.6f}).'.format(mi, jidt_mi, expected_mi))
    assert np.isclose(mi, jidt_mi, atol=0.005), (
        'Estimated MI {0:0.6f} differs from JIDT estimate {1:0.6f} (expected: '
        'MI {2:0.6f}).'.format(mi, jidt_mi, expected_mi))
def test_mi_gauss_data():
    """Test MI estimators on correlated Gaussian data.

    Note that the estimate is computed from random data (the generated
    samples are realisations of random variables), so the result will be of
    the order of the expected value but not exactly equal to it; in fact,
    there will be considerable variance around it.
    """
    expected_mi, source1, source2, target = _get_gauss_data()

    # Test Kraskov
    mi_estimator = JidtKraskovMI(settings={})
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtKraskovMI', 'MI (no cond.)')
    _assert_result(mi_uncor, 0, 'JidtKraskovMI', 'MI (uncorr., no cond.)')

    # Test Gaussian
    mi_estimator = JidtGaussianMI(settings={})
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtGaussianMI', 'MI (no cond.)')
    _assert_result(mi_uncor, 0, 'JidtGaussianMI', 'MI (uncorr., no cond.)')

    # Test Discrete
    settings = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    mi_estimator = JidtDiscreteMI(settings=settings)
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtDiscreteMI', 'MI (no cond.)',
                   0.08)  # more variability here
    _assert_result(mi_uncor, 0, 'JidtDiscreteMI', 'MI (uncorr., no cond.)',
                   0.08)  # more variability here
def test_zero_lag():
    """Test analysis for 0 lag."""
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    data = Data(np.hstack((source, target)), dim_order='sp', normalise=False)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'tau_sources': 0,  # not required, but shouldn't throw an error if provided
        'max_lag_sources': 0,
        'min_lag_sources': 0}
    nw = MultivariateMI()
    results = nw.analyse_single_target(settings, data, target=1, sources='all')

    mi_estimator = JidtKraskovMI(settings={'normalise': False})
    jidt_mi = mi_estimator.estimate(source, target)
    omnibus_mi = results.get_single_target(1, fdr=False).omnibus_mi
    print('Estimated omnibus MI: {0:0.6f}, estimated MI using JIDT core '
          'estimator: {1:0.6f} (expected: {2:0.6f}).'.format(
              omnibus_mi, jidt_mi, expected_mi))
    assert np.isclose(omnibus_mi, jidt_mi, atol=0.005), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from JIDT estimate '
        '({1:0.6f}).'.format(omnibus_mi, jidt_mi))
    assert np.isclose(omnibus_mi, expected_mi, atol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from expected MI '
        '({1:0.6f}).'.format(omnibus_mi, expected_mi))
def test_mi_correlated_gaussians():
    """Test estimators on correlated Gaussian data."""
    expected_mi, source, source_uncorr, target = _get_gauss_data()

    # Run OpenCL estimator.
    settings = {'debug': True, 'return_counts': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(source, target)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(source, target)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to {2:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl, expected_mi))
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates from the OpenCL and JIDT estimators differ '
        '(error larger 0.0001).')
def test_mi_uncorrelated_gaussians_three_dims():
    """Test MI estimator on uncorrelated 3D Gaussian data."""
    n_obs = 10000
    dim = 3
    var1 = np.random.randn(n_obs, dim)
    var2 = np.random.randn(n_obs, dim)

    # Run OpenCL estimator.
    settings = {'debug': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(var1, var2)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(var1, var2)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to 0 nats for uncorrelated '
          'Gaussians.'.format(mi_jidt, mi_ocl))
    assert np.isclose(mi_jidt, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates from the OpenCL and JIDT estimators differ '
        '(error larger 0.0001).')
def test_zero_lag():
    """Test analysis for 0 lag."""
    covariance = 0.4
    n = 10000
    source = np.random.normal(0, 1, size=n)
    target = (covariance * source
              + (1 - covariance) * np.random.normal(0, 1, size=n))
    # expected_corr = covariance / (np.sqrt(covariance**2 + (1-covariance)**2))
    corr = np.corrcoef(source, target)[0, 1]
    expected_mi = -0.5 * np.log(1 - corr**2)

    data = Data(np.vstack((source, target)), dim_order='ps', normalise=False)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 0,
        'min_lag_sources': 0}
    nw = MultivariateMI()
    results = nw.analyse_single_target(settings, data, target=1, sources='all')

    mi_estimator = JidtKraskovMI(settings={})
    jidt_mi = mi_estimator.estimate(source, target)
    omnibus_mi = results.get_single_target(1, fdr=False).omnibus_mi
    print('Estimated omnibus MI: {0:0.6f}, estimated MI using JIDT core '
          'estimator: {1:0.6f} (expected: {2:0.6f}).'.format(
              omnibus_mi, jidt_mi, expected_mi))
    assert np.isclose(omnibus_mi, jidt_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from JIDT estimate '
        '({1:0.6f}).'.format(omnibus_mi, jidt_mi))
    assert np.isclose(omnibus_mi, expected_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from expected MI '
        '({1:0.6f}).'.format(omnibus_mi, expected_mi))
def test_zero_lag():
    """Test analysis for 0 lag."""
    covariance = 0.4
    n = 10000
    source = np.random.normal(0, 1, size=n)
    target = (covariance * source
              + (1 - covariance) * np.random.normal(0, 1, size=n))
    # expected_corr = covariance / (np.sqrt(covariance**2 + (1-covariance)**2))
    corr = np.corrcoef(source, target)[0, 1]
    expected_mi = -0.5 * np.log(1 - corr**2)

    data = Data(np.vstack((source, target)), dim_order='ps', normalise=False)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 0,
        'min_lag_sources': 0}
    nw = MultivariateMI()
    results = nw.analyse_single_target(
        settings, data, target=1, sources='all')

    mi_estimator = JidtKraskovMI(settings={})
    jidt_mi = mi_estimator.estimate(source, target)
    omnibus_mi = results.get_single_target(1, fdr=False).omnibus_mi
    print('Estimated omnibus MI: {0:0.6f}, estimated MI using JIDT core '
          'estimator: {1:0.6f} (expected: {2:0.6f}).'.format(
              omnibus_mi, jidt_mi, expected_mi))
    assert np.isclose(omnibus_mi, jidt_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from JIDT estimate '
        '({1:0.6f}).'.format(omnibus_mi, jidt_mi))
    assert np.isclose(omnibus_mi, expected_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from expected MI '
        '({1:0.6f}).'.format(omnibus_mi, expected_mi))
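# A minimal standalone sketch (not part of the tests above) of where the
# analytic values in test_zero_lag() come from. For
# target = c*source + (1 - c)*noise with independent standard-normal source and
# noise, the correlation is c / sqrt(c**2 + (1 - c)**2), and the MI of two
# jointly Gaussian variables with correlation rho is -0.5 * ln(1 - rho**2) nats
# (about 0.18 nats for c = 0.4). The helper name is made up for illustration.
import numpy as np


def _analytic_gauss_mi(covariance=0.4):
    """Return the analytic correlation and MI (in nats) for the toy model."""
    rho = covariance / np.sqrt(covariance**2 + (1 - covariance)**2)
    return rho, -0.5 * np.log(1 - rho**2)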
def test_mi_uncorrelated_gaussians_three_dims():
    """Test MI estimator on uncorrelated 3D Gaussian data."""
    n_obs = 10000
    dim = 3
    var1 = np.random.randn(n_obs, dim)
    var2 = np.random.randn(n_obs, dim)

    # Run OpenCL estimator.
    settings = {'debug': True, 'return_counts': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(var1, var2)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(var1, var2)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to 0 nats for uncorrelated '
          'Gaussians.'.format(mi_jidt, mi_ocl))
    assert np.isclose(mi_jidt, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates from the OpenCL and JIDT estimators differ '
        '(error larger 0.0001).')
def test_lagged_mi():
    """Test estimation of lagged MI."""
    n = 10000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]
    target = [0] + [sum(pair) for pair in zip(
        [cov * y for y in source[0:n - 1]],
        [(1 - cov) * y for y in
         [rn.normalvariate(0, 1) for r in range(n - 1)]])]
    source = np.array(source)
    target = np.array(target)
    settings = {
        'discretise_method': 'equal',
        'n_discrete_bins': 4,
        'history': 1,
        'history_target': 1,
        'lag_mi': 1,
        'source_target_delay': 1}

    est_te_k = JidtKraskovTE(settings)
    te_k = est_te_k.estimate(source, target)
    est_te_d = JidtDiscreteTE(settings)
    te_d = est_te_d.estimate(source, target)
    est_d = JidtDiscreteMI(settings)
    mi_d = est_d.estimate(source, target)
    est_k = JidtKraskovMI(settings)
    mi_k = est_k.estimate(source, target)
    est_g = JidtGaussianMI(settings)
    mi_g = est_g.estimate(source, target)
    _compare_result(mi_d, te_d, 'JidtDiscreteMI', 'JidtDiscreteTE',
                    'lagged MI', tol=0.05)
    _compare_result(mi_k, te_k, 'JidtKraskovMI', 'JidtKraskovTE',
                    'lagged MI', tol=0.05)
    _compare_result(mi_g, te_k, 'JidtGaussianMI', 'JidtKraskovTE',
                    'lagged MI', tol=0.05)
def test_mi_gauss_data():
    """Test MI estimators on correlated Gaussian data.

    Note that the estimate is computed from random data (the generated
    samples are realisations of random variables), so the result will be of
    the order of the expected value but not exactly equal to it; in fact,
    there will be considerable variance around it.
    """
    expected_mi, source1, source2, target = _get_gauss_data()

    # Test Kraskov
    mi_estimator = JidtKraskovMI(settings={})
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtKraskovMI', 'MI (no cond.)')
    _assert_result(mi_uncor, 0, 'JidtKraskovMI', 'MI (uncorr., no cond.)')

    # Test Gaussian
    mi_estimator = JidtGaussianMI(settings={})
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtGaussianMI', 'MI (no cond.)')
    _assert_result(mi_uncor, 0, 'JidtGaussianMI', 'MI (uncorr., no cond.)')

    # Test Discrete
    settings = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    mi_estimator = JidtDiscreteMI(settings=settings)
    mi_cor = mi_estimator.estimate(source1, target)
    mi_uncor = mi_estimator.estimate(source2, target)
    _assert_result(mi_cor, expected_mi, 'JidtDiscreteMI', 'MI (no cond.)')
    _assert_result(mi_uncor, 0, 'JidtDiscreteMI', 'MI (uncorr., no cond.)')
def test_estimate_parallel():
    """Test estimate_parallel() against estimate()."""
    expected_mi, source1, source2, target = _get_gauss_data()
    source_chunks = np.vstack((source1, source1))
    target_chunks = np.vstack((target, target))

    # Compare MI-estimates from serial and parallel estimator.
    mi_estimator = JidtKraskovMI(settings={'noise_level': 0})
    mi = mi_estimator.estimate(source1, target)
    with pytest.raises(AssertionError):
        mi_estimator.estimate_parallel(
            n_chunks=2, var1=source_chunks, var2=target)
    mi_parallel1 = mi_estimator.estimate_parallel(
        n_chunks=2, re_use=['var2'], var1=source_chunks, var2=target)
    mi_parallel2 = mi_estimator.estimate_parallel(
        n_chunks=2, var1=source_chunks, var2=target_chunks)
    assert (mi_parallel1 == mi_parallel2).all(), (
        'Results for stacked ({0}) and re-used ({1}) target differ.'.format(
            mi_parallel1, mi_parallel2))
    assert mi_parallel1[0] == mi, (
        'Results for first chunk differ from serial estimate.')
    assert mi_parallel1[1] == mi, (
        'Results for second chunk differ from serial estimate.')
    assert np.isclose(mi, expected_mi, rtol=0.05), (
        'Estimated ({0}) and expected ({1}) MI differ.'.format(
            mi, expected_mi))

    # Check if a single chunk is returned if all variables are defined as
    # reusable.
    mi_parallel3 = mi_estimator.estimate_parallel(
        n_chunks=2, re_use=['var1', 'var2'], var1=source1, var2=target)
    assert len(mi_parallel3) == 1, (
        'Single chunk data returned more than one estimate.')
    assert np.isclose(mi_parallel3[0], expected_mi, rtol=0.05), (
        'Estimated ({0}) and expected ({1}) MI differ.'.format(
            mi_parallel3[0], expected_mi))

    # Check assertion for incorrect number of samples in data to be reused in
    # parallel estimator.
    with pytest.raises(AssertionError):
        mi_estimator.estimate_parallel(
            n_chunks=2, re_use=['var2'], var1=source_chunks, var2=target[:100])
def __init__(self, data_loader: DataLoader, pca_size=50, debug=False):
    super().__init__(data_loader=data_loader, pca_size=pca_size, debug=debug)
    settings = {'kraskov_k': 4}
    try:
        self.estimator = OpenCLKraskovMI(settings=settings)
    except RuntimeError:
        warnings.warn("No OpenCL backend detected. Run "
                      "'conda install -c conda-forge pyopencl' "
                      "in a terminal.")
        self.estimator = JidtKraskovMI(settings=settings)
def test_estimate_parallel():
    """Test estimate_parallel() against estimate()."""
    expected_mi, source1, source2, target = _get_gauss_data()
    source_chunks = np.vstack((source1, source1))
    target_chunks = np.vstack((target, target))

    # Compare MI-estimates from serial and parallel estimator.
    mi_estimator = JidtKraskovMI(settings={'noise_level': 0})
    mi = mi_estimator.estimate(source1, target)
    with pytest.raises(AssertionError):
        mi_estimator.estimate_parallel(
            n_chunks=2, var1=source_chunks, var2=target)
    mi_parallel1 = mi_estimator.estimate_parallel(
        n_chunks=2, re_use=['var2'], var1=source_chunks, var2=target)
    mi_parallel2 = mi_estimator.estimate_parallel(
        n_chunks=2, var1=source_chunks, var2=target_chunks)
    assert (mi_parallel1 == mi_parallel2).all(), (
        'Results for stacked ({0}) and re-used ({1}) target differ.'.format(
            mi_parallel1, mi_parallel2))
    assert mi_parallel1[0] == mi, (
        'Results for first chunk differ from serial estimate.')
    assert mi_parallel1[1] == mi, (
        'Results for second chunk differ from serial estimate.')
    assert np.isclose(mi, expected_mi, rtol=0.05), (
        'Estimated ({0}) and expected ({1}) MI differ.'.format(
            mi, expected_mi))

    # Check if a single chunk is returned if all variables are defined as
    # reusable.
    mi_parallel3 = mi_estimator.estimate_parallel(
        n_chunks=2, re_use=['var1', 'var2'], var1=source1, var2=target)
    assert len(mi_parallel3) == 1, (
        'Single chunk data returned more than one estimate.')
    assert np.isclose(mi_parallel3[0], expected_mi, rtol=0.05), (
        'Estimated ({0}) and expected ({1}) MI differ.'.format(
            mi_parallel3[0], expected_mi))

    # Check assertion for incorrect number of samples in data to be reused in
    # parallel estimator.
    with pytest.raises(AssertionError):
        mi_estimator.estimate_parallel(
            n_chunks=2, re_use=['var2'], var1=source_chunks, var2=target[:100])
def test_lagged_mi():
    """Test estimation of lagged MI."""
    n = 10000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]
    target = [0] + [sum(pair) for pair in zip(
        [cov * y for y in source[0:n - 1]],
        [(1 - cov) * y for y in
         [rn.normalvariate(0, 1) for r in range(n - 1)]])]
    source = np.array(source)
    target = np.array(target)
    settings = {
        'discretise_method': 'equal',
        'n_discrete_bins': 4,
        'history': 1,
        'history_target': 1,
        'lag_mi': 1,
        'source_target_delay': 1}

    est_te_k = JidtKraskovTE(settings)
    te_k = est_te_k.estimate(source, target)
    est_te_d = JidtDiscreteTE(settings)
    te_d = est_te_d.estimate(source, target)
    est_d = JidtDiscreteMI(settings)
    mi_d = est_d.estimate(source, target)
    est_k = JidtKraskovMI(settings)
    mi_k = est_k.estimate(source, target)
    est_g = JidtGaussianMI(settings)
    mi_g = est_g.estimate(source, target)
    _compare_result(mi_d, te_d, 'JidtDiscreteMI', 'JidtDiscreteTE',
                    'lagged MI', tol=0.05)
    _compare_result(mi_k, te_k, 'JidtKraskovMI', 'JidtKraskovTE',
                    'lagged MI', tol=0.05)
    _compare_result(mi_g, te_k, 'JidtGaussianMI', 'JidtKraskovTE',
                    'lagged MI', tol=0.05)
def test_insufficient_no_points():
    """Test if estimation aborts for too few data points."""
    expected_mi, source1, source2, target = _get_gauss_data(n=4)

    settings = {
        'kraskov_k': 4,
        'theiler_t': 0,
        'history': 1,
        'history_target': 1,
        'lag_mi': 1,
        'source_target_delay': 1}

    # Test first settings combination with k == N.
    est = JidtKraskovTE(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target)
    est = JidtKraskovMI(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target)
    est = JidtKraskovCMI(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target, target)
    est = JidtKraskovAIS(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1)

    # Test a second combination with a Theiler correction != 0.
    settings['theiler_t'] = 1
    settings['kraskov_k'] = 2
    est = JidtKraskovTE(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target)
    est = JidtKraskovMI(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target)
    est = JidtKraskovCMI(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1, target, target)
    est = JidtKraskovAIS(settings)
    with pytest.raises(RuntimeError):
        est.estimate(source1)
def test_gauss_data():
    """Test multivariate MI estimation from correlated Gaussians."""
    # Generate data and add a delay of one sample.
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    source = source[1:]
    source_uncorr = source_uncorr[1:]
    target = target[:-1]
    data = Data(np.hstack((source, source_uncorr, target)),
                dim_order='sp', normalise=False)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 2,
        'min_lag_sources': 1}
    nw = MultivariateMI()
    results = nw.analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    mi = results.get_single_target(2, fdr=False)['mi'][0]
    sources = results.get_target_sources(2, fdr=False)

    # Assert that only the correlated source was detected.
    assert len(sources) == 1, 'Wrong no. inferred sources: {0}.'.format(
        len(sources))
    assert sources[0] == 0, 'Wrong inferred source: {0}.'.format(sources[0])

    # Compare the MultivariateMI() estimate to the JIDT estimate. Mimic the
    # realisations used internally by the algorithm.
    est = JidtKraskovMI({'lag_mi': 0, 'normalise': False})
    jidt_mi = est.estimate(var1=source[1:-1], var2=target[2:])
    print('Estimated MI: {0:0.6f}, estimated MI using JIDT core estimator: '
          '{1:0.6f} (expected: {2:0.6f}).'.format(mi, jidt_mi, expected_mi))
    assert np.isclose(mi, jidt_mi, atol=0.005), (
        'Estimated MI {0:0.6f} differs from JIDT estimate {1:0.6f} (expected: '
        'MI {2:0.6f}).'.format(mi, jidt_mi, expected_mi))
    assert np.isclose(mi, expected_mi, atol=0.05), (
        'Estimated MI {0:0.6f} differs from expected MI {1:0.6f}.'.format(
            mi, expected_mi))
def test_mi_correlated_gaussians_two_chunks():
    """Test estimators on two chunks of correlated Gaussian data."""
    expected_mi, source, source_uncorr, target = _get_gauss_data(
        n=20000, seed=SEED)
    n_points = source.shape[0]

    # Run OpenCL estimator.
    n_chunks = 2
    settings = {'debug': True, 'return_counts': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(
        source, target, n_chunks=n_chunks)

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(source[0:int(n_points / 2), :],
                                target[0:int(n_points / 2), :])

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: [{1:.4f}, {2:.4f}] '
          'nats; expected to be close to {3:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl[0], mi_ocl[1], expected_mi))
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl[0], expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl[0], mi_jidt, atol=0.05), (
        'MI estimates from the OpenCL and JIDT estimators differ for the '
        'first chunk (error larger 0.05).')
    assert np.isclose(mi_ocl[1], mi_jidt, atol=0.05), (
        'MI estimates from the OpenCL and JIDT estimators differ for the '
        'second chunk (error larger 0.05).')
    assert np.isclose(mi_ocl[0], mi_ocl[1], atol=0.05), (
        'MI estimates for the two chunks differ (error larger 0.05).')
def test_one_two_dim_input_kraskov():
    """Test one- and two-dimensional input for Kraskov estimators."""
    expected_mi, src_one, s, target_one = _get_gauss_data(expand=False)
    src_two = np.expand_dims(src_one, axis=1)
    target_two = np.expand_dims(target_one, axis=1)
    ar_src_one, s = _get_ar_data(expand=False)
    ar_src_two = np.expand_dims(ar_src_one, axis=1)

    # MI
    mi_estimator = JidtKraskovMI(settings={})
    mi_cor_one = mi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovMI', 'MI')
    mi_cor_two = mi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovMI', 'MI')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovMI one dim', 'JidtKraskovMI two dim', 'MI')

    # CMI
    cmi_estimator = JidtKraskovCMI(settings={})
    mi_cor_one = cmi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovCMI', 'CMI')
    mi_cor_two = cmi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovCMI', 'CMI')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovCMI one dim', 'JidtKraskovCMI two dim', 'CMI')

    # TE
    te_estimator = JidtKraskovTE(settings={'history_target': 1})
    mi_cor_one = te_estimator.estimate(src_one[1:], target_one[:-1])
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovTE', 'TE')
    mi_cor_two = te_estimator.estimate(src_two[1:], target_two[:-1])
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovTE', 'TE')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovTE one dim', 'JidtKraskovTE two dim', 'TE')

    # AIS
    ais_estimator = JidtKraskovAIS(settings={'history': 2})
    mi_cor_one = ais_estimator.estimate(ar_src_one)
    mi_cor_two = ais_estimator.estimate(ar_src_two)
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovAIS one dim', 'JidtKraskovAIS two dim',
                    'AIS (AR process)')
def test_one_two_dim_input_kraskov():
    """Test one- and two-dimensional input for Kraskov estimators."""
    expected_mi, src_one, s, target_one = _get_gauss_data(
        expand=False, seed=SEED)
    src_two = np.expand_dims(src_one, axis=1)
    target_two = np.expand_dims(target_one, axis=1)
    ar_src_one, s = _get_ar_data(expand=False, seed=SEED)
    ar_src_two = np.expand_dims(ar_src_one, axis=1)

    # MI
    mi_estimator = JidtKraskovMI(settings={})
    mi_cor_one = mi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovMI', 'MI')
    mi_cor_two = mi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovMI', 'MI')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovMI one dim', 'JidtKraskovMI two dim', 'MI')

    # CMI
    cmi_estimator = JidtKraskovCMI(settings={})
    mi_cor_one = cmi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovCMI', 'CMI')
    mi_cor_two = cmi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovCMI', 'CMI')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovCMI one dim', 'JidtKraskovCMI two dim', 'CMI')

    # TE
    te_estimator = JidtKraskovTE(settings={'history_target': 1})
    mi_cor_one = te_estimator.estimate(src_one[1:], target_one[:-1])
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovTE', 'TE')
    mi_cor_two = te_estimator.estimate(src_two[1:], target_two[:-1])
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovTE', 'TE')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovTE one dim', 'JidtKraskovTE two dim', 'TE')

    # AIS
    ais_estimator = JidtKraskovAIS(settings={'history': 2})
    mi_cor_one = ais_estimator.estimate(ar_src_one)
    mi_cor_two = ais_estimator.estimate(ar_src_two)
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovAIS one dim', 'JidtKraskovAIS two dim',
                    'AIS (AR process)')
def test_mi_correlated_gaussians_two_chunks():
    """Test estimators on two chunks of correlated Gaussian data."""
    expected_mi, source, source_uncorr, target = _get_gauss_data(n=20000)
    n_points = source.shape[0]

    # Run OpenCL estimator.
    n_chunks = 2
    settings = {'debug': True}
    ocl_est = OpenCLKraskovMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(
        source, target, n_chunks=n_chunks)

    # Run JIDT estimator.
    jidt_est = JidtKraskovMI(settings={})
    mi_jidt = jidt_est.estimate(source[0:int(n_points / 2), :],
                                target[0:int(n_points / 2), :])

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: [{1:.4f}, {2:.4f}] '
          'nats; expected to be close to {3:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl[0], mi_ocl[1], expected_mi))
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl[0], expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl[0], mi_jidt, atol=0.05), (
        'MI estimates from the OpenCL and JIDT estimators differ for the '
        'first chunk (error larger 0.05).')
    assert np.isclose(mi_ocl[1], mi_jidt, atol=0.05), (
        'MI estimates from the OpenCL and JIDT estimators differ for the '
        'second chunk (error larger 0.05).')
    assert np.isclose(mi_ocl[0], mi_ocl[1], atol=0.05), (
        'MI estimates for the two chunks differ (error larger 0.05).')
def test_jidt_kraskov_alg1And2():
    """Test that the JIDT estimate changes properly when we change the KSG
    algorithm."""
    n = 100
    source = [sum(pair) for pair in zip(
        [y for y in range(n)],
        [rn.normalvariate(0, 0.000001) for r in range(n)])]
    source = np.array(source)
    target = np.array(source)  # target copies source on purpose
    # We've generated simple data 0:99, plus a little noise to ensure we only
    # ever get k nearest neighbours in each space. So the result should be:
    settings = {'lag': 0, 'kraskov_k': 4, 'noise_level': 0, 'algorithm_num': 1}
    for k in range(4, 16):
        settings['kraskov_k'] = k
        settings['algorithm_num'] = 1
        est1 = JidtKraskovMI(settings)
        mi_alg1 = est1.estimate(source, target)
        # Neighbour counts n_x and n_y will be k-1 because they are *strictly*
        # within the boundary.
        expected_alg1 = digamma(k) - 2 * digamma((k - 1) + 1) + digamma(n)
        _compare_result(mi_alg1, expected_alg1, 'JidtKraskovMI_alg1',
                        'Analytic', 'MI', tol=0.00001)
        settings['algorithm_num'] = 2
        est2 = JidtKraskovMI(settings)
        mi_alg2 = est2.estimate(source, target)
        expected_alg2 = digamma(k) - 1 / k - 2 * digamma(k) + digamma(n)
        _compare_result(mi_alg2, expected_alg2, 'JidtKraskovMI_alg2',
                        'Analytic', 'MI', tol=0.00001)
    # And now check that it doesn't work for algorithm "3".
    settings['algorithm_num'] = 3
    caught_assertion_error = False
    try:
        est3 = JidtKraskovMI(settings)
    except AssertionError:
        caught_assertion_error = True
    assert caught_assertion_error, (
        'Assertion error not raised for KSG algorithm 3 request.')
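# A quick standalone check of the analytic KSG expectations used in
# test_jidt_kraskov_alg1And2() for the smallest k tested. With k = 4 and
# n = 100, algorithm 1 reduces to -psi(4) + psi(100) (about 3.34 nats) and
# algorithm 2 to -psi(4) - 1/4 + psi(100) (about 3.09 nats). This only assumes
# scipy is available, as in the test above.
from scipy.special import digamma

k, n = 4, 100
print('Analytic KSG MI for duplicated, near-noiseless data: '
      'alg1 = {0:.4f} nats, alg2 = {1:.4f} nats'.format(
          digamma(k) - 2 * digamma((k - 1) + 1) + digamma(n),
          digamma(k) - 1 / k - 2 * digamma(k) + digamma(n)))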
def test_local_values():
    """Test estimation of local values and their return type."""
    expected_mi, source, s, target = _get_gauss_data(expand=False)
    ar_proc, s = _get_ar_data(expand=False)
    settings = {
        'discretise_method': 'equal',
        'n_discrete_bins': 4,
        'history_target': 1,
        'history': 2,
        'local_values': True}

    # MI - Discrete
    mi_estimator = JidtDiscreteMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteMI', 'MI',
                   0.08)  # more variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # MI - Gaussian
    mi_estimator = JidtGaussianMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianMI', 'MI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # MI - Kraskov
    mi_estimator = JidtKraskovMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovMI', 'MI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # CMI - Discrete
    cmi_estimator = JidtDiscreteCMI(settings=settings)
    mi = cmi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteCMI', 'CMI',
                   0.08)  # more variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # CMI - Gaussian
    mi_estimator = JidtGaussianCMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianCMI', 'CMI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # CMI - Kraskov
    mi_estimator = JidtKraskovCMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovCMI', 'CMI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # TE - Discrete
    te_estimator = JidtDiscreteTE(settings=settings)
    mi = te_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteTE', 'TE',
                   0.08)  # more variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # TE - Gaussian
    mi_estimator = JidtGaussianTE(settings=settings)
    mi = mi_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianTE', 'TE')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'
    # TE - Kraskov
    mi_estimator = JidtKraskovTE(settings=settings)
    mi = mi_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovTE', 'TE')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # AIS - Kraskov
    ais_estimator = JidtKraskovAIS(settings=settings)
    mi_k = ais_estimator.estimate(ar_proc)
    assert type(mi_k) is np.ndarray, 'Local values are not a numpy array.'
    # AIS - Discrete
    ais_estimator = JidtDiscreteAIS(settings=settings)
    mi_d = ais_estimator.estimate(ar_proc)
    assert type(mi_d) is np.ndarray, 'Local values are not a numpy array.'
    # TODO should we compare these?
    # _compare_result(np.mean(mi_k), np.mean(mi_d),
    #                 'JidtKraskovAIS', 'JidtDiscreteAIS', 'AIS (AR process)')
    # AIS - Gaussian
    ais_estimator = JidtGaussianAIS(settings=settings)
    mi_g = ais_estimator.estimate(ar_proc)
    assert type(mi_g) is np.ndarray, 'Local values are not a numpy array.'
    _compare_result(np.mean(mi_k), np.mean(mi_g),
                    'JidtKraskovAIS', 'JidtGaussianAIS', 'AIS (AR process)')
class MutualInfoIDTxl(MutualInfoPCA):
    """
    IDTxl Mutual Information Estimator [1]_, followed by PCA dimensionality
    reduction.

    Parameters
    ----------
    data_loader : DataLoader
        The data loader.
    pca_size : int, optional
        PCA dimension size.
        Default: 50
    debug : bool, optional
        If True, shows more informative plots.
        Default: False

    Attributes
    ----------
    ignore_layers : tuple
        A tuple of layer classes to ignore when monitoring MI.

    References
    ----------
    .. [1] P. Wollstadt, J. T. Lizier, R. Vicente, C. Finn,
       M. Martinez-Zarzuela, P. Mediano, L. Novelli, M. Wibral (2019).
       IDTxl: The Information Dynamics Toolkit xl: a Python package for the
       efficient analysis of multivariate information dynamics in networks.
       Journal of Open Source Software, 4(34), 1081.
       https://doi.org/10.21105/joss.01081.
       Source code: https://github.com/pwollstadt/IDTxl
    """

    def __init__(self, data_loader: DataLoader, pca_size=50, debug=False):
        super().__init__(data_loader=data_loader, pca_size=pca_size,
                         debug=debug)
        settings = {'kraskov_k': 4}
        try:
            self.estimator = OpenCLKraskovMI(settings=settings)
        except RuntimeError:
            warnings.warn("No OpenCL backend detected. Run "
                          "'conda install -c conda-forge pyopencl' "
                          "in a terminal.")
            self.estimator = JidtKraskovMI(settings=settings)

    def _prepare_input_finished(self):
        super()._prepare_input_finished()
        for key in ['input', 'target']:
            self.quantized[key] = self.quantized[key].numpy().astype(
                np.float64)

    def _process_activations(self, layer_name: str,
                             activations: List[torch.FloatTensor]):
        pass

    def _save_mutual_info(self):
        hidden_layers_name = set(self.activations.keys())
        hidden_layers_name.difference_update({'input', 'target'})
        for layer_name in hidden_layers_name:
            activations = torch.cat(self.activations[layer_name]).numpy()
            if (self.pca_size is not None
                    and activations.shape[-1] > self.pca_size):
                pca = sklearn.decomposition.PCA(n_components=self.pca_size)
                activations = pca.fit_transform(activations)
            activations = (activations - activations.mean()) / activations.std()
            activations = activations.astype(np.float64)
            info_x = self.estimator.estimate(self.quantized['input'],
                                             activations)
            info_y = self.estimator.estimate(activations,
                                             self.quantized['target'])
            self.information[layer_name] = (self.to_bits(float(info_x)),
                                            self.to_bits(float(info_y)))
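# A rough usage sketch of the estimator configured in MutualInfoIDTxl.__init__()
# above, run on toy data outside the class. The kraskov_k value mirrors the
# class; the arrays here are made up for illustration. JidtKraskovMI.estimate()
# returns the MI in nats (the OpenCL estimator instead returns an array with one
# value per chunk, as the tests above show); dividing by ln(2) converts to bits,
# analogous to the to_bits() call in _save_mutual_info().
import numpy as np
from idtxl.estimators_jidt import JidtKraskovMI

x = np.random.randn(2000, 5).astype(np.float64)   # e.g. PCA-reduced activations
y = 0.5 * x + 0.5 * np.random.randn(2000, 5)      # correlated second variable
estimator = JidtKraskovMI(settings={'kraskov_k': 4})
mi_bits = estimator.estimate(x, y) / np.log(2)
print('Estimated MI: {0:.3f} bits'.format(float(mi_bits)))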
print('Estimated MI: {0:.5f}, expected MI: {1:.5f}'.format(mi, expected_mi))

settings['history_target'] = 1
est = JidtDiscreteTE(settings)
te = est.estimate(source_cor[1:n], target[0:n - 1])
print('Estimated TE: {0:.5f}, expected TE: {1:.5f}'.format(te, expected_mi))

settings['history'] = 1
est = JidtDiscreteAIS(settings)
ais = est.estimate(target)
print('Estimated AIS: {0:.5f}, expected AIS: ~0'.format(ais))

# JIDT Kraskov estimators
settings = {}
est = JidtKraskovCMI(settings)
cmi = est.estimate(source_cor, target, source_uncor)
print('Estimated CMI: {0:.5f}, expected CMI: {1:.5f}'.format(cmi, expected_mi))

est = JidtKraskovMI(settings)
mi = est.estimate(source_cor, target)
print('Estimated MI: {0:.5f}, expected MI: {1:.5f}'.format(mi, expected_mi))

settings['history_target'] = 1
est = JidtKraskovTE(settings)
te = est.estimate(source_cor[1:n], target[0:n - 1])
print('Estimated TE: {0:.5f}, expected TE: {1:.5f}'.format(te, expected_mi))

settings['history'] = 1
est = JidtKraskovAIS(settings)
ais = est.estimate(target)
print('Estimated AIS: {0:.5f}, expected AIS: ~0'.format(ais))

# JIDT Gaussian estimators
settings = {}
est = JidtGaussianCMI(settings)
cmi = est.estimate(source_cor, target, source_uncor)
def test_invalid_settings_input():
    """Test handling of wrong inputs for settings dictionary."""
    # Wrong input type for settings dict.
    with pytest.raises(TypeError):
        JidtDiscreteMI(settings=1)
    with pytest.raises(TypeError):
        JidtDiscreteCMI(settings=1)
    with pytest.raises(TypeError):
        JidtDiscreteAIS(settings=1)
    with pytest.raises(TypeError):
        JidtDiscreteTE(settings=1)
    with pytest.raises(TypeError):
        JidtGaussianMI(settings=1)
    with pytest.raises(TypeError):
        JidtGaussianCMI(settings=1)
    with pytest.raises(TypeError):
        JidtGaussianAIS(settings=1)
    with pytest.raises(TypeError):
        JidtGaussianTE(settings=1)
    with pytest.raises(TypeError):
        JidtKraskovMI(settings=1)
    with pytest.raises(TypeError):
        JidtKraskovCMI(settings=1)
    with pytest.raises(TypeError):
        JidtKraskovAIS(settings=1)
    with pytest.raises(TypeError):
        JidtKraskovTE(settings=1)

    # Test if settings dict is initialised correctly.
    e = JidtDiscreteMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')
    e = JidtDiscreteCMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')
    e = JidtGaussianMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')
    e = JidtGaussianCMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')
    e = JidtKraskovMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')
    e = JidtKraskovCMI()
    assert type(e.settings) is dict, (
        'Did not initialise settings as dictionary.')

    # History parameter missing for AIS and TE estimation.
    with pytest.raises(RuntimeError):
        JidtDiscreteAIS(settings={})
    with pytest.raises(RuntimeError):
        JidtDiscreteTE(settings={})
    with pytest.raises(RuntimeError):
        JidtGaussianAIS(settings={})
    with pytest.raises(RuntimeError):
        JidtGaussianTE(settings={})
    with pytest.raises(RuntimeError):
        JidtKraskovAIS(settings={})
    with pytest.raises(RuntimeError):
        JidtKraskovTE(settings={})