def test_cmi_uncorrelated_gaussians():
    """Test CMI estimator on uncorrelated Gaussian data.

    Compares the OpenCL Kraskov CMI estimator against the JIDT Kraskov CMI
    estimator on three independent Gaussian variables. Both estimates are
    expected to be close to 0 nats, and close to each other.
    """
    n_obs = 10000
    var1 = np.random.randn(n_obs, 1)
    var2 = np.random.randn(n_obs, 1)
    var3 = np.random.randn(n_obs, 1)

    # Run OpenCL estimator (debug mode returns distances and counts as well).
    settings = {'debug': True, 'return_counts': True}
    ocl_est = OpenCLKraskovCMI(settings=settings)
    (mi_ocl, dist, n_range_var1,
     n_range_var2, n_range_var3) = ocl_est.estimate(var1, var2, var3)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovCMI(settings={})
    mi_jidt = jidt_est.estimate(var1, var2, var3)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to 0 nats for uncorrelated '
          'Gaussians.'.format(mi_jidt, mi_ocl))
    assert np.isclose(mi_jidt, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, 0, atol=0.05), (
        'MI estimation for uncorrelated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    # Fixed message: this check uses atol=0.0001 and compares the two
    # estimators against each other, not against the analytic value.
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates of the OpenCL and JIDT estimator diverge '
        '(error larger 0.0001).')
def test_cmi_no_cond_correlated_gaussians():
    """Test estimators on correlated Gaussian data without conditional.

    When no conditional variable is provided, the CMI estimators should
    fall back to estimating the MI between source and target. The expected
    value is computed analytically from the effective covariance of the
    generated sample.
    """
    expected_mi, source, source_uncorr, target = _get_gauss_data()

    # Run OpenCL estimator (debug mode returns distances and counts as well).
    settings = {'debug': True}
    ocl_est = OpenCLKraskovCMI(settings=settings)
    mi_ocl, dist, n_range_var1, n_range_var2 = ocl_est.estimate(source, target)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovCMI(settings={})
    mi_jidt = jidt_est.estimate(source, target)

    # Recompute the expected MI from the realised (sample) covariance rather
    # than the nominal one.
    cov_effective = np.cov(np.squeeze(source), np.squeeze(target))[1, 0]
    expected_mi = math.log(1 / (1 - math.pow(cov_effective, 2)))
    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to {2:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl, expected_mi))
    # Fixed messages: this test runs on correlated Gaussians.
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates of the OpenCL and JIDT estimator diverge '
        '(error larger 0.0001).')
def test_cmi_correlated_gaussians():
    """Test estimators on correlated Gaussian data with conditional.

    Compares the OpenCL and JIDT Kraskov CMI estimators on correlated
    Gaussians, conditioning on an uncorrelated third variable; the estimate
    should be close to the analytically expected MI.
    """
    expected_mi, source, source_uncorr, target = _get_gauss_data()

    # Run OpenCL estimator (debug mode returns distances and counts as well).
    settings = {'debug': True, 'return_counts': True}
    ocl_est = OpenCLKraskovCMI(settings=settings)
    (mi_ocl, dist, n_range_var1,
     n_range_var2, n_range_cond) = ocl_est.estimate(source, target,
                                                    source_uncorr)
    mi_ocl = mi_ocl[0]

    # Run JIDT estimator.
    jidt_est = JidtKraskovCMI(settings={})
    mi_jidt = jidt_est.estimate(source, target, source_uncorr)

    print('JIDT MI result: {0:.4f} nats; OpenCL MI result: {1:.4f} nats; '
          'expected to be close to {2:.4f} nats for correlated '
          'Gaussians.'.format(mi_jidt, mi_ocl, expected_mi))
    # Fixed messages: this test runs on correlated Gaussians.
    assert np.isclose(mi_jidt, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'JIDT estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, expected_mi, atol=0.05), (
        'MI estimation for correlated Gaussians using the '
        'OpenCL estimator failed (error larger 0.05).')
    assert np.isclose(mi_ocl, mi_jidt, atol=0.0001), (
        'MI estimates of the OpenCL and JIDT estimator diverge '
        '(error larger 0.0001).')
def test_cmi_gauss_data():
    """Test CMI estimation on two sets of Gaussian random data.

    The first test is on correlated variables, the second on uncorrelated
    variables. Note that the calculation is based on a random variable
    (because the generated data is a set of random variables) - the result
    will be of the order of what we expect, but not exactly equal to it; in
    fact, there will be a large variance around it.
    """
    expected_mi, source1, source2, target = _get_gauss_data()

    # Run each estimator type over both the correlated and the uncorrelated
    # source and compare against the expected values.
    for est_class, est_settings, est_name in (
            (JidtKraskovCMI, {}, 'JidtKraskovCMI'),
            (JidtGaussianCMI, {}, 'JidtGaussianCMI'),
            (JidtDiscreteCMI,
             {'discretise_method': 'equal', 'num_discrete_bins': 5},
             'JidtDiscreteCMI')):
        estimator = est_class(settings=est_settings)
        cmi_corr = estimator.estimate(source1, target)
        cmi_uncorr = estimator.estimate(source2, target)
        _assert_result(cmi_corr, expected_mi, est_name, 'CMI (corr.)')
        _assert_result(cmi_uncorr, 0, est_name, 'CMI (uncorr.)')
def test_cmi_gauss_data_no_cond():
    """Test estimators on correlated Gauss data without a conditional.

    The estimators should return the MI if no conditional variable is
    provided. Note that the calculation is based on a random variable
    (because the generated data is a set of random variables) - the result
    will be of the order of what we expect, but not exactly equal to it; in
    fact, there will be a large variance around it.
    """
    expected_mi, source1, source2, target = _get_gauss_data()

    # Call each CMI estimator without a conditional; the result should be
    # the plain MI between source and target.
    for est_class, est_settings, est_name in (
            (JidtKraskovCMI, {}, 'JidtKraskovCMI'),
            (JidtGaussianCMI, {}, 'JidtGaussianCMI'),
            (JidtDiscreteCMI,
             {'discretise_method': 'equal', 'num_discrete_bins': 5},
             'JidtDiscreteCMI')):
        estimator = est_class(settings=est_settings)
        cmi_corr = estimator.estimate(source1, target)
        cmi_uncorr = estimator.estimate(source2, target)
        _assert_result(cmi_corr, expected_mi, est_name, 'CMI (no cond.)')
        _assert_result(cmi_uncorr, 0, est_name, 'CMI (uncorr., no cond.)')
def test_insufficient_no_points():
    """Test if estimation aborts for too few data points."""
    expected_mi, source1, source2, target = _get_gauss_data(n=4)

    settings = {
        'kraskov_k': 4,
        'theiler_t': 0,
        'history': 1,
        'history_target': 1,
        'lag_mi': 1,
        'source_target_delay': 1}

    # Each Kraskov estimator must refuse to run when there are too few
    # samples for the requested k / Theiler correction.
    def _assert_estimators_raise():
        for est_class, est_args in (
                (JidtKraskovTE, (source1, target)),
                (JidtKraskovMI, (source1, target)),
                (JidtKraskovCMI, (source1, target, target)),
                (JidtKraskovAIS, (source1,))):
            with pytest.raises(RuntimeError):
                est_class(settings).estimate(*est_args)

    # Test first settings combination with k==N.
    _assert_estimators_raise()

    # Test a second combination with a Theiler-correction != 0.
    settings['theiler_t'] = 1
    settings['kraskov_k'] = 2
    _assert_estimators_raise()
def test_gauss_data():
    """Test bivariate TE estimation from correlated Gaussians."""
    # Generate data and add a delay of one sample between source and target.
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    data = Data(np.hstack((source[1:], source_uncorr[1:], target[:-1])),
                dim_order='sp')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 2,
        'min_lag_sources': 1,
        'max_lag_target': 1}
    network_analysis = BivariateTE()
    results = network_analysis.analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    te = results.get_single_target(2, fdr=False)['te'][0]
    inferred_sources = results.get_target_sources(2, fdr=False)

    # Assert that only the correlated source was detected.
    assert len(inferred_sources) == 1, 'Wrong no. inferred sources: {0}.'.format(
        len(inferred_sources))
    assert inferred_sources[0] == 0, 'Wrong inferred source: {0}.'.format(
        inferred_sources[0])

    # Compare BivariateTE() estimate to a direct JIDT core-estimator call on
    # the same selected variables.
    current_value = (2, 2)
    single_target = results.get_single_target(2, fdr=False)
    var1 = data.get_realisations(
        current_value, single_target['selected_vars_sources'])[0]
    var2 = data.get_realisations(current_value, [current_value])[0]
    cond = data.get_realisations(
        current_value, single_target['selected_vars_target'])[0]
    jidt_cmi = JidtKraskovCMI({}).estimate(
        var1=var1, var2=var2, conditional=cond)
    print('Estimated TE: {0:0.6f}, estimated TE using JIDT core estimator: '
          '{1:0.6f} (expected: {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
    assert np.isclose(te, jidt_cmi, atol=0.005), (
        'Estimated TE {0:0.6f} differs from JIDT estimate {1:0.6f} (expected: '
        'TE {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
def test_one_two_dim_input_kraskov():
    """Test one- and two-dimensional input for Kraskov estimators.

    Each estimator is run on a 1D array and on the same data expanded to a
    2D (n, 1) array; both calls must give matching results.
    """
    expected_mi, src_one, s, target_one = _get_gauss_data(expand=False,
                                                          seed=SEED)
    src_two = np.expand_dims(src_one, axis=1)
    target_two = np.expand_dims(target_one, axis=1)
    ar_src_one, s = _get_ar_data(expand=False, seed=SEED)
    ar_src_two = np.expand_dims(ar_src_one, axis=1)

    # MI
    mi_estimator = JidtKraskovMI(settings={})
    mi_cor_one = mi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovMI', 'MI')
    mi_cor_two = mi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovMI', 'MI')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovMI one dim', 'JidtKraskovMI two dim', 'MI')
    # CMI
    cmi_estimator = JidtKraskovCMI(settings={})
    mi_cor_one = cmi_estimator.estimate(src_one, target_one)
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovCMI', 'CMI')
    mi_cor_two = cmi_estimator.estimate(src_two, target_two)
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovCMI', 'CMI')
    # Fixed labels: this section tests the CMI estimator, not the MI one.
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovCMI one dim', 'JidtKraskovCMI two dim', 'CMI')
    # TE
    te_estimator = JidtKraskovTE(settings={'history_target': 1})
    mi_cor_one = te_estimator.estimate(src_one[1:], target_one[:-1])
    _assert_result(mi_cor_one, expected_mi, 'JidtKraskovTE', 'TE')
    # Bug fix: the two-dimensional case previously re-used the
    # one-dimensional input, so 2D TE input was never actually tested.
    mi_cor_two = te_estimator.estimate(src_two[1:], target_two[:-1])
    _assert_result(mi_cor_two, expected_mi, 'JidtKraskovTE', 'TE')
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovTE one dim', 'JidtKraskovTE two dim', 'TE')
    # AIS
    ais_estimator = JidtKraskovAIS(settings={'history': 2})
    mi_cor_one = ais_estimator.estimate(ar_src_one)
    mi_cor_two = ais_estimator.estimate(ar_src_two)
    _compare_result(mi_cor_one, mi_cor_two,
                    'JidtKraskovAIS one dim', 'JidtKraskovAIS two dim',
                    'AIS (AR process)')
def test_invalid_settings_input():
    """Test handling of wrong inputs for settings dictionary."""
    # Wrong input type for settings dict: every estimator must raise.
    for est_class in (JidtDiscreteMI, JidtDiscreteCMI, JidtDiscreteAIS,
                      JidtDiscreteTE, JidtGaussianMI, JidtGaussianCMI,
                      JidtGaussianAIS, JidtGaussianTE, JidtKraskovMI,
                      JidtKraskovCMI, JidtKraskovAIS, JidtKraskovTE):
        with pytest.raises(TypeError):
            est_class(settings=1)

    # Test if settings dict is initialised correctly when omitted.
    for est_class in (JidtDiscreteMI, JidtDiscreteCMI, JidtGaussianMI,
                      JidtGaussianCMI, JidtKraskovMI, JidtKraskovCMI):
        e = est_class()
        assert type(
            e.settings) is dict, 'Did not initialise settings as dictionary.'

    # History parameter missing for AIS and TE estimation.
    for est_class in (JidtDiscreteAIS, JidtDiscreteTE, JidtGaussianAIS,
                      JidtGaussianTE, JidtKraskovAIS, JidtKraskovTE):
        with pytest.raises(RuntimeError):
            est_class(settings={})
def test_local_values():
    """Test estimation of local values and their return type.

    With ``local_values=True`` every estimator must return a numpy array of
    local values whose mean is close to the expected average measure. The
    CMI estimators are called without a conditional, i.e. they fall back to
    estimating MI.
    """
    expected_mi, source, s, target = _get_gauss_data(expand=False)
    ar_proc, s = _get_ar_data(expand=False)

    # NOTE(review): other tests in this file use 'num_discrete_bins' —
    # confirm 'n_discrete_bins' is the key the discrete estimators expect.
    settings = {
        'discretise_method': 'equal',
        'n_discrete_bins': 4,
        'history_target': 1,
        'history': 2,
        'local_values': True}

    # MI - Discrete
    mi_estimator = JidtDiscreteMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteMI', 'MI',
                   0.08)  # More variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # MI - Gaussian
    mi_estimator = JidtGaussianMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianMI', 'MI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # MI - Kraskov
    mi_estimator = JidtKraskovMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovMI', 'MI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # CMI - Discrete
    cmi_estimator = JidtDiscreteCMI(settings=settings)
    mi = cmi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteCMI', 'CMI',
                   0.08)  # More variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # CMI - Gaussian (fixed label: this is the CMI estimator)
    mi_estimator = JidtGaussianCMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianCMI', 'CMI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # CMI - Kraskov (fixed label: this is the CMI estimator)
    mi_estimator = JidtKraskovCMI(settings=settings)
    mi = mi_estimator.estimate(source, target)
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovCMI', 'CMI')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # TE - Discrete
    te_estimator = JidtDiscreteTE(settings=settings)
    mi = te_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtDiscreteTE', 'TE',
                   0.08)  # More variability here
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # TE - Gaussian (fixed label: this is the TE estimator)
    mi_estimator = JidtGaussianTE(settings=settings)
    mi = mi_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtGaussianTE', 'TE')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # TE - Kraskov (fixed label: this is the TE estimator)
    mi_estimator = JidtKraskovTE(settings=settings)
    mi = mi_estimator.estimate(source[1:], target[:-1])
    _assert_result(np.mean(mi), expected_mi, 'JidtKraskovTE', 'TE')
    assert type(mi) is np.ndarray, 'Local values are not a numpy array.'

    # AIS - Kraskov
    ais_estimator = JidtKraskovAIS(settings=settings)
    mi_k = ais_estimator.estimate(ar_proc)
    assert type(mi_k) is np.ndarray, 'Local values are not a numpy array.'

    # AIS - Discrete
    ais_estimator = JidtDiscreteAIS(settings=settings)
    mi_d = ais_estimator.estimate(ar_proc)
    assert type(mi_d) is np.ndarray, 'Local values are not a numpy array.'
    # TODO should we compare these?
    # _compare_result(np.mean(mi_k), np.mean(mi_d),
    #                 'JidtKraskovAIS', 'JidtDiscreteAIS', 'AIS (AR process)')

    # AIS - Gaussian
    ais_estimator = JidtGaussianAIS(settings=settings)
    mi_g = ais_estimator.estimate(ar_proc)
    assert type(mi_g) is np.ndarray, 'Local values are not a numpy array.'
    _compare_result(np.mean(mi_k), np.mean(mi_g),
                    'JidtKraskovAIS', 'JidtGaussianAIS', 'AIS (AR process)')
def test_calculate_single_link():
    """Test calculation of single link (conditional) MI and TE."""
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    # Shift source vs. target by one sample to create a lagged coupling.
    source = source[1:]
    source_uncorr = source_uncorr[1:]
    target = target[:-1]
    data = Data(np.hstack((source, source_uncorr, target)),
                dim_order='sp')
    n = NetworkAnalysis()
    n._cmi_estimator = JidtKraskovCMI(settings={})
    n.settings = {'local_values': False}
    current_value = (2, 1)

    # Test single link estimation for a single and multiple sources for
    # cases: no target vars, source vars/no source vars (tests if the
    # conditioning set is built correctly for conditioning='full').
    source_realisations = data.get_realisations(current_value, [(0, 0)])[0]
    current_value_realisations = data.get_realisations(current_value,
                                                       [current_value])[0]
    # Reference estimates to compare _calculate_single_link() against.
    expected_mi = n._cmi_estimator.estimate(current_value_realisations,
                                            source_realisations)
    # cond. on second source
    cond_realisations = data.get_realisations(current_value, [(1, 0)])[0]
    expected_mi_cond1 = n._cmi_estimator.estimate(current_value_realisations,
                                                  source_realisations,
                                                  cond_realisations)
    for sources in ['all', [0]]:
        for conditioning in ['full', 'target', 'none']:
            for source_vars in [[(0, 0)], [(0, 0), (1, 0)]]:
                mi = n._calculate_single_link(
                    data, current_value, source_vars, target_vars=None,
                    sources=sources, conditioning=conditioning)
                if mi.shape[0] > 1:  # array for source='all'
                    mi = mi[0]
                if source_vars == [(0, 0)]:  # no conditioning
                    assert np.isclose(mi, expected_mi, rtol=0.05), (
                        'Estimated single-link MI ({0}) differs from expected '
                        'MI ({1}).'.format(mi, expected_mi))
                else:
                    if conditioning == 'full':  # cond. on second source
                        assert np.isclose(mi, expected_mi_cond1, rtol=0.05), (
                            'Estimated single-link MI ({0}) differs from '
                            'expected MI ({1}).'.format(mi, expected_mi_cond1))
                    else:  # no conditioning
                        assert np.isclose(mi, expected_mi, rtol=0.05), (
                            'Estimated single-link MI ({0}) differs from '
                            'expected MI ({1}).'.format(mi, expected_mi))

    # Test single link estimation for a single and multiple sources for
    # cases: target vars/no target vars, source vars (tests if the
    # conditioning set is built correctly for conditioning='full').
    cond_realisations = np.hstack((  # cond. on second source and target
        data.get_realisations(current_value, [(1, 0)])[0],
        data.get_realisations(current_value, [(2, 0)])[0]))
    expected_mi_cond2 = n._cmi_estimator.estimate(
        current_value_realisations, source_realisations, cond_realisations)
    # cond. on target
    cond_realisations = data.get_realisations(current_value, [(2, 0)])[0]
    expected_mi_cond3 = n._cmi_estimator.estimate(
        current_value_realisations, source_realisations, cond_realisations)
    # NOTE(review): `sources` below carries over its last value ([0]) from
    # the loop above — confirm this is intended and not a leftover.
    for target_vars in [None, [(2, 0)]]:
        for conditioning in ['full', 'target', 'none']:
            mi = n._calculate_single_link(
                data, current_value, source_vars=[(0, 0), (1, 0)],
                target_vars=target_vars, sources=sources,
                conditioning=conditioning)
            if mi.shape[0] > 1:  # array for source='all'
                mi = mi[0]
            if conditioning == 'none':  # no conditioning
                assert np.isclose(mi, expected_mi, rtol=0.05), (
                    'Estimated single-link MI ({0}) differs from expected '
                    'MI ({1}).'.format(mi, expected_mi))
            else:
                # target only
                if target_vars is not None and conditioning == 'target':
                    assert np.isclose(mi, expected_mi_cond3, rtol=0.05), (
                        'Estimated single-link MI ({0}) differs from '
                        'expected MI ({1}).'.format(mi, expected_mi_cond3))
                # target and 2nd source
                if target_vars is not None and conditioning == 'full':
                    assert np.isclose(mi, expected_mi_cond2, rtol=0.05), (
                        'Estimated single-link MI ({0}) differs from '
                        'expected MI ({1}).'.format(mi, expected_mi_cond2))
                # target is None, condition on second target
                else:
                    if conditioning == 'full':
                        assert np.isclose(
                            mi, expected_mi_cond1, rtol=0.05
                        ), ('Estimated single-link MI ({0}) differs from expected '
                            'MI ({1}).'.format(mi, expected_mi_cond1))

    # Test requested sources not in source vars
    with pytest.raises(RuntimeError):
        mi = n._calculate_single_link(
            data, current_value, source_vars=[(0, 0), (3, 0)],
            target_vars=None, sources=4, conditioning='full')
    # Test source vars not in data/processes
    with pytest.raises(IndexError):
        mi = n._calculate_single_link(
            data, current_value, source_vars=[(0, 0), (10, 0)],
            target_vars=None, sources='all', conditioning='full')
    # Test unknown conditioning
    with pytest.raises(RuntimeError):
        mi = n._calculate_single_link(
            data, current_value, source_vars=[(0, 0)], conditioning='test')
from idtxl.estimators_jidt import JidtKraskovCMI, JidtKraskovTE from idtxl.multivariate_te import MultivariateTE from idtxl.data import Data ## Use IDTxl's core esitmator if lags/embeddings are known # Generate high-dimensional example processes n = 1000 source_dim = 3 cond_dim = 2 target = np.random.randn(n) source = np.random.randn(n, source_dim) conditional = np.random.randn(n, cond_dim) settings = {} est = JidtKraskovCMI(settings) cmi = est.estimate(source, target, conditional) print(f'CMI estimate: {cmi:.4f}') ## Use IDTxl's network inference algorithm to optimize lags/embeddings # Generate test data, we assume that the processes represent sources, a target, # and additional processes, we want to condition on. For the conditioning, we # have to provide tuples of past variables plus a lag. data = Data(np.random.randn(5, n), dim_order='ps') target = 0 sources = [1, 2] cond_1_ind = 3 cond_2_ind = 4 cond_1_lag = 1 cond_2_lag = 1