def test_multivariate_te_mute():
    """Run multivariate TE estimation on the MuTE example network.

    The data is generated from the network used as an example in the paper
    on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). That network has
    the following (non-linear) couplings:

        0 -> 1, u = 2
        0 -> 2, u = 3
        0 -> 3, u = 2 (non-linear)
        3 -> 4, u = 1
        4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.

    This is a smoke test: the analysis is run for targets 1 and 2 and the
    result is not inspected.
    """
    # One permutation count for all statistics. 'n_perm_max_seq' should be
    # equal to 'n_perm_min_stat' because the surrogate table from the min
    # statistic is reused.
    n_perm = 21
    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_omnibus': n_perm,
        'n_perm_max_seq': n_perm,
    }

    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)

    network_analysis = Multivariate_te(
        max_lag_sources=3,
        min_lag_sources=1,
        max_lag_target=3,
        options=analysis_opts,
    )
    network_analysis.analyse_network(dat, targets=[1, 2])
def test_multivariate_te_one_realisation_per_replication():
    """Test the boundary case of one realisation per replication.

    The data set is built such that one pattern fits into the time series
    exactly once, which yields a single realisation per replication for
    each variable. This is easy to verify afterwards and exercises the
    realisation extraction of the data object along the way.
    """
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    max_lag_target = 5
    max_lag_sources = max_lag_target
    min_lag_sources = 4
    target = 0

    # Data layout is processes x samples x replications ('psr'); the number
    # of samples is exactly max lag + 1.
    n_repl = 10
    n_procs = 2
    shape = (n_procs, max_lag_sources + 1, n_repl)
    n_points = shape[0] * shape[1] * shape[2]
    dat = Data(normalise=False)
    dat.set_data(np.arange(n_points).reshape(*shape), 'psr')

    nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts,
                           max_lag_target)
    nw_0._initialise(dat, 'all', target)

    # Nothing has been selected right after initialisation.
    assert not nw_0.selected_vars_full
    assert not nw_0.selected_vars_sources
    assert not nw_0.selected_vars_target

    assert (nw_0._replication_index == np.arange(n_repl)).all()
    expected_current_value = (target, max(max_lag_sources, max_lag_target))
    assert nw_0._current_value == expected_current_value
    # The current value's realisations are the last sample of the target
    # process in every replication.
    last_target_samples = dat.data[target, -1, :]
    assert (nw_0._current_value_realisations[:, 0] ==
            last_target_samples).all()
def test_check_source_set():
    """Test the method _check_source_set.

    The method sets the list of source processes from which candidates are
    taken for multivariate TE estimation.
    """
    dat = Data()
    dat.generate_mute_data(100, 5)
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}

    def new_network():
        # max_lag_sources=7, min_lag_sources=5, max_lag_target=5
        return Multivariate_te(7, 5, 5, analysis_opts)

    # A plain list of sources should be accepted.
    nw_0 = new_network()
    nw_0._check_source_set([1, 2, 3], dat.n_processes)

    # The check has to fail if the target is also in the source list.
    nw_0 = new_network()
    nw_0.target = 0
    with pytest.raises(RuntimeError):
        nw_0._check_source_set([0, 1, 2, 3], dat.n_processes)

    # A single source (an int, not a list) should end up stored as a list.
    nw_0 = new_network()
    nw_0._check_source_set(1, dat.n_processes)
    assert type(nw_0.source_set) is list
def test_analyse_network():
    """Test method for full network analysis.

    Runs analyse_network() for (1) all targets and all sources, (2) a subset
    of targets with all sources, and (3) a subset of targets with a subset
    of sources. For each run the test checks that the result holds exactly
    the requested targets and that the expected sources were tested for
    every target.
    """
    n_processes = 5  # the MuTE network has 5 nodes
    dat = Data()
    dat.generate_mute_data(10, 5)
    nw_0 = Multivariate_te(max_lag_sources=5, min_lag_sources=4,
                           options={'cmi_calc_name': 'jidt_kraskov'},
                           max_lag_target=5)
    all_procs = set(range(n_processes))

    # Test all to all analysis
    r = nw_0.analyse_network(dat, targets='all', sources='all')
    # Drop the optional FDR entry so only per-target results remain. pop()
    # with a default replaces a bare try/except that hid every exception.
    r.pop('fdr', None)
    assert np.array_equal(list(r.keys()), np.arange(n_processes)), (
        'Network analysis did not run on all targets.')
    for t in r:
        # With sources='all' every process except the target is a source.
        expected_sources = sorted(all_procs - {t})
        assert np.array_equal(r[t]['sources_tested'], expected_sources), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for subset of targets
    target_list = [1, 2, 3]
    r = nw_0.analyse_network(dat, targets=target_list, sources='all')
    r.pop('fdr', None)
    assert np.array_equal(list(r.keys()), target_list), (
        'Network analysis did not run on correct subset of targets.')
    for t in r:
        expected_sources = sorted(all_procs - {t})
        assert np.array_equal(r[t]['sources_tested'], expected_sources), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    r = nw_0.analyse_network(dat, targets=target_list, sources=source_list)
    r.pop('fdr', None)
    assert np.array_equal(list(r.keys()), target_list), (
        'Network analysis did not run for all targets.')
    for t in r:
        assert np.array_equal(r[t]['sources_tested'], source_list), (
            'Network analysis did not run on the correct subset of sources '
            'for target {0}'.format(t))
def test_multivariate_te_init():
    """Test single-target analysis for different lag combinations.

    Two lag settings are expected to run through; a minimum source lag
    bigger than the maximum source lag is expected to raise an
    AssertionError during the analysis.
    """
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    target = 0
    sources = [2, 3, 4]
    dat = Data()
    dat.generate_mute_data(100, 5)

    # Arguments are (max_lag_sources, min_lag_sources, max_lag_target).
    # Source max lag bigger than target max lag.
    nw_0 = Multivariate_te(7, 4, 5, analysis_opts)
    nw_0.analyse_single_target(dat, target, sources)

    # This should just run: the target max lag is bigger than the source
    # max lag.
    nw_1 = Multivariate_te(5, 4, 7, analysis_opts)
    nw_1.analyse_single_target(dat, target, sources)

    # The following should crash: min lag bigger than max lag.
    nw_2 = Multivariate_te(5, 7, 7, analysis_opts)
    with pytest.raises(AssertionError):
        nw_2.analyse_single_target(dat, target, sources)
def test_include_source_candidates():
    """Test candidate definition for one process and a set of samples.

    Samples are taken from the past of the current value (target, 10),
    spaced by tau_target, and have to show up as (process, sample) tuples
    in the candidate set.
    """
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    target = 1
    tau_target = 3
    max_lag_target = 10
    current_val = (target, 10)

    # Sample indices 9, 6, 3: start one sample before the current value and
    # step backwards by tau_target.
    first_sample = current_val[1] - 1
    stop_sample = current_val[1] - max_lag_target
    samples = np.arange(first_sample, stop_sample, -tau_target)

    nw = Multivariate_te(5, 1, 5, analysis_opts)
    candidates = nw._define_candidates([target], samples)

    for expected in [(1, 9), (1, 6), (1, 3)]:
        assert expected in candidates, (
            'Sample missing from candidates: {0}.'.format(expected))
def test_plot_selected_vars():
    """Plot the variables selected by a single-target analysis.

    Smoke test: analyses target 2 of the MuTE example data and passes the
    result to the plotting function; nothing is asserted.
    """
    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    max_lag, min_lag = 5, 4

    dat = Data()
    dat.generate_mute_data(100, 5)

    network_analysis = Multivariate_te(max_lag, min_lag, analysis_opts)
    res = network_analysis.analyse_single_target(dat, target=2)
    vis.plot_selected_vars(res)
def test_define_candidates():
    """Test candidate definition from a list of procs and a list of samples."""
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    target = 1
    tau_target = 3
    max_lag_target = 10
    current_val = (target, 10)

    # Walk backwards from the sample right before the current value in steps
    # of tau_target; this yields the sample indices 9, 6, 3.
    newest = current_val[1] - 1
    oldest_excl = current_val[1] - max_lag_target
    samples = np.arange(newest, oldest_excl, -tau_target)

    nw = Multivariate_te(5, 1, analysis_opts, 5)
    candidates = nw._define_candidates([target], samples)

    for expected in [(1, 9), (1, 6), (1, 3)]:
        assert expected in candidates, (
            'Sample missing from candidates: {0}.'.format(expected))
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from the first to the second system with delay u = 45 samples. Only the
    coupled direction (target 1) is analysed here and the result is
    printed; nothing is asserted.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    data_file = os.path.join(os.path.dirname(__file__),
                             'data/lorenz_2_exampledata.npy')
    dat = Data()
    dat.set_data(np.load(data_file), 'psr')

    # Small permutation counts (the max/min statistics were 200 at some
    # point). 'n_perm_max_seq' should be equal to 'n_perm_min_stat' because
    # the surrogate table from the min statistic is reused.
    analysis_opts = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    # This first setup is only constructed; it is replaced by the debugging
    # setup below before any analysis runs.
    lorenz_analysis = Multivariate_te(max_lag_sources=47, min_lag_sources=42,
                                      max_lag_target=20, tau_target=2,
                                      options=analysis_opts)

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation. This makes things faster because these variables don't
    # have to be tested again. Lags run from 44 down to 28 in steps of 2.
    analysis_opts['add_conditionals'] = [
        (1, lag) for lag in range(44, 27, -2)]
    lorenz_analysis = Multivariate_te(max_lag_sources=60, min_lag_sources=31,
                                      tau_sources=2, max_lag_target=0,
                                      tau_target=1, options=analysis_opts)

    # Target 0 (no coupling expected) and the full network are not analysed.
    res_1 = lorenz_analysis.analyse_single_target(dat, 1)  # coupling
    print(res_1)
def test_visualise_multivariate_te():
    """Visualise output of multivariate TE estimation.

    Smoke test: analyses targets 0, 1 and 2 of the MuTE example data and
    passes the result to the network plot; nothing is asserted.
    """
    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    max_lag, min_lag = 5, 4

    dat = Data()
    dat.generate_mute_data(100, 5)

    network_analysis = Multivariate_te(max_lag, min_lag, analysis_opts)
    res = network_analysis.analyse_network(dat, targets=[0, 1, 2])
    vis.plot_network(res)
def test_add_conditional_manually():
    """Add a conditioning variable that is not in the data set.

    The forced conditional (8, 0) refers to process 8, which the test
    expects to be rejected with an IndexError during initialisation.
    """
    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'add_conditionals': (8, 0),
    }
    nw_1 = Multivariate_te(max_lag_sources=5, min_lag_sources=3,
                           options=analysis_opts, max_lag_target=7)

    dat = Data()
    dat.generate_mute_data()

    target = 0
    sources = [1, 2, 3]
    with pytest.raises(IndexError):
        nw_1._initialise(dat, sources, target)
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Run the multivariate TE algorithm on two sets of random data, where the
    second set is derived from the first. This test should find no
    significant conditionals at all (neither in the target's nor in the
    source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    # The second process gets `lag` fresh random samples followed by
    # d_0[:, lag:, :], i.e. it equals d_0 from sample `lag` onwards.
    # NOTE(review): this is an unshifted copy rather than a delayed one;
    # confirm this matches the intended "lagged copy".
    d_1 = np.concatenate((np.random.rand(1, lag, 20), d_0[:, lag:, :]),
                         axis=1)
    dat = Data()
    dat.set_data(np.concatenate((d_0, d_1), axis=0), 'psr')

    analysis_opts = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = Multivariate_te(max_lag_sources=5,
                                      options=analysis_opts)

    # Result entries that must stay None when nothing significant is found.
    must_be_none = [
        ('cond_sources_pval', 'Conditional p-value is not None.'),
        ('omnibus_pval', 'Omnibus p-value is not None.'),
        ('omnibus_sign', 'Omnibus significance is not None.'),
        ('conditional_sources_te', 'Conditional TE values is not None.'),
    ]

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for target in (0, 1):
        res = random_analysis.analyse_single_target(dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        for key, message in must_be_none:
            assert res[key] is None, message
def test_multivariate_te_initialise():
    """Test if all values are set correctly in _initialise().

    The data set is built such that one pattern fits into the time series
    exactly once, so there is one realisation per replication for each
    variable. This is easy to verify and exercises the realisation
    extraction of the data object as well. Afterwards the 'faes' option and
    a forced conditional outside the data set are checked.
    """
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    max_lag_target = 5
    max_lag_sources = max_lag_target
    min_lag_sources = 4
    target = 1

    def new_network():
        return Multivariate_te(max_lag_sources, min_lag_sources,
                               max_lag_target, analysis_opts)

    # Data layout is processes x samples x replications ('psr').
    n_repl = 30
    n_procs = 2
    n_samples = max_lag_sources + 1
    n_points = n_procs * n_samples * n_repl
    dat = Data(normalise=False)
    dat.set_data(np.arange(n_points).reshape(n_procs, n_samples, n_repl),
                 'psr')

    nw_0 = new_network()
    nw_0._initialise(dat, 'all', target)
    assert not nw_0.selected_vars_full
    assert not nw_0.selected_vars_sources
    assert not nw_0.selected_vars_target
    assert (nw_0._replication_index == np.arange(n_repl)).all()
    assert nw_0._current_value == (target,
                                   max(max_lag_sources, max_lag_target))
    # The current value realisations are the last n_repl entries of the
    # running index, one per replication.
    expected_realisations = np.arange(n_points - n_repl,
                                      n_points).reshape(n_repl, 1)
    assert (nw_0._current_value_realisations == expected_realisations).all()

    # Check if the Faes method is working. The options dict is modified in
    # place before a new instance is created.
    analysis_opts['add_conditionals'] = 'faes'
    nw_1 = new_network()
    dat.generate_mute_data()
    sources = [1, 2, 3]
    target = [0]
    nw_1._initialise(dat, sources, target)
    current_sample = nw_1.current_value[1]
    assert nw_1._selected_vars_sources == [
        (source, current_sample) for source in sources], (
        'Did not add correct additional conditioning vars.')

    # Adding a variable that is not in the data set.
    analysis_opts['add_conditionals'] = (8, 0)
    nw_1 = new_network()
    dat.generate_mute_data()
    sources = [1, 2, 3]
    target = [0]
    with pytest.raises(IndexError):
        nw_1._initialise(dat, sources, target)
def test_faes_method():
    """Check if the Faes method is working.

    With 'add_conditionals' set to 'faes', initialisation is expected to
    add every source process at the sample index of the current value to
    the selected source variables.
    """
    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'add_conditionals': 'faes',
    }
    nw_1 = Multivariate_te(max_lag_sources=5, min_lag_sources=3,
                           max_lag_target=7, options=analysis_opts)

    dat = Data()
    dat.generate_mute_data()

    target = 0
    sources = [1, 2, 3]
    nw_1._initialise(dat, sources, target)

    current_sample = nw_1.current_value[1]
    expected_vars = [(source, current_sample) for source in sources]
    assert nw_1._selected_vars_sources == expected_vars, (
        'Did not add correct additional conditioning vars.')
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from the first to the second system with delay u = 45 samples. Only the
    coupled direction (target 1) is analysed here and the result is
    printed; nothing is asserted.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    test_dir = os.path.dirname(__file__)
    d = np.load(os.path.join(test_dir, 'data/lorenz_2_exampledata.npy'))
    dat = Data()
    dat.set_data(d, 'psr')

    # Permutation counts are kept small (max/min statistics used to be 200).
    # 'n_perm_max_seq' should be equal to 'n_perm_min_stat' because the
    # surrogate table from the min statistic is reused.
    n_perm = 21
    analysis_opts = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_omnibus': n_perm,
        'n_perm_max_seq': n_perm,
    }
    # Constructed only; the debugging setup below replaces it before any
    # analysis is run.
    lorenz_analysis = Multivariate_te(max_lag_sources=47, min_lag_sources=42,
                                      max_lag_target=20, tau_target=2,
                                      options=analysis_opts)

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation; this makes things faster because these variables don't
    # have to be tested again.
    forced_lags = range(44, 27, -2)  # 44, 42, ..., 28
    analysis_opts['add_conditionals'] = [(1, lag) for lag in forced_lags]
    lorenz_analysis = Multivariate_te(max_lag_sources=60, min_lag_sources=31,
                                      tau_sources=2, max_lag_target=0,
                                      tau_target=1, options=analysis_opts)

    # The uncoupled direction (target 0) and the full network are skipped.
    res_1 = lorenz_analysis.analyse_single_target(dat, 1)  # coupling
    print(res_1)
def test_multivariate_te_mute():
    """Test multivariate TE estimation on the MUTE example network.

    Test data comes from a network that is used as an example in the paper
    on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has
    the following (non-linear) couplings:

        0 -> 1, u = 2
        0 -> 2, u = 3
        0 -> 3, u = 2 (non-linear)
        3 -> 4, u = 1
        4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.

    The analysis is run twice with the discrete estimator, once per binning
    method ('max_ent' and 'equal'), and the omnibus TE into targets 1 and 2
    has to agree between both runs within a relative tolerance of 5 %.
    """
    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)

    # 'n_perm_max_seq' should be equal to 'n_perm_min_stat' because the
    # surrogate table from the min statistic is reused.
    opts_max_ent = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    # Same settings, only the binning method differs. A separate dict and a
    # separate analysis instance make sure the second run really uses
    # 'equal'; rebinding the options name after the instance exists has no
    # effect on that instance.
    opts_equal = dict(opts_max_ent, discretise_method='equal')

    def run_analysis(options):
        network_analysis = Multivariate_te(max_lag_sources=3,
                                           min_lag_sources=1,
                                           max_lag_target=3,
                                           options=options)
        return network_analysis.analyse_network(dat, targets=[1, 2])

    res_me = run_analysis(opts_max_ent)
    res_eq = run_analysis(opts_equal)

    assert np.isclose(res_eq[1]['omnibus_te'], res_me[1]['omnibus_te'],
                      rtol=0.05), (
        'TE into first target is not equal for both binning methods.')
    assert np.isclose(res_eq[2]['omnibus_te'], res_me[2]['omnibus_te'],
                      rtol=0.05), (
        'TE into second target is not equal for both binning methods.')
def test_multivariate_te_random():
    """Test multivariate TE estimation on two random data sets.

    Run the multivariate TE algorithm on two sets of random data with no
    coupling. This test should find no significant conditionals at all
    (neither in the target's nor in the source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # Two processes, 1000 samples, 20 replications ('psr' layout).
    dat = Data()
    dat.set_data(np.random.rand(2, 1000, 20), 'psr')

    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 200,
        'n_perm_min_stat': 200,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = Multivariate_te(max_lag_sources=5,
                                      options=analysis_opts)

    # Result entries that must stay None when nothing significant is found.
    must_be_none = [
        ('cond_sources_pval', 'Conditional p-value is not None.'),
        ('omnibus_pval', 'Omnibus p-value is not None.'),
        ('omnibus_sign', 'Omnibus significance is not None.'),
        ('conditional_sources_te', 'Conditional TE values is not None.'),
    ]

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for target in (0, 1):
        res = random_analysis.analyse_single_target(dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        for key, message in must_be_none:
            assert res[key] is None, message
def test_multivariate_te_mute():
    """Test multivariate TE estimation on the MUTE example network.

    Test data comes from a network that is used as an example in the paper
    on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has
    the following (non-linear) couplings:

        0 -> 1, u = 2
        0 -> 2, u = 3
        0 -> 3, u = 2 (non-linear)
        3 -> 4, u = 1
        4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.

    The discrete estimator is run once with 'max_ent' binning and once with
    'equal' binning; the omnibus TE into targets 1 and 2 must match between
    the two runs within a relative tolerance of 5 %.
    """
    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)

    # 'n_perm_max_seq' should be equal to 'n_perm_min_stat' because the
    # surrogate table from the min statistic is reused.
    analysis_opts = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    network_analysis = Multivariate_te(max_lag_sources=3, min_lag_sources=1,
                                       max_lag_target=3,
                                       options=analysis_opts)
    res_me = network_analysis.analyse_network(dat, targets=[1, 2])

    # Build a second analysis that differs only in the binning method.
    # Merely rebinding the options name would leave the existing instance
    # on 'max_ent' and make the comparison below vacuous.
    analysis_opts_equal = dict(analysis_opts, discretise_method='equal')
    network_analysis_equal = Multivariate_te(max_lag_sources=3,
                                             min_lag_sources=1,
                                             max_lag_target=3,
                                             options=analysis_opts_equal)
    res_eq = network_analysis_equal.analyse_network(dat, targets=[1, 2])

    assert np.isclose(res_eq[1]['omnibus_te'], res_me[1]['omnibus_te'],
                      rtol=0.05), (
        'TE into first target is not equal for both binning methods.')
    assert np.isclose(res_eq[2]['omnibus_te'], res_me[2]['omnibus_te'],
                      rtol=0.05), (
        'TE into second target is not equal for both binning methods.')
def test_multivariate_te_multiple_runs():
    """Test TE estimation using multiple runs on the GPU.

    Test if data is correctly split over multiple runs when the problem size
    exceeds the GPU global memory and thus requires multiple runs. Using
    7000 permutations requires two runs on a GPU with global memory of
    about 6 GB.

    Smoke test: the analysis is run for targets 1 and 2 and the result is
    not inspected.
    """
    # 'n_perm_max_seq' should be equal to 'n_perm_min_stat' because the
    # surrogate table from the min statistic is reused.
    # NOTE(review): here they differ (21 vs. 7000) -- confirm intended.
    analysis_opts = {
        'cmi_calc_name': 'opencl_kraskov',
        'n_perm_max_stat': 7000,
        'n_perm_min_stat': 7000,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }

    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)

    network_analysis = Multivariate_te(
        max_lag_sources=3,
        min_lag_sources=1,
        max_lag_target=3,
        options=analysis_opts,
    )
    network_analysis.analyse_network(dat, targets=[1, 2])
def test_max_statistic_sequential():
    """Run the sequential max statistic on a manually prepared setup.

    The analysis setup is filled by hand with a current value, a set of
    selected variables and random realisations, then handed to
    stats.max_statistic_sequential(). Smoke test: the returned values are
    not checked.
    """
    opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    dat = Data()
    dat.generate_mute_data(104, 10)

    setup = Multivariate_te(max_lag_sources=5, min_lag_sources=1,
                            max_lag_target=5, options=opts)
    setup.current_value = (0, 4)
    setup.selected_vars_sources = [(1, 1), (1, 2)]
    setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)]

    # Random stand-ins for the realisations: one column per selected
    # variable, and a single column for the current value.
    n_selected = len(setup.selected_vars_full)
    setup._selected_vars_realisations = np.random.rand(
        dat.n_realisations(setup.current_value), n_selected)
    setup._current_value_realisations = np.random.rand(
        dat.n_realisations(setup.current_value), 1)

    sign, p, te = stats.max_statistic_sequential(analysis_setup=setup,
                                                 data=dat, opts=opts)
import os
import time
import numpy as np
from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

# Benchmark script: time a single-target multivariate TE analysis of both
# directions on the Lorenz example data and store results and runtime in the
# 'output' folder next to this file.
start_time = time.time()

# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
                         'data/lorenz_2_exampledata.npy'))
dat = Data()
dat.set_data(d[:, :, 0:100], 'psr')  # use the first 100 replications only
analysis_opts = {
    'cmi_calc_name': 'jidt_kraskov',
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
lorenz_analysis = Multivariate_te(max_lag_sources=50, min_lag_sources=40,
                                  max_lag_target=30, tau_sources=1,
                                  tau_target=3, options=analysis_opts)
res_1 = lorenz_analysis.analyse_single_target(dat, 0)
res_2 = lorenz_analysis.analyse_single_target(dat, 1)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# Join path components explicitly: dirname() has no trailing separator, so
# plain string concatenation would produce '<dir>output/...'.
path = os.path.join(os.path.dirname(__file__), 'output')
# Make sure the folder exists so the results of the long run are not lost.
os.makedirs(path, exist_ok=True)
np.savez(os.path.join(path, 'test_lorenz'), res_1, res_2)
np.save(os.path.join(path, 'test_lorenz_time'), runtime)
def test_multivariate_te_init():
    """Test instance creation for Multivariate_te class.

    Covers the error on a missing estimator name, valid and invalid
    combinations of lags and taus, invalid target/source arguments of
    analyse_single_target(), and instance creation with forced conditionals.
    """
    # Test error on missing estimator
    with pytest.raises(KeyError):
        Multivariate_te(max_lag_sources=5, min_lag_sources=3,
                        max_lag_target=7, options={})

    # Test setting of min and max lags
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    dat = Data()
    dat.generate_mute_data(100, 5)

    # Valid: max lag sources smaller than, and bigger than, max lag target.
    valid_lags = [
        dict(max_lag_sources=5, min_lag_sources=3, max_lag_target=7),
        dict(max_lag_sources=7, min_lag_sources=3, max_lag_target=5),
    ]
    for lags in valid_lags:
        Multivariate_te(options=analysis_opts, **lags)

    # Every entry below has to be rejected with a RuntimeError on creation.
    invalid_lags = [
        # min lag sources bigger than max lag
        dict(max_lag_sources=7, min_lag_sources=8, max_lag_target=5),
        # taus bigger than lags
        dict(max_lag_sources=4, min_lag_sources=2, max_lag_target=5,
             tau_sources=10),
        dict(max_lag_sources=4, min_lag_sources=2, max_lag_target=5,
             tau_target=10),
        # negative lags or taus
        dict(max_lag_sources=-7, min_lag_sources=-4, max_lag_target=-1),
        dict(max_lag_sources=1, min_lag_sources=-4, max_lag_target=-1),
        dict(max_lag_sources=1, min_lag_sources=1, max_lag_target=-1),
        dict(max_lag_sources=1, min_lag_sources=1, max_lag_target=1,
             tau_sources=-1),
        dict(max_lag_sources=1, min_lag_sources=1, max_lag_target=1,
             tau_target=-1),
        # lags or taus are not positive integers
        dict(max_lag_sources=3, min_lag_sources=1.5, max_lag_target=5),
        dict(max_lag_sources=3.5, min_lag_sources=1, max_lag_target=5),
        dict(max_lag_sources=3, min_lag_sources=-1, max_lag_target=5),
        dict(max_lag_sources=-1, min_lag_sources=1, max_lag_target=5),
    ]
    for lags in invalid_lags:
        with pytest.raises(RuntimeError):
            nw = Multivariate_te(options=analysis_opts, **lags)

    # Invalid: sources or target is no int
    nw = Multivariate_te(max_lag_sources=3, min_lag_sources=1,
                         max_lag_target=2, options=analysis_opts)
    invalid_calls = [
        dict(target=1.5, sources='all'),  # no int
        dict(target=-1, sources='all'),   # negative
        dict(target=10, sources='all'),   # not in data
        dict(target={}, sources='all'),   # wrong type
        dict(target=0, sources=-1),       # negative
        dict(target=0, sources=[-1]),     # negative
        dict(target=0, sources=20),       # not in data
        dict(target=0, sources=[20]),     # not in data
    ]
    for call_args in invalid_calls:
        with pytest.raises(RuntimeError):
            nw.analyse_single_target(data=dat, **call_args)

    # Force conditionals: instance creation has to work for a list of
    # variables as well as for a single tuple.
    for forced in ([(0, 1), (1, 3)], (8, 0)):
        analysis_opts['add_conditionals'] = forced
        nw = Multivariate_te(max_lag_sources=3, min_lag_sources=1,
                             max_lag_target=3, options=analysis_opts)
def test_check_source_set():
    """Test the method _check_source_set.

    The method sets the list of source processes from which candidates are
    taken for multivariate TE estimation.
    """
    dat = Data()
    dat.generate_mute_data(100, 5)
    max_lag_sources = 7
    min_lag_sources = 5
    max_lag_target = 5
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}

    def new_network():
        # Fresh instance for every case so state does not leak between them.
        return Multivariate_te(max_lag_sources, min_lag_sources,
                               analysis_opts, max_lag_target)

    # Add list of sources.
    source_list = [1, 2, 3]
    nw_0 = new_network()
    nw_0._check_source_set(source_list, dat.n_processes)
    assert nw_0.source_set == source_list, (
        'Sources were not added correctly.')

    # Assert that the check fails if the target is also in the source list.
    nw_0 = new_network()
    nw_0.target = 0
    with pytest.raises(RuntimeError):
        nw_0._check_source_set(sources=[0, 1, 2, 3],
                               n_processes=dat.n_processes)

    # Test if a single source (an int, no list) is added correctly.
    nw_0 = new_network()
    nw_0._check_source_set(1, dat.n_processes)
    assert type(nw_0.source_set) is list

    # Test if 'all' is handled correctly: every process but the target.
    nw_0 = new_network()
    nw_0.target = 0
    nw_0._check_source_set('all', dat.n_processes)
    assert nw_0.source_set == [1, 2, 3, 4], (
        'Sources were not added correctly.')

    # Test invalid inputs: a source index greater than the number of
    # processes, then a negative source index.
    nw_0 = new_network()
    for invalid_source in (8, -3):
        with pytest.raises(RuntimeError):
            nw_0._check_source_set(invalid_source, dat.n_processes)
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data
    with a given covariance. The second data set is shifted by one sample,
    creating a source-target delay of one sample. This example is modeled
    after the JIDT demo 4 for transfer entropy. The resulting TE can be
    compared to the analytical result (but expect some error in the
    estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay
    1. The final target sample should always be (1, 1), the mandatory sample
    at lag 1, because there is no memory in the process.

    Args:
        estimator : str | None
            name of the CMI estimator to use; defaults to 'jidt_kraskov' if
            None is passed

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources. TODO
    """
    if estimator is None:
        estimator = 'jidt_kraskov'

    n = 1000
    cov = 0.4
    # Draw the source first and the independent noise second.
    source_1 = [rn.normalvariate(0, 1) for _ in range(n)]  # correlated src
    noise = [rn.normalvariate(0, 1) for _ in range(n)]
    target = [cov * s + (1 - cov) * e for s, e in zip(source_1, noise)]

    # Cast everything to numpy so the idtxl estimator understands it. The
    # source is advanced by one sample relative to the target, which gives
    # the source-target delay of one sample; layout is processes x samples.
    source_1 = np.array(source_1)
    target = np.array(target)
    dat = Data(normalise=True)
    dat.set_data(np.vstack((source_1[1:], target[:-1])), 'ps')

    analysis_opts = {
        'cmi_calc_name': estimator,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = Multivariate_te(max_lag_sources=5, min_lag_sources=1,
                                      max_lag_target=5,
                                      options=analysis_opts)
    res_1 = random_analysis.analyse_single_target(dat, 1)  # coupled direction

    # Assert that there are significant conditionals from the source for
    # target 1. For 500 repetitions I got mean errors of 0.02097686 and
    # 0.01454073 for examples 1 and 2 respectively. The maximum errors were
    # 0.093841 and 0.05833172 respectively. This inspired the following
    # error boundaries.
    expected_res = np.log(1 / (1 - np.power(cov, 2)))
    estimated_te = max(res_1['cond_sources_te'])
    diff = np.abs(estimated_te - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    # The relative error is scaled by 100 because it is printed as percent.
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error:'
           '{2:2.2f} % ').format(estimated_te, expected_res,
                                 100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, expected: '
                          '{1}, actual: {2}).'.format(
                              diff, expected_res, res_1['cond_sources_te']))
import os  # needed for os.path below; the import was missing
import time
import numpy as np
from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

# Benchmark script: time a single-target multivariate TE analysis of both
# directions on the Lorenz example data using the OpenCL estimator and store
# results and runtime.
start_time = time.time()

# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
                         'data/lorenz_2_exampledata.npy'))
dat = Data()
dat.set_data(d[:, :, 0:100], 'psr')  # use the first 100 replications only
analysis_opts = {
    'cmi_calc_name': 'opencl_kraskov',
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
lorenz_analysis = Multivariate_te(max_lag_sources=50, min_lag_sources=40,
                                  max_lag_target=30, tau_sources=1,
                                  tau_target=3, options=analysis_opts)
res_1 = lorenz_analysis.analyse_single_target(dat, 0)
res_2 = lorenz_analysis.analyse_single_target(dat, 1)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# NOTE(review): the output location is a hard-coded absolute path of one
# developer's machine; consider a path relative to this file.
np.savez('/home/patriciaw/Dropbox/BIC/#idtxl/test/test_lorenz', res_1, res_2)
np.save('/home/patriciaw/Dropbox/BIC/#idtxl/test/test_lorenz_time', runtime)
import os
import time
import numpy as np
from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

# Benchmark script: time a full network analysis on the MuTE example data
# and store result and runtime in the 'output' folder next to this file.
start_time = time.time()
dat = Data()  # initialise an empty data object
dat.generate_mute_data(n_samples=1000, n_replications=10)
analysis_opts = {
    'cmi_calc_name': 'jidt_kraskov',
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
network_analysis = Multivariate_te(max_lag_sources=5, min_lag_sources=1,
                                   options=analysis_opts)
res = network_analysis.analyse_network(dat)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# Join path components explicitly: dirname() has no trailing separator, so
# plain string concatenation would produce '<dir>output/...'.
path = os.path.join(os.path.dirname(__file__), 'output')
# Make sure the folder exists so the results of the long run are not lost.
os.makedirs(path, exist_ok=True)
np.save(os.path.join(path, 'test'), res)
np.save(os.path.join(path, 'test_time'), runtime)
# System test script: multivariate TE network analysis on the MuTE example
# data. Generates the data, analyses the full network, prints the runtime and
# stores results and runtime.
import time

import numpy as np

from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

start_time = time.time()

dat = Data()  # initialise an empty data object
dat.generate_mute_data(n_samples=1000, n_replications=10)

max_lag = 5
analysis_opts = {
    'cmi_calc_name': 'jidt_kraskov',
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
# Pass arguments by keyword: handed over positionally, the options dict
# would end up in the slot that the other calls in this file use for the
# minimum source lag.
# NOTE(review): min_lag_sources=1 mirrors the other MuTE network script --
# confirm this is the intended minimum lag.
network_analysis = Multivariate_te(max_lag_sources=max_lag,
                                   min_lag_sources=1,
                                   options=analysis_opts)
res = network_analysis.analyse_network(dat)

runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# NOTE(review): the output location is an absolute, machine-specific path;
# kept unchanged so existing workflows keep writing to the same place.
np.save('/home/patriciaw/Dropbox/BIC/#idtxl/test/test', res)
np.save('/home/patriciaw/Dropbox/BIC/#idtxl/test/test_time', runtime)
"""Demo: draw graphs from the results of a multivariate TE analysis.

Runs the analysis on generated example data for a single target and for the
whole network, then hands both results to the graph plotting functions.

author: patricia
"""
from idtxl.data import Data
from idtxl.multivariate_te import Multivariate_te
from idtxl import visualise_graph

# Build a small example data set and report its dimensions.
demo_data = Data()
demo_data.generate_mute_data(n_replications=2, n_samples=500)
n_procs = demo_data.n_processes
n_samples = demo_data.n_samples
n_reps = demo_data.n_replications
summary = 'Demo data with {0} procs, {1} samples, {2} reps.'.format(
    n_procs, n_samples, n_reps)
print(summary)

# Set up the analysis and estimate TE for one target and for all targets.
estimator_opts = dict(cmi_calc_name='jidt_kraskov')
te_analysis = Multivariate_te(
    max_lag_sources=3,
    max_lag_target=3,
    min_lag_sources=1,
    options=estimator_opts,
)
single_target_res = te_analysis.analyse_single_target(data=demo_data,
                                                      target=3)
network_res = te_analysis.analyse_network(data=demo_data)

# Plot the selected variables for the single target and the full network.
plot_single = visualise_graph.plot_selected_vars(single_target_res,
                                                 te_analysis)
plot_full = visualise_graph.plot_network(network_res)
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data with
    a given covariance. The second data set is shifted by one sample creating
    a source-target delay of one sample. This example is modeled after the
    JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
    analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Args:
        estimator : str, optional
            name of the CMI estimator passed on as 'cmi_calc_name'; defaults
            to 'jidt_kraskov' if None

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources.  TODO
    """
    if estimator is None:
        estimator = 'jidt_kraskov'

    n = 1000
    cov = 0.4
    # Draw the source first and the target's noise second, so the sequence of
    # random numbers consumed is the same as before.
    source_1 = [rn.normalvariate(0, 1) for _ in range(n)]  # correlated src
    noise = [rn.normalvariate(0, 1) for _ in range(n)]
    target = [cov * s + (1 - cov) * e for s, e in zip(source_1, noise)]

    # Cast everything to numpy so the idtxl estimator understands it.
    source_1 = np.expand_dims(np.array(source_1), axis=1)
    target = np.expand_dims(np.array(target), axis=1)

    # Dropping the first source sample and the last target sample shifts the
    # two series against each other by one sample (the simulated delay).
    dat = Data(normalise=True)
    dat.set_data(np.vstack((source_1[1:].T, target[:-1].T)), 'ps')
    analysis_opts = {
        'cmi_calc_name': estimator,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = Multivariate_te(max_lag_sources=5, min_lag_sources=1,
                                      max_lag_target=5, options=analysis_opts)
    res_1 = random_analysis.analyse_single_target(dat, 1)  # coupled direction

    # Assert that there are significant conditionals from the source for target
    # 1. For 500 repetitions I got mean errors of 0.02097686 and 0.01454073 for
    # examples 1 and 2 respectively. The maximum errors were 0.093841 and
    # 0.05833172 respectively. This inspired the following error boundaries.
    expected_res = np.log(1 / (1 - np.power(cov, 2)))
    estimated_te = max(res_1['cond_sources_te'])
    diff = np.abs(estimated_te - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    # The message labels the error as a percentage, so scale the relative
    # error by 100 before printing.
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error:'
           '{2:2.2f} % ').format(estimated_te, expected_res,
                                 100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, expected: '
                          '{1}, actual: {2}).'.format(
                              diff, expected_res, res_1['cond_sources_te']))