def test_swap_blocks(): """Test block-wise swapping of samples.""" d = Data() d.generate_mute_data() # block_size divides the length of the data to be permuted, swap_range # leads to 2 remaining blocks n = 50 block_size = 5 swap_range = 4 perm = d._swap_blocks(n, block_size, swap_range) assert perm.shape[0] == n, 'Incorrect length of permuted indices.' # block_size leads to one block of length 1, swap_range divides the no. # blocks n = 50 block_size = 7 swap_range = 4 perm = d._swap_blocks(n, block_size, swap_range) assert perm.shape[0] == n, 'Incorrect length of permuted indices.' n_blocks = np.ceil(n / 7).astype(int) assert n_blocks == 8, 'No. blocks is incorrect.' assert sum(perm == n_blocks - 1) == 1, ('No. remaining samples in the last' ' block is incorrect.') # no remaining samples or blocks n = 30 block_size = 5 swap_range = 3 perm = d._swap_blocks(n, block_size, swap_range) assert perm.shape[0] == n, 'Incorrect length of permuted indices.'
def test_multivariate_te_mute(): """Test multivariate TE estimation on the MUTE example network. Test data comes from a network that is used as an example in the paper on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has the following (non-linear) couplings: 0 -> 1, u = 2 0 -> 2, u = 3 0 -> 3, u = 2 (non-linear) 3 -> 4, u = 1 4 -> 3, u = 1 The maximum order of any single AR process is never higher than 2. """ data = Data() data.generate_mute_data(n_samples=1000, n_replications=10) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'max_lag_sources': 3, 'min_lag_sources': 1, 'max_lag_target': 3, 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21 } # this should be equal to the min stats b/c we # reuse the surrogate table from the min stats network_analysis = MultivariateTE() network_analysis.analyse_network(settings, data, targets=[1, 2])
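# The test above only checks that the analysis runs. As a minimal sketch of
# how the returned results could be inspected (assuming the Results accessors
# used elsewhere in these tests, e.g. print_edge_list and get_target_sources),
# one might add:
def example_inspect_mute_results(results):
    """Illustrative only: print inferred links and per-target sources."""
    results.print_edge_list(fdr=False, weights='binary')
    for target in [1, 2]:
        print('Sources inferred for target {0}: {1}'.format(
            target, results.get_target_sources(target, fdr=False)))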
def test_get_data():
    """Test low-level function for data retrieval."""
    dat = Data()
    dat.generate_mute_data()
    idx_list = [(0, 4), (0, 6)]
    current_value = (0, 3)
    with pytest.raises(RuntimeError):
        dat._get_data(idx_list, current_value)

    # Test retrieved data for one/two realisations in time (i.e., the current
    # value is equal to the last sample)
    n = 7
    d = Data(np.arange(n + 1), 's', normalise=False)
    current_value = (0, n)
    dat = d._get_data([(0, 1)], current_value)[0]
    assert (dat[0][0] == 1)
    assert (dat.shape == (1, 1))
    d = Data(np.arange(n + 2), 's', normalise=False)
    current_value = (0, n)
    dat = d._get_data([(0, 1)], current_value)[0]
    assert (dat[0][0] == 1)
    assert (dat[1][0] == 2)
    assert (dat.shape == (2, 1))

    # Test retrieval of realisations of the current value.
    n = 7
    d = Data(np.arange(n), 's', normalise=False)
    current_value = (0, n - 1)
    dat = d._get_data([current_value], current_value)[0]
def test_visualise_multivariate_te(): """Visualise output of multivariate TE estimation.""" data = Data() data.generate_mute_data(100, 5) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'max_lag_sources': 5, 'min_lag_sources': 4, 'n_perm_max_stat': 25, 'n_perm_min_stat': 25, 'n_perm_omnibus': 50, 'n_perm_max_seq': 50, } network_analysis = MultivariateTE() results = network_analysis.analyse_network(settings, data, targets=[0, 1, 2]) # generate graph plots visualise_graph.plot_selected_vars(results, target=1, sign_sources=False) plt.show() visualise_graph.plot_network(results, fdr=False) plt.show() visualise_graph.plot_network(results, fdr=True) plt.show() visualise_graph.plot_selected_vars(results, target=1, sign_sources=True) plt.show()
def test_multivariate_te_mute(): """Test multivariate TE estimation on the MUTE example network. Test data comes from a network that is used as an example in the paper on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has the following (non-linear) couplings: 0 -> 1, u = 2 0 -> 2, u = 3 0 -> 3, u = 2 (non-linear) 3 -> 4, u = 1 4 -> 3, u = 1 The maximum order of any single AR process is never higher than 2. """ dat = Data() dat.generate_mute_data(n_samples=1000, n_replications=10) analysis_opts = { 'cmi_calc_name': 'jidt_kraskov', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21, # this should be equal to the min stats b/c we # reuse the surrogate table from the min stats } network_analysis = Multivariate_te(max_lag_sources=3, min_lag_sources=1, max_lag_target=3, options=analysis_opts) res = network_analysis.analyse_network(dat, targets=[1, 2])
def test_analyse_network(): """Test method for full network analysis.""" n_processes = 5 # the MuTE network has 5 nodes data = Data() data.generate_mute_data(10, 5) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_max_seq': 21, 'n_perm_omnibus': 21, 'max_lag_sources': 5, 'min_lag_sources': 4 } nw_0 = MultivariateMI() # Test all to all analysis results = nw_0.analyse_network(settings, data, targets='all', sources='all') targets_analysed = results.targets_analysed sources = np.arange(n_processes) assert all(np.array(targets_analysed) == np.arange(n_processes)), ( 'Network analysis did not run on all targets.') for t in targets_analysed: s = np.array(list(set(sources) - set([t]))) assert all(np.array(results._single_target[t].sources_tested) == s), ( 'Network analysis did not run on all sources for target ' '{0}'.format(t)) # Test analysis for subset of targets target_list = [1, 2, 3] results = nw_0.analyse_network(settings, data, targets=target_list, sources='all') targets_analysed = results.targets_analysed assert all(np.array(targets_analysed) == np.array(target_list)), ( 'Network analysis did not run on correct subset of targets.') for t in targets_analysed: s = np.array(list(set(sources) - set([t]))) assert all(np.array(results._single_target[t].sources_tested) == s), ( 'Network analysis did not run on all sources for target ' '{0}'.format(t)) # Test analysis for subset of sources source_list = [1, 2, 3] target_list = [0, 4] results = nw_0.analyse_network(settings, data, targets=target_list, sources=source_list) targets_analysed = results.targets_analysed assert all(np.array(targets_analysed) == np.array(target_list)), ( 'Network analysis did not run for all targets.') for t in targets_analysed: assert all( results._single_target[t].sources_tested == np.array(source_list) ), ('Network analysis did not run on the correct subset ' 'of sources for target {0}'.format(t))
def test_add_conditional_manually():
    """Enforce the conditioning on additional variables."""
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 3}
    nw = MultivariateMI()
    data = Data()
    data.generate_mute_data()

    # Add a conditional with a lag bigger than the max_lag requested above
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw._initialise(settings, data, sources=[1, 2], target=0)

    # Add valid conditionals and test if they were added
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw._initialise(settings=settings, data=data, target=0, sources=[1, 2])
    # Get the list of conditionals after initialisation and convert absolute
    # samples back to lags for comparison.
    cond_list = nw._idx_to_lag(nw.selected_vars_full)
    assert settings['add_conditionals'][0] in cond_list, (
        'First enforced conditional is missing from results.')
    assert settings['add_conditionals'][1] in cond_list, (
        'Second enforced conditional is missing from results.')
def test_add_conditional_manually():
    """Enforce the conditioning on additional variables."""
    settings = {'cmi_estimator': 'JidtKraskovCMI',
                'max_lag': 5,
                'n_perm_max_stat': 21,
                'n_perm_min_stat': 21,
                'n_perm_mi': 21}
    data = Data()
    data.generate_mute_data(10, 3)
    ais = ActiveInformationStorage()

    # Add a conditional with a lag bigger than the max_lag requested above
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        ais.analyse_single_process(settings=settings, data=data, process=0)

    # Add valid conditionals and test if they were added
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    ais._initialise(settings, data, 0)
    # Get the list of conditionals after initialisation and convert absolute
    # samples back to lags for comparison.
    cond_list = ais._idx_to_lag(ais.selected_vars_full)
    assert settings['add_conditionals'][0] in cond_list, (
        'First enforced conditional is missing from results.')
    assert settings['add_conditionals'][1] in cond_list, (
        'Second enforced conditional is missing from results.')
def test_ActiveInformationStorage_init(): """Test instance creation for ActiveInformationStorage class.""" # Test error on missing estimator settings = {'max_lag': 5} data = Data() data.generate_mute_data(10, 3) ais = ActiveInformationStorage() with pytest.raises(RuntimeError): ais.analyse_single_process(settings, data, process=0) # Test tau larger than maximum lag settings['cmi_estimator'] = 'JidtKraskovCMI' settings['tau'] = 10 with pytest.raises(RuntimeError): ais.analyse_single_process(settings, data, process=0) # Test negative tau and maximum lag settings['tau'] = -10 with pytest.raises(RuntimeError): ais.analyse_single_process(settings, data, process=0) settings['tau'] = 1 settings['max_lag'] = -5 with pytest.raises(RuntimeError): ais.analyse_single_process(settings, data, process=0) # Invalid: process is not an int settings['max_lag'] = 5 with pytest.raises(RuntimeError): # no int ais.analyse_single_process(settings, data, process=1.5) with pytest.raises(RuntimeError): # negative ais.analyse_single_process(settings, data, process=-1) with pytest.raises(RuntimeError): # not in data ais.analyse_single_process(settings, data, process=10) with pytest.raises(RuntimeError): # wrong type ais.analyse_single_process(settings, data, process={})
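# For reference, a settings dictionary that passes all of the checks above
# could look like the sketch below (values borrowed from the other AIS tests
# in this file; illustrative only, not a recommendation).
def example_valid_ais_settings():
    return {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag': 5,            # positive maximum lag
        'tau': 1,                # embedding delay not larger than max_lag
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_mi': 21}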
def test_return_local_values(): """Test estimation of local values.""" max_lag = 5 settings = { 'cmi_estimator': 'JidtKraskovCMI', 'local_values': True, # request calculation of local values 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_mi': 21, 'max_lag': max_lag, 'tau': 1} data = Data() data.generate_mute_data(100, 3) ais = ActiveInformationStorage() processes = [1, 2] results = ais.analyse_network(settings, data, processes) for p in processes: lais = results.get_single_process(p, fdr=False)['ais'] if lais is np.nan: continue assert type(lais) is np.ndarray, ( 'LAIS estimation did not return an array of values: {0}'.format( lais)) assert lais.shape[0] == data.n_replications, ( 'Wrong dim (no. replications) in LAIS estimate: {0}'.format( lais.shape)) assert lais.shape[1] == data.n_realisations_samples((0, max_lag)), ( 'Wrong dim (no. samples) in LAIS estimate: {0}'.format(lais.shape))
def test_get_realisations():
    """Test low-level function for data retrieval."""
    dat = Data()
    dat.generate_mute_data()
    idx_list = [(0, 4), (0, 6)]
    current_value = (0, 3)
    with pytest.raises(RuntimeError):
        dat.get_realisations(current_value, idx_list)

    # Test retrieved data for one/two realisations in time (i.e., the current
    # value is equal to the last sample)
    n = 7
    d = Data(np.arange(n + 1), 's', normalise=False)
    current_value = (0, n)
    dat = d.get_realisations(current_value, [(0, 1)])[0]
    assert (dat[0][0] == 1)
    assert (dat.shape == (1, 1))
    d = Data(np.arange(n + 2), 's', normalise=False)
    current_value = (0, n)
    dat = d.get_realisations(current_value, [(0, 1)])[0]
    assert (dat[0][0] == 1)
    assert (dat[1][0] == 2)
    assert (dat.shape == (2, 1))

    # Test retrieval of realisations of the current value.
    n = 7
    d = Data(np.arange(n), 's', normalise=False)
    current_value = (0, n - 1)
    dat = d.get_realisations(current_value, [current_value])[0]
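# The asserts above imply the following bookkeeping (a sketch of the implied
# arithmetic only, not a re-implementation of Data.get_realisations): indices
# are (process, sample) tuples, and the number of realisations returned per
# replication is the number of samples minus the current value's sample index.
def example_realisation_count(n_samples, current_value_sample):
    """Expected no. realisations per replication for a given current value."""
    return n_samples - current_value_sample

# E.g., for Data(np.arange(9)) and current_value = (0, 7), index (0, 1) yields
# the two realisations 1 and 2: example_realisation_count(9, 7) == 2.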
def test_calculate_mean(): """Test if mean over CMI estimates is calculated correctly.""" data = Data() data.generate_mute_data(100, 5) res_0 = pickle.load( open(os.path.join(os.path.dirname(__file__), 'data/mute_results_0.p'), 'rb')) comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'dependent' } comp = NetworkComparison() comp._initialise(comp_settings) comp._create_union(res_0) cmi = comp._calculate_cmi_all_links(data) cmi_mean = comp._calculate_mean([cmi, cmi]) for t in comp.union.targets_analysed: assert (cmi_mean[t] == cmi[t]).all(), ('Error in mean of CMI for ' 'target {0}'.format(t)) if len(cmi[t]) == 0: # skip if no links in results continue assert (cmi_mean[t] == cmi[t][0]).all(), ( 'Error in mean of CMI for target {0} - actual: ({1}), expected: ' '({2})'.format(t, cmi_mean[t], cmi[t][0]))
def test_save_json(): # Test writing dictionaries to JSON files data = Data() data.generate_mute_data(n_samples=100, n_replications=1) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_max_seq': 21, 'n_perm_omnibus': 21, 'max_lag_sources': 2, 'min_lag_sources': 1 } target = 1 sources = [0] nw = MultivariateTE() nw._initialise(settings, data, sources, target) nw._include_target_candidates(data) nw._include_source_candidates(data) nw._prune_candidates(data) nw._test_final_conditional(data) fd, file_path = tempfile.mkstemp() try: # Save settings after running multivariate TE estimation with minimal # settings. _save_load_json(nw.settings, file_path) # Add numpy array nw.settings['y_test_array'] = np.arange(10) _save_load_json(nw.settings, file_path) # Add numpy float nw.settings['z_test_float'] = np.float64(10) _save_load_json(nw.settings, file_path) finally: os.remove(file_path)
def test_add_conditional_manually():
    """Enforce the conditioning on additional variables."""
    settings = {'cmi_estimator': 'JidtKraskovCMI',
                'max_lag_sources': 5,
                'min_lag_sources': 3,
                'max_lag_target': 7}
    nw = BivariateTE()
    data = Data()
    data.generate_mute_data()

    # Add a conditional with a lag bigger than the max_lag requested above
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw.analyse_single_target(settings=settings, data=data, target=0)

    # Add valid conditionals and test if they were added
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw._initialise(settings=settings, data=data, target=0, sources=[1, 2])
    # Get the list of conditionals after initialisation and convert absolute
    # samples back to lags for comparison.
    cond_list = nw._idx_to_lag(nw.selected_vars_full)
    assert settings['add_conditionals'][0] in cond_list, (
        'First enforced conditional is missing from results.')
    assert settings['add_conditionals'][1] in cond_list, (
        'Second enforced conditional is missing from results.')
def test_max_statistic_sequential(): data = Data() data.generate_mute_data(104, 10) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21, 'max_lag_sources': 5, 'min_lag_sources': 1, 'max_lag_target': 5 } setup = MultivariateTE() setup._initialise(settings, data, sources=[0, 1], target=2) setup.current_value = (0, 4) setup.selected_vars_sources = [(1, 1), (1, 2)] setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)] setup._selected_vars_realisations = np.random.rand( data.n_realisations(setup.current_value), len(setup.selected_vars_full)) setup._current_value_realisations = np.random.rand( data.n_realisations(setup.current_value), 1) [sign, p, te] = stats.max_statistic_sequential(analysis_setup=setup, data=data)
def test_calculate_mean(): """Test if mean over CMI estimates is calculated correctly.""" data = Data() data.generate_mute_data(100, 5) res_0 = np.load(os.path.join(os.path.dirname(__file__), 'data/mute_results_0.p')) comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'dependent' } comp = NetworkComparison() comp._initialise(comp_settings) comp._create_union(res_0) cmi = comp._calculate_cmi_all_links(data) cmi_mean = comp._calculate_mean([cmi, cmi]) for t in comp.union.targets_analysed: assert (cmi_mean[t] == cmi[t]).all(), ('Error in mean of CMI for ' 'target {0}'.format(t))
def test_calculate_mean(): """Test if mean over CMI estimates is calculated correctly.""" dat = Data() dat.generate_mute_data(100, 5) res_0 = np.load(os.path.join(os.path.dirname(__file__), 'data/mute_res_0.pkl')) comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'dependent' } comp = NetworkComparison() comp._initialise(comp_settings) comp._create_union(res_0) cmi = comp._calculate_cmi_all_links(dat) cmi_mean = comp._calculate_mean([cmi, cmi]) for t in comp.union['targets']: assert (cmi_mean[t] == cmi[t]).all(), ('Error in mean of CMI for ' 'target {0}'.format(t))
def test_check_source_set(): """Test the method _check_source_set. This method sets the list of source processes from which candidates are taken for multivariate TE estimation. """ dat = Data() dat.generate_mute_data(100, 5) max_lag_sources = 7 min_lag_sources = 5 max_lag_target = 5 analysis_opts = {'cmi_calc_name': 'jidt_kraskov'} nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) sources = [1, 2, 3] nw_0._check_source_set(sources, dat.n_processes) # Assert that initialisation fails if the target is also in the source list sources = [0, 1, 2, 3] nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) nw_0.target = 0 with pytest.raises(RuntimeError): nw_0._check_source_set(sources, dat.n_processes) sources = 1 nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) nw_0._check_source_set(sources, dat.n_processes) assert (type(nw_0.source_set) is list)
def analyse_mute_te_data():
    # Generate example data: the following was run once to generate the
    # example data, which is now stored in the data sub-folder of the test
    # folder.
    data = Data()
    data.generate_mute_data(100, 5)
    # analysis settings
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'max_lag_target': 5,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'permute_in_time': True}
    # network inference for individual data sets
    nw_0 = MultivariateTE()
    res_0 = nw_0.analyse_network(
        settings, data, targets=[0, 1], sources='all')
    pickle.dump(res_0, open(path + 'mute_results_0.p', 'wb'))
    res_1 = nw_0.analyse_network(
        settings, data, targets=[1, 2], sources='all')
    pickle.dump(res_1, open(path + 'mute_results_1.p', 'wb'))
    res_2 = nw_0.analyse_network(
        settings, data, targets=[0, 2], sources='all')
    pickle.dump(res_2, open(path + 'mute_results_2.p', 'wb'))
    res_3 = nw_0.analyse_network(
        settings, data, targets=[0, 1, 2], sources='all')
    pickle.dump(res_3, open(path + 'mute_results_3.p', 'wb'))
    res_4 = nw_0.analyse_network(
        settings, data, targets=[1, 2], sources='all')
    pickle.dump(res_4, open(path + 'mute_results_4.p', 'wb'))
    res_5 = nw_0.analyse_network(settings, data)
    pickle.dump(res_5, open(path + 'mute_results_full.p', 'wb'))
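# The pickled results written above are loaded back by the comparison and I/O
# tests in this file. A minimal sketch of the loading side (assuming the
# module-level imports of os and pickle used above):
def example_load_mute_results(data_path):
    with open(os.path.join(data_path, 'mute_results_0.p'), 'rb') as f:
        return pickle.load(f)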
def test_compare_jidt_open_cl_estimator():
    """Compare results from OpenCl and JIDT estimators for AIS calculation."""
    dat = Data()
    dat.generate_mute_data(100, 2)
    max_lag = 5
    analysis_opts = {
        'cmi_calc_name': 'opencl_kraskov',
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one',
        }
    processes = [2, 3]
    network_analysis = Single_process_storage(max_lag, analysis_opts, tau=1)
    res_opencl = network_analysis.analyse_network(dat, processes)
    analysis_opts['cmi_calc_name'] = 'jidt_kraskov'
    network_analysis = Single_process_storage(max_lag, analysis_opts, tau=1)
    res_jidt = network_analysis.analyse_network(dat, processes)
    # Note that we require equality only up to three significant digits.
    # Results become more exact for bigger data sizes, but this takes too long
    # for a unit test.
    np.testing.assert_approx_equal(
        res_opencl[2]['ais'], res_jidt[2]['ais'], significant=3,
        err_msg='AIS results differ between OpenCl and JIDT estimator.')
    np.testing.assert_approx_equal(
        res_opencl[3]['ais'], res_jidt[3]['ais'], significant=3,
        err_msg='AIS results differ between OpenCl and JIDT estimator.')
    print('AIS for MUTE data proc 2 - opencl: {0} and jidt: {1}'.format(
        res_opencl[2]['ais'], res_jidt[2]['ais']))
    print('AIS for MUTE data proc 3 - opencl: {0} and jidt: {1}'.format(
        res_opencl[3]['ais'], res_jidt[3]['ais']))
def test_multivariate_te_init(): analysis_opts = {'cmi_calc_name': 'jidt_kraskov'} max_lag_target = 5 max_lag_sources = 7 min_lag_sources = 4 target = 0 sources = [2, 3, 4] dat = Data() dat.generate_mute_data(100, 5) nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) nw_0.analyse_single_target(dat, target, sources) # This should just run: Test what happens if the target max lag is bigger # than the source max lag max_lag_sources = 5 max_lag_target = 7 nw_1 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) nw_1.analyse_single_target(dat, target, sources) # The following should crash: min lag bigger than max lag max_lag_sources = 5 min_lag_sources = 7 nw_2 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target, analysis_opts) with pytest.raises(AssertionError): nw_2.analyse_single_target(dat, target, sources)
def test_active_information_storage_init(): """Test instance creation for Active_information_storage class.""" # Test error on missing estimator with pytest.raises(KeyError): Active_information_storage(max_lag=5, options={}) # Test tau larger than maximum lag analysis_opts = {'cmi_calc_name': 'jidt_kraskov'} with pytest.raises(RuntimeError): Active_information_storage(max_lag=5, options=analysis_opts, tau=10) # Test negative tau and maximum lag with pytest.raises(RuntimeError): Active_information_storage(max_lag=5, options=analysis_opts, tau=-10) with pytest.raises(RuntimeError): Active_information_storage(max_lag=-5, options=analysis_opts, tau=1) # Invalid: process is not an int dat = Data() dat.generate_mute_data(10, 3) ais = Active_information_storage(max_lag=5, tau=1, options=analysis_opts) with pytest.raises(RuntimeError): # no int ais.analyse_single_process(data=dat, process=1.5) with pytest.raises(RuntimeError): # negative ais.analyse_single_process(data=dat, process=-1) with pytest.raises(RuntimeError): # not in data ais.analyse_single_process(data=dat, process=10) with pytest.raises(RuntimeError): # wrong type ais.analyse_single_process(data=dat, process={}) # Force conditionals analysis_opts['add_conditionals'] = [(0, 1), (1, 3)] ais = Active_information_storage(max_lag=5, tau=1, options=analysis_opts)
def test_compare_links_within(): """Test comparison of two links within a single network.""" data = Data() data.generate_mute_data(100, 5) path = os.path.join(os.path.dirname(__file__), 'data/') res = pickle.load(open(path + 'mute_results_1.p', 'rb')) # comparison settings comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'alpha_comp': 0.26, 'n_perm_comp': 4, 'tail': 'two' } link_a = [0, 1] link_b = [0, 2] comp = NetworkComparison() comp_settings['stats_type'] = 'independent' res_indep = comp.compare_links_within(settings=comp_settings, link_a=link_a, link_b=link_b, network=res, data=data) comp_settings['stats_type'] = 'dependent' res_dep = comp.compare_links_within(settings=comp_settings, link_a=[0, 1], link_b=[0, 2], network=res, data=data) for r in [res_indep, res_dep]: adj_mat_diff = r.get_adjacency_matrix('diff_abs') adj_mat_comp = r.get_adjacency_matrix('comparison') adj_mat_pval = r.get_adjacency_matrix('pvalue') assert (adj_mat_diff._weight_matrix[link_a[0], link_a[1]] == adj_mat_diff._weight_matrix[link_b[0], link_b[1]]), ( 'Absolute differences for link comparison not equal.') assert (adj_mat_comp._weight_matrix[link_a[0], link_a[1]] == adj_mat_comp._weight_matrix[link_b[0], link_b[1]]), ( 'Comparison results for link comparison not equal.') assert (adj_mat_pval._weight_matrix[link_a[0], link_a[1]] == adj_mat_pval._weight_matrix[link_b[0], link_b[1]]), ( 'P-value for link comparison not equal.') assert (r.targets_analysed == [ link_a[1], link_b[1] ]).all(), ('Analysed targets are not correct.') with pytest.raises(RuntimeError): comp.compare_links_within(settings=comp_settings, link_a=link_a, link_b=[3, 4], network=res, data=data)
def test_ais_fdr():
    settings = {'n_perm_max_seq': 1000, 'n_perm_mi': 1000}
    process_0 = {
        'selected_vars': [(0, 1), (0, 2), (0, 3)],
        'ais_pval': 0.0001,
        'ais_sign': True}
    process_1 = {
        'selected_vars': [(1, 0), (1, 1), (1, 2)],
        'ais_pval': 0.031,
        'ais_sign': True}
    process_2 = {
        'selected_vars': [],
        'ais_pval': 0.41,
        'ais_sign': False}
    res_1 = ResultsSingleProcessAnalysis(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_1._add_single_result(process=0, settings=settings, results=process_0)
    res_1._add_single_result(process=1, settings=settings, results=process_1)
    res_2 = ResultsSingleProcessAnalysis(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_2._add_single_result(process=2, settings=settings, results=process_2)

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_fdr': 0.05,
        'max_lag': 3}
    data = Data()
    data.generate_mute_data(n_samples=100, n_replications=3)
    analysis_setup = ActiveInformationStorage()
    analysis_setup._initialise(settings=settings, data=data, process=1)
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    assert (not res_pruned._single_process[2].selected_vars_sources), (
        'Process 2 has not been pruned from results.')

    alpha_fdr = res_pruned.settings.alpha_fdr
    for k in res_pruned.processes_analysed:
        if not res_pruned._single_process[k]['ais_sign']:
            assert (res_pruned._single_process[k]['ais_pval'] > alpha_fdr), (
                'P-value of non-sign. AIS is not above the FDR-corrected '
                'alpha level.')
            assert (not res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is not empty.')
        else:
            assert (res_pruned._single_process[k]['ais_pval'] < 1), (
                'P-value of sign. AIS is not smaller than 1.')
            assert (res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is empty.')

    # Test function call for a single result
    res_pruned = stats.ais_fdr(settings, res_1)
    print('Successful call on a single result dict.')

    # Test the return value for an insufficient no. of permutations, i.e., no
    # FDR-corrected results (the results class throws an error if no
    # FDR-corrected results exist).
    res_1.settings['n_perm_mi'] = 2
    res_2.settings['n_perm_mi'] = 2
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_significant_processes(fdr=True)
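# stats.ais_fdr is expected to apply an FDR correction over processes. As a
# reference point only (this sketch shows the standard Benjamini-Hochberg
# rule, which is not necessarily IDTxl's exact implementation), significance
# under FDR control at level alpha can be determined as follows:
def example_fdr_benjamini_hochberg(pvalues, alpha=0.05):
    """Return a boolean array marking p-values significant under BH-FDR."""
    pvalues = np.asarray(pvalues)
    m = len(pvalues)
    order = np.argsort(pvalues)
    thresholds = alpha * np.arange(1, m + 1) / m
    passed = pvalues[order] <= thresholds
    significant = np.zeros(m, dtype=bool)
    if passed.any():
        max_idx = np.where(passed)[0].max()
        significant[order[:max_idx + 1]] = True
    return significant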
def test_get_permuted_replications(): """Test if permutation of replications works.""" # Load previously generated example data path = os.path.join(os.path.dirname(__file__), 'data/') res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb')) res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb')) comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'dependent' } comp = NetworkComparison() comp._initialise(comp_settings) comp._create_union(res_0, res_1) # Check permutation for dependent samples test: Replace realisations by # zeros and ones, check if realisations get swapped correctly. dat1 = Data() dat1.normalise = False dat1.set_data(np.zeros((5, 100, 5)), 'psr') dat2 = Data() dat2.normalise = False dat2.set_data(np.ones((5, 100, 5)), 'psr') [cond_a_perm, cv_a_perm, cond_b_perm, cv_b_perm] = comp._get_permuted_replications(data_a=dat1, data_b=dat2, target=1) n_vars = cond_a_perm.shape[1] assert (np.sum(cond_a_perm + cond_b_perm, axis=1) == n_vars).all(), ( 'Dependent samples permutation did not work correctly.') assert np.logical_xor(cond_a_perm, cond_b_perm).all(), ( 'Dependent samples permutation did not work correctly.') # Check permutations for independent samples test: Check the sum over # realisations. comp_settings['stats_type'] = 'independent' comp = NetworkComparison() comp._initialise(comp_settings) comp._create_union(res_0, res_1) [cond_a_perm, cv_a_perm, cond_b_perm, cv_b_perm] = comp._get_permuted_replications(data_a=dat1, data_b=dat2, target=1) n_samples = n_vars * dat1.n_realisations((0, comp.union['max_lag'])) assert np.sum(cond_a_perm + cond_b_perm, axis=None) == n_samples, ( 'Independent samples permutation did not work correctly.') # test unequal number of replications dat2.generate_mute_data(100, 7) with pytest.raises(AssertionError): comp._get_permuted_replications(data_a=dat1, data_b=dat2, target=1)
def test_analyse_network():
    """Test method for full network analysis."""
    n_processes = 5  # the MuTE network has 5 nodes
    dat = Data()
    dat.generate_mute_data(10, 5)
    nw_0 = Multivariate_te(max_lag_sources=5,
                           min_lag_sources=4,
                           options={'cmi_calc_name': 'jidt_kraskov'},
                           max_lag_target=5)

    # Test all to all analysis
    r = nw_0.analyse_network(dat, targets='all', sources='all')
    try:
        del r['fdr']
    except KeyError:
        pass
    k = list(r.keys())
    sources = np.arange(n_processes)
    assert all(np.array(k) == np.arange(n_processes)), (
        'Network analysis did not run on all targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for a subset of targets
    target_list = [1, 2, 3]
    r = nw_0.analyse_network(dat, targets=target_list, sources='all')
    try:
        del r['fdr']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run on correct subset of targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for a subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    r = nw_0.analyse_network(dat, targets=target_list, sources=source_list)
    try:
        del r['fdr']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run for all targets.')
    for t in r.keys():
        assert all(r[t]['sources_tested'] == np.array(source_list)), (
            'Network analysis did not run on the correct subset of sources '
            'for target {0}'.format(t))
def test_circular_shift(): """Test circular shifting of samples.""" d = Data() d.generate_mute_data() n = 20 max_shift = 10 [perm, shift] = d._circular_shift(n, max_shift) assert perm[0] == (n - shift), 'First index after circular shift is wrong!' assert shift <= max_shift, 'Actual shift exceeded max_shift.' assert perm.shape[0] == n, 'Incorrect length of permuted indices.'
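# The asserts above are consistent with a circular shift that behaves like
# np.roll applied to an index array (a sketch under that assumption and for
# shift >= 1, not a statement about the internals of Data._circular_shift):
def example_circular_shift(n, shift):
    perm = np.roll(np.arange(n), shift)
    assert perm[0] == n - shift  # e.g. np.roll(np.arange(20), 3)[0] == 17
    return perm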
def test_analyse_network(): """Test method for full network analysis.""" n_processes = 5 # the MuTE network has 5 nodes data = Data() data.generate_mute_data(10, 5) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_max_seq': 21, 'n_perm_omnibus': 21, 'max_lag_sources': 5, 'min_lag_sources': 4, 'max_lag_target': 5} nw_0 = MultivariateTE() # Test all to all analysis results = nw_0.analyse_network( settings, data, targets='all', sources='all') targets_analysed = results.targets_analysed sources = np.arange(n_processes) assert all(np.array(targets_analysed) == np.arange(n_processes)), ( 'Network analysis did not run on all targets.') for t in results.targets_analysed: s = np.array(list(set(sources) - set([t]))) assert all(np.array(results._single_target[t].sources_tested) == s), ( 'Network analysis did not run on all sources for target ' '{0}'. format(t)) # Test analysis for subset of targets target_list = [1, 2, 3] results = nw_0.analyse_network( settings, data, targets=target_list, sources='all') targets_analysed = results.targets_analysed assert all(np.array(targets_analysed) == np.array(target_list)), ( 'Network analysis did not run on correct subset of targets.') for t in results.targets_analysed: s = np.array(list(set(sources) - set([t]))) assert all(np.array(results._single_target[t].sources_tested) == s), ( 'Network analysis did not run on all sources for target ' '{0}'. format(t)) # Test analysis for subset of sources source_list = [1, 2, 3] target_list = [0, 4] results = nw_0.analyse_network(settings, data, targets=target_list, sources=source_list) targets_analysed = results.targets_analysed assert all(np.array(targets_analysed) == np.array(target_list)), ( 'Network analysis did not run for all targets.') for t in results.targets_analysed: assert all(results._single_target[t].sources_tested == np.array(source_list)), ( 'Network analysis did not run on the correct subset ' 'of sources for target {0}'.format(t))
def test_compare_links_within(): """Test comparison of two links within a single network.""" data = Data() data.generate_mute_data(100, 5) path = os.path.join(os.path.dirname(__file__), 'data/') res = pickle.load(open(path + 'mute_results_1.p', 'rb')) # comparison settings comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'alpha_comp': 0.26, 'n_perm_comp': 4, 'tail': 'two' } link_a = [0, 1] link_b = [0, 2] comp = NetworkComparison() comp_settings['stats_type'] = 'independent' res_indep = comp.compare_links_within(settings=comp_settings, link_a=link_a, link_b=link_b, network=res, data=data) comp_settings['stats_type'] = 'dependent' res_dep = comp.compare_links_within(settings=comp_settings, link_a=[0, 1], link_b=[0, 2], network=res, data=data) for r in [res_indep, res_dep]: assert (r.get_adjacency_matrix('diff_abs')[link_a[0], link_a[1]] == r.get_adjacency_matrix('diff_abs')[link_b[0], link_b[1]]), ( 'Absolute differences for link comparison not equal.') assert (r.get_adjacency_matrix('comparison')[link_a[0], link_a[1]] == r.get_adjacency_matrix('comparison')[link_b[0], link_b[1]]), ( 'Comparison results for link comparison not equal.') assert (r.get_adjacency_matrix('pvalue')[link_a[0], link_a[1]] == r.get_adjacency_matrix('pvalue')[link_b[0], link_b[1]]), ( 'P-value for link comparison not equal.') assert (r.targets_analysed == [link_a[1], link_b[1]]).all(), ( 'Analysed targets are not correct.') with pytest.raises(RuntimeError): comp.compare_links_within(settings=comp_settings, link_a=link_a, link_b=[3, 4], network=res, data=data)
def test_multivariate_te_mute():
    """Test multivariate TE estimation on the MUTE example network.

    Test data comes from a network that is used as an example in the paper on
    the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has the
    following (non-linear) couplings:

        0 -> 1, u = 2
        0 -> 2, u = 3
        0 -> 3, u = 2 (non-linear)
        3 -> 4, u = 1
        4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.
    """
    data = Data()
    data.generate_mute_data(n_samples=1000, n_replications=10)
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 3,
        'min_lag_sources': 1,
        'max_lag_target': 3,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        # n_perm_max_seq should be equal to n_perm_min_stat because we reuse
        # the surrogate table from the min stats
        'n_perm_max_seq': 21}

    network_analysis = MultivariateTE()
    results_me = network_analysis.analyse_network(settings, data,
                                                  targets=[1, 2])
    settings['discretise_method'] = 'equal'
    results_eq = network_analysis.analyse_network(settings, data,
                                                  targets=[1, 2])

    for t in [1, 2]:
        print('Target {0}: equal binning: {1}, max. ent. binning: {2}'.format(
            t,
            results_eq.get_single_target(t, fdr=False).omnibus_te,
            results_me.get_single_target(t, fdr=False).omnibus_te))
        # Skip the comparison of estimates if the analyses returned different
        # source sets. This will always lead to different estimates.
        if (results_eq.get_single_target(t, fdr=False).selected_vars_sources ==
                results_me.get_single_target(t, fdr=False).selected_vars_sources):
            assert np.isclose(
                results_eq.get_single_target(t, fdr=False).omnibus_te,
                results_me.get_single_target(t, fdr=False).omnibus_te,
                rtol=0.05), ('Target {0}: unequal results for both binning '
                             'methods.'.format(t))
def test_export_networkx():
    """Test export to a networkx DiGraph() object."""
    # Test export of a graph with unconnected nodes.
    max_lag = 3
    data = Data(seed=SEED)
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'noise_level': 0,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 1,
        'max_lag_target': max_lag}
    target = 3
    sources = [0, 4]
    te = MultivariateTE()
    results = te.analyse_single_target(
        settings, data, target=target, sources=sources)
    weights = 'binary'
    adj_matrix = results.get_adjacency_matrix(weights=weights, fdr=False)
    digraph = io.export_networkx_graph(
        adjacency_matrix=adj_matrix, weights=weights)
    np.testing.assert_array_equal(
        np.sort(digraph.nodes), np.arange(data.n_processes),
        err_msg='Wrong nodes in exported DiGraph.')

    # Test export of a networkx graph for network inference results.
    weights = 'binary'
    adj_matrix = res_0.get_adjacency_matrix(weights=weights, fdr=False)
    io.export_networkx_graph(adjacency_matrix=adj_matrix, weights=weights)

    # Test export of the source graph
    for s in [True, False]:
        io.export_networkx_source_graph(
            results=res_0, target=1, sign_sources=s, fdr=False)

    # Test export of a networkx graph for network comparison results.
    for weight in ['union', 'comparison', 'pvalue', 'diff_abs']:
        adj_matrix = res_within.get_adjacency_matrix(weights=weight)
        io.export_networkx_graph(adjacency_matrix=adj_matrix, weights=weight)
        for s in [True, False]:
            io.export_networkx_source_graph(
                results=res_0, target=1, sign_sources=s, fdr=False)
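# A minimal sketch of how the exported DiGraph could be used further, assuming
# only the standard networkx API:
def example_use_exported_digraph(digraph):
    import networkx as nx
    print('Nodes: {0}'.format(sorted(digraph.nodes)))
    print('Edges: {0}'.format(list(digraph.edges)))
    print('Is DAG: {0}'.format(nx.is_directed_acyclic_graph(digraph)))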
def test_console_output(): data = Data() data.generate_mute_data(n_samples=10, n_replications=5) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'max_lag_sources': 5, 'min_lag_sources': 4, 'max_lag_target': 5 } nw = MultivariateTE() r = nw.analyse_network(settings, data, targets='all', sources='all') r.print_edge_list(fdr=False, weights='binary')
def test_console_output(): dat = Data() dat.generate_mute_data(n_samples=10, n_replications=5) settings = { 'cmi_estimator': 'JidtKraskovCMI', 'max_lag_sources': 5, 'min_lag_sources': 4, 'max_lag_target': 5 } nw = MultivariateTE() r = nw.analyse_network(settings, dat, targets='all', sources='all') print_res_to_console(dat, r, fdr=False)
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag}
    target = 1
    te = MultivariateTE()
    results = te.analyse_network(settings, data, targets=[target])

    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples, however, a higher no. of samples is
    # not feasible for a unit test).
    if results.get_single_target(target, fdr=False)['te'] is None:
        return
    lte = results.get_single_target(target, fdr=False)['te']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lte) is np.ndarray, (
        'LTE estimation did not return an array of values: {0}'.format(lte))
    assert lte.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LTE estimate: {0}'.format(lte.shape))
    assert lte.shape[1] == data.n_realisations_samples((0, max_lag)), (
        'Wrong dim (no. samples) in LTE estimate: {0}'.format(lte.shape))
    assert lte.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LTE estimate: {0}'.format(lte.shape))

    # Test for correctness of single-link TE estimation by comparing it to the
    # omnibus TE. In this case (single source), the two should be the same.
    settings['local_values'] = False
    results_avg = te.analyse_network(settings, data, targets=[target])
    if results_avg.get_single_target(target, fdr=False)['te'] is None:
        return
    te_single_link = results_avg.get_single_target(target, fdr=False)['te'][0]
    te_omnibus = results_avg.get_single_target(target, fdr=False)['omnibus_te']
    assert np.isclose(te_single_link, te_omnibus), (
        'Single link TE is not equal to omnibus information transfer.')
    # Compare mean local TE to average TE.
    assert np.isclose(te_single_link, np.mean(lte)), (
        'Single link average TE and mean LTE deviate.')
def test_create_union_network(): """Test creation of union of multiple networks.""" dat1 = Data() dat1.generate_mute_data(100, 5) dat2 = Data() dat2.generate_mute_data(100, 5) # Load previously generated example data path = os.path.join(os.path.dirname(__file__), 'data/') res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb')) res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb')) # comparison settings comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'independent' } comp = NetworkComparison() comp._initialise(comp_settings) src_1 = [(0, 2), (0, 1)] src_2 = [(0, 4), (0, 5)] res_0._single_target[1].selected_vars_sources = src_1 res_1._single_target[1].selected_vars_sources = src_2 comp._create_union(res_0, res_1) ref_targets = np.array([0, 1, 2]) assert (comp.union.targets_analysed == ref_targets).all(), ( 'Union does not include all targets.') assert np.array([ True for i in ref_targets if i in comp.union.keys()]).all(), ( 'Not all targets contained in union network.') assert comp.union['max_lag'] == res_0._single_target[1].current_value[1], ( 'The max. lag was not defined correctly.') src_union = comp._idx_to_lag( comp.union._single_target[1]['selected_vars_sources'], comp.union['max_lag']) assert src_union == (src_1 + src_2), ( 'Sources for target 1 were not combined correctly.') # unequal current values in single networks res_0._single_target[1].current_value = (1, 7) # the original is (1, 5) with pytest.raises(ValueError): comp._create_union(res_0, res_1)
def test_create_union_network(): """Test creation of union of multiple networks.""" dat1 = Data() dat1.generate_mute_data(100, 5) dat2 = Data() dat2.generate_mute_data(100, 5) # Load previously generated example data res_0 = np.load(os.path.join(os.path.dirname(__file__), 'data/mute_res_0.pkl')) res_1 = np.load(os.path.join(os.path.dirname(__file__), 'data/mute_res_1.pkl')) # comparison settings comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'tail': 'two', 'n_perm_comp': 6, 'alpha_comp': 0.2, 'stats_type': 'independent' } comp = NetworkComparison() comp._initialise(comp_settings) src_1 = [(0, 2), (0, 1)] src_2 = [(0, 4), (0, 5)] res_0[1]['selected_vars_sources'] = src_1 res_1[1]['selected_vars_sources'] = src_2 comp._create_union(res_0, res_1) ref_targets = [0, 1, 2] assert (comp.union['targets'] == ref_targets).all(), ( 'Union does not include all targets.') assert np.array([True for i in ref_targets if i in comp.union.keys()]).all(), ( 'Not all targets contained in union network.') assert comp.union['max_lag'] == res_0[0]['current_value'][1], ( 'The max. lag was not defined correctly.') src_union = comp._idx_to_lag(comp.union[1]['selected_vars_sources'], comp.union['max_lag']) assert src_union == (src_1 + src_2), ('Sources for target 1 were not ' 'combined correctly.') # unequal current values in single networks res_0[1]['current_value'] = (1, 7) # the original is (1, 5) with pytest.raises(ValueError): comp._create_union(res_0, res_1)
def test_p_value_union(): """Test if the p-value is calculated correctly.""" data = Data() data.generate_mute_data(100, 5) path = os.path.join(os.path.dirname(__file__), 'data/') res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb')) res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb')) comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'n_perm_comp': 6, 'alpha_comp': 0.2, 'tail_comp': 'one_bigger', 'stats_type': 'independent' } comp = NetworkComparison() comp.compare_within(comp_settings, res_0, res_1, data, data) # Replace the surrogate CMI by all zeros for source 0 and all ones for # source 1. Set the CMI difference to 0.5 for both sources. Check if this # results in one significant and one non-significant result with the # correct p-values. comp._initialise(comp_settings) comp._create_union(res_0, res_1) comp._calculate_cmi_diff_within(data, data) comp._create_surrogate_distribution_within(data, data) target = 1 source = 0 comp.cmi_surr[target] = np.zeros((1, comp_settings['n_perm_comp'])) comp.cmi_diff[target] = np.array([0.5]) comp._p_value_union() p = comp.pvalue s = comp.significance assert s[target][source], ( 'The significance was not determined correctly: {0}'.format(s[target])) assert p[target][source] == 1 / comp_settings['n_perm_comp'], ( 'The p-value was not calculated correctly: {0}'.format(p[target])) comp.cmi_surr[target] = np.ones((1, comp_settings['n_perm_comp'])) comp.cmi_diff[target] = np.array([0.5]) comp._p_value_union() p = comp.pvalue s = comp.significance assert not s[target][source], ( 'The significance was not determined correctly: {0}'.format(s[target])) assert p[target][source] == 1.0, ( 'The p-value was not calculated correctly: {0}'.format(p[target]))
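# The two p-values asserted above (1 / n_perm_comp for an all-zero surrogate
# distribution and 1.0 for an all-one distribution, given an observed
# difference of 0.5) are consistent with an empirical p-value of the form
# sketched below. This illustrates the implied arithmetic only and is not
# IDTxl's actual implementation:
def example_empirical_p_value(observed, surrogates):
    surrogates = np.asarray(surrogates)
    count = np.sum(surrogates >= observed)
    return max(count, 1) / len(surrogates)

# example_empirical_p_value(0.5, np.zeros(6)) == 1 / 6
# example_empirical_p_value(0.5, np.ones(6)) == 1.0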
def test_multivariate_te_initialise():
    """Test if all values are set correctly in _initialise()."""
    # Create a data set where one pattern fits into the time series exactly
    # once. This way, we get one realisation per replication for each
    # variable, which is easier to assert/verify later. We also test
    # data.get_realisations this way.
    analysis_opts = {'cmi_calc_name': 'jidt_kraskov'}
    max_lag_target = 5
    max_lag_sources = max_lag_target
    min_lag_sources = 4
    target = 1
    dat = Data(normalise=False)
    n_repl = 30
    n_procs = 2
    n_points = n_procs * (max_lag_sources + 1) * n_repl
    dat.set_data(np.arange(n_points).reshape(
        n_procs, max_lag_sources + 1, n_repl), 'psr')
    nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target,
                           analysis_opts)
    nw_0._initialise(dat, 'all', target)
    assert (not nw_0.selected_vars_full)
    assert (not nw_0.selected_vars_sources)
    assert (not nw_0.selected_vars_target)
    assert ((nw_0._replication_index == np.arange(n_repl)).all())
    assert (nw_0._current_value == (target,
                                    max(max_lag_sources, max_lag_target)))
    assert ((nw_0._current_value_realisations ==
             np.arange(n_points - n_repl, n_points).reshape(n_repl, 1)).all())

    # Check if the Faes method is working
    analysis_opts['add_conditionals'] = 'faes'
    nw_1 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target,
                           analysis_opts)
    dat.generate_mute_data()
    sources = [1, 2, 3]
    target = [0]
    nw_1._initialise(dat, sources, target)
    assert (nw_1._selected_vars_sources ==
            [i for i in it.product(sources, [nw_1.current_value[1]])]), (
        'Did not add correct additional conditioning vars.')

    # Adding a variable that is not in the data set.
    analysis_opts['add_conditionals'] = (8, 0)
    nw_1 = Multivariate_te(max_lag_sources, min_lag_sources, max_lag_target,
                           analysis_opts)
    dat.generate_mute_data()
    sources = [1, 2, 3]
    target = [0]
    with pytest.raises(IndexError):
        nw_1._initialise(dat, sources, target)
def test_check_source_set(): """Test the method _check_source_set. This method sets the list of source processes from which candidates are taken for multivariate TE estimation. """ dat = Data() dat.generate_mute_data(100, 5) max_lag_sources = 7 min_lag_sources = 5 max_lag_target = 5 analysis_opts = {'cmi_calc_name': 'jidt_kraskov'} nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts, max_lag_target) # Add list of sources. sources = [1, 2, 3] nw_0._check_source_set(sources, dat.n_processes) assert nw_0.source_set == sources, 'Sources were not added correctly.' # Assert that initialisation fails if the target is also in the source list sources = [0, 1, 2, 3] nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts, max_lag_target) nw_0.target = 0 with pytest.raises(RuntimeError): nw_0._check_source_set(sources=[0, 1, 2, 3], n_processes=dat.n_processes) # Test if a single source, no list is added correctly. sources = 1 nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts, max_lag_target) nw_0._check_source_set(sources, dat.n_processes) assert (type(nw_0.source_set) is list) # Test if 'all' is handled correctly nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts, max_lag_target) nw_0.target = 0 nw_0._check_source_set('all', dat.n_processes) assert nw_0.source_set == [1, 2, 3, 4], 'Sources were not added correctly.' # Test invalid inputs. nw_0 = Multivariate_te(max_lag_sources, min_lag_sources, analysis_opts, max_lag_target) with pytest.raises(RuntimeError): # sources greater than no. procs nw_0._check_source_set(8, dat.n_processes) with pytest.raises(RuntimeError): # negative value as source nw_0._check_source_set(-3, dat.n_processes)
def test_multivariate_te_mute(): """Test multivariate TE estimation on the MUTE example network. Test data comes from a network that is used as an example in the paper on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has the following (non-linear) couplings: 0 -> 1, u = 2 0 -> 2, u = 3 0 -> 3, u = 2 (non-linear) 3 -> 4, u = 1 4 -> 3, u = 1 The maximum order of any single AR process is never higher than 2. """ data = Data() data.generate_mute_data(n_samples=1000, n_replications=10) settings = { 'cmi_estimator': 'JidtDiscreteCMI', 'discretise_method': 'max_ent', 'max_lag_sources': 3, 'min_lag_sources': 1, 'max_lag_target': 3, 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21, # this should be equal to the min stats b/c we # reuse the surrogate table from the min stats } network_analysis = MultivariateTE() results_me = network_analysis.analyse_network(settings, data, targets=[1, 2]) settings['discretise_method'] = 'equal' results_eq = network_analysis.analyse_network(settings, data, targets=[1, 2]) assert (np.isclose( results_eq.get_single_target(1, fdr=False).omnibus_te, results_me.get_single_target(1, fdr=False).omnibus_te, rtol=0.05)), ( 'TE into first target is not equal for both binning methods.') assert (np.isclose( results_eq.get_single_target(2, fdr=False).omnibus_te, results_me.get_single_target(2, fdr=False).omnibus_te, rtol=0.05)), ( 'TE into second target is not equal for both binning methods.')
def test_faes_method(): """Check if the Faes method is working.""" settings = {'cmi_estimator': 'JidtKraskovCMI', 'add_conditionals': 'faes', 'max_lag_sources': 5, 'min_lag_sources': 3} nw_1 = MultivariateMI() data = Data() data.generate_mute_data() sources = [1, 2, 3] target = 0 nw_1._initialise(settings, data, sources, target) assert (nw_1._selected_vars_sources == [i for i in it.product(sources, [nw_1.current_value[1]])]), ( 'Did not add correct additional conditioning vars.')
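# Worked example of the expected expansion (the itertools.product call only;
# a current-value sample index of 5 is assumed here, matching
# max_lag_sources = 5 in the settings above):
# list(it.product([1, 2, 3], [5])) == [(1, 5), (2, 5), (3, 5)]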
def test_single_source_storage_opencl(): """Test AIS estimation in MuTE example network.""" dat = Data() dat.generate_mute_data(1000, 5) max_lag = 5 analysis_opts = { 'cmi_calc_name': 'opencl_kraskov', 'n_perm_mi': 22, 'alpha_mi': 0.05, 'tail_mi': 'one', } processes = [1, 2, 3] network_analysis = Single_process_storage(max_lag, analysis_opts, tau=1) res = network_analysis.analyse_network(dat, processes) print('AIS for MUTE data proc 1: {0}'.format(res[1]['ais'])) print('AIS for MUTE data proc 2: {0}'.format(res[2]['ais'])) print('AIS for MUTE data proc 3: {0}'.format(res[3]['ais']))
def test_permute_time():
    """Create surrogates by permuting data in time instead of over replications."""
    # Test if perm type is set to default
    default = 'random'
    data = Data()
    data.generate_mute_data(10, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'permute_in_time': True}
    nw_0 = MultivariateMI()
    results = nw_0.analyse_network(
        settings, data, targets='all', sources='all')
    assert results.settings.perm_type == default, (
        'Perm type was not set to default.')
def test_compare_jidt_open_cl_estimator():
    """Compare results from OpenCl and JIDT estimators for AIS calculation."""
    data = Data()
    data.generate_mute_data(1000, 2)
    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'alpha_mi': 0.05,
        'tail_mi': 'one_bigger',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_mi': 21,
        'max_lag': 5,
        'tau': 1}
    processes = [2, 3]
    network_analysis = ActiveInformationStorage()
    res_opencl = network_analysis.analyse_network(settings, data, processes)
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    res_jidt = network_analysis.analyse_network(settings, data, processes)
    # Note that we only require approximate agreement between the two
    # estimators. Results become more exact for bigger data sizes, but this
    # takes too long for a unit test.
    ais_opencl_2 = res_opencl._single_process[2].ais
    ais_jidt_2 = res_jidt._single_process[2].ais
    ais_opencl_3 = res_opencl._single_process[3].ais
    ais_jidt_3 = res_jidt._single_process[3].ais
    print('AIS for MUTE data proc 2 - opencl: {0} and jidt: {1}'.format(
        ais_opencl_2, ais_jidt_2))
    print('AIS for MUTE data proc 3 - opencl: {0} and jidt: {1}'.format(
        ais_opencl_3, ais_jidt_3))
    if not (ais_opencl_2 is np.nan or ais_jidt_2 is np.nan):
        assert abs(ais_opencl_2 - ais_jidt_2) < 0.05, (
            'AIS results differ between OpenCl and JIDT estimator.')
    else:
        assert ais_opencl_2 is ais_jidt_2, (
            'AIS results differ between OpenCl and JIDT estimator.')
    if not (ais_opencl_3 is np.nan or ais_jidt_3 is np.nan):
        assert abs(ais_opencl_3 - ais_jidt_3) < 0.05, (
            'AIS results differ between OpenCl and JIDT estimator.')
    else:
        assert ais_opencl_3 is ais_jidt_3, (
            'AIS results differ between OpenCl and JIDT estimator.')
def test_max_statistic_sequential(): dat = Data() dat.generate_mute_data(104, 10) opts = { 'cmi_calc_name': 'jidt_kraskov', 'n_perm_max_stat': 21, 'n_perm_min_stat': 21, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21, } setup = Multivariate_te(max_lag_sources=5, min_lag_sources=1, max_lag_target=5, options=opts) setup.current_value = (0, 4) setup.selected_vars_sources = [(1, 1), (1, 2)] setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)] setup._selected_vars_realisations = np.random.rand(dat.n_realisations(setup.current_value), len(setup.selected_vars_full)) setup._current_value_realisations = np.random.rand(dat.n_realisations(setup.current_value), 1) [sign, p, te] = stats.max_statistic_sequential(analysis_setup=setup, data=dat, opts=opts)
def test_active_information_storage_opencl(): """Test AIS estimation in MuTE example network.""" data = Data() data.generate_mute_data(1000, 5) settings = { 'cmi_estimator': 'OpenCLKraskovCMI', 'max_lag': 5, 'tau': 1, 'n_perm_mi': 22, 'alpha_mi': 0.05, 'tail_mi': 'one', } processes = [1, 2, 3] network_analysis = ActiveInformationStorage() results = network_analysis.analyse_network(settings, data, processes) print('AIS for MUTE data proc 1: {0}'.format( results.get_single_process(1, fdr=False)['ais'])) print('AIS for MUTE data proc 2: {0}'.format( results.get_single_process(2, fdr=False)['ais'])) print('AIS for MUTE data proc 3: {0}'.format( results.get_single_process(3, fdr=False)['ais']))
def test_multivariate_te_multiple_runs(): """Test TE estimation using multiple runs on the GPU. Test if data is correctly split over multiple runs, if the problem size exceeds the GPU global memory and thus requires multiple runs. Using a number of permutations of 7000 requires two runs on a GPU with global memory of about 6 GB. """ dat = Data() dat.generate_mute_data(n_samples=1000, n_replications=10) analysis_opts = { 'cmi_calc_name': 'opencl_kraskov', 'n_perm_max_stat': 7000, 'n_perm_min_stat': 7000, 'n_perm_omnibus': 21, 'n_perm_max_seq': 21, # this should be equal to the min stats b/c we # reuse the surrogate table from the min stats } network_analysis = Multivariate_te(max_lag_sources=3, min_lag_sources=1, max_lag_target=3, options=analysis_opts) res = network_analysis.analyse_network(dat, targets=[1, 2])