def test_multivariate_te_mute():
    """Run multivariate TE estimation on the MuTE example network.

    The test data are generated from the network that serves as an example in
    the paper on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The
    network has the following (non-linear) couplings:

    0 -> 1, u = 2
    0 -> 2, u = 3
    0 -> 3, u = 2 (non-linear)
    3 -> 4, u = 1
    4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.
    """
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 3,
        'min_lag_sources': 1,
        'max_lag_target': 3,
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_omnibus': n_perm,
        # This should be equal to the min stats because the surrogate table
        # from the min stats is reused.
        'n_perm_max_seq': n_perm,
    }
    mute_data = Data()
    mute_data.generate_mute_data(n_samples=1000, n_replications=10)
    MultivariateTE().analyse_network(settings, mute_data, targets=[1, 2])
def test_check_source_set():
    """Exercise the method _check_source_set.

    The method sets the list of source processes from which candidates are
    taken for multivariate TE estimation.
    """
    data = Data()
    data.generate_mute_data(100, 5)
    opts = {'cmi_calc_name': 'jidt_kraskov'}

    def _new_analysis():
        # Values used by this test for max_lag_sources, min_lag_sources and
        # max_lag_target, followed by the shared options dict.
        return Multivariate_te(7, 5, 5, opts)

    # A plain list of sources is accepted.
    _new_analysis()._check_source_set([1, 2, 3], data.n_processes)

    # The check has to fail if the target is also in the source list.
    analysis = _new_analysis()
    analysis.target = 0
    with pytest.raises(RuntimeError):
        analysis._check_source_set([0, 1, 2, 3], data.n_processes)

    # A single source passed as an int must end up as a list.
    analysis = _new_analysis()
    analysis._check_source_set(1, data.n_processes)
    assert type(analysis.source_set) is list
def test_add_conditional_manually():
    """Check that manually requested conditioning variables are enforced."""
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 3,
        'max_lag_target': 7,
    }
    analysis = BivariateTE()
    data = Data()
    data.generate_mute_data()

    # A conditional with a lag bigger than the max lags requested above has
    # to be rejected.
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        analysis.analyse_single_target(settings=settings, data=data, target=0)

    # Valid conditionals have to show up in the selected variables.
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    analysis._initialise(
        settings=settings, data=data, target=0, sources=[1, 2])

    # Convert absolute sample indices back to lags before comparing.
    cond_list = analysis._idx_to_lag(analysis.selected_vars_full)
    messages = (
        'First enforced conditional is missing from results.',
        'Second enforced conditional is missing from results.',
    )
    for idx, message in enumerate(messages):
        assert settings['add_conditionals'][idx] in cond_list, message
def test_compare_jidt_open_cl_estimator():
    """Compare AIS results obtained with the OpenCL and the JIDT estimator."""
    data = Data()
    data.generate_mute_data(100, 2)
    max_lag = 5
    processes = [2, 3]
    analysis_opts = {
        'cmi_calc_name': 'opencl_kraskov',
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one',
    }

    # Run the same analysis once per estimator, OpenCL first.
    opencl_analysis = Single_process_storage(max_lag, analysis_opts, tau=1)
    res_opencl = opencl_analysis.analyse_network(data, processes)
    analysis_opts['cmi_calc_name'] = 'jidt_kraskov'
    jidt_analysis = Single_process_storage(max_lag, analysis_opts, tau=1)
    res_jidt = jidt_analysis.analyse_network(data, processes)

    # Equality is only required up to three significant digits. Results
    # become more exact for bigger data sizes, but that takes too long for a
    # unit test.
    for proc in processes:
        np.testing.assert_approx_equal(
            res_opencl[proc]['ais'],
            res_jidt[proc]['ais'],
            significant=3,
            err_msg='AIS results differ between OpenCl and JIDT estimator.')

    print('AIS for MUTE data proc 2 - opencl: {0} and jidt: {1}'.format(
        res_opencl[2]['ais'], res_jidt[2]['ais']))
    print('AIS for MUTE data proc 3 - opencl: {0} and jidt: {1}'.format(
        res_opencl[3]['ais'], res_jidt[3]['ais']))
def test_bivariate_te_one_realisation_per_replication():
    """Test the boundary case of one realisation per replication."""
    # Build a data set in which one pattern fits into the time series exactly
    # once, so each variable has one realisation per replication. This is
    # easier to verify later and also exercises data.get_realisations.
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'max_lag_target': 5,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
    }
    target = 0
    n_repl = 10
    n_procs = 2
    n_samples = settings['max_lag_sources'] + 1

    data = Data(normalise=False)
    raw = np.arange(n_procs * n_samples * n_repl)
    data.set_data(raw.reshape(n_procs, n_samples, n_repl), 'psr')

    nw = BivariateTE()
    nw._initialise(settings, data, 'all', target)

    # No variables may be selected right after initialisation.
    assert not nw.selected_vars_full
    assert not nw.selected_vars_sources
    assert not nw.selected_vars_target

    expected_replications = np.arange(n_repl)
    assert (nw._replication_index == expected_replications).all()

    largest_lag = max(settings['max_lag_sources'], settings['max_lag_target'])
    assert nw._current_value == (target, largest_lag)

    last_sample = data.data[target, -1, :]
    assert (nw._current_value_realisations[:, 0] == last_sample).all()
def test_calculate_mean():
    """Test if mean over CMI estimates is calculated correctly.

    The mean over two identical sets of CMI estimates has to be equal to the
    estimates themselves for every target in the union network.
    """
    data = Data()
    data.generate_mute_data(100, 5)
    # The stored results are presumably a pickle ('.p' file). np.load refuses
    # to unpickle by default, so loading pickled objects has to be enabled
    # explicitly.
    res_0 = np.load(
        os.path.join(os.path.dirname(__file__), 'data/mute_results_0.p'),
        allow_pickle=True)
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent'
    }
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0)
    cmi = comp._calculate_cmi_all_links(data)
    cmi_mean = comp._calculate_mean([cmi, cmi])
    for t in comp.union.targets_analysed:
        assert (cmi_mean[t] == cmi[t]).all(), ('Error in mean of CMI for '
                                               'target {0}'.format(t))
def test_multivariate_te_mute():
    """Run multivariate TE estimation on the MuTE example network.

    The test data are generated from the network that serves as an example in
    the paper on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The
    network has the following (non-linear) couplings:

    0 -> 1, u = 2
    0 -> 2, u = 3
    0 -> 3, u = 2 (non-linear)
    3 -> 4, u = 1
    4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.
    """
    n_perm = 21
    estimation_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_omnibus': n_perm,
        # This should be equal to the min stats because the surrogate table
        # from the min stats is reused.
        'n_perm_max_seq': n_perm,
    }
    mute_data = Data()
    mute_data.generate_mute_data(n_samples=1000, n_replications=10)
    analysis = Multivariate_te(
        max_lag_sources=3,
        min_lag_sources=1,
        max_lag_target=3,
        options=estimation_opts)
    analysis.analyse_network(mute_data, targets=[1, 2])
def test_max_statistic_sequential():
    """Run stats.max_statistic_sequential on a manually prepared setup."""
    data = Data()
    data.generate_mute_data(104, 10)
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_omnibus': n_perm,
        'n_perm_max_seq': n_perm,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
    }
    setup = MultivariateTE()
    setup._initialise(settings, data, sources=[0, 1], target=2)

    # Overwrite the analysis state by hand and fill the realisations with
    # random numbers of matching shape.
    setup.current_value = (0, 4)
    setup.selected_vars_sources = [(1, 1), (1, 2)]
    setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)]
    n_real = data.n_realisations(setup.current_value)
    setup._selected_vars_realisations = np.random.rand(
        n_real, len(setup.selected_vars_full))
    setup._current_value_realisations = np.random.rand(n_real, 1)

    sign, p, te = stats.max_statistic_sequential(
        analysis_setup=setup, data=data)
def test_visualise_multivariate_te():
    """Plot the output of a multivariate TE estimation."""
    data = Data()
    data.generate_mute_data(100, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    results = MultivariateTE().analyse_network(
        settings, data, targets=[0, 1, 2])

    # Generate the graph plots one after the other, showing each figure
    # before the next one is created.
    plot_calls = (
        lambda: visualise_graph.plot_selected_vars(
            results, target=1, sign_sources=False),
        lambda: visualise_graph.plot_network(results, fdr=False),
        lambda: visualise_graph.plot_network(results, fdr=True),
        lambda: visualise_graph.plot_selected_vars(
            results, target=1, sign_sources=True),
    )
    for draw in plot_calls:
        draw()
        plt.show()
def test_multivariate_te_init():
    """Run single-target analyses for valid and invalid lag combinations."""
    opts = {'cmi_calc_name': 'jidt_kraskov'}
    target = 0
    sources = [2, 3, 4]
    data = Data()
    data.generate_mute_data(100, 5)

    # Each tuple holds (max_lag_sources, min_lag_sources, max_lag_target).
    # Both combinations should just run; in the second one the target max lag
    # is bigger than the source max lag.
    for max_src, min_src, max_tgt in ((7, 4, 5), (5, 4, 7)):
        analysis = Multivariate_te(max_src, min_src, max_tgt, opts)
        analysis.analyse_single_target(data, target, sources)

    # The following should crash: min lag bigger than max lag.
    invalid = Multivariate_te(5, 7, 7, opts)
    with pytest.raises(AssertionError):
        invalid.analyse_single_target(data, target, sources)
def test_add_conditional_manually():
    """Check that manually requested conditioning variables are enforced."""
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag': 5,
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_mi': n_perm,
    }
    data = Data()
    data.generate_mute_data(10, 3)
    ais = ActiveInformationStorage()

    # A conditional with a lag bigger than the max_lag requested above has to
    # be rejected.
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        ais.analyse_single_process(settings=settings, data=data, process=0)

    # Valid conditionals have to show up in the selected variables.
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    ais._initialise(settings, data, 0)

    # Convert absolute sample indices back to lags before comparing.
    cond_list = ais._idx_to_lag(ais.selected_vars_full)
    messages = (
        'First enforced conditional is missing from results.',
        'Second enforced conditional is missing from results.',
    )
    for idx, message in enumerate(messages):
        assert settings['add_conditionals'][idx] in cond_list, message
def test_ActiveInformationStorage_init():
    """Check that ActiveInformationStorage rejects invalid settings/input."""
    settings = {'max_lag': 5}
    data = Data()
    data.generate_mute_data(10, 3)
    ais = ActiveInformationStorage()

    def _assert_rejected(process=0):
        # Every invalid configuration is expected to raise a RuntimeError.
        with pytest.raises(RuntimeError):
            ais.analyse_single_process(settings, data, process=process)

    # Missing estimator.
    _assert_rejected()

    # Tau larger than the maximum lag.
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    settings['tau'] = 10
    _assert_rejected()

    # Negative tau and negative maximum lag.
    settings['tau'] = -10
    _assert_rejected()
    settings['tau'] = 1
    settings['max_lag'] = -5
    _assert_rejected()

    # Invalid process arguments: no int, negative, not in data, wrong type.
    settings['max_lag'] = 5
    for bad_process in (1.5, -1, 10, {}):
        _assert_rejected(bad_process)
def test_return_local_values():
    """Test estimation of local values.

    For each analysed process the local AIS estimate has to be an array with
    one row per replication and one column per sample realisation.
    """
    max_lag = 5
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_mi': 21,
        'max_lag': max_lag,
        'tau': 1}
    data = Data()
    data.generate_mute_data(100, 3)
    ais = ActiveInformationStorage()
    processes = [1, 2]
    results = ais.analyse_network(settings, data, processes)

    for p in processes:
        lais = results.get_single_process(p, fdr=False)['ais']
        # Skip processes with a scalar NaN result (presumably no estimate
        # available - confirm against the results class). The check is done
        # by value because an identity test against np.nan only matches that
        # one singleton and misses other NaN objects.
        if isinstance(lais, float) and np.isnan(lais):
            continue
        assert isinstance(lais, np.ndarray), (
            'LAIS estimation did not return an array of values: {0}'.format(
                lais))
        assert lais.shape[0] == data.n_replications, (
            'Wrong dim (no. replications) in LAIS estimate: {0}'.format(
                lais.shape))
        assert lais.shape[1] == data.n_realisations_samples((0, max_lag)), (
            'Wrong dim (no. samples) in LAIS estimate: {0}'.format(lais.shape))
def ft2idtxlconverter(filename, FTstructname, fileversion):
    """Convert a FieldTrip-style MATLAB file into a dictionary of IDTxl data.

    Args:
        filename : string
            name of the MATLAB file, passed on to the _ft_* reader helpers
        FTstructname : string
            name of the FieldTrip structure, passed on to the _ft_* reader
            helpers
        fileversion : string
            version of the file, only "v7.3" is supported

    Returns:
        dict
            "dataset": IDTxl Data object holding the trial data;
            "label": output of _ft_label_2_list;
            "time": output of _ft_time_2_numpyarray;
            "fsample": output of _ft_fsample_2_float

    Raises:
        RuntimeError
            if fileversion is not "v7.3"
    """
    # Fail early with a clear message; falling through to the return
    # statement would otherwise hit an unbound variable.
    if fileversion != "v7.3":
        raise RuntimeError(
            'At present only m-files in format 7.3 are supported, '
            'please consider reopening and resaving your m-file in that '
            'version')

    # TODO: This will need better error handling, e.g., for files that are
    # not actually stored in MATLAB's 7.3 format (OSError, RuntimeError).
    print('Creating Python dictionary from FT data structure: ' +
          FTstructname)
    NPData = _ft_trial_2_numpyarray(filename, FTstructname)
    label = _ft_label_2_list(filename, FTstructname)
    NPfsample = _ft_fsample_2_float(filename, FTstructname)
    NPtime = _ft_time_2_numpyarray(filename, FTstructname)

    # Convert data into IDTxl's Data class. FieldTrip had
    # "channel x timesamples" data, but numpy sees the data as stored
    # internally in the hdf5 file as "timesamples x channel". The
    # replications were collected in the third dimension, so the dimensions
    # are s(amples) x p(rocesses) x r(eplications) = 'spr'.
    d = Data()
    d.set_data(NPData, 'spr')
    return {"dataset": d,
            "label": label,
            "time": NPtime,
            "fsample": NPfsample}
def test_ais_fdr():
    """Run FDR correction on hand-made AIS results and check the pruning."""
    perm_settings = {'n_perm_max_seq': 1000, 'n_perm_mi': 1000}
    process_results = {
        0: {'selected_vars': [(0, 1), (0, 2), (0, 3)],
            'ais_pval': 0.0001,
            'ais_sign': True},
        1: {'selected_vars': [(1, 0), (1, 1), (1, 2)],
            'ais_pval': 0.031,
            'ais_sign': True},
        2: {'selected_vars': [],
            'ais_pval': 0.41,
            'ais_sign': False},
    }

    def _build_results(process_ids):
        # Collect the hand-made results of the given processes in a fresh
        # results object.
        res = ResultsSingleProcessAnalysis(
            n_nodes=3, n_realisations=1000, normalised=True)
        for proc in process_ids:
            res._add_single_result(
                process=proc,
                settings=perm_settings,
                results=process_results[proc])
        return res

    res_1 = _build_results((0, 1))
    res_2 = _build_results((2,))

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_fdr': 0.05,
        'max_lag': 3}
    data = Data()
    data.generate_mute_data(n_samples=100, n_replications=3)
    analysis_setup = ActiveInformationStorage()
    analysis_setup._initialise(settings=settings, data=data, process=1)

    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    assert not res_pruned._single_process[2].selected_vars_sources, (
        'Process 2 has not been pruned from results.')

    alpha_fdr = res_pruned.settings.alpha_fdr
    for k in res_pruned.processes_analysed:
        single = res_pruned._single_process[k]
        if single['ais_sign']:
            assert single['ais_pval'] < 1, (
                'P-value of sign. AIS is not smaller 1.')
            assert single['selected_vars'], (
                'List of significant past variables is empty')
        else:
            assert single['ais_pval'] > alpha_fdr, (
                'P-value of non-sign. AIS is not 1.')
            assert not single['selected_vars'], (
                'List of significant past variables is not empty')

    # Test function call for single result
    res_pruned = stats.ais_fdr(settings, res_1)
    print('successful call on single result dict.')

    # With an insufficient number of permutations there are no FDR-corrected
    # results; the results class throws an error if they are requested.
    for res in (res_1, res_2):
        res.settings['n_perm_mi'] = 2
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_significant_processes(fdr=True)
def ft2idtxlconverter(filename, FTstructname, fileversion):
    """Convert FieldTrip-style MATLAB-file into an IDTxl Data object.

    Import a MATLAB structure with fields "trial" (data), "label" (channel
    labels), "time" (time stamps for data samples), and "fsample" (sampling
    rate). This structure is the standard file format in the MATLAB toolbox
    FieldTrip and commonly used to represent neurophysiological data (see
    also http://www.fieldtriptoolbox.org/). The function reads a mat-file
    from disc and returns a dictionary containing the information in the
    mat-file. Data is represented as an IDTxl Data object.

    Args:
        filename : string
            full (matlab) filename on disk
        FTstructname : string
            variable name of the MATLAB structure that is in FieldTrip format
            (autodetect will hopefully be possible later ...)
        fileversion : string
            version of the file, e.g. "v7.3" for MATLAB's 7.3 format; at
            present this is the only supported version

    Returns:
        dict
            "dataset": instance of IDTxl Data object; "label": list of
            channel labels; "time": numpy array of time stamps; "fsample":
            sampling rate

    Raises:
        RuntimeError
            if fileversion is not "v7.3"
    """
    # Fail early with a clear message; falling through to the return
    # statement would otherwise hit an unbound variable.
    # TODO: a fallback option for other file versions could be added here.
    if fileversion != "v7.3":
        raise RuntimeError(
            'At present only m-files in format 7.3 are supported, '
            'please consider reopening and resaving your m-file in that '
            'version')

    # TODO: This will need better error handling, e.g., for files that are
    # not actually stored in MATLAB's 7.3 format (OSError, RuntimeError).
    print('Creating Python dictionary from FT data structure: ' +
          FTstructname)
    NPData = _ft_trial_2_numpyarray(filename, FTstructname)
    label = _ft_label_2_list(filename, FTstructname)
    NPfsample = _ft_fsample_2_float(filename, FTstructname)
    NPtime = _ft_time_2_numpyarray(filename, FTstructname)

    # Convert data into IDTxl's Data class. FieldTrip had
    # "channel x timesamples" data, but numpy sees the data as stored
    # internally in the hdf5 file as "timesamples x channel". The
    # replications were collected in the third dimension, so the dimensions
    # are s(amples) x p(rocesses) x r(eplications) = 'spr'.
    d = Data()
    d.set_data(NPData, 'spr')
    return {"dataset": d,
            "label": label,
            "time": NPtime,
            "fsample": NPfsample}
def test_circular_shift():
    """Check the permuted indices returned by the circular shift of samples."""
    data = Data()
    data.generate_mute_data()
    n_samples, max_shift = 20, 10
    perm, shift = data._circular_shift(n_samples, max_shift)
    assert perm[0] == (n_samples - shift), (
        'First index after circular shift is wrong!')
    assert shift <= max_shift, 'Actual shift exceeded max_shift.'
    assert perm.shape[0] == n_samples, 'Incorrect length of permuted indices.'
def test_analyse_network():
    """Check which targets and sources a full network analysis covers."""
    n_processes = 5  # the MuTE network has 5 nodes
    data = Data()
    data.generate_mute_data(10, 5)
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_max_seq': n_perm,
        'n_perm_omnibus': n_perm,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'max_lag_target': 5,
    }
    nw_0 = MultivariateTE()
    all_processes = np.arange(n_processes)

    def _assert_all_other_sources_tested(res):
        # For sources='all', every process except the target itself has to
        # be tested as a source.
        for t in res.targets_analysed:
            expected = np.array(list(set(all_processes) - {t}))
            tested = np.array(res._single_target[t].sources_tested)
            assert all(tested == expected), (
                'Network analysis did not run on all sources for target '
                '{0}'.format(t))

    # All-to-all analysis.
    results = nw_0.analyse_network(
        settings, data, targets='all', sources='all')
    assert all(np.array(results.targets_analysed) ==
               np.arange(n_processes)), (
        'Network analysis did not run on all targets.')
    _assert_all_other_sources_tested(results)

    # Analysis for a subset of targets.
    target_list = [1, 2, 3]
    results = nw_0.analyse_network(
        settings, data, targets=target_list, sources='all')
    assert all(np.array(results.targets_analysed) ==
               np.array(target_list)), (
        'Network analysis did not run on correct subset of targets.')
    _assert_all_other_sources_tested(results)

    # Analysis for a subset of sources.
    source_list = [1, 2, 3]
    target_list = [0, 4]
    results = nw_0.analyse_network(
        settings, data, targets=target_list, sources=source_list)
    assert all(np.array(results.targets_analysed) ==
               np.array(target_list)), (
        'Network analysis did not run for all targets.')
    for t in results.targets_analysed:
        tested = results._single_target[t].sources_tested
        assert all(tested == np.array(source_list)), (
            'Network analysis did not run on the correct subset '
            'of sources for target {0}'.format(t))
def test_compare_links_within():
    """Test comparison of two links within a single network.

    The comparison is run with independent and with dependent statistics.
    In both cases the results stored for the two compared links have to be
    identical. Comparing against a link that is not part of the loaded
    network is expected to raise a RuntimeError.
    """
    data = Data()
    data.generate_mute_data(100, 5)

    path = os.path.join(os.path.dirname(__file__), 'data/')
    # Use a context manager so that the file handle is closed again.
    with open(path + 'mute_results_1.p', 'rb') as results_file:
        res = pickle.load(results_file)

    # comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'alpha_comp': 0.26,
        'n_perm_comp': 4,
        'tail': 'two'
    }
    link_a = [0, 1]
    link_b = [0, 2]

    comp = NetworkComparison()
    comp_settings['stats_type'] = 'independent'
    res_indep = comp.compare_links_within(settings=comp_settings,
                                          link_a=link_a,
                                          link_b=link_b,
                                          network=res,
                                          data=data)
    comp_settings['stats_type'] = 'dependent'
    res_dep = comp.compare_links_within(settings=comp_settings,
                                        link_a=[0, 1],
                                        link_b=[0, 2],
                                        network=res,
                                        data=data)

    for r in [res_indep, res_dep]:
        assert (r.get_adjacency_matrix('diff_abs')[link_a[0], link_a[1]] ==
                r.get_adjacency_matrix('diff_abs')[link_b[0], link_b[1]]), (
            'Absolute differences for link comparison not equal.')
        assert (r.get_adjacency_matrix('comparison')[link_a[0], link_a[1]] ==
                r.get_adjacency_matrix('comparison')[link_b[0], link_b[1]]), (
            'Comparison results for link comparison not equal.')
        assert (r.get_adjacency_matrix('pvalue')[link_a[0], link_a[1]] ==
                r.get_adjacency_matrix('pvalue')[link_b[0], link_b[1]]), (
            'P-value for link comparison not equal.')
        assert (r.targets_analysed == [link_a[1], link_b[1]]).all(), (
            'Analysed targets are not correct.')

    with pytest.raises(RuntimeError):
        comp.compare_links_within(settings=comp_settings,
                                  link_a=link_a,
                                  link_b=[3, 4],
                                  network=res,
                                  data=data)
def test_data_properties():
    """Check the number of sample realisations reported by a Data object."""
    n_samples = 10
    data = Data(np.arange(n_samples), 's', normalise=False)

    # Without a current value, every sample counts as a realisation.
    n_real = data.n_realisations_samples()
    assert n_real == n_samples, (
        'Realisations in time are not returned correctly.')

    # With a current value, the samples before its index are not counted.
    current_value = (0, 8)
    n_real = data.n_realisations_samples(current_value=current_value)
    assert n_real == (n_samples - current_value[1]), (
        'Realisations in time are not returned correctly when current '
        'value is set.')
def test_console_output():
    """Run a network analysis and print the resulting edge list."""
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'max_lag_target': 5,
    }
    mute_data = Data()
    mute_data.generate_mute_data(n_samples=10, n_replications=5)
    results = MultivariateTE().analyse_network(
        settings, mute_data, targets='all', sources='all')
    results.print_edge_list(fdr=False, weights='binary')
def test_p_value_union():
    """Test if the p-value is calculated correctly.

    The surrogate distribution and the CMI difference of one target are
    replaced by known values, such that the expected significance and
    p-value are known as well.
    """
    data = Data()
    data.generate_mute_data(100, 5)
    path = os.path.join(os.path.dirname(__file__), 'data/')
    # Use context managers so that the file handles are closed again.
    with open(path + 'mute_results_0.p', 'rb') as results_file:
        res_0 = pickle.load(results_file)
    with open(path + 'mute_results_1.p', 'rb') as results_file:
        res_1 = pickle.load(results_file)
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'tail_comp': 'one_bigger',
        'stats_type': 'independent'
    }
    comp = NetworkComparison()
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    # Replace the surrogate CMI by all zeros for source 0 and all ones for
    # source 1. Set the CMI difference to 0.5 for both sources. Check if this
    # results in one significant and one non-significant result with the
    # correct p-values.
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)
    comp._calculate_cmi_diff_within(data, data)
    comp._create_surrogate_distribution_within(data, data)
    target = 1
    source = 0

    comp.cmi_surr[target] = np.zeros((1, comp_settings['n_perm_comp']))
    comp.cmi_diff[target] = np.array([0.5])
    comp._p_value_union()
    p = comp.pvalue
    s = comp.significance
    assert s[target][source], (
        'The significance was not determined correctly: {0}'.format(s[target]))
    assert p[target][source] == 1 / comp_settings['n_perm_comp'], (
        'The p-value was not calculated correctly: {0}'.format(p[target]))

    comp.cmi_surr[target] = np.ones((1, comp_settings['n_perm_comp']))
    comp.cmi_diff[target] = np.array([0.5])
    comp._p_value_union()
    p = comp.pvalue
    s = comp.significance
    assert not s[target][source], (
        'The significance was not determined correctly: {0}'.format(s[target]))
    assert p[target][source] == 1.0, (
        'The p-value was not calculated correctly: {0}'.format(p[target]))
def test_data_type():
    """Test if data class always returns the correct data type.

    Integer and float data are stored in a Data instance; the test checks
    the reported data type as well as the type of realisations, data slices,
    and permuted samples returned by the instance.
    """
    # Change data type for the same object instance.
    d_int = np.random.randint(0, 10, size=(3, 50))
    orig_type = type(d_int[0][0])
    data = Data(d_int, dim_order='ps', normalise=False)
    # The concrete type depends on the platform:
    # https://mail.scipy.org/pipermail/numpy-discussion/2011-November/059261.html
    # Hence, compare against the type automatically assigned by Python or
    # against np.integer
    assert data.data_type is orig_type, 'Data type did not change.'
    assert issubclass(type(data.data[0, 0, 0]), np.integer), (
        'Data type is not an int.')
    d_float = np.random.randn(3, 50)
    data.set_data(d_float, dim_order='ps')
    assert data.data_type is np.float64, 'Data type did not change.'
    # Compare against the abstract np.floating type, in parallel to the
    # np.integer checks; the np.float alias no longer exists in current
    # NumPy releases.
    assert issubclass(type(data.data[0, 0, 0]), np.floating), (
        'Data type is not a float.')

    # Check if data returned by the object have the correct type.
    d_int = np.random.randint(0, 10, size=(3, 50, 5))
    data = Data(d_int, dim_order='psr', normalise=False)
    real = data.get_realisations((0, 5), [(1, 1), (1, 3)])[0]
    assert issubclass(type(real[0, 0]), np.integer), (
        'Realisations type is not an int.')
    sl = data._get_data_slice(0)[0]
    assert issubclass(type(sl[0, 0]), np.integer), (
        'Data slice type is not an int.')
    settings = {'perm_type': 'random'}
    sl_perm = data.slice_permute_samples(0, settings)[0]
    assert issubclass(type(sl_perm[0, 0]), np.integer), (
        'Permuted data slice type is not an int.')
    samples = data.permute_samples((0, 5), [(1, 1), (1, 3)], settings)[0]
    assert issubclass(type(samples[0, 0]), np.integer), (
        'Permuted samples type is not an int.')
def test_return_local_values():
    """Check shape and mean of the local TE estimate for a single target."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_max_seq': n_perm,
        'n_perm_omnibus': n_perm,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag,
    }
    target = 1
    te = MultivariateTE()
    results = te.analyse_network(settings, data, targets=[target])

    # If no sources were inferred there is nothing to check. This may happen
    # sometimes due to too few samples; a higher number of samples is not
    # feasible for a unit test.
    lte = results.get_single_target(target, fdr=False)['te']
    if lte is None:
        return

    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lte) is np.ndarray, (
        'LTE estimation did not return an array of values: {0}'.format(lte))

    # Expected size and error message for each axis of the local TE array.
    expected_dims = (
        (n_sources,
         'Wrong dim (no. sources) in LTE estimate: {0}'),
        (data.n_realisations_samples((0, max_lag)),
         'Wrong dim (no. samples) in LTE estimate: {0}'),
        (data.n_replications,
         'Wrong dim (no. replications) in LTE estimate: {0}'),
    )
    for axis, (expected, message) in enumerate(expected_dims):
        assert lte.shape[axis] == expected, message.format(lte.shape)

    # Check the single link TE estimate against the omnibus TE. In this case
    # (single source), the two should be the same.
    settings['local_values'] = False
    results_avg = te.analyse_network(settings, data, targets=[target])
    single_target_avg = results_avg.get_single_target(target, fdr=False)
    if single_target_avg['te'] is None:
        return
    te_single_link = single_target_avg['te'][0]
    te_omnibus = single_target_avg['omnibus_te']
    assert np.isclose(te_single_link, te_omnibus), (
        'Single link TE is not equal to omnibus information transfer.')

    # Compare mean local TE to average TE.
    assert np.isclose(te_single_link, np.mean(lte)), (
        'Single link average TE and mean LTE deviate.')
def test_faes_method():
    """Check the conditioning variables added by the Faes method."""
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'add_conditionals': 'faes',
        'max_lag_sources': 5,
        'min_lag_sources': 3,
    }
    analysis = MultivariateMI()
    data = Data()
    data.generate_mute_data()
    sources = [1, 2, 3]
    target = 0
    analysis._initialise(settings, data, sources, target)

    # Every source is expected to be selected at the sample index of the
    # current value.
    current_idx = analysis.current_value[1]
    expected_vars = list(it.product(sources, [current_idx]))
    assert analysis._selected_vars_sources == expected_vars, (
        'Did not add correct additional conditioning vars.')
def test_zero_lag():
    """Compare the omnibus MI for a lag of 0 with reference values."""
    expected_mi, source, source_uncorr, target = _get_gauss_data(seed=SEED)
    data = Data(np.hstack((source, target)), dim_order='sp', normalise=False)
    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_max_seq': n_perm,
        'n_perm_omnibus': n_perm,
        # tau_sources is not required, but it should not throw an error if
        # it is provided.
        'tau_sources': 0,
        'max_lag_sources': 0,
        'min_lag_sources': 0,
    }
    results = MultivariateMI().analyse_single_target(
        settings, data, target=1, sources='all')

    # Reference estimate obtained directly from the JIDT core estimator.
    jidt_mi = JidtKraskovMI(settings={'normalise': False}).estimate(
        source, target)
    omnibus_mi = results.get_single_target(1, fdr=False).omnibus_mi
    print('Estimated omnibus MI: {0:0.6f}, estimated MI using JIDT core '
          'estimator: {1:0.6f} (expected: {2:0.6f}).'.format(
              omnibus_mi, jidt_mi, expected_mi))

    close_to_jidt = np.isclose(omnibus_mi, jidt_mi, atol=0.005)
    assert close_to_jidt, (
        'Zero-lag omnibus MI ({0:0.6f}) differs from JIDT estimate '
        '({1:0.6f}).'.format(omnibus_mi, jidt_mi))
    close_to_expected = np.isclose(omnibus_mi, expected_mi, atol=0.05)
    assert close_to_expected, (
        'Zero-lag omnibus MI ({0:0.6f}) differs from expected MI '
        '({1:0.6f}).'.format(omnibus_mi, expected_mi))
def test_discrete_input():
    """Run AIS estimation on discretised data from an AR process."""
    # Generate AR data: each sample is the previous sample scaled by the
    # self-coupling plus Gaussian noise.
    order = 1
    n_samples = 10000
    self_coupling = 0.5
    process = np.zeros(n_samples)
    process[0:order] = np.random.normal(size=(order))
    for t in range(order, n_samples):
        process[t] = self_coupling * process[t - 1] + np.random.normal()

    # Discretise data
    discretiser = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    process_dis, temp = discretiser._discretise_vars(
        var1=process, var2=process)
    data = Data(process_dis, dim_order='s', normalise=False)

    n_perm = 21
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables
        'n_perm_max_stat': n_perm,
        'n_perm_min_stat': n_perm,
        'n_perm_mi': n_perm,
        'max_lag': 2,
    }
    ActiveInformationStorage().analyse_single_process(
        settings=settings, data=data, process=0)
def test_add_conditional_manually():
    """Add a conditioning variable that is not in the data set."""
    opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'add_conditionals': (8, 0),
    }
    analysis = Multivariate_te(
        max_lag_sources=5,
        min_lag_sources=3,
        options=opts,
        max_lag_target=7)
    data = Data()
    data.generate_mute_data()
    target = 0
    sources = [1, 2, 3]
    # The initialisation has to reject the requested conditional.
    with pytest.raises(IndexError):
        analysis._initialise(data, sources, target)
def test_analytical_surrogates():
    """Test generation of analytical surrogates."""
    # Generate data and discretise it such that analytical surrogates can be
    # used.
    expected_mi, source1, source2, target = _get_gauss_data(covariance=0.4)
    discretiser = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    source_dis, target_dis = discretiser._discretise_vars(
        var1=source1, var2=target)
    stacked = np.hstack((source_dis, target_dis))
    data = Data(stacked, dim_order='sp', normalise=False)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 100,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
    }
    res = MultivariateTE().analyse_single_target(settings, data, target=1)

    # The generation of analytical surrogates has to be documented in the
    # settings.
    assert res.settings.analytical_surrogates, (
        'Surrogates were not created analytically.')
def test_data_normalisation():
    """Test if data are normalised correctly when stored in a Data instance.

    Two random integer series with different value ranges are stored with
    normalisation switched on; the stored values must equal the output of
    utils.standardise for each series.
    """
    upper_source = 100
    upper_target = 1000
    source = np.random.randint(upper_source, size=1000)
    target = np.random.randint(upper_target, size=1000)

    data = Data(normalise=True)
    raw = np.vstack((source.T, target.T))
    data.set_data(raw, 'ps')

    expected_source = utils.standardise(source)
    expected_target = utils.standardise(target)
    stored_source = data.data[0, :, 0]
    stored_target = data.data[1, :, 0]
    assert (expected_source == stored_source).all(), (
        'Standardising the source did not work.')
    assert (expected_target == stored_target).all(), (
        'Standardising the target did not work.')
def test_visualise_multivariate_te():
    """Visualise output of multivariate TE estimation.

    Runs a network analysis on MuTE example data for three targets and
    passes the result to the network plot.
    """
    mute_data = Data()
    mute_data.generate_mute_data(100, 5)

    lag_max = 5
    lag_min = 4
    options = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    analysis = Multivariate_te(lag_max, lag_min, options)
    network_result = analysis.analyse_network(mute_data, targets=[0, 1, 2])
    vis.plot_network(network_result)
def test_faes_method():
    """Check if the Faes method is working.

    After initialisation with 'add_conditionals' set to 'faes', the selected
    source variables must be every source paired with the sample index of
    the current value.
    """
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'add_conditionals': 'faes',
        'max_lag_sources': 5,
        'min_lag_sources': 3,
        'max_lag_target': 7,
    }
    nw_1 = MultivariateTE()
    data = Data(seed=SEED)
    data.generate_mute_data()
    sources = [1, 2, 3]
    target = 0
    nw_1._initialise(settings, data, sources, target)

    current_sample = nw_1.current_value[1]
    expected_vars = list(it.product(sources, [current_sample]))
    assert nw_1._selected_vars_sources == expected_vars, (
        'Did not add correct additional conditioning vars.')
def test_visualise_multivariate_te():
    """Visualise output of multivariate TE estimation.

    Runs a network analysis on MuTE example data for three targets and
    passes the result to the network plot.
    """
    mute_data = Data()
    mute_data.generate_mute_data(100, 5)

    permutation_settings = {
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 4,
    }
    settings.update(permutation_settings)

    analysis = MultivariateTE()
    network_result = analysis.analyse_network(
        settings, mute_data, targets=[0, 1, 2])
    vis.plot_network(network_result)
def test_analyse_single_target():
    """Test call to network_analysis method.

    Two random binary sources and their XOR as target are decomposed with
    the SxPID estimator; the averaged values for three atoms are checked
    against fixed ranges.
    """
    n_samples = 1000
    alphabet_size = 2
    x = np.random.randint(0, alphabet_size, n_samples)
    y = np.random.randint(0, alphabet_size, n_samples)
    z = np.logical_xor(x, y).astype(int)
    data = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Run Goettingen estimator
    pid = MultivariatePID()
    settings = {
        'pid_estimator': 'SxPID',
        'tail': 'two',
        'lags_pid': [0, 0],
        'verbose': False,
    }
    est_goettingen = pid.analyse_single_target(
        settings=settings, data=data, target=2, sources=[0, 1])

    joint_atom = est_goettingen._single_target[2]['avg'][((1, 2),)][2]
    assert 0.39 < joint_atom <= 0.42, (
        'Goettingen estimator incorrect synergy: {0}, should approx. 0.415...'.format(
            joint_atom))

    first_atom = est_goettingen._single_target[2]['avg'][((1,),)][2]
    assert 0.56 < first_atom <= 0.6, (
        'Goettingen estimator incorrect unique s1: {0}, should approx. 0.5896...'.format(
            first_atom))

    second_atom = est_goettingen._single_target[2]['avg'][((2,),)][2]
    assert 0.56 < second_atom <= 0.6, (
        'Goettingen estimator incorrect unique s2: {0}, should approx. 0.5896...'.format(
            second_atom))
def test_zero_lag():
    """Test analysis for 0 lag.

    A target is built as a weighted sum of the source and independent
    noise. The omnibus MI from a zero-lag BivariateMI analysis is compared
    with the JIDT core estimate and with the MI expected from the empirical
    correlation.
    """
    covariance = 0.4
    n = 10000
    source = np.random.normal(0, 1, size=n)
    noise = np.random.normal(0, 1, size=n)
    target = covariance * source + (1 - covariance) * noise

    # The theoretical correlation would be
    # covariance / sqrt(covariance**2 + (1 - covariance)**2); the empirical
    # correlation of the generated samples is used instead.
    corr = np.corrcoef(source, target)[0, 1]
    expected_mi = -0.5 * np.log(1 - corr**2)

    data = Data(np.vstack((source, target)), dim_order='ps', normalise=False)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 0,
        'min_lag_sources': 0,
    }
    analysis = BivariateMI()
    results = analysis.analyse_single_target(
        settings, data, target=1, sources='all')

    core_estimator = JidtKraskovMI(settings={})
    jidt_mi = core_estimator.estimate(source, target)
    omnibus_mi = results.get_single_target(1, fdr=False).omnibus_mi

    summary = ('Estimated omnibus MI: {0:0.6f}, estimated MI using JIDT core '
               'estimator: {1:0.6f} (expected: {2:0.6f}).')
    print(summary.format(omnibus_mi, jidt_mi, expected_mi))

    assert np.isclose(omnibus_mi, jidt_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from JIDT estimate ({1:0.6f}).'
        .format(omnibus_mi, jidt_mi))
    assert np.isclose(omnibus_mi, expected_mi, rtol=0.05), (
        'Zero-lag omnibus MI ({0:0.6f}) differs from expected MI ({1:0.6f}).'
        .format(omnibus_mi, expected_mi))
def runTE():
    """Run the TE model on the uploaded dataframe and report the outcome.

    On a POST request the JSON object in the ``inputTE`` form field is read,
    empty entries are dropped and values that look like integers are
    converted to ``int``. The module-level ``dataframe`` is converted to a
    3-D array and passed, together with the settings, to
    ``MultiVariateTime``; the figure returned by ``run()`` is written to
    ``test.png``.

    Returns:
        The value of ``generateResponse(response)``. ``response`` is empty
        for non-POST requests, reports ``'fail'`` when no dataframe has been
        uploaded and ``'pass'`` after a successful run.
    """
    global dataframe
    response = {}

    def _to_int_if_possible(value):
        # Settings arrive as text; keep the original value when it is not
        # an integer literal instead of swallowing every exception.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return value

    if request.method == 'POST':
        if isinstance(dataframe, pd.DataFrame):
            raw_settings = json.loads(request.form['inputTE'])
            inputTE = {key: _to_int_if_possible(value)
                       for key, value in raw_settings.items()
                       if len(value) > 0}

            numpy_format = dataframe.to_numpy()
            # One dataframe column per first-axis entry, plus a trailing
            # axis of length one. Transposing keeps every column's samples
            # together (a plain reshape of the row-major array interleaves
            # the columns) and works for any number of columns.
            # NOTE(review): assumes Data's default dimension order is
            # processes x samples x replications - confirm.
            arr_format = numpy_format.T[:, :, None]

            mod = MultiVariateTime(Data(arr_format), inputTE)
            _, figure = mod.run()
            figure.savefig("test.png")
            response['results'] = 'pass'
            response['details'] = 'Successfully discretize'
        else:
            response['results'] = 'fail'
            response['details'] = 'No dataframe uploaded'
    return generateResponse(response)
def test_analyse_single_target():
    """Test call to network_analysis method.

    Two random binary sources and their XOR as target are decomposed with
    the Tartu estimator; synergy is expected close to 1 and both unique
    terms close to 0.
    """
    n_samples = 50
    alphabet_size = 2
    x = np.random.randint(0, alphabet_size, n_samples)
    y = np.random.randint(0, alphabet_size, n_samples)
    z = np.logical_xor(x, y).astype(int)
    dat = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Run Tartu estimator
    options = {'pid_calc_name': 'pid_tartu', 'tail': 'two'}
    pid = Partial_information_decomposition(options=options)
    est_tartu = pid.analyse_single_target(
        data=dat, target=2, sources=[0, 1], lags=[0, 0])

    assert 0.9 < est_tartu['syn_s1_s2'] <= 1.1, (
        'Tartu estimator incorrect synergy: {0}, should approx. 1'.format(
            est_tartu['syn_s1_s2']))
    assert est_tartu['unq_s1'] < 0.1, (
        'Tartu estimator incorrect unique s1: {0}, should approx. 0'.format(
            est_tartu['unq_s1']))
    assert est_tartu['unq_s2'] < 0.1, (
        'Tartu estimator incorrect unique s2: {0}, should approx. 0'.format(
            est_tartu['unq_s2']))
def test_add_conditional_manually():
    """Add a variable that is not in the data set.

    Initialising the analysis with the enforced conditional (8, 0) is
    expected to raise an IndexError.
    """
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'add_conditionals': (8, 0),
        'max_lag_sources': 5,
        'min_lag_sources': 3,
        'max_lag_target': 7,
    }
    network = MultivariateTE()
    mute_data = Data()
    mute_data.generate_mute_data()
    source_set = [1, 2, 3]
    target_process = 0
    with pytest.raises(IndexError):
        network._initialise(settings, mute_data, source_set, target_process)
def test_calculate_cmi_all_links():
    """Test if the CMI is estimated correctly.

    Correlated Gaussian data are shifted by one sample against each other,
    a stored result is loaded to build the union network, and the CMI
    estimated for the single enforced link is compared with the expected MI.
    """
    expected_mi, source, _, target = _get_gauss_data()
    source = source[1:]
    target = target[:-1]
    data = Data(np.hstack((source, target)), dim_order='sp', normalise=False)

    # Load the stored result through a context manager so the file handle is
    # closed again. The pickle is a fixture shipped next to this file.
    result_path = os.path.join(os.path.dirname(__file__),
                               'data/mute_results_0.p')
    with open(result_path, 'rb') as result_file:
        res_0 = pickle.load(result_file)

    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent',
    }
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0)

    # Set selected variable to the source, one sample in the past of the
    # current_value (1, 5).
    comp.union._single_target[1]['selected_vars_sources'] = [(0, 4)]
    cmi = comp._calculate_cmi_all_links(data)
    print('correlated Gaussians: TE result {0:.4f} bits; expected to be '
          '{1:0.4f} bit for the copy'.format(cmi[1][0], expected_mi))
    assert np.isclose(cmi[1][0], expected_mi, atol=0.05), (
        'Estimated TE {0:0.6f} differs from expected TE {1:0.6f}.'.format(
            cmi[1][0], expected_mi))
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from first to second system with delay u = 45 samples. Only the
    direction of the coupling (target 1) is analysed here.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    data_file = os.path.join(os.path.dirname(__file__),
                             'data/lorenz_2_exampledata.npy')
    dat = Data()
    dat.set_data(np.load(data_file), 'psr')

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # this should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. The lag settings above are overridden accordingly.
    settings.update({
        'add_conditionals': [(1, idx) for idx in range(44, 26, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        # was 0 before, but this is no longer allowed by the estimator
        'max_lag_target': 1,
        'tau_target': 1,
    })

    # Target 1 is the coupled direction.
    res_1 = lorenz_analysis.analyse_single_target(settings, dat, 1)
    print(res_1)
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from first to second system with delay u = 45 samples. Only the
    direction of the coupling (target 1) is analysed here.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
    data_file = os.path.join(os.path.dirname(__file__),
                             'data/lorenz_2_exampledata.npy')
    dat = Data()
    dat.set_data(np.load(data_file), 'psr')

    analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # this should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = Multivariate_te(max_lag_sources=47,
                                      min_lag_sources=42,
                                      max_lag_target=20,
                                      tau_target=2,
                                      options=analysis_opts)

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. The analysis object is rebuilt with the debugging lags.
    analysis_opts['add_conditionals'] = [
        (1, idx) for idx in range(44, 26, -2)]
    lorenz_analysis = Multivariate_te(max_lag_sources=60,
                                      min_lag_sources=31,
                                      tau_sources=2,
                                      max_lag_target=0,
                                      tau_target=1,
                                      options=analysis_opts)

    # Target 1 is the coupled direction.
    res_1 = lorenz_analysis.analyse_single_target(dat, 1)
    print(res_1)
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Run the multivariate TE algorithm on two sets of random data, where the
    second set is derived from the first. This test should find no
    significant conditionals at all (neither in the target's nor in the
    source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    # The first `lag` samples are fresh random values, the remaining samples
    # are taken from d_0 starting at index `lag`.
    head = np.random.rand(1, lag, 20)
    d_1 = np.hstack((head, d_0[:, lag:, :]))

    dat = Data()
    dat.set_data(np.vstack((d_0, d_1)), 'psr')
    options = {
        'cmi_calc_name': 'jidt_discrete',
        'discretise_method': 'max_ent',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = Multivariate_te(max_lag_sources=5, options=options)

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for target in (0, 1):
        res = random_analysis.analyse_single_target(dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert res['cond_sources_pval'] is None, (
            'Conditional p-value is not None.')
        assert res['omnibus_pval'] is None, 'Omnibus p-value is not None.'
        assert res['omnibus_sign'] is None, (
            'Omnibus significance is not None.')
        assert res['conditional_sources_te'] is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Run the multivariate TE algorithm on two sets of random data, where the
    second set is derived from the first. This test should find no
    significant conditionals at all (neither in the target's nor in the
    source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    # The first `lag` samples are fresh random values, the remaining samples
    # are taken from d_0 starting at index `lag`.
    head = np.random.rand(1, lag, 20)
    d_1 = np.hstack((head, d_0[:, lag:, :]))

    data = Data()
    data.set_data(np.vstack((d_0, d_1)), 'psr')
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for t in (0, 1):
        results = random_analysis.analyse_single_target(settings, data, t)
        single = results.get_single_target(t, fdr=False)
        assert len(single.selected_vars_full) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not single.selected_vars_sources, (
            'Conditional sources is not empty.')
        assert len(single.selected_vars_target) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert single.selected_sources_pval is None, (
            'Conditional p-value is not None.')
        assert single.omnibus_pval is None, (
            'Omnibus p-value is not None.')
        assert single.omnibus_sign is None, (
            'Omnibus significance is not None.')
        assert single.selected_sources_te is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from first to second system with delay u = 45 samples. Only the
    direction of the coupling (target 1) is analysed here.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
    data_file = os.path.join(os.path.dirname(__file__),
                             'data/lorenz_2_exampledata.npy')
    data = Data()
    data.set_data(np.load(data_file), 'psr')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # this should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. Note conditionals are specified using lags.
    settings.update({
        'add_conditionals': [(1, lag) for lag in range(19, 0, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        'max_lag_target': 1,
        'tau_target': 1,
    })

    # Just analyse the direction of coupling
    results = lorenz_analysis.analyse_single_target(settings, data, target=1)
    print(results._single_target)
    print(results.get_adjacency_matrix('binary'))
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Run the multivariate TE algorithm on two sets of random data, where the
    second set is derived from the first. This test should find no
    significant conditionals at all (neither in the target's nor in the
    source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    # The first `lag` samples are fresh random values, the remaining samples
    # are taken from d_0 starting at index `lag`.
    head = np.random.rand(1, lag, 20)
    d_1 = np.hstack((head, d_0[:, lag:, :]))

    data = Data()
    data.set_data(np.vstack((d_0, d_1)), 'psr')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for t in (0, 1):
        results = random_analysis.analyse_single_target(settings, data, t)
        single = results.get_single_target(t, fdr=False)
        assert len(single.selected_vars_full) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not single.selected_vars_sources.size, (
            'Conditional sources is not empty.')
        assert len(single.selected_vars_target) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert single.selected_sources_pval is None, (
            'Conditional p-value is not None.')
        assert single.omnibus_pval is None, (
            'Omnibus p-value is not None.')
        assert single.omnibus_sign is None, (
            'Omnibus significance is not None.')
        assert single.selected_sources_te is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    Run the multivariate TE algorithm on two Lorenz systems with a coupling
    from first to second system with delay u = 45 samples. Only the
    direction of the coupling (target 1) is analysed here.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
    example_path = os.path.join(os.path.dirname(__file__),
                                'data/lorenz_2_exampledata.npy')
    lorenz_data = np.load(example_path)
    data = Data()
    data.set_data(lorenz_data, 'psr')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # this should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. Note conditionals are specified using lags.
    debug_overrides = {
        'add_conditionals': [(1, lag) for lag in range(19, 0, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        'max_lag_target': 1,
        'tau_target': 1,
    }
    settings.update(debug_overrides)

    # Just analyse the direction of coupling
    results = lorenz_analysis.analyse_single_target(settings, data, target=1)
    print(results._single_target)
    print(results.get_adjacency_matrix('binary'))
def test_single_source_storage_opencl():
    """Test AIS estimation in MuTE example network.

    Runs the storage analysis with the OpenCL Kraskov estimator on processes
    1 to 3 and prints the AIS value of each.
    """
    mute_data = Data()
    mute_data.generate_mute_data(1000, 5)

    max_lag = 5
    options = {
        'cmi_calc_name': 'opencl_kraskov',
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one',
    }
    analysed_processes = [1, 2, 3]
    storage_analysis = Single_process_storage(max_lag, options, tau=1)
    res = storage_analysis.analyse_network(mute_data, analysed_processes)

    ais_1 = res[1]['ais']
    print('AIS for MUTE data proc 1: {0}'.format(ais_1))
    ais_2 = res[2]['ais']
    print('AIS for MUTE data proc 2: {0}'.format(ais_2))
    ais_3 = res[3]['ais']
    print('AIS for MUTE data proc 3: {0}'.format(ais_3))
def test_analyse_network():
    """Test AIS estimation for the whole network.

    Checks that an analysis without an explicit process list covers every
    process and that malformed process definitions raise a ValueError.
    """
    mute_data = Data()
    mute_data.generate_mute_data(10, 3)
    estimator_options = {'cmi_calc_name': 'jidt_kraskov'}
    ais = Active_information_storage(max_lag=5,
                                     tau=1,
                                     options=estimator_options)

    # Test analysis of 'all' processes
    network_result = ais.analyse_network(data=mute_data)
    analysed = np.array(list(network_result.keys()))
    expected = np.arange(mute_data.n_processes)
    assert all(analysed == expected), (
        'Network analysis did not run on all targets.')

    # Test check for correct definition of processes
    with pytest.raises(ValueError):  # no list
        ais.analyse_network(data=mute_data, processes={})
    with pytest.raises(ValueError):  # no list of ints
        ais.analyse_network(data=mute_data, processes=[1.5, 0.7])
def test_swap_blocks():
    """Test block-wise swapping of samples.

    Three combinations of sample count, block size and swap range are
    permuted; each permutation must have as many entries as there are
    samples. For the second combination the number of blocks and the size of
    the last block are checked as well.
    """
    d = Data()
    d.generate_mute_data()

    # block_size divides the length of the data to be permuted, swap_range
    # leads to 2 remaining blocks
    n = 50
    block_size = 5
    swap_range = 4
    perm = d._swap_blocks(n, block_size, swap_range)
    assert perm.shape[0] == n, 'Incorrect length of permuted indices.'

    # block_size leads to one block of length 1, swap_range divides the no.
    # blocks
    n = 50
    block_size = 7
    swap_range = 4
    perm = d._swap_blocks(n, block_size, swap_range)
    assert perm.shape[0] == n, 'Incorrect length of permuted indices.'
    # Derive the block count from block_size instead of repeating the
    # literal, so the check follows the configuration above.
    n_blocks = np.ceil(n / block_size).astype(int)
    assert n_blocks == 8, 'No. blocks is incorrect.'
    assert sum(perm == n_blocks - 1) == 1, (
        'No. remaining samples in the last block is incorrect.')

    # no remaining samples or blocks
    n = 30
    block_size = 5
    swap_range = 3
    perm = d._swap_blocks(n, block_size, swap_range)
    assert perm.shape[0] == n, 'Incorrect length of permuted indices.'
def test_faes_method():
    """Check if the Faes method is working.

    After initialisation with 'add_conditionals' set to 'faes', the selected
    source variables must be every source paired with the sample index of
    the current value.
    """
    options = {
        'cmi_calc_name': 'jidt_kraskov',
        'add_conditionals': 'faes',
    }
    nw_1 = Multivariate_te(max_lag_sources=5,
                           min_lag_sources=3,
                           max_lag_target=7,
                           options=options)
    mute_data = Data()
    mute_data.generate_mute_data()
    sources = [1, 2, 3]
    target = 0
    nw_1._initialise(mute_data, sources, target)

    current_sample = nw_1.current_value[1]
    expected_vars = list(it.product(sources, [current_sample]))
    assert nw_1._selected_vars_sources == expected_vars, (
        'Did not add correct additional conditioning vars.')
def test_single_source_storage_opencl():
    """Test AIS estimation in MuTE example network.

    Runs the storage analysis with the OpenCL Kraskov estimator on processes
    1 to 3 and prints the AIS value of each.
    """
    mute_data = Data()
    mute_data.generate_mute_data(1000, 5)

    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'max_lag': 5,
        'tau': 1,
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one',
    }
    analysed_processes = [1, 2, 3]
    storage_analysis = Single_process_storage()
    res = storage_analysis.analyse_network(
        settings, mute_data, analysed_processes)

    ais_1 = res[1]['ais']
    print('AIS for MUTE data proc 1: {0}'.format(ais_1))
    ais_2 = res[2]['ais']
    print('AIS for MUTE data proc 2: {0}'.format(ais_2))
    ais_3 = res[3]['ais']
    print('AIS for MUTE data proc 3: {0}'.format(ais_3))
def test_gauss_data():
    """Test bivariate TE estimation from correlated Gaussians.

    The target is analysed against one correlated and one uncorrelated
    source. Only the correlated source may be detected, and the TE estimate
    must agree with a direct JIDT CMI estimate on the same realisations.
    """
    # Generate data and add a delay of one sample.
    expected_mi, source, source_uncorr, target = _get_gauss_data()
    source = source[1:]
    source_uncorr = source_uncorr[1:]
    target = target[:-1]
    stacked = np.hstack((source, source_uncorr, target))
    data = Data(stacked, dim_order='sp')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 2,
        'min_lag_sources': 1,
        'max_lag_target': 1,
    }
    analysis = BivariateTE()
    results = analysis.analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    te = results.get_single_target(2, fdr=False)['te'][0]
    sources = results.get_target_sources(2, fdr=False)

    # Assert that only the correlated source was detected.
    n_sources = len(sources)
    assert n_sources == 1, 'Wrong no. inferred sources: {0}.'.format(
        n_sources)
    assert sources[0] == 0, 'Wrong inferred source: {0}.'.format(sources[0])

    # Compare BivariateTE() estimate to JIDT estimate.
    current_value = (2, 2)
    source_vars = results.get_single_target(2, False)['selected_vars_sources']
    target_vars = results.get_single_target(2, False)['selected_vars_target']
    var1 = data.get_realisations(current_value, source_vars)[0]
    var2 = data.get_realisations(current_value, [current_value])[0]
    cond = data.get_realisations(current_value, target_vars)[0]
    core_estimator = JidtKraskovCMI({})
    jidt_cmi = core_estimator.estimate(var1=var1, var2=var2,
                                       conditional=cond)

    print('Estimated TE: {0:0.6f}, estimated TE using JIDT core estimator: '
          '{1:0.6f} (expected: {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
    assert np.isclose(te, jidt_cmi, atol=0.005), (
        'Estimated TE {0:0.6f} differs from JIDT estimate {1:0.6f} (expected: '
        'TE {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
def te(mdf1):
    """Estimate multivariate TE between the spike trains of ``mdf1``.

    Every spike train in ``mdf1.spiketrains`` is binned with a 5 ms bin
    size, the binned arrays are wrapped in an IDTxl ``Data`` object and a
    full-network multivariate TE analysis is run. The resulting network is
    plotted.

    Args:
        mdf1: Object exposing a ``spiketrains`` iterable.
            NOTE(review): presumably a neo/elephant container - confirm
            against the caller.

    Returns:
        None. The binned trains and the Data object are printed, and the
        network plot is produced as a side effect.
    """
    import numpy as np
    from idtxl.multivariate_te import MultivariateTE
    from idtxl.data import Data

    n_procs = 1

    binary_trains = [
        conv.BinnedSpikeTrain(spiketrain,
                              binsize=5 * pq.ms,
                              t_start=0 * pq.s).to_array()
        for spiketrain in mdf1.spiketrains
    ]
    print(binary_trains)

    dat = Data(np.array(binary_trains), dim_order='spr')
    dat.n_procs = n_procs

    # 'cmi_estimator' has to name a conditional-MI estimator class;
    # 'JidtKraskov' on its own does not name one, so the CMI variant of the
    # Kraskov estimator is requested explicitly.
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 3,
        'max_lag_target': 3,
        'min_lag_sources': 1,
    }
    print(dat)

    mte = MultivariateTE()
    res_full = mte.analyse_network(settings=settings, data=dat)

    # Only the full-network result is plotted. No single-target analysis is
    # run in this function, so there is no single-target result to plot
    # (referencing one raised a NameError).
    visualise_graph.plot_network(res_full)
def test_data_type():
    """Test if stats always returns surrogates with the correct data type.

    The same Data instance is first filled with integer data and then with
    float data. After each step the stored data and the surrogates from both
    surrogate generators must have the matching numeric type.
    """
    # Change data type for the same object instance.
    d_int = np.random.randint(0, 10, size=(3, 50))
    orig_type = type(d_int[0][0])
    data = Data(d_int, dim_order='ps', normalise=False)
    # The concrete type depends on the platform:
    # https://mail.scipy.org/pipermail/numpy-discussion/2011-November/059261.html
    assert data.data_type is orig_type, 'Data type did not change.'
    assert isinstance(data.data[0, 0, 0], np.integer), (
        'Data type is not an int.')

    settings = {'permute_in_time': True, 'perm_type': 'random'}
    surr = stats._get_surrogates(data=data,
                                 current_value=(0, 5),
                                 idx_list=[(1, 3), (2, 4)],
                                 n_perm=20,
                                 perm_settings=settings)
    assert isinstance(surr[0, 0], np.integer), (
        'Realisations type is not an int.')
    surr = stats._generate_spectral_surrogates(data=data,
                                               scale=1,
                                               n_perm=20,
                                               perm_settings=settings)
    assert isinstance(surr[0, 0, 0], np.integer), (
        'Realisations type is not an int.')

    d_float = np.random.randn(3, 50)
    data.set_data(d_float, dim_order='ps')
    assert data.data_type is np.float64, 'Data type did not change.'
    # np.floating is the abstract base of NumPy's float scalars; the old
    # np.float alias no longer exists in current NumPy releases.
    assert isinstance(data.data[0, 0, 0], np.floating), (
        'Data type is not a float.')
    surr = stats._get_surrogates(data=data,
                                 current_value=(0, 5),
                                 idx_list=[(1, 3), (2, 4)],
                                 n_perm=20,
                                 perm_settings=settings)
    assert isinstance(surr[0, 0], np.floating), (
        'Realisations type is not a float.')
    surr = stats._generate_spectral_surrogates(data=data,
                                               scale=1,
                                               n_perm=20,
                                               perm_settings=settings)
    assert isinstance(surr[0, 0, 0], np.floating), (
        'Realisations type is not a float.')
def test_multivariate_te_random():
    """Test multivariate TE estimation on two random data sets.

    Run the multivariate TE algorithm on two sets of random data with no
    coupling. This test should find no significant conditionals at all
    (neither in the target's nor in the source's past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    random_data = np.random.rand(2, 1000, 20)
    dat = Data()
    dat.set_data(random_data, 'psr')
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 5,
        'n_perm_max_stat': 200,
        'n_perm_min_stat': 200,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # Assert that there are no significant conditionals in either direction
    # other than the mandatory single sample in the target's past (which
    # ensures that we calculate a proper TE at any time in the algorithm).
    for target in (0, 1):
        res = random_analysis.analyse_single_target(settings, dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert res['cond_sources_pval'] is None, (
            'Conditional p-value is not None.')
        assert res['omnibus_pval'] is None, 'Omnibus p-value is not None.'
        assert res['omnibus_sign'] is None, (
            'Omnibus significance is not None.')
        assert res['conditional_sources_te'] is None, (
            'Conditional TE values is not None.')
def test_p_value_union():
    """Test if the p-value is calculated correctly.

    Two stored results are compared within one data set. The surrogate CMI
    values and the CMI difference are then replaced by fixed numbers so that
    the expected significance and p-value of each source are known.
    """
    dat = Data()
    dat.generate_mute_data(100, 5)

    # The stored results are pickles, which np.load only reads when pickle
    # support is switched on explicitly. Both files are fixtures shipped
    # next to this test.
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    res_0 = np.load(os.path.join(data_dir, 'mute_res_0.pkl'),
                    allow_pickle=True)
    res_1 = np.load(os.path.join(data_dir, 'mute_res_1.pkl'),
                    allow_pickle=True)

    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'tail_comp': 'one_bigger',
        'stats_type': 'independent',
    }
    comp = NetworkComparison()
    # Run the public comparison once; its return value is not needed here.
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    # Replace the surrogate CMI by all zeros for source 0 and all ones for
    # source 1. Set the CMI difference to 0.5 for both sources. Check if this
    # results in one significant and one non-significant result with the
    # correct p-values.
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)
    comp._calculate_cmi_diff_within(dat, dat)
    comp._create_surrogate_distribution_within(dat, dat)
    target = 1
    for perm in range(comp_settings['n_perm_comp']):
        comp.cmi_surr[perm][target] = np.array([0, 1])
    comp.cmi_diff[target] = np.array([0.5, 0.5])
    [p, s] = comp._p_value_union()

    assert (s[target] == np.array([True, False])).all(), (
        'The significance was not determined correctly: {0}'.format(
            s[target]))
    p_1 = 1 / comp_settings['n_perm_comp']
    p_2 = 1.0
    print(p[target])
    assert (p[target] == np.array([p_1, p_2])).all(), (
        'The p-value was not calculated correctly: {0}'.format(p[target]))
def test_multivariate_te_mute():
    """Test multivariate TE estimation on the MUTE example network.

    Test data comes from a network that is used as an example in the paper
    on the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has
    the following (non-linear) couplings:

        0 -> 1, u = 2
        0 -> 2, u = 3
        0 -> 3, u = 2 (non-linear)
        3 -> 4, u = 1
        4 -> 3, u = 1

    The maximum order of any single AR process is never higher than 2.

    The network is analysed once per binning method ('max_ent' and 'equal')
    with the discrete estimator, and the omnibus TE into targets 1 and 2 is
    required to agree between the two runs.
    """
    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)

    base_opts = {
        'cmi_calc_name': 'jidt_discrete',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        # this should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats
        'n_perm_max_seq': 21,
    }

    # Each binning method gets its own options dict and its own analysis
    # object. Rebinding the options name after construction would leave the
    # existing analysis object on the first method, so both runs would use
    # identical settings.
    results = {}
    for method in ('max_ent', 'equal'):
        analysis_opts = dict(base_opts, discretise_method=method)
        network_analysis = Multivariate_te(max_lag_sources=3,
                                           min_lag_sources=1,
                                           max_lag_target=3,
                                           options=analysis_opts)
        results[method] = network_analysis.analyse_network(dat,
                                                           targets=[1, 2])
    res_me = results['max_ent']
    res_eq = results['equal']

    assert np.isclose(
        res_eq[1]['omnibus_te'], res_me[1]['omnibus_te'], rtol=0.05), (
            'TE into first target is not equal for both binning methods.')
    assert np.isclose(
        res_eq[2]['omnibus_te'], res_me[2]['omnibus_te'], rtol=0.05), (
            'TE into second target is not equal for both binning methods.')