def multivariate_pid_4D(data, settings):
    src, trg = _parse_channels(settings, dim=4)
    dataEff = _shuffle_target(data, trg, settings)
    dataIDTxl = Data(dataEff, dim_order='rps', normalise=False)
    pid = MultivariatePID()
    rez = pid.analyse_single_target(settings=settings['settings_estimator'],
                                    data=dataIDTxl, target=trg, sources=src)
    return np.array([rez.get_single_target(trg)['avg'][k][2]
                     for k in multivariate_pid_key(dim=4)])
def test_single_source_storage_gaussian():
    n = 1000
    np.random.seed(SEED)
    proc_1 = np.random.normal(0, 1, size=n)
    proc_2 = np.random.normal(0, 1, size=n)
    # Cast everything to numpy so the IDTxl estimator understands it.
    data = Data(np.array([proc_1, proc_2]), dim_order='ps')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_mi': 0.05,
        'tail_mi': 'one_bigger',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_mi': 21,
        'max_lag': 5,
        'tau': 1
    }
    processes = [1]
    network_analysis = ActiveInformationStorage()
    results = network_analysis.analyse_network(settings, data, processes)
    print('AIS for random normal data without memory (expected is NaN): '
          '{0}'.format(results._single_process[1].ais))
    assert results._single_process[1].ais is np.nan, (
        'Estimator did not return NaN for memoryless data.')
def idtxlParallelCPU(data, settings, NCore=None):
    # Get number of processes
    idxProcesses = settings['dim_order'].index('p')
    NProcesses = data.shape[idxProcesses]

    # Convert data to IDTxl format
    dataIDTxl = Data(data, dim_order=settings['dim_order'])

    # Initialise analysis object
    analysis_class = getAnalysisClass(settings['method'])

    # Initialise multiprocessing pool
    if NCore is None:
        NCore = pathos.multiprocessing.cpu_count() - 1
    pool = pathos.multiprocessing.ProcessingPool(NCore)

    # Analyse each target in a separate worker process
    targetLst = list(range(NProcesses))
    parallelTask = lambda trg: analysis_class.analyse_single_target(
        settings=settings, data=dataIDTxl, target=trg)
    rez = pool.map(parallelTask, targetLst)
    return rez
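# Usage sketch for idtxlParallelCPU (added for illustration, not from the
# original source). It assumes that `getAnalysisClass` maps the hypothetical
# method name 'MultivariateTE' to idtxl.multivariate_te.MultivariateTE and
# that the raw array is laid out as (processes, samples, replications).
import numpy as np

raw = np.random.rand(4, 500, 10)
settings = {
    'dim_order': 'psr',
    'method': 'MultivariateTE',  # hypothetical key consumed by getAnalysisClass
    'cmi_estimator': 'JidtKraskovCMI',
    'min_lag_sources': 1,
    'max_lag_sources': 3,
    'max_lag_target': 3,
    'n_perm_max_stat': 21,
    'n_perm_min_stat': 21,
    'n_perm_omnibus': 21,
    'n_perm_max_seq': 21
}
results_per_target = idtxlParallelCPU(raw, settings, NCore=2)  # one result per process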
def test_max_statistic_sequential():
    dat = Data()
    dat.generate_mute_data(104, 10)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5
    }
    setup = MultivariateTE()
    setup._initialise(settings, dat, sources=[0, 1], target=2)
    setup.current_value = (0, 4)
    setup.selected_vars_sources = [(1, 1), (1, 2)]
    setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)]
    setup._selected_vars_realisations = np.random.rand(
        dat.n_realisations(setup.current_value),
        len(setup.selected_vars_full))
    setup._current_value_realisations = np.random.rand(
        dat.n_realisations(setup.current_value), 1)
    [sign, p, te] = stats.max_statistic_sequential(analysis_setup=setup,
                                                   data=dat)
def test_multivariate_te_multiple_runs():
    """Test TE estimation using multiple runs on the GPU.

    Test if data is correctly split over multiple runs, if the problem size
    exceeds the GPU's global memory and thus requires multiple runs. Using
    7000 permutations requires two runs on a GPU with global memory of about
    6 GB.
    """
    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)
    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'max_lag_sources': 3,
        'min_lag_sources': 1,
        'max_lag_target': 3,
        'n_perm_max_stat': 7000,
        'n_perm_min_stat': 7000,
        'n_perm_omnibus': 21,
        # This should equal the min stats because the surrogate table from
        # the min stats is reused.
        'n_perm_max_seq': 21
    }
    network_analysis = MultivariateTE()
    network_analysis.analyse_network(settings, dat, targets=[1, 2])
def test_compare_jidt_open_cl_estimator():
    """Compare results from OpenCL and JIDT estimators for AIS calculation."""
    dat = Data()
    dat.generate_mute_data(1000, 2)
    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one_bigger',
        'n_perm_max_stat': 21,
        'max_lag': 5,
        'tau': 1
    }
    processes = [2, 3]
    network_analysis = ActiveInformationStorage()
    res_opencl = network_analysis.analyse_network(settings, dat, processes)
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    res_jidt = network_analysis.analyse_network(settings, dat, processes)
    # Note that equality is only required up to a few digits. Results become
    # more exact for bigger data sizes, but this takes too long for a unit
    # test.
    print('AIS for MUTE data proc 2 - opencl: {0} and jidt: {1}'.format(
        res_opencl[2]['ais'], res_jidt[2]['ais']))
    print('AIS for MUTE data proc 3 - opencl: {0} and jidt: {1}'.format(
        res_opencl[3]['ais'], res_jidt[3]['ais']))
    if not (res_opencl[2]['ais'] is np.nan or res_jidt[2]['ais'] is np.nan):
        assert np.abs(res_opencl[2]['ais'] - res_jidt[2]['ais']) < 0.05, (
            'AIS results differ between OpenCL and JIDT estimator.')
    else:
        assert res_opencl[2]['ais'] is res_jidt[2]['ais'], (
            'AIS results differ between OpenCL and JIDT estimator.')
    if not (res_opencl[3]['ais'] is np.nan or res_jidt[3]['ais'] is np.nan):
        assert np.abs(res_opencl[3]['ais'] - res_jidt[3]['ais']) < 0.05, (
            'AIS results differ between OpenCL and JIDT estimator.')
    else:
        assert res_opencl[3]['ais'] is res_jidt[3]['ais'], (
            'AIS results differ between OpenCL and JIDT estimator.')
def test_calculate_mean():
    """Test if mean over CMI estimates is calculated correctly."""
    dat = Data()
    dat.generate_mute_data(100, 5)
    res_0 = np.load(
        os.path.join(os.path.dirname(__file__), 'data/mute_res_0.pkl'))
    comparison_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent'
    }
    comp = Network_comparison(comparison_opts)
    comp._create_union(res_0)
    cmi = comp._calculate_cmi(dat)
    cmi_mean = comp._calculate_mean([cmi, cmi])
    for t in comp.union['targets']:
        assert (cmi_mean[t] == cmi[t]).all(), (
            'Error in mean of CMI for target {0}'.format(t))
def test_add_conditional_manually():
    """Enforce the conditioning on additional variables."""
    settings = {'cmi_estimator': 'JidtKraskovCMI',
                'max_lag_sources': 5,
                'min_lag_sources': 3}
    nw = BivariateMI()
    data = Data()
    data.generate_mute_data()

    # Add a conditional with a lag bigger than the max_lag requested above
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw.analyse_single_target(settings=settings, data=data, target=0)

    # Add valid conditionals and test if they were added
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw._initialise(settings=settings, data=data, target=0, sources=[1, 2])
    # Get the list of conditionals after initialisation and convert absolute
    # samples back to lags for comparison.
    cond_list = nw._idx_to_lag(nw.selected_vars_full)
    assert settings['add_conditionals'][0] in cond_list, (
        'First enforced conditional is missing from results.')
    assert settings['add_conditionals'][1] in cond_list, (
        'Second enforced conditional is missing from results.')
def infer_network(network_inference, time_series,
                  parallel_target_analysis=False):
    # Define parameter options dictionaries
    network_inference_algorithms = pd.DataFrame()
    network_inference_algorithms['Description'] = pd.Series({
        'bMI_greedy': 'Bivariate Mutual Information via greedy algorithm',
        'bTE_greedy': 'Bivariate Transfer Entropy via greedy algorithm',
        'mMI_greedy': 'Multivariate Mutual Information via greedy algorithm',
        'mTE_greedy': 'Multivariate Transfer Entropy via greedy algorithm',
        'cross_corr': 'Cross-correlation thresholding algorithm'
    })
    network_inference_algorithms['Required parameters'] = pd.Series({
        'bMI_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'tau_target', 'cmi_estimator', 'z_standardise',
            'permute_in_time', 'n_perm_max_stat', 'n_perm_min_stat',
            'n_perm_omnibus', 'n_perm_max_seq', 'fdr_correction', 'p_value'
        ],
        'bTE_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'max_lag_target', 'tau_target', 'cmi_estimator',
            'z_standardise', 'permute_in_time', 'n_perm_max_stat',
            'n_perm_min_stat', 'n_perm_omnibus', 'n_perm_max_seq',
            'fdr_correction', 'p_value'
        ],
        'mMI_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'tau_target', 'cmi_estimator', 'z_standardise',
            'permute_in_time', 'n_perm_max_stat', 'n_perm_min_stat',
            'n_perm_omnibus', 'n_perm_max_seq', 'fdr_correction', 'p_value'
        ],
        'mTE_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'max_lag_target', 'tau_target', 'cmi_estimator',
            'z_standardise', 'permute_in_time', 'n_perm_max_stat',
            'n_perm_min_stat', 'n_perm_omnibus', 'n_perm_max_seq',
            'fdr_correction', 'p_value'
        ],
        'cross_corr': ['min_lag_sources', 'max_lag_sources']
    })
    try:
        # Ensure that a network inference algorithm has been specified
        if 'algorithm' not in network_inference:
            raise ParameterMissing('algorithm')
        # Ensure that the provided algorithm is implemented
        if network_inference.algorithm not in (
                network_inference_algorithms.index):
            raise ParameterValue(network_inference.algorithm)
        # Ensure that all parameters required by the algorithm have been
        # provided
        par_required = network_inference_algorithms['Required parameters'][
            network_inference.algorithm]
        for par in par_required:
            if par not in network_inference:
                raise ParameterMissing(par)
    except ParameterMissing as e:
        print(e.msg, e.par_names)
        raise
    except ParameterValue as e:
        print(e.msg, e.par_value)
        raise
    else:
        nodes_n = np.shape(time_series)[0]
        can_be_z_standardised = True
        if network_inference.z_standardise:
            # Check if data can be normalised per process (assuming the
            # first dimension represents processes, as in the rest of the
            # code)
            can_be_z_standardised = np.all(np.std(time_series, axis=1) > 0)
            if not can_be_z_standardised:
                print('Time series can not be z-standardised')
        if len(time_series.shape) == 2:
            dim_order = 'ps'
        else:
            dim_order = 'psr'
        # Load time series into a Data object
        dat = Data(time_series, dim_order=dim_order,
                   normalise=(network_inference.z_standardise
                              & can_be_z_standardised))
        algorithm = network_inference.algorithm
        if algorithm in ['bMI_greedy', 'mMI_greedy', 'bTE_greedy',
                         'mTE_greedy']:
            # Set analysis options
            if algorithm == 'bMI_greedy':
                network_analysis = BivariateMI()
            if algorithm == 'mMI_greedy':
                network_analysis = MultivariateMI()
            if algorithm == 'bTE_greedy':
                network_analysis = BivariateTE()
            if algorithm == 'mTE_greedy':
                network_analysis = MultivariateTE()
            settings = {
                'min_lag_sources': network_inference.min_lag_sources,
                'max_lag_sources': network_inference.max_lag_sources,
                'tau_sources': network_inference.tau_sources,
                'max_lag_target': network_inference.max_lag_target,
                'tau_target': network_inference.tau_target,
                'cmi_estimator': network_inference.cmi_estimator,
                'kraskov_k': network_inference.kraskov_k,
                'num_threads': network_inference.jidt_threads_n,
                'permute_in_time': network_inference.permute_in_time,
                'n_perm_max_stat': network_inference.n_perm_max_stat,
                'n_perm_min_stat': network_inference.n_perm_min_stat,
                'n_perm_omnibus': network_inference.n_perm_omnibus,
                'n_perm_max_seq': network_inference.n_perm_max_seq,
                'fdr_correction': network_inference.fdr_correction,
                'alpha_max_stat': network_inference.p_value,
                'alpha_min_stat': network_inference.p_value,
                'alpha_omnibus': network_inference.p_value,
                'alpha_max_seq': network_inference.p_value,
                'alpha_fdr': network_inference.p_value
            }
            if parallel_target_analysis:
                # Use SCOOP to create a generator of map results, each
                # corresponding to one map iteration
                res_iterator = futures.map_as_completed(
                    network_analysis.analyse_single_target,
                    itertools.repeat(settings, nodes_n),
                    itertools.repeat(dat, nodes_n),
                    list(range(nodes_n)))
                # Run analysis
                res_list = list(res_iterator)
                if settings['fdr_correction']:
                    res = network_fdr({'alpha_fdr': settings['alpha_fdr']},
                                      *res_list)
                else:
                    res = res_list[0]
                    res.combine_results(*res_list[1:])
            else:
                # Run analysis
                res = network_analysis.analyse_network(settings=settings,
                                                       data=dat)
            return res
        else:
            raise ParameterValue(
                algorithm,
                msg='Network inference algorithm not yet implemented')
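# Usage sketch for infer_network() (added for illustration, not from the
# original source). The function expects a parameter object that supports
# both attribute access and the `in` operator; the small DotDict below is a
# hypothetical stand-in for whatever parameter container the caller uses.
import numpy as np

class DotDict(dict):
    """Dictionary whose keys can also be read as attributes."""
    __getattr__ = dict.__getitem__

network_inference = DotDict(
    algorithm='mTE_greedy',
    min_lag_sources=1, max_lag_sources=3, tau_sources=1,
    max_lag_target=3, tau_target=1,
    cmi_estimator='JidtKraskovCMI',
    z_standardise=True, permute_in_time=True,
    n_perm_max_stat=21, n_perm_min_stat=21,
    n_perm_omnibus=21, n_perm_max_seq=21,
    fdr_correction=False, p_value=0.05,
    kraskov_k=4, jidt_threads_n=1)

time_series = np.random.rand(3, 500)  # (processes, samples)
res = infer_network(network_inference, time_series)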
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag
    }
    target = 1
    mi = MultivariateMI()
    results = mi.analyse_network(settings, data, targets=[target])
    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples; however, a higher no. of samples is
    # not feasible for a unit test).
    if results.get_single_target(target, fdr=False)['mi'] is None:
        return
    lmi = results.get_single_target(target, fdr=False)['mi']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples((0, max_lag)), (
        'Wrong dim (no. samples) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate: {0}'.format(lmi.shape))

    # Test for correctness of single-link MI estimation by comparing it to
    # the omnibus MI. In this case (single source), the two should be the
    # same. Skip the assertion if more than one source was inferred (this
    # happens sometimes due to random data and the low no. of permutations
    # for statistical testing in unit tests).
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    if results_avg.get_single_target(target, fdr=False)['mi'] is None:
        return
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    mi_omnibus = results_avg.get_single_target(target, fdr=False)['omnibus_mi']
    sources_local = results.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    if len(sources_avg) == 1:
        print('Compare single link and omnibus MI.')
        assert np.isclose(mi_single_link, mi_omnibus, rtol=0.00005), (
            'Single link MI ({0:.6f}) is not equal to omnibus information '
            '({1:.6f}).'.format(mi_single_link[0], mi_omnibus))
    # Check if average and mean local values are the same. Test each source
    # separately. Inferred sources may differ between the two calls to
    # analyse_network() due to the low number of surrogates used in unit
    # testing.
    for s in list(set(sources_avg).intersection(sources_local)):
        print('Compare average and local values.')
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), rtol=0.00005), (
            'Single link average MI ({0:.6f}) and mean LMI ({1:.6f}) '
            'deviate.'.format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data
    with a given covariance. The second data set is shifted by one sample,
    creating a source-target delay of one sample. This example is modelled
    after JIDT demo 4 for transfer entropy. The resulting TE can be compared
    to the analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay
    1. The final target sample should always be (1, 1), the mandatory sample
    at lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strangely small values for non-coupled sources. TODO
    """
    if estimator is None:
        estimator = 'JidtKraskovCMI'
    n = 1000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]
    target = [
        sum(pair) for pair in zip(
            [cov * y for y in source],
            [(1 - cov) * y for y in [rn.normalvariate(0, 1)
                                     for r in range(n)]])
    ]
    # Cast everything to numpy so the IDTxl estimator understands it.
    source = np.expand_dims(np.array(source), axis=1)
    target = np.expand_dims(np.array(target), axis=1)
    data = Data(normalise=True)
    data.set_data(np.vstack((source[1:].T, target[:-1].T)), 'ps')
    settings = {
        'cmi_estimator': estimator,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = MultivariateTE()
    results = random_analysis.analyse_single_target(settings, data, 1)
    # Assert that there are significant conditionals from the source for
    # target 1. For 500 repetitions, the mean errors were 0.02097686 and
    # 0.01454073 for examples 1 and 2, respectively. The maximum errors were
    # 0.093841 and 0.05833172, respectively. This inspired the following
    # error boundaries.
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1 - cov)**2))
    expected_res = calculate_mi(corr_expected)
    estimated_res = results.get_single_target(1, fdr=False).omnibus_te
    diff = np.abs(estimated_res - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} %').format(estimated_res, expected_res,
                                100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger than 0.1: {0}, '
                          'expected: {1}, actual: {2}).'.format(
                              diff, expected_res, estimated_res))
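# The helper calculate_mi() is not shown in this excerpt. For two jointly
# Gaussian variables with correlation coefficient r, mutual information has
# the closed form I = -0.5 * ln(1 - r^2) (in nats), so a minimal stand-in
# consistent with the test above could be:
def calculate_mi(corr):
    """Analytical MI between two correlated Gaussian variables (in nats)."""
    return -0.5 * np.log(1 - corr**2)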
def test_pid_xor_data():
    """Test basic calls to PID class."""
    n = 100
    alph = 2
    x = np.random.randint(0, alph, n)
    y = np.random.randint(0, alph, n)
    z = np.logical_xor(x, y).astype(int)
    data = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Run Tartu estimator
    settings = {'pid_estimator': 'TartuPID', 'lags_pid': [0, 0]}
    pid = BivariatePID()
    tic = tm.time()
    est_tartu = pid.analyse_single_target(settings, data=data, target=2,
                                          sources=[0, 1])
    t_tartu = tm.time() - tic

    # Run Sydney estimator
    settings = {
        'n_perm': 11,
        'alpha': 0.1,
        'alph_s1': alph,
        'alph_s2': alph,
        'alph_t': alph,
        'max_unsuc_swaps_row_parm': 60,
        'num_reps': 63,
        'max_iters': 1000,
        'pid_estimator': 'SydneyPID',
        'lags_pid': [0, 0]
    }
    pid = BivariatePID()
    tic = tm.time()
    est_sydney = pid.analyse_single_target(settings, data=data, target=2,
                                           sources=[0, 1])
    t_sydney = tm.time() - tic

    print('\nResults Tartu estimator:')
    utils.print_dict(est_tartu.get_single_target(2))
    print('\nResults Sydney estimator:')
    utils.print_dict(est_sydney.get_single_target(2))
    print('\nLogical XOR')
    print('Estimator          Sydney\t\tTartu\n')
    print('PID evaluation     {:.3f} s\t\t{:.3f} s\n'.format(t_sydney,
                                                             t_tartu))
    print('Uni s1             {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['unq_s1'],
        est_tartu._single_target[2]['unq_s1']))
    print('Uni s2             {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['unq_s2'],
        est_tartu._single_target[2]['unq_s2']))
    print('Shared s1_s2       {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['shd_s1_s2'],
        est_tartu._single_target[2]['shd_s1_s2']))
    print('Synergy s1_s2      {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['syn_s1_s2'],
        est_tartu._single_target[2]['syn_s1_s2']))
    assert 0.9 < est_sydney._single_target[2]['syn_s1_s2'] <= 1.1, (
        'Sydney estimator returned incorrect synergy: {0}, should be '
        'approx. 1'.format(est_sydney._single_target[2]['syn_s1_s2']))
    assert 0.9 < est_tartu._single_target[2]['syn_s1_s2'] <= 1.1, (
        'Tartu estimator returned incorrect synergy: {0}, should be '
        'approx. 1'.format(est_tartu._single_target[2]['syn_s1_s2']))
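# Sanity-check sketch (added for illustration): for uniform binary inputs and
# z = XOR(x, y), the joint mutual information I(X,Y;Z) equals 1 bit and is
# carried entirely by the synergy term, which is what both estimators are
# asserted to recover above. A plug-in estimate confirms the 1 bit:
import numpy as np

def joint_mi_bits(x, y, z):
    """Plug-in estimate of I(X,Y;Z) in bits for small discrete arrays."""
    xy = 2 * x + y  # encode the joint source state as a single symbol
    mi = 0.0
    for s in np.unique(xy):
        for t in np.unique(z):
            p_st = np.mean((xy == s) & (z == t))
            if p_st > 0:
                mi += p_st * np.log2(
                    p_st / (np.mean(xy == s) * np.mean(z == t)))
    return mi

# joint_mi_bits(x, y, z) evaluates to approx. 1.0 for the XOR data above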
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'noise_level': 0,
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag
    }
    target = 3
    sources = [0, 4]
    mi = MultivariateMI()
    results = mi.analyse_single_target(settings, data, target=target,
                                       sources=sources)
    settings['local_values'] = False
    results_avg = mi.analyse_single_target(settings, data, target=target,
                                           sources=sources)

    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples; however, a higher no. of samples is
    # not feasible for a unit test).
    if results.get_single_target(target, fdr=False)['mi'] is None:
        return
    if results_avg.get_single_target(target, fdr=False)['mi'] is None:
        return

    lmi = results.get_single_target(target, fdr=False)['mi']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples((0, max_lag)), (
        'Wrong dim (no. samples) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate: {0}'.format(lmi.shape))

    # Check if average and mean local values are the same. Test each source
    # separately. Inferred sources and variables may differ between the two
    # calls to analyse_single_target() due to the low number of surrogates
    # used in unit testing.
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    sources_local = results.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    for s in list(set(sources_avg).intersection(sources_local)):
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        # Skip comparison if inferred variables differ between links.
        vars_local = [
            v for v in results.get_single_target(
                target, fdr=False).selected_vars_sources if v[0] == s]
        vars_avg = [
            v for v in results_avg.get_single_target(
                target, fdr=False).selected_vars_sources if v[0] == s]
        if vars_local != vars_avg:
            continue
        print('Compare average ({0:.4f}) and local values ({1:.4f}).'.format(
            mi_single_link[i1], np.mean(lmi[i2, :, :])))
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), rtol=0.00005), (
            'Single link average MI ({0:.6f}) and mean LMI ({1:.6f}) '
            'deviate.'.format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
def test_assertions():
    """Test if input checks raise errors."""
    data = Data()
    data.generate_mute_data(100, 5)

    # Load previously generated example data
    path = os.path.join(os.path.dirname(__file__), 'data/')
    res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb'))
    res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb'))
    res_2 = pickle.load(open(path + 'mute_results_2.p', 'rb'))
    res_3 = pickle.load(open(path + 'mute_results_3.p', 'rb'))

    # Comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two'
    }

    # No. permutations insufficient for requested alpha
    comp_settings['n_perm_comp'] = 6
    comp_settings['alpha_comp'] = 0.001
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):
        comp._initialise(comp_settings)

    # Data sets have unequal no. replications
    dat2 = Data()
    dat2.generate_mute_data(100, 3)
    comp_settings['stats_type'] = 'dependent'
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 1000
    comp = NetworkComparison()
    with pytest.raises(AssertionError):
        comp.compare_within(comp_settings, res_0, res_1, data, dat2)

    # Data sets have unequal no. realisations
    dat2 = Data()
    dat2.generate_mute_data(80, 5)
    comp_settings['stats_type'] = 'dependent'
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 21
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):
        comp.compare_within(comp_settings, res_0, res_1, data, dat2)

    # No. replications/subjects too small for dependent-samples test
    comp_settings['stats_type'] = 'dependent'
    comp_settings['n_perm_comp'] = 1000
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):  # between
        comp.compare_between(comp_settings,
                             network_set_a=np.array((res_0, res_1)),
                             network_set_b=np.array((res_2, res_3)),
                             data_set_a=np.array((data, data)),
                             data_set_b=np.array((data, data)))
    with pytest.raises(RuntimeError):  # within
        comp.compare_within(comp_settings, res_0, res_1, dat2, dat2)

    # No. replications/subjects too small for independent-samples test
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):  # between
        comp.compare_between(comp_settings,
                             network_set_a=np.array((res_0, res_1)),
                             network_set_b=np.array((res_2, res_3)),
                             data_set_a=np.array((data, data)),
                             data_set_b=np.array((data, data)))
    with pytest.raises(RuntimeError):  # within
        comp.compare_within(comp_settings, res_0, res_1, dat2, dat2)

    # Add a target to the network that is not in the data object
    dat2 = Data(np.random.rand(2, 1000, 50), dim_order='psr')
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 21
    comp = NetworkComparison()
    with pytest.raises(IndexError):
        comp.compare_within(comp_settings, res_0, res_2, dat2, dat2)
def test_analyse_network():
    """Test method for full network analysis."""
    n_processes = 5  # the MuTE network has 5 nodes
    dat = Data()
    dat.generate_mute_data(10, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'max_lag_target': 5
    }
    nw_0 = BivariateTE()

    # Test all-to-all analysis
    r = nw_0.analyse_network(settings, dat, targets='all', sources='all')
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    sources = np.arange(n_processes)
    assert all(np.array(k) == np.arange(n_processes)), (
        'Network analysis did not run on all targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for a subset of targets
    target_list = [1, 2, 3]
    r = nw_0.analyse_network(settings, dat, targets=target_list,
                             sources='all')
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run on the correct subset of targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for a subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    r = nw_0.analyse_network(settings, dat, targets=target_list,
                             sources=source_list)
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run for all targets.')
    for t in r.keys():
        assert all(r[t]['sources_tested'] == np.array(source_list)), (
            'Network analysis did not run on the correct subset of sources '
            'for target {0}'.format(t))
def test_network_fdr():
    settings = {'n_perm_max_seq': 1000, 'n_perm_omnibus': 1000}
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2),
                               (1, 3), (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.001, 0.0014, 0.01, 0.045,
                                           0.047]),
        'selected_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63]),
    }
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'selected_sources_te': np.array([1.8, 1.75, 0.75]),
    }
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'selected_sources_pval': None,
        'selected_sources_te': np.array([]),
    }
    res_1 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_1._add_single_result(target=0, settings=settings, results=target_0)
    res_1._add_single_result(target=1, settings=settings, results=target_1)
    res_2 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_2._add_single_result(target=2, settings=settings, results=target_2)

    for correct_by_target in [True, False]:
        settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'alpha_fdr': 0.05,
            'max_lag_sources': 3,
            'min_lag_sources': 1,
            'max_lag_target': 3,
            'correct_by_target': correct_by_target
        }
        data = Data()
        data.generate_mute_data(n_samples=100, n_replications=3)
        analysis_setup = MultivariateTE()
        analysis_setup._initialise(settings=settings, data=data,
                                   sources=[1, 2], target=0)
        res_pruned = stats.network_fdr(settings, res_1, res_2)
        assert (not res_pruned._single_target[2].selected_vars_sources), (
            'Target 2 has not been pruned from results.')
        for k in res_pruned.targets_analysed:
            if res_pruned._single_target[k]['selected_sources_pval'] is None:
                assert (
                    not res_pruned._single_target[k]['selected_vars_sources'])
            else:
                assert (
                    len(res_pruned._single_target[k]['selected_vars_sources'])
                    == len(
                        res_pruned._single_target[k]['selected_sources_pval'])
                    ), ('Source list and list of p-values should have the '
                        'same length.')

    # Test function call for a single result
    res_pruned = stats.network_fdr(settings, res_1)
    print('Successful call on single result dict.')

    # Test None result for insufficient no. permutations: no FDR-corrected
    # results exist (the results class throws an error if no FDR-corrected
    # results exist).
    res_1.settings['n_perm_max_seq'] = 2
    res_2.settings['n_perm_max_seq'] = 2
    res_pruned = stats.network_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_adjacency_matrix('binary', fdr=True)
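# Conceptual sketch (added for illustration) of the Benjamini-Hochberg step
# that underlies stats.network_fdr(): p-values are sorted and compared
# against a linearly growing threshold, and links above the cut-off are
# pruned, as asserted in the test above.
import numpy as np

def fdr_mask_sketch(pvals, alpha_fdr=0.05):
    """Return a boolean mask of p-values surviving BH-FDR correction."""
    pvals = np.asarray(pvals)
    n = len(pvals)
    order = np.argsort(pvals)
    below = pvals[order] <= np.arange(1, n + 1) / n * alpha_fdr
    mask = np.zeros(n, dtype=bool)
    if below.any():
        k = np.max(np.nonzero(below)[0])  # largest rank meeting its threshold
        mask[order[:k + 1]] = True
    return mask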
def import_matarray(file_name, array_name, file_version, dim_order,
                    normalise=True):
    """Read a Matlab hdf5 file into IDTxl.

    Reads a Matlab hdf5 file ('-v7.3' or higher, .mat) with a SINGLE array
    inside and returns a numpy array with dimensions that are channel x time
    x trials, using np.swapaxes where necessary.

    Note:
        The import function squeezes the loaded mat-file, i.e., any singleton
        dimension will be removed. Hence, do not enter singleton dimensions
        into the 'dim_order', e.g., don't pass dim_order='ps' but
        dim_order='s' if you want to load a 1D-array where entries represent
        samples recorded from a single channel.

    Args:
        file_name : string
            full (matlab) file name on disk
        array_name : string
            variable name of the MATLAB structure to be read
        file_version : string
            version of the file, e.g. 'v7.3' for MATLAB's 7.3 format;
            currently versions 'v4', 'v6', 'v7', and 'v7.3' are supported
        dim_order : string
            order of dimensions, accepts any combination of the characters
            'p', 's', and 'r' for processes, samples, and replications; must
            have the same length as the data dimensionality, e.g., 'ps' for
            a two-dimensional array of data from several processes over time
        normalise : bool [optional]
            normalise data after import (default=True)

    Returns:
        Data() instance
            instance of IDTxl Data object, containing data from the 'trial'
            field
        list of strings
            list of channel labels, corresponding to the 'label' field
        numpy array
            time stamps for samples, corresponding to one entry in the
            'time' field
        int
            sampling rate, corresponding to the 'fsample' field

    Created on Wed Mar 19 12:34:36 2014

    @author: Michael Wibral
    """
    if file_version == 'v7.3':
        mat_file = h5py.File(file_name)
        # Assert that at least one of the keys found at the top level of the
        # HDF file matches the name of the array we want
        if array_name not in mat_file.keys():
            raise RuntimeError('Array {0} not in mat file or not a variable '
                               'at the file\'s top level.'.format(array_name))
        # Create an object for the matlab array (from the hdf5 hierarchy);
        # the trailing [()] ensures everything is read
        mat_data = np.squeeze(np.asarray(mat_file[array_name][()]))
    elif file_version in ['v4', 'v6', 'v7']:
        try:
            m = loadmat(file_name, squeeze_me=True,
                        variable_names=array_name)
        except NotImplementedError as err:
            raise RuntimeError('You may have provided an incorrect file '
                               'version. The mat file was probably saved as '
                               'version 7.3 (hdf5).') from err
        mat_data = m[array_name]  # loadmat returns a dict of variables
    else:
        raise ValueError('Unknown file version: {0}.'.format(file_version))

    # Create output: IDTxl data object, list of labels, sampling info in
    # unit time steps (sampling rate of 1).
    print('Creating Data object from matlab array: {0}.'.format(array_name))
    dat = Data(mat_data, dim_order=dim_order, normalise=normalise)
    label = []
    for n in range(dat.n_processes):
        label.append('channel_{0:03d}'.format(n))
    fsample = 1
    timestamps = np.arange(dat.n_samples)
    return dat, label, timestamps, fsample
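# Usage sketch (added for illustration; 'my_data.mat' and 'ts_array' are
# placeholder names): load a processes-by-samples array saved from MATLAB in
# the 7.3 (HDF5) format and hand it to IDTxl.
#
#     dat, labels, timestamps, fsample = import_matarray(
#         file_name='my_data.mat', array_name='ts_array',
#         file_version='v7.3', dim_order='ps', normalise=True)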
# Import classes
from idtxl.active_information_storage import ActiveInformationStorage
from idtxl.data import Data

# a) Generate test data
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=5)

# b) Initialise analysis object and define settings
network_analysis = ActiveInformationStorage()
settings = {'cmi_estimator': 'JidtGaussianCMI',
            'max_lag': 5}

# c) Run analysis
results = network_analysis.analyse_network(settings=settings, data=data)

# d) Print list of processes with significant AIS to console
print(results.get_significant_processes(fdr=False))
def test_bivariate_te_init():
    """Test instance creation for BivariateTE class."""
    # Test error on missing estimator
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2,
        'max_lag_target': 5
    }
    nw = BivariateTE()
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=Data(), target=1)

    # Test setting of min and max lags
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    dat = Data()
    dat.generate_mute_data(n_samples=10, n_replications=5)

    # Valid: max lag sources bigger than max lag target
    nw.analyse_single_target(settings=settings, data=dat, target=1)
    # Valid: max lag sources smaller than max lag target
    settings['max_lag_sources'] = 3
    nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: min lag sources bigger than max lag
    settings['min_lag_sources'] = 8
    settings['max_lag_sources'] = 7
    settings['max_lag_target'] = 5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: taus bigger than lags
    settings['min_lag_sources'] = 2
    settings['max_lag_sources'] = 4
    settings['max_lag_target'] = 5
    settings['tau_sources'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: negative lags or taus
    settings['min_lag_sources'] = 1
    settings['max_lag_target'] = 5
    settings['max_lag_sources'] = -7
    settings['tau_target'] = 1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_sources'] = 7
    settings['min_lag_sources'] = -4
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['min_lag_sources'] = 4
    settings['max_lag_target'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_target'] = 5
    settings['tau_sources'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: lags or taus are no integers
    settings['tau_target'] = 1
    settings['min_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_sources'] = 7
    settings['tau_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_target'] = 1

    # Invalid: sources or target is no int
    with pytest.raises(RuntimeError):  # no int
        nw.analyse_single_target(settings=settings, data=dat, target=1.5)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=dat, target=-1)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=dat, target=10)
    with pytest.raises(RuntimeError):  # wrong type
        nw.analyse_single_target(settings=settings, data=dat, target={})
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=dat, target=0,
                                 sources=-1)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=dat, target=0,
                                 sources=[-1])
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=dat, target=0,
                                 sources=20)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=dat, target=0,
                                 sources=[20])

    # Force conditionals
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw.analyse_single_target(settings=settings, data=dat, target=0)
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw.analyse_single_target(settings=settings, data=dat, target=0)
def test_get_permuted_replications():
    """Test if permutation of replications works."""
    # Load previously generated example data
    res_0 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_0.pkl'))
    res_1 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_1.pkl'))
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent'
    }
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)

    # Check permutation for dependent-samples test: Replace realisations by
    # zeros and ones, check if realisations get swapped correctly.
    dat1 = Data()
    dat1.normalise = False
    dat1.set_data(np.zeros((5, 100, 5)), 'psr')
    dat2 = Data()
    dat2.normalise = False
    dat2.set_data(np.ones((5, 100, 5)), 'psr')
    [cond_a_perm, cv_a_perm,
     cond_b_perm, cv_b_perm] = comp._get_permuted_replications(
        data_a=dat1, data_b=dat2, target=1)
    n_vars = cond_a_perm.shape[1]
    assert (np.sum(cond_a_perm + cond_b_perm, axis=1) == n_vars).all(), (
        'Dependent samples permutation did not work correctly.')
    assert np.logical_xor(cond_a_perm, cond_b_perm).all(), (
        'Dependent samples permutation did not work correctly.')

    # Check permutation for independent-samples test: Check the sum over
    # realisations.
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)
    [cond_a_perm, cv_a_perm,
     cond_b_perm, cv_b_perm] = comp._get_permuted_replications(
        data_a=dat1, data_b=dat2, target=1)
    n_samples = n_vars * dat1.n_realisations((0, comp.union['max_lag']))
    assert np.sum(cond_a_perm + cond_b_perm, axis=None) == n_samples, (
        'Independent samples permutation did not work correctly.')

    # Test unequal number of replications
    dat2.generate_mute_data(100, 7)
    with pytest.raises(AssertionError):
        comp._get_permuted_replications(data_a=dat1, data_b=dat2, target=1)
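# Mini-demo (added for illustration) of the exchange scheme the dependent-
# samples test relies on: realisations are swapped as whole blocks between
# conditions A and B per replication, so A + B stays constant -- exactly the
# invariant asserted above with the all-zeros and all-ones data sets.
import numpy as np

n_repl = 5
swap = np.random.rand(n_repl) > 0.5  # which replications change sides
a, b = np.zeros(n_repl), np.ones(n_repl)
a_perm = np.where(swap, b, a)
b_perm = np.where(swap, a, b)
assert (a_perm + b_perm == 1).all()  # sums preserved per replication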
def test_network_comparison_use_cases():
    """Run all intended use cases, within/between, dependent/independent."""
    data = Data()
    data.generate_mute_data(100, 5)

    # Load previously generated example data
    path = os.path.join(os.path.dirname(__file__), 'data/')
    res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb'))
    res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb'))
    res_2 = pickle.load(open(path + 'mute_results_2.p', 'rb'))
    res_3 = pickle.load(open(path + 'mute_results_3.p', 'rb'))
    res_4 = pickle.load(open(path + 'mute_results_4.p', 'rb'))

    # Comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'alpha_comp': 0.26,
        'n_perm_comp': 4,
        'tail': 'two'
    }

    comp = NetworkComparison()
    print('\n\nTEST 0 - independent within')
    comp_settings['stats_type'] = 'independent'
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 1 - dependent within')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 2 - independent between')
    comp_settings['stats_type'] = 'independent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data)))

    print('\n\nTEST 3 - dependent between')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data)))

    print('\n\nTEST 4 - independent within unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 5 - independent between unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3, res_4)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data, data)))
def test_network_comparison_use_cases():
    """Run all intended use cases, within/between, dependent/independent."""
    dat = Data()
    dat.generate_mute_data(100, 5)

    # Load previously generated example data (pickled)
    res_0 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_0.pkl'))
    res_1 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_1.pkl'))
    res_2 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_2.pkl'))
    res_3 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_3.pkl'))
    res_4 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_res_4.pkl'))

    # Comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'alpha_comp': 0.26,
        'n_perm_comp': 4,
        'tail': 'two'
    }

    comp = NetworkComparison()
    print('\n\nTEST 0 - independent within')
    comp_settings['stats_type'] = 'independent'
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 1 - dependent within')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 2 - independent between')
    comp_settings['stats_type'] = 'independent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat)))

    print('\n\nTEST 3 - dependent between')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat)))

    print('\n\nTEST 4 - independent within unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 5 - independent between unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3, res_4)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat, dat)))
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data
    with a given covariance. The second data set is shifted by one sample,
    creating a source-target delay of one sample. This example is modelled
    after JIDT demo 4 for transfer entropy. The resulting TE can be compared
    to the analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay
    1. The final target sample should always be (1, 1), the mandatory sample
    at lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strangely small values for non-coupled sources. TODO
    """
    if estimator is None:
        estimator = 'jidt_kraskov'
    n = 1000
    cov = 0.4
    source_1 = [rn.normalvariate(0, 1) for r in range(n)]  # correlated src
    target = [
        sum(pair) for pair in zip(
            [cov * y for y in source_1],
            [(1 - cov) * y for y in [rn.normalvariate(0, 1)
                                     for r in range(n)]])
    ]
    # Cast everything to numpy so the IDTxl estimator understands it.
    source_1 = np.expand_dims(np.array(source_1), axis=1)
    target = np.expand_dims(np.array(target), axis=1)
    dat = Data(normalise=True)
    dat.set_data(np.vstack((source_1[1:].T, target[:-1].T)), 'ps')
    analysis_opts = {
        'cmi_calc_name': estimator,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = Multivariate_te(max_lag_sources=5, min_lag_sources=1,
                                      max_lag_target=5,
                                      options=analysis_opts)
    res_1 = random_analysis.analyse_single_target(dat, 1)  # coupled direction
    # Assert that there are significant conditionals from the source for
    # target 1. For 500 repetitions, the mean errors were 0.02097686 and
    # 0.01454073 for examples 1 and 2, respectively. The maximum errors were
    # 0.093841 and 0.05833172, respectively. This inspired the following
    # error boundaries.
    expected_res = np.log(1 / (1 - np.power(cov, 2)))
    diff = np.abs(max(res_1['cond_sources_te']) - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} %').format(max(res_1['cond_sources_te']), expected_res,
                                100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger than 0.1: {0}, '
                          'expected: {1}, actual: {2}).'.format(
                              diff, expected_res,
                              res_1['cond_sources_te']))
# Import classes
import numpy as np
from idtxl.partial_information_decomposition import (
    PartialInformationDecomposition)
from idtxl.data import Data

# a) Generate test data
n = 100
alph = 2
x = np.random.randint(0, alph, n)
y = np.random.randint(0, alph, n)
z = np.logical_xor(x, y).astype(int)
data = Data(np.vstack((x, y, z)), 'ps', normalise=False)

# b) Initialise analysis object and define settings for both PID estimators
pid = PartialInformationDecomposition()
settings_tartu = {'pid_estimator': 'TartuPID', 'lags_pid': [0, 0]}
settings_sydney = {
    'alph_s1': alph,
    'alph_s2': alph,
    'alph_t': alph,
    'max_unsuc_swaps_row_parm': 60,
    'num_reps': 63,
    'max_iters': 1000,
    'pid_estimator': 'SydneyPID',
    'lags_pid': [0, 0]
}

# c) Run Tartu estimator
results_tartu = pid.analyse_single_target(settings=settings_tartu,
                                          data=data, target=2,
                                          sources=[0, 1])

# d) Run Sydney estimator
results_sydney = pid.analyse_single_target(settings=settings_sydney,
                                           data=data, target=2,
                                           sources=[0, 1])
def test_multivariate_mi_init():
    """Test instance creation for MultivariateMI class."""
    # Test error on missing estimator
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2
    }
    nw = MultivariateMI()
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=Data(), target=1)

    # Test setting of min and max lags
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    data = Data()
    data.generate_mute_data(n_samples=10, n_replications=5)

    # Invalid: min lag sources bigger than max lag
    settings['min_lag_sources'] = 8
    settings['max_lag_sources'] = 7
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: tau bigger than lags
    settings['min_lag_sources'] = 2
    settings['max_lag_sources'] = 4
    settings['tau_sources'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: negative lags or taus
    settings['tau_sources'] = 1
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = -7
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['max_lag_sources'] = 7
    settings['min_lag_sources'] = -4
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['min_lag_sources'] = 4
    settings['tau_sources'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: lags or taus are no integers
    settings['min_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['max_lag_sources'] = 7
    settings['tau_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: sources or target is no int
    with pytest.raises(RuntimeError):  # no int
        nw.analyse_single_target(settings=settings, data=data, target=1.5)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=data, target=-1)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=data, target=10)
    with pytest.raises(RuntimeError):  # wrong type
        nw.analyse_single_target(settings=settings, data=data, target={})
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=data, target=0,
                                 sources=-1)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=data, target=0,
                                 sources=[-1])
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=data, target=0,
                                 sources=20)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=data, target=0,
                                 sources=[20])
def test_ais_fdr():
    settings = {'n_perm_max_seq': 1000, 'n_perm_mi': 1000}
    process_0 = {
        'selected_vars': [(0, 1), (0, 2), (0, 3)],
        'ais_pval': 0.0001,
        'ais_sign': True
    }
    process_1 = {
        'selected_vars': [(1, 0), (1, 1), (1, 2)],
        'ais_pval': 0.031,
        'ais_sign': True
    }
    process_2 = {'selected_vars': [], 'ais_pval': 0.41, 'ais_sign': False}
    res_1 = ResultsSingleProcessAnalysis(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_1._add_single_result(process=0, settings=settings, results=process_0)
    res_1._add_single_result(process=1, settings=settings, results=process_1)
    res_2 = ResultsSingleProcessAnalysis(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_2._add_single_result(process=2, settings=settings, results=process_2)

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_fdr': 0.05,
        'max_lag': 3
    }
    data = Data()
    data.generate_mute_data(n_samples=100, n_replications=3)
    analysis_setup = ActiveInformationStorage()
    analysis_setup._initialise(settings=settings, data=data, process=1)
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    assert (not res_pruned._single_process[2].selected_vars), (
        'Process 2 has not been pruned from results.')
    alpha_fdr = res_pruned.settings.alpha_fdr
    for k in res_pruned.processes_analysed:
        if not res_pruned._single_process[k]['ais_sign']:
            assert (res_pruned._single_process[k]['ais_pval'] > alpha_fdr), (
                'P-value of non-significant AIS is not above the FDR alpha.')
            assert (not res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is not empty.')
        else:
            assert (res_pruned._single_process[k]['ais_pval'] < 1), (
                'P-value of significant AIS is not smaller than 1.')
            assert (res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is empty.')

    # Test function call for a single result
    res_pruned = stats.ais_fdr(settings, res_1)
    print('Successful call on single result dict.')

    # Test None result for insufficient no. permutations: no FDR-corrected
    # results exist (the results class throws an error if no FDR-corrected
    # results exist).
    res_1.settings['n_perm_mi'] = 2
    res_2.settings['n_perm_mi'] = 2
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_significant_processes(fdr=True)
import os
import time

import numpy as np

from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

start_time = time.time()
# Load simulated data from two coupled Lorenz systems, 1 -> 2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
                         'data/lorenz_2_exampledata.npy'))
dat = Data()
dat.set_data(d[:, :, 0:100], 'psr')
analysis_opts = {
    'cmi_calc_name': 'jidt_kraskov',
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
lorenz_analysis = Multivariate_te(max_lag_sources=50, min_lag_sources=40,
                                  max_lag_target=30, tau_sources=1,
                                  tau_target=3, options=analysis_opts)
res_1 = lorenz_analysis.analyse_single_target(dat, 0)
res_2 = lorenz_analysis.analyse_single_target(dat, 1)
runtime = time.time() - start_time
print('---- {0} minutes'.format(runtime / 60))

path = os.path.join(os.path.dirname(__file__), 'output')
np.savez(os.path.join(path, 'test_lorenz'), res_1, res_2)
np.save(os.path.join(path, 'test_lorenz_time'), runtime)
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data(seed=SEED)
    data.generate_mute_data(200, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': max_lag,
        'max_lag_target': max_lag
    }
    target = 1
    mi = BivariateMI()
    results_local = mi.analyse_network(settings, data, targets=[target])

    lmi = results_local.get_single_target(target, fdr=False)['mi']
    if lmi is None:
        return
    n_sources = len(results_local.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples((0, max_lag)), (
        'Wrong dim (no. samples) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate: {0}'.format(lmi.shape))

    # Test for correctness of single-link MI estimation by comparing it to
    # the MI between single variables and the target. For this test case,
    # where only one significant past variable is found per source, the two
    # should be the same. Also compare the single link average MI to the
    # mean local MI for each link.
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    mi_selected_sources = results_avg.get_single_target(
        target, fdr=False)['selected_sources_mi']
    sources_local = results_local.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    print('Single link average MI: {0}, single source MI: {1}.'.format(
        mi_single_link, mi_selected_sources))
    if mi_single_link is None:
        return
    assert np.isclose(mi_single_link, mi_selected_sources,
                      atol=0.005).all(), (
        'Single link average MI {0} and single source MI {1} deviate.'.format(
            mi_single_link, mi_selected_sources))

    # Check if average and local values are the same. Test each source
    # separately. Inferred sources may differ between the two calls to
    # analyse_network() due to the low number of surrogates used in unit
    # testing.
    print('Compare average and local values.')
    for s in list(set(sources_avg).intersection(sources_local)):
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), atol=0.005), (
            'Single link average MI {0:0.6f} and mean LMI {1:0.6f} '
            'deviate.'.format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
        assert np.isclose(
            mi_single_link[i1], mi_selected_sources[i1], atol=0.005), (
            'Single link average MI {0:0.6f} and single source MI {1:0.6f} '
            'deviate.'.format(mi_single_link[i1], mi_selected_sources[i1]))
import os
import time

import numpy as np

from idtxl.multivariate_te import MultivariateTE
from idtxl.data import Data

start_time = time.time()
dat = Data()  # initialise an empty data object
dat.generate_mute_data(n_samples=1000, n_replications=10)
settings = {
    'cmi_estimator': 'JidtKraskovCMI',
    'n_perm_max_stat': 500,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
    'max_lag_sources': 5,
    'min_lag_sources': 1
}
network_analysis = MultivariateTE()
res = network_analysis.analyse_network(settings, dat)
runtime = time.time() - start_time
print('---- {0} minutes'.format(runtime / 60))

path = os.path.join(os.path.dirname(__file__), 'output')
np.save(os.path.join(path, 'test'), res)
np.save(os.path.join(path, 'test_time'), runtime)
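# Note (added for illustration): np.save() pickles the results object, so it
# can be read back with allow_pickle and unwrapped from the 0-d object array:
#
#     res_loaded = np.load(os.path.join(path, 'test.npy'),
#                          allow_pickle=True).item()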
"""Unit tests for IDTxl I/O functions.""" import os import pickle import pytest import numpy as np from pkg_resources import resource_filename from idtxl import idtxl_io as io from idtxl.data import Data from idtxl.network_comparison import NetworkComparison # Generate data and load network inference results. n_nodes = 5 data_0 = Data() data_0.generate_mute_data(500, 5) data_1 = Data(np.random.rand(n_nodes, 500, 5), 'psr') path = os.path.join(os.path.dirname(__file__), 'data/') res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb')) res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb')) # Generate network comparison results. comp_settings = { 'cmi_estimator': 'JidtKraskovCMI', 'stats_type': 'independent', 'n_perm_max_stat': 50, 'n_perm_min_stat': 50, 'n_perm_omnibus': 200, 'n_perm_max_seq': 50, 'alpha_comp': 0.26, 'n_perm_comp': 200, 'tail': 'two',