def test_analyse_network():
    """Test method for full network analysis.

    Runs the bivariate MI network analysis for three target/source
    selections (all-to-all, a subset of targets, a subset of sources) and
    checks that exactly the requested targets and sources were analysed.
    """
    n_processes = 5  # the MuTE network has 5 nodes
    data = Data(seed=SEED)
    data.generate_mute_data(10, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
    }
    nw = BivariateMI()
    all_processes = np.arange(n_processes)

    # Test all to all analysis
    results = nw.analyse_network(settings, data, targets='all', sources='all')
    targets_analysed = results.targets_analysed
    # np.array_equal treats a length mismatch as "not equal", so a wrong
    # number of targets/sources fails the assertion (with its message)
    # instead of erroring out or broadcasting in an element-wise comparison.
    assert np.array_equal(targets_analysed, all_processes), (
        'Network analysis did not run on all targets.')
    for t in targets_analysed:
        # setdiff1d yields the remaining processes in sorted order and does
        # not depend on set iteration order.
        s = np.setdiff1d(all_processes, [t])
        assert np.array_equal(results._single_target[t].sources_tested, s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for subset of targets
    target_list = [1, 2, 3]
    results = nw.analyse_network(
        settings, data, targets=target_list, sources='all')
    targets_analysed = results.targets_analysed
    assert np.array_equal(targets_analysed, target_list), (
        'Network analysis did not run on correct subset of targets.')
    for t in targets_analysed:
        s = np.setdiff1d(all_processes, [t])
        assert np.array_equal(results._single_target[t].sources_tested, s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    results = nw.analyse_network(
        settings, data, targets=target_list, sources=source_list)
    targets_analysed = results.targets_analysed
    assert np.array_equal(targets_analysed, target_list), (
        'Network analysis did not run for all targets.')
    for t in targets_analysed:
        assert np.array_equal(
            results._single_target[t].sources_tested, source_list), (
            'Network analysis did not run on the correct subset '
            'of sources for target {0}'.format(t))
def test_analyse_network():
    """Test method for full network analysis.

    Analyses the network all-to-all, for a subset of targets, and for a
    subset of sources, asserting each time that the analysed targets and the
    sources tested per target match the request.
    """
    n_processes = 5  # the MuTE network has 5 nodes
    data = Data()
    data.generate_mute_data(10, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
    }
    nw = BivariateMI()
    processes = np.arange(n_processes)

    def assert_all_other_sources_tested(res):
        """Assert every analysed target was tested against all other processes."""
        for t in res.targets_analysed:
            # Sorted, deterministic complement of the target; avoids relying
            # on the iteration order of a set difference.
            expected = np.setdiff1d(processes, [t])
            # array_equal fails cleanly (rather than raising or broadcasting)
            # when the number of tested sources is wrong.
            assert np.array_equal(
                res._single_target[t].sources_tested, expected), (
                'Network analysis did not run on all sources for target '
                '{0}'.format(t))

    # Test all to all analysis
    results = nw.analyse_network(settings, data, targets='all', sources='all')
    assert np.array_equal(results.targets_analysed, processes), (
        'Network analysis did not run on all targets.')
    assert_all_other_sources_tested(results)

    # Test analysis for subset of targets
    target_list = [1, 2, 3]
    results = nw.analyse_network(
        settings, data, targets=target_list, sources='all')
    assert np.array_equal(results.targets_analysed, target_list), (
        'Network analysis did not run on correct subset of targets.')
    assert_all_other_sources_tested(results)

    # Test analysis for subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    results = nw.analyse_network(
        settings, data, targets=target_list, sources=source_list)
    targets_analysed = results.targets_analysed
    assert np.array_equal(targets_analysed, target_list), (
        'Network analysis did not run for all targets.')
    for t in targets_analysed:
        assert np.array_equal(
            results._single_target[t].sources_tested, source_list), (
            'Network analysis did not run on the correct subset '
            'of sources for target {0}'.format(t))
def test_return_local_values():
    """Check local MI estimates and compare them with averaged estimates.

    First verifies type and dimensions of the local MI array, then re-runs
    the analysis with averaged values and checks that single-link MI,
    selected-source MI and the mean of the local values agree within a
    tolerance. The test returns early if no MI estimate is available.
    """
    max_lag = 5
    data = Data(seed=SEED)
    data.generate_mute_data(200, 5)
    settings = dict(
        cmi_estimator='JidtKraskovCMI',
        local_values=True,  # request calculation of local values
        n_perm_max_stat=21,
        n_perm_min_stat=21,
        n_perm_max_seq=21,
        n_perm_omnibus=21,
        max_lag_sources=max_lag,
        min_lag_sources=max_lag,
        max_lag_target=max_lag,
    )
    target = 1
    mi = BivariateMI()

    # --- Local values: check type and dimensions of the estimate ---
    results_local = mi.analyse_network(settings, data, targets=[target])
    lmi = results_local.get_single_target(target, fdr=False)['mi']
    if lmi is None:
        return
    n_sources = len(results_local.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    expected_samples = data.n_realisations_samples((0, max_lag))
    assert lmi.shape[1] == expected_samples, (
        'Wrong dim (no. samples) in LMI estimate {0}'.format(lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate {0}'.format(lmi.shape))

    # --- Averaged values ---
    # Test for correctness of single link MI estimation by comparing it to
    # the MI between single variables and the target. For this test case,
    # where only one significant past variable is found per source, the two
    # should be the same. Also compare single link average MI to mean local
    # MI for each link.
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    avg_target = results_avg.get_single_target(target, fdr=False)
    mi_single_link = avg_target['mi']
    mi_selected_sources = results_avg.get_single_target(
        target, fdr=False)['selected_sources_mi']
    sources_local = results_local.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    print('Single link average MI: {0}, single source MI: {1}.'.format(
        mi_single_link, mi_selected_sources))
    if mi_single_link is None:
        return
    links_agree = np.isclose(mi_single_link, mi_selected_sources, atol=0.005)
    assert links_agree.all(), (
        'Single link average MI {0} and single source MI {1} deviate.'.format(
            mi_single_link, mi_selected_sources))

    # Check if average and local values are the same. Test each source
    # separately. Inferred sources may differ between the two calls to
    # analyse_network() due to low number of surrogates used in unit testing.
    print('Compare average and local values.')
    for s in set(sources_avg).intersection(sources_local):
        idx_avg = np.where(sources_avg == s)[0][0]
        idx_local = np.where(sources_local == s)[0][0]
        link_mi = mi_single_link[idx_avg]
        mean_lmi = np.mean(lmi[idx_local, :, :])
        assert np.isclose(link_mi, mean_lmi, atol=0.005), (
            'Single link average MI {0:0.6f} and mean LMI {1:0.6f} deviate.'.format(
                link_mi, mean_lmi))
        source_mi = mi_selected_sources[idx_avg]
        assert np.isclose(link_mi, source_mi, atol=0.005), (
            'Single link average MI {0:0.6f} and single source MI {1:0.6f} deviate.'.format(
                link_mi, source_mi))
def infer_network(network_inference, time_series, parallel_target_analysis=False):
    """Infer a network from time series using the requested algorithm.

    Validates the inference parameters, wraps ``time_series`` in a ``Data``
    object and runs the selected greedy MI/TE network analysis.

    Args:
        network_inference: Parameter container that supports both ``in``
            membership tests and attribute access for each parameter. Must
            provide ``algorithm`` plus the parameters required by that
            algorithm (see the tables below). ``max_lag_target`` (for the MI
            algorithms), ``kraskov_k`` and ``jidt_threads_n`` are optional
            and forwarded only when present.
        time_series: Array whose first dimension indexes processes. A 2-D
            array is loaded with ``dim_order='ps'``, anything else with
            ``dim_order='psr'``.
        parallel_target_analysis: If True, each target is analysed as a
            separate task via ``futures.map_as_completed`` and the
            per-target results are merged afterwards; otherwise the whole
            network is analysed in a single call.

    Returns:
        The results object produced by the network analysis (FDR-corrected
        or combined per-target results in the parallel case).

    Raises:
        ParameterMissing: If ``algorithm`` or a parameter required by the
            chosen algorithm is missing.
        ParameterValue: If the algorithm is unknown, or known but not yet
            implemented (``cross_corr``).
    """
    # Parameters required by each algorithm, in the order they are checked:
    #   bMI_greedy: Bivariate Mutual Information via greedy algorithm
    #   bTE_greedy: Bivariate Transfer Entropy via greedy algorithm
    #   mMI_greedy: Multivariate Mutual Information via greedy algorithm
    #   mTE_greedy: Multivariate Transfer Entropy via greedy algorithm
    #   cross_corr: Cross-correlation thresholding algorithm
    # All alpha_* thresholds passed to the analysis are taken from 'p_value'.
    mi_required = [
        'min_lag_sources',
        'max_lag_sources',
        'tau_sources',
        'tau_target',
        'cmi_estimator',
        'z_standardise',
        'permute_in_time',
        'n_perm_max_stat',
        'n_perm_min_stat',
        'n_perm_omnibus',
        'n_perm_max_seq',
        'fdr_correction',
        'p_value',
    ]
    te_required = [
        'min_lag_sources',
        'max_lag_sources',
        'tau_sources',
        'max_lag_target',
        'tau_target',
        'cmi_estimator',
        'z_standardise',
        'permute_in_time',
        'n_perm_max_stat',
        'n_perm_min_stat',
        'n_perm_omnibus',
        'n_perm_max_seq',
        'fdr_correction',
        'p_value',
    ]
    required_parameters = {
        'bMI_greedy': mi_required,
        'bTE_greedy': te_required,
        'mMI_greedy': mi_required,
        'mTE_greedy': te_required,
        'cross_corr': ['min_lag_sources', 'max_lag_sources'],
    }

    try:
        # Ensure that a network inference algorithm has been specified
        if 'algorithm' not in network_inference:
            raise ParameterMissing('algorithm')
        # Ensure that the provided algorithm is implemented
        if network_inference.algorithm not in required_parameters:
            raise ParameterValue(network_inference.algorithm)
        # Ensure that all the parameters required by the algorithm have been
        # provided
        for par in required_parameters[network_inference.algorithm]:
            if par not in network_inference:
                raise ParameterMissing(par)
    except ParameterMissing as e:
        print(e.msg, e.par_names)
        raise
    except ParameterValue as e:
        print(e.msg, e.par_value)
        raise

    nodes_n = np.shape(time_series)[0]

    # Only normalise when requested AND possible: a process with zero
    # standard deviation cannot be z-standardised (assuming the first
    # dimension represents processes, as in the rest of the code).
    normalise = bool(network_inference.z_standardise)
    if normalise and not np.all(np.std(time_series, axis=1) > 0):
        print('Time series can not be z-standardised')
        normalise = False

    dim_order = 'ps' if len(time_series.shape) == 2 else 'psr'

    # Load time series
    dat = Data(time_series, dim_order=dim_order, normalise=normalise)

    algorithm = network_inference.algorithm
    analysis_classes = {
        'bMI_greedy': BivariateMI,
        'mMI_greedy': MultivariateMI,
        'bTE_greedy': BivariateTE,
        'mTE_greedy': MultivariateTE,
    }
    if algorithm not in analysis_classes:
        raise ParameterValue(
            algorithm,
            msg='Network inference algorithm not yet implemented')

    network_analysis = analysis_classes[algorithm]()

    # Set analysis options
    settings = {
        'min_lag_sources': network_inference.min_lag_sources,
        'max_lag_sources': network_inference.max_lag_sources,
        'tau_sources': network_inference.tau_sources,
        'tau_target': network_inference.tau_target,
        'cmi_estimator': network_inference.cmi_estimator,
        'permute_in_time': network_inference.permute_in_time,
        'n_perm_max_stat': network_inference.n_perm_max_stat,
        'n_perm_min_stat': network_inference.n_perm_min_stat,
        'n_perm_omnibus': network_inference.n_perm_omnibus,
        'n_perm_max_seq': network_inference.n_perm_max_seq,
        'fdr_correction': network_inference.fdr_correction,
        'alpha_max_stat': network_inference.p_value,
        'alpha_min_stat': network_inference.p_value,
        'alpha_omnibus': network_inference.p_value,
        'alpha_max_seq': network_inference.p_value,
        'alpha_fdr': network_inference.p_value,
    }
    # These parameters are not validated above for every algorithm
    # ('max_lag_target' is only required for the TE variants), so reading
    # them unconditionally would fail for callers that supplied exactly the
    # required set. Forward them only when they were provided.
    optional_settings = {
        'max_lag_target': 'max_lag_target',
        'kraskov_k': 'kraskov_k',
        'num_threads': 'jidt_threads_n',
    }
    for settings_key, par in optional_settings.items():
        if par in network_inference:
            settings[settings_key] = getattr(network_inference, par)

    if parallel_target_analysis:
        # Use SCOOP to create a generator of map results, each
        # corresponding to one map iteration
        res_iterator = futures.map_as_completed(
            network_analysis.analyse_single_target,
            itertools.repeat(settings, nodes_n),
            itertools.repeat(dat, nodes_n),
            list(range(nodes_n)))
        # Run analysis
        res_list = list(res_iterator)
        if settings['fdr_correction']:
            res = network_fdr({'alpha_fdr': settings['alpha_fdr']}, *res_list)
        else:
            res = res_list[0]
            res.combine_results(*res_list[1:])
    else:
        # Run analysis
        res = network_analysis.analyse_network(settings=settings, data=dat)
    return res
def _compare_checkpoint_runs(filename_ckp1, filename_ckp2):
    """Run the checkpoint comparison; checkpoint files are left on disk."""
    # Index of the checkpoint-file line that is copied from the finished run
    # into the freshly initialised checkpoint.
    # NOTE(review): presumably the line recording the already analysed
    # target/source state -- confirm against the .ckp file format.
    ckp_line = 8

    # Test running analysis without any checkpoint setting.
    # Generate test data
    data = Data(seed=SEED)
    data.generate_mute_data(n_samples=N_SAMPLES, n_replications=1)

    # Initialise analysis object and define settings
    network_analysis1 = BivariateMI()
    network_analysis2 = BivariateMI()
    network_analysis3 = BivariateMI()
    network_analysis4 = BivariateMI()

    # Settings without checkpointing.
    settings1 = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': N_PERM,
        'n_perm_min_stat': N_PERM,
        'n_perm_max_seq': N_PERM,
        'n_perm_omnibus': N_PERM,
        'max_lag_sources': 3,
        'min_lag_sources': 2,
        'noise_level': 0,
    }
    # Settings with checkpointing (independent copies, created before any
    # analysis touches settings1).
    settings2 = dict(settings1, write_ckp=True, filename_ckp=filename_ckp1)
    # Settings resuming from checkpoint.
    settings3 = dict(settings1, write_ckp=True, filename_ckp=filename_ckp2)

    # Setting sources and targets for the analysis
    sources = [0]
    targets = [2, 3]
    targets2 = [3]

    # Starting Analysis of the Network
    # results of a network analysis without checkpointing
    results1 = network_analysis1.analyse_network(
        settings=settings1, data=data, targets=targets, sources=sources)

    # results of a network analysis with checkpointing
    results2 = network_analysis2.analyse_network(
        settings=settings2, data=data, targets=targets, sources=sources)

    # Creating a checkpoint similar to the above settings where the targets
    # of the first source have been already analyzed
    with open(filename_ckp1 + ".ckp") as f:
        # Strip only the line terminator, never a payload character.
        tarsource = f.readlines()[ckp_line].rstrip('\n')

    network_analysis3._set_checkpointing_defaults(
        settings3, data, sources, targets)

    with open(filename_ckp2 + ".ckp") as f:
        lines = f.read().splitlines()
    lines[ckp_line] = tarsource
    with open(filename_ckp2 + ".ckp", 'w') as f:
        f.write('\n'.join(lines))
    print(lines)

    # Resume analysis.
    network_analysis_res = BivariateMI()
    data, _resumed_settings, targets, sources = (
        network_analysis_res.resume_checkpoint(filename_ckp2))

    # results of a network analysis resuming from checkpoint
    results3 = network_analysis_res.analyse_network(
        settings=settings3, data=data, targets=targets, sources=sources)

    # results of a network analysis without checkpointing but other
    # targets/sources
    results4 = network_analysis4.analyse_network(
        settings=settings1, data=data, targets=targets2, sources=sources)

    adj_matrix1 = results1.get_adjacency_matrix(weights='binary', fdr=False)
    adj_matrix2 = results2.get_adjacency_matrix(weights='binary', fdr=False)
    adj_matrix3 = results3.get_adjacency_matrix(weights='binary', fdr=False)
    adj_matrix4 = results4.get_adjacency_matrix(weights='binary', fdr=False)

    result1 = adj_matrix1.get_edge_list()
    result2 = adj_matrix2.get_edge_list()
    result3 = adj_matrix3.get_edge_list()
    result4 = adj_matrix4.get_edge_list()

    print("Printing results:")
    print("Result 1: without checkpoint")
    print(result1)
    print("Result 2: with checkpoint")
    print(result2)
    print("Result 3: resuming from checkpoint")
    print(result3)
    print("Result 4: without checkpoint and different targets and sources")
    print(result4)

    print("Comparing the results:")
    assert np.array_equal(result1, result2), 'Result 1 and 2 not equal!'
    assert np.array_equal(result1, result3), 'Result 1 and 3 not equal!'
    assert not np.array_equal(result1, result4), 'Result 1 and 4 equal, expected to be different!'

    cmp1 = list(set(result1).intersection(result2))
    cmp2 = list(set(result1).intersection(result3))
    cmp3 = list(set(result1).intersection(result4))
    len1 = len(result1)
    assert len(cmp1) == len1 and len(cmp2) == len1 and len(cmp3) != len1, (
        "JidtKraskovCMI BivariateMI running with checkpoints does not give "
        "the expected results")

    print("Final")
    print("Elements of comparison between result 1 and 2")
    print(cmp1)
    print("Elements of comparison between result 1 and 3")
    print(cmp2)
    print("Elements of comparison between result 1 and 4")
    print(cmp3)


def test_JidtKraskovCMI_BMI_checkpoint():
    """Compare BivariateMI results with and without checkpointing.

    Runs the analysis without checkpointing, with checkpointing, and resumed
    from a checkpoint, and asserts that all three yield the same edge list,
    while a run on different targets yields a different one. Checkpoint
    files are removed afterwards, also when the comparison fails.
    """
    filename_ckp1 = os.path.join(
        os.path.dirname(__file__), 'data', 'run_checkpoint')
    filename_ckp2 = os.path.join(
        os.path.dirname(__file__), 'data', 'resume_checkpoint')

    try:
        _compare_checkpoint_runs(filename_ckp1, filename_ckp2)
    except BaseException:
        # Best-effort cleanup so a failed run does not leave stale checkpoint
        # files behind; some files may not exist yet, and the original
        # failure must stay the reported error.
        for filename in (filename_ckp1, filename_ckp2):
            try:
                _clear_ckp(filename)
            except OSError:
                pass
        raise
    _clear_ckp(filename_ckp1)
    _clear_ckp(filename_ckp2)
# Import classes
from idtxl.bivariate_mi import BivariateMI
from idtxl.data import Data
from idtxl.visualise_graph import plot_network
import matplotlib.pyplot as plt

# a) Generate test data
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=5)

# b) Initialise analysis object and define settings
network_analysis = BivariateMI()
settings = dict(
    cmi_estimator='JidtGaussianCMI',
    max_lag_sources=5,
    min_lag_sources=1,
)

# c) Run analysis
results = network_analysis.analyse_network(settings=settings, data=data)

# d) Plot inferred network to console and via matplotlib; both outputs share
#    the same edge weighting and FDR options
display_options = {'weights': 'max_te_lag', 'fdr': False}
results.print_edge_list(**display_options)
plot_network(results=results, **display_options)
plt.show()
# Import classes
from idtxl.bivariate_mi import BivariateMI
from idtxl.data import Data
from idtxl.visualise_graph import plot_network
import matplotlib.pyplot as plt

# a) Generate test data
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=5)

# b) Initialise analysis object and define settings
network_analysis = BivariateMI()
settings = {}
settings['cmi_estimator'] = 'JidtGaussianCMI'
settings['max_lag_sources'] = 5
settings['min_lag_sources'] = 1

# c) Run analysis
results = network_analysis.analyse_network(settings=settings, data=data)

# d) Plot inferred network to console and via matplotlib, using the same
#    weighting for the printed edge list and the plotted graph
edge_weights = 'max_te_lag'
use_fdr = False
results.print_edge_list(weights=edge_weights, fdr=use_fdr)
plot_network(results=results, weights=edge_weights, fdr=use_fdr)
plt.show()
def test_return_local_values():
    """Verify local MI estimation against averaged MI estimates.

    Checks type and dimensions of the local MI array for one target, then
    repeats the analysis with averaged values and asserts that single-link
    MI, selected-source MI and the mean local MI agree within a tolerance.
    """
    max_lag = 5
    data = Data()
    data.generate_mute_data(200, 5)
    settings = dict(
        cmi_estimator='JidtKraskovCMI',
        local_values=True,  # request calculation of local values
        n_perm_max_stat=21,
        n_perm_min_stat=21,
        n_perm_max_seq=21,
        n_perm_omnibus=21,
        max_lag_sources=max_lag,
        min_lag_sources=4,
        max_lag_target=max_lag,
    )
    target = 1
    mi = BivariateMI()

    # --- Local values: type and dimensions ---
    results_local = mi.analyse_network(settings, data, targets=[target])
    lmi = results_local.get_single_target(target, fdr=False)['mi']
    n_sources = len(results_local.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    expected_samples = data.n_realisations_samples((0, max_lag))
    assert lmi.shape[1] == expected_samples, (
        'Wrong dim (no. samples) in LMI estimate {0}'.format(lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate {0}'.format(lmi.shape))

    # --- Averaged values ---
    # Test for correctness of single link MI estimation by comparing it to
    # the MI between single variables and the target. For this test case,
    # where only one significant past variable is found per source, the two
    # should be the same. Also compare single link average MI to mean local
    # MI for each link.
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    mi_selected_sources = results_avg.get_single_target(
        target, fdr=False)['selected_sources_mi']
    sources_local = results_local.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    links_agree = np.isclose(mi_single_link, mi_selected_sources, atol=0.005)
    assert links_agree.all(), (
        'Single link average MI {0} and single source MI {1} deviate.'.format(
            mi_single_link, mi_selected_sources))

    # Check if average and local values are the same. Test each source
    # separately. Inferred sources may differ between the two calls to
    # analyse_network() due to low number of surrogates used in unit testing.
    print('Compare average and local values.')
    for s in set(sources_avg).intersection(sources_local):
        idx_avg = np.where(sources_avg == s)[0][0]
        idx_local = np.where(sources_local == s)[0][0]
        link_mi = mi_single_link[idx_avg]
        mean_lmi = np.mean(lmi[idx_local, :, :])
        assert np.isclose(link_mi, mean_lmi, atol=0.005), (
            'Single link average MI {0:0.6f} and mean LMI {1:0.6f} deviate.'.format(
                link_mi, mean_lmi))
        source_mi = mi_selected_sources[idx_avg]
        assert np.isclose(link_mi, source_mi, atol=0.005), (
            'Single link average MI {0:0.6f} and single source MI {1:0.6f} deviate.'.format(
                link_mi, source_mi))