def assemble(traj):
    """Collect per-target inference results and network files for one run.

    Loads the pickled single-target result objects produced for every node,
    combines them (with optional FDR correction), and stores the combined
    result plus the run's network description files in the trajectory.

    NOTE(review): relies on a module-level ``traj_dir`` plus the helpers
    ``load_obj``/``network_fdr``/``PickleResult`` defined elsewhere in this
    module.
    """
    run_name = traj.v_crun
    n_nodes = traj.parameters.topology.initial['nodes_n']

    # Load the single-target result object for every node; a missing pickle
    # aborts the assembly.
    res_list = []
    for target in range(n_nodes):
        pickle_name = '.'.join(
            [run_name, 'network_analysis', 'res', str(target)])
        pickle_path = os.path.join(traj_dir, pickle_name + '.pkl')
        if not os.path.exists(pickle_path):
            raise ValueError(
                'WARNING: Results missing for target {0} in {1}'.format(
                    target, run_name))
        res_list.append(load_obj(pickle_path))

    # Combine the per-target results, applying FDR correction if requested.
    if traj.parameters.network_inference.fdr_correction:
        res = network_fdr(
            {'alpha_fdr': res_list[0].settings['alpha_fdr']}, *res_list)
    else:
        res = res_list[0]
        res.combine_results(*res_list[1:])

    # Add results dictionary to trajectory results
    traj.f_add_result(
        PickleResult,
        '$.network_inference',
        network_inference_result=res,
        comment='')

    # Check if network files exist and add them to the trajectory
    network_file_names = (
        'topology.initial.adjacency_matrix',
        'delay.initial.delay_matrices',
        'node_coupling.initial.coupling_matrix',
        'node_coupling.initial.coefficient_matrices',
        'node_dynamics.time_series',
    )
    for file_name in network_file_names:
        npy_path = os.path.join(
            traj_dir, '.'.join([run_name, file_name]) + '.npy')
        if not os.path.exists(npy_path):
            raise ValueError('WARNING: file missing: {0}'.format(npy_path))
        traj.f_add_result('$.' + file_name, np.load(npy_path))
def test_network_fdr():
    """Test FDR correction on plain per-target result dictionaries.

    Builds three mock target results (two significant, one not) and checks
    that network_fdr prunes the non-significant target and keeps source
    lists consistent with their p-value arrays.
    """
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2),
                               (1, 3), (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'cond_sources_pval': np.array([0.001, 0.0014, 0.01, 0.045, 0.047]),
        'cond_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63])}
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'cond_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'cond_sources_te': np.array([1.8, 1.75, 0.75])}
    # Target 2 is not significant and should be pruned entirely.
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'cond_sources_pval': None,
        'cond_sources_te': np.array([])}
    res = {
        0: target_0,
        1: target_1,
        2: target_2}
    for correct_by_target in [True, False]:
        res_pruned = stats.network_fdr(res, 0.05, correct_by_target)
        # Fixed: the assertion message was truncated to 'Target ' before.
        assert (not res_pruned[2]['selected_vars_sources']), (
            'Target 2 has not been pruned from results.')
        for k in res_pruned.keys():
            if res_pruned[k]['cond_sources_pval'] is None:
                assert (not res_pruned[k]['selected_vars_sources'])
            else:
                assert (len(res_pruned[k]['selected_vars_sources']) ==
                        len(res_pruned[k]['cond_sources_pval'])), (
                            'Source list and list of p-values should have '
                            'the same length.')
def test_network_fdr():
    """Test FDR correction on per-target result dicts (selected_sources_* keys).

    Builds three mock target results (two significant, one not) and checks
    that network_fdr prunes the non-significant target and keeps source
    lists consistent with their p-value arrays.
    """
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2),
                               (1, 3), (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'selected_sources_pval': np.array(
            [0.001, 0.0014, 0.01, 0.045, 0.047]),
        'selected_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63])}
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'selected_sources_te': np.array([1.8, 1.75, 0.75])}
    # Target 2 is not significant and should be pruned entirely.
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'selected_sources_pval': None,
        'selected_sources_te': np.array([])}
    res = {
        0: target_0,
        1: target_1,
        2: target_2}
    for correct_by_target in [True, False]:
        res_pruned = stats.network_fdr(res, 0.05, correct_by_target)
        # Fixed: the assertion message was truncated to 'Target ' before.
        assert (not res_pruned[2]['selected_vars_sources']), (
            'Target 2 has not been pruned from results.')
        for k in res_pruned.keys():
            if res_pruned[k]['selected_sources_pval'] is None:
                assert (not res_pruned[k]['selected_vars_sources'])
            else:
                assert (len(res_pruned[k]['selected_vars_sources']) ==
                        len(res_pruned[k]['selected_sources_pval'])), (
                            'Source list and list of p-values should have '
                            'the same length.')
def test_network_fdr():
    """Test network-level FDR correction on ResultsNetworkInference objects.

    Packs three mock per-target results into two results objects, runs
    network_fdr with and without per-target correction, and checks that the
    non-significant target is pruned and that source lists match their
    p-value arrays. Also exercises the single-result call and the
    insufficient-permutations path.
    """
    perm_settings = {'n_perm_max_seq': 1000, 'n_perm_omnibus': 1000}
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2),
                               (1, 3), (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'selected_sources_pval': np.array(
            [0.001, 0.0014, 0.01, 0.045, 0.047]),
        'selected_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63]),
    }
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'selected_sources_te': np.array([1.8, 1.75, 0.75]),
    }
    # Target 2 is not significant and should be pruned by the correction.
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'selected_sources_pval': None,
        'selected_sources_te': np.array([]),
    }

    res_1 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_1._add_single_result(
        target=0, settings=perm_settings, results=target_0)
    res_1._add_single_result(
        target=1, settings=perm_settings, results=target_1)
    res_2 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_2._add_single_result(
        target=2, settings=perm_settings, results=target_2)

    for correct_by_target in (True, False):
        fdr_settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'alpha_fdr': 0.05,
            'max_lag_sources': 3,
            'min_lag_sources': 1,
            'max_lag_target': 3,
            'correct_by_target': correct_by_target}
        data = Data()
        data.generate_mute_data(n_samples=100, n_replications=3)
        analysis_setup = MultivariateTE()
        analysis_setup._initialise(
            settings=fdr_settings, data=data, sources=[1, 2], target=0)
        res_pruned = stats.network_fdr(fdr_settings, res_1, res_2)
        assert not res_pruned._single_target[2].selected_vars_sources, (
            'Target 2 has not been pruned from results.')
        for t in res_pruned.targets_analysed:
            pvals = res_pruned._single_target[t]['selected_sources_pval']
            sources = res_pruned._single_target[t]['selected_vars_sources']
            if pvals is None:
                assert not sources
            else:
                assert len(sources) == len(pvals), (
                    'Source list and list of p-values should have '
                    'the same length.')

    # Test function call for single result
    res_pruned = stats.network_fdr(fdr_settings, res_1)
    print('successful call on single result dict.')

    # Test None result for insufficient no. permutations, no FDR-corrected
    # results (the results class throws an error if no FDR-corrected results
    # exist).
    res_1.settings['n_perm_max_seq'] = 2
    res_2.settings['n_perm_max_seq'] = 2
    res_pruned = stats.network_fdr(fdr_settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_adjacency_matrix('binary', fdr=True)
def infer_network(network_inference, time_series,
                  parallel_target_analysis=False):
    """Infer a network from time series with the requested algorithm.

    Args:
        network_inference: parameter container supporting both ``in`` and
            attribute access; must provide 'algorithm' plus every parameter
            required by that algorithm.
        time_series: array-like of shape (processes, samples) or
            (processes, samples, replications) — the first dimension is
            assumed to index processes.
        parallel_target_analysis: if True, analyse each target in parallel
            via SCOOP futures; otherwise run the serial analyse_network.

    Returns:
        The results object produced by the selected IDTxl analysis.

    Raises:
        ParameterMissing: if 'algorithm' or a required parameter is absent.
        ParameterValue: if the algorithm is unknown or not yet implemented.
    """
    network_inference_algorithms = pd.DataFrame()
    network_inference_algorithms['Description'] = pd.Series({
        'bMI_greedy': 'Bivariate Mutual Information via greedy algorithm',
        'bTE_greedy': 'Bivariate Transfer Entropy via greedy algorithm',
        'mMI_greedy': 'Multivariate Mutual Information via greedy algorithm',
        'mTE_greedy': 'Multivariate Transfer Entropy via greedy algorithm',
        'cross_corr': 'Cross-correlation thresholding algorithm'
    })
    # Parameters shared by the greedy MI algorithms; the TE variants
    # additionally require 'max_lag_target'. (Previously these two lists
    # were duplicated verbatim for the bivariate/multivariate variants.)
    greedy_mi_parameters = [
        'min_lag_sources', 'max_lag_sources', 'tau_sources', 'tau_target',
        'cmi_estimator', 'z_standardise', 'permute_in_time',
        'n_perm_max_stat', 'n_perm_min_stat', 'n_perm_omnibus',
        'n_perm_max_seq', 'fdr_correction', 'p_value'
        # 'alpha_max_stats', 'alpha_min_stats', 'alpha_omnibus',
        # 'alpha_max_seq', 'alpha_fdr'
    ]
    greedy_te_parameters = [
        'min_lag_sources', 'max_lag_sources', 'tau_sources',
        'max_lag_target', 'tau_target', 'cmi_estimator', 'z_standardise',
        'permute_in_time', 'n_perm_max_stat', 'n_perm_min_stat',
        'n_perm_omnibus', 'n_perm_max_seq', 'fdr_correction', 'p_value'
        # 'alpha_max_stats', 'alpha_min_stats', 'alpha_omnibus',
        # 'alpha_max_seq', 'alpha_fdr'
    ]
    network_inference_algorithms['Required parameters'] = pd.Series({
        'bMI_greedy': greedy_mi_parameters,
        'bTE_greedy': greedy_te_parameters,
        'mMI_greedy': greedy_mi_parameters,
        'mTE_greedy': greedy_te_parameters,
        'cross_corr': ['min_lag_sources', 'max_lag_sources']
    })

    try:
        # Ensure that a network inference algorithm has been specified
        if 'algorithm' not in network_inference:
            raise ParameterMissing('algorithm')
        # Ensure that the provided algorithm is implemented
        if network_inference.algorithm not in (
                network_inference_algorithms.index):
            raise ParameterValue(network_inference.algorithm)
        # Ensure that all the parameters required by the algorithm have
        # been provided
        par_required = network_inference_algorithms['Required parameters'][
            network_inference.algorithm]
        for par in par_required:
            if par not in network_inference:
                raise ParameterMissing(par)
    except ParameterMissing as e:
        print(e.msg, e.par_names)
        raise
    except ParameterValue as e:
        print(e.msg, e.par_value)
        raise

    nodes_n = np.shape(time_series)[0]

    can_be_z_standardised = True
    if network_inference.z_standardise:
        # Z-standardisation is only possible when every process has
        # non-zero variance (first dimension indexes processes).
        can_be_z_standardised = np.all(np.std(time_series, axis=1) > 0)
        if not can_be_z_standardised:
            print('Time series can not be z-standardised')

    # 'ps' = (processes, samples); 'psr' adds a replication dimension.
    if len(time_series.shape) == 2:
        dim_order = 'ps'
    else:
        dim_order = 'psr'

    # Load time series. (The dead `dat = Data()` that was immediately
    # overwritten has been removed; bitwise `&` replaced by `and`.)
    dat = Data(
        time_series,
        dim_order=dim_order,
        normalise=(network_inference.z_standardise and
                   can_be_z_standardised))

    algorithm = network_inference.algorithm
    # Dispatch table instead of a chain of equality checks.
    greedy_analysis_classes = {
        'bMI_greedy': BivariateMI,
        'mMI_greedy': MultivariateMI,
        'bTE_greedy': BivariateTE,
        'mTE_greedy': MultivariateTE}
    if algorithm not in greedy_analysis_classes:
        # e.g. 'cross_corr' is listed but has no implementation yet.
        raise ParameterValue(
            algorithm,
            msg='Network inference algorithm not yet implemented')
    network_analysis = greedy_analysis_classes[algorithm]()

    # The single p_value is reused for every statistical level.
    settings = {
        'min_lag_sources': network_inference.min_lag_sources,
        'max_lag_sources': network_inference.max_lag_sources,
        'tau_sources': network_inference.tau_sources,
        'max_lag_target': network_inference.max_lag_target,
        'tau_target': network_inference.tau_target,
        'cmi_estimator': network_inference.cmi_estimator,
        'kraskov_k': network_inference.kraskov_k,
        'num_threads': network_inference.jidt_threads_n,
        'permute_in_time': network_inference.permute_in_time,
        'n_perm_max_stat': network_inference.n_perm_max_stat,
        'n_perm_min_stat': network_inference.n_perm_min_stat,
        'n_perm_omnibus': network_inference.n_perm_omnibus,
        'n_perm_max_seq': network_inference.n_perm_max_seq,
        'fdr_correction': network_inference.fdr_correction,
        'alpha_max_stat': network_inference.p_value,
        'alpha_min_stat': network_inference.p_value,
        'alpha_omnibus': network_inference.p_value,
        'alpha_max_seq': network_inference.p_value,
        'alpha_fdr': network_inference.p_value
    }

    if parallel_target_analysis:
        # Use SCOOP to create a generator of map results, each
        # corresponding to one map iteration
        res_iterator = futures.map_as_completed(
            network_analysis.analyse_single_target,
            itertools.repeat(settings, nodes_n),
            itertools.repeat(dat, nodes_n),
            list(range(nodes_n)))
        # Run analysis
        res_list = list(res_iterator)
        if settings['fdr_correction']:
            res = network_fdr(
                {'alpha_fdr': settings['alpha_fdr']}, *res_list)
        else:
            res = res_list[0]
            res.combine_results(*res_list[1:])
    else:
        # Run analysis
        res = network_analysis.analyse_network(settings=settings, data=dat)
    return res
def test_network_fdr():
    """Test FDR correction on per-target dicts carrying their own settings.

    Builds three mock target results (two significant, one not), runs
    network_fdr with and without per-target correction, and checks pruning
    and source/p-value consistency. Also exercises the single-result call
    and the None result returned for too few permutations.
    """
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2),
                               (1, 3), (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'selected_sources_pval': np.array(
            [0.001, 0.0014, 0.01, 0.045, 0.047]),
        'selected_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63]),
        'settings': {
            'n_perm_max_seq': 1000,
            'n_perm_omnibus': 1000
        }
    }
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'selected_sources_te': np.array([1.8, 1.75, 0.75]),
        'settings': {
            'n_perm_max_seq': 1000,
            'n_perm_omnibus': 1000
        }
    }
    # Target 2 is not significant and should be pruned by the correction.
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'selected_sources_pval': None,
        'selected_sources_te': np.array([]),
        'settings': {
            'n_perm_max_seq': 1000,
            'n_perm_omnibus': 1000
        }
    }
    res_1 = {
        0: target_0,
        1: target_1,
    }
    res_2 = {2: target_2}
    for correct_by_target in [True, False]:
        settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'alpha_fdr': 0.05,
            'max_lag_sources': 3,
            'min_lag_sources': 1,
            'max_lag_target': 3,
            'correct_by_target': correct_by_target
        }
        dat = Data()
        dat.generate_mute_data(n_samples=100, n_replications=3)
        analysis_setup = MultivariateTE()
        analysis_setup._initialise(settings=settings, data=dat,
                                   sources=[1, 2], target=0)
        res_pruned = stats.network_fdr(settings, res_1, res_2)
        # Fixed: the assertion message was truncated to 'Target ' before.
        assert (not res_pruned[2]['selected_vars_sources']), (
            'Target 2 has not been pruned from results.')
        for k in res_pruned.keys():
            if res_pruned[k]['selected_sources_pval'] is None:
                assert (not res_pruned[k]['selected_vars_sources'])
            else:
                assert (len(res_pruned[k]['selected_vars_sources']) == len(
                    res_pruned[k]['selected_sources_pval'])), (
                        'Source list and list of p-values should have '
                        'the same length.')
    # Test function call for single result
    res_pruned = stats.network_fdr(settings, res_1)
    print('successful call on single result dict.')
    # Test None result for insufficient no. permutations
    res_1[0]['settings']['n_perm_max_seq'] = 2
    res_pruned = stats.network_fdr(settings, res_1, res_2)
    # Fixed: message typo 'is no.' -> 'if no.'.
    assert not res_pruned, (
        'Res. should be None if no. permutations too low.')