def evaluate_many(n='all', workers=0):
    """Run the pairwise evaluation over layer pairs and save a CSV report."""
    reports = []
    output_file = f'output/fao_report_{n}.csv'
    if n == 'all':
        dataset = load_dataset(drop_small_layers=True)
        layer_pairs = list(combinations(dataset.layer_names.index, r=2))
    else:
        # With a fixed n, pass None so that each run draws a random layer pair.
        layer_pairs = [None] * n

    if workers == 0:
        # Sequential mode: a KeyboardInterrupt saves whatever was collected so far.
        for layer_pair in tqdm(layer_pairs):
            try:
                reports.append(demo_single_run_silent(layer_pair))
            except KeyboardInterrupt:
                output_file = output_file.replace('.csv', '_partial.csv')
                break
    else:
        reports = process_map(demo_single_run_silent, layer_pairs,
                              max_workers=workers, chunksize=64)

    reports = list(filter(None, reports))
    print('Total completed', len(reports), 'out of', len(layer_pairs))
    df = pd.DataFrame.from_records(reports)
    df.to_csv(output_file, index=False)
    compute_metrics(output_file)
    return df
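# Hedged usage sketch (not part of the original module): assumes this module's
# load_dataset, demo_single_run_silent and compute_metrics are importable and
# that an output/ directory exists for the CSV report.
def example_evaluate_many():
    # Evaluate 20 random layer pairs across 4 worker processes; with workers=0
    # the pairs run sequentially under tqdm and Ctrl-C writes a *_partial.csv.
    df = evaluate_many(n=20, workers=4)
    print(df.head())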
def fao_layer_sample(layer_id=None, hidden_ratio=0.5, random_state=None) -> LayerSplit:
    """
    Select a random layer, then split its nodes into 'observed' and 'hidden' parts.
    Edges adjacent to any hidden node are also considered 'hidden'.

    Args:
        layer_id (int, optional): Chosen at random by default.
        hidden_ratio (float): Ratio of nodes to hide (0.5 by default).
            The resulting ratio of hidden edges is typically larger.
            If the ratio is 1.0, the observed edge list is empty and the
            reconstruction algorithm does not account for it.
        random_state (int): Random seed for splitting nodes.

    Returns:
        LayerSplit: data class containing nodes and edges for each partition:
            'observed', 'hidden' and 'full'.
    """
    dataset = load_dataset()
    if layer_id is None:
        layer_id = random.choice(dataset.layer_names.index)
    edges = filter_by_layer(dataset.edges, layer_id)
    if hidden_ratio != 1:
        return random_layer_split(edges=edges,
                                  layer_id=layer_id,
                                  hidden_ratio=hidden_ratio,
                                  random_state=random_state)
    else:
        return layer_split_with_no_observables(edges=edges, layer_id=layer_id)
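# Hedged usage sketch (not from the original module): draws one split with a
# fixed seed. The docstring above only guarantees that LayerSplit carries the
# 'observed', 'hidden' and 'full' partitions; the exact attribute names may differ.
def example_fao_layer_sample():
    split = fao_layer_sample(hidden_ratio=0.5, random_state=0)
    print(split)  # LayerSplit with 'observed', 'hidden' and 'full' partitions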
def plot_multiplexity_histogram(dataset=None):
    if dataset is None:
        dataset = fao_data.load_dataset(drop_small_layers=True)
    multiplexities = compute_multiplexity(dataset)

    plt.rc('font', size=14)
    plt.hist(multiplexities, bins=32, facecolor='g', alpha=0.75)
    plt.ylabel('frequency')
    plt.xlabel('multiplexity value')
    ax = plt.gca()
    ax.axes.yaxis.set_ticklabels([])
    ax.axes.yaxis.set_ticks([])
    plt.tight_layout()
    plt.savefig('output/mltplx_hist.svg')
    plt.savefig('output/mltplx_hist.png', dpi=1200)
    plt.show()
def demo_evaluate_multiple_layers(n=None, layer_ids=None, num_seeds=2, num_workers=6):
    if layer_ids is None:
        layer_ids = load_dataset(drop_small_layers=True).layer_names.index
    if n is not None:
        layer_ids = random.choices(layer_ids, k=n)
    seeds = np.arange(num_seeds)

    # Each entry is (display name, reconstruction function, optional kwarg pair).
    experiments = [
        ('Random', random_baseline.reconstruct_layer_sample),
        ('MaxEnt', ipf.reconstruct_layer_sample, ('ipf_steps', 0)),
        ('IPF', ipf.reconstruct_layer_sample_unconsciously),
        # ('IPF enforced', ipf.reconstruct_layer_sample),
        ('IPF enforced', ipf.reconstruct_v2),
        ('DBCM', dbcm.reconstruct_layer_sample, ('enforce_observed', False)),
        ('DBCM enforced', dbcm.reconstruct_layer_sample, ('enforce_observed', True)),
    ]

    index_keys = []
    runs = []
    for layer_id in layer_ids:
        for seed in seeds:
            sample = fao_layer_sample(layer_id, random_state=seed)
            for name, reconstruct_func, *kwargs in experiments:
                index_keys.append((layer_id, name, seed))
                runs.append((sample, reconstruct_func, dict(kwargs)))

    results_list = process_map(_run_single_eval, runs,
                               chunksize=3, max_workers=num_workers, smoothing=0)
    results_df = pd.DataFrame(
        results_list,
        index=pd.MultiIndex.from_tuples(index_keys, names=['layer_id', 'name', 'seed']),
    )
    print('Stats by layer')
    display(results_df[METRICS_DISPLAY].groupby(
        level=['layer_id', 'name']).agg(describe_mean_std))
    print('Stats by method')
    display(results_df[METRICS_DISPLAY].groupby(
        level=['name']).agg(describe_mean_std))
    return results_df
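# Hedged usage sketch: runs the comparison on three random layers with two seeds
# each; requires the reconstruction modules (random_baseline, ipf, dbcm) and an
# IPython display() to be available, as in the function above.
def example_demo_evaluate_multiple_layers():
    results_df = demo_evaluate_multiple_layers(n=3, num_seeds=2, num_workers=2)
    # Rows are indexed by (layer_id, method name, seed); aggregate per method.
    print(results_df.groupby(level='name').mean(numeric_only=True))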
def fao_multilayer_sample(
        layer_ids: Optional[List[int]] = None,
        hidden_ratio: float = 0.5,
        random_state: Optional[int] = None) -> MultiLayerSplit:
    dataset = load_dataset(drop_small_layers=True)
    if layer_ids is None:
        layer_ids = random.sample(dataset.layer_names.index.tolist(), k=2)
    elif len(layer_ids) != 2:
        raise NotImplementedError('Only two layers at a time are supported')
    edges = filter_by_layer(dataset.edges, layer_ids)
    return multilayer_sample(edges=edges,
                             layer_ids=layer_ids,
                             hidden_ratio=hidden_ratio,
                             random_state=random_state)
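# Hedged usage sketch (not from the original module): samples two random layers;
# the exact attribute layout of MultiLayerSplit is not shown in this excerpt.
def example_fao_multilayer_sample():
    split = fao_multilayer_sample(layer_ids=None, hidden_ratio=0.5, random_state=42)
    print(split)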
def plot_multiplexity(dataset=None):
    if dataset is None:
        dataset = fao_data.load_dataset(drop_small_layers=True)
    n = len(dataset.layer_names)
    multiplexities = compute_multiplexity(dataset)
    multiplexities_2d = squareform(multiplexities) + np.eye(n)

    plt.figure(figsize=(4, 3.4), dpi=200)
    plt.imshow(multiplexities_2d, cmap='Greens')
    plt.colorbar(shrink=0.95)
    ax = plt.gca()
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    plt.tight_layout()
    plt.savefig('output/mltplx_heatmap.svg')
    plt.savefig('output/mltplx_heatmap.png', dpi=1200)
    plt.show()
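# Background sketch for the squareform step above: scipy's squareform expands a
# condensed vector of n*(n-1)/2 pairwise values into a symmetric n x n matrix
# with zeros on the diagonal, which is why np.eye(n) is added before plotting.
def example_squareform():
    import numpy as np
    from scipy.spatial.distance import squareform
    condensed = np.array([0.1, 0.2, 0.3])      # pairs (0,1), (0,2), (1,2)
    full = squareform(condensed) + np.eye(3)   # diagonal set to 1.0
    print(full)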
    res = dict(mltplx_tgt=orig_mltplx,
               mltplx_pred=pred_mltplx,
               mltplx_mae=mean_absolute_error([orig_mltplx], [pred_mltplx]),
               mltplx_mape=mean_absolute_percentage_error([orig_mltplx], [pred_mltplx]),
               mltrcp_mae=mean_absolute_error([orig_mltrcp], [pred_mltrcp]),
               mltrcp_mape=mean_absolute_percentage_error([orig_mltrcp], [pred_mltrcp]))
    return res


if __name__ == '__main__':
    from fao_data import load_dataset

    dataset = load_dataset()
    N = 10
    layer_subset = dataset.layer_names.index[:N]
    common = np.zeros(shape=(N, N))
    mltplx = np.zeros(shape=(N, N))
    mltrcp = np.zeros(shape=(N, N))
    for i, layer_a in enumerate(layer_subset):
        for j, layer_b in enumerate(layer_subset):
            common[i, j] = count_common_links(dataset.edges, layer_a, layer_b, False)
            mltplx[i, j] = multiplexity(dataset.edges, layer_a, layer_b, False)
            mltrcp[i, j] = multiplexity(dataset.edges, layer_a, layer_b, True)
    print(common)
    print(mltplx.round(2))
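# Worked note on the metric calls above (a sketch, not from the original code):
# with single-element lists, sklearn's mean_absolute_error reduces to |orig - pred|
# and mean_absolute_percentage_error to |orig - pred| / |orig|.
def example_single_value_metrics():
    from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
    orig, pred = 0.80, 0.60
    print(mean_absolute_error([orig], [pred]))             # 0.2
    print(mean_absolute_percentage_error([orig], [pred]))  # 0.25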