def node_label_and_degree_joint_stats(graph_ref_list, graph_pred_list):
    """Compute the MMD between joint (degree, node label) histograms.

    Every distinct ``(degree, label)`` pair seen in either graph set is
    assigned a bin index (in first-seen order, reference graphs first).
    Each graph is then turned into a sample by
    ``node_label_and_degree_worker`` and the two sample sets are compared
    with ``mmd.compute_mmd`` using the ``mmd.gaussian_emd`` kernel.

    Args:
        graph_ref_list: iterable of reference networkx graphs. Every node
            must carry a ``'label'`` attribute.
        graph_pred_list: iterable of generated networkx graphs. Graphs with
            zero nodes are dropped before evaluation.

    Returns:
        The value returned by ``mmd.compute_mmd``.

    Raises:
        KeyError: if a node has no ``'label'`` attribute.
    """
    # The reference graphs are traversed twice (bin map + worker dispatch),
    # so materialize them once; this also lets callers pass any iterable.
    graph_ref_list = list(graph_ref_list)
    # in case an empty graph is generated
    graph_pred_list = [
        G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()

    # Map each distinct (degree, label) pair to a histogram bin index.
    # Iterating the two collections in turn avoids building a concatenated
    # copy and keeps the first-seen ordering (reference first).
    node_map = {}
    for graph_list in (graph_ref_list, graph_pred_list):
        for graph in graph_list:
            for u in graph.nodes():
                key = (graph.degree[u], graph.nodes[u]['label'])
                # len(node_map) is only stored when the key is new.
                node_map.setdefault(key, len(node_map))

    worker = partial(node_label_and_degree_worker, node_map=node_map)

    # One pool serves both graph sets; spawning a second pool only adds
    # process start-up cost.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=MAX_WORKERS) as executor:
        sample_ref = list(executor.map(worker, graph_ref_list))
        sample_pred = list(executor.map(worker, graph_pred_list))

    mmd_dist = mmd.compute_mmd(
        sample_ref, sample_pred, mmd.gaussian_emd, n_jobs=MAX_WORKERS)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing joint node label and degree mmd: ', elapsed)

    return mmd_dist
def orbit_stats_all(graph_ref_list, graph_pred_list):
    """Compute the MMD between the orbit counts of two sets of graphs.

    Each graph is passed to ``orbits_counts_worker``; results that are
    ``None`` are discarded. The remaining per-graph results are stacked
    with ``np.array`` and compared through ``mmd.compute_mmd`` using a
    Gaussian kernel (``sigma=30.0``) with ``is_hist=False``.

    Args:
        graph_ref_list: iterable of reference networkx graphs.
        graph_pred_list: iterable of generated networkx graphs. Graphs with
            zero nodes are dropped before evaluation.

    Returns:
        The value returned by ``mmd.compute_mmd``.
    """
    # in case an empty graph is generated
    graph_pred_list = [
        G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()

    # One pool serves both graph sets; spawning a second pool only adds
    # process start-up cost.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=MAX_WORKERS) as executor:
        # The worker signals "no result" with None; skip those graphs.
        total_counts_ref = [
            counts
            for counts in executor.map(orbits_counts_worker, graph_ref_list)
            if counts is not None]
        total_counts_pred = [
            counts
            for counts in executor.map(orbits_counts_worker, graph_pred_list)
            if counts is not None]

    total_counts_ref = np.array(total_counts_ref)
    total_counts_pred = np.array(total_counts_pred)

    mmd_dist = mmd.compute_mmd(
        total_counts_ref, total_counts_pred,
        metric=partial(mmd.gaussian, sigma=30.0),
        is_hist=False, n_jobs=MAX_WORKERS)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing orbit mmd: ', elapsed)

    return mmd_dist
def degree_stats(graph_ref_list, graph_pred_list):
    """Compute the distance between the degree distributions of two
    unordered sets of graphs.

    Each graph is turned into a sample by ``degree_worker`` and the two
    sample sets are compared with ``mmd.compute_mmd`` using the
    ``mmd.gaussian_emd`` kernel.

    Args:
        graph_ref_list: iterable of reference networkx graphs.
        graph_pred_list: iterable of generated networkx graphs to be
            evaluated. Graphs with zero nodes are dropped first.

    Returns:
        The value returned by ``mmd.compute_mmd``.
    """
    # in case an empty graph is generated
    graph_pred_list = [
        G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()

    # One pool serves both graph sets; spawning a second pool only adds
    # process start-up cost.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=MAX_WORKERS) as executor:
        sample_ref = list(executor.map(degree_worker, graph_ref_list))
        sample_pred = list(executor.map(degree_worker, graph_pred_list))

    mmd_dist = mmd.compute_mmd(
        sample_ref, sample_pred, mmd.gaussian_emd, n_jobs=MAX_WORKERS)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing degree mmd: ', elapsed)

    return mmd_dist
def clustering_stats(graph_ref_list, graph_pred_list, bins=100):
    """Compute the MMD between the clustering samples of two sets of graphs.

    Each graph is handed to ``clustering_worker`` as a ``(graph, bins)``
    tuple. The two resulting sample sets are compared with
    ``mmd.compute_mmd`` using ``mmd.gaussian_emd`` with ``sigma=0.1`` and
    ``distance_scaling=bins``.

    Args:
        graph_ref_list: iterable of reference networkx graphs.
        graph_pred_list: iterable of generated networkx graphs. Graphs with
            zero nodes are dropped before evaluation.
        bins: value forwarded to ``clustering_worker`` with every graph and
            also used as ``distance_scaling`` for the kernel.

    Returns:
        The value returned by ``mmd.compute_mmd``.
    """
    # in case an empty graph is generated
    graph_pred_list = [
        G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()

    # One pool serves both graph sets; spawning a second pool only adds
    # process start-up cost.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=MAX_WORKERS) as executor:
        # The worker takes a single (graph, bins) tuple argument.
        sample_ref = list(executor.map(
            clustering_worker, [(G, bins) for G in graph_ref_list]))
        sample_pred = list(executor.map(
            clustering_worker, [(G, bins) for G in graph_pred_list]))

    mmd_dist = mmd.compute_mmd(
        sample_ref, sample_pred,
        metric=partial(mmd.gaussian_emd, sigma=0.1, distance_scaling=bins),
        n_jobs=MAX_WORKERS)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing clustering mmd: ', elapsed)

    return mmd_dist
def node_label_stats(graph_ref_list, graph_pred_list):
    """Compute the distance between the node label distributions of two
    unordered sets of graphs.

    Every distinct node label seen in either graph set is assigned a bin
    index (in first-seen order, reference graphs first). Each graph is then
    turned into a sample by ``node_label_worker`` and the two sample sets
    are compared with ``mmd.compute_mmd`` using the ``mmd.gaussian_emd``
    kernel.

    Args:
        graph_ref_list: iterable of reference networkx graphs. Every node
            must carry a ``'label'`` attribute.
        graph_pred_list: iterable of generated networkx graphs to be
            evaluated. Graphs with zero nodes are dropped first.

    Returns:
        The value returned by ``mmd.compute_mmd``.

    Raises:
        KeyError: if a node has no ``'label'`` attribute.
    """
    # The reference graphs are traversed twice (bin map + worker dispatch),
    # so materialize them once; this also lets callers pass any iterable.
    graph_ref_list = list(graph_ref_list)
    # in case an empty graph is generated
    graph_pred_list = [
        G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()

    # Map each distinct label to a histogram bin index. Iterating the two
    # collections in turn avoids building a concatenated copy and keeps the
    # first-seen ordering (reference first).
    node_map = {}
    for graph_list in (graph_ref_list, graph_pred_list):
        for graph in graph_list:
            for u in graph.nodes():
                # len(node_map) is only stored when the label is new.
                node_map.setdefault(graph.nodes[u]['label'], len(node_map))

    worker = partial(node_label_worker, node_map=node_map)

    # One pool serves both graph sets; spawning a second pool only adds
    # process start-up cost.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=MAX_WORKERS) as executor:
        sample_ref = list(executor.map(worker, graph_ref_list))
        sample_pred = list(executor.map(worker, graph_pred_list))

    mmd_dist = mmd.compute_mmd(
        sample_ref, sample_pred, mmd.gaussian_emd, n_jobs=MAX_WORKERS)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing node label mmd: ', elapsed)

    return mmd_dist
def nspdk_stats(graph_ref_list, graph_pred_list):
    """Compute the MMD between two sets of graphs with the 'nspdk' metric.

    The graphs themselves (not histograms) are handed to
    ``mmd.compute_mmd`` with ``metric='nspdk'`` and ``is_hist=False``.

    Args:
        graph_ref_list: reference networkx graphs, passed through unchanged.
        graph_pred_list: generated networkx graphs. Graphs with zero nodes
            are dropped before evaluation.

    Returns:
        The value returned by ``mmd.compute_mmd``.
    """
    # Generated sets may contain empty graphs; leave them out.
    non_empty_pred = [
        graph for graph in graph_pred_list if graph.number_of_nodes() != 0
    ]

    started_at = datetime.now()
    result = mmd.compute_mmd(
        graph_ref_list,
        non_empty_pred,
        metric='nspdk',
        is_hist=False,
        n_jobs=MAX_WORKERS,
    )
    duration = datetime.now() - started_at

    if PRINT_TIME:
        print('Time computing NSPDK mmd: ', duration)
    return result