def compute_graph_stats(root):
    """Build a GraphStats object for every graph reachable from *root*.

    Args:
        root: either a plain list of graphs, or a tree node exposing
            ``.graph`` and ``.descendants`` (anytree-style chain).

    Returns:
        list: one GraphStats per graph, all with run_id=1.
    """
    print('computing GraphStats... ', end='', flush=True)
    # isinstance instead of `type(...) is list`: idiomatic, and also
    # accepts list subclasses
    if isinstance(root, list):
        graph_stats = [GraphStats(graph=g, run_id=1) for g in root]
    else:
        # root is the chain head; include it plus every descendant node
        graph_stats = [GraphStats(graph=node.graph, run_id=1)
                       for node in [root] + list(root.descendants)]
    print('done')
    return graph_stats
def main():
    """Compute JS degree-distance between the original graph and every
    generation of each GraphRNN chain, and write the results as TSV."""
    base_path = '/data/infinity-mirror/'
    input_path = '/home/dgonza26/infinity-mirror/input'
    dataset = 'flights'
    model = 'GraphRNN'
    output_path = os.path.join(base_path, 'stats', 'degree')

    abs_lambda = []
    trials = []

    # load (root, generation) pairs, order by generation, keep the roots
    loaded = sorted(load_data(os.path.join(base_path, 'cleaned'), dataset),
                    key=lambda pair: pair[1])
    chains = [root for root, _generation in loaded]

    # the reference graph the chains were seeded from
    if dataset == 'clique-ring-500-4':
        G = nx.ring_of_cliques(500, 4)
    else:
        G = init(os.path.join(input_path, f'{dataset}.g'))

    # transpose: one list per trial, holding that trial's generations in order
    per_chain = [list(generations) for generations in zip(*chains)]

    rows = {col: [] for col in ('model', 'gen', 'abs')}

    baseline = GraphStats(graph=G, run_id=1)
    for chain_id, chain in enumerate(per_chain, 1):
        print(f'chain: {chain_id}')
        for gen, graph in enumerate(chain, 1):
            print(f'\tgen: {gen} ... ', end='', flush=True)
            comparator = GraphPairCompare(baseline,
                                          GraphStats(graph=graph, run_id=1))
            try:
                distance = comparator.js_distance()
            except Exception as e:
                # a failed comparison is reported and skipped, not fatal
                print(f'ERROR\n{e}')
            else:
                rows['abs'].append(distance)
                rows['model'].append('GraphRNN')
                rows['gen'].append(gen)
                print('done')

    df = pd.DataFrame(rows)
    print(df.head())
    out_file = f'{output_path}/{dataset}_{model}_lambda.csv'
    df.to_csv(out_file, float_format='%.7f', sep='\t', index=False,
              na_rep='nan')
    print(f'wrote {out_file}')
    return
def _fit(self) -> None:
    """Populate self.params with degree statistics of the input graph."""
    g = GraphStats(self.input_graph, trial=-1)
    self.params.update(
        n=self.input_graph.order(),
        # unnormalized on purpose: downstream code needs raw counts
        degree_dist=g.degree_dist(normalized=False),
        degree_seq=g['degree_seq'],
        avg_cc_by_deg=g.clustering_coefficients_by_degree(),
    )
    return
def stats_computation(dataset, model, trial, filename, stats):
    """Load a pickled list of graphs and write the requested stats JSONs.

    Raises AssertionError if the pickle is not a list of nx.Graph.
    """
    pickle_path = (Path(get_imt_output_directory()) / 'pickles' / dataset /
                   model / filename)
    graph_list = load_pickle(pickle_path)

    assert isinstance(graph_list, list), \
        f'Expected type "list" and got type {type(graph_list)}.'
    assert all(isinstance(g, nx.Graph) for g in graph_list), \
        f'Expected a list of nx.Graph and got disappointed instead.'

    ColorPrint.print_orange(f'(unknown) has length {len(graph_list)}')

    # one GraphStats per generation; the index doubles as the iteration id
    for iteration, graph in enumerate(graph_list):
        GraphStats(graph=graph, dataset=dataset, model=model, trial=trial,
                   iteration=iteration).write_stats_jsons(stats=stats)
    return None
if __name__ == '__main__':
    # Sanity-check of deltacon0 on ring-of-cliques graphs.
    base_path = '/data/infinity-mirror/'
    dataset = 'clique-ring-500-4'
    models = ['CNRG', 'Chung-Lu', 'Erdos-Renyi', 'SBM', 'HRG', 'NetGAN']
    output_path = os.path.join(base_path, 'stats/deltacon/')
    mkdir_output(output_path)

    cols = ['model', 'gen', 'trial_id', 'deltacon']

    g1 = nx.ring_of_cliques(500, 4)
    g2 = nx.ring_of_cliques(500, 4)

    # same graph object on both sides: distance must be exactly 0
    gpc1 = GraphPairCompare(GraphStats(g1, 1), GraphStats(g1, 1))
    print('should def be 0', gpc1.deltacon0())
    # two separately constructed but structurally identical graphs
    gpc2 = GraphPairCompare(GraphStats(g1, 1), GraphStats(g2, 1))
    print('should probably be 0', gpc2.deltacon0())
    exit()  # NOTE(review): everything below this exit() is unreachable dead code

    for model in models[:1]:
        abs_delta = []
        trials = []
        rows = {col: [] for col in cols}
        for root, trial in load_data(base_path, dataset, model, True, False):
            # graph_stats = compute_graph_stats(root)
            if isinstance(root, list):
                root_graph = root[0]
def compute_graph_stats(root):
    """Wrap the root graph and each descendant's graph in a GraphStats."""
    nodes = [root, *root.descendants]
    return [GraphStats(graph=node.graph, run_id=1) for node in nodes]
# NOTE(review): this fragment references `subdir` and `files`, which must come
# from an enclosing `os.walk` loop outside this view — confirm against caller.
for filename in files:
    string = subdir.split('/')[-2:]  # last two path components, for logging
    file = os.path.join(subdir, filename)
    # only process '*seq*' pickles that don't already carry robustness data
    if 'seq' in file and 'rob' not in file:
        newfile = file.split('.')[0] + '_rob.pkl.gz'
        #newfile = file[:-7] + '_rob.pkl.gz'
        print(f'starting\t{string[-2]}\t{string[-1]}\t(unknown) ... ')
        root = load_pickle(file)
        try:
            root.robustness  # probe: old pickles predate the attribute
        except AttributeError:
            # rebuild as a TreeNode so the attribute exists
            root = TreeNode(name=root.name, graph=root.graph, stats=root.stats,
                            stats_seq=root.stats_seq, parent=root.parent,
                            children=root.children)
        if root.robustness is None or root.robustness == {}:
            graphstats = GraphStats(graph=root.graph, run_id=0)
            graphstats._calculate_robustness_measures()
            root.robustness = graphstats.stats
        for node in root.descendants:
            try:
                node.robustness
            except AttributeError:
                # NOTE(review): rebinding the loop variable does not replace the
                # node inside root's tree — unless TreeNode attaches itself via
                # parent=, the robustness written below may be lost; verify.
                node = TreeNode(name=node.name, graph=node.graph,
                                stats=node.stats, stats_seq=node.stats_seq,
                                parent=node.parent, children=node.children)
            if node.robustness is None or node.robustness == {}:
                graphstats = GraphStats(graph=node.graph, run_id=0)
                graphstats._calculate_robustness_measures()
                node.robustness = graphstats.stats
        # persist the augmented tree next to the original pickle
        with open(newfile, 'wb') as f:
            pickle.dump(root, f)
        print('done')
def main(base_path, dataset, models):
    """Backfill sequential (parent-to-child) comparison stats on pickled chains.

    For every non-'seq' pickle under base_path/dataset/model, walks the chain
    root -> child -> ..., computes pairwise distances between consecutive
    generations into each node's ``stats_seq``, and writes the augmented tree
    to a new ``*_seq[.rob].pkl.gz`` file.
    """
    if 'GraphRNN' in models:
        # GraphRNN output lives in a different directory layout; skipped here
        #path = os.path.join(base_path, 'GraphRNN')
        #for subdir, dirs, files in os.walk(path):
        #    if dataset == subdir.split('/')[-1].split('_')[0]:
        #        print(subdir)
        #        for filename in files:
        #            print(filename)
        models.remove('GraphRNN')
    for model in models:
        path = os.path.join(base_path, dataset, model)
        for subdir, dirs, files in os.walk(path):
            for filename in files:
                if 'seq' not in filename:  # skip already-processed files
                    # trial id is the trailing integer of the basename
                    run_id = int(filename.split('.')[0].split('_')[-1])
                    string = subdir.split('/')[-2:]  # for progress logging
                    file = os.path.join(subdir, filename)
                    newfile = file.split('.')[0]
                    if 'rob' in file:
                        newfile += '_seq_rob.pkl.gz'
                    else:
                        newfile += '_seq.pkl.gz'
                    print(f'starting\t{string[-2]}\t{string[-1]}\t(unknown) ... ', end='', flush=True)
                    root = load_pickle(file)
                    node = root
                    try:
                        node.stats_seq  # probe: older pickles lack stats_seq
                    except AttributeError:
                        if type(node) is LightTreeNode:
                            node_graph_stats = GraphStats(run_id=run_id, graph=node.graph)
                            # NOTE(review): root is compared against itself here —
                            # presumably a baseline where every distance is ~0; confirm
                            comparator = GraphPairCompare(GraphStats(graph=root.graph, run_id=run_id), \
                                GraphStats(graph=root.graph, run_id=run_id))
                            stats = {}
                            stats['lambda_dist'] = comparator.lambda_dist()
                            stats['node_diff'] = comparator.node_diff()
                            stats['edge_diff'] = comparator.edge_diff()
                            stats['pgd_pearson'] = comparator.pgd_pearson()
                            stats['pgd_spearman'] = comparator.pgd_spearman()
                            stats['deltacon0'] = comparator.deltacon0()
                            stats['degree_cvm'] = comparator.cvm_degree()
                            stats['pagerank_cvm'] = comparator.cvm_pagerank()
                            node = TreeNode(name=node.name, graph=node.graph, stats=stats, stats_seq={}, parent=node.parent, children=node.children)
                        elif type(node) is TreeNode:
                            # TreeNode without stats_seq: rebuild with an empty one
                            node = TreeNode(name=node.name, graph=node.graph, stats=node.stats, stats_seq={}, parent=node.parent, children=node.children)
                        else:
                            print(f'node has unknown type: {type(node)}')
                            exit()
                    if node.stats_seq is None or node.stats_seq == {}:
                        node.stats_seq = {}
                        # walk the (single-child) chain, comparing each
                        # parent/child pair of consecutive generations
                        while len(node.children) > 0:
                            child = node.children[0]
                            try:
                                child.stats_seq = {}
                            except AttributeError:
                                child = TreeNode(name=child.name,
                                    graph=child.graph, stats=child.stats, stats_seq={}, parent=child.parent, children=child.children)
                            comparator = GraphPairCompare(GraphStats(graph=node.graph, run_id=run_id), \
                                GraphStats(graph=child.graph, run_id=run_id))
                            child.stats_seq['lambda_dist'] = comparator.lambda_dist()
                            child.stats_seq['node_diff'] = comparator.node_diff()
                            child.stats_seq['edge_diff'] = comparator.edge_diff()
                            child.stats_seq['pgd_pearson'] = comparator.pgd_pearson()
                            child.stats_seq['pgd_spearman'] = comparator.pgd_spearman()
                            child.stats_seq['deltacon0'] = comparator.deltacon0()
                            child.stats_seq['degree_cvm'] = comparator.cvm_degree()
                            child.stats_seq['pagerank_cvm'] = comparator.cvm_pagerank()
                            node = child
                    # persist the augmented tree alongside the original
                    with open(newfile, 'wb') as f:
                        pickle.dump(root, f)
                    print(f'\tdone')
def main():
    """Compute PGD graphlet counts for every generation of every GraphRNN
    chain of `dataset` (initial graph included) and write them as TSV."""
    base_path = '/data/infinity-mirror/'
    input_path = '/home/dgonza26/infinity-mirror/input'
    dataset = 'flights'
    model = 'GraphRNN'
    output_path = os.path.join(base_path, 'stats', 'pgd')

    abs_lambda = []
    trials = []

    # load (root, generation) pairs, order by generation, keep the roots
    R = [(root, generation) for root, generation in load_data(
        os.path.join(base_path, 'cleaned'), dataset)]
    R.sort(key=lambda x: x[1])
    R = [root for (root, generation) in R]

    if dataset == 'clique-ring-500-4':
        G = nx.ring_of_cliques(500, 4)
    else:
        G = init(os.path.join(input_path, f'{dataset}.g'))

    # add the initial graph and transpose the list (one list per trial)
    roots = [[G] + list(r) for r in zip(*R)]

    meta_cols = ['model', 'gen', 'trial']
    # graphlet-count columns, written in PGD's key order
    stat_cols = [
        'total_2_1edge', 'total_2_indep', 'total_3_tris', 'total_2_star',
        'total_3_1edge', 'total_4_clique', 'total_4_chordcycle',
        'total_4_tailed_tris', 'total_3_star', 'total_4_path',
        'total_4_1edge', 'total_4_2edge', 'total_4_2star', 'total_4_tri',
        'total_4_indep'
    ]
    rows = {col: [] for col in meta_cols + stat_cols}

    for i, chain in enumerate(roots, 1):
        print(f'chain: {i}')
        for idx, graph in enumerate(chain):
            print(f'\tgen: {idx} ... ', end='', flush=True)
            try:
                pgd = GraphStats(graph=graph, run_id=1).pgd_graphlet_counts()
            except Exception as e:
                # a failed PGD run is reported and skipped, not fatal
                print(f'ERROR\n{e}')
            else:
                rows['model'].append('GraphRNN')
                rows['gen'].append(idx)
                rows['trial'].append(i)
                if pgd == {} and len(graph.edges()) == 0:
                    # edgeless graph: every graphlet count is zero
                    for col in stat_cols:
                        rows[col].append(0)
                else:
                    # missing key raises KeyError here, same as the original
                    # per-column appends did
                    for col in stat_cols:
                        rows[col].append(pgd[col])
                print('done')

    df = pd.DataFrame(rows)
    print(df.head())
    df.to_csv(f'{output_path}/{dataset}_{model}_pgd_full.csv',
              float_format='%.7f', sep='\t', index=False, na_rep='nan')
    print(f'wrote {output_path}/{dataset}_{model}_pgd_full.csv')
    return