def eval_list(real_graphs_filename, pred_graphs_filename, prefix, eval_every):
    """Compare real and predicted graph lists stored in files.

    Files are indexed by (result_id, epoch) via extract_result_id_and_epoch;
    only epochs that are multiples of eval_every are kept. For each pair the
    clustering MMD is printed for real-vs-pred, real-vs-perturbed and a
    real-vs-real split (as a sanity baseline).

    Args:
        real_graphs_filename: iterable of filenames holding real graph lists.
        pred_graphs_filename: iterable of filenames holding predicted graph lists.
        prefix: filename prefix passed through to extract_result_id_and_epoch.
        eval_every: evaluate only epochs divisible by this value.
    """

    def _index_files(filenames, tag):
        # Build {result_id: {epoch: filename}}, skipping non-eval epochs.
        index = {}
        for fname in filenames:
            result_id, epochs = extract_result_id_and_epoch(fname, prefix, tag)
            if epochs % eval_every != 0:
                continue
            index.setdefault(result_id, {})[epochs] = fname
        return index

    real_graphs_dict = _index_files(real_graphs_filename, "real_")
    pred_graphs_dict = _index_files(pred_graphs_filename, "pred_")

    for result_id in real_graphs_dict:
        for epochs in sorted(real_graphs_dict[result_id]):
            # Fix: previously a missing prediction file raised KeyError here.
            pred_fname = pred_graphs_dict.get(result_id, {}).get(epochs)
            if pred_fname is None:
                print("no pred file for (", result_id, ") at epoch ", epochs)
                continue
            real_g_list = utils.load_graph_list(
                real_graphs_dict[result_id][epochs])
            pred_g_list = utils.load_graph_list(pred_fname)
            random.shuffle(real_g_list)
            random.shuffle(pred_g_list)
            perturbed_g_list = perturb(real_g_list, 0.05)

            dist = eval.stats.clustering_stats(real_g_list, pred_g_list)
            print(
                "dist between real and pred (",
                result_id,
                ") at epoch ",
                epochs,
                ": ",
                dist,
            )

            dist = eval.stats.clustering_stats(real_g_list, perturbed_g_list)
            print("dist between real and perturbed: ", dist)

            # Split the real list in half and compare: lower bound on MMD noise.
            mid = len(real_g_list) // 2
            dist = eval.stats.clustering_stats(real_g_list[:mid],
                                               real_g_list[mid:])
            print("dist among real: ", dist)
def load_ground_truth(dir_input, dataset_name, model_name='GraphRNN_RNN'):
    ''' Read ground truth graphs.

    Args:
        dir_input: directory containing the saved .dat graph lists.
        dataset_name: dataset name embedded in the file name.
        model_name: model whose test split should be loaded; the baseline
            names (Internal/Noise/B-A/E-R) fall back to the GraphRNN_MLP files.

    Returns:
        The loaded graph list, or None if the file cannot be read.
    '''
    # NOTE(review): the original also computed `hidden` (128/64 depending on
    # 'small' in dataset_name) but never used it in this variant; removed.
    if model_name == 'Internal' or model_name == 'Noise' or model_name == 'B-A' or model_name == 'E-R':
        base_model = 'GraphRNN_MLP'
    else:
        base_model = model_name
    # The file name encodes all architecture hyper-parameters; the two
    # branches previously duplicated this whole expression.
    fname_test = dir_input + base_model + '_' + dataset_name + '_' + str(
        args.num_layers) + '_' + str(args.num_layers_edge) + '_' + str(
            args.hidden_size_rnn) + '_' + str(
                args.hidden_size_rnn_output) + '_' + str(
                    args.embedding_size_rnn) + '_' + str(
                        args.embedding_size_rnn_output) + '_test_' + str(
                            0) + '.dat'
    try:
        graph_test = utils.load_graph_list(fname_test, is_real=True)
    except Exception:
        # Narrowed from a bare except: keep best-effort behaviour without
        # swallowing SystemExit/KeyboardInterrupt.
        print('Not found: ' + fname_test)
        logging.warning('Not found: ' + fname_test)
        return None
    return graph_test
def eval_list(real_graphs_filename, pred_graphs_filename, prefix, eval_every):
    """Evaluate predicted graph lists against their real counterparts.

    Both file lists are bucketed as {result_id: {epoch: filename}}, keeping
    only epochs divisible by eval_every. For every bucket the clustering MMD
    is printed for real-vs-pred, real-vs-perturbed and a real-vs-real split.
    """
    real_graphs_dict = {}
    pred_graphs_dict = {}
    # Index real and predicted filenames with the same loop body.
    for bucket, filenames, tag in (
            (real_graphs_dict, real_graphs_filename, 'real_'),
            (pred_graphs_dict, pred_graphs_filename, 'pred_')):
        for fname in filenames:
            result_id, epochs = extract_result_id_and_epoch(fname, prefix, tag)
            if epochs % eval_every != 0:
                continue
            bucket.setdefault(result_id, {})[epochs] = fname

    for result_id, epoch_to_fname in real_graphs_dict.items():
        for epochs in sorted(epoch_to_fname):
            real_g_list = utils.load_graph_list(epoch_to_fname[epochs])
            pred_g_list = utils.load_graph_list(
                pred_graphs_dict[result_id][epochs])
            shuffle(real_g_list)
            shuffle(pred_g_list)
            perturbed_g_list = perturb(real_g_list, 0.05)

            dist = eval.stats.clustering_stats(real_g_list, pred_g_list)
            print('dist between real and pred (', result_id, ') at epoch ',
                  epochs, ': ', dist)

            dist = eval.stats.clustering_stats(real_g_list, perturbed_g_list)
            print('dist between real and perturbed: ', dist)

            # Half/half split of the real graphs gives a noise-floor estimate.
            mid = len(real_g_list) // 2
            dist = eval.stats.clustering_stats(real_g_list[:mid],
                                               real_g_list[mid:])
            print('dist among real: ', dist)
def process_kron(kron_dir):
    """Collect Kronecker generator output graphs from kron_dir.

    A pickled .dat graph list, if present, takes precedence and is returned
    directly; otherwise every SNAP .txt output file is parsed into a
    networkx graph and the resulting list is returned.
    """
    txt_files = []
    for entry in os.listdir(kron_dir):
        name = os.fsdecode(entry)
        if name.endswith('.dat'):
            # A serialized graph list short-circuits the txt parsing.
            return utils.load_graph_list(os.path.join(kron_dir, name))
        if name.endswith('.txt'):
            txt_files.append(name)
    return [
        utils.snap_txt_output_to_nx(os.path.join(kron_dir, name))
        for name in txt_files
    ]
def generate_data_community(
        fname,
        train_graphs_path="/home/rachneet/PycharmProjects/graph_generation/graphs/GraphRNN_RNN_community2_4_128_train_0.dat"
):
    """Build decision-sequence training data from community graphs.

    Loads a pickled list of training graphs, converts each graph into its
    decision sequence via getDecisionSequence, and pickles the resulting
    list to fname.

    Args:
        fname: output pickle file path.
        train_graphs_path: path of the pickled training graph list; the
            default preserves the previously hard-coded location.
    """
    graphs_train = utils.load_graph_list(train_graphs_path)
    # One decision sequence per training graph.
    graphs = [getDecisionSequence(g) for g in graphs_train]
    with open(fname, 'wb') as f:
        pickle.dump(graphs, f)
def load_ground_truth(dir_input, dataset_name, model_name='GraphRNN_RNN'):
    ''' Read ground truth graphs.

    Args:
        dir_input: directory holding the saved .dat graph lists.
        dataset_name: dataset name embedded in the file name; datasets whose
            name lacks 'small' use hidden size 128, otherwise 64.
        model_name: model whose test split to load; the baseline names
            (Internal/Noise/B-A/E-R) reuse the GraphRNN_MLP test files.

    Returns:
        The loaded graph list, or None if the file cannot be read.
    '''
    hidden = 128 if 'small' not in dataset_name else 64
    if model_name in ('Internal', 'Noise', 'B-A', 'E-R'):
        base_model = 'GraphRNN_MLP'
    else:
        base_model = model_name
    # The two branches previously duplicated this whole expression.
    fname_test = dir_input + base_model + '_' + dataset_name + '_' + str(
        args.num_layers) + '_' + str(hidden) + '_test_' + str(0) + '.dat'
    try:
        graph_test = utils.load_graph_list(fname_test, is_real=True)
    except Exception:
        # Narrowed from a bare except: keep the best-effort fallback without
        # swallowing SystemExit/KeyboardInterrupt.
        print('Not found: ' + fname_test)
        logging.warning('Not found: ' + fname_test)
        return None
    return graph_test
def eval_single_list(
        graphs,
        dir_input,
        dataset_name,
        ground_truth_path='/u/home/r/rlwillia/graph-generation/graphs/ground_truth.pkl'
):
    ''' Evaluate a list of graphs by comparing with ground-truth graphs.

    Args:
        graphs: list of generated graphs to score.
        dir_input: directory where ground truth graph list is stored.
            NOTE(review): currently ignored in favour of ground_truth_path
            (see the previously commented-out load_ground_truth call); kept
            for interface compatibility.
        dataset_name: name of the dataset (ground truth); currently unused.
        ground_truth_path: pickle file with the ground-truth graph list; the
            default preserves the previously hard-coded location.
    '''
    graph_test = utils.load_graph_list(ground_truth_path)
    graph_test_len = len(graph_test)
    # Score against the hold-out last 20% of the ground-truth list.
    graph_test = graph_test[int(0.8 * graph_test_len):]
    mmd_degree = eval.stats.degree_stats(graph_test, graphs)
    mmd_clustering = eval.stats.clustering_stats(graph_test, graphs)
    try:
        mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graphs)
    except Exception:
        # Orbit stats can fail (exact cause not visible here); -1 marks
        # failure as in the original best-effort handling.
        mmd_4orbits = -1
    print('deg: ', mmd_degree)
    print('clustering: ', mmd_clustering)
    print('orbits: ', mmd_4orbits)
def load_ground_truth(dir_input, dataset_name, model_name="GraphRNN_RNN"):
    """Read ground truth graphs."""
    # Datasets without 'small' in the name were trained with hidden size 128.
    hidden = 128 if "small" not in dataset_name else 64
    # Baseline models reuse the GraphRNN_MLP test files.
    baseline_names = ("Internal", "Noise", "B-A", "E-R")
    file_model = "GraphRNN_MLP" if model_name in baseline_names else model_name
    fname_test = (dir_input + file_model + "_" + dataset_name + "_" +
                  str(args.num_layers) + "_" + str(hidden) + "_test_" +
                  str(0) + ".dat")
    try:
        return utils.load_graph_list(fname_test, is_real=True)
    except:
        print("Not found: " + fname_test)
        logging.warning("Not found: " + fname_test)
        return None
def eval_list_fname(
    real_graph_filename,
    pred_graphs_filename,
    baselines,
    eval_every,
    epoch_range=None,
    out_file_prefix=None,
):
    """Evaluate list of predicted graphs compared to ground truth, stored in files.

    Args:
        real_graph_filename: file holding the ground-truth graph list.
        pred_graphs_filename: list of files, one per evaluated epoch.
        baselines: dict mapping name of the baseline to list of generated graphs.
        eval_every: epoch spacing used when epoch_range is not given.
        epoch_range: explicit epoch numbers matching pred_graphs_filename.
        out_file_prefix: if given, results are also written to
            <prefix>_train.txt and <prefix>_compare.txt.
    """
    # Fix: out_files was previously written to unconditionally, raising
    # NameError whenever out_file_prefix was None; all writes are now guarded.
    out_files = None
    if out_file_prefix is not None:
        out_files = {
            "train": open(out_file_prefix + "_train.txt", "w+"),
            "compare": open(out_file_prefix + "_compare.txt", "w+"),
        }
        out_files["train"].write("degree,clustering,orbits4\n")
        line = "metric,real,ours,perturbed"
        for bl in baselines:
            line += "," + bl
        line += "\n"
        out_files["compare"].write(line)
    results = {
        "deg": {
            "real": 0,
            "ours": 100,  # take min over all training epochs
            "perturbed": 0,
            "kron": 0,
        },
        "clustering": {
            "real": 0,
            "ours": 100,
            "perturbed": 0,
            "kron": 0
        },
        "orbits4": {
            "real": 0,
            "ours": 100,
            "perturbed": 0,
            "kron": 0
        },
    }
    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)]
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list) > 200:
            real_g_list = real_g_list[0:200]
        random.shuffle(real_g_list)
        random.shuffle(pred_g_list_raw)

        # Node counts of each graph (len(G) is the number of nodes).
        real_g_len_list = np.array([len(g) for g in real_g_list])
        pred_g_len_list_raw = np.array([len(g) for g in pred_g_list_raw])
        # Perturbed copy of the real graphs as a reference point.
        perturbed_g_list_005 = perturb(real_g_list, 0.05)

        # Select pred samples: for every real graph, greedily take the
        # prediction with the nearest node count, without replacement, so the
        # predicted size distribution matches the training set.
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break

        print("################## epoch {} ##################".format(
            epoch_range[i]))
        # info about graph size
        print(
            "real average nodes",
            sum(g.number_of_nodes() for g in real_g_list) / len(real_g_list),
        )
        print(
            "pred average nodes",
            sum(g.number_of_nodes() for g in pred_g_list) / len(pred_g_list),
        )
        print("num of real graphs", len(real_g_list))
        print("num of pred graphs", len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        # Real-vs-real half split: noise floor of the metrics.
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid],
                                                  real_g_list[mid:])
        print("degree dist among real: ", dist_degree)
        print("clustering dist among real: ", dist_clustering)
        print("orbits dist among real: ", dist_4orbits)
        results["deg"]["real"] += dist_degree
        results["clustering"]["real"] += dist_clustering
        results["orbits4"]["real"] += dist_4orbits

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print("degree dist between real and pred at epoch ", epoch_range[i],
              ": ", dist_degree)
        print("clustering dist between real and pred at epoch ",
              epoch_range[i], ": ", dist_clustering)
        print("orbits dist between real and pred at epoch ", epoch_range[i],
              ": ", dist_4orbits)
        results["deg"]["ours"] = min(dist_degree, results["deg"]["ours"])
        results["clustering"]["ours"] = min(dist_clustering,
                                            results["clustering"]["ours"])
        results["orbits4"]["ours"] = min(dist_4orbits,
                                         results["orbits4"]["ours"])

        # performance at training time
        if out_files is not None:
            out_files["train"].write(str(dist_degree) + ",")
            out_files["train"].write(str(dist_clustering) + ",")
            out_files["train"].write(str(dist_4orbits) + ",")

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list,
                                                  perturbed_g_list_005)
        print("degree dist between real and perturbed at epoch ",
              epoch_range[i], ": ", dist_degree)
        print("clustering dist between real and perturbed at epoch ",
              epoch_range[i], ": ", dist_clustering)
        print("orbits dist between real and perturbed at epoch ",
              epoch_range[i], ": ", dist_4orbits)
        results["deg"]["perturbed"] += dist_degree
        results["clustering"]["perturbed"] += dist_clustering
        results["orbits4"]["perturbed"] += dist_4orbits

        if i == 0:
            # Baselines do not change per epoch, so evaluate them once.
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(
                    real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(
                    real_g_list, baselines[baseline])
                results["deg"][baseline] = dist_degree
                results["clustering"][baseline] = dist_clustering
                results["orbits4"][baseline] = dist_4orbits
                print("Kron: deg=", dist_degree, ", clustering=",
                      dist_clustering, ", orbits4=", dist_4orbits)
        if out_files is not None:
            out_files["train"].write("\n")

    # 'real' and 'perturbed' were accumulated per epoch; average them.
    for metric, methods in results.items():
        methods["real"] /= num_evals
        methods["perturbed"] /= num_evals

    # Write results
    if out_files is not None:
        for metric, methods in results.items():
            line = (metric + "," + str(methods["real"]) + "," +
                    str(methods["ours"]) + "," + str(methods["perturbed"]))
            for baseline in baselines:
                line += "," + str(methods[baseline])
            line += "\n"
            out_files["compare"].write(line)
        for _, out_f in out_files.items():
            out_f.close()
def evaluation_epoch(
    dir_input,
    fname_output,
    model_name,
    dataset_name,
    args,
    is_clean=True,
    epoch_start=1000,
    epoch_end=3001,
    epoch_step=100,
):
    """Evaluate saved prediction snapshots for one model/dataset pair.

    Loads the ground-truth graph list, splits it into train/validate/test,
    then computes degree/clustering/orbit MMD statistics: per saved epoch and
    sample for GraphRNN models, once for the baseline generators (E-R/B-A),
    for the ground truth itself (Internal), or for a perturbed copy (Noise).
    One CSV row per evaluation is written to fname_output.

    Returns:
        True on success, or None if the ground-truth file is missing.
    """
    with open(fname_output, "w+") as f:
        f.write(
            "sample_time,epoch,degree_validate,clustering_validate,orbits4_validate,degree_test,clustering_test,orbits4_test\n"
        )

        # TODO: Maybe refactor into a separate file/function that specifies THE
        # naming convention across main and evaluate
        if "small" not in dataset_name:
            hidden = 128
        else:
            hidden = 64

        # read real graph
        if (model_name == "Internal" or model_name == "Noise"
                or model_name == "B-A" or model_name == "E-R"):
            fname_test = (dir_input + "GraphRNN_MLP" + "_" + dataset_name +
                          "_" + str(args.num_layers) + "_" + str(hidden) +
                          "_test_" + str(0) + ".dat")
        elif "Baseline" in model_name:
            fname_test = (dir_input + model_name + "_" + dataset_name + "_" +
                          str(64) + "_test_" + str(0) + ".dat")
        else:
            fname_test = (dir_input + model_name + "_" + dataset_name + "_" +
                          str(args.num_layers) + "_" + str(hidden) +
                          "_test_" + str(0) + ".dat")
        try:
            graph_test = utils.load_graph_list(fname_test, is_real=True)
        except Exception:  # narrowed from a bare except
            print("Not found: " + fname_test)
            logging.warning("Not found: " + fname_test)
            return None

        graph_test_len = len(graph_test)
        # NOTE(review): the validate split ([0:20%]) overlaps the train split
        # ([0:80%]); preserved as-is to keep results comparable.
        graph_train = graph_test[0:int(0.8 * graph_test_len)]  # train
        graph_validate = graph_test[0:int(0.2 * graph_test_len)]  # validate
        graph_test = graph_test[int(0.8 * graph_test_len):]  # hold-out test

        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver += graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print("test average len", graph_test_aver)

        # get performance for proposed approaches
        if "GraphRNN" in model_name:
            # read test graph
            for epoch in range(epoch_start, epoch_end, epoch_step):
                for sample_time in range(1, 4):
                    # get filename
                    fname_pred = (dir_input + model_name + "_" +
                                  dataset_name + "_" + str(args.num_layers) +
                                  "_" + str(hidden) + "_pred_" + str(epoch) +
                                  "_" + str(sample_time) + ".dat")
                    # load graphs
                    try:
                        graph_pred = utils.load_graph_list(
                            fname_pred, is_real=False)  # default False
                    except Exception:  # missing snapshot: skip it
                        print("Not found: " + fname_pred)
                        logging.warning("Not found: " + fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(
                            graph_test, graph_pred)
                    else:
                        random.shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print("len graph_test", len(graph_test))
                    print("len graph_validate", len(graph_validate))
                    print("len graph_pred", len(graph_pred))

                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print("pred average len", graph_pred_aver)

                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(
                        graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(
                        graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(
                            graph_test, graph_pred)
                    except Exception:  # best-effort; -1 marks failure
                        mmd_4orbits = -1
                    # evaluate MMD validate
                    mmd_degree_validate = eval.stats.degree_stats(
                        graph_validate, graph_pred)
                    mmd_clustering_validate = eval.stats.clustering_stats(
                        graph_validate, graph_pred)
                    try:
                        mmd_4orbits_validate = eval.stats.orbit_stats_all(
                            graph_validate, graph_pred)
                    except Exception:
                        mmd_4orbits_validate = -1
                    # write results
                    f.write(
                        str(sample_time) + "," + str(epoch) + "," +
                        str(mmd_degree_validate) + "," +
                        str(mmd_clustering_validate) + "," +
                        str(mmd_4orbits_validate) + "," + str(mmd_degree) +
                        "," + str(mmd_clustering) + "," + str(mmd_4orbits) +
                        "\n")
                    print(
                        "degree",
                        mmd_degree,
                        "clustering",
                        mmd_clustering,
                        "orbits",
                        mmd_4orbits,
                    )

        # get internal MMD (MMD between ground truth validation and test sets)
        if model_name == "Internal":
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(mmd_degree_validate) +
                "," + str(mmd_clustering_validate) + "," +
                str(mmd_4orbits_validate) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "\n")

        # get MMD between ground truth and its perturbed graphs
        if model_name == "Noise":
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate_perturbed)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(mmd_degree_validate) +
                "," + str(mmd_clustering_validate) + "," +
                str(mmd_4orbits_validate) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "\n")

        # get E-R MMD
        if model_name == "E-R":
            graph_pred = Graph_generator_baseline(graph_train,
                                                  generator="Gnp")
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print("len graph_test", len(graph_test))
            print("len graph_pred", len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "," + str(mmd_degree) + "," +
                str(mmd_clustering) + "," + str(mmd_4orbits_validate) + "\n")

        # get B-A MMD
        if model_name == "B-A":
            graph_pred = Graph_generator_baseline(graph_train, generator="BA")
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print("len graph_test", len(graph_test))
            print("len graph_pred", len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "," + str(mmd_degree) + "," +
                str(mmd_clustering) + "," + str(mmd_4orbits_validate) + "\n")

        # get performance for baseline approaches
        if "Baseline" in model_name:
            # read test graph
            for epoch in range(epoch_start, epoch_end, epoch_step):
                # get filename
                fname_pred = (dir_input + model_name + "_" + dataset_name +
                              "_" + str(64) + "_pred_" + str(epoch) + ".dat")
                # load graphs
                try:
                    graph_pred = utils.load_graph_list(fname_pred,
                                                       is_real=True)
                except Exception:
                    print("Not found: " + fname_pred)
                    logging.warning("Not found: " + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(
                        graph_test, graph_pred)
                else:
                    random.shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print("len graph_test", len(graph_test))
                print("len graph_validate", len(graph_validate))
                print("len graph_pred", len(graph_pred))

                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print("pred average len", graph_pred_aver)

                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(
                    graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(
                        graph_test, graph_pred)
                except Exception:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(
                    graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(
                    graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(
                        graph_validate, graph_pred)
                except Exception:
                    mmd_4orbits_validate = -1
                # write results
                f.write(
                    str(-1) + "," + str(epoch) + "," +
                    str(mmd_degree_validate) + "," +
                    str(mmd_clustering_validate) + "," +
                    str(mmd_4orbits_validate) + "," + str(mmd_degree) + "," +
                    str(mmd_clustering) + "," + str(mmd_4orbits) + "\n")
                print(
                    "degree",
                    mmd_degree,
                    "clustering",
                    mmd_clustering,
                    "orbits",
                    mmd_4orbits,
                )
    return True
graphs.append(nx.grid_2d_graph(i, j)) utils.export_graphs_to_txt(graphs, output_prefix) elif prog_args.graph_type == "caveman": graphs = [] for i in range(2, 3): for j in range(30, 81): for k in range(10): graphs.append(caveman_special(i, j, p_edge=0.3)) utils.export_graphs_to_txt(graphs, output_prefix) elif prog_args.graph_type == "citeseer": graphs = utils.citeseer_ego() utils.export_graphs_to_txt(graphs, output_prefix) else: # load from directory input_path = dir_prefix + args.graph_save_path + args.fname_test + "0.dat" g_list = utils.load_graph_list(input_path) utils.export_graphs_to_txt(g_list, output_prefix) elif not prog_args.kron_dir == "": kron_g_list = process_kron(prog_args.kron_dir) fname = os.path.join(prog_args.kron_dir, prog_args.graph_type + ".dat") print([g.number_of_nodes() for g in kron_g_list]) utils.save_graph_list(kron_g_list, fname) elif not prog_args.test_file == "": # evaluate single .dat file containing list of test graphs (networkx format) graphs = utils.load_graph_list(prog_args.test_file) eval_single_list(graphs, dir_input=dir_prefix + "graphs/", dataset_name="grid") ## if you don't try kronecker, only the following part is needed else: if not os.path.isdir(dir_prefix + "eval_results"):
def eval_list_fname(real_graph_filename,
                    pred_graphs_filename,
                    baselines,
                    eval_every,
                    epoch_range=None,
                    out_file_prefix=None):
    ''' Evaluate list of predicted graphs compared to ground truth, stored in files.

    Args:
        real_graph_filename: file holding the ground-truth graph list.
        pred_graphs_filename: list of files, one per evaluated epoch.
        baselines: dict mapping name of the baseline to list of generated graphs.
        eval_every: epoch spacing used when epoch_range is not given.
        epoch_range: explicit epoch numbers matching pred_graphs_filename.
        out_file_prefix: if given, results are also written to
            <prefix>_train.txt and <prefix>_compare.txt.
    '''
    # Fix: out_files was previously written to unconditionally, raising
    # NameError whenever out_file_prefix was None; all writes are now guarded.
    out_files = None
    if out_file_prefix is not None:
        out_files = {
            'train': open(out_file_prefix + '_train.txt', 'w+'),
            'compare': open(out_file_prefix + '_compare.txt', 'w+')
        }
        out_files['train'].write('degree,clustering,orbits4\n')
        line = 'metric,real,ours,perturbed'
        for bl in baselines:
            line += ',' + bl
        line += '\n'
        out_files['compare'].write(line)
    results = {
        'deg': {
            'real': 0,
            'ours': 100,  # take min over all training epochs
            'perturbed': 0,
            'kron': 0
        },
        'clustering': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0
        },
        'orbits4': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0
        }
    }
    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)]
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list) > 200:
            real_g_list = real_g_list[0:200]
        shuffle(real_g_list)
        shuffle(pred_g_list_raw)

        # Node counts of each graph (len(G) is the number of nodes).
        real_g_len_list = np.array([len(g) for g in real_g_list])
        pred_g_len_list_raw = np.array([len(g) for g in pred_g_list_raw])
        # Perturbed copy of the real graphs as a reference point.
        perturbed_g_list_005 = perturb(real_g_list, 0.05)

        # Select pred samples: for every real graph, greedily take the
        # prediction with the nearest node count, without replacement, so the
        # predicted size distribution matches the training set.
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break

        print('################## epoch {} ##################'.format(
            epoch_range[i]))
        # info about graph size
        print('real average nodes',
              sum(g.number_of_nodes() for g in real_g_list) / len(real_g_list))
        print('pred average nodes',
              sum(g.number_of_nodes() for g in pred_g_list) / len(pred_g_list))
        print('num of real graphs', len(real_g_list))
        print('num of pred graphs', len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        # Real-vs-real half split: noise floor of the metrics.
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid],
                                                  real_g_list[mid:])
        print('degree dist among real: ', dist_degree)
        print('clustering dist among real: ', dist_clustering)
        print('orbits dist among real: ', dist_4orbits)
        results['deg']['real'] += dist_degree
        results['clustering']['real'] += dist_clustering
        results['orbits4']['real'] += dist_4orbits

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print('degree dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_degree)
        print('clustering dist between real and pred at epoch ',
              epoch_range[i], ': ', dist_clustering)
        print('orbits dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_4orbits)
        results['deg']['ours'] = min(dist_degree, results['deg']['ours'])
        results['clustering']['ours'] = min(dist_clustering,
                                            results['clustering']['ours'])
        results['orbits4']['ours'] = min(dist_4orbits,
                                         results['orbits4']['ours'])

        # performance at training time
        if out_files is not None:
            out_files['train'].write(str(dist_degree) + ',')
            out_files['train'].write(str(dist_clustering) + ',')
            out_files['train'].write(str(dist_4orbits) + ',')

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list,
                                                  perturbed_g_list_005)
        print('degree dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_degree)
        print('clustering dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_clustering)
        print('orbits dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_4orbits)
        results['deg']['perturbed'] += dist_degree
        results['clustering']['perturbed'] += dist_clustering
        results['orbits4']['perturbed'] += dist_4orbits

        if i == 0:
            # Baselines do not change per epoch, so evaluate them once.
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(
                    real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(
                    real_g_list, baselines[baseline])
                results['deg'][baseline] = dist_degree
                results['clustering'][baseline] = dist_clustering
                results['orbits4'][baseline] = dist_4orbits
                print('Kron: deg=', dist_degree, ', clustering=',
                      dist_clustering, ', orbits4=', dist_4orbits)
        if out_files is not None:
            out_files['train'].write('\n')

    # 'real' and 'perturbed' were accumulated per epoch; average them.
    for metric, methods in results.items():
        methods['real'] /= num_evals
        methods['perturbed'] /= num_evals

    # Write results
    if out_files is not None:
        for metric, methods in results.items():
            line = metric+','+ \
                str(methods['real'])+','+ \
                str(methods['ours'])+','+ \
                str(methods['perturbed'])
            for baseline in baselines:
                line += ',' + str(methods[baseline])
            line += '\n'
            out_files['compare'].write(line)
        for _, out_f in out_files.items():
            out_f.close()
def evaluation_epoch(dir_input,
                     fname_output,
                     model_name,
                     dataset_name,
                     args,
                     is_clean=True,
                     selected_epochs=None):
    """Evaluate generated graphs against ground truth at selected epochs.

    Loads the ground-truth test graph list for ``dataset_name`` following the
    file-naming convention, then — depending on ``model_name`` — loads
    per-epoch predicted graphs (Gransformer / Baseline) or builds reference
    predictions (Internal / Noise / E-R / B-A), computes MMD statistics
    (degree, clustering, 4-orbit counts) and appends one row per evaluation
    to ``fname_output``.

    Args:
        dir_input: directory containing the saved ``.dat`` graph lists.
        fname_output: path of the results file (overwritten).
        model_name: model identifier; also selects the evaluation branch.
        dataset_name: dataset identifier used in the file-name convention.
        args: config object; ``num_layers`` and ``input_type`` are read here.
        is_clean: if True, run ``clean_graphs`` on test/pred lists; otherwise
            shuffle predictions and truncate to the test-set size.
        selected_epochs: epochs to evaluate; defaults to
            ``range(1000, 3001, 100)``.

    Returns:
        True on success, or None when the ground-truth file is missing.
    """
    # Default resolved here: a list literal in the signature would be a
    # shared mutable default argument.
    if selected_epochs is None:
        selected_epochs = list(range(1000, 3001, 100))
    with open(fname_output, 'w+') as f:
        f.write(
            'sample_time,\tepoch,\tdegree_test,\tclustering_test,\torbits4_test\n'
        )
        # TODO: Maybe refactor into a separate file/function that specifies
        # THE naming convention across main and evaluate
        if not 'small' in dataset_name:
            hidden = 128
        else:
            hidden = 64
        # read real graph
        if model_name == 'Internal' or model_name == 'Noise' or model_name == 'B-A' or model_name == 'E-R':
            fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(
                args.num_layers) + '_' + str(hidden) + '_test_' + str(0) + '.dat'
        elif 'Baseline' in model_name:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(
                64) + '_test_' + str(0) + '.dat'
        else:
            fname_test = dir_input + model_name.split(
                '-')[0] + '_' + dataset_name + '_' + args.input_type + '_test_' + str(
                    0) + '.dat'
        try:
            graph_test = utils.load_graph_list(fname_test, is_real=True)
        except Exception:  # narrowed from bare except: keep SystemExit/KeyboardInterrupt alive
            print('Not found: ' + fname_test)
            logging.warning('Not found: ' + fname_test)
            return None
        graph_test_len = len(graph_test)
        graph_train = graph_test[0:int(0.8 * graph_test_len)]  # train
        # NOTE(review): the validate slice overlaps the train slice (both
        # start at index 0) — preserved from the original split; confirm
        # this is intended.
        graph_validate = graph_test[0:int(0.2 * graph_test_len)]  # validate
        graph_test = graph_test[int(
            0.8 * graph_test_len):]  # test on a hold out test set
        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver += graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print('test average len', graph_test_aver)
        # get performance for proposed approaches
        if model_name.startswith('Gransformer'):
            # read predicted graphs for each selected epoch
            for epoch in selected_epochs:
                for sample_time in range(1, 2):
                    fname_pred = dir_input + model_name + '_' + dataset_name + '_' + args.input_type + '_pred_' + str(
                        epoch) + '_' + str(sample_time) + '.dat'
                    try:
                        graph_pred = utils.load_graph_list(
                            fname_pred, is_real=False)  # default False
                    except Exception:
                        print('Not found: ' + fname_pred)
                        logging.warning('Not found: ' + fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(
                            graph_test, graph_pred)
                    else:
                        shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print('len graph_test', len(graph_test))
                    print('len graph_validate', len(graph_validate))
                    print('len graph_pred', len(graph_pred))
                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print('pred average len', graph_pred_aver)
                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(
                        graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(
                        graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(
                            graph_test, graph_pred)
                    except Exception:
                        mmd_4orbits = -1  # orbit counting may fail; sentinel
                    # write results
                    f.write(
                        str(sample_time) + ',\t' + str(epoch) + ',\t' +
                        str(mmd_degree) + ',\t' + str(mmd_clustering) +
                        ',\t' + str(mmd_4orbits) + '\n')
                    print('degree', mmd_degree, 'clustering', mmd_clustering,
                          'orbits', mmd_4orbits)
        # get internal MMD (MMD between ground truth validation and test sets)
        # NOTE(review): the rows below carry 8 comma-separated fields while
        # the header above has 5 tab-separated ones — kept as in the
        # original; confirm downstream parsing before changing.
        if model_name == 'Internal':
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')
        # get MMD between ground truth and its perturbed graphs
        if model_name == 'Noise':
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate_perturbed)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')
        # get E-R MMD
        if model_name == 'E-R':
            graph_pred = Graph_generator_baseline(graph_train,
                                                  generator='Gnp')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + ',' + str(mmd_degree) + ',' +
                str(mmd_clustering) + ',' + str(mmd_4orbits_validate) + '\n')
        # get B-A MMD
        if model_name == 'B-A':
            graph_pred = Graph_generator_baseline(graph_train, generator='BA')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + ',' + str(mmd_degree) + ',' +
                str(mmd_clustering) + ',' + str(mmd_4orbits_validate) + '\n')
        # get performance for baseline approaches
        if 'Baseline' in model_name:
            # read predicted graphs for each selected epoch
            for epoch in selected_epochs:
                fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(
                    64) + '_pred_' + str(epoch) + '.dat'
                try:
                    graph_pred = utils.load_graph_list(
                        fname_pred, is_real=True)  # default False
                except Exception:
                    print('Not found: ' + fname_pred)
                    logging.warning('Not found: ' + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(
                        graph_test, graph_pred)
                else:
                    shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print('len graph_test', len(graph_test))
                print('len graph_validate', len(graph_validate))
                print('len graph_pred', len(graph_pred))
                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print('pred average len', graph_pred_aver)
                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(
                    graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(
                        graph_test, graph_pred)
                except Exception:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(
                    graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(
                    graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(
                        graph_validate, graph_pred)
                except Exception:
                    mmd_4orbits_validate = -1
                # write results
                f.write(
                    str(-1) + ',' + str(epoch) + ',' +
                    str(mmd_degree_validate) + ',' +
                    str(mmd_clustering_validate) + ',' +
                    str(mmd_4orbits_validate) + ',' + str(mmd_degree) + ',' +
                    str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')
                print('degree', mmd_degree, 'clustering', mmd_clustering,
                      'orbits', mmd_4orbits)
    return True
mmd_4orbits) def save_graph_list(G_list, fname): with open(fname, "wb") as f: pickle.dump(G_list, f) if __name__ == "__main__": # load test graphs to test against test_graphs = [] test_path = "graph/GraphRNN_RNN_community2_multi_4_128_test_0.dat" validate_path = "graph/GraphRNN_RNN_community2_multi_4_128_validate_0.dat" # test_path = "graph/GraphRNN_RNN_barabasi_small_4_64_test_0.dat" test_graphs = load_graph_list(test_path) v_graphs = load_graph_list(validate_path) # load predicted graphs and add them to a list # path = "sample/*" # path = "/home/rachneet/PycharmProjects/graph_generation/baselines/graphvae/graphs/" path = "graph/nevae_community_pred.dat" #for i in range(2): # for fname in sorted(glob.glob(path)): # pred_graphs = [] # print(fname) # if "community_vae" in fname: # with open(fname,'rb') as f: # graph = nx.read_edgelist(f, nodetype=int) # pred_graphs.append(graph)
def eval_list_fname(real_graph_filename,
                    pred_graphs_filename,
                    baselines,
                    eval_every,
                    epoch_range=None,
                    out_file_prefix=None):
    '''Evaluate list of predicted graphs compared to ground truth, stored in files.

    Args:
        real_graph_filename: file holding the ground-truth graph list.
        pred_graphs_filename: list of prediction files, one per epoch.
        baselines: dict mapping name of the baseline to list of generated graphs.
        eval_every: epoch spacing used when `epoch_range` is not given.
        epoch_range: explicit epoch labels, one per prediction file.
        out_file_prefix: if given, write '<prefix>_train.txt' and
            '<prefix>_compare.txt'; if None, results are only printed.
    '''
    # BUG FIX: out_files used to be referenced unconditionally below, which
    # raised NameError whenever out_file_prefix was None (the default).
    out_files = None
    if out_file_prefix is not None:
        out_files = {
            'train': open(out_file_prefix + '_train.txt', 'w+'),
            'compare': open(out_file_prefix + '_compare.txt', 'w+')
        }
        out_files['train'].write('degree,clustering,orbits4\n')
        line = 'metric,real,ours,perturbed'
        for bl in baselines:
            line += ',' + bl
        line += '\n'
        out_files['compare'].write(line)
    results = {
        'deg': {
            'real': 0,
            'ours': 100,  # take min over all training epochs
            'perturbed': 0,
            'kron': 0},
        'clustering': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0},
        'orbits4': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0}
    }
    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)]
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        # contains all predicted graphs for this epoch
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list) > 200:
            real_g_list = real_g_list[0:200]
        shuffle(real_g_list)
        shuffle(pred_g_list_raw)
        # node counts, used to match prediction sizes to the real distribution
        real_g_len_list = np.array([len(g) for g in real_g_list])
        pred_g_len_list_raw = np.array([len(g) for g in pred_g_list_raw])
        # get perturbed copies of the real graphs (5% edge noise)
        perturbed_g_list_005 = perturb(real_g_list, 0.05)
        # select pred samples: the number of nodes is sampled from a
        # distribution similar to the training set
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            # remove the chosen graph so it cannot be selected twice
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break
        print('################## epoch {} ##################'.format(
            epoch_range[i]))
        # info about graph size
        print('real average nodes',
              sum(g.number_of_nodes() for g in real_g_list) / len(real_g_list))
        print('pred average nodes',
              sum(g.number_of_nodes() for g in pred_g_list) / len(pred_g_list))
        print('num of real graphs', len(real_g_list))
        print('num of pred graphs', len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        # reference: distance between two halves of the real set
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid],
                                                  real_g_list[mid:])
        print('degree dist among real: ', dist_degree)
        print('clustering dist among real: ', dist_clustering)
        print('orbits dist among real: ', dist_4orbits)
        results['deg']['real'] += dist_degree
        results['clustering']['real'] += dist_clustering
        results['orbits4']['real'] += dist_4orbits

        # real vs predicted
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print('degree dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_degree)
        print('clustering dist between real and pred at epoch ',
              epoch_range[i], ': ', dist_clustering)
        print('orbits dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_4orbits)
        results['deg']['ours'] = min(dist_degree, results['deg']['ours'])
        results['clustering']['ours'] = min(dist_clustering,
                                            results['clustering']['ours'])
        results['orbits4']['ours'] = min(dist_4orbits,
                                         results['orbits4']['ours'])
        # performance at training time
        if out_files is not None:
            out_files['train'].write(str(dist_degree) + ',')
            out_files['train'].write(str(dist_clustering) + ',')
            out_files['train'].write(str(dist_4orbits) + ',')

        # real vs perturbed
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list,
                                                  perturbed_g_list_005)
        print('degree dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_degree)
        print('clustering dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_clustering)
        print('orbits dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_4orbits)
        results['deg']['perturbed'] += dist_degree
        results['clustering']['perturbed'] += dist_clustering
        results['orbits4']['perturbed'] += dist_4orbits

        if i == 0:
            # Baselines: computed once, they do not depend on the epoch
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(
                    real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(
                    real_g_list, baselines[baseline])
                results['deg'][baseline] = dist_degree
                results['clustering'][baseline] = dist_clustering
                results['orbits4'][baseline] = dist_4orbits
                print('Kron: deg=', dist_degree, ', clustering=',
                      dist_clustering, ', orbits4=', dist_4orbits)
        if out_files is not None:
            out_files['train'].write('\n')

    # average over epochs ('ours' keeps the min instead of an average)
    for metric, methods in results.items():
        methods['real'] /= num_evals
        methods['perturbed'] /= num_evals
    # Write results
    if out_files is not None:
        for metric, methods in results.items():
            line = metric + ',' + \
                str(methods['real']) + ',' + \
                str(methods['ours']) + ',' + \
                str(methods['perturbed'])
            for baseline in baselines:
                line += ',' + str(methods[baseline])
            line += '\n'
            out_files['compare'].write(line)
        for _, out_f in out_files.items():
            out_f.close()
def evaluation_epoch(dir_input,
                     fname_output,
                     model_name,
                     dataset_name,
                     args,
                     is_clean=True,
                     epoch_start=1000,
                     epoch_end=3001,
                     epoch_step=100):
    """Evaluate generated graphs for every epoch in range(epoch_start, epoch_end, epoch_step).

    Loads the ground-truth test graphs for ``dataset_name`` and, depending on
    ``model_name``, evaluates GraphRNN predictions per epoch/sample or the
    Internal / Noise / E-R / B-A / Baseline references, writing one CSV row
    (validate and test MMDs for degree, clustering, 4-orbit counts) per
    evaluation to ``fname_output``.

    NOTE(review): this file defines `evaluation_epoch` more than once with
    different signatures; at import time the later definition shadows the
    earlier one — confirm which one callers expect.

    Returns:
        True on success, or None when the ground-truth file is missing.
    """
    with open(fname_output, 'w+') as f:
        f.write(
            'sample_time,epoch,degree_validate,clustering_validate,orbits4_validate,degree_test,clustering_test,orbits4_test\n'
        )
        # TODO: Maybe refactor into a separate file/function that specifies
        # THE naming convention across main and evaluate
        if not 'small' in dataset_name:
            hidden = 128
        else:
            hidden = 64
        # read real graph
        if model_name == 'Internal' or model_name == 'Noise' or model_name == 'B-A' or model_name == 'E-R':
            fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(
                args.num_layers) + '_' + str(hidden) + '_test_' + str(0) + '.dat'
        elif 'Baseline' in model_name:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(
                64) + '_test_' + str(0) + '.dat'
        else:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(
                args.num_layers) + '_' + str(hidden) + '_test_' + str(0) + '.dat'
        try:
            graph_test = utils.load_graph_list(fname_test, is_real=True)
        except Exception:  # narrowed from bare except
            print('Not found: ' + fname_test)
            logging.warning('Not found: ' + fname_test)
            return None
        graph_test_len = len(graph_test)
        graph_train = graph_test[0:int(0.8 * graph_test_len)]  # train
        # NOTE(review): validate overlaps train (both slices start at 0) —
        # preserved from the original; confirm intended.
        graph_validate = graph_test[0:int(0.2 * graph_test_len)]  # validate
        graph_test = graph_test[int(
            0.8 * graph_test_len):]  # test on a hold out test set
        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver += graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print('test average len', graph_test_aver)
        # get performance for proposed approaches
        if 'GraphRNN' in model_name:
            # read predicted graphs for each epoch / sampling strategy
            for epoch in range(epoch_start, epoch_end, epoch_step):
                for sample_time in range(1, 4):
                    fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(
                        args.num_layers) + '_' + str(hidden) + '_pred_' + str(
                            epoch) + '_' + str(sample_time) + '.dat'
                    try:
                        graph_pred = utils.load_graph_list(
                            fname_pred, is_real=False)  # default False
                    except Exception:
                        print('Not found: ' + fname_pred)
                        logging.warning('Not found: ' + fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(
                            graph_test, graph_pred)
                    else:
                        shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print('len graph_test', len(graph_test))
                    print('len graph_validate', len(graph_validate))
                    print('len graph_pred', len(graph_pred))
                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print('pred average len', graph_pred_aver)
                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(
                        graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(
                        graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(
                            graph_test, graph_pred)
                    except Exception:
                        mmd_4orbits = -1  # orbit counting may fail; sentinel
                    # evaluate MMD validate
                    mmd_degree_validate = eval.stats.degree_stats(
                        graph_validate, graph_pred)
                    mmd_clustering_validate = eval.stats.clustering_stats(
                        graph_validate, graph_pred)
                    try:
                        mmd_4orbits_validate = eval.stats.orbit_stats_all(
                            graph_validate, graph_pred)
                    except Exception:
                        mmd_4orbits_validate = -1
                    # write results
                    f.write(
                        str(sample_time) + ',' + str(epoch) + ',' +
                        str(mmd_degree_validate) + ',' +
                        str(mmd_clustering_validate) + ',' +
                        str(mmd_4orbits_validate) + ',' + str(mmd_degree) +
                        ',' + str(mmd_clustering) + ',' + str(mmd_4orbits) +
                        '\n')
                    print('degree', mmd_degree, 'clustering', mmd_clustering,
                          'orbits', mmd_4orbits)
        # get internal MMD (MMD between ground truth validation and test sets)
        if model_name == 'Internal':
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')
        # get MMD between ground truth and its perturbed graphs
        if model_name == 'Noise':
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate_perturbed)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')
        # get E-R MMD
        if model_name == 'E-R':
            graph_pred = Graph_generator_baseline(graph_train,
                                                  generator='Gnp')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + ',' + str(mmd_degree) + ',' +
                str(mmd_clustering) + ',' + str(mmd_4orbits_validate) + '\n')
        # get B-A MMD
        if model_name == 'B-A':
            graph_pred = Graph_generator_baseline(graph_train, generator='BA')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except Exception:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + ',' + str(mmd_degree) + ',' +
                str(mmd_clustering) + ',' + str(mmd_4orbits_validate) + '\n')
        # get performance for baseline approaches
        if 'Baseline' in model_name:
            # read predicted graphs for each epoch
            for epoch in range(epoch_start, epoch_end, epoch_step):
                fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(
                    64) + '_pred_' + str(epoch) + '.dat'
                try:
                    graph_pred = utils.load_graph_list(
                        fname_pred, is_real=True)  # default False
                except Exception:
                    print('Not found: ' + fname_pred)
                    logging.warning('Not found: ' + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(
                        graph_test, graph_pred)
                else:
                    shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print('len graph_test', len(graph_test))
                print('len graph_validate', len(graph_validate))
                print('len graph_pred', len(graph_pred))
                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print('pred average len', graph_pred_aver)
                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(
                    graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(
                        graph_test, graph_pred)
                except Exception:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(
                    graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(
                    graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(
                        graph_validate, graph_pred)
                except Exception:
                    mmd_4orbits_validate = -1
                # write results
                f.write(
                    str(-1) + ',' + str(epoch) + ',' +
                    str(mmd_degree_validate) + ',' +
                    str(mmd_clustering_validate) + ',' +
                    str(mmd_4orbits_validate) + ',' + str(mmd_degree) + ',' +
                    str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')
                print('degree', mmd_degree, 'clustering', mmd_clustering,
                      'orbits', mmd_4orbits)
    return True
graphs.append(nx.grid_2d_graph(i,j)) utils.export_graphs_to_txt(graphs, output_prefix) elif prog_args.graph_type == 'caveman': graphs = [] for i in range(2, 3): for j in range(30, 81): for k in range(10): graphs.append(caveman_special(i,j, p_edge=0.3)) utils.export_graphs_to_txt(graphs, output_prefix) elif prog_args.graph_type == 'citeseer': graphs = utils.citeseer_ego() utils.export_graphs_to_txt(graphs, output_prefix) else: # load from directory input_path = dir_prefix + real_graph_filename g_list = utils.load_graph_list(input_path) utils.export_graphs_to_txt(g_list, output_prefix) elif not prog_args.kron_dir == '': kron_g_list = process_kron(prog_args.kron_dir) fname = os.path.join(prog_args.kron_dir, prog_args.graph_type + '.dat') print([g.number_of_nodes() for g in kron_g_list]) utils.save_graph_list(kron_g_list, fname) elif not prog_args.test_file == '': # evaluate single .dat file containing list of test graphs (networkx format) graphs = utils.load_graph_list(prog_args.test_file) eval_single_list(graphs, dir_input=dir_prefix+'graphs/', dataset_name='grid') ## if you don't try kronecker, only the following part is needed else: if not os.path.isdir(dir_prefix+'eval_results'): os.makedirs(dir_prefix+'eval_results') evaluation(args_evaluate,dir_input=dir_prefix+"graphs/", dir_output=dir_prefix+"eval_results/",
def main():
    """Load real and predicted graph lists and render comparison figures."""
    args = Args()
    print(args.graph_type, args.note)
    epoch = 3000  # epoch = 16000
    sample_time = 3  # for baseline model

    # This loop currently visits a single value; kept for easy sweeps.
    for num_layers in range(4, 5):
        # Build input/output paths from the saved-file naming convention.
        suffix = str(epoch) + "_" + str(sample_time)
        fname_real = args.graph_save_path + args.fname_real + str(0)
        fname_pred = args.graph_save_path + args.fname_pred + suffix
        figname = args.figure_save_path + args.fname + suffix
        print(fname_real)
        print(fname_pred)

        # Load both graph lists; shuffle only the real one.
        graph_real_list = load_graph_list(fname_real + ".dat")
        random.shuffle(graph_real_list)
        graph_pred_list_raw = load_graph_list(fname_pred + ".dat")

        graph_real_len_list = np.array([len(g) for g in graph_real_list])
        graph_pred_len_list_raw = np.array(
            [len(g) for g in graph_pred_list_raw])
        graph_pred_list = graph_pred_list_raw
        graph_pred_len_list = graph_pred_len_list_raw
        # (nearest-size sample matching and length-window filtering were
        # removed from this path; all predictions are kept.)

        # Order each list by node count, largest first.
        real_order = np.argsort(graph_real_len_list)[::-1]
        pred_order = np.argsort(graph_pred_len_list)[::-1]
        graph_real_list = [graph_real_list[idx] for idx in real_order]
        graph_pred_list = [graph_pred_list[idx] for idx in pred_order]

        print(
            "real average nodes",
            sum(g.number_of_nodes() for g in graph_real_list) /
            len(graph_real_list),
        )
        print(
            "pred average nodes",
            sum(g.number_of_nodes() for g in graph_pred_list) /
            len(graph_pred_list),
        )
        print("num of real graphs", len(graph_real_list))
        print("num of pred graphs", len(graph_pred_list))

        # Draw 8 figures of predicted graphs, 8 graphs each (stride 32).
        for batch in range(8):
            print("iter", batch)
            graph_list = [graph_pred_list[32 * batch + k] for k in range(8)]
            for g in graph_list:
                print("pred", g.number_of_nodes())
            draw_graph_list(graph_list,
                            row=4,
                            col=4,
                            fname=figname + "_" + str(batch) + "_pred")

        # Draw 8 figures of real graphs, 8 graphs each (stride 16).
        for batch in range(8):
            print("iter", batch)
            graph_list = [graph_real_list[16 * batch + k] for k in range(8)]
            for g in graph_list:
                print("real", g.number_of_nodes())
            draw_graph_list(graph_list,
                            row=4,
                            col=4,
                            fname=figname + "_" + str(batch) + "_real")
def __init__(self, file_path, is_real=True, batch_size=None): assert batch_size is not None self.graph_list = utils.load_graph_list(file_path, is_real=is_real) self.curr_index = 0 self.batch_size = batch_size self.curr_graph_list = deepcopy(self.graph_list)