def exp11():
    """Draw the embedding histogram/heat plot for a saved 'linux' test run.

    Loads the ``test_info.klepto`` dump of one Siamese regression run and
    passes its ``node_embs_dict`` entry to ``draw_emb_hist_heat``.
    """
    test_info = load_as_dict(
        '/home/<>/Documents/GraphEmbedding/model/Siamese/logs/'
        'siamese_regression_linux_2018-11-04T22:07:15.428277'
        '(sepa, fix=10; check multi-scale)/test_info.klepto')
    # The attention weights are looked up but not used below; the lookup is
    # kept so a dump without 'atts' still fails here.
    weight = test_info['atts']
    embeddings_by_node = test_info['node_embs_dict']
    draw_emb_hist_heat('linux', embeddings_by_node, True)  # TODO: fix
def exp12():
    """Draw the ranking plot for the 'ptc' dataset.

    Loads the train/val and test info dumps of one run, builds the reference
    result ('astar', 'ged', 'dist') and the 'siamese' result from the stored
    ``sim_mat``, and hands both to ``draw_ranking`` under the label
    'Our Model'.
    """
    dataset = 'ptc'
    ds_metric = 'ged'
    sim_or_dist = 'dist'
    log_dir = '/media/...)'

    # Test graphs form the rows, training graphs the columns.
    row_graphs = load_data(dataset, False).graphs
    col_graphs = load_data(dataset, True).graphs

    train_info = load_as_dict('{}/train_val_info.klepto'.format(log_dir))
    print(train_info.keys())
    test_info = load_as_dict('{}/test_info.klepto'.format(log_dir))
    print(test_info.keys())

    true_r = load_result(
        dataset, 'astar', row_graphs, col_graphs,
        None, None, False, sim_or_dist, ds_metric, None)
    pred_r = load_result(
        dataset, 'siamese', row_graphs, col_graphs,
        None, test_info['sim_mat'], True, sim_or_dist, ds_metric, None)

    node_feat_name = train_info['flags']['node_feat_name']
    draw_ranking(
        dataset, ds_metric, true_r, pred_r, 'Our Model', node_feat_name,
        plot_node_ids=False, plot_gids=False, ds_norm=True,
        existing_mappings=None)
def compute_quality_for_corpus(corpus_dir):
    '''Return the quality score of a tested corpus directory.

    Reads ``!truth.txt`` and ``!prediction.txt`` from ``corpus_dir``, builds
    a binary confusion matrix with 'SPAM' as the positive tag and 'OK' as the
    negative tag, and returns ``quality_score(tp, tn, fp, fn)``.
    '''
    from utils import read_classification_from_file as load_as_dict
    truth_dict = load_as_dict(os.path.join(corpus_dir, '!truth.txt'))
    pred_dict = load_as_dict(os.path.join(corpus_dir, '!prediction.txt'))

    from confmat import BinaryConfusionMatrix
    matrix = BinaryConfusionMatrix('SPAM', 'OK')
    matrix.compute_from_dicts(truth_dict, pred_dict)

    counts = matrix.as_dict()
    # Argument order expected by quality_score: tp, tn, fp, fn.
    return quality_score(*[counts[key] for key in ('tp', 'tn', 'fp', 'fn')])
def main():
    """Build the 'Coarse' and 'Fine' datasets, reusing cached data if present.

    When ``load_as_dict`` returns something truthy for ``cur_folder + '/temp'``
    the movies and lookup dicts are taken from it; otherwise they are read
    with ``read_data``, sorted by ``voteGetter`` in descending order, ranked
    by position, and saved for the next run.
    """
    sfn = cur_folder + '/temp'
    loaded = load_as_dict(sfn)
    if loaded:
        movies = loaded['movies']
        movies_dict = loaded['movies_dict']
        people_dict = loaded['people_dict']
        print('loaded movies, movies_dict, people_dict')
    else:
        movies, movies_dict, people_dict = read_data()
        print("finish reading data!")
        movies.sort(key=voteGetter, reverse=True)
        print('sorted')
        for rank, movie in enumerate(movies):
            movie.set_rank(rank)
        print('shuffled')
        # NOTE(review): the branch above reads keys named exactly like these
        # variables, so save_as_dict presumably derives its keys from the
        # argument names - keep the names and this one-line call; confirm.
        save_as_dict(sfn, movies, movies_dict, people_dict)

    for granularity in ('Coarse', 'Fine'):
        create_dataset(movies, movies_dict, people_dict, granularity)
def gen_aids_small(name, additional=False):
    """Write a small AIDS dataset sampled from the AIDS40k_orig graphs.

    The ``*.gexf`` files under ``<root>/data/AIDS40k_orig`` are parsed once;
    the connected graphs and their size indexes are cached via
    ``save_as_dict`` / ``load_as_dict`` at ``<save path>/aids40k_orig``.

    Args:
        name: Dataset name, used as the output directory under
            ``<root>/data``. The visible branches handle 'AIDS10k' and
            'AIDS700nef'.
        additional: When False, write a fresh train/test split into
            ``train`` and ``test``. When True, only 'AIDS10k' is accepted
            (enforced with an assert) and extra test graphs are written
            into ``test2``.

    Returns:
        None. Graphs are written to disk with ``nx.write_gexf``.
    """
    datadir = get_root_path() + '/data'
    dirin = datadir + '/AIDS40k_orig'
    sfn = get_save_path() + '/aids40k_orig'
    loaded = load_as_dict(sfn)
    if not loaded:
        graphs = {}
        nodes_graphs = defaultdict(list)
        lesseq30 = set()
        lesseq10 = set()
        disconnects = set()
        # Iterate through all 40k graphs.
        for file in glob(dirin + '/*.gexf'):
            # The graph id is the file name without its extension.
            gid = int(file.split('/')[-1].split('.')[0])
            g = nx.read_gexf(file)
            if not nx.is_connected(g):
                print('{} not connected'.format(gid))
                disconnects.add(gid)
                continue
            graphs[gid] = g
            nodes_graphs[g.number_of_nodes()].append(gid)
            if g.number_of_nodes() <= 30:
                lesseq30.add(gid)
            if g.number_of_nodes() <= 10:
                lesseq10.add(gid)
        # NOTE(review): the else branch reads keys named exactly like these
        # variables, so save_as_dict presumably derives its keys from the
        # argument names - keep the names and this one-line call; confirm.
        save_as_dict(sfn, graphs, nodes_graphs, lesseq30, lesseq10, disconnects)
    else:
        graphs = loaded['graphs']
        nodes_graphs = loaded['nodes_graphs']
        lesseq30 = loaded['lesseq30']
        lesseq10 = loaded['lesseq10']
        disconnects = loaded['disconnects']
    print(len(disconnects), 'disconnected graphs out of', len(graphs))
    print(len(lesseq30), 'with <= 30 nodes')
    print(len(lesseq10), 'with <= 10 nodes')
    # exit(1)
    train_dir = '{}/{}/train'.format(datadir, name)
    if additional:
        train_data = load_data(name.lower(), train=True)
        test_dir_str = 'test2'
    else:
        exec_cmd('mkdir -p {}'.format(train_dir))
        test_dir_str = 'test'
    test_dir = '{}/{}/{}'.format(datadir, name, test_dir_str)
    exec_cmd('mkdir -p {}'.format(test_dir))
    if not additional:
        if name == 'AIDS10k':
            # One test graph for each node count in [5, 22]; each pick is
            # removed from the pool the training graphs are drawn from.
            for num_node in range(5, 23):
                choose = random.Random(123).sample(
                    nodes_graphs[num_node], 1)[0]
                print('choose {} with {} nodes'.format(choose, num_node))
                nx.write_gexf(graphs[choose],
                              test_dir + '/{}.gexf'.format(choose))
                lesseq30.remove(choose)
            # random.sample() rejects sets on Python 3.11+ (TypeError).
            # tuple() is the conversion older versions applied internally,
            # so the drawn sample is unchanged where the call used to work.
            for tid in random.Random(123).sample(tuple(lesseq30), 10000):
                nx.write_gexf(graphs[tid], train_dir + '/{}.gexf'.format(tid))
        elif name == 'AIDS700nef':
            lesseq10 = sample_from_lessthan10eq(
                train_dir, lesseq10, 560, graphs, 'train')
            sample_from_lessthan10eq(test_dir, lesseq10, 140, graphs, 'test')
    else:
        assert (name == 'AIDS10k')
        for num_node in range(5, 30):
            k = 4
            from_li = nodes_graphs[num_node]
            print('sampling {} from {} (size={})'.format(
                k, len(from_li), num_node))
            # NOTE(review): the standard random.Random has no
            # sample_exclude method; presumably it is patched in or provided
            # elsewhere in the project - confirm before running this branch.
            choose = random.Random(123).sample_exclude(
                from_li, k, train_data.get_gids())
            print('choose {} with {} nodes'.format(choose, num_node))
            for c in choose:
                nx.write_gexf(graphs[c], test_dir + '/{}.gexf'.format(c))
    print('Done')
'draw_edge_label_font_size': 6, # graph text info config 'each_graph_text_list': [], 'each_graph_text_font_size': 8, 'each_graph_text_pos': [0.5, 1.05], # graph padding: value range: [0, 1] 'top_space': 0.20 if concise else 0.26, # out of whole graph 'bottom_space': 0.05, 'hbetween_space': 0.6 if concise else 1, # out of the subgraph 'wbetween_space': 0, # plot config 'plot_dpi': 200, 'plot_save_path': '' } emb_data = load_as_dict("/home/songbian/Documents/fork/" "GraphEmbedding/data/" "regression_linux_test_info.pickle") weight_data = load_as_dict("/home/songbian/Documents/" "fork/GraphEmbedding/data/" "classification_linux_test_info.pickle") # print(weight_data) weight = weight_data['atts'] weight_max_array = [] weight_min_array = [] for i in range(len(weight)): weight_min_array.append(min(weight[i])) weight_max_array.append(max(weight[i])) weight_max = max(weight_max_array) weight_min = min(weight_min_array) print("max:", weight_max) print("min:", weight_min)
'draw_edge_label_font_size': 6, # graph text info config 'each_graph_text_list': [], 'each_graph_text_font_size': 8, 'each_graph_text_pos': [0.5, 1.05], # graph padding: value range: [0, 1] 'top_space': 0.20 if concise else 0.26, # out of whole graph 'bottom_space': 0.05, 'hbetween_space': 0.6 if concise else 1, # out of the subgraph 'wbetween_space': 0, # plot config 'plot_dpi': 200, 'plot_save_path': '' } weight_data = load_as_dict( "/home/songbian/Documents/fork/GraphEmbedding/model/Siamese/logs" "/siamese_classification_aids700nef_2018-07-28T10:09:33" "/test_info.pickle") weight = weight_data['atts'] weight_max_array = [] weight_min_array = [] for i in range(len(weight)): weight_min_array.append(min(weight[i])) weight_max_array.append(max(weight[i])) weight_max = max(weight_max_array) weight_min = min(weight_min_array) print("max:", weight_max) print("min:", weight_min) weight_max = 0.85 weight_min = 0.7 train_data = load_data(dataset, train=True) test_data = load_data(dataset, train=False)
def create_siamese_result_from_test_info_pickle(fp, dataset, row_gs, col_gs):
    """Build the 'siamese_test' result from a saved test-info file.

    Loads ``fp`` with ``load_as_dict`` and passes its ``sim_mat`` entry,
    together with the given row/column graphs and an empty ``time_mat``,
    to ``load_result``.

    Returns:
        A ``(name, result)`` tuple where ``name`` is ``'siamese_test'``.
    """
    name = 'siamese_test'
    test_info = load_as_dict(fp)
    result = load_result(
        dataset,
        name,
        sim_mat=test_info['sim_mat'],
        row_graphs=row_gs,
        col_graphs=col_gs,
        time_mat=[],
    )
    return name, result
from results import load_result


def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of the scalar ``x``.

    The value is computed without ever exponentiating a positive number, so
    large negative inputs give a result near 0 instead of making
    ``math.exp`` raise OverflowError.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


if __name__ == '__main__':
    dataset = 'aids80nef'
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    # Test graphs are the rows, training graphs the columns.
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    load_res = load_result(dataset, 'astar', row_graphs=row_graphs,
                           col_graphs=col_graphs)
    data_origin = load_as_dict("/home/songbian/Documents/fork/"
                               "GraphEmbedding/data/"
                               "regression_aids80nef_test_info.pickle")
    data = data_origin['node_embs_list']
    # Zero-pad every non-empty embedding matrix with fewer than 10 rows up
    # to 10 rows. Empty entries are left alone, matching the previous
    # per-row loop, which never ran for them.
    for i in range(len(data)):
        if 0 < len(data[i]) < 10:
            data[i] = np.pad(data[i], ((0, 10 - len(data[i])), (0, 0)),
                             'constant', constant_values=(0, 0))
    ids = load_res.sort_id_mat_
    for i in range(len(row_graphs)):
        q = test_data.graphs[i]
        # First 10 and last 10 ids of row i of the sorted id matrix.
        gids = np.concatenate([ids[i][:10], ids[i][-10:]])
        for j in gids:
            # NOTE(review): j comes from the result's id matrix but indexes
            # `data` (the loaded node_embs_list) - confirm both share the
            # same indexing.
            result = np.dot(data[i], data[j].T)
            sns_plot = sns.heatmap(result)
ha='right', va='bottom', fontsize=35) # axes = plt.gca() # axes.set_xlim([-1,1]) # axes.set_ylim([-1,1]) plt.savefig(filename) plt.close() try: set_plot_defaults() print('Reading the tokenized corpus...') read_obj = load_as_dict(corpus) dictionary = read_obj['dictionary'] reversed_dictionary = read_obj['reversed_dictionary'] pca_lcl_tgt = PCA(n_components=2) pca_lcl_nce = PCA(n_components=2) pca_glb_tgt = PCA(n_components=2) pca_glb_nce = PCA(n_components=2) plot_only = create_plotID_list(words, dictionary) print('Plotting...') for bf in listdir(folder_name): # Check for existing plot files if bf.startswith(prefix): continue
pad_inches=0) if eps_dir: plt.savefig(eps_dir + '/' + str(i) + '.png', bbox_inches='tight', pad_inches=0) plt.savefig(eps_dir + '/' + str(i) + '.eps', bbox_inches='tight', pad_inches=0) plt_cnt += 1 plt.close() print('Saved {} embedding visualization plots'.format(plt_cnt)) if __name__ == '__main__': data = load_as_dict("/home/songbian/Documents/fork/" "GraphEmbedding/data/" "regression_linux_test_info.pickle") embs = data['embs'] dataset = 'linux' thresh_pos = 0.58 thresh_neg = 0.58 thresh_pos_sim = 0.5 thresh_neg_sim = 0.5 norm = True row_graphs = load_data(dataset, train=False).graphs col_graphs = load_data(dataset, train=True).graphs true_result = load_result(dataset, TRUE_MODEL, row_graphs=row_graphs, col_graphs=col_graphs) pred_r = load_result(dataset,
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats, integrate
import sys
# Extend the import path with the parent directory before the imports below.
sys.path.append('../')
from dist_calculator import get_gs_dist_mat, DistCalculator
from utils import load_as_dict, load_data
from results import load_result

# Script: load the reference ('astar') and predicted ('siamese') results for
# the aids700nef dataset and print the dimensions of the normalized distance
# matrix.
if __name__ == '__main__':
    dataset = 'aids700nef'
    # NOTE(review): dist_metric and dist_algo are not used in the visible
    # code; the algorithm name is passed to load_result as the literal
    # 'astar' below.
    dist_metric = 'ged'
    dist_algo = 'astar'
    # Saved test info of one Siamese regression run (hard-coded local path).
    emb_data = load_as_dict("/home/songbian/Documents/fork/GraphEmbedding/model/Siamese/logs/" \
                            "siamese_regression_aids700nef_2018-08-01T11:52:11(cur_best)/test_info.pickle")
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    # Test graphs are the rows, training graphs the columns.
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    matrix = load_result(dataset, 'astar', row_graphs=row_graphs, col_graphs=col_graphs)
    # Predicted result built from the stored similarity matrix and timings.
    pred_r = load_result(dataset, 'siamese', sim_mat=emb_data['sim_mat'], time_mat=emb_data['time_li'])
    ids = matrix.sort_id_mat_
    # Number of rows, then length of the first row, of the normalized
    # distance matrix.
    print(len(matrix.dist_norm_mat_))
    print(len(matrix.dist_norm_mat_[0]))