def __init__(self, data, dist_sim_calculator, need_val=True):
    """Load test graphs, baseline results, and (optionally) the true
    validation result used as ground truth during training.

    :param data: dataset object providing the validation graph pairs.
    :param dist_sim_calculator: calculator used to obtain the true
        distance/similarity matrix for the validation set.
    :param need_val: if True, also prepare validation-set ground truth.
    """
    # Test collection: rows come from the test split, columns from train.
    self.test_row_gs = load_data(FLAGS.dataset_val_test, train=False).graphs
    self.test_col_gs = load_data(FLAGS.dataset_val_test, train=True).graphs
    # No baselines registered here; the dict is still loaded so downstream
    # code can always call .update() on it.
    self.baseline_models = []
    self.baseline_results_dict = load_results_as_dict(
        FLAGS.dataset_val_test, self.baseline_models,
        self.test_row_gs, self.test_col_gs,
        col_graphs_list=None,
        ds_mat=None, ds_mat_normed=False,  # load its own thing from disk
        sim_or_dist=self._ds_metric_is_dist_or_sim(),
        ds_metric=FLAGS.ds_metric,
        time_mat=None)
    if need_val:
        val_gs1, val_gs2 = self.get_val_gs_as_tuple(data)
        self.val_row_gs = self._to_nxgraph_list(val_gs1)
        self.val_col_gs = self._to_nxgraph_list(val_gs2)
        # True dist/sim matrix for validation is computed, not read from disk.
        true_val_ds_mat = self._get_true_dist_sim_mat_for_val(
            data, dist_sim_calculator)
        self.true_val_result = load_result(
            FLAGS.dataset_val_test, FLAGS.ds_algo,
            self.val_row_gs, self.val_col_gs,
            col_graphs_list=None,
            ds_mat=true_val_ds_mat,
            ds_mat_normed=False,  # provide true ds mat
            sim_or_dist=self._ds_metric_is_dist_or_sim(),
            ds_metric=FLAGS.ds_metric,
            time_mat=None)
    # NOTE(review): indentation reconstructed from a flattened source line —
    # assuming the test-set ground truth is loaded regardless of need_val
    # (it only uses test graphs); verify against the original layout.
    self.true_test_result = load_result(
        FLAGS.dataset_val_test, FLAGS.ds_algo,
        self.test_row_gs, self.test_col_gs,
        col_graphs_list=None,
        ds_mat=None, ds_mat_normed=False,  # load its own thing from disk
        sim_or_dist=self._ds_metric_is_dist_or_sim(),
        ds_metric=FLAGS.ds_metric,
        time_mat=None)
    self.norms = [FLAGS.ds_norm]
def exp5():
    """Query visualization.

    For each of the first 10 test (query) graphs, plot the query together
    with its 3 best, 1 median, and 3 worst matches from the training set,
    under both normalized and unnormalized ranking, saving png + eps.
    """
    dataset = 'imdbmulti'
    model = 'astar'
    concise = True
    norms = [True, False]
    # Fix: renamed `dir` -> `out_dir` to stop shadowing the builtin.
    out_dir = get_result_path() + '/{}/query_vis/{}'.format(dataset, model)
    create_dir_if_not_exists(out_dir)
    info_dict = {
        # draw node config
        'draw_node_size': 150 if dataset != 'linux' else 10,
        'draw_node_label_enable': True,
        'node_label_name': None if dataset == 'linux' else 'type',
        'draw_node_label_font_size': 6,
        'draw_node_color_map': TYPE_COLOR_MAP,
        # draw edge config
        'draw_edge_label_enable': False,
        'edge_label_name': 'valence',
        'draw_edge_label_font_size': 6,
        # graph text info config
        'each_graph_text_list': [],
        'each_graph_text_font_size': 8,
        'each_graph_text_pos': [0.5, 1.05],
        # graph padding: value range: [0, 1]
        'top_space': 0.20 if concise else 0.26,  # out of whole graph
        'bottom_space': 0.05,
        'hbetween_space': 0.6 if concise else 1,  # out of the subgraph
        'wbetween_space': 0,
        # plot config
        'plot_dpi': 200,
        'plot_save_path_eps': '',
        'plot_save_path_png': ''
    }
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    r = load_result(dataset, model,
                    row_graphs=row_graphs, col_graphs=col_graphs)
    tr = load_result(dataset, TRUE_MODEL,
                     row_graphs=row_graphs, col_graphs=col_graphs)
    for norm in norms:
        ids = r.get_sort_id_mat(norm)
        # Fix: only the column count is needed; row count was unused.
        _, n = r.m_n()
        num_vis = 10
        for i in range(num_vis):
            q = test_data.graphs[i]
            # 3 best, 1 median, 3 worst matches for query i.
            gids = np.concatenate(
                [ids[i][:3], [ids[i][int(n / 2)]], ids[i][-3:]])
            gs = [train_data.graphs[j] for j in gids]
            info_dict['each_graph_text_list'] = \
                [get_text_label(dataset, r, tr, i, i, q,
                                model, norm, True, concise)] + \
                [get_text_label(dataset, r, tr, i, j, train_data.graphs[j],
                                model, norm, False, concise)
                 for j in gids]
            # print(info_dict['each_graph_text_list'])
            # Fix: single shared template instead of two duplicated literals.
            save_path = '{}/query_vis_{}_{}_{}{}.{}'
            info_dict['plot_save_path_png'] = save_path.format(
                out_dir, dataset, model, i, get_norm_str(norm), 'png')
            info_dict['plot_save_path_eps'] = save_path.format(
                out_dir, dataset, model, i, get_norm_str(norm), 'eps')
            vis(q, gs, info_dict)
def __init__(self, data, dist_calculator):
    """Load test graphs, baseline results, and the true validation/test
    results for the distance-based evaluation pipeline.

    :param data: dataset object providing the validation graph pairs.
    :param dist_calculator: calculator used to obtain the true distance
        matrix for the validation set.
    """
    # Rows come from the test split, columns from the training split.
    test_gs1 = load_data(FLAGS.dataset, train=False).graphs
    test_gs2 = load_data(FLAGS.dataset, train=True).graphs
    self.baseline_models = BASELINE_MODELS
    # imdbmulti uses a reduced baseline set (presumably the only ones with
    # precomputed results for that dataset — verify).
    if FLAGS.dataset == 'imdbmulti':
        self.baseline_models = ['hungarian', 'vj', 'beam80', 'beam1', 'beam2']
    self.baseline_results_dict = load_results_as_dict(
        FLAGS.dataset, self.baseline_models,
        row_graphs=test_gs1, col_graphs=test_gs2)
    val_gs1, val_gs2 = self.get_val_gs_as_tuple(data)
    # Validation ground truth: distance matrix is computed here; the test
    # ground truth below loads its own matrix from disk.
    self.true_val_result = load_result(
        FLAGS.dataset, FLAGS.dist_algo,
        dist_mat=self._get_true_dist_mat_for_val(data, dist_calculator),
        row_graphs=self._to_nxgraph_list(val_gs1),
        col_graphs=self._to_nxgraph_list(val_gs2))
    self.true_test_result = load_result(
        FLAGS.dataset, FLAGS.dist_algo,
        row_graphs=test_gs1, col_graphs=test_gs2)
    self.norms = [FLAGS.dist_norm]
def exp12():
    """Draw a ranking comparison between the true GED (A*) result and a
    trained Siamese model's predicted result on the ptc dataset."""
    dataset = 'ptc'
    ds_algo = 'astar'
    ds_metric = 'ged'
    sim_or_dist = 'dist'
    # Fix: renamed `dir` -> `base_dir` to stop shadowing the builtin.
    # The path is a redacted placeholder; fill in before running.
    base_dir = '/media/...)'
    row_graphs = load_data(dataset, False).graphs
    col_graphs = load_data(dataset, True).graphs
    tr_l = load_as_dict(base_dir + '/train_val_info.klepto')
    print(tr_l.keys())
    te_l = load_as_dict(base_dir + '/test_info.klepto')
    print(te_l.keys())
    # True result reads its own matrix from disk; predicted result uses the
    # (normalized) similarity matrix saved during testing.
    true_r = load_result(dataset, ds_algo, row_graphs, col_graphs, None,
                         None, False, sim_or_dist, ds_metric, None)
    pred_r = load_result(dataset, 'siamese', row_graphs, col_graphs, None,
                         te_l['sim_mat'], True, sim_or_dist, ds_metric, None)
    draw_ranking(dataset, ds_metric, true_r, pred_r, 'Our Model',
                 tr_l['flags']['node_feat_name'],
                 plot_node_ids=False, plot_gids=False, ds_norm=True,
                 existing_mappings=None)
def _prepare_slt_results(self, slt_collec, time_list, extra_dir):
    """Assemble the ground-truth result bundle for a select-collection run.

    :return: (row_gs, col_gs, col_graphs_list, extra_dir, rs, true_r).
    """
    assert (slt_collec is not None)
    # Rows are shared; each row gets its own candidate (column) list.
    row_gs = self._to_nxgraph_list(slt_collec.row_gs)
    col_gs = None
    col_graphs_list = []
    for sub_gs in slt_collec.col_gs_list:
        col_graphs_list.append(self._to_nxgraph_list(sub_gs))
    # Outputs for this collection go into a dedicated subdirectory.
    extra_dir = join(extra_dir, slt_collec.short_name)
    true_r = load_result(
        FLAGS.dataset_val_test, 'decoy_true_result', row_gs, col_gs,
        col_graphs_list=col_graphs_list,
        ds_mat=slt_collec.true_ds_mat,
        ds_mat_normed=FLAGS.ds_norm,
        sim_or_dist=FLAGS.pred_sim_dist,
        ds_metric=FLAGS.ds_metric,
        time_mat=time_list)
    rs = {FLAGS.ds_algo: true_r}
    return row_gs, col_gs, col_graphs_list, extra_dir, rs, true_r
def eval_for_test(self, sim_mat, loss_list, time_list, node_embs_list,
                  graph_embs_mat, attentions, metrics, saver):
    """Evaluate the trained model's similarity matrix on the test set
    against the true result and, when plotting, the baselines."""
    models = [FLAGS.dist_algo] + self.baseline_models + [FLAGS.model]
    pred_r = load_result(
        FLAGS.dataset, FLAGS.model, sim_mat=sim_mat, time_mat=time_list)
    rs = {
        FLAGS.model: pred_r,
        FLAGS.dist_algo: self.true_test_result,
    }
    # Baseline results are only needed when plots are produced.
    if FLAGS.plot_results:
        rs.update(self.baseline_results_dict)
    log_dir = saver.get_log_dir()
    return self._eval(
        models, rs, self.true_test_result, loss_list, metrics,
        FLAGS.plot_results, node_embs_list, graph_embs_mat, attentions,
        log_dir)
def eval_test(self, cur_model, sim_mat, time_mat):
    """Register cur_model's predicted result and refresh every test metric.

    Updates self.rs and self.results; returns self.results.
    """
    models = self.models + [cur_model]
    self.rs[cur_model] = load_result(
        self.dataset, cur_model, sim_mat=sim_mat, time_mat=time_mat)
    norms = [True, False]
    # Precision@k plot is handled separately from the scalar metrics.
    self.results.update(plot_apk(
        self.dataset, models, self.rs, self.true_result, 'ap@k',
        norms, self.plot_results))
    # Timing metric only makes sense when a time matrix was supplied.
    metrics = ['mrr', 'mse'] if time_mat is None else ['mrr', 'mse', 'time']
    for metric in metrics:
        self.results.update(plot_mrr_mse_time(
            self.dataset, models, self.rs, self.true_result, metric,
            norms, self.sim_kernel_name, self.yeta, self.plot_results))
    return self.results
def eval_for_val(self, sim_mat, loss_list, time_list, metrics):
    """Evaluate the model's predicted similarity matrix on the validation
    set.

    :return: (OrderedDict mapping 'val_<metric>' to its number,
              a space-joined summary string of 'name=value' pairs).
    """
    models = [FLAGS.model]
    pred_r = load_result(
        FLAGS.dataset, FLAGS.model, sim_mat=sim_mat, time_mat=time_list)
    rs = {FLAGS.model: pred_r, FLAGS.dist_algo: self.true_val_result}
    results = self._eval(models, rs, self.true_val_result, loss_list,
                         metrics, False)
    rtn = OrderedDict()
    li = []
    for metric, num in results.items():
        # Non-loss metrics come back as per-model dicts; pick this model's.
        # Fix: idiomatic `not in` (was `not 'loss' in metric`).
        if 'loss' not in metric:
            num = num[FLAGS.model]
            results[metric] = num
        metric = 'val_' + self._remove_norm_from_str(metric)
        rtn[metric] = num
        li.append('{}={:.5f}'.format(metric, num))
    return rtn, ' '.join(li)
def eval_for_test(self, ds_mat, metrics, saver, loss_list=None,
                  time_list=None, node_embs_dict=None, graph_embs_mat=None,
                  attentions=None, model=None, data=None, slt_collec=None):
    """Evaluate the predicted dist/sim matrix on the regular test set.

    ds_mat must be provided; slt_collec must be None here (the
    select-collection path is not handled by this branch).
    """
    assert (ds_mat is not None)
    extra_dir = saver.get_log_dir()
    assert (slt_collec is None)
    # Fix: removed the dead `models = []` assignment — it was immediately
    # overwritten by the tuple unpack below.
    row_gs, col_gs, col_graphs_list, models, rs, true_r = \
        self._prepare_regular_results()
    models += [FLAGS.model]
    pred_r = load_result(
        FLAGS.dataset_val_test, FLAGS.model, row_gs, col_gs,
        col_graphs_list=col_graphs_list,
        ds_mat=ds_mat,
        ds_mat_normed=FLAGS.ds_norm,  # provide pred ds mat
        sim_or_dist=FLAGS.pred_sim_dist,
        ds_metric=FLAGS.ds_metric,
        time_mat=time_list)
    rs.update({FLAGS.model: pred_r})
    return self._eval(models, rs, true_r, metrics, FLAGS.plot_results,
                      loss_list, node_embs_dict, graph_embs_mat,
                      attentions, extra_dir, model, data)
def eval_for_val(self, ds_mat, loss_list, time_list, metrics):
    """Evaluate the model's predicted dist/sim matrix on the validation
    set.

    :return: (OrderedDict mapping 'val_<metric>' to its number,
              a space-joined summary string of 'name=value' pairs).
    """
    assert (ds_mat is not None)
    models = [FLAGS.model]
    pred_r = load_result(
        FLAGS.dataset_val_test, FLAGS.model,
        self.val_row_gs, self.val_col_gs,
        col_graphs_list=None,
        ds_mat=ds_mat,
        ds_mat_normed=FLAGS.ds_norm,  # provide pred ds mat
        sim_or_dist=FLAGS.pred_sim_dist,
        ds_metric=FLAGS.ds_metric,
        time_mat=time_list)
    rs = {FLAGS.model: pred_r, FLAGS.ds_algo: self.true_val_result}
    results = self._eval(models, rs, self.true_val_result, metrics, False,
                         loss_list=loss_list)
    rtn = OrderedDict()
    li = []
    for metric, num in results.items():
        # Non-loss metrics come back as per-model dicts; pick this model's.
        # Fix: idiomatic `not in` (was `not 'loss' in metric`).
        if 'loss' not in metric:
            num = num[FLAGS.model]
            results[metric] = num
        metric = 'val_' + self._remove_norm_from_str(metric)
        rtn[metric] = num
        li.append('{}={:.5f}'.format(metric, num))
    return rtn, ' '.join(li)
weight_min_array.append(min(weight[i])) weight_max_array.append(max(weight[i])) weight_max = max(weight_max_array) weight_min = min(weight_min_array) print("max:", weight_max) print("min:", weight_min) # linux max = 0.5, min = 0.2 aids700nef max = 0.7, min = 0.4 # imdb1kcoarse max = 0.25, min = 0.15 weight_max = 0.15 weight_min = 0.1 train_data = load_data(dataset, train=True) test_data = load_data(dataset, train=False) row_graphs = test_data.graphs col_graphs = train_data.graphs pred_r = load_result(dataset, 'siamese', sim_mat=emb_data['sim_mat'], time_mat=emb_data['time_li']) # r = load_result(dataset, model, row_graphs=row_graphs, col_graphs=col_graphs) tr = load_result(dataset, TRUE_MODEL, row_graphs=row_graphs, col_graphs=col_graphs) for norm in norms: ids = pred_r.sort_id_mat_ num_vis = 10 for i in range(len(row_graphs)): q = test_data.graphs[i] # gids = ids[i][:7] gids = np.concatenate( [ids[i][:5], [ids[i][int(len(col_graphs) / 2)]], ids[i][-1:]]) gs = [train_data.graphs[j] for j in gids]
weight_min_array = [] for i in range(len(weight)): weight_min_array.append(min(weight[i])) weight_max_array.append(max(weight[i])) weight_max = max(weight_max_array) weight_min = min(weight_min_array) print("max:", weight_max) print("min:", weight_min) weight_max = 0.85 weight_min = 0.7 train_data = load_data(dataset, train=True) test_data = load_data(dataset, train=False) row_graphs = test_data.graphs col_graphs = train_data.graphs r = load_result(dataset, model, row_graphs=row_graphs, col_graphs=col_graphs) tr = load_result(dataset, TRUE_MODEL, row_graphs=row_graphs, col_graphs=col_graphs) for norm in norms: ids = r.sort_id_mat(norm) m, n = r.m_n() num_vis = 10 for i in range(len(row_graphs)): q = test_data.graphs[i] gids = ids[i][:5] # gids = np.concatenate([ids[i][:3], [ids[i][int(n / 2)]], ids[i][-3:]]) gs = [train_data.graphs[j] for j in gids] weight_query = []
def create_siamese_result_from_test_info_pickle(fp, dataset, row_gs, col_gs):
    """Build a ('siamese_test', result) pair from a pickled test-info dict.

    :param fp: path to the pickle produced at test time (must contain
        a 'sim_mat' entry).
    :param dataset: dataset name passed through to load_result.
    :param row_gs: row (query) graphs.
    :param col_gs: column (candidate) graphs.
    :return: (model name, loaded result object).
    """
    model_name = 'siamese_test'
    info = load_as_dict(fp)
    result = load_result(
        dataset, model_name,
        sim_mat=info['sim_mat'],
        row_graphs=row_gs,
        col_graphs=col_gs,
        time_mat=[])
    return model_name, result
from scipy import stats, integrate from utils import load_as_dict, load_data from results import load_result def sigmoid(x): return 1 / (1 + math.exp(-x)) if __name__ == '__main__': dataset = 'aids80nef' train_data = load_data(dataset, train=True) test_data = load_data(dataset, train=False) row_graphs = test_data.graphs col_graphs = train_data.graphs load_res = load_result(dataset, 'astar', row_graphs=row_graphs, col_graphs=col_graphs) data_origin = load_as_dict("/home/songbian/Documents/fork/" "GraphEmbedding/data/" "regression_aids80nef_test_info.pickle") data = data_origin['node_embs_list'] for i in range(len(data)): for j in range(len(data[i])): if len(data[i]) < 10: data[i] = np.pad(data[i], ((0, 10 - len(data[i])), (0, 0)), 'constant', constant_values=(0, 0)) ids = load_res.sort_id_mat_ for i in range(len(row_graphs)): q = test_data.graphs[i] gids = np.concatenate([ids[i][:10], ids[i][-10:]]) for j in gids:
from dist_calculator import get_gs_dist_mat, DistCalculator
from utils import load_as_dict, load_data
from results import load_result

# NOTE(review): this script also uses `sns` (seaborn) and `stats` (scipy);
# their imports are presumably elsewhere in the file — verify.

if __name__ == '__main__':
    dataset = 'aids700nef'
    dist_metric = 'ged'
    dist_algo = 'astar'
    # Predicted similarity matrix + timing info saved by a Siamese test run.
    emb_data = load_as_dict(
        "/home/songbian/Documents/fork/GraphEmbedding/model/Siamese/logs/"
        "siamese_regression_aids700nef_2018-08-01T11:52:11(cur_best)/test_info.pickle")
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    # Rows are test (query) graphs; columns are training graphs.
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    # Ground-truth A* result and the model's predicted result.
    matrix = load_result(dataset, 'astar',
                         row_graphs=row_graphs, col_graphs=col_graphs)
    pred_r = load_result(dataset, 'siamese',
                         sim_mat=emb_data['sim_mat'],
                         time_mat=emb_data['time_li'])
    ids = matrix.sort_id_mat_
    print(len(matrix.dist_norm_mat_))
    print(len(matrix.dist_norm_mat_[0]))
    print(matrix.dist_norm_mat_)
    for i in range(len(row_graphs)):
        q = test_data.graphs[i]
        # gids = np.concatenate([ids[i][:5], [ids[i][int(len(col_graphs) / 2)]], ids[i][-1:]])
        # Top-10 closest training graphs for query i; plot the distribution
        # of their normalized distances.
        gids = ids[i][:10]
        gs = [matrix.dist_norm_mat_[i][j] for j in gids]
        sns_plot = sns.distplot(gs, kde=False, fit=stats.gamma)