Example #1
 def __init__(self, data, dist_sim_calculator, need_val=True):
     self.test_row_gs = load_data(FLAGS.dataset_val_test, train=False).graphs
     self.test_col_gs = load_data(FLAGS.dataset_val_test, train=True).graphs
     self.baseline_models = []
     self.baseline_results_dict = load_results_as_dict(
         FLAGS.dataset_val_test, self.baseline_models,
         self.test_row_gs, self.test_col_gs, col_graphs_list=None,
         ds_mat=None, ds_mat_normed=False,  # load its own thing from disk
         sim_or_dist=self._ds_metric_is_dist_or_sim(),
         ds_metric=FLAGS.ds_metric, time_mat=None)
     if need_val:
         val_gs1, val_gs2 = self.get_val_gs_as_tuple(data)
         self.val_row_gs = self._to_nxgraph_list(val_gs1)
         self.val_col_gs = self._to_nxgraph_list(val_gs2)
         true_val_ds_mat = self._get_true_dist_sim_mat_for_val(
             data, dist_sim_calculator)
         self.true_val_result = load_result(
             FLAGS.dataset_val_test, FLAGS.ds_algo,
             self.val_row_gs, self.val_col_gs, col_graphs_list=None,
             ds_mat=true_val_ds_mat, ds_mat_normed=False,  # provide true ds mat
             sim_or_dist=self._ds_metric_is_dist_or_sim(),
             ds_metric=FLAGS.ds_metric, time_mat=None)
     self.true_test_result = load_result(
         FLAGS.dataset_val_test, FLAGS.ds_algo,
         self.test_row_gs, self.test_col_gs, col_graphs_list=None,
         ds_mat=None, ds_mat_normed=False,  # load its own thing from disk
         sim_or_dist=self._ds_metric_is_dist_or_sim(),
         ds_metric=FLAGS.ds_metric, time_mat=None)
     self.norms = [FLAGS.ds_norm]
Example #2
def exp5():
    """ Query visualization. """
    dataset = 'imdbmulti'
    model = 'astar'
    concise = True
    norms = [True, False]
    dir = get_result_path() + '/{}/query_vis/{}'.format(dataset, model)
    create_dir_if_not_exists(dir)
    info_dict = {
        # draw node config
        'draw_node_size': 150 if dataset != 'linux' else 10,
        'draw_node_label_enable': True,
        'node_label_name': None if dataset == 'linux' else 'type',
        'draw_node_label_font_size': 6,
        'draw_node_color_map': TYPE_COLOR_MAP,
        # draw edge config
        'draw_edge_label_enable': False,
        'edge_label_name': 'valence',
        'draw_edge_label_font_size': 6,
        # graph text info config
        'each_graph_text_list': [],
        'each_graph_text_font_size': 8,
        'each_graph_text_pos': [0.5, 1.05],
        # graph padding: value range: [0, 1]
        'top_space': 0.20 if concise else 0.26,  # out of whole graph
        'bottom_space': 0.05,
        'hbetween_space': 0.6 if concise else 1,  # out of the subgraph
        'wbetween_space': 0,
        # plot config
        'plot_dpi': 200,
        'plot_save_path_eps': '',
        'plot_save_path_png': ''
    }
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    r = load_result(dataset, model, row_graphs=row_graphs, col_graphs=col_graphs)
    tr = load_result(dataset, TRUE_MODEL, row_graphs=row_graphs, col_graphs=col_graphs)
    for norm in norms:
        ids = r.get_sort_id_mat(norm)
        m, n = r.m_n()
        num_vis = 10
        for i in range(num_vis):
            q = test_data.graphs[i]
            gids = np.concatenate([ids[i][:3], [ids[i][int(n / 2)]], ids[i][-3:]])
            gs = [train_data.graphs[j] for j in gids]
            info_dict['each_graph_text_list'] = \
                [get_text_label(dataset, r, tr, i, i, q, model, norm, True, concise)] + \
                [get_text_label(dataset, r, tr, i, j,
                                train_data.graphs[j], model, norm, False, concise) \
                 for j in gids]
            # print(info_dict['each_graph_text_list'])
            info_dict['plot_save_path_png'] = '{}/query_vis_{}_{}_{}{}.{}'.format(
                dir, dataset, model, i, get_norm_str(norm), 'png')
            info_dict['plot_save_path_eps'] = '{}/query_vis_{}_{}_{}{}.{}'.format(
                dir, dataset, model, i, get_norm_str(norm), 'eps')
            vis(q, gs, info_dict)
Example #3
 def __init__(self, data, dist_calculator):
     test_gs1 = load_data(FLAGS.dataset, train=False).graphs
     test_gs2 = load_data(FLAGS.dataset, train=True).graphs
     self.baseline_models = BASELINE_MODELS
     if FLAGS.dataset == 'imdbmulti':
         self.baseline_models = ['hungarian', 'vj', 'beam80', 'beam1', 'beam2']
     self.baseline_results_dict = load_results_as_dict(
         FLAGS.dataset, self.baseline_models,
         row_graphs=test_gs1, col_graphs=test_gs2)
     val_gs1, val_gs2 = self.get_val_gs_as_tuple(data)
     self.true_val_result = load_result(
         FLAGS.dataset, FLAGS.dist_algo,
         dist_mat=self._get_true_dist_mat_for_val(data, dist_calculator),
         row_graphs=self._to_nxgraph_list(val_gs1),
         col_graphs=self._to_nxgraph_list(val_gs2))
     self.true_test_result = load_result(
         FLAGS.dataset, FLAGS.dist_algo,
         row_graphs=test_gs1, col_graphs=test_gs2)
     self.norms = [FLAGS.dist_norm]
Example #4
def exp12():
    dataset = 'ptc'
    ds_algo = 'astar'
    ds_metric = 'ged'
    sim_or_dist = 'dist'
    dir = '/media/...)'
    row_graphs = load_data(dataset, False).graphs
    col_graphs = load_data(dataset, True).graphs
    tr_l = load_as_dict(dir + '/train_val_info.klepto')
    print(tr_l.keys())
    te_l = load_as_dict(dir + '/test_info.klepto')
    print(te_l.keys())
    true_r = load_result(dataset, ds_algo, row_graphs, col_graphs, None,
                         None, False, sim_or_dist, ds_metric, None)
    pred_r = load_result(dataset, 'siamese', row_graphs, col_graphs, None,
                         te_l['sim_mat'], True, sim_or_dist, ds_metric, None)
    draw_ranking(dataset, ds_metric, true_r, pred_r, 'Our Model',
                 tr_l['flags']['node_feat_name'],
                 plot_node_ids=False, plot_gids=False, ds_norm=True,
                 existing_mappings=None)
Example #5
 def _prepare_slt_results(self, slt_collec, time_list, extra_dir):
     assert (slt_collec is not None)
     row_gs = self._to_nxgraph_list(slt_collec.row_gs)
     col_gs = None
     col_graphs_list = [self._to_nxgraph_list(li) for li in slt_collec.col_gs_list]
     extra_dir = join(extra_dir, slt_collec.short_name)
     true_r = load_result(
         FLAGS.dataset_val_test, 'decoy_true_result',
         row_gs, col_gs, col_graphs_list=col_graphs_list,
         ds_mat=slt_collec.true_ds_mat, ds_mat_normed=FLAGS.ds_norm,
         sim_or_dist=FLAGS.pred_sim_dist,
         ds_metric=FLAGS.ds_metric, time_mat=time_list)
     rs = {FLAGS.ds_algo: true_r}
     return row_gs, col_gs, col_graphs_list, extra_dir, rs, true_r
Example #6
 def eval_for_test(self, sim_mat, loss_list, time_list,
                   node_embs_list, graph_embs_mat, attentions,
                   metrics, saver):
     models = [FLAGS.dist_algo] + self.baseline_models + [FLAGS.model]
     rs = {
         FLAGS.model: load_result(
             FLAGS.dataset, FLAGS.model, sim_mat=sim_mat, time_mat=time_list),
         FLAGS.dist_algo: self.true_test_result}
     if FLAGS.plot_results:
         rs.update(self.baseline_results_dict)
     eps_dir = saver.get_log_dir()
     return self._eval(models, rs, self.true_test_result,
                       loss_list, metrics, FLAGS.plot_results,
                       node_embs_list, graph_embs_mat, attentions,
                       eps_dir)
Example #7
 def eval_test(self, cur_model, sim_mat, time_mat):
     models = self.models + [cur_model]
     self.rs[cur_model] = load_result(
         self.dataset, cur_model, sim_mat=sim_mat, time_mat=time_mat)
     norms = [True, False]
     d = plot_apk(
         self.dataset, models, self.rs, self.true_result, 'ap@k', norms,
         self.plot_results)
     self.results.update(d)
     metrics = ['mrr', 'mse']
     if time_mat is not None:
         metrics.append('time')
     for metric in metrics:
         d = plot_mrr_mse_time(
             self.dataset, models, self.rs, self.true_result, metric,
             norms, self.sim_kernel_name, self.yeta, self.plot_results)
         self.results.update(d)
     return self.results
Example #8
 def eval_for_val(self, sim_mat, loss_list, time_list, metrics):
     models = [FLAGS.model]
     pred_r = load_result(
         FLAGS.dataset, FLAGS.model, sim_mat=sim_mat, time_mat=time_list)
     rs = {FLAGS.model: pred_r, FLAGS.dist_algo: self.true_val_result}
     results = self._eval(models, rs, self.true_val_result,
                          loss_list, metrics, False)
     rtn = OrderedDict()
     li = []
     for metric, num in results.items():
         if 'loss' not in metric:
             num = num[FLAGS.model]
             results[metric] = num
         metric = 'val_' + self._remove_norm_from_str(metric)
         rtn[metric] = num
         s = '{}={:.5f}'.format(metric, num)
         li.append(s)
     return rtn, ' '.join(li)
Example #9
 def eval_for_test(self, ds_mat, metrics, saver, loss_list=None, time_list=None,
                   node_embs_dict=None, graph_embs_mat=None, attentions=None,
                   model=None, data=None, slt_collec=None):
     assert (ds_mat is not None)
     models = []
     extra_dir = saver.get_log_dir()
     assert (slt_collec is None)
     row_gs, col_gs, col_graphs_list, models, rs, true_r = \
         self._prepare_regular_results()
     models += [FLAGS.model]
     pred_r = load_result(
         FLAGS.dataset_val_test, FLAGS.model,
         row_gs, col_gs, col_graphs_list=col_graphs_list,
         ds_mat=ds_mat, ds_mat_normed=FLAGS.ds_norm,  # provide pred ds mat
         sim_or_dist=FLAGS.pred_sim_dist,
         ds_metric=FLAGS.ds_metric, time_mat=time_list)
     rs.update({FLAGS.model: pred_r})
     return self._eval(models, rs, true_r,
                       metrics, FLAGS.plot_results, loss_list,
                       node_embs_dict, graph_embs_mat, attentions,
                       extra_dir, model, data)
Example #10
 def eval_for_val(self, ds_mat, loss_list, time_list, metrics):
     assert (ds_mat is not None)
     models = [FLAGS.model]
     pred_r = load_result(
         FLAGS.dataset_val_test, FLAGS.model,
         self.val_row_gs, self.val_col_gs, col_graphs_list=None,
         ds_mat=ds_mat, ds_mat_normed=FLAGS.ds_norm,  # provide pred ds mat
         sim_or_dist=FLAGS.pred_sim_dist,
         ds_metric=FLAGS.ds_metric, time_mat=time_list)
     rs = {FLAGS.model: pred_r, FLAGS.ds_algo: self.true_val_result}
     results = self._eval(models, rs, self.true_val_result,
                          metrics, False, loss_list=loss_list)
     rtn = OrderedDict()
     li = []
     for metric, num in results.items():
         if 'loss' not in metric:
             num = num[FLAGS.model]
             results[metric] = num
         metric = 'val_' + self._remove_norm_from_str(metric)
         rtn[metric] = num
         s = '{}={:.5f}'.format(metric, num)
         li.append(s)
     return rtn, ' '.join(li)
Example #11
 weight_max_array = []
 weight_min_array = []
 for i in range(len(weight)):
     weight_min_array.append(min(weight[i]))
     weight_max_array.append(max(weight[i]))
 weight_max = max(weight_max_array)
 weight_min = min(weight_min_array)
 print("max:", weight_max)
 print("min:", weight_min)
 # linux max = 0.5, min = 0.2   aids700nef max = 0.7, min = 0.4
 # imdb1kcoarse max = 0.25, min = 0.15
 weight_max = 0.15
 weight_min = 0.1
 train_data = load_data(dataset, train=True)
 test_data = load_data(dataset, train=False)
 row_graphs = test_data.graphs
 col_graphs = train_data.graphs
 pred_r = load_result(dataset,
                      'siamese',
                      sim_mat=emb_data['sim_mat'],
                      time_mat=emb_data['time_li'])
 # r = load_result(dataset, model, row_graphs=row_graphs, col_graphs=col_graphs)
 tr = load_result(dataset,
                  TRUE_MODEL,
                  row_graphs=row_graphs,
                  col_graphs=col_graphs)
 for norm in norms:
     ids = pred_r.sort_id_mat_
     num_vis = 10
     for i in range(len(row_graphs)):
         q = test_data.graphs[i]
         # gids = ids[i][:7]
         gids = np.concatenate(
             [ids[i][:5], [ids[i][int(len(col_graphs) / 2)]], ids[i][-1:]])
         gs = [train_data.graphs[j] for j in gids]
Example #12
 weight_max_array = []
 weight_min_array = []
 for i in range(len(weight)):
     weight_min_array.append(min(weight[i]))
     weight_max_array.append(max(weight[i]))
 weight_max = max(weight_max_array)
 weight_min = min(weight_min_array)
 print("max:", weight_max)
 print("min:", weight_min)
 weight_max = 0.85
 weight_min = 0.7
 train_data = load_data(dataset, train=True)
 test_data = load_data(dataset, train=False)
 row_graphs = test_data.graphs
 col_graphs = train_data.graphs
 r = load_result(dataset,
                 model,
                 row_graphs=row_graphs,
                 col_graphs=col_graphs)
 tr = load_result(dataset,
                  TRUE_MODEL,
                  row_graphs=row_graphs,
                  col_graphs=col_graphs)
 for norm in norms:
     ids = r.sort_id_mat(norm)
     m, n = r.m_n()
     num_vis = 10
     for i in range(len(row_graphs)):
         q = test_data.graphs[i]
         gids = ids[i][:5]
         # gids = np.concatenate([ids[i][:3], [ids[i][int(n / 2)]], ids[i][-3:]])
         gs = [train_data.graphs[j] for j in gids]
         weight_query = []
Example #13
def create_siamese_result_from_test_info_pickle(fp, dataset, row_gs, col_gs):
    name = 'siamese_test'
    d = load_as_dict(fp)
    return name, load_result(dataset, name, sim_mat=d['sim_mat'],
                             row_graphs=row_gs, col_graphs=col_gs,
                             time_mat=[])
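
A minimal usage sketch for the helper above, assuming the same load_data conventions as the other examples; the pickle path is a placeholder, not an actual file:

if __name__ == '__main__':
    dataset = 'aids700nef'
    # test graphs as rows, train graphs as columns, as in the other examples
    row_gs = load_data(dataset, train=False).graphs
    col_gs = load_data(dataset, train=True).graphs
    # placeholder path to a saved test_info pickle produced by the siamese model
    name, r = create_siamese_result_from_test_info_pickle(
        '/path/to/siamese_test_info.pickle', dataset, row_gs, col_gs)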
Example #14
import math

import numpy as np
from scipy import stats, integrate
from utils import load_as_dict, load_data
from results import load_result


def sigmoid(x):
    return 1 / (1 + math.exp(-x))


if __name__ == '__main__':
    dataset = 'aids80nef'
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    load_res = load_result(dataset, 'astar', row_graphs=row_graphs, col_graphs=col_graphs)
    data_origin = load_as_dict("/home/songbian/Documents/fork/"
                        "GraphEmbedding/data/"
                        "regression_aids80nef_test_info.pickle")
    data = data_origin['node_embs_list']
    for i in range(len(data)):
        # pad each node-embedding matrix with zero rows up to 10 nodes
        if len(data[i]) < 10:
            data[i] = np.pad(data[i], ((0, 10 - len(data[i])), (0, 0)),
                             'constant', constant_values=(0, 0))

    ids = load_res.sort_id_mat_
    for i in range(len(row_graphs)):
        q = test_data.graphs[i]
        gids = np.concatenate([ids[i][:10], ids[i][-10:]])
        for j in gids:
Example #15
import seaborn as sns
from scipy import stats

from dist_calculator import get_gs_dist_mat, DistCalculator
from utils import load_as_dict, load_data
from results import load_result

if __name__ == '__main__':
    dataset = 'aids700nef'
    dist_metric = 'ged'
    dist_algo = 'astar'
    emb_data = load_as_dict("/home/songbian/Documents/fork/GraphEmbedding/model/Siamese/logs/" \
                  "siamese_regression_aids700nef_2018-08-01T11:52:11(cur_best)/test_info.pickle")
    train_data = load_data(dataset, train=True)
    test_data = load_data(dataset, train=False)
    row_graphs = test_data.graphs
    col_graphs = train_data.graphs
    matrix = load_result(dataset,
                         'astar',
                         row_graphs=row_graphs,
                         col_graphs=col_graphs)
    pred_r = load_result(dataset,
                         'siamese',
                         sim_mat=emb_data['sim_mat'],
                         time_mat=emb_data['time_li'])
    ids = matrix.sort_id_mat_
    print(len(matrix.dist_norm_mat_))
    print(len(matrix.dist_norm_mat_[0]))
    print(matrix.dist_norm_mat_)
    for i in range(len(row_graphs)):
        q = test_data.graphs[i]
        # gids = np.concatenate([ids[i][:5], [ids[i][int(len(col_graphs) / 2)]], ids[i][-1:]])
        gids = ids[i][:10]
        gs = [matrix.dist_norm_mat_[i][j] for j in gids]
        sns_plot = sns.distplot(gs, kde=False, fit=stats.gamma)