# Ejemplo n.º 1 (Example no. 1)
# 0
def evaluateDynamicLinkPrediction_TIMERS(graph,
                                         embedding,
                                         t,
                                         rounds,
                                         n_sample_nodes=None,
                                         no_python=False,
                                         is_undirected=True,
                                         sampling_scheme="u_rand"):
    """Evaluate next-step link prediction for a time-indexed embedding.

    Args:
        graph: Graph the predictions are scored against (sampled first
            when ``n_sample_nodes`` is given).
        embedding: Object exposing ``predict_next_adj(t, node_l)``.
        t: Forwarded unchanged as the first argument of
            ``embedding.predict_next_adj``.
        rounds: Unused; kept for interface compatibility.
        n_sample_nodes: Number of nodes to sample for the evaluation. When
            falsy, no sampling happens and the whole ``graph`` is used.
        no_python: Unused; kept for interface compatibility.
        is_undirected: Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        sampling_scheme: ``"u_rand"`` selects ``graph_util.sample_graph``;
            any other value selects ``graph_util.sample_graph_rw_int``.

    Returns:
        tuple: ``(MAP, prec_curv)`` as computed by ``metrics.computeMAP``
        and ``metrics.computePrecisionCurve``.
    """
    # Default to the full graph: previously ``test_digraph`` was only bound
    # inside the sampling branch, so calling with ``n_sample_nodes=None``
    # raised UnboundLocalError at the metric computation below.
    node_l = None
    test_digraph = graph
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            sampler = graph_util.sample_graph
        else:
            sampler = graph_util.sample_graph_rw_int
        test_digraph, node_l = sampler(graph, n_sample_nodes)

    # node_l is None when no sampling was requested.
    estimated_adj = embedding.predict_next_adj(t, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=is_undirected, edge_pairs=None)

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list,
                                                 test_digraph)
    return (MAP, prec_curv)
# Ejemplo n.º 2 (Example no. 2)
# 0
def evaluateDynamic_changed_LinkPrediction(graph,
                                           embedding,
                                           rounds,
                                           edges_add,
                                           edges_rm,
                                           n_sample_nodes=None,
                                           no_python=False,
                                           is_undirected=True,
                                           sampling_scheme="u_rand"):
    """Score link prediction on the nodes touched by newly added edges.

    Only ``edges_add[0]`` is read: the first two entries of each edge in it
    are collected, de-duplicated with ``np.unique`` and handed to
    ``graph_util.sample_graph`` to build the evaluation graph.

    ``rounds``, ``edges_rm``, ``n_sample_nodes``, ``no_python`` and
    ``sampling_scheme`` are accepted but not used by this function.

    Returns:
        tuple: ``(MAP, prec_curv)`` from ``metrics.computeMAP`` and
        ``metrics.computePrecisionCurve``.
    """
    # Flatten (u, v) endpoints of every added edge into one list.
    endpoints = [
        vertex
        for edge in edges_add[0]
        for vertex in (edge[0], edge[1])
    ]
    changed_nodes = list(np.unique(endpoints))

    test_digraph, node_l = graph_util.sample_graph(
        graph, len(changed_nodes), changed_nodes)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        embedding.predict_next_adj(node_l),
        is_undirected=is_undirected,
        edge_pairs=None)

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    curve_and_extra = metrics.computePrecisionCurve(predicted_edge_list,
                                                    test_digraph)
    prec_curv, _ = curve_and_extra

    return (MAP, prec_curv)
def evaluateStaticGraphReconstruction(digraph,
                                      graph_embedding,
                                      X_stat,
                                      node_l=None,
                                      sample_ratio_e=None,
                                      file_suffix=None,
                                      is_undirected=True,
                                      is_weighted=False):
    """Evaluate how well an embedding reconstructs a static graph.

    Args:
        digraph: NetworkX graph used as ground truth.
        graph_embedding: Object exposing
            ``get_reconstructed_adj(X, node_l[, file_suffix])``.
        X_stat: Embedding values, forwarded to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``
            (presumably the node subset to reconstruct - confirm against
            the embedding implementation).
        sample_ratio_e: Sampling ratio passed to
            ``evaluation_util.getRandomEdgePairs``. When falsy, no edge
            pairs are sampled and ``edge_pairs=None`` is used.
        file_suffix: Optional suffix; only passed to
            ``get_reconstructed_adj`` when it is not ``None``.
        is_undirected: Flag forwarded to the edge-pair sampling and
            edge-list extraction helpers.
        is_weighted: When true, also compute the reconstruction error on
            the weights of observed edges.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``. ``err`` and
        ``err_baseline`` are ``None`` unless ``is_weighted`` is true.
    """
    node_num = digraph.number_of_nodes()

    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)
    else:
        eval_edge_pairs = None

    if file_suffix is None:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat, node_l, file_suffix)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=is_undirected, edge_pairs=eval_edge_pairs)
    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    if not is_weighted:
        return (MAP, prec_curv, None, None)

    # nx.to_numpy_matrix was removed in NetworkX 3.0; prefer to_numpy_array
    # and fall back only on releases that do not provide it.
    to_dense = getattr(nx, "to_numpy_array", None) or nx.to_numpy_matrix
    digraph_adj = np.asarray(to_dense(digraph))

    # Work on a copy so the array returned by the embedding object is not
    # modified as a side effect of the evaluation.
    masked_adj = np.array(estimated_adj)
    # Only the weights of observed edges count towards the error.
    masked_adj[digraph_adj == 0] = 0

    err = np.linalg.norm(digraph_adj - masked_adj)
    err_baseline = np.linalg.norm(digraph_adj)
    return (MAP, prec_curv, err, err_baseline)
def evaluateDynamic_changed_LinkPrediction(graph,
                                           embedding,
                                           rounds,
                                           edges_add,
                                           edges_rm,
                                           n_sample_nodes=None,
                                           no_python=False,
                                           is_undirected=True,
                                           sampling_scheme="u_rand"):
    """Function to evaluate dynamic changed link prediction

           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding;
                   must expose ``predict_next_adj(node_l)``.
               rounds (int): Not used by this function.
               edges_add (list): Added edges; only ``edges_add[0]`` is read
                   and the first two entries of each edge are taken.
               edges_rm (list): Not used by this function.
               n_sample_nodes (int): Not used by this function.
               no_python (bool): Not used by this function.
               is_undirected (bool): Forwarded to
                   ``evaluation_util.getEdgeListFromAdjMtx``.
               sampling_scheme (str): Not used by this function.

            Returns:
                tuple: (MAP, precision curve)
    """
    # Gather both endpoints of every edge added in the first entry.
    touched = []
    for added_edge in edges_add[0]:
        touched.extend((added_edge[0], added_edge[1]))

    # np.unique removes duplicates and sorts the node ids.
    unique_nodes = list(np.unique(touched))
    node_count = len(unique_nodes)

    test_digraph, node_l = graph_util.sample_graph(graph, node_count,
                                                   unique_nodes)
    estimated_adj = embedding.predict_next_adj(node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=is_undirected, edge_pairs=None)

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list,
                                                 test_digraph)
    return (MAP, prec_curv)
def evaluateDynamicLinkPrediction(graph,
                                  embedding,
                                  rounds,
                                  n_sample_nodes=None,
                                  no_python=False,
                                  is_undirected=True,
                                  sampling_scheme="u_rand"):
    """Function to evaluate Dynamic Link Prediction

           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding;
                   must expose ``predict_next_adj(node_l)``.
               rounds (int): Not used by this function.
               n_sample_nodes (int): Number of nodes to sample. When falsy,
                   no sampling is done and the whole graph is evaluated.
               no_python (bool): Not used by this function.
               is_undirected (bool): Forwarded to
                   ``evaluation_util.getEdgeListFromAdjMtx``.
               sampling_scheme (str): "u_rand" uses
                   ``graph_util.sample_graph``; any other value uses
                   ``graph_util.sample_graph_rw_int``.

            Returns:
                tuple: MAP, precision curve
    """
    # Fall back to the full graph when no sampling is requested. Without
    # this default ``test_digraph`` was unbound for n_sample_nodes=None and
    # the print/metric calls below raised UnboundLocalError.
    node_l = None
    test_digraph = graph
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            test_digraph, node_l = graph_util.sample_graph(
                graph, n_sample_nodes)
        else:
            test_digraph, node_l = graph_util.sample_graph_rw_int(
                graph, n_sample_nodes)

    estimated_adj = embedding.predict_next_adj(node_l)
    # Diagnostic output kept from the original implementation.
    print(len(estimated_adj), np.shape(estimated_adj))

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=is_undirected, edge_pairs=None)
    print(len(predicted_edge_list), np.shape(predicted_edge_list),
          len(test_digraph.edges()), np.shape(test_digraph.edges()))

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list,
                                                 test_digraph)
    return (MAP, prec_curv)
# Ejemplo n.º 6 (Example no. 6)
# 0
def main():
    """Train each model on sliding windows of every dataset and save MAP.

    For every dataset in ``data_list`` and every model name in ``funcs``, a
    fresh embedding is trained on ``lookback + 1`` consecutive graphs, the
    next adjacency is predicted, and its MAP against the following graph is
    recorded. Per-window MAP values plus their mean are written to
    ``result/<dataset>/<model>.csv``.
    """
    # data_list = ['cellphone', 'enron', 'fbmessages', 'HS11', 'HS12', 'primary', 'workplace']
    data_list = ['bitcoin_alpha', 'bitcoin_otc', 'college_msg', 'enron_all', 'enron_all_shuffle']
    funcs = ['AE', 'AERNN']
    dim_emb = 128
    lookback = 3

    for data in data_list:
        graphs = process('data/' + data)
        length = len(graphs)

        # Create the output directory before the training runs start.
        # os.mkdir failed when the parent 'result' directory was missing,
        # and it only ran after all models had been trained.
        out_dir = os.path.join('result', data)
        os.makedirs(out_dir, exist_ok=True)

        for func in funcs:
            MAP_list = []
            label = []
            for i in range(length - lookback - 1):
                # Hyper-parameters shared by all three model constructors.
                shared = dict(d=dim_emb,
                              beta=5,
                              n_prev_graphs=lookback,
                              nu1=1e-6,
                              nu2=1e-6,
                              rho=0.3,
                              n_iter=250,
                              n_batch=100,
                              modelfile=None,
                              weightfile=None,
                              savefilesuffix=None)
                if func == 'AERNN':
                    embedding = DynAERNN(n_aeunits=[500, 300],
                                         n_lstmunits=[500, dim_emb],
                                         xeta=1e-3,
                                         **shared)
                elif func == 'RNN':
                    embedding = DynRNN(n_enc_units=[500, 300],
                                       n_dec_units=[500, 300],
                                       xeta=1e-3,
                                       **shared)
                else:
                    embedding = DynAE(n_units=[500, 300, ],
                                      xeta=1e-4,
                                      **shared)

                t1 = time()
                # Train on graphs i .. i + lookback (inclusive).
                embedding.learn_embeddings(graphs[i: i + lookback + 1])
                print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

                pred_adj = graphify(embedding.predict_next_adj())
                edge_index_pre = evaluation_util.getEdgeListFromAdjMtx(adj=pred_adj)
                # Score the prediction against the graph right after the window.
                MAP = metrics.computeMAP(edge_index_pre, graphs[i + lookback + 1])
                MAP_list.append(MAP)
                label.append('第' + str(i) + '-' + str(i + lookback) + '个时间片')
                print('第' + str(i) + '-' + str(i + lookback) + '个时间片的MAP值为' + str(MAP))

            # With too few snapshots there are no windows; record NaN
            # explicitly instead of taking the mean of an empty list.
            mean_map = np.mean(MAP_list) if MAP_list else float('nan')
            MAP_list.append(mean_map)
            label.append('mean_MAP')

            csv_path = os.path.join(out_dir, str(func) + '.csv')
            df = pd.DataFrame({'MAP值': MAP_list}, index=label)
            df.to_csv(csv_path)
# Ejemplo n.º 7 (Example no. 7)
# 0
def main():
    """Train DynRNN on sliding windows and write the MAP values to a file.

    A fresh ``DynRNN`` is trained on every window of ``lookback + 1``
    consecutive graphs returned by ``process()``. The predicted next
    adjacency is scored (MAP) against the graph following the window, and
    all scores plus their mean are written to
    ``result/dynrnn_enron_MAP.txt``.
    """
    # Local import: only needed for preparing the output directory.
    import os

    graphs = process()
    length = len(graphs)
    dim_emb = 8
    lookback = 3
    MAP_list = []

    # Make sure the output directory exists before the training loop, so a
    # missing 'result' folder cannot discard the results at the very end.
    os.makedirs('result', exist_ok=True)

    for i in range(length - lookback - 1):
        embedding = DynRNN(d=dim_emb,
                           beta=5,
                           n_prev_graphs=lookback,
                           nu1=1e-6,
                           nu2=1e-6,
                           n_enc_units=[500, 300],
                           n_dec_units=[500, 300],
                           rho=0.3,
                           n_iter=250,
                           xeta=1e-3,
                           n_batch=100,
                           modelfile=[
                               './intermediate/enc_model_dynRNN.json',
                               './intermediate/dec_model_dynRNN.json'
                           ],
                           weightfile=[
                               './intermediate/enc_weights_dynRNN.hdf5',
                               './intermediate/dec_weights_dynRNN.hdf5'
                           ],
                           savefilesuffix="testing")
        t1 = time()
        # Train on graphs i .. i + lookback (inclusive).
        embedding.learn_embeddings(graphs[i:i + lookback + 1])
        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))

        pred_adj = graphify(embedding.predict_next_adj())
        edge_index_pre = evaluation_util.getEdgeListFromAdjMtx(adj=pred_adj)
        # Score the prediction against the graph right after the window.
        MAP = metrics.computeMAP(edge_index_pre, graphs[i + lookback + 1])
        MAP_list.append(MAP)
        print('第' + str(i) + '-' + str(i + lookback) + '个时间片的MAP值为' + str(MAP))

    # With too few snapshots there are no windows; report NaN explicitly
    # instead of taking the mean of an empty list.
    mean_map = np.mean(MAP_list) if MAP_list else float('nan')

    # The report contains non-ASCII text, so the encoding is set explicitly
    # rather than relying on the platform default.
    with open('result/dynrnn_enron_MAP.txt', mode='w', encoding='utf-8') as file:
        file.write('数据集共有' + str(length) + '个时间片\n')
        file.write('lookback的值为' + str(lookback) + '\nMAP的值分别为:')
        file.write(''.join(str(value) + ' ' for value in MAP_list))
        file.write('\n')
        file.write('mean MAP: ' + str(mean_map))