コード例 #1
0
def evaluateStaticGraphReconstruction(digraph,
                                      graph_embedding,
                                      X_stat,
                                      node_l=None,
                                      file_suffix=None,
                                      sample_ratio_e=None,
                                      is_undirected=True,
                                      is_weighted=False):
    """Evaluate how well an embedding reconstructs ``digraph``.

    Args:
        digraph: Graph to evaluate against; must provide
            ``number_of_nodes()`` and be accepted by the ``metrics`` helpers.
        graph_embedding: Object exposing ``get_reconstructed_adj``.
        X_stat: Embedding passed through to ``get_reconstructed_adj``.
        node_l: Passed through to ``get_reconstructed_adj`` unchanged.
        file_suffix: When not ``None``, passed to ``get_reconstructed_adj``
            as an extra positional argument between ``X_stat`` and ``node_l``.
        sample_ratio_e: When truthy, only a random sample of node pairs
            (drawn by ``evaluation_util.getRandomEdgePairs``) is evaluated.
        is_undirected: Forwarded to the edge-pair sampler and the edge-list
            extraction helper.
        is_weighted: When true, also compute the weight reconstruction error.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``. ``err`` and
        ``err_baseline`` are ``None`` unless ``is_weighted`` is true.
    """
    n_nodes = digraph.number_of_nodes()

    # Optionally restrict the evaluation to a random subset of node pairs.
    sampled_pairs = None
    if sample_ratio_e:
        sampled_pairs = evaluation_util.getRandomEdgePairs(
            n_nodes, sample_ratio_e, is_undirected)

    # The reconstruction call takes file_suffix only when one was supplied.
    if file_suffix is None:
        adj_args = (X_stat, node_l)
    else:
        adj_args = (X_stat, file_suffix, node_l)
    reconstructed = graph_embedding.get_reconstructed_adj(*adj_args)

    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        reconstructed, is_undirected=is_undirected, edge_pairs=sampled_pairs)
    MAP = metrics.computeMAP(ranked_edges, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(ranked_edges, digraph)

    if not is_weighted:
        return (MAP, prec_curv, None, None)

    # Weighted case: compare weights on observed edges only. Entries of the
    # reconstruction where the true adjacency is zero are zeroed in place.
    true_adj = nx.to_numpy_matrix(digraph)
    reconstructed[true_adj == 0] = 0
    err = np.linalg.norm(true_adj - reconstructed)
    err_baseline = np.linalg.norm(true_adj)
    return (MAP, prec_curv, err, err_baseline)
コード例 #2
0
def evaluateStaticGraphReconstruction(digraph,
                                      graph_embedding,
                                      X_stat,
                                      node_l=None,
                                      file_suffix=None,
                                      sample_ratio_e=None,
                                      is_undirected=True):
    """Compute MAP and the precision curve of a reconstructed graph.

    Args:
        digraph: Graph to evaluate against; must provide
            ``number_of_nodes()`` and be accepted by the ``metrics`` helpers.
        graph_embedding: Object exposing ``get_reconstructed_adj``.
        X_stat: Embedding passed through to ``get_reconstructed_adj``.
        node_l: Passed through to ``get_reconstructed_adj`` unchanged.
        file_suffix: When not ``None``, passed to ``get_reconstructed_adj``
            as an extra positional argument between ``X_stat`` and ``node_l``.
        sample_ratio_e: When truthy, only a random sample of node pairs
            (drawn by ``evaluation_util.getRandomEdgePairs``) is evaluated.
        is_undirected: Forwarded to the edge-pair sampler and the edge-list
            extraction helper.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    total_nodes = digraph.number_of_nodes()

    # A falsy sample ratio means every node pair is evaluated.
    pairs_to_score = (
        evaluation_util.getRandomEdgePairs(
            total_nodes, sample_ratio_e, is_undirected)
        if sample_ratio_e else None
    )

    # file_suffix, when given, sits between the embedding and node_l.
    reconstruct = graph_embedding.get_reconstructed_adj
    if file_suffix is not None:
        adj_estimate = reconstruct(X_stat, file_suffix, node_l)
    else:
        adj_estimate = reconstruct(X_stat, node_l)

    edge_ranking = evaluation_util.getEdgeListFromAdjMtx(
        adj_estimate,
        is_undirected=is_undirected,
        edge_pairs=pairs_to_score,
    )
    MAP = metrics.computeMAP(edge_ranking, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(edge_ranking, digraph)
    return (MAP, prec_curv)
コード例 #3
0
def evaluateStaticGraphReconstruction(digraph,
                                      graph_embedding,
                                      X_stat,
                                      node_l=None,
                                      file_suffix=None,
                                      sample_ratio_e=None,
                                      is_undirected=True,
                                      is_weighted=False):
    """Evaluate the graph reconstruction accuracy of an embedding.

        Args:
            digraph (Object): networkx graph to evaluate against. Node ``0``
                must exist, because its attributes are inspected for a
                ``'partition'`` key.
            graph_embedding (object): Embedding object exposing
                ``get_reconstructed_adj``.
            X_stat (Vector): Embedding passed to ``get_reconstructed_adj``.
            node_l: Passed through to ``get_reconstructed_adj`` unchanged.
            file_suffix (Str): When not ``None``, passed to
                ``get_reconstructed_adj`` between ``X_stat`` and ``node_l``.
            sample_ratio_e (Float): When truthy, only a random sample of node
                pairs is evaluated.
            is_undirected (bool): Forwarded to the edge-pair sampler and the
                edge-list extraction helper.
            is_weighted (bool): When true, also compute the error in the
                reconstructed edge weights.
        Returns:
            tuple: ``(MAP, prec_curv, err, err_baseline)``; the last two are
            ``None`` unless ``is_weighted`` is true.
    """
    n_nodes = digraph.number_of_nodes()

    # Optionally evaluate on a random subset of node pairs only.
    sampled_pairs = None
    if sample_ratio_e:
        sampled_pairs = evaluation_util.getRandomEdgePairs(
            n_nodes, sample_ratio_e, is_undirected)

    if file_suffix is None:
        adj_args = (X_stat, node_l)
    else:
        adj_args = (X_stat, file_suffix, node_l)
    reconstructed = graph_embedding.get_reconstructed_adj(*adj_args)

    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        reconstructed, is_undirected=is_undirected, edge_pairs=sampled_pairs)

    # When nodes carry a 'partition' attribute, keep only predicted edges
    # whose endpoints lie in different partitions.
    node_attrs = digraph.node
    if 'partition' in node_attrs[0]:
        cross_partition = []
        for edge in ranked_edges:
            src_part = node_attrs[edge[0]]['partition']
            dst_part = node_attrs[edge[1]]['partition']
            if src_part != dst_part:
                cross_partition.append(edge)
        ranked_edges = cross_partition

    MAP = metrics.computeMAP(ranked_edges, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(ranked_edges, digraph)

    if not is_weighted:
        return (MAP, prec_curv, None, None)

    # Weighted case: compare weights on observed edges only. Entries of the
    # reconstruction where the true adjacency is zero are zeroed in place.
    true_adj = nx.to_numpy_matrix(digraph)
    reconstructed[true_adj == 0] = 0
    err = np.linalg.norm(true_adj - reconstructed)
    err_baseline = np.linalg.norm(true_adj)
    return (MAP, prec_curv, err, err_baseline)
コード例 #4
0
def evaluateStaticLinkPrediction(train_digraph, test_digraph,
                                 graph_embedding, X,
                                 node_l=None,
                                 sample_ratio_e=None,
                                 is_undirected=True,
                                 store_predictions=1):
    """Evaluate the static link prediction accuracy of an embedding.

        Args:
            train_digraph (Object): networkx graph used for training. Node
                ``0`` must exist, because its attributes are inspected for a
                ``'partition'`` key.
            test_digraph (Object): networkx graph holding the held-out edges
                that predictions are scored against.
            graph_embedding (object): Embedding object exposing
                ``learn_embedding`` and ``get_reconstructed_adj``.
            X (Vector): Embedding passed to ``get_reconstructed_adj``. When
                ``None``, ``graph_embedding.learn_embedding(train_digraph)``
                is called first.
            node_l: Passed through to ``get_reconstructed_adj`` unchanged.
            sample_ratio_e (Float): When truthy, only a random sample of node
                pairs is evaluated.
            is_undirected (bool): Forwarded to the edge-pair sampler and the
                edge-list extraction helper.
            store_predictions (Int): When truthy (the default), the filtered
                predictions and the test graph are pickled under
                ``gem/nodeListMap/``.
        Returns:
            tuple: ``(MAP, prec_curv)``.
    """
    node_num = train_digraph.number_of_nodes()
    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    if X is None:
        # If not an embedding approach, store the new subgraph
        graph_embedding.learn_embedding(train_digraph)
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    # Edges already present in the training graph are not predictions.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(e[0], e[1])
    ]
    if 'partition' in train_digraph.node[0]:
        # Keep only edges crossing partitions. This narrows the list that
        # already excludes training edges; restarting from
        # predicted_edge_list would bring the training edges back.
        node_attrs = train_digraph.node
        filtered_edge_list = [
            e for e in filtered_edge_list
            if node_attrs[e[0]]['partition'] != node_attrs[e[1]]['partition']
        ]

    if store_predictions:
        # Context managers make sure both files are flushed and closed.
        with open('gem/nodeListMap/preds.pickle', 'wb') as preds_file:
            pickle.dump(filtered_edge_list, preds_file)
        with open('gem/nodeListMap/test_graph.pickle', 'wb') as graph_file:
            pickle.dump(test_digraph, graph_file)

    t1 = time()
    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    t2 = time()
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    t3 = time()
    print('MAP computation time: %f sec, prec: %f sec' % (t2 - t1, t3 - t2))
    return (MAP, prec_curv)
コード例 #5
0
def evaluateStaticLinkPrediction(digraph,
                                 graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split ``digraph``, train an embedding and score its link predictions.

    Args:
        digraph: networkx graph to split into training and test graphs.
        graph_embedding: Embedding object exposing ``learn_embedding`` and
            ``get_reconstructed_adj``.
        train_ratio: Forwarded to ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes: When truthy, the test graph is sub-sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled nodes.
        sample_ratio_e: When truthy, only a random sample of node pairs is
            evaluated.
        no_python: Forwarded to ``graph_embedding.learn_embedding``.
        is_undirected: Forwarded to the split, sampling and edge-list helpers.

    Returns:
        tuple: ``(MAP, prec_curv)`` computed against the test graph.
    """
    node_num = digraph.number_of_nodes()
    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph, train_ratio=train_ratio, is_undirected=is_undirected)
    if not nx.is_connected(train_digraph.to_undirected()):
        # Train on the largest weakly connected component only, relabelling
        # its nodes to 0..n-1 in both the train and test graphs.
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph), key=len)
        tdl_nodes = train_digraph.nodes()
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)

    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(graph=train_digraph,
                                           no_python=no_python)
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(test_digraph,
                                                       n_sample_nodes)
        X = X[node_l]

    # evaluation
    # NOTE(review): node_num is the size of the original graph, taken before
    # any component reduction or sampling above - confirm that the sampled
    # pairs always index inside estimated_adj.
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=is_undirected, edge_pairs=eval_edge_pairs)

    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))

    # node_l is a sequence mapping predicted indices to training-graph nodes,
    # so it must be indexed (calling it raises TypeError).
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(filtered_edge_list,
                                                 test_digraph)
    return (MAP, prec_curv)
コード例 #6
0
def evaluateStaticLinkPrediction(digraph, graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split ``digraph``, train an embedding and score its link predictions.

    Besides the scores, the learned vectors are returned keyed by the node
    labels the training nodes had before any relabelling.

    Args:
        digraph: networkx graph to split into training and test graphs.
        graph_embedding: Embedding object exposing ``learn_embedding`` and
            ``get_reconstructed_adj``.
        train_ratio: Forwarded to ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes: When truthy, the test graph is sub-sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled nodes.
        sample_ratio_e: When truthy, only a random sample of node pairs is
            evaluated.
        no_python: Forwarded to ``graph_embedding.learn_embedding``.
        is_undirected: Forwarded to the split, sampling and edge-list helpers.

    Returns:
        tuple: ``(MAP, prec_curv, node2vec_dict)``.
    """
    node_num = digraph.number_of_nodes()
    print('eslp graph')
    print(digraph.edges()[:3])
    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )
    print('eslp training graph')
    print(train_digraph.edges()[:3])
    if not nx.is_connected(train_digraph.to_undirected()):
        # Train on the largest weakly connected component only, relabelling
        # its nodes to 0..n-1 in both the train and test graphs.
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph),
            key=len
        )
        # Snapshot the labels before the in-place relabelling below.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        reversedNodeListMap = dict(zip(range(len(tdl_nodes)), tdl_nodes))
        print(nodeListMap)
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)
    else:
        # No relabelling happened, so every node maps to itself. tdl_nodes
        # has to be assigned here too; it was previously only bound in the
        # branch above, which made this branch fail with UnboundLocalError.
        tdl_nodes = list(train_digraph.nodes())
        reversedNodeListMap = dict(zip(tdl_nodes, tdl_nodes))

    print('elsp training graph after largest cc')
    print(train_digraph.edges()[:3])
    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(
            test_digraph,
            n_sample_nodes
        )
        X = X[node_l]

    # Map each embedding row back to the original label of the training
    # node at the same position. The node list is fetched once rather than
    # once per row.
    # NOTE(review): when n_sample_nodes is set, X has been restricted to
    # node_l, so row i may no longer belong to the i-th training node -
    # confirm the intended alignment.
    train_nodes = list(train_digraph.nodes())
    node2vec_dict = {}
    print('GUESS embedding node2vc train result')
    for i, vec in enumerate(X):
        node2vec_dict[reversedNodeListMap[train_nodes[i]]] = vec
    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )
    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))
    # Edges already present in the training graph are not predictions.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]
    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    return (MAP, prec_curv, node2vec_dict)