예제 #1
0
def load_models(G, edge_f, models):
    """Append an SDNE model to ``models``, then train and evaluate every model on ``G``.

    For each model this prints the graph size, learns the embedding, prints
    the training time, evaluates static graph reconstruction, prints MAP with
    the first five precision-curve entries, and plots the embedding in 2D.

    Args:
        G: Graph exposing ``to_directed()``, ``number_of_nodes()`` and
            ``number_of_edges()``. It is converted to directed form before use.
        edge_f: Not used by this function; embeddings are learned from the
            in-memory graph (``edge_f=None`` is passed to ``learn_embedding``).
        models: List of embedding models. It is mutated in place: one SDNE
            instance is appended to it.

    Returns:
        The same ``models`` list, once every model in it has been processed.
    """
    # All models are trained on the directed form of the graph.
    G = G.to_directed()

    models.append(
        SDNE(d=2,
             beta=5,
             alpha=1e-5,
             nu1=1e-6,
             nu2=1e-6,
             K=3,
             n_units=[
                 50,
                 15,
             ],
             rho=0.3,
             n_iter=50,
             xeta=0.01,
             n_batch=100,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

    # For each model, learn the embedding and evaluate it on graph
    # reconstruction and visualization.
    for embedding in models:
        print('Num nodes: %d, num edges: %d' %
              (G.number_of_nodes(), G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list
        Y, t = embedding.learn_embedding(graph=G,
                                         edge_f=None,
                                         is_weighted=True,
                                         no_python=True)

        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))
        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))
        # Visualize
        viz.plot_embedding2D(embedding.get_embedding(),
                             di_graph=G,
                             node_colors=None)

    # Return only after the loop: returning from inside it stopped the
    # function after the first model, leaving the rest untrained.
    return models
예제 #2
0
파일: Gecko.py 프로젝트: NewKnowledge/gecko
    def GraphReconstruction(self,
                            G,
                            verbose=True,
                            visualize=True,
                            directed=False):
        """Train every model in ``self.models`` on ``G`` and return the best one by MAP.

        Args:
            G: Input graph. Unless ``directed`` is true it is converted with
                ``to_directed()``; its nodes are then relabelled to
                consecutive integers starting at 0, with the previous label
                stored in the ``"original_label"`` node attribute.
            verbose: When true, print the graph size and the training time of
                each model. The MAP / precision-curve line is printed
                regardless of this flag.
            visualize: When true, plot each embedding in 2D, save the figure
                as ``"embedding_" + embedding._method_name`` and show it.
            directed: Set to true when ``G`` is already in directed form.

        Returns:
            The model with the highest MAP (the earliest one wins ties), or
            ``None`` when ``self.models`` is empty.
        """
        # convert to directed form for base library gem, if needed
        if not directed:
            G = G.to_directed()
        # important that nodes are contiguously numbered
        G = nx.convert_node_labels_to_integers(
            G,
            first_label=0,
            ordering='default',
            label_attribute="original_label")

        # Start with no winner so that an empty model list, or models whose
        # MAP never exceeds 0, cannot leave the result unbound.
        bestEmbedding = None
        maxMAP = 0
        for embedding in self.models:
            if verbose:
                print('Num nodes: %d, num edges: %d' %
                      (G.number_of_nodes(), G.number_of_edges()))

            # Learn embedding - accepts a networkx graph or file with edge list
            t1 = time()
            Y, t = embedding.learn_embedding(graph=G,
                                             edge_f=None,
                                             is_weighted=True,
                                             no_python=True)
            if verbose:
                print(embedding._method_name + ':\n\tTraining time: %f' %
                      (time() - t1))

            # Evaluate on graph reconstruction
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                G, embedding, Y, None)
            print(("\tMAP: {} \t precision curve: {}\n\n\n\n" +
                   '-' * 100).format(MAP, prec_curv[:5]))

            if visualize:
                viz.plot_embedding2D(embedding.get_embedding(),
                                     di_graph=G,
                                     node_colors=None)
                # Save before showing: with an interactive backend the figure
                # may be gone once show() returns, and a later savefig()
                # would then write an empty image.
                plt.savefig("embedding_" + embedding._method_name)
                plt.show()

            # keep track of the best embedding so far
            if bestEmbedding is None or maxMAP < MAP:
                bestEmbedding = embedding
                maxMAP = MAP

        return bestEmbedding
예제 #3
0
def evaluate_embedding(graph, embedding):
    """Evaluate graph reconstruction for a precomputed embedding matrix.

    The embedding is wrapped in a small adapter object that supplies the
    ``get_reconstructed_adj`` / ``get_edge_weight`` methods, and the adapter
    is handed to ``gr.evaluateStaticGraphReconstruction`` in place of a model.

    Args:
        graph: Graph passed through to the evaluator.
        embedding: 2-D array with one row per node; the weight between two
            nodes is the dot product of their rows.

    Returns:
        Tuple ``(MAP, prec_curv)`` taken from the evaluator's result.
    """
    class _Dummy(object):
        """Adapter that reconstructs edge weights as embedding dot products."""

        def __init__(self, embedding):
            self.embedding = embedding

        def get_reconstructed_adj(self, X=None, node_l=None):
            """Return the dense matrix of pairwise dot products, zero on the diagonal.

            ``X`` and ``node_l`` are accepted for interface compatibility and
            ignored; the matrix always covers every row of the embedding.
            """
            # One matrix product replaces the per-pair Python loop.
            adj_mtx_r = np.dot(self.embedding, self.embedding.T).astype(float)
            # Self-pairs are excluded, exactly as the loop skipped v_i == v_j.
            np.fill_diagonal(adj_mtx_r, 0)
            return adj_mtx_r

        def get_edge_weight(self, i, j):
            """Return the dot product of embedding rows ``i`` and ``j``."""
            return np.dot(self.embedding[i, :], self.embedding[j, :])

    dummy_model = _Dummy(embedding)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        graph, dummy_model, embedding, None)
    return (MAP, prec_curv)
예제 #4
0
def generate_metrics(G,
                     model,
                     embedding,
                     labels,
                     predicted_labels,
                     S=None,
                     cv=5):
    """Collect reconstruction, classification and clustering metrics for one embedding.

    # Arguments:
        G: Graph passed to the graph-reconstruction evaluator.
        model: Embedding model passed to the graph-reconstruction evaluator.
        embedding: Embedding matrix; used for reconstruction and as the
            classifier's features.
        labels: Ground-truth labels, or None to compute only MAP.
        predicted_labels: Cluster assignments compared against `labels`.
        S: Optional extra feature matrix; evaluated stacked next to
            `embedding` and on its own.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: Always contains 'MAP'. With labels it also contains 'CV',
            'ARC Clustering' and 'AMI Clustering'; with both labels and S it
            additionally contains 'CVAnatomy+Graph' and 'CVAnatomyOnly'.
    """
    forest = RandomForestClassifier(n_estimators=2000)

    def _mean_cv_score(features):
        # Cross-validate the forest on `features`, echo the per-fold scores
        # and return their mean.
        fold_scores = cross_val_score(forest, features, labels, cv=cv)
        print(fold_scores)
        return fold_scores.mean()

    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, embedding, is_undirected=False, is_weighted=True)
    results = {'MAP': MAP}

    # Everything past this point needs ground-truth labels.
    if labels is None:
        return results

    results['CV'] = _mean_cv_score(embedding)
    if S is not None:
        results['CVAnatomy+Graph'] = _mean_cv_score(np.hstack((embedding, S)))
        results['CVAnatomyOnly'] = _mean_cv_score(S)
    results['ARC Clustering'] = metrics.adjusted_rand_score(
        labels, predicted_labels)
    results['AMI Clustering'] = metrics.adjusted_mutual_info_score(
        labels, predicted_labels)
    return results
예제 #5
0
            xeta: sgd step size parameter
            n_batch: minibatch size for SGD
            modelfile: Files containing previous encoder and decoder models
            weightfile: Files containing previous encoder and decoder weights
    '''
    models.append(
        SDNE(d=args.d, beta=args.beta, alpha=args.alpha, nu1=args.nu1, nu2=args.nu2,
             K=args.k, n_units=args.nunits, n_iter=args.niter, xeta=args.xeta, n_batch=args.nbatch,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

    for embedding in models:
        print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                                G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list
        Y, t = embedding.learn_embedding(
            graph=G, edge_f=None, is_weighted=True, no_python=True)
        print(embedding._method_name +
              ':\n\tTraining time: %f' % (time() - t1))
        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        # ---------------------------------------------------------------------------------
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))
        # ---------------------------------------------------------------------------------
        # Visualize
        # viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)
        # plt.show()
예제 #6
0
from gem.embedding.gf import GraphFactorization as gf
from gem.evaluation import evaluate_graph_reconstruction as gr
from gem.utils.graph_util import *

# Instantiate the embedding method with hyperparameters
em = gf(2, 100000, 1*10**-4, 1.0)

# Load graph
graph = loadGraphFromEdgeListTxt('data/karate.edgelist')

# Learn embedding - accepts a networkx graph or file with edge list
Y, t = em.learn_embedding(graph, edge_f=None, is_weighted=True, no_python=True)

# Evaluate on graph reconstruction. Only the first two results are needed;
# slicing keeps this working whether the evaluator returns (MAP, prec_curv)
# or a longer tuple that starts with those two values.
MAP, prec_curv = gr.evaluateStaticGraphReconstruction(graph, em, Y, None)[:2]

# Formatted so the output is "<MAP> <prec_curv>" on both Python 2 and 3.
print("%s %s" % (MAP, prec_curv))
예제 #7
0
파일: test.py 프로젝트: zhouzhou12/GEM
         modelfile=[
             './intermediate/enc_model.json', './intermediate/dec_model.json'
         ],
         weightfile=[
             './intermediate/enc_weights.hdf5',
             './intermediate/dec_weights.hdf5'
         ]))

# Train, evaluate and plot each embedding model in turn.
for embedding in models:
    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G,
                                     edge_f=None,
                                     is_weighted=True,
                                     no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction. Only the first two results are used;
    # slicing keeps this working whether the evaluator returns
    # (MAP, prec_curv) or a longer tuple that starts with those two values.
    MAP, prec_curv = gr.evaluateStaticGraphReconstruction(
        G, embedding, Y, None)[:2]
    #---------------------------------------------------------------------------------
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
        MAP, prec_curv))
    #---------------------------------------------------------------------------------
    # Visualize
    viz.plot_embedding2D(embedding.get_embedding(),
                         di_graph=G,
                         node_colors=None)
    plt.show()
예제 #8
0
def benchmark(x, cv=5):
    """This function automatically runs through a series of benchmarks for unsupervised learning (MAP), semi-supervised learning, and supervised learning (cross validation accuracy with an MLP classifier; the ASE and Raw baselines are scored through generate_metrics) for the provided input dataset.

    # Arguments:
        x (NEGraph): A NeuroEmbed graph. Its `G`, `X`, `y` and `S` attributes are read.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: A result dictionary keyed by model name ('ASE', 'Raw', 'HOPE',
            'Laplacian Eigenmaps', and 'node2vec' when available). Each value
            is that model's own metrics dictionary.
    """
    all_results = {}
    G, X, y, S = x.G, x.X, x.y, x.S

    # Baselines scored through generate_metrics. The separate reconstruction
    # evaluation that used to precede this was dropped: its result was
    # overwritten before ever being read.
    model = ASEEmbedding()
    model.fit(X)
    # The remaining models use half of the ASE embedding's column count.
    d = model.H.shape[1] // 2
    all_results['ASE'] = generate_metrics(G,
                                          model,
                                          model.H,
                                          y,
                                          model.y,
                                          S,
                                          cv=cv)
    raw_model = RawEmbedding()
    raw_model.fit(X, n_components=d)
    all_results['Raw'] = generate_metrics(G,
                                          raw_model,
                                          raw_model.H,
                                          y,
                                          raw_model.y,
                                          S,
                                          cv=cv)

    # Graphs rebuilt from the matrix X for the GEM-style models.
    G = nx.from_numpy_matrix(X, create_using=nx.DiGraph)
    # NOTE(review): the 1e-9 offset presumably makes every entry nonzero so
    # that node2vec sees an edge for every pair - confirm.
    Gd = nx.from_numpy_matrix(X + 1e-9, create_using=nx.DiGraph)

    models = {}
    if N2VC_available:
        models['node2vec'] = node2vec(d=d,
                                      max_iter=10,
                                      walk_len=80,
                                      num_walks=10,
                                      con_size=10,
                                      ret_p=1,
                                      inout_p=1)
    models['HOPE'] = HOPE(d=d, beta=0.01)
    models['Laplacian Eigenmaps'] = LaplacianEigenmaps(d=d)

    for model_name, embedding in models.items():
        # node2vec is trained and evaluated on the offset graph; every other
        # model uses the plain one.
        model_graph = Gd if model_name == 'node2vec' else G
        Xh, t = embedding.learn_embedding(graph=model_graph,
                                          edge_f=None,
                                          is_weighted=True,
                                          no_python=True)
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            model_graph, embedding, Xh, is_undirected=False, is_weighted=False)
        Xh = np.real(Xh)

        # A fresh dictionary per model: reusing a single one made every entry
        # of all_results from 'Raw' onwards point at the same object, so all
        # of them ended up reporting the last model's numbers.
        out_metrics = {'MAP': MAP}
        if y is not None:
            clf = MLPClassifier(alpha=1, max_iter=100000)
            clusterer = GaussianMixture(n_components=Xh.shape[1])
            clusterer.fit(Xh)
            predict_labels = clusterer.predict(Xh)
            scores = cross_val_score(clf, Xh, y, cv=cv)
            out_metrics['CV'] = scores.mean()
            if S is not None:
                scores = cross_val_score(clf, np.hstack((Xh, S)), y, cv=cv)
                out_metrics['CVAnatomy+Graph'] = scores.mean()
                scores = cross_val_score(clf, S, y, cv=cv)
                out_metrics['CVAnatomyOnly'] = scores.mean()
            out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(
                y, predict_labels)
            out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(
                y, predict_labels)
        print(model_name, out_metrics)
        all_results[model_name] = out_metrics
    return all_results
예제 #9
0
        print('Num nodes: %d, num edges: %d' %
              (G.number_of_nodes(), G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list

        Y, t = embedding.learn_embedding(graph=G_train,
                                         valgraph=G_val,
                                         edge_f=None,
                                         is_weighted=True,
                                         no_python=True)

        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))

        # Evaluate on graph reconstruction:train
        MANE, avgrecpred, avgrectrue, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G_train, embedding, Y, None, is_weighted=True, is_undirected=False)
        print("MANE train is ", MANE)
        print("avgrec 10 pred train is ", avgrecpred)
        print("avgrec 10 true is ", avgrectrue)
        print("MSE train is ", pow(err, 2) / G_train.number_of_edges())
        #print(("\tMAP: {} \t precision curve: {}\n\n\n\n"+'-'*100).format(MAP,prec_curv[:5]))
        #viz.plot_embedding2D(embedding.get_embedding(), di_graph=G_train, node_colors=None)
        #plt.show()
        #plt.clf()

        # Evaluate on graph reconstruction:val
        MANE, avgrecpred, avgrectrue, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G_val, embedding, Y, None, is_weighted=True, is_undirected=False)
        print("MANE val is ", MANE)
        print("avgrec 10 pred val is ", avgrecpred)
        print("avgrec 10 true val is ", avgrectrue)