def main(args):
    """Build a DNGR embedding for the input graph, save it, and optionally
    evaluate it with a logistic-regression node classifier.

    Args:
        args: parsed CLI namespace; this function reads ``input``,
            ``graph_format``, ``weighted``, ``directed``, ``method``,
            ``kstep``, ``representation_size``, ``output``, ``label_file``
            and ``clf_ratio``.

    Raises:
        ValueError: if ``args.method`` is neither 'dngr' nor 'gcn'
            (the original code raised a confusing NameError instead).
    """
    start = time.time()
    g = Graph()
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    model = None
    if args.method == 'dngr':
        model = DNGR(graph=g, Kstep=args.kstep, dim=args.representation_size)
        model.show()
    # Elapsed training time in seconds.
    print(time.time() - start)
    if args.method != 'gcn':
        # BUG FIX: the original referenced `model` here even when no branch
        # above assigned it, crashing with NameError for unknown methods.
        if model is None:
            raise ValueError("unsupported method: {!r}".format(args.method))
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)
def main(args):
    """Train the graph-embedding model selected by ``args.method``
    (node2vec, LINE, DeepWalk, TADW, GCN or GraRep), save the embeddings
    and optionally evaluate them with a logistic-regression classifier.

    Args:
        args: parsed CLI namespace supplying the graph location/format and
            the per-method hyper-parameters.

    Raises:
        ValueError: if ``args.method`` matches no known model
            (the original code raised a confusing NameError instead).
    """
    start = time.time()
    g = Graph()
    # NOTE(review): filename typo ("singluar") kept deliberately so output
    # stays byte-compatible with whatever downstream reads this file.
    singular_node_path = "singluar_nodes.txt"
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    model = None
    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers, p=args.p, q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        # With labels available (and auto-save not disabled) LINE trains
        # with in-loop classification; otherwise train unsupervised.
        if args.label_file and not args.no_auto_save:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order, label_file=args.label_file,
                              clf_ratio=args.clf_ratio)
        else:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk is node2vec with uniform walks (dw=True, no p/q).
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size, dw=True)
    elif args.method == 'tadw':
        assert args.feature_file != ''
        g.read_node_features(args.feature_file)
        # Dump singular-node ids for later inspection; `with` guarantees the
        # handle is closed even if a write fails (original used open/close).
        with open(singular_node_path, "w+") as fout:
            for node_idx in g.sgl_node_list:
                fout.write("{}\n".format(node_idx))
        model = tadw.TADW(graph=g, dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        # GCN trains and evaluates internally; no embedding file is saved.
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden, epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep,
                       dim=args.representation_size)
    print("time: ", time.time() - start)
    if args.method != 'gcn':
        # BUG FIX: the original crashed with NameError here when no branch
        # above assigned `model`; fail with an explicit message instead.
        if model is None:
            raise ValueError("unsupported method: {!r}".format(args.method))
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)
def main(args):
    """Train one of the DNGR-family models (DNGR, VAEDNGR, SDNGR,
    VAESDNGR), save its embeddings, and evaluate them on a fixed
    train/test split of the labelled nodes.

    Args:
        args: parsed CLI namespace; reads ``label_file``, ``clf_ratio``,
            ``input``, ``graph_format``, ``weighted``, ``directed``,
            ``method``, ``kstep``, ``representation_size`` and ``output``.

    Raises:
        ValueError: if ``args.method`` matches no DNGR variant
            (the original code raised a confusing NameError instead).
    """
    start = time.time()
    g = Graph()
    print("Reading...")
    # Build one train/test split up front so every model variant is
    # constructed with, and evaluated on, the same labelled nodes.
    X, Y = read_node_label(args.label_file)
    training_size = int(args.clf_ratio * len(X))
    shuffle_indices = np.random.permutation(np.arange(len(X)))
    X_train = [X[shuffle_indices[i]] for i in range(training_size)]
    Y_train = [Y[shuffle_indices[i]] for i in range(training_size)]
    X_test = [X[shuffle_indices[i]] for i in range(training_size, len(X))]
    Y_test = [Y[shuffle_indices[i]] for i in range(training_size, len(X))]
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    model = None
    if args.method == 'dngr':
        model = DNGR(graph=g, Kstep=args.kstep,
                     dim=args.representation_size, XY=[X_train, Y_train])
        model.show()
    elif args.method == 'vaedngr':
        model = VAEDNGR(graph=g, Kstep=args.kstep,
                        dim=args.representation_size, XY=[X_train, Y_train])
        model.show()
    elif args.method == 'sdngr':
        # NOTE(review): the original did not call model.show() for this
        # variant only — confirm whether that omission is intentional.
        model = SDNGR(graph=g, Kstep=args.kstep,
                      dim=args.representation_size, XY=[X_train, Y_train])
    elif args.method == 'vaesdngr':
        model = VAESDNGR(graph=g, Kstep=args.kstep,
                         dim=args.representation_size, XY=[X_train, Y_train])
        model.show()
    # BUG FIX: the original referenced `model` below even when no branch
    # above assigned it, crashing with NameError for unknown methods.
    if model is None:
        raise ValueError("unsupported method: {!r}".format(args.method))
    print(time.time() - start)
    if args.method != 'gcn':
        print("Saving embeddings...")
        # Prefix the output file with the method so runs don't clobber
        # each other.
        model.save_embeddings(args.method + '_' + args.output)
    vectors = model.vectors
    print("Training classifier using {:.2f}% nodes...".format(
        args.clf_ratio * 100))
    clf = Classifier(vectors=vectors, clf=LogisticRegression())
    clf.my_evaluate(X_train, Y_train, X_test, Y_test)
"""Grid-search script for node2vec hyper-parameters on the load graph."""
import random

import numpy as np
# BUG FIX: original read `import matplotlib as plt` — the `plt` alias
# conventionally binds matplotlib.pyplot; top-level matplotlib exposes no
# plotting functions, so any later `plt.plot(...)` would fail.
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

from libnrl.graph import *
from libnrl import node2vec
from libnrl.classify import Classifier, read_node_label

# Load the weighted, directed graph and its node labels.
g = Graph()
g.read_edgelist(filename='../data/load_rename.csv', weighted=True,
                directed=True)

# Hyper-parameter tuning.
X, Y = read_node_label('../data/load_label.csv')
tuned_parameters = {
    'path_length': [20, 100],
    'num_paths': [10, 20, 50],
    'dim': [30, 80, 200],
    'p': [0.25, 0.5, 1, 2, 4],
    'q': [0.25, 0.5, 1, 2, 4],
}
# NOTE(review): never filled in this chunk — the scoring/recording code
# presumably follows in a part of the file not shown here.
test_scores = {}
# Only p and q are swept here; path_length/num_paths/dim stay fixed at
# 80/10/30 despite being listed in tuned_parameters — TODO confirm.
for p in tuned_parameters['p']:
    for q in tuned_parameters['q']:
        model = node2vec.Node2vec(graph=g, path_length=80, num_paths=10,
                                  dim=30, p=p, q=q, window=20)
def main(args):
    """Train the graph-embedding model selected by ``args.method``
    (node2vec, LINE, DeepWalk, TADW, GCN or GraRep), save the embeddings
    and optionally evaluate them with a logistic-regression classifier.

    Single-argument ``print(...)`` calls are used throughout so this body
    is valid under both Python 2 (where the original used ``print``
    statements) and Python 3, matching the rest of the file.

    Args:
        args: parsed CLI namespace supplying the graph location/format and
            the per-method hyper-parameters.

    Raises:
        ValueError: if ``args.method`` matches no known model
            (the original code raised a confusing NameError instead).
    """
    start = time.time()
    g = Graph()
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    model = None
    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers, p=args.p, q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        if args.label_file:
            # BUG FIX: original read `auto_stop=args.no - auto_stop`, which
            # subtracts an undefined name and raises NameError; reconstructed
            # as the `no_auto_stop` CLI flag. NOTE(review): confirm the flag's
            # polarity (auto_stop vs. not no_auto_stop) against the argparse
            # definition.
            model = line.LINE(g, lr=args.lr, batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order, label_file=args.label_file,
                              clf_ratio=args.clf_ratio,
                              auto_stop=args.no_auto_stop)
        else:
            model = line.LINE(g, lr=args.lr, batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk is node2vec with uniform walks (dw=True, no p/q).
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size, dw=True)
    elif args.method == 'tadw':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = tadw.TADW(graph=g, dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        # GCN trains and evaluates internally; no embedding file is saved.
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden, epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep,
                       dim=args.representation_size)
    print(time.time() - start)
    if args.method != 'gcn':
        # BUG FIX: the original crashed with NameError here when no branch
        # above assigned `model`; fail with an explicit message instead.
        if model is None:
            raise ValueError("unsupported method: {!r}".format(args.method))
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)