def main(args):
    """Load a graph, train the selected embedding model, and evaluate it.

    The graph is read from ``args.input`` in the format named by
    ``args.graph_format`` (``'adjlist'`` or ``'edgelist'``).  The model
    is chosen by ``args.method``: ``'node2vec'``, ``'line'``,
    ``'deepWalk'``, ``'tadw'``, ``'gcn'`` or ``'grarep'``.

    For every method except ``'gcn'`` the embeddings are saved to
    ``args.output``.  If ``args.label_file`` is set (and the method is
    not ``'gcn'``), a logistic-regression classifier is trained on
    ``args.clf_ratio`` of the labelled nodes and evaluated.

    Args:
        args: Parsed command-line namespace.  Besides the attributes
            named above, each method reads its own hyper-parameters
            from it (e.g. ``walk_length``, ``representation_size``,
            ``epochs``, ``lamb``, ``kstep``).

    Raises:
        AssertionError: If ``args.feature_file`` is empty for
            ``'tadw'``, or ``args.label_file`` / ``args.feature_file``
            is empty for ``'gcn'``.
    """
    t1 = time.time()
    g = Graph()
    # The on-disk name keeps its historical spelling so that anything
    # reading this file elsewhere continues to find it.
    singular_node_file = "singluar_nodes.txt"

    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)

    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers, p=args.p, q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        # Labels are only handed to LINE when auto-saving is enabled.
        if args.label_file and not args.no_auto_save:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order, label_file=args.label_file,
                              clf_ratio=args.clf_ratio)
        else:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk reuses the node2vec implementation with dw=True.
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size, dw=True)
    elif args.method == 'tadw':
        # Only the feature file is required here; labels are optional.
        assert args.feature_file != ''
        g.read_node_features(args.feature_file)
        # Record the nodes listed in g.sgl_node_list, one per line.  The
        # context manager closes the file even if a write fails.
        with open(singular_node_file, "w+") as fout:
            for node_idx in g.sgl_node_list:
                fout.write("{}\n".format(node_idx))
        model = tadw.TADW(graph=g, dim=args.representation_size,
                          lamb=args.lamb)
        # Alternative GPU implementation:
        # model = tadw_gpu.TADW_GPU(graph=g, dim=args.representation_size, lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden, epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep,
                       dim=args.representation_size)

    t2 = time.time()
    print("time: ", t2-t1)

    # The GCN branch neither saves embeddings nor runs the classifier.
    if args.method != 'gcn':
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(args.clf_ratio*100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)
model = node2vec.Node2vec(graph=g, path_length=args.walk_length, num_paths=args.number_walks, dim=args.representation_size, workers=args.workers, window=args.window_size, dw=True) elif args.method == 'tadw': assert args.label_file != '' assert args.feature_file != '' g.read_node_label(args.label_file) g.read_node_features(args.feature_file) model = tadw.TADW(graph=g, dim=args.representation_size, lamb=args.lamb) elif args.method == 'gcn': assert args.label_file != '' assert args.feature_file != '' g.read_node_label(args.label_file) g.read_node_features(args.feature_file) model = gcnAPI.GCN(graph=g, dropout=args.dropout, weight_decay=args.weight_decay, hidden1=args.hidden, epochs=args.epochs, clf_ratio=args.clf_ratio) elif args.method == 'grarep': model = GraRep(graph=g, Kstep=args.kstep, dim=args.representation_size) t2 = time.time() print(t2-t1) embeddings_items = model.vectors # make full node embeddings shape = [n_nodes, args.representation_size] node_embeddings = np.random.uniform(-np.sqrt(6.0 / (shape[0] + shape[1])), np.sqrt(6.0 / (shape[0] + shape[1])), size=shape) choose_nodeidx = [] for node in embeddings_items: node_embeddings[int(node), :] = embeddings_items[node]
def main(args):
    """Load a graph, train the selected embedding model, and evaluate it.

    The graph is read from ``args.input`` in the format named by
    ``args.graph_format`` (``'adjlist'`` or ``'edgelist'``).  The model
    is chosen by ``args.method``: ``'node2vec'``, ``'line'``,
    ``'deepWalk'``, ``'tadw'``, ``'gcn'`` or ``'grarep'``.

    For every method except ``'gcn'`` the embeddings are saved to
    ``args.output``.  If ``args.label_file`` is set (and the method is
    not ``'gcn'``), a logistic-regression classifier is trained on
    ``args.clf_ratio`` of the labelled nodes and evaluated.

    Args:
        args: Parsed command-line namespace.  Besides the attributes
            named above, each method reads its own hyper-parameters
            from it (e.g. ``lr``, ``batch_size``, ``walk_length``,
            ``representation_size``, ``epochs``, ``lamb``, ``kstep``).

    Raises:
        AssertionError: If ``args.label_file`` or ``args.feature_file``
            is empty for the ``'tadw'`` or ``'gcn'`` methods.
    """
    t1 = time.time()
    g = Graph()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)

    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers, p=args.p, q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        if args.label_file:
            # The original read `args.no - auto_stop`, a subtraction with
            # an undefined name.  argparse stores a `--no-auto_stop` flag
            # as `no_auto_stop`.
            # NOTE(review): assumes the flag is `--no-auto_stop`, and the
            # "no_" flag is passed straight through as `auto_stop` -
            # confirm both against the parser and the polarity
            # line.LINE expects.
            model = line.LINE(g, lr=args.lr, batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order, label_file=args.label_file,
                              clf_ratio=args.clf_ratio,
                              auto_stop=args.no_auto_stop)
        else:
            model = line.LINE(g, lr=args.lr, batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk reuses the node2vec implementation with dw=True.
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size, dw=True)
    elif args.method == 'tadw':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = tadw.TADW(graph=g, dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden, epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep,
                       dim=args.representation_size)

    t2 = time.time()
    print(t2 - t1)

    # The GCN branch neither saves embeddings nor runs the classifier.
    if args.method != 'gcn':
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)