Example #1
0
def main(args):
    t1 = time.time()
    g = Graph()
    singluar_node_file = "singluar_nodes.txt"
    
    print("Reading...")


    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted, directed=args.directed)
    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                 num_paths=args.number_walks, dim=args.representation_size,
                                 workers=args.workers, p=args.p, q=args.q, window=args.window_size)
    elif args.method == 'line':
        if args.label_file and not args.no_auto_save:
            model = line.LINE(g, epoch = args.epochs, rep_size=args.representation_size, order=args.order, 
                label_file=args.label_file, clf_ratio=args.clf_ratio)
        else:
            model = line.LINE(g, epoch = args.epochs, rep_size=args.representation_size, order=args.order)
    elif args.method == 'deepWalk':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                 num_paths=args.number_walks, dim=args.representation_size,
                                 workers=args.workers, window=args.window_size, dw=True)
    elif args.method == 'tadw':
        # assert args.label_file != ''
        assert args.feature_file != ''
        # g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        fout = open(singluar_node_file, "w+")
        for node_idx in g.sgl_node_list:
            fout.write("{}\n".format(node_idx))
        fout.close()

        model = tadw.TADW(graph=g, dim=args.representation_size, lamb=args.lamb)
        # model = tadw_gpu.TADW_GPU(graph=g, dim=args.representation_size, lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                            weight_decay=args.weight_decay, hidden1=args.hidden,
                            epochs=args.epochs, clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep, dim=args.representation_size)
    t2 = time.time()
    print("time: ", t2-t1)
    if args.method != 'gcn':
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(args.clf_ratio*100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                     num_paths=args.number_walks, dim=args.representation_size,
                                     workers=args.workers, window=args.window_size, dw=True)
    elif args.method == 'tadw':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = tadw.TADW(graph=g, dim=args.representation_size, lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                                weight_decay=args.weight_decay, hidden1=args.hidden,
                                epochs=args.epochs, clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep, dim=args.representation_size)

    t2 = time.time()
    print(t2-t1)

    embeddings_items = model.vectors
    # make full node embeddings

    shape = [n_nodes, args.representation_size]
    node_embeddings = np.random.uniform(-np.sqrt(6.0 / (shape[0] + shape[1])), np.sqrt(6.0 / (shape[0] + shape[1])), size=shape)
    choose_nodeidx = []
    for node in embeddings_items:
        node_embeddings[int(node), :] = embeddings_items[node]
Example #3
0
File: main.py Project: yyr93520/sne
def main(args):
    t1 = time.time()
    g = Graph()
    print "Reading..."
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input,
                        weighted=args.weighted,
                        directed=args.directed)
    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g,
                                  path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  p=args.p,
                                  q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        if args.label_file:
            model = line.LINE(g,
                              lr=args.lr,
                              batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order,
                              label_file=args.label_file,
                              clf_ratio=args.clf_ratio,
                              auto_stop=args.no - auto_stop)
        else:
            model = line.LINE(g,
                              lr=args.lr,
                              batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        model = node2vec.Node2vec(graph=g,
                                  path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size,
                                  dw=True)
    elif args.method == 'tadw':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = tadw.TADW(graph=g,
                          dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g,
                           dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden,
                           epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep, dim=args.representation_size)
    t2 = time.time()
    print t2 - t1
    if args.method != 'gcn':
        print "Saving embeddings..."
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print "Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100)
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)