Ejemplo n.º 1
0
def main(args):
    """Single-machine multi-GPU training driver for the deepwalk model.

    Builds the dense (non-sparse, non-distributed) model, scales the
    learning rate and step count by the number of visible CUDA devices,
    optionally warm-starts from a checkpoint, and launches training.

    Args:
        args: parsed CLI namespace; uses num_nodes, hidden_size, neg_num,
            epoch, batch_size, lr, optimizer, edge_path and
            warm_start_from_dir.
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    device_count = len(F.cuda_places())

    # Dense, single-machine variant: is_sparse=False, is_distributed=False.
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # One "epoch" visits every node once; split the work across devices.
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      device_count)
    # Linear LR scaling with the device count.
    optimization(args.lr * device_count, loss, train_steps, args.optimizer)

    executor = F.Executor(F.CUDAPlace(0))
    executor.run(F.default_startup_program())

    walk_graph = build_graph(args.num_nodes, args.edge_path)
    batch_generator = build_gen_func(args, walk_graph)

    pyreader.decorate_tensor_provider(batch_generator)
    pyreader.start()

    main_prog = F.default_main_program()

    # Optionally restore parameters from a previous run.
    if args.warm_start_from_dir is not None:
        F.io.load_params(executor, args.warm_start_from_dir, main_prog)

    parallel_exe = get_parallel_exe(main_prog, loss)
    train(parallel_exe, executor, main_prog, loss, pyreader, args, train_steps)
Ejemplo n.º 2
0
def train(args):
    """Parameter-server (fleet) training entry point for deepwalk.

    Loads the raw edge list, builds the model, initializes the fleet
    role, then runs either the PS server loop or the worker training
    loop depending on this process's role.

    Args:
        args: parsed CLI namespace; uses edge_path, undirected,
            hidden_size, neg_num, is_sparse, is_distributed, epoch,
            batch_size, lr, optimizer, walk_len and win_size.
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    # NOTE(review): the "0" default makes the train_steps division below
    # raise ZeroDivisionError when PADDLE_TRAINERS_NUM is unset —
    # presumably the fleet launcher always sets it; confirm.
    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))

    # load_raw_edges_fn returns (edges, weights, node2idx).
    data = load_raw_edges_fn(args.edge_path, args.undirected)
    edges = data[0]
    weights = data[1]
    node2idx = data[2]
    num_nodes = len(node2idx)

    model = DeepwalkModel(num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # Initialize this process's fleet role (server or worker) before the
    # optimizer is built.
    log.info("init_role")
    init_role()

    # Total steps: one epoch visits every node once, divided across
    # devices and workers.
    train_steps = math.ceil(1. * num_nodes * args.epoch /
                            args.batch_size / num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    # For SGD the LR is pre-scaled by the effective samples per batch.
    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size
    optimization(args.lr, loss, train_steps, args.optimizer)

    # Server processes block inside run_server().
    if fleet.is_server():
        log.info("PS server mode")
        fleet.init_server()
        fleet.run_server()

    # Worker processes build the graph, bind the data generator and train.
    if fleet.is_worker():
        log.info("start init worker done")
        exe = F.Executor(F.CPUPlace())
        exe.run(F.default_startup_program())
        log.info("Startup done")
        fleet.init_worker()
        # Only workers load the sample data.
        log.info("init worker done")


        print("LEO num_nodes:",num_nodes, len(edges))
        # Attach edge weights as an edge feature on the PGL graph.
        edges_feat={}
        edges_feat["weight"] = np.array(weights)
        graph = pgl.graph.Graph(num_nodes, edges, edge_feat=edges_feat)
        # Bind the batch generator to the pyreader.
        gen_func = build_gen_func(args, graph)

        pyreader.decorate_tensor_provider(gen_func)

        # NOTE(review): train_prog here is a training function defined
        # elsewhere, despite the program-like name — confirm.
        train_prog(exe, F.default_main_program(), loss, pyreader, args, train_steps)
        print("fleet try to stop worker\r\n")
        fleet.stop_worker()
        print("Game over\r\n")
Ejemplo n.º 3
0
def main(args):
    """Single-machine GPU training driver for the GES model.

    Resolves the graph and node features either from a built-in dataset
    or from files on disk, builds the model, and runs training.

    Args:
        args: parsed CLI namespace; uses dataset, num_nodes, edge_path,
            output_path, node_feat_npy, num_embedding, hidden_size,
            neg_num, epoch, batch_size, lr and optimizer.
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    if args.dataset is not None:
        if args.dataset == "BlogCatalog":
            graph = data_loader.BlogCatalogDataset().graph
        else:
            raise ValueError(args.dataset + " dataset doesn't exists")
        log.info("Load buildin BlogCatalog dataset done.")
        # Use the argmax group id as a single side feature per node,
        # offset past the node-id range so feature ids never collide
        # with node ids in the shared embedding table.
        node_feat = graph.node_feat["group_id"].argmax(-1)[..., np.newaxis] \
            + graph.num_nodes
        args.num_nodes = graph.num_nodes
        group_count = graph.node_feat["group_id"].shape[-1]
        args.num_embedding = graph.num_nodes + group_count
    else:
        graph = build_graph(args.num_nodes, args.edge_path, args.output_path)
        node_feat = np.load(args.node_feat_npy)

    # +1 slot: node id itself plus each side-feature column.
    model = GESModel(args.num_embedding, node_feat.shape[1] + 1,
                     args.hidden_size, args.neg_num, False, 2)
    pyreader = model.pyreader
    loss = model.forward()
    device_count = len(F.cuda_places())

    # One epoch visits every node once; split steps across devices.
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      device_count)
    log.info("Train steps: %s" % train_steps)
    # Linear LR scaling with the device count.
    optimization(args.lr * device_count, loss, train_steps, args.optimizer)

    executor = F.Executor(F.CUDAPlace(0))
    executor.run(F.default_startup_program())

    batch_generator = build_gen_func(args, graph, node_feat)

    pyreader.decorate_tensor_provider(batch_generator)
    pyreader.start()
    main_prog = F.default_main_program()
    parallel_exe = get_parallel_exe(main_prog, loss)
    train(parallel_exe, executor, main_prog, loss, pyreader, args, train_steps)
Ejemplo n.º 4
0
def train(args):
    """Parameter-server (fleet) training entry point for deepwalk.

    Builds the model, initializes the fleet role, then runs either the
    PS server loop (optionally warm-started) or the worker training
    loop, selecting the graph source from the CLI arguments.

    Args:
        args: parsed CLI namespace; uses num_nodes, hidden_size, neg_num,
            is_sparse, is_distributed, epoch, batch_size, lr, optimizer,
            walk_len, win_size, warm_start_from_dir, dataset, edge_path
            and walkpath_files.
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    # NOTE(review): the "0" default makes the train_steps division below
    # raise ZeroDivisionError when PADDLE_TRAINERS_NUM is unset —
    # presumably the fleet launcher always sets it; confirm.
    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))

    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # Initialize this process's fleet role (server or worker) before the
    # optimizer is built.
    init_role()

    # Total steps: one epoch visits every node once, divided across
    # devices and workers.
    train_steps = math.ceil(1. * args.num_nodes * args.epoch /
                            args.batch_size / num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    # For SGD the LR is pre-scaled by the effective samples per batch.
    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size
    optimization(args.lr, loss, train_steps, args.optimizer)

    # Server processes (optionally warm-started) block inside run_server().
    if fleet.is_server():
        fleet.init_server(args.warm_start_from_dir)
        fleet.run_server()

    # Worker processes pick a graph source, bind the generator and train.
    if fleet.is_worker():
        log.info("start init worker done")
        fleet.init_worker()
        # Only workers load the sample data.
        log.info("init worker done")

        exe = F.Executor(F.CPUPlace())
        exe.run(fleet.startup_program)
        log.info("Startup done")

        # Graph source precedence: built-in dataset > edge file > fake
        # graph backed by pre-generated walk files.
        if args.dataset is not None:
            if args.dataset == "BlogCatalog":
                graph = data_loader.BlogCatalogDataset().graph
            elif args.dataset == "ArXiv":
                graph = data_loader.ArXivDataset().graph
            else:
                raise ValueError(args.dataset + " dataset doesn't exists")
            log.info("Load buildin BlogCatalog dataset done.")
        elif args.walkpath_files is None or args.walkpath_files == "None":
            graph = build_graph(args.num_nodes, args.edge_path)
            log.info("Load graph from '%s' done." % args.edge_path)
        else:
            graph = build_fake_graph(args.num_nodes)
            log.info("Load fake graph done.")

        # Bind the batch generator to the pyreader.
        gen_func = build_gen_func(args, graph)

        pyreader.decorate_tensor_provider(gen_func)
        pyreader.start()

        # NOTE(review): train_prog here is a training function defined
        # elsewhere, despite the program-like name — confirm.
        compiled_prog = build_complied_prog(fleet.main_program, loss)
        train_prog(exe, compiled_prog, loss, pyreader, args, train_steps)
Ejemplo n.º 5
0
import io
import logging

import numpy as np
import paddle.fluid as F
import paddle.fluid.dygraph as D
import pgl
import tqdm
import yaml
from easydict import EasyDict as edict
from pgl.graph_kernel import alias_sample_build_table
from pgl.utils.logger import log

from ernie.tokenizing_ernie import ErnieTokenizer
from ernie.tokenizing_ernie import ErnieTinyTokenizer
from ernie.modeling_ernie import ErnieModel

# Emit debug-level output from the shared PGL logger for this module.
log.setLevel(logging.DEBUG)


def term2id(string, tokenizer, max_seqlen):
    """Tokenize *string* into a fixed-length id sequence.

    The token ids are truncated to leave room for a trailing [SEP] id,
    then right-padded with the pad id up to exactly *max_seqlen*.

    Args:
        string: raw text to encode.
        tokenizer: object exposing tokenize(), convert_tokens_to_ids(),
            sep_id and pad_id (e.g. an ERNIE tokenizer).
        max_seqlen: fixed output length.

    Returns:
        list of int ids of length max_seqlen.
    """
    token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(string))
    # Truncate, reserving one slot for the separator id.
    token_ids = token_ids[:max_seqlen - 1] + [tokenizer.sep_id]
    padding = [tokenizer.pad_id] * (max_seqlen - len(token_ids))
    return token_ids + padding


def load_graph(config, str2id, term_file, terms, item_distribution):
    edges = []
    with io.open(config.graph_data, encoding=config.encoding) as f:
        for idx, line in enumerate(f):