Example 1
def main(args):
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
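    # Neighbor fan-out per hop: only positive sample sizes are kept, so
    # the k_hop passed to the model equals len(samples).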
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)

    with fluid.program_guard(train_program, startup_program):
        feature, feature_init = paddle_helper.constant(
            "feat",
            dtype=data['feature'].dtype,
            value=data['feature'],
            hide_batch_size=False)

        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
            "sub_graph",
            fluid.CPUPlace(),
            node_feat=data['graph'].node_feat_info())
        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],
            feature=feature,
            hidden_size=args.hidden_size,
            graphsage_type=args.graphsage_type,
            k_hop=len(samples))

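    # Clone for evaluation before the optimizer is appended, so the test
    # program keeps only the forward ops.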
    test_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(learning_rate=args.lr)
        adam.minimize(model_loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)
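    # paddle_helper.constant returned an initializer along with the
    # variable; run it after startup to fill in the feature matrix.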
    feature_init(place)

    train_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['train_index'],
        node_label=data["train_label"])

    val_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['val_index'],
        node_label=data["val_label"])

    test_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['test_index'],
        node_label=data["test_label"])

    for epoch in range(args.epoch):
        run_epoch(train_iter,
                  program=train_program,
                  exe=exe,
                  prefix="train",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  epoch=epoch)

        run_epoch(val_iter,
                  program=test_program,
                  exe=exe,
                  prefix="val",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  log_per_step=10000,
                  epoch=epoch)

    run_epoch(test_iter,
              program=test_program,
              prefix="test",
              exe=exe,
              model_loss=model_loss,
              model_acc=model_acc,
              log_per_step=10000,
              epoch=epoch)
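
All four examples delegate the per-batch feed/fetch loop to a run_epoch
helper that is not shown. Below is a minimal sketch of such a helper,
assuming the reader yields feed dictionaries keyed by the GraphWrapper's
input names (including a "node_index" entry, an assumption here) and a
plain fluid.Executor; the ParallelExecutor path of Example 2 is omitted:

def run_epoch(batch_iter, exe, program, prefix, model_loss, model_acc,
              epoch, log_per_step=100, num_trainer=1):
    """Sketch only: run one epoch and report mean loss/accuracy."""
    # num_trainer > 1 (ParallelExecutor) handling is omitted in this sketch.
    batch = 0
    total_loss, total_acc, total_sample = 0.0, 0.0, 0
    for batch_feed_dict in batch_iter():
        batch += 1
        loss, acc = exe.run(program,
                            feed=batch_feed_dict,
                            fetch_list=[model_loss.name, model_acc.name])
        # Weight the running averages by batch size; "node_index" is
        # assumed to be present in the feed dict.
        num_samples = len(batch_feed_dict["node_index"])
        total_loss += loss[0] * num_samples
        total_acc += acc[0] * num_samples
        total_sample += num_samples
        if batch % log_per_step == 0:
            log.info("Batch %s %s-Loss %s %s-Acc %s" %
                     (batch, prefix, loss[0], prefix, acc[0]))
    log.info("%s Epoch %s Loss %.5f Acc %.5f" %
             (prefix, epoch, total_loss / total_sample,
              total_acc / total_sample))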
Example 2
def main(args):
    """main"""
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)

    with fluid.program_guard(train_program, startup_program):
        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
            "sub_graph",
            fluid.CPUPlace(),
            node_feat=data['graph'].node_feat_info())

        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],
            hidden_size=args.hidden_size,
            graphsage_type=args.graphsage_type,
            k_hop=len(samples))

    test_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(learning_rate=args.lr)
        adam.minimize(model_loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)
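    # Multi-GPU path: wrap the train program in a ParallelExecutor;
    # sequential execution trades some speed for a deterministic op order.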
    if args.num_trainer > 1:
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False
        build_strategy.enable_sequential_execution = True

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           main_program=train_program,
                                           build_strategy=build_strategy,
                                           loss_name=model_loss.name)
    else:
        train_exe = exe

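    # Use the multiprocess reader only when more than one sampling worker
    # is requested; otherwise fall back to the single-process graph_reader.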
    if args.sample_workers > 1:
        train_iter = reader.multiprocess_graph_reader(
            data['graph'],
            graph_wrapper,
            samples=samples,
            num_workers=args.sample_workers,
            batch_size=args.batch_size,
            node_index=data['train_index'],
            node_label=data["train_label"])
    else:
        train_iter = reader.graph_reader(data['graph'],
                                         graph_wrapper,
                                         samples=samples,
                                         batch_size=args.batch_size,
                                         node_index=data['train_index'],
                                         node_label=data["train_label"])

    if args.sample_workers > 1:
        val_iter = reader.multiprocess_graph_reader(
            data['graph'],
            graph_wrapper,
            samples=samples,
            num_workers=args.sample_workers,
            batch_size=args.batch_size,
            node_index=data['val_index'],
            node_label=data["val_label"])
    else:
        val_iter = reader.graph_reader(data['graph'],
                                       graph_wrapper,
                                       samples=samples,
                                       batch_size=args.batch_size,
                                       node_index=data['val_index'],
                                       node_label=data["val_label"])

    if args.sample_workers > 1:
        test_iter = reader.multiprocess_graph_reader(
            data['graph'],
            graph_wrapper,
            samples=samples,
            num_workers=args.sample_workers,
            batch_size=args.batch_size,
            node_index=data['test_index'],
            node_label=data["test_label"])
    else:
        test_iter = reader.graph_reader(data['graph'],
                                        graph_wrapper,
                                        samples=samples,
                                        batch_size=args.batch_size,
                                        node_index=data['test_index'],
                                        node_label=data["test_label"])

    for epoch in range(args.epoch):
        run_epoch(train_iter,
                  program=train_program,
                  exe=train_exe,
                  prefix="train",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  num_trainer=args.num_trainer,
                  epoch=epoch)

        run_epoch(val_iter,
                  program=test_program,
                  exe=exe,
                  prefix="val",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  log_per_step=10000,
                  epoch=epoch)

    run_epoch(test_iter,
              program=test_program,
              prefix="test",
              exe=exe,
              model_loss=model_loss,
              model_acc=model_acc,
              log_per_step=10000,
              epoch=epoch)
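
The examples only consume args; the parser itself is not shown. A possible
argparse setup covering every flag read above (names taken from the
attribute accesses; the defaults are illustrative guesses, not the
original project's values):

import argparse

parser = argparse.ArgumentParser(description="GraphSAGE with PGL")
parser.add_argument("--use_cuda", action="store_true")
parser.add_argument("--normalize", action="store_true",
                    help="normalize node features")
parser.add_argument("--symmetry", action="store_true",
                    help="make edges undirected")
parser.add_argument("--graphsage_type", type=str, default="graphsage_mean")
parser.add_argument("--hidden_size", type=int, default=128)
parser.add_argument("--epoch", type=int, default=10)
parser.add_argument("--batch_size", type=int, default=128)
parser.add_argument("--lr", type=float, default=0.01)
parser.add_argument("--samples_1", type=int, default=25)
parser.add_argument("--samples_2", type=int, default=10)
parser.add_argument("--sample_workers", type=int, default=5)
parser.add_argument("--num_trainer", type=int, default=1)

if __name__ == "__main__":
    args = parser.parse_args()
    main(args)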
Example 3
    with fluid.program_guard(train_program, startup_program):
        lr = args.lr
        adam = fluid.optimizer.Adam(
            learning_rate=lr,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=args.rc))
        adam.minimize(loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    train_iter = reader.multiprocess_graph_reader(
        g,
        gw,
        samples=samples,
        num_workers=sample_workers,
        batch_size=batch_size,
        with_parent_node_index=True,
        node_index=train_idx,
        node_label=np.array(label[train_idx], dtype='float32'))

    val_iter = reader.multiprocess_graph_reader(
        g,
        gw,
        samples=samples,
        num_workers=sample_workers,
        batch_size=batch_size,
        with_parent_node_index=True,
        node_index=valid_idx,
        node_label=np.array(label[valid_idx], dtype='float32'))
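
This fragment is the only one that adds L2 weight decay to Adam (via
fluid.regularizer.L2DecayRegularizer with coefficient args.rc), and it
starts mid-function: g, gw, loss, samples, and the index/label arrays are
bound earlier in the original file. A hypothetical sketch of those
bindings, inferred from usage alone:

# Hypothetical bindings for the fragment above; none of this is from the
# original snippet.
place = fluid.CUDAPlace(0)      # or fluid.CPUPlace()
samples = [25, 10]              # neighbor fan-out per hop
batch_size = 128
sample_workers = 5
# g: a pgl.graph.Graph; gw: the pgl.graph_wrapper.GraphWrapper the model
# was built against; loss: the training loss variable to minimize;
# label, train_idx, valid_idx: numpy arrays from the dataset split.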
Example 4
def main(args):
    data = load_data()
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)

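    # Unlike Examples 1 and 2, the node feature spec here is hard-coded
    # (602-dim float32 'feats') rather than read from the graph.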
    with fluid.program_guard(train_program, startup_program):
        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
            "sub_graph", fluid.CPUPlace(), node_feat=[('feats', [None, 602], np.dtype('float32'))])
        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],
            hidden_size=args.hidden_size,
            graphsage_type=args.graphsage_type,
            k_hop=len(samples))

    test_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(learning_rate=args.lr)
        adam.minimize(model_loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    train_iter = reader.multiprocess_graph_reader(
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        node_index=data['train_index'],
        node_label=data["train_label"])

    val_iter = reader.multiprocess_graph_reader(
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        node_index=data['val_index'],
        node_label=data["val_label"])

    test_iter = reader.multiprocess_graph_reader(
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        node_index=data['test_index'],
        node_label=data["test_label"])

    for epoch in range(args.epoch):
        run_epoch(
            train_iter,
            program=train_program,
            exe=exe,
            prefix="train",
            model_loss=model_loss,
            model_acc=model_acc,
            log_per_step=1,
            epoch=epoch)

        run_epoch(
            val_iter,
            program=test_program,
            exe=exe,
            prefix="val",
            model_loss=model_loss,
            model_acc=model_acc,
            log_per_step=10000,
            epoch=epoch)

    run_epoch(
        test_iter,
        program=test_program,
        prefix="test",
        exe=exe,
        model_loss=model_loss,
        model_acc=model_acc,
        log_per_step=10000,
        epoch=epoch)
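
Each variant indexes the same keys out of the dict returned by load_data.
A sketch of the shape the examples expect (inferred from the key accesses;
the actual loader is not shown):

def load_data():
    """Sketch only: the dict shape the examples above consume."""
    ...  # build the pgl.graph.Graph and the train/val/test split
    return {
        "graph": graph,              # pgl.graph.Graph with node features
        "feature": feature,          # numpy feature matrix (Example 1 only)
        "num_class": num_class,      # number of label classes
        "train_index": train_index,  # numpy arrays of node ids ...
        "train_label": train_label,  # ... and their labels
        "val_index": val_index,
        "val_label": val_label,
        "test_index": test_index,
        "test_label": test_label,
    }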