Example #1
def __init__(self,
             graph_wrapper=None,
             buf_size=1000,
             batch_size=128,
             num_workers=1,
             samples=[30, 30],
             shuffle=True,
             phase="train"):
    super(ArxivDataGenerator, self).__init__(buf_size=buf_size,
                                             num_workers=num_workers,
                                             batch_size=batch_size,
                                             shuffle=shuffle)
    self.samples = samples
    self.d_name = "ogbn-arxiv"
    self.graph_wrapper = graph_wrapper
    dataset = PglNodePropPredDataset(name=self.d_name)
    splitted_idx = dataset.get_idx_split()
    self.phase = phase
    graph, label = dataset[0]
    graph = to_undirected(graph)  # add reverse edges so the graph is symmetric
    self.graph = graph
    self.num_nodes = graph.num_nodes
    # pick the node ids and labels of the requested split
    if self.phase == "train":
        nodes_idx = splitted_idx["train"]
    elif self.phase == "valid":
        nodes_idx = splitted_idx["valid"]
    elif self.phase == "test":
        nodes_idx = splitted_idx["test"]
    else:
        raise ValueError("phase must be 'train', 'valid' or 'test', "
                         "got %r" % phase)
    labels = label[nodes_idx]
    self.nodes_idx = nodes_idx
    self.labels = labels
    self.sample_based_line_example(nodes_idx, labels)
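For reference, a generator like this is constructed once per split; a minimal usage sketch (gw is assumed to be a pgl.graph_wrapper.GraphWrapper, as built in the later examples, and samples=[30, 30] is presumably the neighbor fan-out per sampling hop):

train_gen = ArxivDataGenerator(graph_wrapper=gw,
                               batch_size=128,
                               samples=[30, 30],
                               phase="train")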
Example #2
            wf.flush()
    return max_cor_acc


if __name__ == '__main__':
    parser = get_config()
    print('===========args==============')
    print(parser)
    print('=============================')

    startup_prog = F.default_startup_program()
    train_prog = F.default_main_program()

    place = F.CPUPlace() if parser.place < 0 else F.CUDAPlace(parser.place)

    dataset = PglNodePropPredDataset(name="ogbn-arxiv")
    split_idx = dataset.get_idx_split()

    graph, label = dataset[0]
    print(label.shape)

    graph = to_undirected(graph)
    graph = add_self_loop(graph)
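    # to_undirected adds a reverse edge for every edge so messages can flow in
    # both directions; add_self_loop adds an i->i edge per node so each node
    # also sees its own features during aggregation.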

    with F.unique_name.guard():
        with F.program_guard(train_prog, startup_prog):
            gw = pgl.graph_wrapper.GraphWrapper(
                name="arxiv", node_feat=graph.node_feat_info(), place=place)

            if parser.use_label_e:
                model = Arxiv_label_embedding_model(gw, parser.hidden_size,
Example #3
    evaluator = OgbEvaluator()

    train_prog = F.Program()
    startup_prog = F.Program()
    args.num_nodes = evaluator.num_nodes

    if args.use_cuda:
        dev_list = F.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = F.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    assert dev_count == 1, "This program does not support multiple devices yet!"

    dataset = PglNodePropPredDataset(name="ogbn-arxiv")
    graph, label = dataset[0]
    graph = to_undirected(graph)

    if args.model is None:
        Model = BaseGraph
    elif args.model.upper() == "MLP":
        Model = MLPModel
    elif args.model.upper() == "SAGE":
        Model = SAGEModel
    elif args.model.upper() == "GAT":
        Model = GATModel
    elif args.model.upper() == "GCN":
        Model = GCNModel
    elif args.model.upper() == "GAAN":
        Model = GAANModel
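The if/elif chain above can equally be written as a table lookup; an equivalent sketch using the same model classes (note that, unlike the silent fall-through of the chain, an unrecognized name raises a KeyError instead of leaving Model unbound):

MODELS = {
    "MLP": MLPModel,
    "SAGE": SAGEModel,
    "GAT": GATModel,
    "GCN": GCNModel,
    "GAAN": GAANModel,
}
Model = BaseGraph if args.model is None else MODELS[args.model.upper()]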
Example #4
def aggregate_node_features(graph):
    # gather edge features and scatter them onto destination nodes
    efeat = graph.edge_feat["feat"]
    nfeat = np.zeros((graph.num_nodes, efeat.shape[-1]), dtype="float32")
    edges_dst = graph.edges[:, 1]
    np_scatter(edges_dst, efeat, nfeat)
    graph.node_feat["feat"] = nfeat

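# Note: np_scatter is assumed here to accumulate each row of efeat into the
# row of nfeat selected by the matching destination-node index, i.e. roughly
# the NumPy idiom np.add.at(nfeat, edges_dst, efeat). The exact reduction
# (sum vs. mean) depends on np_scatter's implementation.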

if __name__ == '__main__':
    parser = get_config()
    print('===========args==============')
    print(parser)
    print('=============================')

    dataset = PglNodePropPredDataset(name="ogbn-proteins")
    split_idx = dataset.get_idx_split()

    graph, label = dataset[0]
    aggregate_node_features(graph)

    place = F.CPUPlace() if parser.place < 0 else F.CUDAPlace(parser.place)

    startup_prog = F.default_startup_program()
    train_prog = F.default_main_program()

    with F.program_guard(train_prog, startup_prog):
        with F.unique_name.guard():
            gw = pgl.graph_wrapper.GraphWrapper(
                name="proteins",
                node_feat=graph.node_feat_info(),
Example #5
def main():
    """main
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--dataset',
                        type=str,
                        default="ogbn-proteins",
                        help='dataset name (default: ogbn-proteins)')
    args = parser.parse_args()

    #device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    #place = fluid.CUDAPlace(0)
    place = fluid.CPUPlace()  # Dataset too big to use GPU

    ### automatic dataloading and splitting
    dataset = PglNodePropPredDataset(name=args.dataset)
    splitted_idx = dataset.get_idx_split()
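    # get_idx_split() returns a dict of node-index arrays keyed by
    # "train" / "valid" / "test"; these are used below to slice labels
    # and predictions per split.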

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)

    graph_data, label = dataset[0]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()
    # degree normalization: norm[i] = indegree[i] ** -0.5, the D^(-1/2) factor
    # of GCN-style symmetric normalization (zero-degree nodes keep norm 0)
    indegree = graph_data.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
    graph_data.node_feat["x"] = np.zeros((len(indegree), 1), dtype="int64")
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
        "float32")
    model = GNNModel(name="gnn",
                     num_task=dataset.num_tasks,
                     emb_dim=64,
                     num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper("graph", graph_data, place)
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    val_program = train_program.clone(for_test=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant(
            "train_node_index", dtype="int64", value=splitted_idx["train"])
        initializer.append(init)

        train_node_label, init = paddle_helper.constant(
            "train_node_label",
            dtype="float32",
            value=label[splitted_idx["train"]].astype("float32"))
        initializer.append(init)
        train_pred_t = fluid.layers.gather(pred, train_node_index)
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=train_pred_t, label=train_node_label)
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)
        train_pred_t = fluid.layers.sigmoid(train_pred_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)

    for epoch in range(1, args.epochs + 1):
        loss = exe.run(train_program, feed={}, fetch_list=[train_loss_t])
        print("Loss %s" % loss[0])
        print("Evaluating...")
        y_pred = exe.run(val_program, feed={}, fetch_list=[sigmoid_pred])[0]
        result = {}
        input_dict = {
            "y_true": label[splitted_idx["train"]],
            "y_pred": y_pred[splitted_idx["train"]]
        }
        result["train"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["valid"]],
            "y_pred": y_pred[splitted_idx["valid"]]
        }
        result["valid"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["test"]],
            "y_pred": y_pred[splitted_idx["test"]]
        }
        result["test"] = evaluator.eval(input_dict)
        print(result)
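The three evaluation blocks above differ only in the split name and can be collapsed into a loop; an equivalent sketch:

result = {}
for split in ("train", "valid", "test"):
    idx = splitted_idx[split]
    result[split] = evaluator.eval({"y_true": label[idx],
                                    "y_pred": y_pred[idx]})

Note also that the excerpt defines main() but never calls it; running it as a script would additionally need the standard entry-point guard:

if __name__ == '__main__':
    main()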