Example #1
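All of the snippets below are real-world usages of paddle_helper.constant from the PGL (Paddle Graph Learning) codebase. They assume roughly the following imports (a sketch; exact module paths can vary between PGL releases, and project-specific helpers such as load_data, reader, or the model classes come from each project's own modules):

import numpy as np
import paddle.fluid as fluid
import pgl
from pgl.utils import paddle_helper
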
def calculate_loss(name, np_cached_h, node_label_val, num_classes, args):
    """Calculate loss function.
    """
    initializer = []
    const_cached_h, init = paddle_helper.constant("const_%s_cached_h" % name,
                                                  dtype='float32',
                                                  value=np_cached_h)
    initializer.append(init)

    node_label, init = paddle_helper.constant("%s_node_label" % (name),
                                              dtype='int64',
                                              value=node_label_val)
    initializer.append(init)

    output = fluid.layers.fc(const_cached_h,
                             size=num_classes,
                             bias_attr=args.bias,
                             name='fc')

    loss, probs = fluid.layers.softmax_with_cross_entropy(logits=output,
                                                          label=node_label,
                                                          return_softmax=True)
    loss = fluid.layers.mean(loss)

    acc = None
    if name != 'train':
        acc = fluid.layers.accuracy(input=probs, label=node_label, k=1)

    return {
        'loss': loss,
        'acc': acc,
        'probs': probs,
        'initializer': initializer
    }
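A minimal sketch of how this helper could be driven end to end, assuming the imports above. The args object, shapes, and data here are illustrative, not taken from the original project:

import argparse

args = argparse.Namespace(bias=False)
num_classes = 3
np_cached_h = np.random.rand(10, 8).astype("float32")        # 10 nodes, 8-dim features
labels = np.random.randint(0, num_classes, (10, 1)).astype("int64")

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    outs = calculate_loss("train", np_cached_h, labels, num_classes, args)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
for init in outs["initializer"]:   # constants are only filled after startup runs
    init(place)
loss, = exe.run(train_prog, fetch_list=[outs["loss"]])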
Example #2
def pre_gather(features, name_prefix, node_index_val):
    """Get features with respect to node index.
    """
    node_index, init = paddle_helper.constant("%s_node_index" % (name_prefix),
                                              dtype='int32',
                                              value=node_index_val)
    logits = fluid.layers.gather(features, node_index)

    return logits, init
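fluid.layers.gather here selects rows of features by index, so pre_gather returns logits only for the requested nodes. A numpy analogue of what the op computes (values are illustrative):

feats = np.arange(12, dtype="float32").reshape(4, 3)   # 4 nodes, 3 features each
idx = np.array([2, 0], dtype="int32")
print(feats[idx])   # rows 2 and 0 -> shape (2, 3), same result as gather(feats, idx)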
Example #3
    def __create_graph_edge_feat(self, edge_feat, collector):
        """Convert edge features into paddlepaddle tensor.
        """
        for edge_feat_name, edge_feat_value in edge_feat.items():
            edge_feat_shape = edge_feat_value.shape
            edge_feat_dtype = edge_feat_value.dtype
            self._edge_feat_tensor_dict[
                edge_feat_name], init = paddle_helper.constant(
                    name=self.__data_name_prefix + '_' + edge_feat_name,
                    dtype=edge_feat_dtype,
                    value=edge_feat_value)
            collector.append(init)
Example #4
    def __create_graph_node_feat(self, node_feat, collector):
        """Convert node features into paddlepaddle tensor.
        """
        for node_feat_name, node_feat_value in node_feat.items():
            node_feat_shape = node_feat_value.shape
            node_feat_dtype = node_feat_value.dtype
            self._node_feat_tensor_dict[
                node_feat_name], init = paddle_helper.constant(
                    name=self.__data_name_prefix + '_' + node_feat_name,
                    dtype=node_feat_dtype,
                    value=node_feat_value)
            collector.append(init)
Example #5
File: train.py  Project: zgsxwsdxg/PGL
def main(args):
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)

    with fluid.program_guard(train_program, startup_program):
        feature, feature_init = paddle_helper.constant(
            "feat",
            dtype=data['feature'].dtype,
            value=data['feature'],
            hide_batch_size=False)

        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
            "sub_graph",
            fluid.CPUPlace(),
            node_feat=data['graph'].node_feat_info())
        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],
            feature=feature,
            hidden_size=args.hidden_size,
            graphsage_type=args.graphsage_type,
            k_hop=len(samples))

    # Clone before the optimizer ops are added so the eval graph stays forward-only.
    test_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(learning_rate=args.lr)
        adam.minimize(model_loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    feature_init(place)

    train_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['train_index'],
        node_label=data["train_label"])

    val_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['val_index'],
        node_label=data["val_label"])

    test_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['test_index'],
        node_label=data["test_label"])

    for epoch in range(args.epoch):
        run_epoch(train_iter,
                  program=train_program,
                  exe=exe,
                  prefix="train",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  epoch=epoch)

        run_epoch(val_iter,
                  program=test_program,
                  exe=exe,
                  prefix="val",
                  model_loss=model_loss,
                  model_acc=model_acc,
                  log_per_step=10000,
                  epoch=epoch)

    run_epoch(test_iter,
              program=test_program,
              prefix="test",
              exe=exe,
              model_loss=model_loss,
              model_acc=model_acc,
              log_per_step=10000,
              epoch=epoch)
Example #6
File: graph_wrapper.py  Project: kiminh/PGL
    def __create_graph_attr(self, graph):
        """Create graph attributes for paddlepaddle.
        """
        src, dst, eid = graph.sorted_edges(sort_by="dst")
        indegree = graph.indegree()
        nodes = graph.nodes
        uniq_dst = nodes[indegree > 0]
        uniq_dst_count = indegree[indegree > 0]
        uniq_dst_count = np.cumsum(uniq_dst_count, dtype='int32')
        uniq_dst_count = np.insert(uniq_dst_count, 0, 0)
        graph_lod = graph.graph_lod
        num_graph = graph.num_graph

        num_edges = len(src)
        if num_edges == 0:
            # Fake graph: substitute a single self-loop (0 -> 0) so that
            # downstream ops still receive non-empty tensors.
            src = np.array([0], dtype="int64")
            dst = np.array([0], dtype="int64")
            eid = np.array([0], dtype="int64")
            uniq_dst_count = np.array([0, 1], dtype="int32")
            uniq_dst = np.array([0], dtype="int64")

        edge_feat = {}

        for key, value in graph.edge_feat.items():
            edge_feat[key] = value[eid]
        node_feat = graph.node_feat

        self.__create_graph_node_feat(node_feat, self._initializers)
        self.__create_graph_edge_feat(edge_feat, self._initializers)

        self._num_edges, init = paddle_helper.constant(
            dtype="int64",
            value=np.array([num_edges], dtype="int64"),
            name=self._data_name_prefix + '/num_edges')
        self._initializers.append(init)

        self._num_graph, init = paddle_helper.constant(
            dtype="int64",
            value=np.array([num_graph], dtype="int64"),
            name=self._data_name_prefix + '/num_graph')
        self._initializers.append(init)

        self._edges_src, init = paddle_helper.constant(
            dtype="int64",
            value=src,
            name=self._data_name_prefix + '/edges_src')
        self._initializers.append(init)

        self._edges_dst, init = paddle_helper.constant(
            dtype="int64",
            value=dst,
            name=self._data_name_prefix + '/edges_dst')
        self._initializers.append(init)

        self._num_nodes, init = paddle_helper.constant(
            dtype="int64",
            hide_batch_size=False,
            value=np.array([graph.num_nodes]),
            name=self._data_name_prefix + '/num_nodes')
        self._initializers.append(init)

        self._edge_uniq_dst, init = paddle_helper.constant(
            name=self._data_name_prefix + "/uniq_dst",
            dtype="int64",
            value=uniq_dst)
        self._initializers.append(init)

        self._edge_uniq_dst_count, init = paddle_helper.constant(
            name=self._data_name_prefix + "/uniq_dst_count",
            dtype="int32",
            value=uniq_dst_count)
        self._initializers.append(init)

        self._graph_lod, init = paddle_helper.constant(
            name=self._data_name_prefix + "/graph_lod",
            dtype="int32",
            value=graph_lod)
        self._initializers.append(init)

        node_ids_value = np.arange(0, graph.num_nodes, dtype="int64")
        self._node_ids, init = paddle_helper.constant(
            name=self._data_name_prefix + "/node_ids",
            dtype="int64",
            value=node_ids_value)
        self._initializers.append(init)

        self._indegree, init = paddle_helper.constant(
            name=self._data_name_prefix + "/indegree",
            dtype="int64",
            value=indegree)
        self._initializers.append(init)
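The uniq_dst_count construction above turns per-node indegrees into CSR-style offsets over the dst-sorted edge list. A small numpy illustration with made-up degrees:

indegree = np.array([2, 0, 3, 1])
uniq_dst = np.arange(4)[indegree > 0]       # nodes with incoming edges: [0 2 3]
counts = indegree[indegree > 0]             # their indegrees: [2 3 1]
offsets = np.insert(np.cumsum(counts, dtype="int32"), 0, 0)   # [0 2 5 6]
# edges pointing at uniq_dst[i] occupy slice offsets[i]:offsets[i+1]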
Example #7
def main(args):
    dataset = load(args.dataset)

    # normalize
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)

    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)

    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    hidden_size = 16

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper(name="graph",
                                                  graph=dataset.graph,
                                                  place=place)
        output = pgl.layers.gcn(gw,
                                gw.node_feat["words"],
                                hidden_size,
                                activation="relu",
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_1")
        output = fluid.layers.dropout(
            output, 0.5, dropout_implementation='upscale_in_train')
        output = pgl.layers.gcn(gw,
                                output,
                                dataset.num_classes,
                                activation=None,
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_2")

    val_program = train_program.clone(for_test=True)
    test_program = train_program.clone(for_test=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant("train_node_index",
                                                        dtype="int32",
                                                        value=train_index)
        initializer.append(init)

        train_node_label, init = paddle_helper.constant("train_node_label",
                                                        dtype="int64",
                                                        value=train_label)
        initializer.append(init)
        pred = fluid.layers.gather(output, train_node_index)
        train_loss_t = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=train_node_label)
        train_loss_t = fluid.layers.reduce_mean(train_loss_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    with fluid.program_guard(val_program, startup_program):
        val_node_index, init = paddle_helper.constant("val_node_index",
                                                      dtype="int32",
                                                      value=val_index)
        initializer.append(init)

        val_node_label, init = paddle_helper.constant("val_node_label",
                                                      dtype="int64",
                                                      value=val_label)
        initializer.append(init)

        pred = fluid.layers.gather(output, val_node_index)
        val_loss_t, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=val_node_label, return_softmax=True)
        val_acc_t = fluid.layers.accuracy(input=pred,
                                          label=val_node_label,
                                          k=1)
        val_loss_t = fluid.layers.reduce_mean(val_loss_t)

    with fluid.program_guard(test_program, startup_program):
        test_node_index, init = paddle_helper.constant("test_node_index",
                                                       dtype="int32",
                                                       value=test_index)
        initializer.append(init)

        test_node_label, init = paddle_helper.constant("test_node_label",
                                                       dtype="int64",
                                                       value=test_label)
        initializer.append(init)

        pred = fluid.layers.gather(output, test_node_index)
        test_loss_t, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=test_node_label, return_softmax=True)
        test_acc_t = fluid.layers.accuracy(input=pred,
                                           label=test_node_label,
                                           k=1)
        test_loss_t = fluid.layers.reduce_mean(test_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)

    dur = []
    for epoch in range(200):
        if epoch >= 3:
            t0 = time.time()

        train_loss = exe.run(train_program,
                             feed={},
                             fetch_list=[train_loss_t],
                             return_numpy=True)
        train_loss = train_loss[0]

        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        val_loss, val_acc = exe.run(val_program,
                                    feed={},
                                    fetch_list=[val_loss_t, val_acc_t],
                                    return_numpy=True)

        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss + "Val Loss: %f " % val_loss +
                 "Val Acc: %f " % val_acc)

    test_loss, test_acc = exe.run(test_program,
                                  feed={},
                                  fetch_list=[test_loss_t, test_acc_t],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)
Example #8
    split_idx = dataset.get_idx_split()

    graph, label = dataset[0]
    print(label.shape)

    with F.program_guard(train_prog, startup_prog):
        with F.unique_name.guard():

            gw_list = []

            for i in range(len(parser.sizes)):
                gw_list.append(pgl.graph_wrapper.GraphWrapper(
                    name="product_" + str(i)))

            feature_input, feat_init = paddle_helper.constant(
                name='node_feat_input',
                dtype='float32',
                value=graph.node_feat['feat'])

            if parser.use_label_e:
                model = Products_label_embedding_model(
                    feature_input, gw_list, parser.hidden_size,
                    parser.num_heads, parser.dropout, parser.num_layers)
            else:
                # NOTE: `gw` is not defined in this fragment; only the
                # label-embedding branch above is exercised as written.
                model = Arxiv_baseline_model(gw, parser.hidden_size,
                                             parser.num_heads, parser.dropout,
                                             parser.num_layers)

            # test_prog = train_prog.clone(for_test=True)
            model.train_program()

            adam_optimizer = optimizer_func(parser.lr)  # optimizer
            adam_optimizer.minimize(model.avg_cost)
Example #9
    def __create_graph_attr(self, graph):
        """Create graph attributes for paddlepaddle.
        """
        src, dst, eid = graph.sorted_edges(sort_by="dst")
        indegree = graph.indegree()
        nodes = graph.nodes
        uniq_dst = nodes[indegree > 0]
        uniq_dst_count = indegree[indegree > 0]

        edge_feat = {}

        for key, value in graph.edge_feat.items():
            edge_feat[key] = value[eid]
        node_feat = graph.node_feat

        self.__create_graph_node_feat(node_feat, self._initializers)
        self.__create_graph_edge_feat(edge_feat, self._initializers)

        self._edges_src, init = paddle_helper.constant(
            dtype="int32",
            value=src,
            name=self.__data_name_prefix + '_edges_src')
        self._initializers.append(init)

        self._edges_dst, init = paddle_helper.constant(
            dtype="int32",
            value=dst,
            name=self.__data_name_prefix + '_edges_dst')
        self._initializers.append(init)

        self._num_nodes, init = paddle_helper.constant(
            dtype="int32",
            hide_batch_size=False,
            value=np.array([graph.num_nodes]),
            name=self.__data_name_prefix + '_num_nodes')
        self._initializers.append(init)

        self._edge_uniq_dst, init = paddle_helper.constant(
            name=self.__data_name_prefix + "_uniq_dst",
            dtype="int32",
            value=uniq_dst)
        self._initializers.append(init)

        self._edge_uniq_dst_count, init = paddle_helper.constant(
            name=self.__data_name_prefix + "_uniq_dst_count",
            dtype="int32",
            value=uniq_dst_count)
        self._initializers.append(init)

        bucket_value = np.expand_dims(np.arange(0, len(dst), dtype="int32"),
                                      -1)
        self._bucketing_index, init = paddle_helper.lod_constant(
            name=self.__data_name_prefix + "_bucketing_index",
            dtype="int32",
            lod=list(uniq_dst_count),
            value=bucket_value)
        self._initializers.append(init)

        node_ids_value = np.arange(0, graph.num_nodes, dtype="int32")
        self._node_ids, init = paddle_helper.constant(
            name=self.__data_name_prefix + "_node_ids",
            dtype="int32",
            value=node_ids_value)
        self._initializers.append(init)

        self._indegree, init = paddle_helper.constant(
            name=self.__data_name_prefix + "_indegree",
            dtype="int32",
            value=indegree)
        self._initializers.append(init)
Example #10
File: main_pgl.py  Project: zzs95/PGL
def main():
    """main
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--dataset',
                        type=str,
                        default="ogbn-proteins",
                        help='dataset name (default: ogbn-proteins)')
    args = parser.parse_args()

    #device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    #place = fluid.CUDAPlace(0)
    place = fluid.CPUPlace()  # Dataset too big to use GPU

    ### automatic dataloading and splitting
    dataset = PglNodePropPredDataset(name=args.dataset)
    splitted_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)

    graph_data, label = dataset[0]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()
    # degree normalize
    indegree = graph_data.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
    graph_data.node_feat["x"] = np.zeros((len(indegree), 1), dtype="int64")
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
        "float32")
    model = GNNModel(name="gnn",
                     num_task=dataset.num_tasks,
                     emb_dim=64,
                     num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper("graph", graph_data, place)
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    val_program = train_program.clone(for_test=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant(
            "train_node_index", dtype="int64", value=splitted_idx["train"])
        initializer.append(init)

        train_node_label, init = paddle_helper.constant(
            "train_node_label",
            dtype="float32",
            value=label[splitted_idx["train"]].astype("float32"))
        initializer.append(init)
        train_pred_t = fluid.layers.gather(pred, train_node_index)
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=train_pred_t, label=train_node_label)
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)
        train_pred_t = fluid.layers.sigmoid(train_pred_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)

    for epoch in range(1, args.epochs + 1):
        loss = exe.run(train_program, feed={}, fetch_list=[train_loss_t])
        print("Loss %s" % loss[0])
        print("Evaluating...")
        y_pred = exe.run(val_program, feed={}, fetch_list=[sigmoid_pred])[0]
        result = {}
        input_dict = {
            "y_true": label[splitted_idx["train"]],
            "y_pred": y_pred[splitted_idx["train"]]
        }
        result["train"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["valid"]],
            "y_pred": y_pred[splitted_idx["valid"]]
        }
        result["valid"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["test"]],
            "y_pred": y_pred[splitted_idx["test"]]
        }
        result["test"] = evaluator.eval(input_dict)
        print(result)