Example #1
def train():
    model.train()
    acc_list = []
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index,
                                               size=(BTACH_SIZE, ))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        acc = test()
        acc_list.append(acc)
    return acc_list
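Every example on this page calls a `multihop_sampling` helper that the snippets themselves never define. For orientation, here is a minimal hypothetical sketch of what such a helper could look like, assuming `neighbor_table` maps a node id to a NumPy array of its neighbor ids; it matches the shape described in Example #12's comments (element 0 is the source batch, element k is the flattened hop-k sample) but is a reconstruction, not the projects' actual code.

import numpy as np

def multihop_sampling(src_nodes, sample_nums, neighbor_table):
    # Hypothetical sketch, not the implementation used by these projects.
    # src_nodes: 1-D array-like of source node ids
    # sample_nums: number of neighbors to draw per hop, e.g. [10, 10]
    # neighbor_table: dict mapping node id -> np.ndarray of neighbor ids
    sampling_result = [np.asarray(src_nodes)]
    for k, num in enumerate(sample_nums):
        # sample with replacement, so low-degree nodes simply repeat neighbors
        hop_sample = np.concatenate([
            np.random.choice(neighbor_table[int(node)], size=(num, ))
            for node in sampling_result[k]
        ])
        sampling_result.append(hop_sample)
    return sampling_result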
Example #2
def train():
    model.train()
    for e in range(args.epochs):
        for batch in range(args.num_batch_per_epoch):
            batch_src_index = np.random.choice(train_index,
                                               size=(args.btach_size, ))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(
                src_nodes=batch_src_index,
                sample_nums=args.num_neighbors_list,
                neighbor_table=data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            loss_train = criterion(batch_train_logits, batch_src_label)
            accuracy_train = torch.eq(
                batch_train_logits.max(1)[1],
                batch_src_label).float().mean().item()
            optimizer.zero_grad()  # zero the gradients first
            loss_train.backward()  # backpropagate to compute each parameter's gradient
            optimizer.step()  # take one gradient-descent step to update the parameters
            accuracy_val, loss_val = val()

            print('Epoch: {:03d}'.format(e), 'Batch: {:03d}'.format(batch),
                  'Loss_train: {:.4f}'.format(loss_train.item()),
                  'accuracy_train: {:.4f}'.format(accuracy_train),
                  'Loss_val: {:.4f}'.format(loss_val),
                  'acc_val: {:.4f}'.format(accuracy_val))
        if e % 10 == 0:
            torch.save(model.state_dict(), 'model/model.pkl')
            print('Epoch %d, saving model' % e)
    torch.save(model.state_dict(), 'model/model.pkl')
Example #3
def save_embeddings():
    model.eval()
    with torch.no_grad():
        src_index = torch.LongTensor(range(features.shape[0]))
        sampling_result = multihop_sampling(src_index, num_neighbors_list,
                                            neighbor_table)
        sampling_x = []
        for idx in sampling_result:
            # this kind of indexing shares memory (original author's note)
            sampling_x.append(features[idx])
        train_logits = model(sampling_x)
    outVec = train_logits.cpu().detach().numpy()
    path = Path(__file__).parent / ('{}_outVec.txt'.format(dataset))
    np.savetxt(path, outVec)
    path = Path(__file__).parent / ('{}_labels.txt'.format(dataset))
    outLabel = labels.cpu().detach().numpy()
    np.savetxt(path, outLabel)
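The two text files written above can be read back with np.loadtxt for downstream use such as embedding visualization. A minimal sketch, assuming dataset == 'cora' (a hypothetical value):

import numpy as np

vecs = np.loadtxt('cora_outVec.txt')   # one embedding row per node
labs = np.loadtxt('cora_labels.txt')   # matching node labels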
Example #4
def train_batch():
    model.train()
    for e in range(epochs):
        loss = 0
        for batch in range(NUM_BATCH_PER_EPOCH):
            # here everything is converted to tensors for processing
            batch_src_index = torch.from_numpy(
                np.random.choice(idx_train, size=(BTACH_SIZE, ))).long()
            batch_src_label = labels[batch_src_index].to(device)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      num_neighbors_list,
                                                      neighbor_table)
            batch_sampling_x = [
                features[idx].to(device) for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            batch_loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            batch_loss.backward()  # backpropagate to compute parameter gradients
            optimizer.step()  # update parameters with the optimizer
            loss += batch_loss.item() * BTACH_SIZE
        print("Epoch {:03d} Loss: {:.4f}".format(
            e, loss / (NUM_BATCH_PER_EPOCH * BTACH_SIZE)))
        test()
Example #5
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index,
                                               size=(BTACH_SIZE, ))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]  # convert to tensors
            batch_train_logits = model(
                batch_sampling_x
            )  # GraphSAGE nominally handles one node end to end; passing a list of tensors here runs the computation as a batch
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()  # backpropagate to compute parameter gradients
            optimizer.step()  # update parameters with the optimizer
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        test()
Example #6
File: train.py Project: zjwfno1/GNN
def train():
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index,
                                               size=(BTACH_SIZE, ))
            batch_src_label = train_label[batch_src_index].astype(float)

            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                data.x[idx.astype(np.int32)]
                for idx in batch_sampling_result
            ]

            loss = 0.0
            with tf.GradientTape() as tape:
                batch_train_logits = model(batch_sampling_x)
                loss = loss_object(batch_src_label, batch_train_logits)

            # take gradients and apply them outside the tape context
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            # print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(e, batch, loss))

        train_accuracy = test(train_index)
        val_accuracy = test(val_index)
        test_accuracy = test(test_index)

        train_loss_results.append(loss)
        train_accuracy_results.append(train_accuracy)
        train_val_results.append(val_accuracy)
        train_test_results.append(test_accuracy)

        print(
            "Epoch {:03d} train accuracy: {} val accuracy: {} test accuracy:{}"
            .format(e, train_accuracy, val_accuracy, test_accuracy))

        # ISSUE: https://stackoverflow.com/questions/58947679/no-gradients-provided-for-any-variable-in-tensorflow2-0

    # visualize the training process
    fig, axes = plt.subplots(4, sharex=True, figsize=(12, 8))
    fig.suptitle('Training Metrics')

    axes[0].set_ylabel("Loss", fontsize=14)
    axes[0].plot(train_loss_results)

    axes[1].set_ylabel("Accuracy", fontsize=14)
    axes[1].plot(train_accuracy_results)

    axes[2].set_ylabel("Val Acc", fontsize=14)
    axes[2].plot(train_val_results)

    axes[3].set_ylabel("Test Acc", fontsize=14)
    axes[3].plot(train_test_results)

    plt.show()
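The ISSUE link in the loop above points at TensorFlow's "No gradients provided for any variable" error, which apply_gradients raises when tape.gradient returns None for every variable (typically because the loss does not actually depend on the trainable variables the tape watched). A minimal sketch of the canonical tape pattern, with a toy variable standing in for the model:

import tensorflow as tf

w = tf.Variable([1.0, 2.0])
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

with tf.GradientTape() as tape:
    loss = tf.reduce_sum(w * w)  # only the forward pass belongs on the tape

grads = tape.gradient(loss, [w])            # None entries here would trigger the error
optimizer.apply_gradients(zip(grads, [w]))  # one SGD step on w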
Example #7
def test():
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(test_index, NUM_NEIGHBORS_LIST, data.adjacency_dict)
        test_x = [torch.from_numpy(x[idx]).float().to(DEVICE) for idx in test_sampling_result]
        test_logits = model(test_x)
        test_label = torch.from_numpy(data.y[test_index]).long().to(DEVICE)
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        print("Test Accuracy: ", accuracy)
Example #8
File: train.py Project: zjwfno1/GNN
def test(index):
    test_sampling_result = multihop_sampling(index, NUM_NEIGHBORS_LIST,
                                             data.adjacency_dict)
    test_x = [data.x[idx.astype(np.int32)] for idx in test_sampling_result]
    test_logits = model(test_x)
    test_label = data.y[index]

    ll = tf.math.equal(tf.math.argmax(test_label, -1),
                       tf.math.argmax(test_logits, -1))
    accuracy = tf.reduce_mean(tf.cast(ll, dtype=tf.float32))

    return accuracy
Example #9
def test():
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(torch.LongTensor(idx_test),
                                                 num_neighbors_list,
                                                 neighbor_table)
        test_x = []
        for idx in test_sampling_result:
            test_x.append(features[idx])
        test_logits = model(test_x)
        test_label = labels[idx_test].long()
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
    return accuracy
Example #10
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BTACH_SIZE,))
            batch_src_label = torch.from_numpy(train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index, NUM_NEIGHBORS_LIST, data.adjacency_dict)
            batch_sampling_x = [torch.from_numpy(x[idx]).float().to(DEVICE) for idx in batch_sampling_result]
            batch_train_logits = model(batch_sampling_x)  # pass in the batch of randomly sampled source nodes
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()  # backpropagate to compute parameter gradients
            optimizer.step()  # update parameters with the optimizer

            predict_y = batch_train_logits.max(1)[1]
            accuracy = torch.eq(predict_y, batch_src_label).float().mean().item()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f} Accuracy: {:.4f}".format(
                e, batch, loss.item(), accuracy))
        test()
Example #11
def test():
    model.eval()
    with torch.no_grad():
        checkpoint = torch.load('model/model.pkl')
        model.load_state_dict(checkpoint)

        # force everything below to run without building a computation graph
        test_sampling_result = multihop_sampling(
            src_nodes=test_index,
            sample_nums=args.num_neighbors_list,
            neighbor_table=data.adjacency_dict)
        test_x = [
            torch.from_numpy(x[idx]).float().to(DEVICE)
            for idx in test_sampling_result
        ]
        test_logits = model(test_x)
        test_label = torch.from_numpy(data.y[test_index]).long().to(DEVICE)
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        print("Test Accuracy: ", accuracy)
Example #12
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index,
                                               size=(BTACH_SIZE, ))  # shape [16,]
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)  # shape [16,]
            batch_sampling_result = multihop_sampling(
                batch_src_index, NUM_NEIGHBORS_LIST, data.adjacency_dict
            )  # a list of length 3, each element an ndarray: element 0 is batch_src_index, elements 1 and 2 are the sampled neighbors
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)  # shape [16, 7]
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()  # backpropagate to compute parameter gradients
            optimizer.step()  # update parameters with the optimizer
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        test()
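To make the shape comments above concrete: the batch grows multiplicatively at each sampling hop, and the model reduces it back to one logit row per source node ([16, 7] here, i.e. 7 classes). A small illustration with a hypothetical NUM_NEIGHBORS_LIST = [10, 10]:

# hypothetical values, for illustration only
BTACH_SIZE = 16
NUM_NEIGHBORS_LIST = [10, 10]

sizes = [BTACH_SIZE]
for num in NUM_NEIGHBORS_LIST:
    sizes.append(sizes[-1] * num)
print(sizes)  # [16, 160, 1600]: source nodes, hop-1 samples, hop-2 samples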
Example #13
def val():
    with torch.no_grad():
        val_sampling_result = multihop_sampling(
            src_nodes=val_index,
            sample_nums=args.num_neighbors_list,
            neighbor_table=data.adjacency_dict)
        val_x = [
            torch.from_numpy(x[idx]).float().to(DEVICE)
            for idx in val_sampling_result
        ]
        val_label = torch.from_numpy(data.y[val_index]).long().to(DEVICE)
        if not args.fastmode:
            # evaluate on the validation set with the full network;
            # model.eval() disables Batch Normalization and Dropout,
            # the forward pass itself is identical in both branches
            model.eval()
            val_logits = model(val_x)
        else:
            val_logits = model(val_x)

        loss_val = criterion(val_logits, val_label).item()
        predict_y = val_logits.max(1)[1]
        accuracy = torch.eq(predict_y, val_label).float().mean().item()
        return accuracy, loss_val
Example #14
    path = Path(__file__).parent / ('{}_outVec.txt'.format(dataset))
    np.savetxt(path, outVec)
    path = Path(__file__).parent / ('{}_labels.txt'.format(dataset))
    outLabel = labels.cpu().detach().numpy()
    np.savetxt(path, outLabel)


if __name__ == '__main__':
    model.train()
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)
    for epoch in range(epochs):
        model.train()
        src_index = torch.LongTensor(range(features.shape[0]))
        sampling_result = multihop_sampling(src_index, num_neighbors_list,
                                            neighbor_table)
        sampling_x = []
        for idx in sampling_result:
            # this kind of indexing shares memory (original author's note)
            sampling_x.append(features[idx])
        train_logits = model(sampling_x)
        sampling_x = []  # free the memory
        loss = criterion(train_logits[idx_train], labels[idx_train])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()