def train():
    model.train()
    acc_list = []
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BTACH_SIZE,))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        acc = test()
        acc_list.append(acc)
    return acc_list
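Every loop below calls a multihop_sampling helper that is defined elsewhere in the repo. The sketch that follows shows what the call sites imply, assuming uniform neighbor sampling with replacement from a neighbor table; the inner sampling helper and its internals are assumptions, not the repo's actual code.

import numpy as np

def sampling(src_nodes, sample_num, neighbor_table):
    """Sketch (assumed): uniformly sample, with replacement,
    sample_num neighbors for each source node."""
    results = []
    for node in src_nodes:
        neighbors = np.asarray(neighbor_table[node])
        results.append(np.random.choice(neighbors, size=(sample_num,)))
    return np.concatenate(results)

def multihop_sampling(src_nodes, sample_nums, neighbor_table):
    """Sketch (assumed): layer-by-layer sampling. Element 0 of the result
    is the source nodes themselves; element k holds the flattened indices
    of the sampled k-hop neighbors, matching the shape comments below."""
    sampling_result = [np.asarray(src_nodes)]
    for k, hopk_num in enumerate(sample_nums):
        hopk_result = sampling(sampling_result[k], hopk_num, neighbor_table)
        sampling_result.append(hopk_result)
    return sampling_result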
def train():
    model.train()
    for e in range(args.epochs):
        for batch in range(args.num_batch_per_epoch):
            batch_src_index = np.random.choice(train_index,
                                               size=(args.btach_size,))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(
                src_nodes=batch_src_index,
                sample_nums=args.num_neighbors_list,
                neighbor_table=data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            loss_train = criterion(batch_train_logits, batch_src_label)
            accuracy_train = torch.eq(batch_train_logits.max(1)[1],
                                      batch_src_label).float().mean().item()
            optimizer.zero_grad()  # zero the gradients first
            loss_train.backward()  # backpropagate to compute each parameter's gradient
            optimizer.step()       # take one gradient step to update the parameters
            accuracy_val, loss_val = val()
            print('Epoch: {:03d}'.format(e),
                  'Batch: {:03d}'.format(batch),
                  'Loss_train: {:.4f}'.format(loss_train.item()),
                  'accuracy_train: {:.4f}'.format(accuracy_train),
                  'Loss_val: {:.4f}'.format(loss_val),
                  'acc_val: {:.4f}'.format(accuracy_val))
        if e % 10 == 0:
            torch.save(model.state_dict(), 'model/model.pkl')
            print('Epoch %d: model saved' % e)
    torch.save(model.state_dict(), 'model/model.pkl')
def save_embeddings():
    model.eval()
    with torch.no_grad():
        src_index = torch.LongTensor(range(features.shape[0]))
        sampling_result = multihop_sampling(src_index, num_neighbors_list,
                                            neighbor_table)
        sampling_x = []
        for idx in sampling_result:  # this indexing approach shares memory with `features`
            sampling_x.append(features[idx])
        train_logits = model(sampling_x)
        outVec = train_logits.cpu().detach().numpy()
        path = Path(__file__).parent / ('{}_outVec.txt'.format(dataset))
        np.savetxt(path, outVec)
        path = Path(__file__).parent / ('{}_labels.txt'.format(dataset))
        outLabel = labels.cpu().detach().numpy()
        np.savetxt(path, outLabel)
def train_batch():
    model.train()
    for e in range(epochs):
        loss = 0
        for batch in range(NUM_BATCH_PER_EPOCH):
            # Convert everything to tensors up front and process them as tensors.
            batch_src_index = torch.from_numpy(
                np.random.choice(idx_train, size=(BTACH_SIZE,))).long()
            batch_src_label = labels[batch_src_index].to(device)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      num_neighbors_list,
                                                      neighbor_table)
            batch_sampling_x = [
                features[idx].to(device) for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)
            batch_loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            batch_loss.backward()  # backpropagate to compute parameter gradients
            optimizer.step()       # update the parameters with the optimizer
            loss += batch_loss.item() * BTACH_SIZE
        print("Epoch {:03d} Loss: {:.4f}".format(
            e, loss / (NUM_BATCH_PER_EPOCH * BTACH_SIZE)))
        test()
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BTACH_SIZE,))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]  # convert to tensors
            # GraphSage is defined per node; passing a list of tensors
            # (one per hop) lets the whole batch be computed in one pass.
            batch_train_logits = model(batch_sampling_x)
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()   # backpropagate to compute parameter gradients
            optimizer.step()  # update the parameters with the optimizer
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        test()
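The comment in the loop above notes that the model consumes a list of tensors, one per hop. The sketch below shows the batched forward pass that convention implies; the function name, the per-layer callables, and the reshape convention are assumptions rather than the repo's actual model code.

import torch

def graphsage_forward(gcn_layers, node_features_list):
    """Sketch (assumed): entry k of node_features_list holds the features
    of the flattened k-hop samples; each layer aggregates hop k+1 into hop k,
    so one fewer hop survives per layer and hidden[0] ends as the logits."""
    num_layers = len(gcn_layers)
    hidden = node_features_list
    for l in range(num_layers):
        next_hidden = []
        gcn = gcn_layers[l]
        for hop in range(num_layers - l):
            src_node_features = hidden[hop]
            src_node_num = len(src_node_features)
            # Reshape the flat hop-(k+1) samples to [num_src, num_neighbors, dim].
            neighbor_features = hidden[hop + 1].view(
                src_node_num, -1, hidden[hop + 1].shape[1])
            next_hidden.append(gcn(src_node_features, neighbor_features))
        hidden = next_hidden
    return hidden[0]  # logits for the batch of source nodes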
def train():
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BTACH_SIZE,))
            batch_src_label = train_label[batch_src_index].astype(float)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                data.x[np.array(idx.astype(np.int32))]
                for idx in batch_sampling_result
            ]
            loss = 0.0
            with tf.GradientTape() as tape:
                batch_train_logits = model(batch_sampling_x)
                loss = loss_object(batch_src_label, batch_train_logits)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(e, batch, loss))
        train_accuracy = test(train_index)
        val_accuracy = test(val_index)
        test_accuracy = test(test_index)
        train_loss_results.append(loss)
        train_accuracy_results.append(train_accuracy)
        train_val_results.append(val_accuracy)
        train_test_results.append(test_accuracy)
        print("Epoch {:03d} train accuracy: {} val accuracy: {} test accuracy: {}"
              .format(e, train_accuracy, val_accuracy, test_accuracy))
    # ISSUE: https://stackoverflow.com/questions/58947679/no-gradients-provided-for-any-variable-in-tensorflow2-0

    # Visualize the training process.
    fig, axes = plt.subplots(4, sharex=True, figsize=(12, 8))
    fig.suptitle('Training Metrics')
    axes[0].set_ylabel("Loss", fontsize=14)
    axes[0].plot(train_loss_results)
    axes[1].set_ylabel("Accuracy", fontsize=14)
    axes[1].plot(train_accuracy_results)
    axes[2].set_ylabel("Val Acc", fontsize=14)
    axes[2].plot(train_val_results)
    axes[3].set_ylabel("Test Acc", fontsize=14)
    axes[3].plot(train_test_results)
    plt.show()
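The TensorFlow variant above relies on a loss_object, an optimizer, and four metric-history lists defined at module level. A minimal sketch of that setup follows; the concrete choices (categorical cross-entropy over one-hot labels, Adam at lr=0.01) are assumptions, though one-hot labels are implied by the argmax comparison in test().

import tensorflow as tf

# Assumed module-level setup for the TF2 loop above (loss and
# learning-rate choices are guesses, not the repo's configuration).
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Metric histories consumed by the plotting code at the end of train().
train_loss_results, train_accuracy_results = [], []
train_val_results, train_test_results = [], []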
def test():
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(test_index,
                                                 NUM_NEIGHBORS_LIST,
                                                 data.adjacency_dict)
        test_x = [
            torch.from_numpy(x[idx]).float().to(DEVICE)
            for idx in test_sampling_result
        ]
        test_logits = model(test_x)
        test_label = torch.from_numpy(data.y[test_index]).long().to(DEVICE)
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        print("Test Accuracy: ", accuracy)
def test(index):
    test_sampling_result = multihop_sampling(index, NUM_NEIGHBORS_LIST,
                                             data.adjacency_dict)
    test_x = [data.x[idx.astype(np.int32)] for idx in test_sampling_result]
    test_logits = model(test_x)
    test_label = data.y[index]
    ll = tf.math.equal(tf.math.argmax(test_label, -1),
                       tf.math.argmax(test_logits, -1))
    accuracy = tf.reduce_mean(tf.cast(ll, dtype=tf.float32))
    return accuracy
def test():
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(torch.LongTensor(idx_test),
                                                 num_neighbors_list,
                                                 neighbor_table)
        test_x = [features[idx] for idx in test_sampling_result]
        test_logits = model(test_x)
        test_label = labels[idx_test].long()
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        return accuracy
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BTACH_SIZE,))
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            # Feed in the features sampled for the batch of source nodes.
            batch_train_logits = model(batch_sampling_x)
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()   # backpropagate to compute parameter gradients
            optimizer.step()  # update the parameters with the optimizer
            predict_y = batch_train_logits.max(1)[1]
            accuracy = torch.eq(predict_y,
                                batch_src_label).float().mean().item()
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f} Accuracy: {:.4f}".format(
                e, batch, loss.item(), accuracy))
        test()
def test():
    model.eval()
    # Everything below runs without building the computation graph.
    with torch.no_grad():
        checkpoint = torch.load('model/model.pkl')
        model.load_state_dict(checkpoint)
        test_sampling_result = multihop_sampling(
            src_nodes=test_index,
            sample_nums=args.num_neighbors_list,
            neighbor_table=data.adjacency_dict)
        test_x = [
            torch.from_numpy(x[idx]).float().to(DEVICE)
            for idx in test_sampling_result
        ]
        test_logits = model(test_x)
        test_label = torch.from_numpy(data.y[test_index]).long().to(DEVICE)
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        print("Test Accuracy: ", accuracy)
def train():
    model.train()
    for e in range(EPOCHS):
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index,
                                               size=(BTACH_SIZE,))  # [16,]
            batch_src_label = torch.from_numpy(
                train_label[batch_src_index]).long().to(DEVICE)  # [16,]
            # A list of length 3, each entry an ndarray: element 0 is
            # batch_src_index itself; elements 1 and 2 hold the sampled
            # 1-hop and 2-hop neighbors.
            batch_sampling_result = multihop_sampling(batch_src_index,
                                                      NUM_NEIGHBORS_LIST,
                                                      data.adjacency_dict)
            batch_sampling_x = [
                torch.from_numpy(x[idx]).float().to(DEVICE)
                for idx in batch_sampling_result
            ]
            batch_train_logits = model(batch_sampling_x)  # [16, 7]
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()   # backpropagate to compute parameter gradients
            optimizer.step()  # update the parameters with the optimizer
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(
                e, batch, loss.item()))
        test()
def val():
    with torch.no_grad():
        val_sampling_result = multihop_sampling(
            src_nodes=val_index,
            sample_nums=args.num_neighbors_list,
            neighbor_table=data.adjacency_dict)
        val_x = [
            torch.from_numpy(x[idx]).float().to(DEVICE)
            for idx in val_sampling_result
        ]
        val_label = torch.from_numpy(data.y[val_index]).long().to(DEVICE)
        if not args.fastmode:
            # Evaluate the validation set with the full network:
            # eval() disables Batch Normalization updates and Dropout.
            model.eval()
            val_logits = model(val_x)
        else:
            val_logits = model(val_x)
        loss_val = criterion(val_logits, val_label).item()
        predict_y = val_logits.max(1)[1]
        accuracy = torch.eq(predict_y, val_label).float().mean().item()
        return accuracy, loss_val
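val() and the args-based train() above read their hyperparameters from an args namespace. The sketch below shows the argparse definitions those call sites imply; all defaults are assumptions, and the btach_size spelling is kept only to match the existing code.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=20)
parser.add_argument('--num_batch_per_epoch', type=int, default=20)
parser.add_argument('--btach_size', type=int, default=16)  # spelling kept to match train()
parser.add_argument('--num_neighbors_list', type=int, nargs='+',
                    default=[10, 10])  # neighbors sampled per hop (assumed)
parser.add_argument('--fastmode', action='store_true',
                    help='skip switching to eval mode during validation')
args = parser.parse_args()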
if __name__ == '__main__':
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)
    for epoch in range(epochs):
        model.train()
        src_index = torch.LongTensor(range(features.shape[0]))
        sampling_result = multihop_sampling(src_index, num_neighbors_list,
                                            neighbor_table)
        sampling_x = []
        for idx in sampling_result:  # this indexing approach shares memory with `features`
            sampling_x.append(features[idx])
        train_logits = model(sampling_x)
        sampling_x = []  # release memory
        loss = criterion(train_logits[idx_train], labels[idx_train])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
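The __main__ loop ends right after the optimizer step. A per-epoch validation check in the same full-graph style could sit at the end of the loop body, as in this sketch; the metric computation mirrors test() above, but its placement and the reuse of the pre-update logits are assumptions.

        # Sketch (assumed): per-epoch validation using the logits computed
        # before this epoch's parameter update.
        model.eval()
        with torch.no_grad():
            val_logits = train_logits[idx_val]
            loss_val = criterion(val_logits, labels[idx_val]).item()
            acc_val = torch.eq(val_logits.max(1)[1],
                               labels[idx_val]).float().mean().item()
        print('Epoch {:03d} loss_train: {:.4f} loss_val: {:.4f} acc_val: {:.4f}'.format(
            epoch, loss.item(), loss_val, acc_val))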