def train_step():
    """Run the link-prediction training loop for 1000 optimisation steps.

    Each step embeds ``train_graph`` with ``encoder1``, samples negative
    (non-existent) edges, scores the positive and negative edges with
    ``predict_edge``, and applies one optimizer update on the loss returned
    by ``compute_loss``. Every 20 steps the current embedding is passed to
    ``evaluate`` and the step, loss and AUC score are printed.

    Relies on module-level names defined elsewhere in the file: ``tf``,
    ``encoder1``, ``graph``, ``train_graph``, ``negative_sampling``,
    ``predict_edge``, ``compute_loss``, ``optimizer`` and ``evaluate``.

    Returns:
        None. Progress is reported on stdout only.
    """
    for step in range(1000):
        with tf.GradientTape() as tape:
            # NOTE(review): `.call` is invoked directly rather than
            # `encoder1(...)`; if encoder1 is a Keras model this skips the
            # `__call__` wrapper -- confirm this is intended.
            embedded = encoder1.call(train_graph)

            # Sample non-existent edges as negatives for the training edges.
            # replace=False is deliberately not passed: duplicated negative
            # samples are acceptable during training.
            train_neg_edge_index = negative_sampling(
                train_graph.num_edges,
                graph.num_nodes,
                edge_index=train_graph.edge_index,
            )

            # Score both the real (positive) and sampled (negative) edges.
            pos_edge_logits = predict_edge(embedded, train_graph.edge_index)
            neg_edge_logits = predict_edge(embedded, train_neg_edge_index)
            loss = compute_loss(pos_edge_logits, neg_edge_logits)

        # Gradient computation happens after the tape context has closed.
        # The local is not called `vars`, to avoid shadowing the builtin.
        watched_vars = tape.watched_variables()
        grads = tape.gradient(loss, watched_vars)
        optimizer.apply_gradients(zip(grads, watched_vars))

        if step % 20 == 0:
            # Evaluated on the embedding produced by this step's forward pass.
            auc_score = evaluate(embedded)
            print("step = {}\tloss = {}\tauc_score = {}".format(
                step, loss, auc_score))
from tf_geometric.utils.graph_utils import (
    edge_train_test_split,
    negative_sampling,
)

# Load the Cora dataset: the graph plus its train/valid/test index splits.
graph, (train_index, valid_index, test_index) = (
    tfg.datasets.CoraDataset().load_data()
)

# Split the edges with test_size=0.15. The resulting undirected edge sets
# can be used for evaluation as they are.
undirected_train_edge_index, undirected_test_edge_index, _, _ = (
    edge_train_test_split(edge_index=graph.edge_index, test_size=0.15)
)

# Negative edges for the test set: one per test edge, sampled with
# replace=False.
undirected_test_neg_edge_index = negative_sampling(
    num_samples=undirected_test_edge_index.shape[1],
    num_nodes=graph.num_nodes,
    edge_index=graph.edge_index,
    replace=False,
)

# For training, the undirected edges are converted to directed edges so that
# GCN propagation is correct.
train_graph = tfg.Graph(x=graph.x, edge_index=undirected_train_edge_index)
train_graph = train_graph.convert_edge_to_directed()

# Encoder hyper-parameters and layers.
embedding_size, drop_rate = 16, 0.2

gcn0 = tfg.layers.GCN(32, activation=tf.nn.relu)
gcn1 = tfg.layers.GCN(embedding_size)
dropout = keras.layers.Dropout(drop_rate)
#graph.edge_index.shape=(2, 10556) #任务是通过自编码器对节点的特征维度进行编码,进而挖掘节点之间的相互关系,从而对节点间边的链接关系进行预测 #首先要将边进行分离,分离成训练集和测试集, 同时转为无向边 undirected_train_edge_index, undirected_test_edge_index, _, _ = edge_train_test_split( edge_index=graph.edge_index, num_nodes=graph.num_nodes, mode='undirected', test_size=0.15) print(undirected_train_edge_index.shape, undirected_test_edge_index.shape) #(2, 4486) (2, 792) # 通过replace=False的负采样,来为测试集产生负样本(不存在的边),以满足验证评估要求 undirected_test_neg_edge_index = negative_sampling( num_samples=undirected_test_edge_index.shape[1], num_nodes=graph.num_nodes, edge_index=graph.edge_index, replace=False #不生成重复的负采样样本 ) #(2, 792) train_graph = tfg.Graph( x=graph.x, edge_index=undirected_train_edge_index).convert_edge_to_directed() t_graph = tfg.Graph(x=graph.x, edge_index=undirected_train_edge_index) #将训练图重新变回有向边,也就是把每条无向边首尾颠倒再复制一次 #print(train_graph.edge_index.shape), (2, 8972)训练图中有8972条边,是将测试集中的边从原图中拆除后得到的 #利用图卷积编码器进行边的恢复 class Encoder(tf.keras.Model): #原始的图自编码器使用图卷积网络(GCN),基于节点的特征和边,为节点学习高阶特征 def __init__(self, rate, embedding_size):