import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

import dataloader4kge
import dataloader4KGNN
# KGCN, TransR, MKR, RippleNet and the helpers doEva, get_ripple_set, get_feed_dict
# are defined in this project's other modules.


def train(epochs=20, batchSize=1024, lr=0.01, dim=128, n_neighbors=10, eva_per_epochs=1):
    # Load the user-item interaction data and the knowledge-graph triples
    users, items, train_set, test_set = dataloader4kge.readRecData()
    entitys, relations, kgTriples = dataloader4kge.readKGData()
    # Build the KG adjacency index and sample a fixed-size neighborhood per entity
    kg_indexes = dataloader4KGNN.getKgIndexsFromKgTriples(kgTriples)
    adj_entity, adj_relation = dataloader4KGNN.construct_adj(n_neighbors, kg_indexes, len(entitys))
    # Initialize the model, optimizer and loss function
    net = KGCN(max(users) + 1, max(entitys) + 1, max(relations) + 1, dim,
               adj_entity, adj_relation, n_neighbors=n_neighbors)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=5e-4)
    loss_fcn = nn.BCELoss()
    # Start training
    for e in range(epochs):
        net.train()
        all_loss = 0.0
        for u, i, r in tqdm(DataLoader(train_set, batch_size=batchSize, shuffle=True)):
            logits = net(u, i)
            loss = loss_fcn(logits, r.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            all_loss += loss.item()
        print('epoch {}, avg_loss={:.4f}'.format(e, all_loss / (len(train_set) // batchSize)))
        # Evaluate the model
        if e % eva_per_epochs == 0:
            p, r, acc = doEva(net, train_set)
            print('train: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
            p, r, acc = doEva(net, test_set)
            print('test: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
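# doEva() is defined elsewhere in this project. Below is a minimal sketch of an
# equivalent evaluator, assuming net(u, i) returns probabilities in [0, 1] and that
# a 0.5 threshold turns them into binary predictions (the name doEva_sketch, the
# threshold and the batch size are assumptions, not the repo's actual code):
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score

def doEva_sketch(net, data_set, batch_size=1024):
    net.eval()
    preds, labels = [], []
    with torch.no_grad():
        for u, i, r in DataLoader(data_set, batch_size=batch_size):
            logits = net(u, i)
            preds.extend((logits > 0.5).int().tolist())  # threshold is an assumption
            labels.extend(r.tolist())
    return (precision_score(labels, preds),
            recall_score(labels, preds),
            accuracy_score(labels, preds))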
def train(epochs=20, batchSize=1024, lr=0.01, dim=128):
    # Load the KG triples and wrap them with negative sampling
    entitys, relations, triples = dataloader4kge.readKGData()
    train_set = dataloader4kge.KgDatasetWithNegativeSampling(triples, entitys)
    # Initialize the model
    net = TransR(max(entitys) + 1, max(relations) + 1, dim)
    # Initialize the optimizer
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr, weight_decay=5e-3)
    # Start training
    for e in range(epochs):
        net.train()
        all_loss = 0
        for X in tqdm(DataLoader(train_set, batch_size=batchSize, shuffle=True)):
            optimizer.zero_grad()
            loss = net(X)
            all_loss += loss.item()
            loss.backward()
            optimizer.step()
        print('epoch {}, avg_loss={:.4f}'.format(e, all_loss / len(triples)))
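# net(X) above returns the training loss directly. For reference, a minimal sketch
# of the TransR score that forward pass is built on: head and tail embeddings are
# projected into the relation-specific space by a per-relation matrix M_r, then
# scored by the translation distance ||h_r + r - t_r||. Class and attribute names
# below are illustrative, not the repo's actual ones.
class TransRScoreSketch(nn.Module):
    def __init__(self, n_entity, n_relation, e_dim, r_dim=None):
        super().__init__()
        r_dim = r_dim or e_dim
        self.e_emb = nn.Embedding(n_entity, e_dim)
        self.r_emb = nn.Embedding(n_relation, r_dim)
        # One (e_dim x r_dim) projection matrix per relation, stored flattened
        self.Mr = nn.Embedding(n_relation, e_dim * r_dim)
        self.e_dim, self.r_dim = e_dim, r_dim

    def score(self, h, r, t):
        Mr = self.Mr(r).view(-1, self.e_dim, self.r_dim)
        h_r = torch.bmm(self.e_emb(h).unsqueeze(1), Mr).squeeze(1)  # project head
        t_r = torch.bmm(self.e_emb(t).unsqueeze(1), Mr).squeeze(1)  # project tail
        return torch.norm(h_r + self.r_emb(r) - t_r, p=2, dim=1)   # ||h_r + r - t_r||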
def train(epochs=20, batchSize=1024, lr=0.01, dim=128, eva_per_epochs=1):
    # Load the KG triples (with negative sampling) and the user-item interaction data
    entitys, relations, triples = dataloader4kge.readKGData()
    kgTrainSet = dataloader4kge.KgDatasetWithNegativeSampling(triples, entitys)
    users, items, train_set, test_set = dataloader4kge.readRecData()
    # Initialize the model
    net = MKR(max(users) + 1, max(entitys) + 1, max(relations) + 1, dim)
    # Initialize the optimizer
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr, weight_decay=5e-3)
    # Start training
    for e in range(epochs):
        net.train()
        all_loss = 0
        # Sample user-item triples and KG triples at the same time. Because the
        # cross&compress (C) unit intertwines the computation of items and head
        # entities, the two loaders must share the same batch_size; drop_last
        # keeps every pair of batches aligned in size.
        for rec_set, kg_set in tqdm(
                zip(DataLoader(train_set, batch_size=batchSize, shuffle=True, drop_last=True),
                    DataLoader(kgTrainSet, batch_size=batchSize, shuffle=True, drop_last=True))):
            optimizer.zero_grad()
            loss = net(rec_set, kg_set)
            all_loss += loss.item()
            loss.backward()
            optimizer.step()
        print('epoch {}, avg_loss={:.4f}'.format(e, all_loss / (len(train_set) // batchSize)))
        # Evaluate the model
        if e % eva_per_epochs == 0:
            p, r, acc = doEva(net, train_set)
            print('train: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
            p, r, acc = doEva(net, test_set)
            print('test: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
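# The cross&compress unit mentioned above is the bridge between MKR's recommendation
# and KG towers. A minimal sketch following the MKR paper: the item vector v and the
# aligned head-entity vector e form an outer product C = v e^T, which is compressed
# back to d dimensions by learned weight vectors. It pairs sample i of the item batch
# with sample i of the entity batch, which is why the batch sizes must match.
# Names below are illustrative, not the repo's actual ones.
class CrossCompressSketch(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.w_vv = nn.Linear(dim, 1, bias=False)
        self.w_ev = nn.Linear(dim, 1, bias=False)
        self.w_ve = nn.Linear(dim, 1, bias=False)
        self.w_ee = nn.Linear(dim, 1, bias=False)
        self.b_v = nn.Parameter(torch.zeros(dim))
        self.b_e = nn.Parameter(torch.zeros(dim))

    def forward(self, v, e):
        # C: (batch, dim, dim) cross matrix between item and entity features
        C = torch.bmm(v.unsqueeze(2), e.unsqueeze(1))
        v_next = self.w_vv(C).squeeze(2) + self.w_ev(C.transpose(1, 2)).squeeze(2) + self.b_v
        e_next = self.w_ve(C).squeeze(2) + self.w_ee(C.transpose(1, 2)).squeeze(2) + self.b_e
        return v_next, e_next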
def train(n_epoch=n_epoch, batch_size=batch_size, eva_per_epochs=1):
    # lr, n_epoch and batch_size are module-level hyperparameters
    # Load the knowledge-graph triples
    entitys, relations, kg_triples = dataloader4kge.readKGData()
    # Build the KG adjacency index from the triples
    kg_indexs = dataloader4KGNN.getKgIndexsFromKgTriples(kg_triples)
    # Load the user-item interaction data
    users, items, train_set, test_set = dataloader4kge.readRecData()
    # Collect each user's positive examples as their interaction history
    user_history_pos_dict = dataloader4KGNN.getUserHistoryPosDict(train_set)
    # Filter out users without any positive history
    train_set = dataloader4KGNN.filetDateSet(train_set, user_history_pos_dict)
    test_set = dataloader4KGNN.filetDateSet(test_set, user_history_pos_dict)
    # Initialize the model and optimizer
    net = RippleNet(max(entitys) + 1, max(relations) + 1)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr)
    # Start training
    for e in range(n_epoch):
        net.train()
        all_loss = 0
        # Regenerate the ripple sets at every epoch
        ripple_set = get_ripple_set(kg_indexs, user_history_pos_dict)
        for dataset in tqdm(DataLoader(train_set, batch_size=batch_size, shuffle=True)):
            return_dict = net(*get_feed_dict(dataset, ripple_set))
            loss = return_dict["loss"]
            all_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch {}, avg_loss={:.4f}'.format(e, all_loss / (len(train_set) // batch_size)))
        # Evaluate the model
        if e % eva_per_epochs == 0:
            p, r, auc = doEva(net, train_set, ripple_set, batch_size)
            print('train: Precision {:.4f} | Recall {:.4f} | AUC {:.4f}'.format(p, r, auc))
            # Regenerate the ripple sets for the test set to make prediction harder
            ripple_set = get_ripple_set(kg_indexs, user_history_pos_dict)
            p, r, auc = doEva(net, test_set, ripple_set, batch_size)
            print('test: Precision {:.4f} | Recall {:.4f} | AUC {:.4f}'.format(p, r, auc))
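# get_ripple_set() is defined elsewhere in this project. A minimal sketch of the
# RippleNet construction it plausibly performs, assuming kg_indexs maps
# head -> list of (relation, tail) pairs; the signature and the n_hop / n_memory
# hyperparameter names are assumptions:
import random

def get_ripple_set_sketch(kg_indexs, user_history_pos_dict, n_hop=2, n_memory=32):
    ripple_set = {}
    for user, history in user_history_pos_dict.items():
        ripple_set[user] = []
        tails_of_last_hop = list(history)  # hop 0 seeds: the user's positive items
        for _ in range(n_hop):
            heads, rels, tails = [], [], []
            for entity in tails_of_last_hop:
                for rel, tail in kg_indexs.get(entity, []):
                    heads.append(entity); rels.append(rel); tails.append(tail)
            if heads:
                # Sample a fixed-size memory per hop (with replacement) so every
                # user's ripple set has the same shape
                idx = [random.randrange(len(heads)) for _ in range(n_memory)]
                hop = ([heads[i] for i in idx], [rels[i] for i in idx], [tails[i] for i in idx])
            else:
                # Dead end in the KG: fall back to the previous hop's memories
                hop = ripple_set[user][-1] if ripple_set[user] else ([], [], [])
            ripple_set[user].append(hop)
            tails_of_last_hop = hop[2]
    return ripple_set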
    walk = [str(start_node)]  # Initialize the walk sequence
    for _ in range(walk_length):  # Sample up to the maximum walk length
        current_node = int(walk[-1])
        neighbors = list(g.neighbors(current_node))  # Neighbors of the current node
        if len(neighbors) > 0:
            next_node = np.random.choice(neighbors, 1)
            walk.extend([str(n) for n in next_node])
        else:
            break  # Dead end: stop the walk early
    return walk


def multi_metaPath2vec(graphs, dim=16, walk_length=12, num_walks=256, min_count=3):
    seqs = []
    for g in graphs:
        # Merge the walk sequences generated on each meta-path subgraph
        seqs.extend(getDeepwalkSeqs(g, walk_length, num_walks))
    # gensim < 4.0 uses size=; in gensim >= 4.0 the parameter is vector_size=
    model = word2vec.Word2Vec(seqs, size=dim, min_count=min_count)
    return model


if __name__ == '__main__':
    # Load the knowledge-graph triples
    _, _, triples = dataloader4kge.readKGData()
    graphs = fromTriplesGeneralSubGraphSepByMetaPath(triples)
    model = multi_metaPath2vec(graphs)
    print(model.wv.most_similar('259', topn=3))  # The three nodes most similar to node 259
    model.wv.save_word2vec_format('e.emd')  # Save the embeddings for downstream tasks
    model.save('m.model')  # Save the model for downstream tasks
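# getDeepwalkSeqs() is called above but its definition is not shown here. A plausible
# sketch, assuming the walk body at the top of this file lives in a helper (called
# one_walk below, an illustrative name) and that g is a networkx graph:
def getDeepwalkSeqs_sketch(g, walk_length, num_walks):
    seqs = []
    for _ in range(num_walks):
        # Start each walk from a uniformly sampled node of this meta-path subgraph
        start_node = np.random.choice(list(g.nodes()))
        seqs.append(one_walk(g, start_node, walk_length))  # one_walk: the body above
    return seqs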