Esempio n. 1
0
def train( epochs = 20, batchSize = 1024, lr = 0.01, dim = 128, n_neighbors=10, eva_per_epochs=1):
    """Train a KGCN recommendation model and periodically evaluate it.

    Args:
        epochs: number of training epochs.
        batchSize: mini-batch size for the training DataLoader.
        lr: Adam learning rate.
        dim: embedding dimension for users/entities/relations.
        n_neighbors: neighbors sampled per entity when building the KG adjacency.
        eva_per_epochs: run evaluation every this many epochs.
    """
    # Load user-item interaction triples and knowledge-graph triples.
    users, items, train_set, test_set = dataloader4kge.readRecData()
    entitys, relations, kgTriples = dataloader4kge.readKGData()
    kg_indexes = dataloader4KGNN.getKgIndexsFromKgTriples(kgTriples)

    # Fixed-size neighbor-sampling tables for entities and their relations.
    adj_entity, adj_relation = dataloader4KGNN.construct_adj(n_neighbors, kg_indexes, len(entitys))

    # +1 because ids are 0-based and max(...) gives the largest id, not the count.
    net = KGCN( max(users)+1, max(entitys)+1, max(relations)+1,
                  dim, adj_entity, adj_relation,n_neighbors = n_neighbors)
    optimizer = torch.optim.Adam( net.parameters(), lr = lr, weight_decay = 5e-4 )
    loss_fcn = nn.BCELoss()

    # Guard: if batchSize >= len(train_set) the integer division would be 0
    # and the avg-loss computation below would divide by zero.
    n_batches = max(1, len(train_set) // batchSize)

    for e in range( epochs ):
        net.train()
        all_loss = 0.0
        for u,i,r in tqdm( DataLoader( train_set, batch_size = batchSize,shuffle=True) ):
            logits = net( u, i )
            loss = loss_fcn( logits, r.float() )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # .item() detaches the scalar so no autograd graph is retained.
            all_loss += loss.item()
        print('epoch {},avg_loss={:.4f}'.format(e, all_loss / n_batches))

        # Evaluate on both splits every eva_per_epochs epochs.
        if e % eva_per_epochs == 0:
            p, r, acc = doEva(net, train_set)
            print('train: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
            p, r, acc = doEva(net, test_set)
            print('test: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.format(p, r, acc))
Esempio n. 2
0
def train( epochs = 20, batchSize = 1024, lr = 0.01, dim = 128 ):
    """Train a TransR knowledge-graph-embedding model.

    Args:
        epochs: number of training epochs.
        batchSize: mini-batch size for the DataLoader.
        lr: AdamW learning rate.
        dim: embedding dimension.
    """
    # Load KG triples and wrap them in a dataset with negative sampling.
    entitys, relation, triples = dataloader4kge.readKGData( )
    train_set = dataloader4kge.KgDatasetWithNegativeSampling( triples, entitys )
    # +1 because ids are 0-based and max(...) gives the largest id, not the count.
    net = TransR( max( entitys ) + 1 , max( relation ) + 1, dim )
    optimizer = torch.optim.AdamW( net.parameters(), lr = lr, weight_decay = 5e-3 )
    # Training loop.
    for e in range(epochs):
        net.train()
        all_loss = 0.0
        for X in tqdm( DataLoader( train_set, batch_size = batchSize, shuffle = True )):
            optimizer.zero_grad( )
            loss = net( X )
            # BUGFIX: accumulate the detached Python float, not the tensor —
            # summing the tensor itself keeps every batch's autograd graph
            # alive for the whole epoch and leaks memory.
            all_loss += loss.item()
            loss.backward( )
            optimizer.step( )
        print('epoch {},avg_loss={:.4f}'.format( e, all_loss/( len( triples ))))
Esempio n. 3
0
def train(epochs=20, batchSize=1024, lr=0.01, dim=128, eva_per_epochs=1):
    """Jointly train an MKR model on recommendation and KG-embedding tasks.

    Args:
        epochs: number of training epochs.
        batchSize: mini-batch size (shared by both data loaders — see note below).
        lr: AdamW learning rate.
        dim: embedding dimension.
        eva_per_epochs: run evaluation every this many epochs.
    """
    # Load KG triples (with negative sampling) and user-item triples.
    entitys, relation, triples = dataloader4kge.readKGData()
    kgTrainSet = dataloader4kge.KgDatasetWithNegativeSampling(triples, entitys)
    users, items, train_set, test_set = dataloader4kge.readRecData()

    # +1 because ids are 0-based and max(...) gives the largest id, not the count.
    net = MKR(max(users) + 1, max(entitys) + 1, max(relation) + 1, dim)
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr, weight_decay=5e-3)
    # Training loop.
    for e in range(epochs):
        net.train()
        all_loss = 0.0
        # Sample rec triples and KG triples in lockstep: MKR's cross unit
        # mixes item and head-entity features, so both loaders must share one
        # batch size; drop_last keeps the two batch shapes aligned.
        for rec_set, kg_set in tqdm(
                zip(
                    DataLoader(train_set,
                               batch_size=batchSize,
                               shuffle=True,
                               drop_last=True),
                    DataLoader(kgTrainSet,
                               batch_size=batchSize,
                               shuffle=True,
                               drop_last=True))):
            optimizer.zero_grad()
            loss = net(rec_set, kg_set)
            # BUGFIX: accumulate the detached Python float, not the tensor —
            # summing the tensor itself keeps every batch's autograd graph
            # alive for the whole epoch and leaks memory.
            all_loss += loss.item()
            loss.backward()
            optimizer.step()
        print('epoch {},avg_loss={:.4f}'.format(e,
                                                all_loss / (len(train_set))))

        # Evaluate on both splits every eva_per_epochs epochs.
        if e % eva_per_epochs == 0:
            p, r, acc = doEva(net, train_set)
            print('train: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.
                  format(p, r, acc))
            p, r, acc = doEva(net, test_set)
            print('test: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.
                  format(p, r, acc))
Esempio n. 4
0
def train(n_epoch=n_epoch,batch_size=batch_size,eva_per_epochs=1):
    """Train a RippleNet model, regenerating ripple sets each epoch.

    Args:
        n_epoch: number of training epochs (default: module-level n_epoch).
        batch_size: mini-batch size (default: module-level batch_size).
        eva_per_epochs: run evaluation every this many epochs.

    Note: the defaults (and the learning rate `lr` used below) are read from
    module-level configuration names defined elsewhere in this file.
    """
    # Load knowledge-graph triples.
    entitys, relation, kg_triples = dataloader4kge.readKGData()
    # Build the KG index structure from the triples.
    kg_indexs = dataloader4KGNN.getKgIndexsFromKgTriples(kg_triples)
    # Load user-item interaction triples.
    users, items, train_set, test_set = dataloader4kge.readRecData()
    # Positive interactions serve as each user's viewing history.
    user_history_pos_dict = dataloader4KGNN.getUserHistoryPosDict(train_set)
    # Drop users that have no positive history (ripple sets need a seed set).
    train_set = dataloader4KGNN.filetDateSet(train_set, user_history_pos_dict)
    test_set = dataloader4KGNN.filetDateSet(test_set, user_history_pos_dict)

    # Model and optimizer; only trainable parameters are optimized.
    net = RippleNet(max(entitys)+1, max(relation)+1)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),lr)

    # Training loop.
    for e in range(n_epoch):
        net.train()
        all_loss = 0.0
        # Regenerate the ripple sets each epoch for fresh propagation paths.
        ripple_set = get_ripple_set(kg_indexs,user_history_pos_dict)
        for dataset in tqdm(DataLoader(train_set, batch_size=batch_size, shuffle=True)):
            return_dict = net(*get_feed_dict(dataset, ripple_set))
            loss = return_dict["loss"]
            # BUGFIX: accumulate the detached Python float, not the tensor —
            # summing the tensor itself keeps every batch's autograd graph
            # alive for the whole epoch and leaks memory.
            all_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch {},avg_loss={:.4f}'.format(e, all_loss/(len(train_set)//batch_size)))

        # Evaluate on both splits every eva_per_epochs epochs.
        if e % eva_per_epochs == 0:
            p, r, auc = doEva(net, train_set, ripple_set, batch_size)
            print('train: Precision {:.4f} | Recall {:.4f} | AUC {:.4f}'.format(p, r, auc))
            # Regenerate ripple sets for the test pass to make prediction harder.
            ripple_set = get_ripple_set(kg_indexs, user_history_pos_dict)
            p, r, auc = doEva(net, test_set, ripple_set, batch_size)
            print('test: Precision {:.4f} | Recall {:.4f} | AUC {:.4f}'.format(p, r, auc))
Esempio n. 5
0
    walk = [str(start_node)]  # 初始化游走序列
    for _ in range(walk_length):  # 最大长度范围内进行采样
        current_node = int(walk[-1])
        neighbors = list(g.neighbors(current_node))  # 获取当前节点的邻居
        if len(neighbors) > 0:
            next_node = np.random.choice(neighbors, 1)
            walk.extend([str(n) for n in next_node])
    return walk


def multi_metaPath2vec(graphs,
                       dim=16,
                       walk_length=12,
                       num_walks=256,
                       min_count=3):
    """Embed nodes by training one word2vec model over random-walk
    sequences collected from several meta-path-specific subgraphs.

    Args:
        graphs: iterable of subgraphs, one per meta-path.
        dim: embedding vector size.
        walk_length: maximum length of each random walk.
        num_walks: number of walks started per node.
        min_count: word2vec frequency cutoff for nodes.
    """
    corpus = []
    for graph in graphs:
        # Pool the walk sequences generated under every meta-path.
        corpus += getDeepwalkSeqs(graph, walk_length, num_walks)
    return word2vec.Word2Vec(corpus, size=dim, min_count=min_count)


if __name__ == '__main__':
    # Load the knowledge graph; only the triples are needed here.
    _, _, triples = dataloader4kge.readKGData()
    # Split the KG into one subgraph per meta-path, then embed the merged walks.
    meta_path_graphs = fromTriplesGeneralSubGraphSepByMetaPath(triples)
    emb_model = multi_metaPath2vec(meta_path_graphs)
    # Inspect the three nodes most similar to node 259.
    print(emb_model.wv.most_similar('259', topn=3))
    emb_model.wv.save_word2vec_format('e.emd')  # persist embeddings for downstream tasks
    emb_model.save('m.model')  # persist the full model for downstream tasks