# Example #1
def train(epochs=20, batchSize=1024, lr=0.01, lamda=0.1, factors_dim=64):
    '''
    Train an ALS latent-factor model on the MovieLens-100K ratings.

    :param epochs: number of training epochs
    :param batchSize: samples per mini-batch
    :param lr: learning rate
    :param lamda: regularization coefficient
    :param factors_dim: number of latent factors
    :return: None (prints train/test RMSE each epoch)
    '''
    # Load (user, item, rating) triples; hold out 10% as the test split.
    user_set, item_set, train_set, test_set = dataloader.readRecData(
        fp.Ml_100K.RATING5, test_ratio=0.1)

    # Model sized to the observed user/item vocabularies.
    als = ALS(len(user_set), len(item_set), factors_dim)
    # Iterator that yields the training triples in mini-batches.
    dataIter = DataIter(train_set)

    for epoch in range(epochs):
        for batch in tqdm(dataIter.iter(batchSize)):
            # Unpack user ids, item ids and ratings from the triples.
            users = batch[:, 0]
            items = batch[:, 1]
            # Column vector so the error broadcasts against the factor matrices.
            ratings = batch[:, 2].reshape(-1, 1)
            # Predict, then take one gradient-descent step on this batch.
            preds = als.forward(users, items)
            als.backword(ratings, preds, users, items, lr, lamda)

        print("TrainSet: Epoch {} | RMSE {:.4f} ".format(
            epoch, evaluateRMSE(train_set, als)))
        print("TestSet: Epoch {} | RMSE {:.4f} ".format(
            epoch, evaluateRMSE(test_set, als)))
# Example #2
def train(epochs=10, batchSize=1024, lr=0.01, dim=256, eva_per_epochs=1):
    '''
    Train a DSSM two-tower model on the MovieLens-100K ratings.

    :param epochs: number of training epochs
    :param batchSize: samples per mini-batch
    :param lr: learning rate
    :param dim: dimensionality of the user/item embeddings
    :param eva_per_epochs: evaluate every this many epochs
    :return: the trained DSSM network
    '''
    # Load (user, item, rating) triples; hold out 10% as the test split.
    user_set, item_set, train_set, test_set = \
        dataloader.readRecData(fp.Ml_100K.RATING, test_ratio = 0.1)
    # Initialize the DSSM model.
    net = DSSM(len(user_set), len(item_set), dim)
    # Optimizer and binary-cross-entropy loss (labels are 0/1 implicit feedback).
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr)
    criterion = torch.nn.BCELoss()
    # Training loop.
    for e in range(epochs):
        net.train()
        all_lose = 0
        # Read the training data batch by batch.
        for u, i, r in DataLoader(train_set,
                                  batch_size=batchSize,
                                  shuffle=True):
            optimizer.zero_grad()
            # BCELoss expects float targets.
            r = r.float()
            result = net(u, i)
            loss = criterion(result, r)
            # .item() detaches to a Python float; accumulating the raw tensor
            # would keep every batch's autograd graph alive for the whole epoch.
            all_lose += loss.item()
            loss.backward()
            optimizer.step()
        # max(1, ...) guards against a train set smaller than one batch.
        print('epoch {}, avg_loss = {:.4f}'.format(
            e, all_lose / max(1, len(train_set) // batchSize)))

        # Periodic evaluation on both splits.
        if e % eva_per_epochs == 0:
            p, r, acc = doEva(net, train_set)
            print('train: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.
                  format(p, r, acc))
            p, r, acc = doEva(net, test_set)
            print('test: Precision {:.4f} | Recall {:.4f} | accuracy {:.4f}'.
                  format(p, r, acc))

    return net
# Example #3
def get_recomedations_by_itemCF(item_sims, user_o_set):
    '''
    Build each user's recommendation set from item nearest neighbours.

    :param item_sims: item neighbour lists: {item: [neighbour1, neighbour2, ...]}
    :param user_o_set: items each user already likes: {user: {item1, item2, ...}}
    :return: defaultdict(set) mapping each user to recommended items
    '''
    recs = collections.defaultdict(set)
    for user, liked in user_o_set.items():
        for item in liked:
            # Skip liked items that have no neighbour list.
            if item not in item_sims:
                continue
            # Recommend the neighbours of a liked item, minus anything
            # the user has already interacted with.
            recs[user].update(set(item_sims[item]) - liked)
    return recs


# Produce ItemCF-based recommendation lists.
def trainItemCF(item_users, sim_method, user_items, k=5):
    '''
    Run item-based collaborative filtering end to end.

    :param item_users: inverted index {item: set_of_users}
    :param sim_method: set-similarity function used for the k-NN search
    :param user_items: forward index {user: set_of_items}
    :param k: number of nearest neighbours kept per item
    :return: per-user recommendation sets
    '''
    # Find each item's k most similar items, then expand them into
    # per-user recommendations.
    neighbours = userCF.knn4set(item_users, k, sim_method)
    return get_recomedations_by_itemCF(neighbours, user_items)


if __name__ == '__main__':
    # Load the ratings and keep only the train/test splits.
    _, _, train_set, test_set = dataloader.readRecData(fp.Ml_100K.RATING,
                                                       test_ratio=0.1)
    # Build the item->users and user->items indexes from the train split.
    item_users, user_items = getSet(train_set)
    # Cosine-similarity ItemCF with 5 neighbours per item.
    print(trainItemCF(item_users, b_sim.cos4set, user_items, k=5))