Example #1
0
import numpy as np
import pandas as pd

from model import bmf
from preprocess import preprocess
from utils.logger import get_logger

# Experiment script: load a ratings file, clean it, log basic statistics and
# write the train/test split to `outpath`.
logger = get_logger('e_BMF_list')

# NOTE(review): the file name below looks mangled by e-mail obfuscation
# ("[email protected]") -- confirm the real dataset file name.
filename = 'j:/amazon/output/[email protected]'
# Base name of the input file; reused in the log message and output names.
dataname = filename.split('/')[-1]
# Directory the train/test CSV files are written to.
outpath = 'j:/amazon/result/result4/'

# 0. Read the data.
ratings = preprocess.readdata(filename, ',')
# 1. Check for duplicate entries and remove them if there are any.
ratings_d = preprocess.drop_duplicate(ratings)
# 2. Replace user_id and item_id.
ratings_r, users, items = preprocess.replace_user_and_item(ratings_d)

# Basic dataset description (total number of ratings, users and items).
m = len(users)  # number of users
n = len(items)  # number of items
# The original message was garbled (',user:'******',item:'), which is a
# syntax error; `m` is the user count that belongs in that slot.
logger.info('dataset:' + dataname + ',ratings:' + str(len(ratings_r)) +
            ',user:' + str(m) + ',item:' + str(n))

# 3. Split the data (0.8 is presumably the training fraction -- TODO confirm
#    against preprocess.split_data).
trainset, testset = preprocess.split_data(ratings_r, 0.8)
# Written without index and without a header row.
trainset.to_csv(outpath + 'trainset' + '_' + dataname, index=None, header=None)
testset.to_csv(outpath + 'testset' + '_' + dataname, index=None, header=None)

# 4. Build the training input.
#train_matrix = preprocess.create_matrix_by_trainset(trainset,m,n)
Example #2
0
from preprocess import preprocess
from model import lmf
import pandas as pd
from utils.logger import get_logger

# Experiment script: load a ratings file, clean it, log basic statistics and
# write the train/test split to `outpath`.
logger = get_logger('e_MF')

# NOTE(review): the file name below looks mangled by e-mail obfuscation
# ("[email protected]") -- confirm the real dataset file name.
filename = 'j:/amazon/output/[email protected]'
# Base name of the input file; reused in the log message and output names.
dataname = filename.split('/')[-1]
# Directory the train/test CSV files are written to.
outpath = 'j:/amazon/result/result2/'

# 0. Read the data.
ratings = preprocess.readdata(filename, ',')
# 1. Check for duplicate entries and remove them if there are any.
ratings_d = preprocess.drop_duplicate(ratings)
# 2. Replace user_id and item_id.
ratings_r = preprocess.replace_user_and_item(ratings_d)

# Basic dataset description (total number of ratings, users and items).
m = len(set(ratings_r['user_id']))  # number of distinct users
n = len(set(ratings_r['item_id']))  # number of distinct items
# The original message was garbled (',user:'******',item:'), which is a
# syntax error; `m` is the user count that belongs in that slot.
logger.info('dataset:' + dataname + ',ratings:' + str(len(ratings_r)) +
            ',user:' + str(m) + ',item:' + str(n))

# 3. Split the data (0.8 is presumably the training fraction -- TODO confirm
#    against preprocess.split_data).
trainset, testset = preprocess.split_data(ratings_r, 0.8)
# Written without index and without a header row.
trainset.to_csv(outpath + '_MF_' + 'trainset_' + dataname,
                index=None,
                header=None)
testset.to_csv(outpath + '_MF_' + 'testset_' + dataname,
               index=None,