Esempio n. 1
0
	else:
		d[row[2]] = d.get(row[2]) + '===' + row[4]

item_review_l = list(d.items())
print(item_review_l[0][1])
'''
# Tokenize, remove stop words, stem

# Build the dictionary

# Build the bag-of-words

# LDA output

# NOTE(review): the '[email protected]' fragments in the paths below look like
# an e-mail-obfuscation artifact from scraping, not real file names -- confirm
# the actual file names before running.
trainfile = '../data/[email protected]'
train_df = preprocess.readdata(trainfile, ',')
train_list = preprocess.create_train_list(train_df)

# The test set is read without a header row (header=None).
testset_df = pd.read_csv('../data/[email protected]', header=None)

# Build two dictionaries: user -> sentiment vector, and item -> topic vector

# 1. Build the user -> pref dictionary
uisv_names = ['user', 'item', 'sentiment', 'vector', 'pref']
uisv_df = pd.read_csv('../data/out3/uisv3.csv', header=None, names=uisv_names)
user_l = uisv_df['user'].tolist()
pref_l = uisv_df['pref'].tolist()

# Pair each 'user' value with the 'pref' value of the same row; if a user
# occurs in several rows, the last row's pref is the one kept by dict().
u_dict = dict(zip(user_l, pref_l))

# 2. Build the item-vector matrix
Esempio n. 2
0
sys.path.append("..")
from utils.logger import get_logger
from preprocess import preprocess
from model import bmf
import pandas as pd
import numpy as np

logger = get_logger('e_BMF_list')

# NOTE(review): '[email protected]' in the path looks like a scraping
# artifact (e-mail obfuscation) -- confirm the real input file name.
filename = 'j:/amazon/output/[email protected]'
# Last path component; reused in the log line and in the output file name.
dataname = filename.split('/')[-1]
# Output directory
outpath = 'j:/amazon/result/result4/'
# 0. Read the data
ratings = preprocess.readdata(filename, ',')
# 1. Check for duplicate entries and drop them if there are any
ratings_d = preprocess.drop_duplicate(ratings)
# 2. Replace user_id and item_id
ratings_r, users, items = preprocess.replace_user_and_item(ratings_d)

# Basic dataset description (total number of ratings, users and items)
m = len(users)  # number of users
n = len(items)  # number of items
# Log the rating count, the user count (m) and the item count (n).
logger.info('dataset:' + dataname + ',ratings:' + str(len(ratings_r)) +
            ',user:' + str(m) + ',item:' + str(n))

# 3. Split the data
# presumably 0.8 is the training fraction -- confirm against preprocess.split_data
trainset, testset = preprocess.split_data(ratings_r, 0.8)
trainset.to_csv(outpath + 'trainset' + '_' + dataname, index=None, header=None)
Esempio n. 3
0
import sys
sys.path.append('../')
from preprocess import preprocess
import numpy as np 


# Load the ratings file, using ',' as the separator argument.
# NOTE(review): '[email protected]' in the path looks like a scraping
# artifact (e-mail obfuscation) -- confirm the real input file name.
ratings_df = preprocess.readdata('j:/amazon/output/[email protected]', ',')

# Yields the re-mapped ratings frame plus the users and items collections.
ratings_r_df, users, items = preprocess.replace_user_and_item(ratings_df)

# itertuples() rows: rec[0] = index, rec[1] = user_id, rec[2] = item_id;
# rec[3] is presumably the rating -- confirm against the input schema.
# Each entry of l has the form [(user_id, item_id), rec[3]].
l = [[(rec[1], rec[2]), rec[3]] for rec in ratings_r_df.itertuples()]

# Show the first entry, both as stored and flattened into its three fields.
row = l[0]
print(row)
print(*row[0], row[1])

'''
@desc: training
@param: trainMatrix, k (int, the number of factors)
@return: u, v (matrices of size m*k and n*k)
'''
def train(l,k):
    m = len(users)
    n = len(items)
    alpha = 0.01
    lamda = 0.01
    u = np.random.rand(m,k)