def ex1(dat_file='./ml-1m/ratings.dat', pct_train=0.5):
    """Train an SVD recommender on a MovieLens ratings file and print RMSE/MAE.

    dat_file: path to a '::'-separated ratings file (user::item::rating).
    pct_train: passed straight to split_train_test(percent=...) -- presumably a
               fraction here (0.5); verify against recsys' expected units.
    """
    data = Data()
    # matrix layout: cols come from field 0 (users), rows from field 1 (items),
    # cell values from field 2 (ratings); ids parsed as ints
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # evaluate performance on the held-out test tuples
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # item/user absent from the training matrix -- skip this pair
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
def impute_to_file(self, tastings, k=100, min_values=2, verbose=True):
    """Factorize the tastings data with SVD and save the model to a zip file.

    tastings: rating data to be written out in MovieLens format first.
    k: number of latent dimensions for the factorization.
    min_values: minimum ratings per row/col required by SVD.compute.
    verbose: when True, turn on recsys' global verbose logging.
    Returns the fitted SVD instance.
    """
    # create a data file in Movielens format with the tastings data
    self.save_tastings_to_movielens_format_file(tastings)
    # for logging/testing purposes we may like this verbose
    if verbose:
        recsys.algorithm.VERBOSE = True
    svd = SVD()
    # load source data, perform SVD, save to zip file
    source_file = self.file_location(self.tastings_movielens_format)
    svd.load_data(filename=source_file, sep='::', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    outfile = self.file_location(self.tastings_recsys_svd)
    svd.compute(k=k, min_values=min_values, pre_normalize=None,
                mean_center=True, post_normalize=True, savefile=outfile)
    return svd
def calculate_stats_users(pct_train):
    """Train an SVD model on the working user-data CSV and print RMSE/MAE.

    pct_train: passed to split_train_test(percent=...); units per recsys API.
    """
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    # note: post_normalize=False here, unlike most sibling helpers in this file
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # unseen item/user -- cannot predict, skip
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
def test_classifier(model, filename=None, itemkey="track", selector="SELECT * FROM train"):
    """Run 10 random 70/30 train/test splits over sqlite rows and print running RMSE.

    model: unused here -- NOTE(review): parameter appears dead, confirm with callers.
    filename: optional previously saved SVD model to preload.
    itemkey: column name holding the item id in each row dict.
    selector: SQL used to fetch the rating rows.
    NOTE(review): the sqlite connection is never closed.
    """
    conn = sqlite3.connect("db.sqlite")
    conn.row_factory = dict_factory
    cur = conn.cursor()
    s = 0      # accumulated RMSE over iterations
    c = 0      # iteration count (float so s/c is a true average)
    t_p = 0    # total number of predictions made
    for i in range(0, 10):
        svd = SVD()
        if filename is not None:
            svd.load_model(filename)
        l = list(cur.execute(selector))
        random.shuffle(l)
        count = len(l)
        # first 70% of the shuffled rows become (rating, track, user) training tuples
        svd.set_data([(x["rating"], x["track"], x["user"]) for x in l[0:int(count * 0.7)]])
        K = 1000
        svd.compute(k=K, min_values=0.0, pre_normalize=None, mean_center=True,
                    post_normalize=True)
        pairs = []
        # remaining 30% is scored; pairs hold (prediction, actual)
        for idx, item in enumerate(l[int(count * 0.7):]):
            user = item["user"]
            track = item[itemkey]
            pairs.append((predict_item(svd, track, user), item["rating"]))
        t_p += len(pairs)
        s += RMSE(pairs).compute()
        c += 1.0
        # running average after each split (placement reconstructed -- confirm)
        print "iteration"
        print s / c, t_p
def evaluate(data, count=5, K=100):
    """Repeat train/test evaluation *count* times, returning per-run RMSE/MAE dicts.

    data: a loaded recsys Data instance.
    count: number of random splits to evaluate.
    K: number of SVD latent dimensions.
    Returns a list of {"RMSE": ..., "MAE": ...} dicts (one per successful run).
    """
    results = []
    for i in range(count):
        # PERCENT_TRAIN is a module-level constant
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print len(data.get()), len(train.get()), len(test.get())
        #test_in_train(test, train)
        #print train.get()
        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                    post_normalize=True)
        #Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                #print "keyerror: ===========================================================>"
                continue
        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print rsu
            results.append(rsu)
        except:
            # deliberate best-effort: compute() can fail when no pairs were added
            print "one error....++++++++++++++++++++++++++++++++++++++++++++++++++++"
    return results
def main():
    """Load ratings, fit an SVD model, and print top-5 recommendations for user 1."""
    svd = SVD()
    train = Data()
    test = Data()
    # NOTE(review): train and test load the same file -- confirm this is intended
    train.load('randUser/rate1.csv', force=True, sep=',',
               format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    test.load('randUser/rate1.csv', force=True, sep=',',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    svd.set_data(train)
    svd.compute(k=100, min_values=0.5, pre_normalize=False, mean_center=True,
                post_normalize=True)
    # rmse = RMSE()
    # mae = MAE()
    # for rating, item_id, user_id in test.get():
    #     try:
    #         pred_rating = svd.predict(item_id, user_id)
    #         rmse.add(rating, pred_rating)
    #         mae.add(rating, pred_rating)
    #     except KeyError:
    #         continue
    # print 'RMSE=%s' % rmse.compute()
    # print 'MAE=%s' % mae.compute()
    # test = make_test()
    # print precision_and_recall(test, svd)
    # rec_list = svd.recommend(200, n=5, only_unknowns=False, is_row=False)
    # is_row=False: recommend items for a user id (user 1)
    print svd.recommend(1, n=5, only_unknowns=False, is_row=False)
def recommend(dimension=100):
    """Recommend games for user 1, printing each game's image and name.

    dimension: number of SVD latent dimensions.
    Relies on module-level Game, urllib, and matplotlib (plt).
    """
    svd = SVD()
    # tab-separated file; value comes from field 0, row from 1, col from 2
    svd.load_data(filename='rating.dat', sep='\t',
                  format={'col': 2, 'row': 1, 'value': 0, 'ids': int})
    k = dimension
    svd.compute(k=k, min_values=1, pre_normalize=None, mean_center=True,
                post_normalize=True)
    game_recdict = {}
    # is_row=False: treat id 1 as a column (user) id
    for item in svd.recommend(1, is_row=False):
        appid = item[0]
        game = Game(appid)
        if (game.success == 1):
            # keyed by recommendation score so keys can be sorted below
            game_recdict[game.rec] = [game.appid, game.genre, game.name, game.img]
    sorted_list = sorted(game_recdict.keys(), reverse=True)
    print("Games Recommended:")
    for i in sorted_list:
        # image: download to a temp file and display it
        urllib.urlretrieve(game_recdict[i][3], "local-filename.jpg")
        image = plt.imread("local-filename.jpg")
        plt.imshow(image)
        plt.show()
        #name
        print game_recdict[i][2]
def train_and_save(filename):
    """Train an SVD model on *filename* and save it, reusing a cached model if present.

    filename: '::'-separated ratings file; its extension names the model
              ('svdn_model_<ext>.zip').
    The model zip is reused when it already exists; otherwise a new model is
    trained on an 80% split and saved (recsys appends '.zip' to savefile).
    """
    step = filename.split('.')[-1]
    data = Data()
    format = {'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}
    data.load(filename, sep='::', format=format)
    train, test = data.split_train_test(percent=80)
    try:
        # Loading an existing archive raises when the file is absent/corrupt.
        svd = SVD('svdn_model_{step}.zip'.format(step=step))
        print('Already exists: svdn_model_{step}.zip'.format(step=step))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any load failure means "train from scratch".
        svd = SVD()
        svd.set_data(train)
        svd.compute(
            k=100,
            min_values=2,
            pre_normalize=False,
            mean_center=True,
            post_normalize=True,
            savefile='svdn_model_{step}'.format(step=step)
        )
        print('Saved svdn_model_{step}.zip'.format(step=step))
def similar_users(user):
    """Return ids of users most similar to *user* via SVD over shared files.

    Appends the user's file list to ./dc_recom.dat (once, guarded by the
    'recommended' flag in Mongo), factorizes the full file, and queries
    svd.similar. Relies on module-level db (Mongo) and unidecode.
    """
    if not type(user) is str:
        # normalize unicode usernames to plain ASCII str keys
        user = unidecode.unidecode(user)
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        f = open('./dc_recom.dat', 'a')
        for u in user_files:
            f.write(u['user'] + '::' + u['tth'])
            f.write('\n')
        f.close()
        # mark as exported so the file is only appended once per user
        db.done_users.update({'user': user}, {
            'user': user,
            'recommended': True
        })
    data = Data()
    # no 'value' field: presence of a (user, tth) pair is the implicit rating
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    # first element of each (id, score) tuple
    return [i[0] for i in svd.similar(user)]
def reCompute(user_id): data = Data() fname = 'ratings.dat' dataset = Data() format = {'col': 0, 'row': 1, 'value': 2, 'ids': 'int'} dataset.load(fname, sep=':', format=format) svd = SVD() svd.set_data(dataset) k = 100 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True) #New ID of Added User USERID = user_id a = svd.recommend(USERID, is_row=False) for j in range(1, len(a)): global a k = a[j][0] print df_movies.query('movie_id==@k')
def compute_SVD():
    """Factorize the ratings data and persist the model under the add dir."""
    model = SVD()
    model.set_data(load_data())
    rank = 100
    model.compute(k=rank, min_values=10, pre_normalize=None,
                  mean_center=True, post_normalize=True, savefile=None)
    # save explicitly rather than via compute(savefile=...)
    model.save_model(os.path.join(utils.get_add_dir(), 'ratings'))
def recommended_files(data, user):
    """Return up to ~11 file hashes (tths) predicted to interest *user*.

    data: recsys Data of (user, tth) pairs.
    user: row id to compute similarity/predictions for.
    Walks users similar to *user*, scores their files the user doesn't have,
    and returns the tths of the top-scored, name-deduplicated files.
    Fix vs original: previously fell off the end and returned None when fewer
    than 11 results accumulated; now always returns the (possibly short) list.
    """
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    similar_users = [i[0] for i in svd.similar(user)]
    #recoms = svd.recommend(user,is_row=True,only_unknowns=True,n=50)
    predict_arr = []
    user_tths = db.user_list.find({'user': user})
    tths = [i['tth'] for i in user_tths]
    movie_names = []
    # skip element 0: svd.similar returns the queried user itself first
    for i in similar_users[1:]:
        for j in db.user_list.find({'user': i}):
            if j['tth'] not in tths:
                movie_name = db.tths.find_one({'tth': j['tth']})['name']
                movie_names.append(movie_name)
                tths.append(j['tth'])
                predict_arr.append((movie_name, j['tth'], svd.predict(user, j['tth'])))
    # highest predicted score first
    predict_arr = sorted(predict_arr, key=lambda x: x[2], reverse=True)
    res = []
    c_res = 0
    for p in predict_arr:
        flag = 0
        for r in res:
            # NOTE(review): res holds tth strings, so r[0] is the first
            # character of a hash, compared against a movie name -- this
            # dedup likely never matches; preserved as-is, confirm intent.
            if similar(p[0], r[0]):
                flag = 1
                break
        if flag == 0:
            res.append(p[1])
            c_res += 1
        if c_res > 10:
            return res
    return res
def SVDtrain2(data, pct_train):
    """Split *data* and fit a rank-100 SVD on the training part.

    Returns (model, train_split, test_split).
    """
    training_set, testing_set = data.split_train_test(percent=pct_train)
    model = SVD()
    model.set_data(training_set)
    model.compute(k=100, min_values=5, pre_normalize=None,
                  mean_center=True, post_normalize=True)
    return model, training_set, testing_set
def _tth_magnet_links(res):
    """Map (tth, score) tuples to magnet links, adding a display name when known."""
    links = []
    for i in res:
        try:
            j = ('magnet:?xt=urn:tree:tiger:' + i[0] + "&dn=" +
                 unidecode.unidecode(db.tths.find_one({'tth': i[0]})['name']))
        except Exception:
            # no name on record (or not decodable) -- fall back to a bare link
            j = 'magnet:?xt=urn:tree:tiger:' + i[0]
        links.append(j)
    return links


def recommended_files(user):
    """Return magnet links for files predicted to interest *user*.

    Exports the user's file list to ./dc_recom.dat once, runs SVD to find the
    10 most similar users, re-factorizes on their files only, and converts the
    top (name-deduplicated) recommendations to magnet links.
    Changes vs original: the identical magnet-link building loop that appeared
    twice is factored into _tth_magnet_links, and its bare `except:` is
    narrowed to `except Exception:`.
    """
    if not type(user) is str:
        user = unidecode.unidecode(user)
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        f = open('./dc_recom.dat', 'a')
        for u in user_files:
            f.write(u['user'] + '::' + u['tth'])
            f.write('\n')
        f.close()
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})
    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    similar_users = [i[0] for i in svd.similar(user, n=10)]
    # rebuild the matrix from the similar users' files only
    newdata = Data()
    for i in range(0, len(similar_users), 1):
        files = db.user_list.find({'user': similar_users[i]})
        for f in files:
            newdata.add_tuple((1.0, similar_users[i], f['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)
    res = []
    c_res = 0
    for p in recoms:
        flag = 0
        for r in res:
            # drop files whose names look like ones already kept
            if similar(db.tths.find_one({'tth': p[0]})['name'],
                       db.tths.find_one({'tth': r[0]})['name']):
                flag = 1
                break
        if flag == 0:
            res.append(p)
            c_res += 1
        if c_res > 10:
            return _tth_magnet_links(res)
    return _tth_magnet_links(res)
def build_model(self, uids, kn):
    """Fit an SVD model from a user->songs mapping and store it on self.model.

    uids: dict mapping user id -> iterable of song ids.
    kn: number of latent dimensions for SVD.compute.
    """
    data = Data()
    for uid, songs in uids.items():
        for song in songs:
            # implicit feedback: every (user, song) pair scores 1;
            # tuple layout is (value, row, col)
            data.add_tuple((1, song, uid))
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=kn, min_values=1)
    self.model = svd
def build_model(self, uids, kn):
    """Fit an SVD model over the user->songs mapping and keep it on self.model.

    uids: dict of user id -> iterable of song ids.
    kn: latent-dimension count passed to SVD.compute.
    """
    listening_data = Data()
    for user_id, song_list in uids.items():
        for track in song_list:
            # implicit score of 1 per (user, song) pair: (value, row, col)
            listening_data.add_tuple((1, track, user_id))
    model = SVD()
    model.set_data(listening_data)
    model.compute(k=kn, min_values=1)
    self.model = model
def calculate_SVD_features():
    """Fit and return a rank-100 SVD model over the feature matrix CSV."""
    print "Thanks for input, calculating..."
    svd = SVD()
    # verbose recsys logging while computing
    recsys.algorithm.VERBOSE = True
    dat_file = 'feature_matrix.csv'
    svd.load_data(filename=dat_file, sep=',',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # features matrix: no mean-centering, unlike the user-ratings variant
    svd.compute(k=100, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    return svd
def train_svd(data):
    """
    Fit a rank-30 SVD model on the matrix built from *data*.

    :param data: processed data handed to get_data_model_matrix
    :return: trained SVD model
    """
    model = SVD()
    model.set_data(get_data_model_matrix(data))
    model.compute(k=30, min_values=0, pre_normalize=None,
                  mean_center=True, post_normalize=True)
    return model
def getSVD():
    """Return an SVD model for MovieLens, loading a cached one when available.

    NOTE(review): existence is checked on an absolute path but the model is
    loaded/saved via the relative './model/movielens' -- confirm both resolve
    to the same file at runtime.
    """
    filename = "/home/udaysagar/Documents/Classes/239/recsys/model/movielens.zip"
    if os.path.exists(filename):
        # cached model: SVD(filename) loads the saved factorization
        return SVD("./model/movielens")
    else:
        svd = SVD()
        svd.load_data(filename='./data/movielens/ratings.dat', sep='::',
                      format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
        k = 100
        # savefile persists the model so the next call takes the fast path
        svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
                    post_normalize=True, savefile='./model/movielens')
        return svd
def SVDtrain2(data, pct_train):
    """Split *data* into train/test and fit a rank-100 SVD on the training part.

    pct_train: passed to split_train_test(percent=...).
    Returns (svd_model, train_split, test_split).
    """
    train, test = data.split_train_test(percent=pct_train)
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    return svd, train, test
def calculate_SVD_users():
    """Fit an SVD model on the working user-data CSV, then reset the working file.

    Returns the fitted model. Copies the pristine original CSV back over the
    working copy afterwards, so the working file is restored for the next run.
    """
    print "Thanks for input, calculating..."
    svd = SVD()
    recsys.algorithm.VERBOSE = True
    dat_file = 'user_data_working.csv'
    svd.load_data(filename=dat_file, sep=',',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # restore the working file from the untouched original
    shutil.copy('user_data_original.csv', 'user_data_working.csv')
    return svd
def calculate_stats_features(pct_train):
    """Split the feature matrix and fit a rank-100 SVD on the training part.

    pct_train: passed to split_train_test(percent=...).
    Returns (svd_model, train_split, test_split).
    """
    dat_file = 'feature_matrix.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = data.split_train_test(percent=pct_train)
    K = 100
    svd = SVD()
    svd.set_data(train)
    # feature matrix: no centering or post-normalization
    svd.compute(k=K, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=False)
    return svd, train, test
def create_svd_model(train):
    """Fit and return a rank-100 SVD model over the *train* data."""
    model = SVD()
    model.set_data(train)
    model.compute(k=100, min_values=0, pre_normalize=None,
                  mean_center=True, post_normalize=True)
    return model
def impute_to_file(self, tastings, k=100, min_values=2, verbose=True):
    """Factorize the tastings data with SVD and save the model to a zip file.

    tastings: rating data written out in MovieLens format before loading.
    k / min_values: forwarded to SVD.compute.
    verbose: when True, enable recsys' global verbose logging.
    Returns the fitted SVD instance.
    """
    # create a data file in Movielens format with the tastings data
    self.save_tastings_to_movielens_format_file(tastings)
    # for logging/testing purposes we may like this verbose
    if verbose:
        recsys.algorithm.VERBOSE = True
    svd = SVD()
    # load source data, perform SVD, save to zip file
    source_file = self.file_location(self.tastings_movielens_format)
    svd.load_data(filename=source_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    outfile = self.file_location(self.tastings_recsys_svd)
    svd.compute(k=k, min_values=min_values, pre_normalize=None,
                mean_center=True, post_normalize=True, savefile=outfile)
    return svd
def Compute():
    """Factorize the MovieLens 1M ratings and save the model to ./mvsvd."""
    svd = SVD()
    svd.load_data(filename='./ml-1m/ratings.dat', sep='::', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    # savefile persists the factorization (recsys writes ./mvsvd.zip)
    svd.compute(k=100, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True, savefile='./mvsvd')
def quickstart():
    """Walk through the basic recsys API: similar items, prediction, recommendation.

    Drops into pdb after the factorization for interactive exploration.
    """
    svd = SVD()
    recsys.algorithm.VERBOSE = True
    # load movielens data
    dat_file = DATA_DIR + 'ml-1m-ratings.dat'
    svd.load_data(filename=dat_file, sep='::', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    # compute svd
    k = 100
    svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # deliberate breakpoint for interactive inspection
    pdb.set_trace()
    # movie id's
    ITEMID1 = 1     # toy story
    ITEMID2 = 1221  # godfather II
    # get movies similar to toy story
    print svd.similar(ITEMID1)
    # get predicted rating for given user & movie
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1
    # get predicted rating for user1 and item1, mapped onto min max
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    actual = svd.get_matrix().value(ITEMID, USERID)
    print 'predicted rating = {0}'.format(pred)
    print 'actual rating = {0}'.format(actual)
    print 'which users should see Toy Story?:'
    print svd.recommend(ITEMID)
def ex1(dat_file=DATA_DIR + 'ml-1m-ratings.dat', pct_train=0.5):
    """Train an SVD recommender on a MovieLens ratings file and print RMSE/MAE.

    dat_file: '::'-separated ratings file under DATA_DIR.
    pct_train: passed to split_train_test(percent=...).
    """
    data = Data()
    data.load(dat_file, sep='::', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    # About format parameter:
    #   'row': 1 -> Rows in matrix come from column 1 in ratings.dat file
    #   'col': 0 -> Cols in matrix come from column 0 in ratings.dat file
    #   'value': 2 -> Values (Mij) in matrix come from column 2 in ratings.dat
    #                 file
    #   'ids': int -> Ids (row and col ids) are integers (not strings)
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # evaluate performance
    rmse = RMSE()
    # mae is mean ABSOLUTE error
    # ... in this case it will return 1.09 which means there is an error of
    # almost 1 point out of 5
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # unseen item/user in the training matrix -- skip
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
def similar_users(user):
    """Return ids of users most similar to *user* (SVD over shared file lists).

    Appends the user's files to ./dc_recom.dat once (guarded by the Mongo
    'recommended' flag), factorizes the file, and returns svd.similar ids.
    """
    if not type(user) is str:
        # normalize unicode usernames to plain str keys
        user = unidecode.unidecode(user)
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        f = open('./dc_recom.dat', 'a')
        for u in user_files:
            f.write(u['user'] + '::' + u['tth'])
            f.write('\n')
        f.close()
        # mark exported so the file is appended only once per user
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})
    data = Data()
    # implicit feedback: no 'value' field, presence of the pair is the signal
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    # keep only the user ids from the (id, score) tuples
    return [i[0] for i in svd.similar(user)]
def calculate_SVD_features():
    """Fit and return a rank-100 SVD model over the feature matrix CSV."""
    print "Thanks for input, calculating..."
    svd = SVD()
    recsys.algorithm.VERBOSE = True
    dat_file = 'feature_matrix.csv'
    svd.load_data(filename=dat_file, sep=',', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    # feature matrix: no mean-centering, unlike the user-ratings variant
    svd.compute(k=100, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    return svd
def evaulte(train_set, test_set):  # [sic] misspelling kept -- callers use this name
    """Fit SVD on train_set and return MAE/2 over test_set, counting KeyErrors.

    KKK and MIN_ITEM are module-level constants.
    NOTE(review): dividing MAE by 2.0 presumably rescales the rating range --
    confirm against the data's scale.
    """
    svd = SVD()
    svd.set_data(train_set)
    svd.compute(k=KKK, min_values=MIN_ITEM, pre_normalize=None,
                mean_center=True, post_normalize=True)
    mae = MAE()
    k_err = 0  # number of test pairs that could not be predicted
    for rating, item_id, user_id in test_set.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            mae.add(rating, pred_rating)
        except KeyError:
            #print "keyerror: ===========================================================>"
            k_err += 1
            continue
    print "k_err", k_err, " -- ", "test-len: ", len(test_set.get()), "train-len: ", len(train_set.get())
    result = mae.compute() / 2.0
    return result
def calculate_SVD_users():
    """Fit an SVD model on the working user-data CSV, then reset the working file.

    Returns the fitted model; restores the working CSV from the original copy.
    """
    print "Thanks for input, calculating..."
    svd = SVD()
    recsys.algorithm.VERBOSE = True
    dat_file = 'user_data_working.csv'
    svd.load_data(filename=dat_file, sep=',', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # restore the working file from the untouched original for the next run
    shutil.copy('user_data_original.csv', 'user_data_working.csv')
    return svd
def compute(aws_region, s3_bucket, filename, sep, col_index, row_index, value_index, ids_type):
    """Download ratings from S3, factorize with an energy-derived k, save the model.

    aws_region / s3_bucket / filename: S3 source; the file lands under ./data/.
    sep, col_index, row_index, value_index, ids_type: loader format parameters
    (indices coerced to int; ids_type passed through).
    Terminates the process on completion via sys.exit().
    """
    download_from_s3(aws_region, s3_bucket, filename)
    svd = SVD()
    print 'Loading data to SVD module'
    svd.load_data(filename='./data/' + filename, sep=sep,
                  format={'col': int(col_index), 'row': int(row_index),
                          'value': int(value_index), 'ids': ids_type})
    # choose k so the retained singular values cover 60% of the energy
    k = derive_latent_dimensions(svd, energy_level=0.6)
    # (runtime string 'Stating' kept verbatim -- typo for 'Starting')
    print 'Stating to compute SVD at ', strftime("%Y-%m-%d %H:%M:%S", gmtime())
    svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True, savefile='./models/recommender')
    print "SVD model saved at ", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    sys.exit()  # to make sure that process finishes at the end
def calculate_stats_users(pct_train):
    """Train an SVD model on the working user-data CSV and print RMSE/MAE.

    pct_train: passed to split_train_test(percent=...).
    """
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    # note: post_normalize=False here, unlike most sibling helpers
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # unseen item/user -- skip
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
def calculate_stats_features(pct_train):
    """Split the feature matrix and fit a rank-100 SVD on the training part.

    pct_train: passed to split_train_test(percent=...).
    Returns (svd_model, train_split, test_split).
    """
    dat_file = 'feature_matrix.csv'
    data = Data()
    data.load(dat_file, sep=',', format={
        'col': 0,
        'row': 1,
        'value': 2,
        'ids': int
    })
    train, test = data.split_train_test(percent=pct_train)
    K = 100
    svd = SVD()
    svd.set_data(train)
    # feature matrix: no centering or post-normalization
    svd.compute(k=K, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=False)
    return svd, train, test
def color_user(input_file, output_file, data_file): data = Data() # VALUE = 1.0 # for username in likes: # for user_likes in likes[username]: # data.add_tuple((VALUE, username, user_likes)) # Tuple format is: <value, row, column> #读取所有user的履历,制作成SVD可执行的matrix f_r = open(data_file, 'r') for line in f_r: info = line.split(',') data.add_tuple((1.0, info[0], info[1])) svd = SVD() svd.set_data(data) k = 5 # Usually, in a real dataset, you should set a higher number, e.g. 100 svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False, post_normalize=True) #从question里读取需要被推荐的userid fr = open(input_file, 'r') for line in fr: userid = line user_list = svd.similar(userid) #print('=============================================') #print(user_list) #print(len(user_list)) #保存所有相似度大于50%的用户id到answer file fw = open(output_file, 'w') del user_list[0] #删除需要被推荐的用户自身id for user in user_list: if user[1] > 0.5: fw.write(user[0] + '\n') fw.close()
def loadSVD():
    """Factorize favRate.dat and print offerings similar to a hard-coded id.

    Titles are looked up from swoffering.yaml keyed by string offering id.
    NOTE(review): `file()` and unguarded `yaml.load` are Python-2-era usage.
    """
    filename = 'favRate.dat'
    svd = SVD()
    svd.load_data(filename=filename, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2})
    # dump the parsed matrix (second arg False per recsys save_data signature)
    svd.save_data("svd.dat", False)
    K = 20
    svd.compute(k=K, min_values=1, pre_normalize="rows", mean_center=False,
                post_normalize=True, savefile='.')
    #svd.recommend(USERID, n=10, only_unknowns=True, is_row=False)
    sparse_matrix = svd.get_matrix()
    sim_matrix = svd.get_matrix_similarity()
    print sparse_matrix
    #print sim_matrix
    #1173893,1396943
    # 10 offerings most similar to the hard-coded offering id
    sim = svd.similar(897346, 10)
    filename = 'swoffering.yaml'
    titleStream = file(filename, 'r')
    titleList = yaml.load(titleStream)
    #print sim
    for row in sim:
        (offid, similar) = row
        print offid, titleList[str(offid)], similar
def evaulte(train_set, test_set):  # [sic] misspelling kept -- callers use this name
    """Fit SVD on train_set and return MAE/2 over test_set, counting KeyErrors.

    KKK and MIN_ITEM are module-level constants.
    """
    svd = SVD()
    svd.set_data(train_set)
    svd.compute(k=KKK, min_values=MIN_ITEM, pre_normalize=None,
                mean_center=True, post_normalize=True)
    mae = MAE()
    k_err = 0  # test pairs that could not be predicted
    for rating, item_id, user_id in test_set.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            mae.add(rating, pred_rating)
        except KeyError:
            #print "keyerror: ===========================================================>"
            k_err += 1
            continue
    print "k_err", k_err, " -- ", "test-len: ", len(
        test_set.get()), "train-len: ", len(train_set.get())
    # halved MAE -- presumably rescaling the rating range; confirm
    result = mae.compute() / 2.0
    return result
def ex1(dat_file='ml-1m/ratings.dat', pct_train=0.5):
    """Train an SVD recommender on a MovieLens ratings file and print RMSE/MAE."""
    data = Data()
    data.load(dat_file, sep='::', format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # About format parameter:
    #   'row': 1 -> Rows in matrix come from column 1 in ratings.dat file
    #   'col': 0 -> Cols in matrix come from column 0 in ratings.dat file
    #   'value': 2 -> Values (Mij) in matrix come from column 2 in ratings.dat
    #                 file
    #   'ids': int -> Ids (row and col ids) are integers (not strings)
    # create train/test split
    train, test = data.split_train_test(percent=pct_train)
    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(
        k=K, min_values=5, pre_normalize=None, mean_center=True,
        post_normalize=True)
    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # unseen item/user in training matrix -- skip
            continue
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
def quickstart():
    """Demo of the recsys API: similar items, prediction, recommendation.

    Drops into pdb after the factorization for interactive exploration.
    """
    svd = SVD()
    recsys.algorithm.VERBOSE = True
    # load movielens data
    dat_file = 'ml-1m/ratings.dat'
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # compute svd
    k = 100
    svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True)
    # deliberate breakpoint for interactive inspection
    pdb.set_trace()
    # movie id's
    ITEMID1 = 1     # toy story
    ITEMID2 = 1221  # godfather II
    # get movies similar to toy story
    svd.similar(ITEMID1)
    # get predicted rating for given user & movie
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1
    # get predicted rating
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    actual = svd.get_matrix().value(ITEMID, USERID)
    print 'predicted rating = {0}'.format(pred)
    print 'actual rating = {0}'.format(actual)
    # which users should see Toy Story?
    svd.recommend(ITEMID)
def Compute():
    """Factorize the MovieLens 1M ratings and save the model to ./mvsvd."""
    model = SVD()
    data_format = {'col': 0, 'row': 1, 'value': 2, 'ids': int}
    model.load_data(filename='./ml-1m/ratings.dat', sep='::', format=data_format)
    model.compute(k=100, min_values=10, pre_normalize=None,
                  mean_center=True, post_normalize=True, savefile='./mvsvd')
svd = SVD() filename = './data4' filename = './data3.csv' #filename = './data2.csv' filename = './data.csv' filename = './data_l2.csv' filename = './2016.6.29.for_svd.csv' svd.load_data(filename=filename, sep=',', format={'col':0, 'row':1, 'value':2, 'ids': str}) # col -> user, row -> item, value -> label, ids -> timestamp k = 100 r = svd.compute(k=k, min_values=2, pre_normalize=None, mean_center=False, post_normalize=True, savefile='/tmp/movielens') #ITEMID1 = 109 # Toy Story (1995) #ITEMID2 = 106 # A bug's life (1998) #print(svd.similarity(ITEMID1, ITEMID2)) # 0.67706936677315799 item_set = set() import csv with open(filename, 'rb') as csvfile: reader = csv.reader(csvfile, delimiter=',') for row in reader:
# Train an SVD model on the MovieLens-latest-small training split and print a
# single clamped prediction for (user 1, item 1129).
path = "datasets/ml-latest-small/ratings_train_1.csv"
svd = SVD()
# NOTE(review): 'ids': float parses user/item ids as floats -- int is the
# usual choice for MovieLens ids; confirm the CSV actually has float ids.
svd.load_data(filename=path, sep=',', format={
    'col': 0,
    'row': 1,
    'value': 2,
    'ids': float
})
k = 30
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
            post_normalize=True, savefile='/tmp/movielens')
# ITEMID1 = 1     # Toy Story (1995)
# ITEMID2 = 2355  # A bug's life (1998)
# print svd.similarity(ITEMID1, ITEMID2)
MIN_RATING = 1.0
MAX_RATING = 5.0
USERID = 1
ITEMID = 1129
# prediction clamped into [MIN_RATING, MAX_RATING]
print svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
def recommended_files(user):
    """Return magnet links for files predicted to interest *user*.

    Exports the user's file list to ./dc_recom.dat once, finds the 10 most
    similar users via SVD, re-factorizes on their files only, and converts
    the top (name-deduplicated) recommendations to magnet links.
    """
    if not type(user) is str:
        # normalize unicode usernames to plain str keys
        user = unidecode.unidecode(user)
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        f = open('./dc_recom.dat', 'a')
        for u in user_files:
            f.write(u['user'] + '::' + u['tth'])
            f.write('\n')
        f.close()
        # mark exported so the file is appended only once per user
        db.done_users.update({'user': user}, {
            'user': user,
            'recommended': True
        })
    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    similar_users = [i[0] for i in svd.similar(user, n=10)]
    # rebuild the matrix from the similar users' files only
    newdata = Data()
    for i in range(0, len(similar_users), 1):
        files = db.user_list.find({'user': similar_users[i]})
        for f in files:
            newdata.add_tuple((1.0, similar_users[i], f['tth']))
    svd.set_data(newdata)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False,
                post_normalize=True)
    recoms = svd.recommend(user, is_row=True, only_unknowns=True, n=100)
    res = []
    c_res = 0
    for p in recoms:
        flag = 0
        for r in res:
            # drop files whose names look like ones already kept
            if similar(
                    db.tths.find_one({'tth': p[0]})['name'],
                    db.tths.find_one({'tth': r[0]})['name']):
                flag = 1
                break
        if flag == 0:
            res.append(p)
            c_res += 1
        if c_res > 10:
            # enough results: build magnet links (name appended when known)
            k = []
            for i in res:
                try:
                    j = 'magnet:?xt=urn:tree:tiger:' + i[
                        0] + "&dn=" + unidecode.unidecode(
                            db.tths.find_one({'tth': i[0]})['name'])
                except:
                    j = 'magnet:?xt=urn:tree:tiger:' + i[0]
                k.append(j)
            return k
    # fewer than 11 results: same link building for whatever accumulated
    k = []
    for i in res:
        try:
            j = 'magnet:?xt=urn:tree:tiger:' + i[
                0] + "&dn=" + unidecode.unidecode(
                    db.tths.find_one({'tth': i[0]})['name'])
        except:
            j = 'magnet:?xt=urn:tree:tiger:' + i[0]
        k.append(j)
    return k
}) #Haciendo el split al dataset filename = './data/ratings.dat' data = Data() format = {'col': 0, 'row': 1, 'value': 2, 'ids': int} data.load(filename, sep='::', format=format) train_80, test_20 = data.split_train_test(percent=80) # 80% train, 20% test svd = SVD() svd.set_data(train_80) #Ingresando variables para crear la matrizx k = 100 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True) k = 100 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True, savefile='./temporal/') #Hallando similitud entre 2 items from recsys.algorithm.factorize import SVD svd2 = SVD(filename='./temporal/') # Loading already computed SVD model
"country music", "office", "birds" } } data = Data() VALUE = 1.0 for username in likes: for user_likes in likes[username]: data.add_tuple((VALUE, username, user_likes)) # Tuple format is: <value, row, column> svd = SVD() svd.set_data(data) k = 5 # Usually, in a real dataset, you should set a higher number, e.g. 100 svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False, post_normalize=True) print(svd.similar('sheila')) print("######################") import difflib for key in likes: rajat = likes['rajat'] key1 = likes[key] rajat_list = list(rajat) key_list = list(key1) print 'rajat', key, difflib.SequenceMatcher(None, rajat_list, key_list).ratio()
def svd(filepath):
    """Cross-fold SVD evaluation: train per fold, write fold-1 predictions to disk.

    filepath: source ratings file; folder/name are derived via project helpers.
    Relies on module-level constants: NUM_FOLDS, _N, _M, _K, EXT,
    TRAIN_PREFIX, TEST_PREFIX, MIN_RATING, MAX_RATING.
    NOTE(review): the call to exit() below (placement reconstructed) makes the
    trailing close/eval-file code unreachable; rmse/mae are also never
    accumulated, so the eval file would report zeros -- looks unfinished.
    """
    src_folder = parseOutputFolderPath(filepath)
    base_file_name = parseFileName(filepath)
    avg_rmse = 0.0
    avg_mae = 0.0
    out_file_base = base_file_name + "_pred_svd"
    out_file = open(src_folder + "output/" + out_file_base + EXT, "w")
    # for each fold
    for fold_index in xrange(1, NUM_FOLDS + 1):
        print "*** \t FOLD {0} \t ***".format(fold_index)
        # sparse ground-truth matrix for this fold's test ratings
        M_test = lil_matrix((_N, _M))
        rmse = 0.0
        mae = 0.0
        train_path = src_folder + base_file_name + TRAIN_PREFIX + str(
            fold_index) + EXT
        test_path = src_folder + base_file_name + TEST_PREFIX + str(
            fold_index) + EXT
        print train_path
        print test_path
        svd = SVD()
        svd.load_data(filename=train_path, sep=',', format={
            'col': 0,
            'row': 1,
            'value': 2,
            'ids': float
        })
        svd.compute(k=_K, min_values=1, pre_normalize=None, mean_center=True,
                    post_normalize=True)
        # load the fold's test ratings into the sparse matrix
        with open(test_path, "r") as infile:
            reader = csv.reader(infile, delimiter=",")
            for line in reader:
                userid = int(line[0], 10)
                movieid = int(line[1], 10)
                score = float(line[2])
                M_test[userid, movieid] = score
        # GROUND_TRUTH = [3.0, 1.0, 5.0, 2.0, 3.0]
        # TEST = [2.3, 0.9, 4.9, 0.9, 1.5]
        # mae = MAE()
        # mae.load_ground_truth(GROUND_TRUTH)
        # mae.load_test(TEST)
        # mae.compute() #returns 0.7
        # write predictions only for first test (fold)
        if (fold_index == 1):
            rows, cols = M_test.nonzero()
            for row, col in zip(rows, cols):
                try:
                    r_xi = svd.predict(col, row, MIN_RATING, MAX_RATING)
                except:
                    # NOTE(review): r_xi may be unbound (or stale) on the
                    # write below when predict raises -- confirm intent
                    print row, col
                out_file.write(
                    str(row) + '\t' + str(col) + '\t' + str(r_xi) + '\n')
        print "..done"
        print ""
    # NOTE(review): everything below is dead code because of this exit()
    exit()
    out_file.close()
    # average rmse and mae on validation folds
    eval_out_path = src_folder + "output/" + out_file_base + "_eval" + EXT
    with open(eval_out_path, "w") as file:
        file.write("RMSE" + "\t" + "MAE" + "\n")
        avg_rmse /= float(NUM_FOLDS)
        avg_mae /= float(NUM_FOLDS)
        file.write(str(avg_rmse) + "\t" + str(avg_mae))
class NewsRec():
    """SVD-based news recommender: load train/test files, recommend, evaluate."""

    def __init__(self):
        # the underlying factorization model
        self.svd = SVD()
        # list of (user_id, item_id) pairs loaded by load_test
        self.test_set = []

    def load_data(self, filename='train_set_for_svd'):
        """Load tab-separated training triples (value, item, user) into the model."""
        self.svd.load_data(filename, sep='\t', format={
            'value': 0,
            'row': 2,
            'col': 1,
            'ids': int
        })

    def load_test(self, filename='test_set_for_svd'):
        """Read the test file; fields 1 and 2 of each line are (user, item)."""
        with open(filename, 'r') as f:
            for line in f:
                strs = line.split('\t')
                self.test_set.append((int(strs[1]), int(strs[2])))

    def recom(self, user_id, recom_num=3, only_unknown=True):
        """Return top-N recommendations for a user, or -1 for unknown users."""
        try:
            #index = self.svd._matrix._matrix.col_index(user_id)
            index = user_id
            return self.svd.recommend(index,
                                      recom_num,
                                      only_unknowns=only_unknown,
                                      is_row=False)
        except IndexError as e:
            # user id outside the matrix -- sentinel for "no recommendation"
            return -1

    def compute(self, k=100):
        """Factorize the loaded data with k latent dimensions."""
        self.svd.compute(k=k,
                         min_values=None,
                         pre_normalize=None,
                         mean_center=False,
                         post_normalize=True)

    def test(self, recom_num=3):
        """Evaluate hit rate over test_set; prints hit/precision/recall/F1.

        Also fills self.ret with every (user, recommended_item) pair so
        print_ret can dump them afterwards.
        """
        hit_cnt = 0
        self.ret = []
        for user, item in self.test_set:
            re = self.recom(user, recom_num)
            #print re
            if type(re) != type([]):
                # recom returned -1 (unknown user)
                continue
            try:
                #item_index = self.svd._matrix._matrix.row_index(item)
                item_index = item
            except KeyError as e:
                continue
            for rec_index, rec_rate in re:
                self.ret.append((user, rec_index))
                if item_index == rec_index:
                    hit_cnt += 1
        if hit_cnt == 0:
            # avoid ZeroDivisionError below when nothing was hit
            return
        user_sum = len(self.test_set)
        recom_sum = recom_num * user_sum
        precise = float(hit_cnt) / recom_sum
        recall = float(hit_cnt) / user_sum
        f = 2.0 / ((1.0 / precise) + (1.0 / recall))
        print 'hit:', hit_cnt
        print 'precise:', precise
        print 'recall:', recall
        print 'F:', f

    def print_ret(self, filename):
        """Write self.ret as 'userid,newsid' CSV lines to *filename*."""
        string = ["userid,newsid\n"]
        for user, item in self.ret:
            string.append(str(user))
            string.append(',')
            string.append(str(item))
            string.append('\n')
        with open(filename, 'w') as f:
            f.write("".join(string))
class RecommendSystem(object):
    """Movie recommender wrapping recsys SVD: load/train/predict/recommend/evaluate.

    Relies on module-level tmpfile (saved model zip path) and moviefile
    (MovieLens movies.dat path).
    """

    def __init__(self, filename, sep, **format):
        self.filename = filename
        self.sep = sep
        self.format = format
        # training parameters (translated: 训练参数)
        self.k = 100
        self.min_values = 10
        self.post_normalize = True
        self.svd = SVD()
        # whether a saved model was loaded from disk (translated: 判断是否加载)
        self.is_load = False
        # data loading/splitting helper (translated: 添加数据处理)
        self.data = Data()
        # evaluation metric (translated: 添加模型评估)
        self.rmse = RMSE()

    def get_data(self):
        """Load ratings and split, or load a previously saved model.

        :return: (train, test) when raw data was loaded;
                 (None, None) when a saved model exists and was loaded instead.
        """
        # no saved model yet -> load raw ratings
        if not os.path.exists(tmpfile):
            # bail out when the ratings file is missing too
            if not os.path.exists(self.filename):
                sys.exit()
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            # use Data() to load the ratings (translated comment)
            self.data.load(self.filename, sep=self.sep, format=self.format)
            train, test = self.data.split_train_test(percent=80)
            return train, test
        else:
            self.svd.load_model(tmpfile)
            self.is_load = True
            return None, None

    def train(self, train):
        """Train the SVD model (skipped when a saved model was loaded).

        :param train: training data split
        :return: None
        """
        if not self.is_load:
            self.svd.set_data(train)
            # savefile gets tmpfile minus its extension -- presumably
            # tmpfile ends in '.zip' which recsys re-appends; confirm
            self.svd.compute(k=self.k,
                             min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=tmpfile[:-4])
        return None

    def rs_predict(self, itemid, userid):
        """Predict the rating of *userid* for *itemid* and print it.

        :param itemid: movie id
        :param userid: user id
        :return: predicted score
        """
        score = self.svd.predict(itemid, userid)
        print "推荐的分数为:%f" % score
        return score

    def recommend_to_user(self, userid):
        """Print movie recommendations (title + predicted score) for *userid*.

        :param userid: user id
        :return: None
        """
        # is_row=False: userid indexes a matrix column
        recommend_list = self.svd.recommend(userid, is_row=False)
        # read movie titles from the movies file (translated comment)
        movie_list = []
        for line in open(moviefile, "r"):
            movie_list.append(' '.join(line.split("::")[1:2]))
        # print each recommended title with its predicted score
        # NOTE(review): itemid is used directly as a list index -- assumes
        # movie ids are dense and aligned with file line order; confirm
        for itemid, rate in recommend_list:
            print "给您推荐了%s,我们预测分数为%s" % (movie_list[itemid], rate)
        return None

    def evaluation(self, test):
        """Compute and print RMSE over the test split (skipped for loaded models).

        :param test: test data split
        :return: None
        """
        # only meaningful when the model was freshly trained
        if not self.is_load:
            # iterate test tuples of <rating, movie, user> (translated comment)
            for value, itemid, userid in test.get():
                try:
                    predict = self.rs_predict(itemid, userid)
                    self.rmse.add(value, predict)
                except KeyError:
                    # unseen item/user -- skip
                    continue
            # compute and report the root-mean-square error
            error = self.rmse.compute()
            print "模型误差为%s:" % error
        return None
# Singular value decomposition model builder for the ratings file.
#
# Run once: factorizes the rating matrix (M = U * Sigma * V^T) with the
# pyrecsys library and saves the computed model as a zip archive.
# Refer to the pyrecsys docs for more details on SVD.
import recsys.algorithm
from recsys.algorithm.factorize import SVD

# Make the library report progress while loading and computing.
recsys.algorithm.VERBOSE = True

svd = SVD()
# The format dict maps CSV columns onto the sparse matrix layout.
svd.load_data(filename='ratings_complete.csv',
              sep=',',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

# Number of latent factors to keep.
k = 100
svd.compute(k=k,
            min_values=10,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
            savefile='movielens_complete')
print("Model Computed and Created")
class NewsRec():
    """SVD-based news recommender; evaluates top-N hit rate on a held-out set."""

    def __init__(self):
        # Factorization model and the (user, item) pairs held out for testing.
        self.svd = SVD()
        self.test_set = []

    def load_data(self, filename='train_set_for_svd'):
        # Input layout: rating in field 0, item id in field 2, user id in field 1.
        self.svd.load_data(filename, sep='\t', format={'value': 0, 'row': 2, 'col': 1, 'ids': int})

    def load_test(self, filename='test_set_for_svd'):
        # Each tab-separated line contributes a (user_id, item_id) pair from
        # fields 1 and 2; field 0 is ignored (presumably a label — confirm
        # against the file writer).
        with open(filename, 'r') as f:
            for line in f:
                strs = line.split('\t')
                self.test_set.append((int(strs[1]), int(strs[2])))

    def recom(self, user_id, recom_num=3, only_unknown=True):
        # Top-N recommendation list for one user, or -1 when the id falls
        # outside the model's matrix.
        try:
            #index = self.svd._matrix._matrix.col_index(user_id)
            index = user_id
            return self.svd.recommend(index, recom_num, only_unknowns=only_unknown, is_row=False)
        except IndexError as e:
            return -1

    def compute(self, k=100):
        # Factorize with k latent features; no centering or pre-normalization.
        self.svd.compute(k=k, min_values=None, pre_normalize=None, mean_center=False, post_normalize=True)

    def test(self, recom_num=3):
        # Count how many held-out items appear in their user's top-N list,
        # then report precision / recall / F1 over the whole test set.
        hit_cnt = 0
        self.ret = []
        for user, item in self.test_set:
            re = self.recom(user, recom_num)
            #print re
            if type(re) != type([]):
                # recom() signalled an unknown user with -1.
                continue
            try:
                #item_index = self.svd._matrix._matrix.row_index(item)
                item_index = item
            except KeyError as e:
                continue
            for rec_index, rec_rate in re:
                self.ret.append((user, rec_index))
                if item_index == rec_index:
                    hit_cnt += 1
        if hit_cnt == 0:
            # The metrics below would divide by zero, so bail out.
            return
        user_sum = len(self.test_set)
        recom_sum = recom_num * user_sum
        precise = float(hit_cnt) / recom_sum
        recall = float(hit_cnt) / user_sum
        f = 2.0 / ((1.0 / precise) + (1.0 / recall))
        print 'hit:', hit_cnt
        print 'precise:', precise
        print 'recall:', recall
        print 'F:', f

    def print_ret(self, filename):
        # Write the recommendations collected by test() as "userid,newsid" CSV.
        string = ["userid,newsid\n"]
        for user, item in self.ret:
            string.append(str(user))
            string.append(',')
            string.append(str(item))
            string.append('\n')
        with open(filename, 'w') as f:
            f.write("".join(string))
class Recommender:
    """Film recommender backed by a pyrecsys SVD model.

    The rating matrix is produced by the project's ``rm.MatrixCreator`` and
    stored on disk as space-separated ``user_index film_index rate`` rows.
    """

    def __init__(self, datafile_path=None):
        self.svd = SVD()
        self.matrix = None
        self.datafile_path = datafile_path
        self.predict_matrix = None
        # Eagerly restore the local matrix and factorize with 100 factors.
        self.load_local_data(self.datafile_path, 100, 0)

    def load_web_data(self, filename, film_names_with_rate_list, K, min_values,
                      MAX_COUNT_USER_FILMS=None, MAX_COUNT_FILM_USERS=None):
        """Build a rating matrix from film titles, save it, then factorize."""
        self.matrix = rm.MatrixCreator(MAX_COUNT_USER_FILMS, MAX_COUNT_FILM_USERS).\
            create_matrix_by_film_titles(film_names_with_rate_list)
        self.matrix.save_rating_matrix_as_file(filename)
        self.datafile_path = filename
        self.__compute_matrix(K, min_values)

    def load_local_data(self, filename, K, min_values):
        """Restore a previously saved rating matrix and factorize it."""
        self.matrix = rm.MatrixCreator().restore_from_file(filename)
        self.datafile_path = filename
        self.__compute_matrix(K, min_values)

    def get_predictions_for_all_users(self, min_rate=1, max_rate=10, top=None,
                                      K=None, min_values=0):
        """Fill and return a dense users x films matrix of predicted rates."""
        if K:
            self.__compute_matrix(K)
        self.predict_matrix = np.zeros((len(self.matrix.users_indexes_map),
                                        len(self.matrix.films_indexes_map)))
        for user in self.matrix.users_indexes_map.keys():
            for film in self.matrix.films_indexes_map.keys():
                user_index = self.matrix.users_indexes_map[user]
                film_index = self.matrix.films_indexes_map[film]
                self.predict_matrix[user_index][film_index] = self.svd.predict(
                    user_index, film_index,
                    MIN_VALUE=min_rate, MAX_VALUE=max_rate)
        return self.predict_matrix

    def predict_for_user(self, user_index, min_rate=1, max_rate=10, top=None,
                         repeat=False, K=None, min_values=None):
        """Predict the rate of every film for one user.

        :param K: when given, recompute the factorization with K factors
        :param repeat: when False, drop films the fake user already rated
        :param top: when given, return only the `top` best-rated films
        :return: {Film: rate, ...}, or [(Film, rate), ...] when `repeat` is
            False or `top` is given (sorted best-first in the `top` case)
        """
        if K:
            self.__compute_matrix(K)
        prediction = {}
        np_matrix = self.matrix.get_rating_matrix()
        for index in xrange(np_matrix.shape[1]):
            rate = self.svd.predict(user_index, index,
                                    MIN_VALUE=min_rate, MAX_VALUE=max_rate)
            film = self.matrix.indexes_films_map[index]
            prediction[film] = rate
        if not repeat:
            # Drop the films the fake user has already rated.
            fake_user_index = self.matrix.indexes_with_fake_user_ids.keys()[0]
            user = self.matrix.indexes_users_map[fake_user_index]
            films = user.get_preferences().keys()
            prediction = [(x, prediction[x]) for x in prediction if x not in films]
        if top:
            # BUG FIX: `prediction` is a list of pairs when repeat=False, so
            # the former unconditional `prediction.items()` raised
            # AttributeError whenever `top` was combined with the default
            # repeat=False. Normalise to pairs before ranking.
            pairs = prediction.items() if isinstance(prediction, dict) else prediction
            pairs = sorted(pairs, key=operator.itemgetter(1))
            # Keep the `top` highest rates, best first.
            prediction = list(reversed(pairs[-top:]))
        return prediction

    def predict_for_all_fake_users(self, min_rate=1, max_rate=10, top=None,
                                   K=None, min_values=0):
        """Run predict_for_user for every fake user.

        :param K: when given, recompute the factorization with K factors
        :return: list of per-user predictions (see predict_for_user)
        """
        if K:
            self.__compute_matrix(K)
        predictions = []
        for user_index in self.matrix.indexes_with_fake_user_ids.keys():
            prediction = self.predict_for_user(user_index, min_rate, max_rate, top)
            predictions.append(prediction)
        return predictions

    def predicted_rating_submatrix(self, user_indexes):
        """Dense predicted-rating rows (clamped to 1..10) for the given users."""
        self.__compute_matrix(100)
        # Dummy first row gives np.append a matching shape; stripped by [1:].
        predicted = np.empty((1, self.matrix.rating_matrix.shape[1]), int)
        for index in user_indexes:
            row = []
            for film_index in xrange(self.matrix.rating_matrix.shape[1]):
                row.append(
                    self.svd.predict(index, film_index, MIN_VALUE=1, MAX_VALUE=10))
            predicted = np.append(predicted, [row], axis=0)
        return predicted[1:]

    def predicted_rating_submatrix_for_fake(self):
        """Predicted-rating rows for every fake user."""
        return self.predicted_rating_submatrix(
            self.matrix.indexes_with_fake_user_ids.keys())

    def __compute_matrix(self, K, min_values=0, pre_normalize=None,
                         mean_center=True, post_normalize=True):
        # Reload the space-separated `user film rate` file and factorize it.
        self.svd.load_data(self.datafile_path, sep=' ', format={
            'col': 1,
            'row': 0,
            'value': 2,
            'ids': int
        })
        self.svd.compute(K, min_values, pre_normalize, mean_center,
                         post_normalize, savefile=None)

    def filter_films_data(self, min_user_votes):
        """Write a copy of the data file without films rated by fewer than
        `min_user_votes` users, re-indexing the surviving films densely.

        Produces `<datafile>_<n>`, `<datafile>_<n>_film_map` and
        `<datafile>_<n>_user_map` next to the originals.
        """
        # Count votes per film.
        counter = collections.Counter()
        with open(self.datafile_path, 'rb') as my_file:
            for row in csv.reader(my_file):
                user_index, film_index, rate = row[0].split(' ')
                counter[int(film_index)] += 1
        # Films to drop; a set makes the membership tests below O(1)
        # (previously a list, giving O(n) per lookup).
        film_indexes = set()
        for k, v in counter.iteritems():
            if v < min_user_votes:
                film_indexes.add(k)
        copyfile(self.datafile_path + '_user_map',
                 self.datafile_path + '_' + str(min_user_votes) + '_user_map')
        # Re-number the surviving films and write the new film map.
        new_indexes = {}
        with open(self.datafile_path + '_film_map', 'rb') as read_file:
            r = csv.reader(read_file)
            with open(
                    self.datafile_path + '_' + str(min_user_votes) + '_film_map',
                    'wb') as write_file:
                wr = csv.writer(write_file, delimiter=' ')
                index = 0
                for row in r:
                    film_index, film_id = row[0].split(' ')
                    if int(film_index) in film_indexes:
                        continue
                    new_indexes[film_index] = index
                    wr.writerow([index, film_id])
                    index += 1
        # Rewrite the rating rows using the new film indexes.
        with open(self.datafile_path, 'rb') as read_file:
            r = csv.reader(read_file)
            with open(self.datafile_path + '_' + str(min_user_votes),
                      'wb') as write_file:
                wr = csv.writer(write_file, delimiter=' ')
                for row in r:
                    user_index, film_index, rate = row[0].split(' ')
                    if int(film_index) in film_indexes:
                        continue
                    wr.writerow([user_index, new_indexes[film_index], rate])
import recsys.algorithm
recsys.algorithm.VERBOSE = True
from recsys.algorithm.factorize import SVD

import csv

# Factorize the training ratings.
svd = SVD()
svd.load_data(filename='train.csv', sep=',',
              format={'col': 0, 'row': 1, 'value': 2})
k = 100
svd.compute(k=k, pre_normalize=None, mean_center=True, post_normalize=True)

# Clamp every prediction to the observed play-count range.
MIN_RATING = 0.0
MAX_RATING = 5000.0

test_file = 'test.csv'
soln_file = 'recsys.csv'

with open(test_file, 'r') as test_fh:
    reader = csv.reader(test_fh, delimiter=',', quotechar='"')
    next(reader, None)  # skip the header row
    with open(soln_file, 'w') as soln_fh:
        writer = csv.writer(soln_fh, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Id', 'plays'])
        # One prediction per (user, artist) pair in the test file.
        for row in reader:
            row_id = row[0]
            user = row[1]
            artist = row[2]
            prediction = svd.predict(artist, user, MIN_RATING, MAX_RATING)
            writer.writerow([row_id, prediction])
# NOTE(review): `data` and `svd` are created earlier in this file (not visible
# in this chunk).
print len(data._data)
# NOTE(review): this loop has no effect — `rate[0]` is evaluated and discarded.
for rate in data._data:
    rate[0]
# Keep only the tuples whose second field is below 1000 — presumably an id
# filter to shrink the matrix; confirm the field meaning against the loader.
data.set([rate for rate in data._data if rate[1] < 1000])
print len(data._data)
svd.set_data(data)

# Factorize with k latent features, dropping rows/cols with < 10 ratings.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)

#ITEMID1 = 1 # Toy Story (1995)
#ITEMID2 = 2355 # A bug's life (1998)
#print svd.similarity(ITEMID1, ITEMID2)
#print svd.similar(ITEMID1)

# Bounds used to clamp predicted ratings.
MIN_RATING = 0.0
MAX_RATING = 5.0
ITEMID = 1
USERID = 1
#!/usr/bin/env python # coding=utf-8 from recsys.algorithm.factorize import SVD svd = SVD() svd.load_data(filename='../invited_info_train_question_sort.txt', sep='\t', format={ 'col': 0, 'row': 1, 'value': 2, 'ids': str }) k = 200 svd.compute(k=k, savefile='../tmp/weight') svd2 = SVD(filename='../tmp/weight') # Loading already computed SVD model output_path = "./output.txt" output_file = open(output_path, 'w') validate_file = file("../validate_nolabel.txt") line = validate_file.readline() line = validate_file.readline().strip("\r\n") while line: question_id = line.split(',')[0] user_id = line.split(',')[1] try: predict = svd2.predict(user_id, question_id, 0.0, 1.0) except: predict = 0
test.append( {"1_user_id": int(user), "2_item_id": int(item) }) return test recsys.algorithm.VERBOSE = True print "loading data" data = Data() data.load('../item_recom/train_info.tsv',sep='\t', format={'col':0, 'row':1, 'value':6, 'ids': int}) topic = 48 print "compute svd" svd = SVD() svd.set_data(data) svd.compute(k=topic, min_values=0.0, pre_normalize=None, mean_center=True, post_normalize=True) print "loading test data" test = loadTest('../item_recom/test_info.tsv') print svd.predict(0,0) print "creating submission" with open('../submissions/recsys_3.csv', 'w') as csvfile: fieldnames = ['uid#iid', 'pred'] writer = csv.DictWriter(csvfile, fieldnames) writer.writeheader() for ind in xrange(len(test)): writer.writerow( { 'uid#iid': "%d#%d"%(test[ind]["1_user_id"], test[ind]["2_item_id"]),
class RecommendSystem(object):
    """SVD movie recommender: trains or loads a model, recommends, evaluates."""

    def __init__(self, filename, sep, **format):
        # Rating-file information
        self.filename = filename
        self.sep = sep
        self.format = format
        # Initialize the matrix factorization model
        self.svd = SVD()
        # Factorization settings
        self.k = 100  # number of latent factors
        self.min_values = 10  # drop movies rated by fewer than 10 people
        self.post_normalize = False
        # Whether a precomputed model was loaded (skips training/evaluation)
        self.load_model = False
        # Root-mean-squared-error accumulator
        self.rmse = RMSE()

    def get_data(self):
        # NOTE(review): `filename` here is a module-level name (the saved
        # model path) defined elsewhere in this file — easy to confuse with
        # self.filename; confirm.
        # When no saved model exists, load and split the ratings instead.
        if not os.path.exists(filename):
            if not os.path.exists(self.filename):
                sys.exit()
            # SVD could also load the data directly:
            # self.svd.load_data(filename=self.filename, sep=self.sep, format=self.format)
            data = Data()
            data.load(self.filename, sep=self.sep, format=self.format)
            # 80/20 train/test split
            train, test = data.split_train_test(percent=80)
            return train, test
        else:
            # Load the precomputed model directly
            self.svd.load_model(filename)
            # Mark the model as loaded
            self.load_model = True
            return None, None

    def train(self, train):
        """
        Factorize the training data (no-op when a model was loaded).
        :param train: training set
        :return: None
        """
        if not self.load_model:
            self.svd.set_data(train)
            # Note: savefile takes the model path without its extension
            self.svd.compute(k=self.k,
                             min_values=self.min_values,
                             post_normalize=self.post_normalize,
                             savefile=filename[:-4])
        return None

    def recommend_to_user(self, userid):
        """
        Print the recommended movie names with their predicted ratings.
        :param userid: user id
        :return: None
        """
        recommend_list = self.svd.recommend(userid, is_row=False)
        # Build the list of movie names from the MovieLens movies file
        movies_list = []
        for line in open("./data/ml-1m/movies.dat", "r"):
            movies_list.append(' '.join(line.split("::")[1:2]))
        # Print each recommended movie with its predicted rating
        for itemid, rating in recommend_list:
            print "给你推荐的电影叫%s, 预测你对它的评分是%f" % (movies_list[itemid], rating)
        return None

    def rs_predict(self, userid, itemid):
        """
        Predict one user's rating for one item.
        :param userid: user id
        :param itemid: item id
        :return: predicted score
        """
        score = self.svd.predict(itemid, userid)
        return score

    def evaluation(self, test):
        """
        Evaluate the trained model with RMSE on the test split.
        :param test: test set
        :return: None
        """
        if not self.load_model:
            # Test tuples are <rating, row (itemid), col (userid)>
            for rating, itemid, userid in test.get():
                try:
                    # `rating` is the ground-truth value
                    score = self.rs_predict(userid, itemid)
                    # Accumulate every test pair into the RMSE
                    self.rmse.add(rating, score)
                except KeyError:
                    continue
            error = self.rmse.compute()
            print "均方误差为:%s" % error
        return None