def test_sort(): show_news_create_time() global x local_news = x.get_all_info() news = Documents.sort_news_by_time(local_news); print '____________________________________________________' for i in news: print i.get_create_time()#do not new.get here
def test_cbr(target_reader_list = []): input_target_len = len(target_reader_list) #use training data to build user vector testing = Documents('user_click_data.txt',True) test_news = testing.get_AllNews() user_read_dict = dict() target_reader = [] #you can specify the target_reader number here!!!! target_num = 5 if input_target_len != 0: target_num = input_target_len get = 0 while len(target_reader)<(target_num): utemp = test_news[get].get_userid() get += 1 if not utemp in target_reader: target_reader.append(utemp) if input_target_len != 0: target_reader = target_reader_list training = Documents('user_click_data.txt',True) train_news = training.get_AllNews() cbr = CBR(train_news) cbr.build_user_vector(target_reader) cbr.transform_user_vector() for i in test_news: uid = i.get_userid() nid = i.get_newsid() if uid in target_reader: pass else: #print uid target_reader.append(uid) if uid in user_read_dict: user_read_dict[uid].append(nid) else: user_read_dict[uid] = [] user_read_dict[uid].append(nid) #if you want to specify the target_readerlist, you can uncomment the folllwing line #target_reader = target_reader_list #print target_reader for i in range(target_num): recommend = cbr.get_recommendation_list(target_reader[i],test_news) #print 'recommend: ',recommend #print 'real: ',user_read_dict[target_reader[i]] temp = [] for item in recommend: temp.append(item[0]) count_read = 0 count = 0 for t in user_read_dict[target_reader[i]]: count_read += 1 if t in temp: count += 1 print 'reader ',target_reader[i],' read:',len(user_read_dict[target_reader[i]]),' accept: ',count
def recommend(self): if self.type == 0: mtrain_path = '/root/git/python/rs_system/rs/rs/CBR/user_click_data.txt' mtest_path = '/root/git/python/rs_system/rs/rs/CBR/user_click_data.txt' mtrain_path = self.train_path mtest_path = self.test_path if self.user: start_cbr.test_cbr([self.user], mtrain_path, mtest_path) else: start_cbr.test_cbr([], mtrain_path, mtest_path) if self.type == 1: results = dict() news = Documents(self.train_path, is_tfidf=False, type=1) m = news.get_user_item_matrix() # item_news_m = news.get_item_user_m() #item_news_m[newsid] represents users who saw newsid item_news_m = news.get_item_user_m() test = Documents(self.test_path, is_tfidf=False, type=1) test_user_item = test.get_user_item_matrix() recom_items = test.get_items() if self.user: users = [self.user] else: users = self.get_sample_users(m, 30) for i_user in users: if i_user not in test_user_item: continue test_user_seen = test_user_item[i_user].keys() for it in recom_items: seen_items = m[self.user].keys() if it in seen_items: seen_items.remove(it) rating = 0 user_id = item_news_m[it].keys() for id in user_id: count = self.user_sim(m[id].keys(), seen_items) similarity = count / math.sqrt( len(m[id].keys()) * len(m[self.user].keys())) rating += m[id][it] * similarity results[it] = rating res = sorted(results.items(), key=lambda k: k[1], reverse=True)[0:10] predict_seen = [it[0] for it in res] # for (key,val) in res: # print "item %d rating is %f" % (key, val) acc, hit_items = self.cal_accuracy(test_user_seen, predict_seen) print "%d %f " % (i_user, acc), hit_items if self.type == 2: news = Documents(self.train_path, is_tfidf=False, type=2) user_item_m = news.get_user_item_matrix() item_news_m = news.get_item_user_m( ) #item_news_m[newsid] represents users who saw newsid test = Documents(self.test_path, is_tfidf=False, type=2) test_user_item = test.get_user_item_matrix() recom_items = test.get_items() if self.user: users = [self.user] else: users = self.get_sample_users(user_item_m, 30) for i_user in users: if i_user not in test_user_item: continue test_user_seen = test_user_item[i_user].keys() user_seen = user_item_m[i_user].keys( ) #items that specified user has seen results = {} for it in recom_items: if it in user_seen: user_seen.remove(it) rating = 0 for seen in user_seen: similarity = self.sim(item_news_m[it], item_news_m[seen]) rating += user_item_m[i_user][seen] * similarity results[it] = rating # print "item %d rating is %f" % (it, rating) res = sorted(results.items(), key=lambda k: k[1], reverse=True)[0:10] predict_seen = [it[0] for it in res] # print "predict user %d will see " %i_user, predict_seen # for (key,val) in res: # print "item %d rating is %f" % (key, val) acc, hit_items = self.cal_accuracy(test_user_seen, predict_seen) # print "mark user %d see news in test " %i_user , test_user_seen # print "prediction accuracy is %f" % acc # print "hit news are", hit_items print "%d %f " % (i_user, acc), hit_items
def recommend(self): if self.type == 0: mtrain_path = '/root/git/python/rs_system/rs/rs/CBR/user_click_data.txt' mtest_path = '/root/git/python/rs_system/rs/rs/CBR/user_click_data.txt' mtrain_path = self.train_path mtest_path = self.test_path if self.user: start_cbr.test_cbr([self.user],mtrain_path,mtest_path) else: start_cbr.test_cbr([],mtrain_path,mtest_path) if self.type == 1: results=dict() news = Documents(self.train_path, is_tfidf=False,type=1) m = news.get_user_item_matrix() # item_news_m = news.get_item_user_m() #item_news_m[newsid] represents users who saw newsid item_news_m = news.get_item_user_m() test = Documents(self.test_path, is_tfidf=False, type=1) test_user_item = test.get_user_item_matrix() recom_items = test.get_items() if self.user: users = [self.user] else: users = self.get_sample_users(m,30) for i_user in users: if i_user not in test_user_item: continue test_user_seen = test_user_item[i_user].keys() for it in recom_items: seen_items = m[self.user].keys() if it in seen_items: seen_items.remove(it) rating=0 user_id=item_news_m[it].keys() for id in user_id: count=self.user_sim(m[id].keys(),seen_items) similarity=count/math.sqrt(len(m[id].keys())*len(m[self.user].keys())) rating+=m[id][it]*similarity results[it]=rating res = sorted(results.items(),key = lambda k:k[1],reverse=True)[0:10] predict_seen = [it[0] for it in res] # for (key,val) in res: # print "item %d rating is %f" % (key, val) acc,hit_items = self.cal_accuracy(test_user_seen,predict_seen) print "%d %f "% (i_user,acc),hit_items if self.type == 2: news = Documents(self.train_path, is_tfidf=False, type=2) user_item_m = news.get_user_item_matrix() item_news_m = news.get_item_user_m() #item_news_m[newsid] represents users who saw newsid test = Documents(self.test_path, is_tfidf=False, type=2) test_user_item = test.get_user_item_matrix() recom_items = test.get_items() if self.user: users = [self.user] else: users = self.get_sample_users(user_item_m,30) for i_user in users: if i_user not in test_user_item: continue test_user_seen = test_user_item[i_user].keys() user_seen = user_item_m[i_user].keys() #items that specified user has seen results = {} for it in recom_items: if it in user_seen: user_seen.remove(it) rating = 0 for seen in user_seen: similarity = self.sim(item_news_m[it],item_news_m[seen]) rating += user_item_m[i_user][seen] * similarity results[it] = rating # print "item %d rating is %f" % (it, rating) res = sorted(results.items(),key = lambda k:k[1],reverse=True)[0:10] predict_seen = [it[0] for it in res] # print "predict user %d will see " %i_user, predict_seen # for (key,val) in res: # print "item %d rating is %f" % (key, val) acc,hit_items = self.cal_accuracy(test_user_seen,predict_seen) # print "mark user %d see news in test " %i_user , test_user_seen # print "prediction accuracy is %f" % acc # print "hit news are", hit_items print "%d %f "% (i_user,acc),hit_items