コード例 #1
0
def test_sort():
    """Show news creation times, then print them again in sorted order.

    Calls ``show_news_create_time()`` first, fetches every record from the
    module-level ``x`` through ``get_all_info()``, orders the records with
    ``Documents.sort_news_by_time`` and finally prints a separator line
    followed by the ``get_create_time()`` value of each ordered record.
    """
    global x
    show_news_create_time()
    ordered = Documents.sort_news_by_time(x.get_all_info())
    print('____________________________________________________')
    # The getter is called on each element, not on the collection itself.
    for entry in ordered:
        print(entry.get_create_time())
コード例 #2
0
def test_cbr(target_reader_list=None, train_path='user_click_data.txt',
             test_path='user_click_data.txt'):
    """Evaluate CBR recommendations for a set of readers and print hit counts.

    Builds user vectors from the training click data, asks the ``CBR`` model
    for a recommendation list per target reader, and prints how many of the
    news ids each reader has in the test data also appear in that list.

    Args:
        target_reader_list: Optional sequence of user ids to evaluate. When
            empty or ``None``, the first five distinct user ids found in the
            test data are used instead (fewer if the data has fewer users).
            The sequence is copied; the caller's object is never modified.
        train_path: Click-data file used to build the user vectors.
        test_path: Click-data file used as ground truth for the evaluation.

    Returns:
        None. One summary line per evaluated reader is printed.
    """
    # Number of readers sampled from the data when none are specified.
    default_target_num = 5

    testing = Documents(test_path, True)
    test_news = testing.get_AllNews()

    if target_reader_list:
        # Work on a copy: this list is extended below and must not leak
        # those additions back into the caller's sequence.
        target_reader = list(target_reader_list)
    else:
        target_reader = []
        # Bounded scan: stops at the end of the data instead of indexing
        # past it when fewer distinct users than requested exist.
        for record in test_news:
            if len(target_reader) >= default_target_num:
                break
            candidate = record.get_userid()
            if candidate not in target_reader:
                target_reader.append(candidate)
    target_num = len(target_reader)

    training = Documents(train_path, True)
    train_news = training.get_AllNews()
    cbr = CBR(train_news)
    cbr.build_user_vector(target_reader)
    cbr.transform_user_vector()

    # user id -> list of news ids that user has in the test data.
    user_read_dict = dict()
    known_readers = set(target_reader)
    for record in test_news:
        uid = record.get_userid()
        nid = record.get_newsid()
        if uid not in known_readers:
            # NOTE(review): the original also appended unseen users to the
            # list that was handed to cbr.build_user_vector; kept in case CBR
            # holds a reference to it -- confirm whether this is needed.
            known_readers.add(uid)
            target_reader.append(uid)
        user_read_dict.setdefault(uid, []).append(nid)

    # Only the initially selected readers are evaluated.
    for reader in target_reader[:target_num]:
        recommend = cbr.get_recommendation_list(reader, test_news)
        recommended_ids = [item[0] for item in recommend]
        # A requested reader may be absent from the test data entirely.
        actually_read = user_read_dict.get(reader, [])
        count = sum(1 for nid in actually_read if nid in recommended_ids)
        # Single pre-formatted argument reproduces the spacing of the former
        # multi-argument print statement and parses under Python 2 and 3.
        print('reader  %s   read: %s   accept:  %s'
              % (reader, len(actually_read), count))
コード例 #3
0
ファイル: Core.py プロジェクト: zhangqb/rs
    def recommend(self):
        """Run the recommender selected by ``self.type`` and print the results.

        * ``self.type == 0``: delegates to ``start_cbr.test_cbr`` with
          ``self.train_path`` and ``self.test_path``, passing ``[self.user]``
          when a user is set and an empty list otherwise.
        * ``self.type == 1``: for every candidate item from the test data,
          sums ``m[other][item] * similarity`` over the training users that
          have that item, where the similarity is ``self.user_sim`` of the two
          users' item lists divided by the square root of the product of
          their item counts.
        * ``self.type == 2``: for every candidate item, sums the target
          user's stored value for each item in their training history times
          ``self.sim`` between the candidate and that history item.

        For types 1 and 2 the evaluated users are ``[self.user]`` when set,
        otherwise ``self.get_sample_users(<train matrix>, 30)``; users absent
        from the test matrix are skipped. The ten highest-scored items are
        passed to ``self.cal_accuracy`` together with the user's test items,
        and one line ``"<user> <accuracy>  <hit items>"`` is printed per user.

        Returns:
            None.
        """
        if self.type == 0:
            targets = [self.user] if self.user else []
            start_cbr.test_cbr(targets, self.train_path, self.test_path)

        elif self.type == 1:
            news = Documents(self.train_path, is_tfidf=False, type=1)
            m = news.get_user_item_matrix()
            # item_news_m[newsid] represents users who saw newsid
            item_news_m = news.get_item_user_m()

            test = Documents(self.test_path, is_tfidf=False, type=1)
            test_user_item = test.get_user_item_matrix()
            recom_items = test.get_items()

            users = [self.user] if self.user else self.get_sample_users(m, 30)

            for i_user in users:
                if i_user not in test_user_item:
                    continue
                test_user_seen = list(test_user_item[i_user])
                # Profile of the user being evaluated. Indexing by i_user
                # (not self.user) keeps sampled users working when self.user
                # is unset.
                user_items = list(m[i_user])

                results = {}
                for it in recom_items:
                    # Leave the candidate itself out of the comparison set.
                    seen_items = [s for s in user_items if s != it]
                    rating = 0
                    for other_user in item_news_m[it]:
                        other_items = list(m[other_user])
                        count = self.user_sim(other_items, seen_items)
                        similarity = count / math.sqrt(
                            len(other_items) * len(user_items))
                        rating += m[other_user][it] * similarity
                    results[it] = rating

                res = sorted(results.items(), key=lambda k: k[1],
                             reverse=True)[0:10]
                predict_seen = [pair[0] for pair in res]
                acc, hit_items = self.cal_accuracy(test_user_seen,
                                                   predict_seen)
                # One pre-formatted argument: same text as the former
                # two-argument print statement, valid on Python 2 and 3.
                print("%d %f  %s" % (i_user, acc, hit_items))

        elif self.type == 2:
            news = Documents(self.train_path, is_tfidf=False, type=2)
            user_item_m = news.get_user_item_matrix()
            # item_news_m[newsid] represents users who saw newsid
            item_news_m = news.get_item_user_m()

            test = Documents(self.test_path, is_tfidf=False, type=2)
            test_user_item = test.get_user_item_matrix()
            recom_items = test.get_items()

            if self.user:
                users = [self.user]
            else:
                users = self.get_sample_users(user_item_m, 30)

            for i_user in users:
                if i_user not in test_user_item:
                    continue
                test_user_seen = list(test_user_item[i_user])
                user_ratings = user_item_m[i_user]
                # Items that the evaluated user has seen in training.
                user_seen = list(user_ratings)

                results = {}
                for it in recom_items:
                    rating = 0
                    for seen in user_seen:
                        # Skip only the candidate itself. The history is not
                        # mutated, so every candidate is scored against the
                        # same set regardless of iteration order.
                        if seen == it:
                            continue
                        similarity = self.sim(item_news_m[it],
                                              item_news_m[seen])
                        rating += user_ratings[seen] * similarity
                    results[it] = rating

                res = sorted(results.items(), key=lambda k: k[1],
                             reverse=True)[0:10]
                predict_seen = [pair[0] for pair in res]
                acc, hit_items = self.cal_accuracy(test_user_seen,
                                                   predict_seen)
                print("%d %f  %s" % (i_user, acc, hit_items))
コード例 #4
0
ファイル: Core.py プロジェクト: hustqy/rs
    def recommend(self):
        """Run the recommender selected by ``self.type`` and print the results.

        * ``self.type == 0``: delegates to ``start_cbr.test_cbr`` with
          ``self.train_path`` and ``self.test_path``, passing ``[self.user]``
          when a user is set and an empty list otherwise.
        * ``self.type == 1``: for every candidate item from the test data,
          sums ``m[other][item] * similarity`` over the training users that
          have that item, where the similarity is ``self.user_sim`` of the two
          users' item lists divided by the square root of the product of
          their item counts.
        * ``self.type == 2``: for every candidate item, sums the target
          user's stored value for each item in their training history times
          ``self.sim`` between the candidate and that history item.

        For types 1 and 2 the evaluated users are ``[self.user]`` when set,
        otherwise ``self.get_sample_users(<train matrix>, 30)``; users absent
        from the test matrix are skipped. The ten highest-scored items are
        passed to ``self.cal_accuracy`` together with the user's test items,
        and one line ``"<user> <accuracy>  <hit items>"`` is printed per user.

        Returns:
            None.
        """
        if self.type == 0:
            targets = [self.user] if self.user else []
            start_cbr.test_cbr(targets, self.train_path, self.test_path)

        elif self.type == 1:
            news = Documents(self.train_path, is_tfidf=False, type=1)
            m = news.get_user_item_matrix()
            # item_news_m[newsid] represents users who saw newsid
            item_news_m = news.get_item_user_m()

            test = Documents(self.test_path, is_tfidf=False, type=1)
            test_user_item = test.get_user_item_matrix()
            recom_items = test.get_items()

            users = [self.user] if self.user else self.get_sample_users(m, 30)

            for i_user in users:
                if i_user not in test_user_item:
                    continue
                test_user_seen = list(test_user_item[i_user])
                # Profile of the user being evaluated. Indexing by i_user
                # (not self.user) keeps sampled users working when self.user
                # is unset.
                user_items = list(m[i_user])

                results = {}
                for it in recom_items:
                    # Leave the candidate itself out of the comparison set.
                    seen_items = [s for s in user_items if s != it]
                    rating = 0
                    for other_user in item_news_m[it]:
                        other_items = list(m[other_user])
                        count = self.user_sim(other_items, seen_items)
                        similarity = count / math.sqrt(
                            len(other_items) * len(user_items))
                        rating += m[other_user][it] * similarity
                    results[it] = rating

                res = sorted(results.items(), key=lambda k: k[1],
                             reverse=True)[0:10]
                predict_seen = [pair[0] for pair in res]
                acc, hit_items = self.cal_accuracy(test_user_seen,
                                                   predict_seen)
                # One pre-formatted argument: same text as the former
                # two-argument print statement, valid on Python 2 and 3.
                print("%d %f  %s" % (i_user, acc, hit_items))

        elif self.type == 2:
            news = Documents(self.train_path, is_tfidf=False, type=2)
            user_item_m = news.get_user_item_matrix()
            # item_news_m[newsid] represents users who saw newsid
            item_news_m = news.get_item_user_m()

            test = Documents(self.test_path, is_tfidf=False, type=2)
            test_user_item = test.get_user_item_matrix()
            recom_items = test.get_items()

            if self.user:
                users = [self.user]
            else:
                users = self.get_sample_users(user_item_m, 30)

            for i_user in users:
                if i_user not in test_user_item:
                    continue
                test_user_seen = list(test_user_item[i_user])
                user_ratings = user_item_m[i_user]
                # Items that the evaluated user has seen in training.
                user_seen = list(user_ratings)

                results = {}
                for it in recom_items:
                    rating = 0
                    for seen in user_seen:
                        # Skip only the candidate itself. The history is not
                        # mutated, so every candidate is scored against the
                        # same set regardless of iteration order.
                        if seen == it:
                            continue
                        similarity = self.sim(item_news_m[it],
                                              item_news_m[seen])
                        rating += user_ratings[seen] * similarity
                    results[it] = rating

                res = sorted(results.items(), key=lambda k: k[1],
                             reverse=True)[0:10]
                predict_seen = [pair[0] for pair in res]
                acc, hit_items = self.cal_accuracy(test_user_seen,
                                                   predict_seen)
                print("%d %f  %s" % (i_user, acc, hit_items))