Example #1
File: recommender.py Project: yuyu2223/Yue
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0

        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reassigned to 10'
            N = 10

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userCount = len(self.data.testSet)
        rawRes = {}
        for i, user in enumerate(self.data.testSet):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predict(user)

            recList[user] = predictedItems

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                # item is a plain item id here (cf. Examples #12, #13);
                # indexing item[0] would only test the first character
                if self.data.testSet[user].has_key(item):
                    line += '*'
                line += item + ','

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.data.testSet, recList,
                                                  rawRes, N)

        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Example #2
    def evalRatings(self):
        res = []  #used to contain the text of the result
        res.append('userId  itemId  original  prediction\n')
        #predict
        for ind, entry in enumerate(self.data.testData):
            user, item, rating = entry

            #predict
            prediction = self.predict(user, item)
            #denormalize
            #prediction = denormalize(prediction,self.data.rScale[-1],self.data.rScale[0])
            #####################################
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            self.data.testData[ind].append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' +
                       str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        #output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(self.data.testData)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
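
Note: Measure.ratingMeasure itself is not shown in any of these snippets. A minimal sketch of what it plausibly computes, assuming MAE and RMSE over [user, item, original, prediction] rows like the ones assembled here and in Examples #4, #7 and #8 below (the actual implementation in these projects may differ):

    from math import sqrt

    def rating_measure(rows):
        # rows: list of [user, item, original_rating, predicted_rating]
        # entries; hypothetical stand-in for Measure.ratingMeasure
        errors = [float(r[3]) - float(r[2]) for r in rows]
        mae = sum(abs(e) for e in errors) / len(errors)
        rmse = sqrt(sum(e * e for e in errors) / len(errors))
        return ['MAE:%.4f\n' % mae, 'RMSE:%.4f\n' % rmse]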
Example #3
 def evalRatings(self):
     res = [] #used to contain the text of the result
     res.append('userId  itemId  original  prediction\n')
     #predict
     for userId in self.dao.testSet_u:
         for ind,item in enumerate(self.dao.testSet_u[userId]):
             itemId = item[0]
             originRating = item[1]
             #predict
             prediction = self.predict(userId,itemId)
             #denormalize
             prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0])
             #####################################
             pred = self.checkRatingBoundary(prediction)
             # add prediction in order to measure
             self.dao.testSet_u[userId][ind].append(pred)
             res.append(userId+' '+itemId+' '+str(originRating)+' '+str(pred)+'\n')
     currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time()))
     #output prediction result
     if self.isOutput:
         outDir = self.output['-dir']
         fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt'
         FileIO.writeFile(outDir,fileName,res)
         print 'The Result has been output to ',abspath(outDir),'.'
     #output evaluation result
     outDir = self.output['-dir']
     fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt'
     measure = Measure.ratingMeasure(self.dao.testSet_u)
     FileIO.writeFile(outDir, fileName, measure)
Example #4
    def performance(self):
        #res = []  # used to contain the text of the result
        #res.append('userId  itemId  original  prediction\n')
        # predict
        res = []
        for ind, entry in enumerate(self.dao.testData):
            user, item, rating = entry

            # predict
            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1],
                                     self.dao.rScale[0])
            #####################################
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            res.append([user, item, rating, pred])
            #res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
        #currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        # if self.isOutput:
        #     outDir = self.output['-dir']
        #     fileName = self.config['recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
        #     FileIO.writeFile(outDir, fileName, res)
        #     print 'The Result has been output to ', abspath(outDir), '.'
        # output evaluation result
        # outDir = self.output['-dir']
        # fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(res)
        return self.measure
Example #5
    def ranking_performance(self):
        N = 10
        recList = {}
        testSample = {}
        for user in self.data.testSet_u:
            if len(testSample) == 300:
                break
            testSample[user] = self.data.testSet_u[user]

        for user in testSample:
            itemSet = {}
            predictedItems = self.predictForRanking(user)
            for id, rating in enumerate(predictedItems):
                itemSet[self.data.id2item[id]] = rating

            ratedList, ratingList = self.data.userRated(user)
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:
                        mid = (l + r) / 2
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])
                if ind < N - 2:
                    # shift lower-ranked entries back one slot so the
                    # overwrite below does not drop them (cf. #11, #15)
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = zip(resNames, recommendations)
        measure = Measure.rankingMeasure(testSample, recList, [10])
        print '-'*80
        print 'Ranking Performance '+self.foldInfo+' (Top-10 On 300 sampled users)'
        for m in measure[1:]:
            print m.strip()
        print '-'*80
        #self.record.append(measure[3].strip()+' '+measure[4])
        return measure
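
This hand-rolled "find the K biggest scores" insertion loop recurs in Examples #11 and #13 through #16 below. A minimal sketch of the same top-N selection using the standard library's heapq.nlargest, which replaces the seeding, sorting and shifting in one call (the helper name is illustrative):

    import heapq

    def top_n(item_set, n):
        # item_set: dict mapping item id -> predicted score.
        # Returns the n highest-scoring (item, score) pairs, best first.
        return heapq.nlargest(n, item_set.items(), key=lambda pair: pair[1])

With this helper, recList[user] = top_n(itemSet, N) stands in for everything from the Nrecommendations seeding down to the zip.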
Example #6
File: Recommender.py Project: nonva/RecQ
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            N = 100
        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n'
        )
        # predict
        topNSet = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = []
            line = user + ':'

            for item in self.dao.item:
                if not self.dao.rating(user, item):
                    # predict
                    prediction = self.predict(user, item)
                    # denormalize

                    prediction = denormalize(prediction, self.dao.rScale[-1],
                                             self.dao.rScale[0])

                    prediction = round(prediction, 4)
                    #pred = self.checkRatingBoundary(prediction)
                    #####################################
                    # add prediction in order to measure
                    itemSet.append((item, prediction))

            itemSet.sort(key=lambda d: d[1], reverse=True)
            topNSet[user] = itemSet[0:N]

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in topNSet[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
        FileIO.writeFile(outDir, fileName, self.measure)
Example #7
 def rating_performance(self):
     res = []
     for ind, entry in enumerate(self.data.testData):
         user, item, rating = entry
         # predict
         prediction = self.predict(user, item)
         pred = self.checkRatingBoundary(prediction)
         res.append([user,item,rating,pred])
     self.measure = Measure.ratingMeasure(res)
     return self.measure
Example #8
    def rating_performance(self):

        res = []
        for ind, entry in enumerate(self.data.testData):
            user, item, rating = entry

            # predict
            prediction = self.predict(user, item)
            # denormalize
            #prediction = denormalize(prediction, self.data.rScale[-1], self.data.rScale[0])
            #####################################
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            res.append([user,item,rating,pred])

        self.measure = Measure.ratingMeasure(res)

        return self.measure
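
checkRatingBoundary is also not shown here; given how the rScale bounds are passed to denormalize in Examples #3 and #4, it plausibly clamps a prediction to the rating scale. A hedged sketch (the projects' own version may also round the value):

    def check_rating_boundary(prediction, r_min, r_max):
        # Clamp a predicted rating into the valid scale [r_min, r_max]
        # (assumed behavior, not the projects' actual implementation)
        return max(r_min, min(r_max, prediction))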
Example #9
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            N = 100
        res.append(
            'userId: recommendations in (itemId, ranking score) pairs\n')
        # predict
        topNSet = {}
        userCount = len(self.dao.testSet_u)
        for i, userId in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = userId + ':'
            for itemId in self.dao.item:
                pred = self.predict(userId, itemId)
                # add prediction in order to measure
                itemSet[itemId] = pred
            topNSet[userId] = sorted(itemSet.iteritems(),
                                     key=lambda d: d[1],
                                     reverse=True)[0:N]

            if i % 100 == 0:
                print 'Progress:' + str(i) + '/' + str(userCount)
            for item in topNSet[userId]:
                line += '(' + item[0] + ',' + str(item[1]) + ') '
            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
        FileIO.writeFile(outDir, fileName, measure)
Example #10
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N cannot be larger than 100! It has been reassigned to 100'
                N = 100
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'

            for item in self.dao.item:
                # predict
                prediction = self.predict(user, item)
                # denormalize

                prediction = denormalize(prediction, self.dao.rScale[-1],
                                         self.dao.rScale[0])

                #prediction = self.checkRatingBoundary(prediction)
                #pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure
                if bThres:
                    if prediction > threshold:
                        itemSet[item] = prediction
                else:
                    itemSet[item] = prediction

            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[self.dao.id2item[item]]
            itemSet = sorted(itemSet.iteritems(),
                             key=lambda d: d[1],
                             reverse=True)
            if self.ranking.contains('-topN'):
                recList[user] = itemSet[0:N]
            elif self.ranking.contains('-threshold'):
                recList[user] = itemSet[:]
                userN[user] = len(itemSet)

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        elif self.ranking.contains('-threshold'):
            origin = self.dao.testSet_u.copy()
            for user in origin:
                temp = {}
                for item in origin[user]:
                    if origin[user][item] >= threshold:
                        temp[item] = threshold
                origin[user] = temp
            self.measure = Measure.rankingMeasure_threshold(
                origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)
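
Example #10 is the only ranking variant here that implements both the -topN and -threshold modes end to end. A condensed sketch of the per-user branching (ranking_conf stands in for the self.ranking config object used in these snippets):

    def build_rec_list(item_set, ranking_conf, n=10, threshold=0.0):
        # item_set: dict of candidate item -> denormalized prediction
        # for one user, with already-rated items removed
        ranked = sorted(item_set.items(), key=lambda pair: pair[1], reverse=True)
        if ranking_conf.contains('-topN'):
            return ranked[:n]
        # threshold mode: keep every item predicted above the threshold
        return [pair for pair in ranked if pair[1] > threshold]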
Example #11
    def ranking_performance(self,iteration):
        #for quick evaluation, we only rank 2000 items
        #results of 1000 users would be evaluated
        N = 10
        recList = {}
        testSample = {}
        for user in self.data.testSet_u:
            if len(testSample) == 1000:
                break
            if user not in self.data.trainSet_u:
                continue
            testSample[user] = self.data.testSet_u[user]

        for user in testSample:
            itemSet = {}
            predictedItems = self.predictForRanking(user)
            for id, rating in enumerate(predictedItems[:2000]):
                itemSet[self.data.id2item[id]] = rating
            ratedList, ratingList = self.data.userRated(user)
            for item in ratedList:
                if item in itemSet:
                    del itemSet[item]
            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break
            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]
            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1
                if recommendations[r] < itemSet[item]:
                    while True:
                        mid = int((l + r) / 2)
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        if r < l:
                            ind = r
                            break
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = list(zip(resNames, recommendations))
        measure = Measure.rankingMeasure(testSample, recList, [10])
        if len(self.bestPerformance)>0:
            count = 0
            performance = {}
            for m in measure[1:]:
                k,v = m.strip().split(':')
                performance[k]=float(v)
            for k in self.bestPerformance[1]:
                if self.bestPerformance[1][k] > performance[k]:
                    count += 1
                else:
                    count -=1
            if count<0:
                self.bestPerformance[1]=performance
                self.bestPerformance[0]=iteration
                self.saveModel()
        else:
            self.bestPerformance.append(iteration)
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            # append once, after the dict is fully built (the original
            # appended inside the loop, once per metric)
            self.bestPerformance.append(performance)
            self.saveModel()
        print('-'*120)
        print('Quick Ranking Performance '+self.foldInfo+' (Top-10 Item Recommendation On 1000 sampled users)')
        measure = [m.strip() for m in measure[1:]]
        print('*Current Performance*')
        print('iteration:',iteration,' | '.join(measure))
        bp = ''
        # for k in self.bestPerformance[1]:
        #     bp+=k+':'+str(self.bestPerformance[1][k])+' | '
        bp += 'Precision'+':'+str(self.bestPerformance[1]['Precision'])+' | '
        bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + ' | '
        bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
        bp += 'MAP' + ':' + str(self.bestPerformance[1]['MAP']) + ' | '
        bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
        print('*Best Performance* ')
        print('iteration:',self.bestPerformance[0],bp)
        print('-'*120)
        return measure
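
The best-model bookkeeping above keeps the epoch whose metrics win a majority vote. The comparison can be isolated into a small helper (a sketch; the metric names follow the Precision/Recall/F1/MAP/NDCG set printed above, and higher is treated as better for all of them):

    def beats(current, best):
        # current, best: dicts mapping metric name -> value.
        # Replicates the +1/-1 counting above: the current epoch wins
        # when it matches or beats the stored best on strictly more
        # metrics than it loses.
        count = 0
        for k in best:
            if best[k] > current.get(k, 0.0):
                count += 1
            else:
                count -= 1
        return count < 0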
Example #12
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = int(top[-1])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reassigned to 10'
            N = 10

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userCount = len(self.data.testSet)

        for i, user in enumerate(self.data.testSet):

            line = user + ':'
            if self.data.userRecord.has_key(user):
                predictedItems = self.predict(user)
            else:
                predictedItems = ['0'] * N
            predicted = {}
            for k, item in enumerate(predictedItems):
                predicted[item] = k
            # guard against users absent from userRecord (handled above)
            for item in self.data.userRecord.get(user, []):
                if predicted.has_key(item[self.recType]):
                    del predicted[item[self.recType]]
            predicted = sorted(predicted.iteritems(), key=lambda d: d[1])
            predictedItems = [item[0] for item in predicted]
            recList[user] = predictedItems[:N]

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                if self.data.testSet[user].has_key(item):
                    line += '*'
                line += item + ','

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN']\
                           + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'

        self.measure = Measure.rankingMeasure(self.data.testSet, recList, top,
                                              self.data.getSize(self.recType))

        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Example #13
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = max(top)

        if N > 100 or N < 0:
            print ('N cannot be larger than 100! It has been reassigned to 10')
            N = 10

        res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
        # predict
        recList = {}
        userCount = len(self.data.testSet)

        for i, user in enumerate(self.data.testSet):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predict(user)

            for id, score in enumerate(predictedItems):

                itemSet[self.data.id2name[self.recType][id]] = score

            for item in self.data.userRecord[user]:
                try:
                    del itemSet[item[self.recType]]
                except KeyError:
                    pass
            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            # if bTopN:
            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:

                        mid = (l + r) // 2
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        else:
                            ind = mid
                            break
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])

                if ind < N - 2:
                    # shift lower-ranked entries back one slot so the
                    # overwrite below does not drop them (cf. #11, #15)
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = resNames

            if i % 100 == 0:
                print (self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
            for item in recList[user]:
                line += item
                if item in self.data.testSet[user]:
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN']\
                           + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print ('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'

        self.measure = Measure.rankingMeasure(self.data.testSet, recList,top,self.data.getSize(self.recType))

        FileIO.writeFile(outDir, fileName, self.measure)
        print ('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
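
Several of these snippets carry the hint "# ind = bisect(recommendations, itemSet[item])" in a comment. bisect only searches ascending sequences, while recommendations is kept in descending order, so using it needs a small adaptation; a sketch of the insert-and-shift step built on bisect (the reversed copy is cheap for the short top-N lists used here):

    import bisect

    def update_top_n(scores, names, item, score):
        # scores: the current best-N scores in descending order;
        # names: the matching item ids. Inserts (item, score) at its
        # sorted position and drops the smallest entry -- the same
        # effect as the shift-and-overwrite above.
        pos = len(scores) - bisect.bisect_right(scores[::-1], score)
        if pos < len(scores):
            scores.insert(pos, score)
            names.insert(pos, item)
            scores.pop()
            names.pop()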
Example #14
    def ranking_performance(self):
        N = 10
        itemcount = 0
        recList = {}
        testSample = {}
        for user in self.data.testSet:
            itemcount+=len(self.data.testSet[user])
            if len(testSample) == 300:
                break
            testSample[user] = self.data.testSet[user]

        for user in testSample:
            itemSet = {}
            predictedItems = self.predict(user)
            for id, rating in enumerate(predictedItems):
                itemSet[self.data.id2name['track'][id]] = rating

            # NOTE: this removes the user's *test* items from the candidate
            # set; the sibling snippets (e.g. #5, #15) remove the *training*
            # records via self.data.userRated(user) instead
            ratedList = self.data.testSet[user].keys()
            ratingList = self.data.testSet[user].values()
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:
                        mid = int((l + r) / 2)
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])
                if ind < N - 2:
                    # shift lower-ranked entries back one slot so the
                    # overwrite below does not drop them (cf. #11, #15)
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = resNames
        measure = Measure.rankingMeasure(testSample, recList, [10], itemcount)
        print ('-'*80)
        print ('Ranking Performance '+self.foldInfo+' (Top-10 On 300 sampled users)')
        for m in measure[1:]:
            print (m.strip())
        print ('-'*80)
        return measure
Example #15
    def evalRanking(self):
        res = []  # used to contain the text of the result

        if self.ranking.contains('-topN'):
            top = self.ranking['-topN'].split(',')
            top = [int(num) for num in top]
            N = int(top[-1])
            if N > 100 or N < 0:
                print(
                    'N cannot be larger than 100! It has been reassigned to 10'
                )
                N = 10
            if N > len(self.data.item):
                N = len(self.data.item)
        else:
            print('No correct evaluation metric is specified!')
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.data.testSet_u)
        #rawRes = {}
        for i, user in enumerate(self.data.testSet_u):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predictForRanking(user)
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            for id, rating in enumerate(predictedItems):
                # if not self.data.rating(user, self.data.id2item[id]):
                # prediction = self.checkRatingBoundary(prediction)
                # pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure

                itemSet[self.data.id2item[id]] = rating

            ratedList, ratingList = self.data.userRated(user)
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # find the N biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while r >= l:
                        mid = (r - l) / 2 + l
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1

                        if r < l:
                            ind = r
                            break
                #move the items backwards
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item

            recList[user] = zip(resNames, recommendations)

            if i % 100 == 0:
                print(self.algorName, self.foldInfo,
                      'progress:' + str(i) + '/' + str(userCount))
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.data.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.data.testSet_u, recList,
                                              top)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
Example #16
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N cannot be larger than 100! It has been reassigned to 100'
                N = 100
            if N > len(self.dao.item):
                N = len(self.dao.item)
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predictForRanking(user)
            # predictedItems = denormalize(predictedItems, self.dao.rScale[-1], self.dao.rScale[0])
            for id, rating in enumerate(predictedItems):
                # if not self.dao.rating(user, self.dao.id2item[id]):
                # prediction = self.checkRatingBoundary(prediction)
                # pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure
                # if bThres:
                #     if rating > threshold:
                #         itemSet[self.dao.id2item[id]]= rating
                # else:
                itemSet[self.dao.id2item[id]] = rating

            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            # if bTopN:
            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:

                        mid = (l + r) / 2
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        else:
                            ind = mid
                            break
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])

                if ind < N - 2:
                    # shift lower-ranked entries back one slot so the
                    # overwrite below does not drop them (cf. #11, #15)
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = zip(resNames, recommendations)
            # elif bThres:
            #     itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            #     recList[user] = itemSet[:]
            #     userN[user] = len(itemSet)

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        # elif self.ranking.contains('-threshold'):
        #     origin = self.dao.testSet_u.copy()
        #     for user in origin:
        #         temp = {}
        #         for item in origin[user]:
        #             if origin[user][item] >= threshold:
        #                 temp[item] = threshold
        #         origin[user] = temp
        #     self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Example #17
    def evalRanking(self, write_to_file=True, use_now_time=False):
        res = []  # used to contain the text of the result

        if self.ranking.contains('-topN'):
            top = self.ranking['-topN'].split(',')
            top = [int(num) for num in top]
            N = max(top)
            if N > 100 or N < 0:
                print(
                    'N cannot be larger than 100! It has been reassigned to 10'
                )
                N = 10
            if N > len(self.data.item):
                N = len(self.data.item)
        else:
            print('No correct evaluation metric is specified!')
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}

        testSample = self.testSample

        # # multiprocessing way
        # pool = Pool(12)
        # dataset = []
        # for user, testSample_u in testSample.items():
        #     identified_user = self.map_from_true_to_identify.get(user, -1)
        #     if identified_user == -1:
        #         continue
        #     dataset.append([user, identified_user, testSample_u])
        #
        # result_generator = pool.imap_unordered(partial(self.get_recommendation, N=N), dataset)
        # for result in tqdm(result_generator, total=len(dataset), desc='Measuring [{}]'):
        #     user, line, recList_user = result
        #     recList[user] = recList_user
        #     res.append(line)
        # pool.close()
        # pool.join()

        testSample_copy = testSample.copy()

        for i, user in tqdm(enumerate(testSample),
                            total=len(testSample),
                            desc='Measuring [{}]'.format(self.algorName)):
            identified_user = self.map_from_true_to_identify.get(user, -1)
            if identified_user == -1:
                del testSample_copy[user]
                continue
            user, line, recList_user = self.get_recommendation(
                (user, identified_user, testSample[user]), N)

            recList[user] = recList_user
            res.append(line)

        self.measure = Measure.rankingMeasure(testSample_copy, recList, top)
        # append whichever optional hyper-parameters this model defines
        try:
            self.measure.append("C:{}\n".format(self.C))
        except AttributeError:
            pass
        try:
            self.measure.append("L:{}\n".format(self.L))
        except AttributeError:
            pass
        try:
            self.measure.append("K:{}\n".format(self.K))
        except AttributeError:
            pass
        try:
            self.measure.append("N:{}\n".format(self.N))
        except AttributeError:
            pass

        if use_now_time:
            currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        else:
            currentTime = self.currentTime
        if write_to_file:
            # output prediction result
            if False and self.isOutput:
                fileName = ''
                outDir = self.output['-dir']
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
                FileIO.writeFile(outDir, fileName, res)
            # output evaluation result
            outDir = self.output['-dir']
            try:
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '_C{}'.format(
                        self.C) + '.txt'
            except AttributeError:
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, self.measure)
            # FileIO.writeFile(outDir, fileName, "C:{}".format(self.C))

            print('The result has been output to ', abspath(outDir), '.')
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
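
The four try/except blocks in Example #17 that append C, L, K and N can be collapsed with getattr; a sketch (the helper name is illustrative):

    def append_hyperparameters(model, measure, names=('C', 'L', 'K', 'N')):
        # Append each optional hyper-parameter the model actually
        # defines, mirroring the try/except blocks above
        for name in names:
            value = getattr(model, name, None)
            if value is not None:
                measure.append('{}:{}\n'.format(name, value))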