Example #1
0
 def evalRatings(self):
     res = [] #used to contain the text of the result
     res.append('userId  itemId  original  prediction\n')
     #predict
     for userId in self.dao.testSet_u:
         for ind,item in enumerate(self.dao.testSet_u[userId]):
             itemId = item[0]
             originRating = item[1]
             #predict
             prediction = self.predict(userId,itemId)
             #denormalize
             prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0])
             #####################################
             pred = self.checkRatingBoundary(prediction)
             # add prediction in order to measure
             self.dao.testSet_u[userId][ind].append(pred)
             res.append(userId+' '+itemId+' '+str(originRating)+' '+str(pred)+'\n')
     currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time()))
     #output prediction result
     if self.isOutput:
         outDir = self.output['-dir']
         fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt'
         FileIO.writeFile(outDir,fileName,res)
         print 'The Result has been output to ',abspath(outDir),'.'
     #output evaluation result
     outDir = self.output['-dir']
     fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt'
     measure = Measure.ratingMeasure(self.dao.testSet_u)
     FileIO.writeFile(outDir, fileName, measure)
Example #2
0
    def performance(self):
        """Evaluate rating-prediction accuracy on ``self.dao.testData``.

        For every (user, item, rating) triple the model's prediction is
        denormalized back onto the data set's original rating scale,
        clipped to the rating boundaries, and collected; the resulting
        [user, item, rating, pred] quadruples are scored with
        ``Measure.ratingMeasure``.

        Returns:
            The measure result from ``Measure.ratingMeasure`` (also
            cached on ``self.measure``).
        """
        results = []
        for user, item, rating in self.dao.testData:
            # raw model score for this pair
            prediction = self.predict(user, item)
            # map the score back onto the original rating scale
            prediction = denormalize(prediction, self.dao.rScale[-1],
                                     self.dao.rScale[0])
            # clip to the legal rating range
            pred = self.checkRatingBoundary(prediction)
            # keep the ground truth next to the prediction for measuring
            results.append([user, item, rating, pred])
        self.measure = Measure.ratingMeasure(results)
        return self.measure
Example #3
0
    def evalRatings(self):
        """Predict a rating for every test triple, persist the predictions
        and the rating measures, and print the measure summary.

        NOTE(review): each call appends the prediction to the entries of
        ``self.dao.testData`` in place, so invoking this method twice
        would append a second prediction to every entry -- confirm it is
        only called once per fold.
        """
        res = [] #used to contain the text of the result
        res.append('userId  itemId  original  prediction\n')
        #predict
        for ind,entry in enumerate(self.dao.testData):
            user,item,rating = entry

            #predict
            prediction = self.predict(user,item)
            #denormalize
            prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0])
            #####################################
            # clip the denormalized score to the legal rating range
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            self.dao.testData[ind].append(pred)
            # user/item are concatenated directly -- presumably string ids
            res.append(user+' '+item+' '+str(rating)+' '+str(pred)+'\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time()))
        #output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt'
            FileIO.writeFile(outDir,fileName,res)
            print 'The result has been output to ',abspath(outDir),'.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(self.dao.testData)
        FileIO.writeFile(outDir, fileName, self.measure)
        # ''.join assumes ratingMeasure returns a list of strings -- verify
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
Example #4
0
File: ALS.py Project: 0411tony/RecQ
    def buildModel(self):
        print 'run the MF_ALS algorithm'

        print 'training...'
        iteration = 0
        while iteration < self.maxIter:
            self.loss = 0
            I = np.ones(len(self.dao.item))
            for user in self.dao.user:
                C_u = np.ones(len(self.dao.item))
                P_u = np.zeros(len(self.dao.item))
                uid = self.dao.user[user]
                for item in self.dao.trainSet_u[user]:
                    iid = self.dao.getItemId(item)
                    r_ui = denormalize(self.dao.trainSet_u[user][item],
                                       self.dao.rScale[-1], self.dao.rScale[0])
                    C_u[iid] += log(1 + r_ui)
                    P_u[iid] = 1
                    error = (P_u[iid] - self.P[uid].dot(self.Q[iid]))
                    self.loss += C_u[iid] * error**2

                Temp = (self.Q.T.dot(self.Q) + (self.Q.T *
                                                (C_u - I)).dot(self.Q) +
                        self.regU * np.eye(self.k))**-1
                self.P[uid] = (Temp.dot(self.Q.T) * C_u).dot(P_u)

            I = np.ones(len(self.dao.user))
            for item in self.dao.item:
                C_i = np.ones(len(self.dao.user))
                P_i = np.zeros(len(self.dao.user))
                iid = self.dao.item[item]
                for user in self.dao.trainSet_i[item]:
                    uid = self.dao.getUserId(user)
                    r_ui = denormalize(self.dao.trainSet_i[item][user],
                                       self.dao.rScale[-1], self.dao.rScale[0])
                    C_i[uid] += log(r_ui + 1)
                    P_i[uid] = 1
                Temp = (self.P.T.dot(self.P) + (self.P.T *
                                                (C_i - I)).dot(self.P) +
                        self.regU * np.eye(self.k))**-1
                self.Q[iid] = (Temp.dot(self.P.T) * C_i).dot(P_i)

            # self.loss += (self.P * self.P).sum() + (self.Q * self.Q).sum()

            iteration += 1
            if self.isConverged(iteration):
                break
Example #5
0
    def evalRanking(self):
        """Produce a top-N recommendation list for every test user and
        write both the lists and the ranking measures to disk.

        N is read from the '-topN' ranking option and clamped into
        [0, 100].  Only items the user has not rated are scored; a '*'
        marks a recommended item that appears in the user's test set.
        """
        res = []  # used to contain the text of the result
        N = int(self.ranking['-topN'])
        # clamp N into a sane range
        if N > 100 or N < 0:
            N = 100
        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n'
        )
        # predict
        topNSet = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = []
            line = user + ':'

            # score every candidate item the user has not rated
            for item in self.dao.item:
                if not self.dao.rating(user, item):
                    # predict
                    prediction = self.predict(user, item)
                    # denormalize

                    prediction = denormalize(prediction, self.dao.rScale[-1],
                                             self.dao.rScale[0])

                    prediction = round(prediction, 4)
                    #pred = self.checkRatingBoundary(prediction)
                    #####################################
                    # add prediction in order to measure
                    itemSet.append((item, prediction))

            # keep only the N highest-scoring items
            itemSet.sort(key=lambda d: d[1], reverse=True)
            topNSet[user] = itemSet[0:N]

            # progress report every 100 users
            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in topNSet[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                # '*' marks a hit in the user's test set
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
        FileIO.writeFile(outDir, fileName, self.measure)
Example #6
0
    def rating_performance(self):
        """Score the model's rating predictions on ``self.data.testData``.

        Each (user, item, rating) triple is predicted, denormalized back
        onto the original rating scale, and clipped; the quadruples are
        handed to ``Measure.ratingMeasure``.

        Returns the measure result (also cached on ``self.measure``).
        """
        scored = []
        for user, item, rating in self.data.testData:
            # raw model score, mapped back onto the original rating scale
            raw = denormalize(self.predict(user, item),
                              self.data.rScale[-1], self.data.rScale[0])
            # clip to the legal rating range before measuring
            scored.append([user, item, rating, self.checkRatingBoundary(raw)])
        self.measure = Measure.ratingMeasure(scored)
        return self.measure
Example #7
0
    def render(self):
        """Render the data-analysis HTML report and open it in a browser.

        Calls ``self.draw()`` first (presumably to generate the chart
        images referenced below -- confirm), assembles an HTML page with
        rating and social-data statistics, writes it to
        ``../visual/visualization/analysis.html`` and opens that file.
        """
        self.draw()
        # static page header and the "Data Files" section
        html ="<html><head><title>Data Analysis</title>\n" \
              "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n" \
              "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n" \
              "<div class='main'><div class='area1'>\n" \
              "<div class='title'><h3>Data Files</h3></div><div class='text'>"
        if self.conf.contains('ratings'):
            html += "<b>Rating Data</b>: {rating}".format(
                rating=abspath(self.conf['ratings']))
        if self.conf.contains('social'):
            html += "<br><b>Social Data</b>: {social}".format(
                social=abspath(self.conf['social']))
        html+="</div></div><div style='padding-top:20px'><center>" \
              "<img src='images/header2.png'/></center></div>\n"
        # rating-data statistics plus the three rating charts
        if self.conf.contains('ratings'):
            html += "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n"
            html += "<div class='text'><b>Rating Scale</b>: {scale}</br>".format(
                scale=' '.join([str(item) for item in self.dao.rScale]))
            html += "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br><b>Record Count</b>: {record}<br><b>Global Mean</b>: {mean}</div>\n"\
                .format(user = str(len(self.dao.user)),item=str(len(self.dao.item)),record = str(len(self.dao.trainingData)),
                        mean = str(round(denormalize(self.dao.globalMean,self.dao.rScale[-1],self.dao.rScale[0]),3)))
            html += "<center><div class='img'><img src='images/rh.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rcu.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rci.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
              "<img src='images/header2.png'/></center></div>\n"
        # social-data statistics plus the three social charts
        if self.conf.contains('social'):
            html += "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n"
            html += "<div class='text'><b>User Count</b>: {user}<br><b>Relation Count</b>: {relation}<br></div>\n" \
                .format(user=str(len(self.sao.user)), relation=str(len(self.sao.relation)))
            html += "<center><div class='img'><img src='images/ff.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd1.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd2.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
                    "<img src='images/header2.png'/></center></div>\n"

        html += "</div></body></html>"
        # write the report and open it in the default browser
        FileIO.writeFile('../visual/visualization/', 'analysis.html', html)
        print 'The report has been output to', abspath(
            '../visual/visualization/analysis.html')
        webbrowser.open(abspath('../visual/visualization/analysis.html'),
                        new=0,
                        autoraise=True)
Example #8
0
    def evalRanking(self):
        """Produce ranked recommendation lists for every test user and
        evaluate them, either as fixed-size top-N lists ('-topN') or as
        score-threshold lists ('-threshold').

        Exits the process if neither ranking option is configured.
        """
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N can not be larger than 100! It has been reassigned with 100'
                N = 100
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'

            # score every item; keep all (topN mode) or only those above
            # the threshold (threshold mode)
            for item in self.dao.item:
                # predict
                prediction = self.predict(user, item)
                # denormalize

                prediction = denormalize(prediction, self.dao.rScale[-1],
                                         self.dao.rScale[0])

                #prediction = self.checkRatingBoundary(prediction)
                #pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure
                if bThres:
                    if prediction > threshold:
                        itemSet[item] = prediction
                else:
                    itemSet[item] = prediction

            # drop items the user already rated in training.
            # NOTE(review): in threshold mode a rated item whose prediction
            # fell below the threshold is not in itemSet, so this del would
            # raise KeyError -- confirm against real runs.
            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[self.dao.id2item[item]]
            itemSet = sorted(itemSet.iteritems(),
                             key=lambda d: d[1],
                             reverse=True)
            if self.ranking.contains('-topN'):
                recList[user] = itemSet[0:N]
            elif self.ranking.contains('-threshold'):
                recList[user] = itemSet[:]
                userN[user] = len(itemSet)

            # progress report every 100 users
            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                # '*' marks a hit in the user's test set
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        elif self.ranking.contains('-threshold'):
            # shallow copy; keys are rebound to fresh dicts below, so the
            # original nested dicts in testSet_u are left untouched
            origin = self.dao.testSet_u.copy()
            for user in origin:
                temp = {}
                for item in origin[user]:
                    if origin[user][item] >= threshold:
                        # NOTE(review): stores the threshold itself rather
                        # than the actual rating origin[user][item] --
                        # confirm this is intended.
                        temp[item] = threshold
                origin[user] = temp
            self.measure = Measure.rankingMeasure_threshold(
                origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)