Code Example #1
File: SDetection.py Project: CoderWZW/SDLib
    def execute(self):
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoad:
            print 'Loading model %s...' % (self.foldInfo)
            self.loadModel()
        else:
            print 'Initializing model %s...' % (self.foldInfo)
            self.initModel()
            print 'Building Model %s...' % (self.foldInfo)
            self.buildModel()

        # predict the ratings or item ranking
        print 'Predicting %s...' % (self.foldInfo)
        prediction = self.predict()
        report = classification_report(self.testLabels, prediction, digits=4)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        FileIO.writeFile(self.output['-dir'],self.algorName+'@'+currentTime+self.foldInfo,report)
        # save model
        if self.isSave:
            print 'Saving model %s...' % (self.foldInfo)
            self.saveModel()
        print report
        return report
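Note: the imports this snippet depends on are not shown in the excerpt. A plausible set, assuming scikit-learn's classification_report and the library's FileIO helper living under tool/file.py (an assumption based on the import style of Code Example #29), would be:

    # Assumed imports for the snippet above (not part of the original excerpt)
    from time import strftime, localtime, time          # timestamp for the report file name
    from sklearn.metrics import classification_report   # precision/recall/F1 text report
    from tool.file import FileIO                         # file-output helper (assumed module path)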
Code Example #2
 def evalRatings(self):
     res = [] #used to contain the text of the result
     res.append('userId  itemId  original  prediction\n')
     #predict
     for userId in self.dao.testSet_u:
         for ind,item in enumerate(self.dao.testSet_u[userId]):
             itemId = item[0]
             originRating = item[1]
             #predict
             prediction = self.predict(userId,itemId)
             #denormalize
             prediction = denormalize(prediction,self.dao.rScale[-1],self.dao.rScale[0])
             #####################################
             pred = self.checkRatingBoundary(prediction)
             # add prediction in order to measure
             self.dao.testSet_u[userId][ind].append(pred)
             res.append(userId+' '+itemId+' '+str(originRating)+' '+str(pred)+'\n')
     currentTime = strftime("%Y-%m-%d %H-%M-%S",localtime(time()))
     #output prediction result
     if self.isOutput:
         outDir = self.output['-dir']
         fileName = self.config['recommender']+'@'+currentTime+'-rating-predictions'+self.foldInfo+'.txt'
         FileIO.writeFile(outDir,fileName,res)
         print 'The Result has been output to ',abspath(outDir),'.'
     #output evaluation result
     outDir = self.output['-dir']
     fileName = self.config['recommender'] + '@'+currentTime +'-measure'+ self.foldInfo + '.txt'
     measure = Measure.ratingMeasure(self.dao.testSet_u)
     FileIO.writeFile(outDir, fileName, measure)
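Note: denormalize maps a prediction on the internal normalized scale back onto the original rating scale; rScale[-1] and rScale[0] are passed as the maximum and minimum ratings. A minimal sketch, assuming plain min-max scaling (the library's actual implementation may differ):

    def denormalize(prediction, maxVal, minVal):
        # Map a score normalized to [0, 1] back to the [minVal, maxVal] rating scale.
        return minVal + prediction * (maxVal - minVal)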
Code Example #3
    def dataSplit(data,
                  test_ratio=0.3,
                  output=False,
                  path='./',
                  order=1,
                  binarized=False):
        if test_ratio >= 1 or test_ratio <= 0:
            test_ratio = 0.3
        testSet = []
        trainingSet = []
        for entry in data:
            if random() < test_ratio:
                if binarized:
                    if entry[2]:
                        testSet.append(entry)
                else:
                    testSet.append(entry)
            else:
                trainingSet.append(entry)

        if output:
            FileIO.writeFile(path, 'testSet[' + str(order) + ']', testSet)
            FileIO.writeFile(path, 'trainingSet[' + str(order) + ']',
                             trainingSet)
        return trainingSet, testSet
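A minimal usage sketch for this splitter, assuming it is exposed as a static method of a DataSplit class (other examples call DataSplit.dataSplit) and fed (user, item, rating) tuples; the seed call is only to make the illustration reproducible:

    from random import seed

    seed(42)  # dataSplit draws from random(), so seeding fixes the split
    data = [('u1', 'i1', 4.0), ('u1', 'i2', 3.0),
            ('u2', 'i1', 5.0), ('u2', 'i3', 1.0)]
    train, test = DataSplit.dataSplit(data, test_ratio=0.3)
    print('%d training entries / %d test entries' % (len(train), len(test)))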
Code Example #4
File: recommender.py Project: yuyu2223/Yue
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0

        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reset to 10'
            N = 10

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userCount = len(self.data.testSet)
        rawRes = {}
        for i, user in enumerate(self.data.testSet):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predict(user)

            recList[user] = predictedItems

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                if self.data.testSet[user].has_key(item):  # item is an itemId string (cf. Code Example #25)
                    line += '*'
                line += item + ','

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.data.testSet, recList,
                                                  rawRes, N)

        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Code Example #5
File: RecQ.py Project: nicoleljc1227/RecQ
    def execute(self):
        exec ('from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender'])
        if self.evaluation.contains('-cv'):
            i = 1
            for train,test in DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv'])):
                fold = '['+str(i)+']'
                recommender = self.config['recommender']+ "(self.config,train,test,fold)"
                measure = eval(recommender).execute()
                self.measure.append(measure)
                i+=1
            res = []
            for i in range(len(self.measure[0])):
                measure = self.measure[0][i].split(':')[0]
                total = 0
                for j in range(len(self.measure)):
                    total += float(self.measure[j][i].split(':')[1])
                res.append(measure+':'+str(total/len(self.measure))+'\n')
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['recommender'] +'@'+str(int(self.evaluation['-cv']))+'-fold-cv' + '.txt'
            FileIO.writeFile(outDir,fileName,res)


        else:
            recommender = self.config['recommender']+'(self.config,self.trainingData,self.testData)'
            eval(recommender).execute()
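The exec/eval pattern above instantiates the recommender class named in the config at runtime. A safer equivalent, sketched with importlib and getattr, assuming the module layout matches the import string in the snippet and that the constructor accepts a fold argument as in the cross-validation branch:

    import importlib

    def load_recommender(config, train, test, fold=''):
        # Resolve algorithm.rating.<Name> and instantiate class <Name> without exec/eval.
        name = config['recommender']
        module = importlib.import_module('algorithm.rating.' + name)
        return getattr(module, name)(config, train, test, fold)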
Code Example #6
File: SDetection.py Project: chensi01/SDLib
    def execute(self):
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoad:
            print ('Loading model %s...' % (self.foldInfo))
            self.loadModel()
        else:
            print ('Initializing model %s...' % (self.foldInfo))
            self.initModel()
            print ('Building Model %s...' % (self.foldInfo))
            self.buildModel()

        # predict the ratings or item ranking
        print ('Predicting %s...' % (self.foldInfo))
        prediction = self.predict()
        report = classification_report(self.testLabels, prediction, digits=4)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        FileIO.writeFile(self.output['-dir'],self.algorName+'@'+currentTime+self.foldInfo,report)
        # save model
        if self.isSave:
            print ('Saving model %s...' % (self.foldInfo))
            self.saveModel()
        # print (report)
        # return report
        return [i for i in report.split('\n') if len(i)>0][2].split()
Code Example #7
    def execute(self):
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoad:
            print 'Loading model %s...' % (self.foldInfo)
            self.loadModel()
        else:
            print 'Initializing model %s...' % (self.foldInfo)
            self.initModel()
            print 'Building Model %s...' % (self.foldInfo)
            self.buildModel()

        # predict the ratings or item ranking
        print 'Predicting %s...' % (self.foldInfo)
        report = self.predict()
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        FileIO.writeFile(self.output['-dir'],
                         self.algorName + '@' + currentTime + self.foldInfo,
                         report)
        # save model
        if self.isSave:
            print 'Saving model %s...' % (self.foldInfo)
            self.saveModel()
        return report
Code Example #8
    def evalRatings(self):
        res = []  #used to contain the text of the result
        res.append('userId  itemId  original  prediction\n')
        #predict
        for ind, entry in enumerate(self.data.testData):
            user, item, rating = entry

            #predict
            prediction = self.predict(user, item)
            #denormalize
            #prediction = denormalize(prediction,self.data.rScale[-1],self.data.rScale[0])
            #####################################
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            self.data.testData[ind].append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' +
                       str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        #output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(self.data.testData)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
Code Example #9
File: Recommender.py Project: nonva/RecQ
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            N = 100
        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n'
        )
        # predict
        topNSet = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = []
            line = user + ':'

            for item in self.dao.item:
                if not self.dao.rating(user, item):
                    # predict
                    prediction = self.predict(user, item)
                    # denormalize

                    prediction = denormalize(prediction, self.dao.rScale[-1],
                                             self.dao.rScale[0])

                    prediction = round(prediction, 4)
                    #pred = self.checkRatingBoundary(prediction)
                    #####################################
                    # add prediction in order to measure
                    itemSet.append((item, prediction))

            itemSet.sort(key=lambda d: d[1], reverse=True)
            topNSet[user] = itemSet[0:N]

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in topNSet[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
        FileIO.writeFile(outDir, fileName, self.measure)
Code Example #10
File: RecQ.py Project: nonva/RecQ
    def execute(self):
        #import the algorithm module
        importStr = 'from algorithm.rating.' + self.config[
            'recommender'] + ' import ' + self.config['recommender']
        exec(importStr)
        if self.evaluation.contains('-cv'):
            k = int(self.evaluation['-cv'])
            if k <= 1 or k > 10:
                k = 3
            #create the manager used for communication between processes
            manager = Manager()
            m = manager.dict()
            i = 1
            tasks = []
            for train, test in DataSplit.crossValidation(self.trainingData, k):
                fold = '[' + str(i) + ']'
                if self.config.contains('social'):
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,self.relation,fold)"
                else:
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,fold)"
            #create the process
                p = Process(target=run, args=(m, eval(recommender), i))
                tasks.append(p)
                i += 1
            #start the processes
            for p in tasks:
                p.start()
            #wait until all processes are completed
            for p in tasks:
                p.join()
            #compute the mean error of k-fold cross validation
            self.measure = [dict(m)[i] for i in range(1, k + 1)]
            res = []
            for i in range(len(self.measure[0])):
                measure = self.measure[0][i].split(':')[0]
                total = 0
                for j in range(k):
                    total += float(self.measure[j][i].split(':')[1])
                res.append(measure + ':' + str(total / k) + '\n')
            #output result
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['recommender'] + '@' + str(
                k) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)

        else:
            if self.config.contains('social'):
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
            else:
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData)'
            eval(recommender).execute()
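The run target handed to Process is not shown in this excerpt. A minimal sketch that is consistent with how the shared dict m is read back afterwards (an assumption, not the project's verbatim code):

    def run(measure_dict, algo, order):
        # Execute one fold and store its measure list under the fold index,
        # so the parent process can collect it from the shared Manager dict.
        measure_dict[order] = algo.execute()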
Code Example #11
File: display.py Project: zzh6333/RecQ
 def __init__(self, conf):
     self.conf = conf
     if not conf.contains('ratings') and not conf.contains('social'):
         print 'The config file is not in the correct format!'
         exit(-1)
     if conf.contains('ratings'):
         ratingData = FileIO.loadDataSet(conf, conf['ratings'])
         self.dao = RatingDAO(conf, ratingData)
     if conf.contains('social'):
         relationData = FileIO.loadRelationship(conf, conf['social'])
         self.sao = SocialDAO(conf, relationData)
Code Example #12
File: RecQ.py Project: SuperSupeng/pythonIsAmazing
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            binarized = False
            bottom = 0
            if self.evaluation.contains('-b'):
                binarized = True
                bottom = float(self.evaluation['-b'])
            if self.evaluation.contains('-testSet'):
                #specify testSet

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.testData = FileIO.loadDataSet(config,
                                                   self.evaluation['-testSet'],
                                                   bTest=True,
                                                   binarized=binarized,
                                                   threshold=bottom)

            elif self.evaluation.contains('-ap'):
                #auto partition

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']),binarized=binarized)
            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,
                                                    self.config['social'])

        print('preprocessing...')
Code Example #13
    def __init__(self,config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.measure = []
        self.config =config
        setup = LineConfig(config['record.setup'])
        columns = {}
        labels = setup['-columns'].split(',')
        delim = ''
        if setup.contains('-delim'):
            delim=setup['-delim']
        for col in labels:
            label = col.split(':')
            columns[label[0]] = int(label[1])
        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            binarized = False
            bottom = 0
            if self.evaluation.contains('-b'):
                binarized = True
                bottom = float(self.evaluation['-b'])
            if self.evaluation.contains('-testSet'):
                #specify testSet

                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                self.testData = FileIO.loadDataSet(self.evaluation['-testSet'],binarized=binarized,columns=columns,threshold=bottom,delim=delim)

            elif self.evaluation.contains('-ap'):
                #auto partition

                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-byTime'):
                self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized,threshold=bottom, delim=delim)
                self.testData = []

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print 'Evaluation is not well configured!'
            exit(-1)

        # if config.contains('social'):
        #     self.socialConfig = LineConfig(self.config['social.setup'])
        #     self.relation = FileIO.loadRelationship(config,self.config['social'])

        print 'preprocessing...'
Code Example #14
    def dataSplit(data, test_ratio=0.3, output=False, path='./', order=1):
        testSet = []
        trainingSet = []
        for entry in data:
            if random() < test_ratio:
                testSet.append(entry)
            else:
                trainingSet.append(entry)

        if output:
            FileIO.writeFile(path, 'testSet[' + str(order) + ']', testSet)
            FileIO.writeFile(path, 'trainingSet[' + str(order) + ']',
                             trainingSet)
        return trainingSet, testSet
Code Example #15
    def dataSplit(data,test_ratio = 0.3,output=False,path='./',order=1):
        if test_ratio>=1 or test_ratio <=0:
            test_ratio = 0.3
        testSet = {}
        trainingSet = {}
        for user in data:
            if random() < test_ratio:
                testSet[user] = data[user].copy()
            else:
                trainingSet[user] = data[user].copy()

        if output:
            FileIO.writeFile(path,'testSet['+str(order)+']',testSet)
            FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet)
        return trainingSet,testSet
Code Example #16
File: attack.py Project: CoderWZW/SDLib
 def __init__(self,conf):
     self.config = Config(conf)
     self.userProfile = FileIO.loadDataSet(self.config,self.config['ratings'])
     self.itemProfile = defaultdict(dict)
     self.attackSize = float(self.config['attackSize'])
     self.fillerSize = float(self.config['fillerSize'])
     self.selectedSize = float(self.config['selectedSize'])
     self.targetCount = int(self.config['targetCount'])
     self.targetScore = float(self.config['targetScore'])
     self.threshold = float(self.config['threshold'])
     self.minCount = int(self.config['minCount'])
     self.maxCount = int(self.config['maxCount'])
     self.minScore = float(self.config['minScore'])
     self.maxScore = float(self.config['maxScore'])
     self.outputDir = self.config['outputDir']
     if not os.path.exists(self.outputDir):
         os.makedirs(self.outputDir)
     for user in self.userProfile:
         for item in self.userProfile[user]:
             self.itemProfile[item][user] = self.userProfile[user][item]
     self.spamProfile = defaultdict(dict)
     self.spamItem = defaultdict(list) #items rated by spammers
     self.targetItems = []
     self.itemAverage = {}
     self.getAverageRating()
     self.selectTarget()
     self.startUserID = 0
Code Example #17
File: attack.py Project: Chocalataa/SD
 def __init__(self, conf):
     self.config = Config(conf)
     self.userProfile = FileIO.loadDataSet(self.config,
                                           self.config['ratings'])
     self.itemProfile = defaultdict(dict)
     self.attackSize = float(self.config['attackSize'])
     self.fillerSize = float(self.config['fillerSize'])
     self.selectedSize = float(self.config['selectedSize'])
     self.targetCount = int(self.config['targetCount'])
     self.targetScore = float(self.config['targetScore'])
     self.threshold = float(self.config['threshold'])
     self.minCount = int(self.config['minCount'])
     self.maxCount = int(self.config['maxCount'])
     self.outputDir = self.config['outputDir']
     if not os.path.exists(self.outputDir):
         os.makedirs(self.outputDir)
     for user in self.userProfile:
         for item in self.userProfile[user]:
             self.itemProfile[item][user] = self.userProfile[user][item]
     self.spamProfile = defaultdict(dict)
     self.spamItem = defaultdict(list)  #items rated by spammers
     self.targetItems = []
     self.itemAverage = {}
     self.getAverageRating()
     self.selectTarget()
Code Example #18
File: display.py Project: zzh6333/RecQ
    def render(self):
        self.draw()
        html ="<html><head><title>Data Analysis</title>\n" \
              "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n" \
              "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n" \
              "<div class='main'><div class='area1'>\n" \
              "<div class='title'><h3>Data Files</h3></div><div class='text'>"
        if self.conf.contains('ratings'):
            html += "<b>Rating Data</b>: {rating}".format(
                rating=abspath(self.conf['ratings']))
        if self.conf.contains('social'):
            html += "<br><b>Social Data</b>: {social}".format(
                social=abspath(self.conf['social']))
        html+="</div></div><div style='padding-top:20px'><center>" \
              "<img src='images/header2.png'/></center></div>\n"
        if self.conf.contains('ratings'):
            html += "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n"
            html += "<div class='text'><b>Rating Scale</b>: {scale}</br>".format(
                scale=' '.join([str(item) for item in self.dao.rScale]))
            html += "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br><b>Record Count</b>: {record}<br><b>Global Mean</b>: {mean}</div>\n"\
                .format(user = str(len(self.dao.user)),item=str(len(self.dao.item)),record = str(len(self.dao.trainingData)),
                        mean = str(round(denormalize(self.dao.globalMean,self.dao.rScale[-1],self.dao.rScale[0]),3)))
            html += "<center><div class='img'><img src='images/rh.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rcu.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rci.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
              "<img src='images/header2.png'/></center></div>\n"
        if self.conf.contains('social'):
            html += "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n"
            html += "<div class='text'><b>User Count</b>: {user}<br><b>Relation Count</b>: {relation}<br></div>\n" \
                .format(user=str(len(self.sao.user)), relation=str(len(self.sao.relation)))
            html += "<center><div class='img'><img src='images/ff.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd1.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd2.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
                    "<img src='images/header2.png'/></center></div>\n"

        html += "</div></body></html>"
        FileIO.writeFile('../visual/visualization/', 'analysis.html', html)
        print 'The report has been output to', abspath(
            '../visual/visualization/analysis.html')
        webbrowser.open(abspath('../visual/visualization/analysis.html'),
                        new=0,
                        autoraise=True)
Code Example #19
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            N = 100
        res.append(
            'userId: recommendations in (itemId, ranking score) pairs\n')
        # predict
        topNSet = {}
        userCount = len(self.dao.testSet_u)
        for i, userId in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = userId + ':'
            for itemId in self.dao.item:
                pred = self.predict(userId, itemId)
                # add prediction in order to measure
                itemSet[itemId] = pred
            topNSet[userId] = sorted(itemSet.iteritems(),
                                     key=lambda d: d[1],
                                     reverse=True)[0:N]

            if i % 100 == 0:
                print 'Progress:' + str(i) + '/' + str(userCount)
            for item in topNSet[userId]:
                line += '(' + item[0] + ',' + str(item[1]) + ') '
            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
        FileIO.writeFile(outDir, fileName, measure)
Code Example #20
File: relationAttack.py Project: CoderWZW/SDLib
 def __init__(self,conf):
     super(RelationAttack, self).__init__(conf)
     self.spamLink = defaultdict(list)
     self.relation = FileIO.loadRelationship(self.config,self.config['social'])
     self.trustLink = defaultdict(list)
     self.trusteeLink = defaultdict(list)
     for u1,u2,t in self.relation:
         self.trustLink[u1].append(u2)
         self.trusteeLink[u2].append(u1)
     self.activeUser = {}  # normal users who follow the spam users
     self.linkedUser = {}  # users to whom the spam users have planted links
Code Example #21
 def __init__(self, conf):
     super(RelationAttack, self).__init__(conf)
     self.spamLink = defaultdict(list)
     self.relation = FileIO.loadRelationship(self.config,
                                             self.config['social'])
     self.trustLink = defaultdict(list)
     self.trusteeLink = defaultdict(list)
     for u1, u2, t in self.relation:
         self.trustLink[u1].append(u2)
         self.trusteeLink[u2].append(u1)
     self.activeUser = {}  # normal users who follow the spam users
     self.linkedUser = {}  # users to whom the spam users have planted links
Code Example #22
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])

            if self.evaluation.contains('-testSet'):
                #specify testSet
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                self.testData = FileIO.loadDataSet(config,
                                                   self.evaluation['-testSet'],
                                                   bTest=True)

            elif self.evaluation.contains('-ap'):
                #auto partition
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print 'Evaluation is not well configured!'
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,
                                                    self.config['social'])
        print 'preprocessing...'
Code Example #23
File: SDLib.py Project: CoderWZW/SDLib
    def __init__(self,config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config =config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            
            if self.evaluation.contains('-testSet'):
                #specify testSet
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)

            elif self.evaluation.contains('-ap'):
                #auto partition
                self.trainingData = FileIO.loadDataSet(config,config['ratings'])
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print 'Evaluation is not well configured!'
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,self.config['social'])
        print 'preprocessing...'
Code Example #24
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N cannot be larger than 100! It has been reset to 100'
                N = 100
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'

            for item in self.dao.item:
                # predict
                prediction = self.predict(user, item)
                # denormalize

                prediction = denormalize(prediction, self.dao.rScale[-1],
                                         self.dao.rScale[0])

                #prediction = self.checkRatingBoundary(prediction)
                #pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure
                if bThres:
                    if prediction > threshold:
                        itemSet[item] = prediction
                else:
                    itemSet[item] = prediction

            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[self.dao.id2item[item]]
            itemSet = sorted(itemSet.iteritems(),
                             key=lambda d: d[1],
                             reverse=True)
            if self.ranking.contains('-topN'):
                recList[user] = itemSet[0:N]
            elif self.ranking.contains('-threshold'):
                recList[user] = itemSet[:]
                userN[user] = len(itemSet)

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The Result has been output to ', abspath(outDir), '.'
        #output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        elif self.ranking.contains('-threshold'):
            origin = self.dao.testSet_u.copy()
            for user in origin:
                temp = {}
                for item in origin[user]:
                    if origin[user][item] >= threshold:
                        temp[item] = threshold
                origin[user] = temp
            self.measure = Measure.rankingMeasure_threshold(
                origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)
Code Example #25
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = int(top[-1])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reset to 10'
            N = 10

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userCount = len(self.data.testSet)

        for i, user in enumerate(self.data.testSet):

            line = user + ':'
            if self.data.userRecord.has_key(user):
                predictedItems = self.predict(user)
            else:
                predictedItems = ['0'] * N
            predicted = {}
            for k, item in enumerate(predictedItems):
                predicted[item] = k
            for item in self.data.userRecord[user]:
                if predicted.has_key(item[self.recType]):
                    del predicted[item[self.recType]]
            predicted = sorted(predicted.iteritems(), key=lambda d: d[1])
            predictedItems = [item[0] for item in predicted]
            recList[user] = predictedItems[:N]

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                if self.data.testSet[user].has_key(item):
                    line += '*'
                line += item + ','

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN']\
                           + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'

        self.measure = Measure.rankingMeasure(self.data.testSet, recList, top,
                                              self.data.getSize(self.recType))

        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Code Example #26
    def __init__(self, config, account_DAO=None):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])

        # self.accountDAO = account_DAO
        # self.currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            binarized = False
            bottom = 0
            if self.evaluation.contains('-b'):
                binarized = True
                bottom = float(self.evaluation['-b'])
            if self.evaluation.contains('-testSet'):
                # specify testSet

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.testData = FileIO.loadDataSet(config,
                                                   self.evaluation['-testSet'],
                                                   bTest=True,
                                                   binarized=binarized,
                                                   threshold=bottom)

            elif self.evaluation.contains('-ap'):
                # auto partition

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.trainingData, self.testData = DataSplit. \
                    dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']), binarized=binarized)
            elif self.evaluation.contains('-cv'):
                # cross validation
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

            elif self.evaluation.contains('--account'):
                self.training_user_item = account_DAO.training_user_item
                self.training_account_item = account_DAO.training_account_item
                self.relation = account_DAO.relation
                self.test_user_item = account_DAO.test_user_item

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,
                                                    self.config['social'])

        print('preprocessing...')
Code Example #27
File: Recommender.py Project: alexmay21/RecQ
    def evalRanking(self):
        res = []  # used to contain the text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N cannot be larger than 100! It has been reset to 100'
                N = 100
            if N > len(self.dao.item):
                N = len(self.dao.item)
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predictForRanking(user)
            # predictedItems = denormalize(predictedItems, self.dao.rScale[-1], self.dao.rScale[0])
            for id, rating in enumerate(predictedItems):
                # if not self.dao.rating(user, self.dao.id2item[id]):
                # prediction = self.checkRatingBoundary(prediction)
                # pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure
                # if bThres:
                #     if rating > threshold:
                #         itemSet[self.dao.id2item[id]]= rating
                # else:
                itemSet[self.dao.id2item[id]] = rating

            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            # if bTopN:
            # find the K biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:

                        mid = (l + r) / 2
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        else:
                            ind = mid
                            break
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])

                # shift lower-ranked entries back before inserting, as the
                # corrected variant in Code Example #28 does
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = zip(resNames, recommendations)
            # elif bThres:
            #     itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            #     recList[user] = itemSet[:]
            #     userN[user] = len(itemSet)

            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(
                    i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-threshold-' + str(
                        threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList,
                                                  N)
        # elif self.ranking.contains('-threshold'):
        #     origin = self.dao.testSet_u.copy()
        #     for user in origin:
        #         temp = {}
        #         for item in origin[user]:
        #             if origin[user][item] >= threshold:
        #                 temp[item] = threshold
        #         origin[user] = temp
        #     self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)
        print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo,
                                            ''.join(self.measure))
Code Example #28
    def evalRanking(self):
        res = []  # used to contain the text of the result

        if self.ranking.contains('-topN'):
            top = self.ranking['-topN'].split(',')
            top = [int(num) for num in top]
            N = int(top[-1])
            if N > 100 or N < 0:
                print('N cannot be larger than 100! It has been reset to 10')
                N = 10
            if N > len(self.data.item):
                N = len(self.data.item)
        else:
            print('No correct evaluation metric is specified!')
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}
        userN = {}
        userCount = len(self.data.testSet_u)
        #rawRes = {}
        for i, user in enumerate(self.data.testSet_u):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predictForRanking(user)
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            for id, rating in enumerate(predictedItems):
                # if not self.data.rating(user, self.data.id2item[id]):
                # prediction = self.checkRatingBoundary(prediction)
                # pred = self.checkRatingBoundary(prediction)
                #####################################
                # add prediction in order to measure

                itemSet[self.data.id2item[id]] = rating

            ratedList, ratingList = self.data.userRated(user)
            for item in ratedList:
                del itemSet[item]

            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # find the N biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while r >= l:
                        mid = (r - l) / 2 + l
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1

                        if r < l:
                            ind = r
                            break
                #move the items backwards
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item

            recList[user] = zip(resNames, recommendations)

            if i % 100 == 0:
                print(self.algorName, self.foldInfo,
                      'progress:' + str(i) + '/' + str(userCount))
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.data.testSet_u[user].has_key(item[0]):
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-top-' + str(
                    N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config[
            'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.data.testSet_u, recList,
                                              top)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
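The manual binary-insertion loops in the last two examples maintain a running top-N by hand. The standard library gives the same result more directly; a sketch using heapq.nlargest over the score dict (equivalent output, though ties may be ordered differently):

    import heapq

    def topN(itemSet, N):
        # itemSet maps itemId -> predicted score; return the N highest-scoring
        # (itemId, score) pairs in descending score order.
        return heapq.nlargest(N, itemSet.items(), key=lambda pair: pair[1])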
Code Example #29
import sys
sys.path.append("..")
from tool.config import Config
from tool.file import FileIO
from algorithm.SoReg import SoReg

if __name__ == '__main__':

    print '=' * 80
    print 'this is the test of the algorithm'
    algor = -1
    conf = -1
    conf = Config('../conf/SoReg.conf')
    trainset = []
    testset = []
    relation = []
    ui={}
    vj={}
    user_item_avg={}
    trainset = FileIO.loadDataSet(conf,conf['ratings'],bTest=False)
    testset = FileIO.loadDataSet(conf,conf['testset'],bTest=True)
    relation = FileIO.loadRelationship(conf,conf['social'])
    ui,vj,user_item_avg=SoReg.buildmodel(trainset,relation)
    SoReg.pred(testset,ui,vj,user_item_avg)
#store the data
Code Example #30
File: RecQ.py Project: SuperSupeng/pythonIsAmazing
    def execute(self):
        #import the algorithm module
        try:
            importStr = 'from algorithm.rating.' + self.config[
                'recommender'] + ' import ' + self.config['recommender']
            exec(importStr)
        except ImportError:
            importStr = 'from algorithm.ranking.' + self.config[
                'recommender'] + ' import ' + self.config['recommender']
            exec(importStr)
        if self.evaluation.contains('-cv'):
            k = int(self.evaluation['-cv'])
            if k <= 1 or k > 10:
                k = 3

            mkl.set_num_threads(max(1, mkl.get_max_threads() // k))  # threads per fold must be an int

            #create the manager
            manager = Manager()
            m = manager.dict()
            i = 1
            tasks = []

            binarized = False
            if self.evaluation.contains('-b'):
                binarized = True

            for train, test in DataSplit.crossValidation(self.trainingData,
                                                         k,
                                                         binarized=binarized):
                fold = '[' + str(i) + ']'
                if self.config.contains('social'):
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,self.relation,fold)"
                else:
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,fold)"
                # create the process
                p = Process(target=run, args=(m, eval(recommender), i))
                tasks.append(p)
                i += 1
            #start the processes
            for p in tasks:
                p.start()
                if not self.evaluation.contains('-p'):
                    p.join()
            #wait until all processes are completed
            if self.evaluation.contains('-p'):
                for p in tasks:
                    p.join()
            #compute the mean error of k-fold cross validation
            self.measure = [dict(m)[i] for i in range(1, k + 1)]
            res = []
            for i in range(len(self.measure[0])):
                if self.measure[0][i][:3] == 'Top':
                    res.append(self.measure[0][i])
                    continue
                measure = self.measure[0][i].split(':')[0]
                total = 0
                for j in range(k):
                    total += float(self.measure[j][i].split(':')[1])
                res.append(measure + ':' + str(total / k) + '\n')
            #output result
            currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config[
                'recommender'] + '@' + currentTime + '-' + str(
                    k) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result of %d-fold cross validation:\n%s' %
                  (k, ''.join(res)))

        else:
            if self.config.contains('social'):
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
            else:
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData)'
            eval(recommender).execute()
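
The k-fold averaging loop above works line by line on the 'name:value' strings each fold reports. Distilled into a standalone helper, under the assumption of that same line format (with 'Top...' header lines copied through unchanged), it reads:

def average_fold_measures(measures, k):
    """Average k folds of 'name:value' measure lines.

    measures is a list of k lists of strings, one per fold, all in the
    same order, as collected from the manager dict above.
    """
    res = []
    for i, line in enumerate(measures[0]):
        if line[:3] == 'Top':  # section header, not a metric line
            res.append(line)
            continue
        name = line.split(':')[0]
        total = sum(float(measures[j][i].split(':')[1]) for j in range(k))
        res.append('%s:%s\n' % (name, total / k))
    return res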
Code example #31
File: IterativeRecommender.py Project: zkalan/Yue
    def evalRanking(self):
        res = []  # used to contain the text of the result
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = max(top)

        if N > 100 or N < 0:
            print('N cannot be larger than 100! It has been reset to 10.')
            N = 10

        res.append('userId: recommended items in ranked order, * means the item matches the test set.\n')
        # predict
        recList = {}
        userCount = len(self.data.testSet)

        for i, user in enumerate(self.data.testSet):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predict(user)

            for item_id, score in enumerate(predictedItems):  # avoid shadowing the id() builtin
                itemSet[self.data.id2name[self.recType][item_id]] = score

            for item in self.data.userRecord[user]:
                try:
                    del itemSet[item[self.recType]]
                except KeyError:
                    pass
            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break

            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]

            # find the N biggest scores
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1

                if recommendations[r] < itemSet[item]:
                    while True:

                        mid = (l + r) // 2
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        else:
                            ind = mid
                            break
                        if r < l:
                            ind = r
                            break
                # ind = bisect(recommendations, itemSet[item])

                # move the items backwards to keep the buffer sorted
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = resNames

            if i % 100 == 0:
                print (self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
            for item in recList[user]:
                line += ' ' + item  # separate the item names in the output line
                if item in self.data.testSet[user]:
                    line += '*'

            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            fileName = ''
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN']\
                           + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print ('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'

        self.measure = Measure.rankingMeasure(self.data.testSet, recList, top,
                                              self.data.getSize(self.recType))

        FileIO.writeFile(outDir, fileName, self.measure)
        print ('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
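
The commented-out bisect hint above points at a less error-prone route. Since the bisect module assumes ascending order while the buffer above is kept descending, one workable sketch keeps an ascending buffer and reverses it at the end; itemSet is again the item-to-score dict built in the loop:

from bisect import bisect_left

def top_n_bisect(itemSet, N):
    """Return (names, scores) of the N best items, best first."""
    names, scores = [], []  # kept ascending by score
    for item, score in itemSet.items():
        if len(scores) < N:
            ind = bisect_left(scores, score)
        elif score > scores[0]:  # beats the current worst item
            del scores[0], names[0]
            ind = bisect_left(scores, score)
        else:
            continue
        scores.insert(ind, score)
        names.insert(ind, item)
    return names[::-1], scores[::-1]  # descending, like resNames above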
Code example #32
File: SDLib.py Project: CoderWZW/SDLib
 def execute(self):
     #import the algorithm module
     importStr = 'from method.' + self.config['methodName'] + ' import ' + self.config['methodName']
     exec (importStr)
     if self.evaluation.contains('-cv'):
         k = int(self.evaluation['-cv'])
         if k <= 1 or k > 10:
             k = 3
         # create the manager used for communication between processes
         manager = Manager()
         m = manager.dict()
         i = 1
         tasks = []
         for train,test in DataSplit.crossValidation(self.trainingData,k):
             fold = '['+str(i)+']'
             if self.config.contains('social'):
                 method = self.config['methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
             else:
                 method = self.config['methodName'] + "(self.config,train,test,self.labels,fold)"
             # create the process
             p = Process(target=run,args=(m,eval(method),i))
             tasks.append(p)
             i+=1
         #start the processes
         for p in tasks:
             p.start()
         #wait until all processes are completed
         for p in tasks:
             p.join()
         #compute the mean error of k-fold cross validation
         self.measure = [dict(m)[i] for i in range(1,k+1)]
         res = []
         pattern = re.compile(r'(\d+\.\d+)')
         countPattern = re.compile(r'\d+\n')
         labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
         labels = re.findall(labelPattern, self.measure[0])
         values = np.array([0]*9,dtype=float)
         count = np.array([0,0,0],dtype=int)
         for report in self.measure:
             values += np.array(re.findall(pattern,report),dtype=float)
             count+=np.array(re.findall(countPattern,report),dtype=int)
         values/=k
         values=np.around(values,decimals=4)
         res.append('             precision  recall  f1-score  support\n\n')
         res.append('         '+labels[0]+'  '+'    '.join(np.array(values[0:3],dtype=str).tolist())+'   '+str(count[0])+'\n')
         res.append('         '+labels[1]+'  '+'    '.join(np.array(values[3:6],dtype=str).tolist())+'   '+str(count[1])+'\n\n')
         res.append('  avg/total   ' + '    '.join(np.array(values[6:9], dtype=str).tolist()) + '   ' + str(count[2]) + '\n')
         print 'Total:'
         print ''.join(res)
         #output result
         currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
         outDir = LineConfig(self.config['output.setup'])['-dir']
         fileName = self.config['methodName'] +'@'+currentTime+'-'+str(k)+'-fold-cv' + '.txt'
         FileIO.writeFile(outDir,fileName,res)
         print 'The results have been output to '+abspath(LineConfig(self.config['output.setup'])['-dir'])+'\n'
     else:
         if self.config.contains('social'):
             method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
         else:
             method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
         eval(method).execute()
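
Recovering numbers from classification_report text with regular expressions, as above, is brittle: it hard-codes two classes and exactly nine values per report. If the per-fold labels and predictions are still available (an assumption; the code above only keeps the rendered report strings), sklearn can average the metrics numerically:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def average_reports(fold_results):
    """Mean per-class precision/recall/F1 over folds.

    fold_results is a hypothetical list of (testLabels, prediction)
    pairs, one per fold; every fold must contain the same class labels.
    """
    per_fold = [precision_recall_fscore_support(y_true, y_pred)
                for y_true, y_pred in fold_results]
    # each entry is (precision, recall, f1, support), one value per class
    prec = np.mean([f[0] for f in per_fold], axis=0)
    rec = np.mean([f[1] for f in per_fold], axis=0)
    f1 = np.mean([f[2] for f in per_fold], axis=0)
    return np.around(prec, 4), np.around(rec, 4), np.around(f1, 4)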
Code example #33
 def execute(self):
     #import the algorithm module
     importStr = 'from method.' + self.config[
         'methodName'] + ' import ' + self.config['methodName']
     exec(importStr)
     if self.evaluation.contains('-cv'):
         k = int(self.evaluation['-cv'])
         if k <= 1 or k > 10:
             k = 3
         # create the manager used for communication between processes
         manager = Manager()
         m = manager.dict()
         i = 1
         tasks = []
         for train, test in DataSplit.crossValidation(self.trainingData, k):
             fold = '[' + str(i) + ']'
             if self.config.contains('social'):
                 method = self.config[
                     'methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
             else:
                 method = self.config[
                     'methodName'] + "(self.config,train,test,self.labels,fold)"
             # create the process
             p = Process(target=run, args=(m, eval(method), i))
             tasks.append(p)
             i += 1
         #start the processes
         for p in tasks:
             p.start()
         #wait until all processes are completed
         for p in tasks:
             p.join()
         #compute the mean error of k-fold cross validation
         self.measure = [dict(m)[i] for i in range(1, k + 1)]
         res = []
         pattern = re.compile(r'(\d+\.\d+)')
         countPattern = re.compile(r'\d+\n')
         labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
         labels = re.findall(labelPattern, self.measure[0])
         values = np.array([0] * 9, dtype=float)
         count = np.array([0, 0, 0], dtype=int)
         for report in self.measure:
             values += np.array(re.findall(pattern, report), dtype=float)
             count += np.array(re.findall(countPattern, report), dtype=int)
         values /= k
         values = np.around(values, decimals=4)
         res.append('             precision  recall  f1-score  support\n\n')
         res.append('         ' + labels[0] + '  ' +
                    '    '.join(np.array(values[0:3], dtype=str).tolist()) +
                    '   ' + str(count[0]) + '\n')
         res.append('         ' + labels[1] + '  ' +
                    '    '.join(np.array(values[3:6], dtype=str).tolist()) +
                    '   ' + str(count[1]) + '\n\n')
         res.append('  avg/total   ' +
                    '    '.join(np.array(values[6:9], dtype=str).tolist()) +
                    '   ' + str(count[2]) + '\n')
         print('Total:')
         print(''.join(res))
         #output result
         currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
         outDir = LineConfig(self.config['output.setup'])['-dir']
         fileName = self.config[
             'methodName'] + '@' + currentTime + '-' + str(
                 k) + '-fold-cv' + '.txt'
         FileIO.writeFile(outDir, fileName, res)
         print('The results have been output to ' +
               abspath(LineConfig(self.config['output.setup'])['-dir']) +
               '\n')
     else:
         if self.config.contains('social'):
             method = self.config[
                 'methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
         else:
             method = self.config[
                 'methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
         result = eval(method).execute()
         return result
Code example #34
    def evalRanking(self, write_to_file=True, use_now_time=False):
        res = []  # used to contain the text of the result

        if self.ranking.contains('-topN'):
            top = self.ranking['-topN'].split(',')
            top = [int(num) for num in top]
            N = max(top)
            if N > 100 or N < 0:
                print('N cannot be larger than 100! It has been reset to 10.')
                N = 10
            if N > len(self.data.item):
                N = len(self.data.item)
        else:
            print('No -topN ranking setting is specified!')
            exit(-1)

        res.append(
            'userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n'
        )
        # predict
        recList = {}

        testSample = self.testSample

        # # multiprocessing way
        # pool = Pool(12)
        # dataset = []
        # for user, testSample_u in testSample.items():
        #     identified_user = self.map_from_true_to_identify.get(user, -1)
        #     if identified_user == -1:
        #         continue
        #     dataset.append([user, identified_user, testSample_u])
        #
        # result_generator = pool.imap_unordered(partial(self.get_recommendation, N=N), dataset)
        #     for result in tqdm(result_generator, total=len(dataset), desc='Measuring [{}]'.format(self.algorName)):
        #     user, line, recList_user = result
        #     recList[user] = recList_user
        #     res.append(line)
        # pool.close()
        # pool.join()

        testSample_copy = testSample.copy()

        for i, user in tqdm(enumerate(testSample),
                            total=len(testSample),
                            desc='Measuring [{}]'.format(self.algorName)):
            identified_user = self.map_from_true_to_identify.get(user, -1)
            if identified_user == -1:
                del testSample_copy[user]
                continue
            user, line, recList_user = self.get_recommendation(
                (user, identified_user, testSample[user]), N)

            recList[user] = recList_user
            res.append(line)

        self.measure = Measure.rankingMeasure(testSample_copy, recList, top)
        # record optional hyper-parameters only when the model defines them;
        # the bare 'except:' blocks this replaces would also have swallowed
        # KeyboardInterrupt and SystemExit
        for name in ('C', 'L', 'K', 'N'):
            if hasattr(self, name):
                self.measure.append('{}:{}\n'.format(name, getattr(self, name)))

        if use_now_time:
            currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        else:
            currentTime = self.currentTime
        if write_to_file:
            # output prediction result
            if False and self.isOutput:  # prediction dump intentionally disabled
                fileName = ''
                outDir = self.output['-dir']
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-top-' + str(
                        N) + 'items' + self.foldInfo + '.txt'
                FileIO.writeFile(outDir, fileName, res)
            # output evaluation result
            outDir = self.output['-dir']
            try:
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '_C{}'.format(
                        self.C) + '.txt'
            except AttributeError:  # the model defines no C hyper-parameter
                fileName = self.config[
                    'recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, self.measure)
            # FileIO.writeFile(outDir, fileName, "C:{}".format(self.C))

            print('The result has been output to ', abspath(outDir), '.')
        print('The result of %s %s:\n%s' %
              (self.algorName, self.foldInfo, ''.join(self.measure)))
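
The commented-out multiprocessing block inside evalRanking above outlines a Pool-based fan-out. A self-contained sketch of that pattern, with a hypothetical module-level worker standing in for the class's get_recommendation (Pool pickles its work function, so a plain function is the safe choice):

from functools import partial
from multiprocessing import Pool

from tqdm import tqdm

def get_recommendation(args, N):
    """Stand-in worker: (user, identified_user, testSample_u) -> result."""
    user, identified_user, testSample_u = args
    line = user + ':'   # build the annotated output line here
    recList_user = []   # and the user's top-N recommendation list
    return user, line, recList_user

def rank_in_parallel(dataset, N, processes=12):
    recList, res = {}, []
    pool = Pool(processes)
    gen = pool.imap_unordered(partial(get_recommendation, N=N), dataset)
    for user, line, recList_user in tqdm(gen, total=len(dataset),
                                         desc='Measuring'):
        recList[user] = recList_user  # arrival order varies; keying by user keeps it safe
        res.append(line)
    pool.close()
    pool.join()
    return recList, res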