Example 1
    def __init__(self,
                 config_dict,
                 config_account,
                 account_DAO,
                 C=3,
                 K=1,
                 L=-1,
                 N=0):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config_dict = config_dict
        self.C = C
        self.K = K
        self.L = L
        self.N = N

        self.accountDAO = account_DAO

        if config_account.contains('evaluation.setup'):
            all_evaluation = LineConfig(config_account['evaluation.setup'])
            if all_evaluation.contains('--account'):
                self.training_user_item = account_DAO.training_user_item
                self.training_account_item = account_DAO.training_account_item
                self.relation = account_DAO.relation
                self.test_user_item = account_DAO.test_user_item
        else:
            raise Exception('Evaluation is not well configured!')

        print('preprocessing...')
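
Note: every example on this page leans on a LineConfig helper whose definition is not shown. The sketch below is inferred purely from how the examples call it (contains, bracket lookup, getOption, isMainOn) and may differ from the real class in these projects:

class LineConfig(object):
    """Parse a flag-style option line such as '-testSet ./test.txt -b 3'."""

    def __init__(self, content):
        self.line = content.strip().split()
        self.options = {}
        i = 0
        while i < len(self.line):
            token = self.line[i]
            # a token starting with '-' that is not a negative number is a flag
            if token.startswith('-') and not token[1:].isdigit():
                values = []
                i += 1
                while i < len(self.line) and not (
                        self.line[i].startswith('-')
                        and not self.line[i][1:].isdigit()):
                    values.append(self.line[i])
                    i += 1
                # multi-value flags such as '-columns 0 1 2' stay one string
                self.options[token] = ' '.join(values)
            else:
                i += 1

    def contains(self, key):
        return key in self.options

    def __getitem__(self, key):
        return self.options[key]

    def getOption(self, key):
        return self.options[key]

    def isMainOn(self):
        # assumption: lines such as output.setup begin with 'on' or 'off'
        return bool(self.line) and self.line[0] == 'on'

With this reading, LineConfig('-columns 0 1 2 -header')['-columns'].strip().split() yields ['0', '1', '2'], which is exactly how Examples 6, 13, and 21 recover the column order.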
Example 2
    def execute(self, config, max_sample=1000):
        # import the algorithm module

        importStr = 'from algorithm.ranking.' + config[
            'recommender'] + ' import ' + config['recommender']
        exec(importStr)

        algo_evaluation = LineConfig(config['evaluation.setup'])
        if algo_evaluation.contains('-ul') and eval(
                algo_evaluation['-ul']) > 0:
            training_data = 'self.training_user_item'
            social_info = 'relation=self.relation'
        else:
            training_data = 'self.training_account_item'
            social_info = ''

        if config['recommender'].startswith('ABPR'):
            recommender = config['recommender'] + '(config, {}, self.test_user_item, {}, C={}, N={})'. \
                format(training_data, social_info, self.C, self.N)
        else:
            recommender = config['recommender'] + '(config, {}, self.test_user_item, {})'.\
                format(training_data, social_info)

        algorithm = eval(recommender)
        algorithm.accountDAO = self.accountDAO
        algorithm.evaluation_conf = algo_evaluation
        algorithm.get_test_map(K=self.K, L=self.L)
        algorithm.get_test_sample_data(max_sample=max_sample)

        algorithm.execute()
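
Examples 2, 18, 22, and 32 build both the import statement and the constructor call as strings and run them with exec/eval. Under Python 3 this pattern is fragile (a name bound by exec inside a function is not reliably visible to a later eval in that function's scope), and it executes whatever the configuration file says. A sketch of the same dynamic dispatch via importlib, assuming only what the import strings already imply, namely that module algorithm.ranking.<Name> defines a class <Name>:

import importlib

def load_recommender_class(name, package='algorithm.ranking'):
    # assumption: algorithm.ranking.<Name> defines class <Name>,
    # the same contract the exec'd 'from ... import ...' string relies on
    module = importlib.import_module(package + '.' + name)
    return getattr(module, name)

The string-splicing branch logic above then reduces to ordinary arguments, e.g. cls = load_recommender_class(config['recommender']); cls(config, self.training_user_item, self.test_user_item, relation=self.relation).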
Example 3
 def __init__(self, config):
     self.config = config
     self.ratingConfig = LineConfig(config['ratings.setup'])
     self.evaluation = LineConfig(config['evaluation.setup'])
     self.user = {}  #used to store the order of users
     self.item = {}  #used to store the order of items
     self.userMeans = {}  #used to store the mean values of users' ratings
     self.itemMeans = {}  #used to store the mean values of items' ratings
     self.triple = []  #training data
     self.globalMean = 0
     self.timestamp = {}
     self.ratingMatrix = None
     self.trainingMatrix = None
     self.validationMatrix = None
     self.testSet_u = None  # used to store the test set by hierarchy user:[item,rating]
     self.testSet_i = None  # used to store the test set by hierarchy item:[user,rating]
     self.rScale = [-9999999, 999999]
     if self.evaluation.contains('-testSet'):
         #specify testSet
         self.trainingMatrix = self.__loadRatings(config['ratings'])
         self.testSet_u, self.testSet_i = self.__loadRatings(
             self.evaluation['-testSet'], True)
     else:  #cross validation and leave-one-out
         self.ratingMatrix = self.__loadRatings(config['ratings'])
     self.__computeItemMean()
     self.__computeUserMean()
     self.__globalAverage()
Example 4
 def __init__(self, config, trainingSet, testSet):
     self.config = config
     self.recordConfig = LineConfig(config['record.setup'])
     self.evalConfig = LineConfig(config['evaluation.setup'])
     self.name2id = defaultdict(dict)
     self.id2name = defaultdict(dict)
     self.artistListened = defaultdict(dict)  #key:artist id, value:{user id1:count, user id2:count, ...}
     self.albumListened = defaultdict(dict)  #key:album id, value:{user id1:count, user id2:count, ...}
     self.trackListened = defaultdict(dict)  #key:track id, value:{user id1:count, user id2:count, ...}
     self.artist2Album = defaultdict(dict)  #key:artist id, value:{album id1:1, album id2:1, ...}
     self.album2Track = defaultdict(dict)  #
     self.artist2Track = defaultdict(dict)  #
     self.userRecord = defaultdict(list)  #user data in training set. form: {user:[record1, record2]}
     self.testSet = defaultdict(dict)  #user data in test set. form: {user:{recommendedObject1:1, recommendedObject2:1}}
     self.recordCount = 0
     if self.evalConfig.contains('-byTime'):
         trainingSet, testSet = self.splitDataByTime(trainingSet)
     self.preprocess(trainingSet, testSet)
Example 5
 def readConfiguration(self):
     super(IPF, self).readConfiguration()
     self.rho = float(LineConfig(self.config['IPF'])['-rho'])
     if self.rho < 0 or self.rho > 1:
         self.rho = 0.5
     self.beta = float(LineConfig(self.config['IPF'])['-beta'])
     self.eta = float(LineConfig(self.config['IPF'])['-eta'])
Example 6
 def loadRelationship(conf, filePath):
     socialConfig = LineConfig(conf['social.setup'])
     relation = []
     print('loading social data...')
     with open(filePath) as f:
         relations = f.readlines()
     # skip the header line if one is declared
     if socialConfig.contains('-header'):
         relations = relations[1:]
     # order of the columns
     order = socialConfig['-columns'].strip().split()
     # two columns (truster, trustee) are enough; a third, optional column is the tie weight
     if len(order) < 2:
         print('The social file is not in a correct format.')
         exit(-1)
     for line in relations:
         items = split(' |,|\t', line.strip())  # re.split, assuming 'from re import split'
         userId1 = items[int(order[0])]
         userId2 = items[int(order[1])]
         if len(order) < 3:
             weight = 1
         else:
             weight = float(items[int(order[2])])
         relation.append([userId1, userId2, weight])
     return relation
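
loadRelationship is driven entirely by two configuration keys. A hypothetical pair of entries that this parser would accept (the key=value syntax and the path are illustrative, not taken from these projects):

social=./dataset/trust.txt
social.setup=-columns 0 1 2 -header

-columns gives the positions of truster, trustee, and the optional tie weight (with only two columns the weight defaults to 1), and -header makes the first line of the file be skipped.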
Example 7
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        self.config = conf
        self.isSaveModel = False
        self.isLoadModel = False
        self.isOutput = True
        self.data = Record(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.evalConfig = LineConfig(self.config['evaluation.setup'])
        if self.evalConfig.contains('-target'):
            self.recType = self.evalConfig['-target']
        else:
            self.recType = 'track'
        if self.evalConfig.contains('-cold'):
            #evaluation on cold-start users
            threshold = int(self.evalConfig['-cold'])
            removedUser = []
            for user in self.data.testSet:
                if user in self.data.userRecord and len(self.data.userRecord[user]) > threshold:
                    removedUser.append(user)
            for user in removedUser:
                del self.data.testSet[user]

        if self.evalConfig.contains('-sample'):
            userList = list(self.data.testSet.keys())
            removedUser = userList[:int(len(userList) * 0.9)]
            for user in removedUser:
                del self.data.testSet[user]
Example 8
    def __init__(self, conf, trainingSet, testSet, fold='[1]'):
        self.config = conf
        self.data = None
        self.isSaveModel = False
        self.ranking = None
        self.isLoadModel = False
        self.output = None
        self.isOutput = True
        self.data = RatingDAO(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.evalSettings = LineConfig(self.config['evaluation.setup'])
        self.measure = []
        self.record = []
        if self.evalSettings.contains('-cold'):
            #evaluation on cold-start users
            threshold = int(self.evalSettings['-cold'])
            removedUser = {}
            for user in self.data.testSet_u:
                if user in self.data.trainSet_u and len(self.data.trainSet_u[user]) > threshold:
                    removedUser[user] = 1

            for user in removedUser:
                del self.data.testSet_u[user]

            testData = []
            for item in self.data.testData:
                if item[0] not in removedUser:
                    testData.append(item)
            self.data.testData = testData

        self.num_users, self.num_items, self.train_size = self.data.trainingSize()
Example 9
 def __init__(self, config, trainingSet, testSet):
     self.config = config
     self.recordConfig = LineConfig(config['record.setup'])
     self.evalConfig = LineConfig(config['evaluation.setup'])
     self.name2id = defaultdict(dict)
     self.id2name = defaultdict(dict)
     self.listened = {}
     self.listened['artist'] = defaultdict(dict)
     self.listened['track'] = defaultdict(dict)
     self.listened['album'] = defaultdict(dict)
     self.artist2Album = defaultdict(dict)  #key:artist id, value:{album id1:1, album id2:1, ...}
     self.album2Track = defaultdict(dict)  #
     self.artist2Track = defaultdict(dict)  #
     self.userRecord = defaultdict(list)  #user data in training set. form: {user:[record1, record2]}
     self.testSet = defaultdict(dict)  #user data in test set. form: {user:{recommendedObject1:1, recommendedObject2:1}}
     self.recordCount = 0
     self.columns = {}
     labels = self.recordConfig['-columns'].split(',')
     for col in labels:
         label = col.split(':')
         self.columns[label[0]] = int(label[1])
     if self.evalConfig.contains('-byTime'):
         trainingSet, testSet = self.splitDataByTime(trainingSet)
     self.preprocess(trainingSet, testSet)
Example 10
 def __init__(self, conf):
     self.config = conf
     self.socialConfig = LineConfig(self.config['social.setup'])
     self.user = {}  #used to store the order of users
     self.triple = []
     self.followees = {}
     self.followers = {}
     self.trustMatrix = self.loadRelationship(self.config['social'])
Example 11
 def printAlgorConfig(self):
     "show algorithm's configuration"
     print('Algorithm:', self.config['recommender'])
     print('Training set:', abspath(self.config['record']))
     if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
         print('Test set:', abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
     #print 'Count of the users in training set: ',len()
     self.data.printTrainingSize()
     print('=' * 80)
Example 12
 def printAlgorConfig(self):
     "show algorithm's configuration"
     print('Algorithm:', self.config['recommender'])
     print('Ratings dataset:', abspath(self.config['ratings']))
     if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
         print('Test set:', abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
     #print 'Count of the users in training set: ',len()
     print('Training set size: (user count: %d, item count: %d, record count: %d)' % (self.dao.trainingSize()))
     print('Test set size: (user count: %d, item count: %d, record count: %d)' % (self.dao.testSize()))
     print('=' * 80)
Example 13
 def loadDataSet(conf, file, bTest=False, binarized=False, threshold=3.0):
     trainingData = []
     testData = []
     ratingConfig = LineConfig(conf['ratings.setup'])
     if not bTest:
         print('loading training data...')
     else:
         print('loading test data...')
     with open(file) as f:
         ratings = f.readlines()
     # skip the header line
     if ratingConfig.contains('-header'):
         ratings = ratings[1:]
     # order of the columns
     order = ratingConfig['-columns'].strip().split()
     delim = ' |,|\t'
     if ratingConfig.contains('-delim'):
         delim = ratingConfig['-delim']
     for lineNo, line in enumerate(ratings):
         items = split(delim, line.strip())
         if not bTest and len(order) < 2:
             print(
                 'The rating file is not in a correct format. Error: Line num %d'
                 % lineNo)
             exit(-1)
         try:
             userId = items[int(order[0])]
             itemId = items[int(order[1])]
             if len(order) < 3:
                 rating = 1  #default value
             else:
                 rating = items[int(order[2])]
             if binarized:
                 if float(items[int(order[2])]) < threshold:
                     continue
                 else:
                     rating = 1
         except ValueError:
             print(
                 'Error! Have you added the option -header to ratings.setup?'
             )
             exit(-1)
         if not bTest:
             trainingData.append([userId, itemId, float(rating)])
         else:
             # skip below-threshold records when binarizing; append each kept record once
             if binarized and rating != 1:
                 continue
             testData.append([userId, itemId, float(rating)])
     if not bTest:
         return trainingData
     else:
         return testData
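
loadDataSet reads its layout from ratings.setup in the same way. A hypothetical configuration exercising every option the function checks (again, values and the key=value syntax are illustrative):

ratings=./dataset/ratings.csv
ratings.setup=-columns 0 1 2 -header -delim ,

-columns maps user, item, and rating to file columns (omit the third index and every rating defaults to 1), -header skips the first line, and -delim overrides the default ' |,|\t' split pattern; the -b <threshold> flag that enables binarized loading lives in evaluation.setup (see Example 14).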
Example 14
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            binarized = False
            bottom = 0
            if self.evaluation.contains('-b'):
                binarized = True
                bottom = float(self.evaluation['-b'])
            if self.evaluation.contains('-testSet'):
                #specify testSet

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.testData = FileIO.loadDataSet(config,
                                                   self.evaluation['-testSet'],
                                                   bTest=True,
                                                   binarized=binarized,
                                                   threshold=bottom)

            elif self.evaluation.contains('-ap'):
                #auto partition

                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']),binarized=binarized)
            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'],
                                                       binarized=binarized,
                                                       threshold=bottom)
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,
                                                    self.config['social'])

        print('preprocessing...')
Example 15
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.measure = []
        self.config = config
        setup = LineConfig(config['record.setup'])
        columns = {}
        labels = setup['-columns'].split(',')
        delim = ''
        if setup.contains('-delim'):
            delim = setup['-delim']
        for col in labels:
            label = col.split(':')
            columns[label[0]] = int(label[1])
        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            binarized = False
            bottom = 0
            if self.evaluation.contains('-b'):
                binarized = True
                bottom = float(self.evaluation['-b'])
            if self.evaluation.contains('-testSet'):
                #specify testSet

                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                self.testData = FileIO.loadDataSet(self.evaluation['-testSet'],binarized=binarized,columns=columns,threshold=bottom,delim=delim)

            elif self.evaluation.contains('-ap'):
                #auto partition

                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-byTime'):
                self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized,threshold=bottom, delim=delim)
                self.testData = []

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config['record'],columns=columns,binarized=binarized,threshold=bottom,delim=delim)
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        # if config.contains('social'):
        #     self.socialConfig = LineConfig(self.config['social.setup'])
        #     self.relation = FileIO.loadRelationship(config,self.config['social'])

        print('preprocessing...')
Example 16
 def readConfiguration(self):
     super(MEM, self).readConfiguration()
     MEMConfig = LineConfig(self.config['MEM'])
     self.epoch = int(MEMConfig['-epoch'])
     self.winSize = int(MEMConfig['-winSize'])
     self.negCount = int(MEMConfig['-negCount'])
     self.beta = float(MEMConfig['-beta'])
Example 17
    def __init__(self, config, trainingSet, testSet):
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.user = {} #used to store the order of users in the training set
        self.item = {} #used to store the order of items in the training set
        self.id2user = {}
        self.id2item = {}
        self.all_Item = {}
        self.all_User = {}
        self.userMeans = {} #used to store the mean values of users' ratings
        self.itemMeans = {} #used to store the mean values of items' ratings
        self.globalMean = 0
        self.timestamp = {}
        self.trainSet_u = defaultdict(dict)
        self.trainSet_i = defaultdict(dict)
        self.testSet_u = defaultdict(dict) # used to store the test set by hierarchy user:[item,rating]
        self.testSet_i = defaultdict(dict) # used to store the test set by hierarchy item:[user,rating]
        self.rScale = []

        self.trainingData = trainingSet[:]
        self.testData = testSet[:]

        self.__generateSet()

        self.__computeItemMean()
        self.__computeUserMean()
        self.__globalAverage()
Example 18
    def execute(self):
        exec('from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender'])
        if self.evaluation.contains('-cv'):
            i = 1
            for train, test in DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv'])):
                fold = '[' + str(i) + ']'
                recommender = self.config['recommender'] + "(self.config,train,test,fold)"
                measure = eval(recommender).execute()
                self.measure.append(measure)
                i += 1
            res = []
            for i in range(len(self.measure[0])):
                measure = self.measure[0][i].split(':')[0]
                total = 0
                for j in range(len(self.measure)):
                    total += float(self.measure[j][i].split(':')[1])
                res.append(measure + ':' + str(total / len(self.measure)) + '\n')
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['recommender'] + '@' + str(int(self.evaluation['-cv'])) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)

        else:
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
            eval(recommender).execute()
Example 19
    def __init__(self, config, trainingSet=None, testSet=None):
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.user = {}  #used to store the order of users
        self.item = {}  #used to store the order of items
        self.userMeans = {}  #used to store the mean values of users' ratings
        self.itemMeans = {}  #used to store the mean values of items' ratings
        self.trainingData = []  #training data
        self.testData = []  #testData
        self.globalMean = 0
        self.timestamp = {}
        self.trainingMatrix = None
        self.validationMatrix = None
        self.testSet_u = {}  # used to store the test set by hierarchy user:[item,rating]
        self.testSet_i = {}  # used to store the test set by hierarchy item:[user,rating]
        self.rScale = []

        self.trainingData = trainingSet
        self.testData = testSet
        self.__generateSet()

        self.__computeItemMean()
        self.__computeUserMean()
        self.__globalAverage()
Example 20
 def readConfiguration(self):
     super(TSWalker, self).readConfiguration()
     self.sim = self.config['similarity']
     TW = LineConfig(self.config['TSWalker'])
     self.k = int(TW['-k'])
     self.v = float(TW['-v'])
     self.tw = int(TW['-tw'])
Example 21
    def loadDataSet(conf, file, bTest=False):
        trainingData = defaultdict(dict)
        testData = defaultdict(dict)
        ratingConfig = LineConfig(conf['ratings.setup'])
        if not bTest:
            print('loading training data...')
        else:
            print('loading test data...')
        with open(file) as f:
            ratings = f.readlines()
        # skip the header line
        if ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = ratingConfig['-columns'].strip().split()

        for lineNo, line in enumerate(ratings):
            items = split(' |,|\t', line.strip())
            if not bTest and len(order) < 3:
                print(
                    'The rating file is not in a correct format. Error: Line num %d'
                    % lineNo)
                exit(-1)
            try:
                userId = items[int(order[0])]
                itemId = items[int(order[1])]
                if bTest and len(order) < 3:
                    rating = 1  #default value
                else:
                    rating = items[int(order[2])]

            except ValueError:
                print(
                    'Error! Have you added the option -header to ratings.setup?'
                )
                exit(-1)
            if not bTest:
                trainingData[userId][itemId] = float(rating)
            else:
                testData[userId][itemId] = float(rating)
        if not bTest:
            return trainingData
        else:
            return testData
Example 22
File: RecQ.py  Project: nonva/RecQ
    def execute(self):
        #import the algorithm module
        importStr = 'from algorithm.rating.' + self.config[
            'recommender'] + ' import ' + self.config['recommender']
        exec(importStr)
        if self.evaluation.contains('-cv'):
            k = int(self.evaluation['-cv'])
            if k <= 1 or k > 10:
                k = 3
            #create the manager used for communication between processes
            manager = Manager()
            m = manager.dict()
            i = 1
            tasks = []
            for train, test in DataSplit.crossValidation(self.trainingData, k):
                fold = '[' + str(i) + ']'
                if self.config.contains('social'):
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,self.relation,fold)"
                else:
                    recommender = self.config[
                        'recommender'] + "(self.config,train,test,fold)"
                #create the process
                p = Process(target=run, args=(m, eval(recommender), i))
                tasks.append(p)
                i += 1
            #start the processes
            for p in tasks:
                p.start()
            #wait until all processes are completed
            for p in tasks:
                p.join()
            #compute the mean error of k-fold cross validation
            self.measure = [dict(m)[i] for i in range(1, k + 1)]
            res = []
            for i in range(len(self.measure[0])):
                measure = self.measure[0][i].split(':')[0]
                total = 0
                for j in range(k):
                    total += float(self.measure[j][i].split(':')[1])
                res.append(measure + ':' + str(total / k) + '\n')
            #output result
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['recommender'] + '@' + str(
                k) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)

        else:
            if self.config.contains('social'):
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
            else:
                recommender = self.config[
                    'recommender'] + '(self.config,self.trainingData,self.testData)'
            eval(recommender).execute()
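
Examples 18 and 22 average the k folds with the same string-splitting loop. If, as that loop implies, each fold's result is a list of 'metric:value\n' strings, the aggregation can be factored into one small helper (a sketch under that assumption, not code from these projects):

def average_folds(measures):
    # measures: one list of 'metric:value\n' strings per fold
    res = []
    for i in range(len(measures[0])):
        name = measures[0][i].split(':')[0]
        # float() tolerates the trailing newline in each value
        total = sum(float(m[i].split(':')[1]) for m in measures)
        res.append(name + ':' + str(total / len(measures)) + '\n')
    return res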
Example 23
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])

            if self.evaluation.contains('-testSet'):
                #specify testSet
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                self.testData = FileIO.loadDataSet(config,
                                                   self.evaluation['-testSet'],
                                                   bTest=True)

            elif self.evaluation.contains('-ap'):
                #auto partition
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config,
                                                       config['ratings'])
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,
                                                    self.config['social'])
        print('preprocessing...')
Example 24
 def __init__(self, config):
     self.config = config
     self.ratingConfig = LineConfig(config['ratings.setup'])
     self.evaluation = LineConfig(config['evaluation.setup'])
     self.user = {}
     self.item = {}
     self.timestamp = {}
     self.ratingMatrix = None
     self.trainingMatrix = None
     self.validationMatrix = None
     self.testSet_u = None  # used to store the test set by hierarchy user:[item,rating]
     self.testSet_i = None  # used to store the test set by hierarchy item:[user,rating]
     self.rScale = [-9999999, 999999]
     if self.evaluation.contains('-testSet'):
         #specify testSet
         self.trainingMatrix = self.loadRatings(config['ratings'])
         self.testSet_u, self.testSet_i = self.loadRatings(
             self.evaluation['-testSet'], True)
     else:  #cross validation and leave-one-out
         self.ratingMatrix = self.loadRatings(config['ratings'])
Example 25
    def __init__(self, config, trainingSet, testSet):
        self.config = config
        self.recordConfig = LineConfig(config['record.setup'])
        self.evalConfig = LineConfig(config['evaluation.setup'])
        self.name2id = defaultdict(dict)
        self.id2name = defaultdict(dict)
        self.listened = {}
        self.listened['artist'] = defaultdict(dict)
        self.listened['track'] = defaultdict(dict)
        self.listened['album'] = defaultdict(dict)
        self.artist2Album = defaultdict(dict) #key:artist id, value:{album id1:1, album id2:1 ...}
        self.album2Track = defaultdict(dict) #
        self.artist2Track = defaultdict(dict) #
        self.Track2artist = defaultdict(dict) #
        self.Track2album = defaultdict(dict) #
        self.userRecord = defaultdict(list) #user data in training set. form: {user:[record1,record2]}
        self.trackRecord = defaultdict(list) # track data in training set. form: {track:[record1, record2]}
        self.testSet = defaultdict(dict) #user data in test set. form: {user:{recommendedObject1:1, recommendedObject2:1}}
        self.recordCount = 0
        self.columns = {}
        self.globalMean = 0
        self.userMeans = {} #used to store the mean values of users' listening times
        self.trackListen = {}

        self.trainingData = trainingSet

        self.computeUserMean()
        self.globalAverage()
        self.PopTrack = {}

        labels = self.recordConfig['-columns'].split(',')
        for col in labels:
            label = col.split(':')
            self.columns[label[0]] = int(label[1])
        if self.evalConfig.contains('-byTime'):
            trainingSet, testSet = self.splitDataByTime(trainingSet)

        self.preprocess(trainingSet, testSet)
        self.computePop(trainingSet)
Example 26
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        self.config = conf
        self.isSaveModel = False
        self.isLoadModel = False
        self.isOutput = True
        self.data = Record(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.evalConfig = LineConfig(self.config['evaluation.setup'])
        if self.evalConfig.contains('-target'):
            self.recType = self.evalConfig['-target']
        else:
            self.recType = 'track'
        if self.evalConfig.contains('-cold'):
            #evaluation on cold-start users
            threshold = int(self.evalConfig['-cold'])
            removedUser = []
            removedTrack = defaultdict(list)
            #for user in self.data.testSet:
            #    if user in self.data.userRecord and len(self.data.userRecord[user])>threshold:
            #        removedUser.append(user)
            for user in self.data.testSet:
                if user in self.data.userRecord:
                    for item in self.data.testSet[user]:
                        if len(self.data.trackRecord[item]) > threshold:
                            removedTrack[user].append(item)
            for user in removedTrack:
                for item in removedTrack[user]:
                    del self.data.testSet[user][item]
                if len(self.data.testSet[user]) == 0:
                    del self.data.testSet[user]
            #for user in removedUser:
            #    del self.data.testSet[user]

        if self.evalConfig.contains('-sample'):
            userList = list(self.data.testSet.keys())
            removedUser = userList[:int(len(userList) * 0.9)]
            for user in removedUser:
                del self.data.testSet[user]
Example 27
 def __init__(self, config):
     self.trainingData = []  # training data
     self.testData = []  # testData
     self.measure = []
     self.config = config
     self.ratingConfig = LineConfig(config['ratings.setup'])
     if self.config.contains('evaluation.setup'):
         self.evaluation = LineConfig(config['evaluation.setup'])
         if self.evaluation.contains('-testSet'):
             #specify testSet
             self.__loadDataSet(config['ratings'])
             self.__loadDataSet(self.evaluation['-testSet'],bTest=True)
         elif self.evaluation.contains('-ap'):
             #auto partition
             self.__loadDataSet(config['ratings'])
             self.trainingData,self.testData = DataSplit.\
                 dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))
         elif self.evaluation.contains('-cv'):
             #cross validation
             self.__loadDataSet(config['ratings'])
             #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
         else:
             print('Evaluation is not well configured!')
             exit(-1)
Example 28
    def __init__(self, config, trainingSet, testSet):
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.user = {} #map user names to identifiers (id)
        self.item = {} #map item names to identifiers (id)
        self.id2user = {}
        self.id2item = {}
        self.userMeans = {} #store the mean values of users' ratings
        self.itemMeans = {} #store the mean values of items' ratings
        self.globalMean = 0
        self.trainSet_u = defaultdict(dict)
        self.trainSet_i = defaultdict(dict)
        self.testSet_u = defaultdict(dict) # store the test set in the form of [user][item]=rating
        self.testSet_i = defaultdict(dict) # store the test set in the form of [item][user]=rating
        self.rScale = [] #rating scale

        self.trainingData = trainingSet[:]
        self.testData = testSet[:]

        self.__generateSet()
        self.__computeItemMean()
        self.__computeUserMean()
        self.__globalAverage()
Example 29
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            
            if self.evaluation.contains('-testSet'):
                #specify testSet
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)

            elif self.evaluation.contains('-ap'):
                #auto partition
                self.trainingData = FileIO.loadDataSet(config,config['ratings'])
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,self.config['social'])
        print('preprocessing...')
Example 30
 def readConfiguration(self):
     super(FISM, self).readConfiguration()
     self.rho = int(LineConfig(self.config['FISM'])['-rho'])
     if self.rho < 1:
         self.rho = 1
     self.alpha = float(LineConfig(self.config['FISM'])['-alpha'])
Example 31
 def readConfiguration(self):
     self.algorName = self.config['recommender']
     self.output = LineConfig(self.config['output.setup'])
     self.isOutput = self.output.isMainOn()
     self.ranking = LineConfig(self.config['item.ranking'])
Example 32
class SDLib(object):
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # testData
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])

        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            
            if self.evaluation.contains('-testSet'):
                #specify testSet
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)

            elif self.evaluation.contains('-ap'):
                #auto partition
                self.trainingData = FileIO.loadDataSet(config,config['ratings'])
                self.trainingData,self.testData = DataSplit.\
                    dataSplit(self.trainingData,test_ratio=float(self.evaluation['-ap']))

            elif self.evaluation.contains('-cv'):
                #cross validation
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                #self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))

        else:
            print('Evaluation is not well configured!')
            exit(-1)

        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config,self.config['social'])
        print('preprocessing...')


    def execute(self):
        #import the algorithm module
        importStr = 'from method.' + self.config['methodName'] + ' import ' + self.config['methodName']
        exec (importStr)
        if self.evaluation.contains('-cv'):
            k = int(self.evaluation['-cv'])
            if k <= 1 or k > 10:
                k = 3
            #create the manager used for communication between processes
            manager = Manager()
            m = manager.dict()
            i = 1
            tasks = []
            for train, test in DataSplit.crossValidation(self.trainingData, k):
                fold = '[' + str(i) + ']'
                if self.config.contains('social'):
                    method = self.config['methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
                else:
                    method = self.config['methodName'] + "(self.config,train,test,self.labels,fold)"
                #create the process
                p = Process(target=run, args=(m, eval(method), i))
                tasks.append(p)
                i += 1
            #start the processes
            for p in tasks:
                p.start()
            #wait until all processes are completed
            for p in tasks:
                p.join()
            #compute the mean error of k-fold cross validation
            self.measure = [dict(m)[i] for i in range(1, k + 1)]
            res = []
            pattern = re.compile(r'(\d+\.\d+)')
            countPattern = re.compile(r'\d+\n')
            labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
            labels = re.findall(labelPattern, self.measure[0])
            values = np.array([0] * 9, dtype=float)
            count = np.array([0, 0, 0], dtype=int)
            for report in self.measure:
                values += np.array(re.findall(pattern, report), dtype=float)
                count += np.array(re.findall(countPattern, report), dtype=int)
            values /= k
            values = np.around(values, decimals=4)
            res.append('             precision  recall  f1-score  support\n\n')
            res.append('         ' + labels[0] + '  ' + '    '.join(np.array(values[0:3], dtype=str).tolist()) + '   ' + str(count[0]) + '\n')
            res.append('         ' + labels[1] + '  ' + '    '.join(np.array(values[3:6], dtype=str).tolist()) + '   ' + str(count[1]) + '\n\n')
            res.append('  avg/total   ' + '    '.join(np.array(values[6:9], dtype=str).tolist()) + '   ' + str(count[2]) + '\n')
            print('Total:')
            print(''.join(res))
                # for line in lines[1:]:
                #
                # measure = self.measure[0][i].split(':')[0]
                # total = 0
                # for j in range(k):
                #     total += float(self.measure[j][i].split(':')[1])
                # res.append(measure+':'+str(total/k)+'\n')
            #output result
            currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['methodName'] + '@' + currentTime + '-' + str(k) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The results have been output to ' + abspath(LineConfig(self.config['output.setup'])['-dir']) + '\n')
        else:
            if self.config.contains('social'):
                method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
            else:
                method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
            eval(method).execute()