def __init__(self, config_dict, config_account, account_DAO, C=3, K=1, L=-1, N=0):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config_dict = config_dict
    self.C = C
    self.K = K
    self.L = L
    self.N = N
    self.accountDAO = account_DAO
    if config_account.contains('evaluation.setup'):
        all_evaluation = LineConfig(config_account['evaluation.setup'])
        if all_evaluation.contains('--account'):
            self.training_user_item = account_DAO.training_user_item
            self.training_account_item = account_DAO.training_account_item
            self.relation = account_DAO.relation
            self.test_user_item = account_DAO.test_user_item
    else:
        raise Exception('Evaluation is not well configured!')
    print('preprocessing...')
def execute(self, config, max_sample=1000):
    # import the algorithm module
    importStr = 'from algorithm.ranking.' + config['recommender'] + ' import ' + config['recommender']
    exec(importStr)
    algo_evaluation = LineConfig(config['evaluation.setup'])
    if algo_evaluation.contains('-ul') and eval(algo_evaluation['-ul']) > 0:
        training_data = 'self.training_user_item'
        social_info = 'relation=self.relation'
    else:
        training_data = 'self.training_account_item'
        social_info = ''
    if config['recommender'].startswith('ABPR'):
        recommender = config['recommender'] + '(config, {}, self.test_user_item, {}, C={}, N={})'. \
            format(training_data, social_info, self.C, self.N)
    else:
        recommender = config['recommender'] + '(config, {}, self.test_user_item, {})'. \
            format(training_data, social_info)
    algorithm = eval(recommender)
    algorithm.accountDAO = self.accountDAO
    algorithm.evaluation_conf = algo_evaluation
    algorithm.get_test_map(K=self.K, L=self.L)
    algorithm.get_test_sample_data(max_sample=max_sample)
    algorithm.execute()
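# A hedged sketch of the configuration this dispatcher reads (the recommender
# name and the '-ul' ratio below are illustrative assumptions, not shipped
# defaults):
#
#   recommender=ABPRSomeVariant
#   evaluation.setup=--account -ul 0.8
#
# When '-ul' is present and positive, the recommender is instantiated on the
# user-item training data together with the social relations; otherwise it is
# built on the account-item data with no relation argument.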
def __init__(self, config):
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.evaluation = LineConfig(config['evaluation.setup'])
    self.user = {}  # used to store the order of users
    self.item = {}  # used to store the order of items
    self.userMeans = {}  # used to store the mean values of users' ratings
    self.itemMeans = {}  # used to store the mean values of items' ratings
    self.triple = []  # training data
    self.globalMean = 0
    self.timestamp = {}
    self.ratingMatrix = None
    self.trainingMatrix = None
    self.validationMatrix = None
    self.testSet_u = None  # used to store the test set by hierarchy user:[item,rating]
    self.testSet_i = None  # used to store the test set by hierarchy item:[user,rating]
    self.rScale = [-9999999, 999999]
    if self.evaluation.contains('-testSet'):
        # specify testSet
        self.trainingMatrix = self.__loadRatings(config['ratings'])
        self.testSet_u, self.testSet_i = self.__loadRatings(self.evaluation['-testSet'], True)
    else:
        # cross validation and leave-one-out
        self.ratingMatrix = self.__loadRatings(config['ratings'])
    self.__computeItemMean()
    self.__computeUserMean()
    self.__globalAverage()
def __init__(self, config, trainingSet, testSet):
    self.config = config
    self.recordConfig = LineConfig(config['record.setup'])
    self.evalConfig = LineConfig(config['evaluation.setup'])
    self.name2id = defaultdict(dict)
    self.id2name = defaultdict(dict)
    self.artistListened = defaultdict(dict)  # key: artist id, value: {user id1: count, user id2: count, ...}
    self.albumListened = defaultdict(dict)  # key: album id, value: {user id1: count, user id2: count, ...}
    self.trackListened = defaultdict(dict)  # key: track id, value: {user id1: count, user id2: count, ...}
    self.artist2Album = defaultdict(dict)  # key: artist id, value: {album id1: 1, album id2: 1, ...}
    self.album2Track = defaultdict(dict)
    self.artist2Track = defaultdict(dict)
    self.userRecord = defaultdict(list)  # user data in the training set. form: {user: [record1, record2]}
    self.testSet = defaultdict(dict)  # user data in the test set. form: {user: {recommendedObject1: 1, recommendedObject2: 1}}
    self.recordCount = 0
    if self.evalConfig.contains('-byTime'):
        trainingSet, testSet = self.splitDataByTime(trainingSet)
    self.preprocess(trainingSet, testSet)
def readConfiguration(self):
    super(IPF, self).readConfiguration()
    ipfConfig = LineConfig(self.config['IPF'])
    self.rho = float(ipfConfig['-rho'])
    if self.rho < 0 or self.rho > 1:
        self.rho = 0.5
    self.beta = float(ipfConfig['-beta'])
    self.eta = float(ipfConfig['-eta'])
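# A minimal sketch of the 'IPF' config line parsed above, assuming the
# 'key=-option value' layout used by LineConfig elsewhere in this codebase
# (the values are illustrative, not recommended defaults):
#
#   IPF=-rho 0.5 -beta 0.01 -eta 0.05
#
# Any rho outside [0, 1] falls back to 0.5, as handled above.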
def loadRelationship(conf, filePath):
    socialConfig = LineConfig(conf['social.setup'])
    relation = []
    print('loading social data...')
    with open(filePath) as f:
        relations = f.readlines()
    # ignore the headline
    if socialConfig.contains('-header'):
        relations = relations[1:]
    # order of the columns
    order = socialConfig['-columns'].strip().split()
    if len(order) < 2:
        print('The social file is not in a correct format.')
    for lineNo, line in enumerate(relations):
        items = split(' |,|\t', line.strip())
        if len(items) < 2:
            print('The social file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        userId1 = items[int(order[0])]
        userId2 = items[int(order[1])]
        if len(order) < 3:
            weight = 1
        else:
            weight = float(items[int(order[2])])
        relation.append([userId1, userId2, weight])
    return relation
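# A hedged example of the input this loader expects. Assuming
#
#   social.setup=-columns 0 1 2 -header
#
# the social file would look like (illustrative rows):
#
#   trustor,trustee,weight
#   u1,u2,1.0
#   u1,u3,0.5
#
# With only two column indices configured, every edge weight defaults to 1.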
def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
    self.config = conf
    self.isSaveModel = False
    self.isLoadModel = False
    self.isOutput = True
    self.data = Record(self.config, trainingSet, testSet)
    self.foldInfo = fold
    self.evalConfig = LineConfig(self.config['evaluation.setup'])
    if self.evalConfig.contains('-target'):
        self.recType = self.evalConfig['-target']
    else:
        self.recType = 'track'
    if self.evalConfig.contains('-cold'):
        # evaluation on cold-start users
        threshold = int(self.evalConfig['-cold'])
        removedUser = []
        for user in self.data.testSet:
            if user in self.data.userRecord and len(self.data.userRecord[user]) > threshold:
                removedUser.append(user)
        for user in removedUser:
            del self.data.testSet[user]
    if self.evalConfig.contains('-sample'):
        userList = list(self.data.testSet.keys())
        removedUser = userList[:int(len(userList) * 0.9)]
        for user in removedUser:
            del self.data.testSet[user]
def __init__(self, conf, trainingSet, testSet, fold='[1]'):
    self.config = conf
    self.data = None
    self.isSaveModel = False
    self.ranking = None
    self.isLoadModel = False
    self.output = None
    self.isOutput = True
    self.data = RatingDAO(self.config, trainingSet, testSet)
    self.foldInfo = fold
    self.evalSettings = LineConfig(self.config['evaluation.setup'])
    self.measure = []
    self.record = []
    if self.evalSettings.contains('-cold'):
        # evaluation on cold-start users
        threshold = int(self.evalSettings['-cold'])
        removedUser = {}
        for user in self.data.testSet_u:
            if user in self.data.trainSet_u and len(self.data.trainSet_u[user]) > threshold:
                removedUser[user] = 1
        for user in removedUser:
            del self.data.testSet_u[user]
        testData = []
        for item in self.data.testData:
            if item[0] not in removedUser:
                testData.append(item)
        self.data.testData = testData
    self.num_users, self.num_items, self.train_size = self.data.trainingSize()
def __init__(self, config, trainingSet, testSet):
    self.config = config
    self.recordConfig = LineConfig(config['record.setup'])
    self.evalConfig = LineConfig(config['evaluation.setup'])
    self.name2id = defaultdict(dict)
    self.id2name = defaultdict(dict)
    self.listened = {}
    self.listened['artist'] = defaultdict(dict)
    self.listened['track'] = defaultdict(dict)
    self.listened['album'] = defaultdict(dict)
    self.artist2Album = defaultdict(dict)  # key: artist id, value: {album id1: 1, album id2: 1, ...}
    self.album2Track = defaultdict(dict)
    self.artist2Track = defaultdict(dict)
    self.userRecord = defaultdict(list)  # user data in the training set. form: {user: [record1, record2]}
    self.testSet = defaultdict(dict)  # user data in the test set. form: {user: {recommendedObject1: 1, recommendedObject2: 1}}
    self.recordCount = 0
    self.columns = {}
    labels = self.recordConfig['-columns'].split(',')
    for col in labels:
        label = col.split(':')
        self.columns[label[0]] = int(label[1])
    if self.evalConfig.contains('-byTime'):
        trainingSet, testSet = self.splitDataByTime(trainingSet)
    self.preprocess(trainingSet, testSet)
def __init__(self, conf):
    self.config = conf
    self.socialConfig = LineConfig(self.config['social.setup'])
    self.user = {}  # used to store the order of users
    self.triple = []
    self.followees = {}
    self.followers = {}
    self.trustMatrix = self.loadRelationship(self.config['social'])
def printAlgorConfig(self):
    "show the algorithm's configuration"
    print('Algorithm:', self.config['recommender'])
    print('Training set:', abspath(self.config['record']))
    if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
        print('Test set:', abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
    self.data.printTrainingSize()
    print('=' * 80)
def printAlgorConfig(self):
    "show the algorithm's configuration"
    print('Algorithm:', self.config['recommender'])
    print('Ratings dataset:', abspath(self.config['ratings']))
    if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
        print('Test set:', abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
    print('Training set size: (user count: %d, item count: %d, record count: %d)' % (self.dao.trainingSize()))
    print('Test set size: (user count: %d, item count: %d, record count: %d)' % (self.dao.testSize()))
    print('=' * 80)
def loadDataSet(conf, file, bTest=False, binarized=False, threshold=3.0):
    trainingData = []
    testData = []
    ratingConfig = LineConfig(conf['ratings.setup'])
    if not bTest:
        print('loading training data...')
    else:
        print('loading test data...')
    with open(file) as f:
        ratings = f.readlines()
    # ignore the headline
    if ratingConfig.contains('-header'):
        ratings = ratings[1:]
    # order of the columns
    order = ratingConfig['-columns'].strip().split()
    delim = ' |,|\t'
    if ratingConfig.contains('-delim'):
        delim = ratingConfig['-delim']
    for lineNo, line in enumerate(ratings):
        items = split(delim, line.strip())
        if not bTest and len(order) < 2:
            print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        try:
            userId = items[int(order[0])]
            itemId = items[int(order[1])]
            if len(order) < 3:
                rating = 1  # default value
            else:
                rating = items[int(order[2])]
            if binarized:
                if float(items[int(order[2])]) < threshold:
                    continue
                else:
                    rating = 1
        except ValueError:
            print('Error! Have you added the option -header to the ratings.setup?')
            exit(-1)
        if not bTest:
            trainingData.append([userId, itemId, float(rating)])
        else:
            if binarized and rating != 1:
                continue
            testData.append([userId, itemId, float(rating)])
    if not bTest:
        return trainingData
    else:
        return testData
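# A hedged example of the 'ratings.setup' line this loader parses (the column
# indices and delimiter are illustrative assumptions):
#
#   ratings.setup=-columns 0 1 2 -header -delim ,
#
# '-columns 0 1 2' gives the positions of user, item and rating; with only two
# indices the rating defaults to 1. When binarized=True, ratings below the
# threshold are skipped and the remaining ones are kept as 1.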
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        binarized = False
        bottom = 0
        if self.evaluation.contains('-b'):
            binarized = True
            bottom = float(self.evaluation['-b'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True, binarized=binarized, threshold=bottom)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            self.trainingData, self.testData = DataSplit. \
                dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']), binarized=binarized)
        elif self.evaluation.contains('-cv'):
            # cross validation
            self.trainingData = FileIO.loadDataSet(config, config['ratings'], binarized=binarized, threshold=bottom)
            # self.trainingData, self.testData = DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv']))
    else:
        print('Evaluation is not well configured!')
        exit(-1)
    if config.contains('social'):
        self.socialConfig = LineConfig(self.config['social.setup'])
        self.relation = FileIO.loadRelationship(config, self.config['social'])
    print('preprocessing...')
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.measure = []
    self.config = config
    setup = LineConfig(config['record.setup'])
    columns = {}
    labels = setup['-columns'].split(',')
    delim = ''
    if setup.contains('-delim'):
        delim = setup['-delim']
    for col in labels:
        label = col.split(':')
        columns[label[0]] = int(label[1])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        binarized = False
        bottom = 0
        if self.evaluation.contains('-b'):
            binarized = True
            bottom = float(self.evaluation['-b'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.testData = FileIO.loadDataSet(self.evaluation['-testSet'], binarized=binarized, columns=columns, threshold=bottom, delim=delim)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.trainingData, self.testData = DataSplit. \
                dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']))
        elif self.evaluation.contains('-byTime'):
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            self.testData = []
        elif self.evaluation.contains('-cv'):
            # cross validation
            self.trainingData = FileIO.loadDataSet(config['record'], columns=columns, binarized=binarized, threshold=bottom, delim=delim)
            # self.trainingData, self.testData = DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv']))
    else:
        print('Evaluation is not well configured!')
        exit(-1)
    # if config.contains('social'):
    #     self.socialConfig = LineConfig(self.config['social.setup'])
    #     self.relation = FileIO.loadRelationship(config, self.config['social'])
    print('preprocessing...')
def readConfiguration(self):
    super(MEM, self).readConfiguration()
    MEMConfig = LineConfig(self.config['MEM'])
    self.epoch = int(MEMConfig['-epoch'])
    self.winSize = int(MEMConfig['-winSize'])
    self.negCount = int(MEMConfig['-negCount'])
    self.beta = float(MEMConfig['-beta'])
def __init__(self, config, trainingSet, testSet):
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.user = {}  # used to store the order of users in the training set
    self.item = {}  # used to store the order of items in the training set
    self.id2user = {}
    self.id2item = {}
    self.all_Item = {}
    self.all_User = {}
    self.userMeans = {}  # used to store the mean values of users' ratings
    self.itemMeans = {}  # used to store the mean values of items' ratings
    self.globalMean = 0
    self.timestamp = {}
    self.trainSet_u = defaultdict(dict)
    self.trainSet_i = defaultdict(dict)
    self.testSet_u = defaultdict(dict)  # used to store the test set by hierarchy user:[item,rating]
    self.testSet_i = defaultdict(dict)  # used to store the test set by hierarchy item:[user,rating]
    self.rScale = []
    self.trainingData = trainingSet[:]
    self.testData = testSet[:]
    self.__generateSet()
    self.__computeItemMean()
    self.__computeUserMean()
    self.__globalAverage()
def execute(self):
    exec('from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender'])
    if self.evaluation.contains('-cv'):
        i = 1
        for train, test in DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv'])):
            fold = '[' + str(i) + ']'
            recommender = self.config['recommender'] + "(self.config,train,test,fold)"
            measure = eval(recommender).execute()
            self.measure.append(measure)
            i += 1
        res = []
        for i in range(len(self.measure[0])):
            measure = self.measure[0][i].split(':')[0]
            total = 0
            for j in range(len(self.measure)):
                total += float(self.measure[j][i].split(':')[1])
            res.append(measure + ':' + str(total / len(self.measure)) + '\n')
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['recommender'] + '@' + str(int(self.evaluation['-cv'])) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
    else:
        recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
        eval(recommender).execute()
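# A self-contained sketch of the fold-averaging step used above, assuming each
# fold reports its measures as 'name:value' strings (the helper name and the
# sample numbers are hypothetical):
def _average_measures(measure_per_fold):
    averaged = []
    for i in range(len(measure_per_fold[0])):
        name = measure_per_fold[0][i].split(':')[0]
        mean = sum(float(fold[i].split(':')[1]) for fold in measure_per_fold) / len(measure_per_fold)
        averaged.append(name + ':' + str(mean) + '\n')
    return averaged

# e.g. _average_measures([['RMSE:0.92', 'MAE:0.71'], ['RMSE:0.90', 'MAE:0.69']])
# returns approximately ['RMSE:0.91\n', 'MAE:0.7\n']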
def __init__(self, config, trainingSet=None, testSet=None):
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.user = {}  # used to store the order of users
    self.item = {}  # used to store the order of items
    self.userMeans = {}  # used to store the mean values of users' ratings
    self.itemMeans = {}  # used to store the mean values of items' ratings
    self.globalMean = 0
    self.timestamp = {}
    self.trainingMatrix = None
    self.validationMatrix = None
    self.testSet_u = {}  # used to store the test set by hierarchy user:[item,rating]
    self.testSet_i = {}  # used to store the test set by hierarchy item:[user,rating]
    self.rScale = []
    self.trainingData = trainingSet  # training data
    self.testData = testSet  # test data
    self.__generateSet()
    self.__computeItemMean()
    self.__computeUserMean()
    self.__globalAverage()
def readConfiguration(self):
    super(TSWalker, self).readConfiguration()
    self.sim = self.config['similarity']
    TW = LineConfig(self.config['TSWalker'])
    self.k = int(TW['-k'])
    self.v = float(TW['-v'])
    self.tw = int(TW['-tw'])
def loadDataSet(conf, file, bTest=False):
    trainingData = defaultdict(dict)
    testData = defaultdict(dict)
    ratingConfig = LineConfig(conf['ratings.setup'])
    if not bTest:
        print('loading training data...')
    else:
        print('loading test data...')
    with open(file) as f:
        ratings = f.readlines()
    # ignore the headline
    if ratingConfig.contains('-header'):
        ratings = ratings[1:]
    # order of the columns
    order = ratingConfig['-columns'].strip().split()
    for lineNo, line in enumerate(ratings):
        items = split(' |,|\t', line.strip())
        if not bTest and len(order) < 3:
            print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        try:
            userId = items[int(order[0])]
            itemId = items[int(order[1])]
            if bTest and len(order) < 3:
                rating = 1  # default value
            else:
                rating = items[int(order[2])]
        except ValueError:
            print('Error! Have you added the option -header to the ratings.setup?')
            exit(-1)
        if not bTest:
            trainingData[userId][itemId] = float(rating)
        else:
            testData[userId][itemId] = float(rating)
    if not bTest:
        return trainingData
    else:
        return testData
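# Unlike the list-based loader above, this variant returns nested dicts,
# e.g. (illustrative shape, not real data):
#
#   trainingData == {'u1': {'i1': 4.0, 'i2': 3.5}, 'u2': {'i1': 5.0}}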
def execute(self):
    # import the algorithm module
    importStr = 'from algorithm.rating.' + self.config['recommender'] + ' import ' + self.config['recommender']
    exec(importStr)
    if self.evaluation.contains('-cv'):
        k = int(self.evaluation['-cv'])
        if k <= 1 or k > 10:
            k = 3
        # create the manager used for communication between processes
        manager = Manager()
        m = manager.dict()
        i = 1
        tasks = []
        for train, test in DataSplit.crossValidation(self.trainingData, k):
            fold = '[' + str(i) + ']'
            if self.config.contains('social'):
                recommender = self.config['recommender'] + "(self.config,train,test,self.relation,fold)"
            else:
                recommender = self.config['recommender'] + "(self.config,train,test,fold)"
            # create the process
            p = Process(target=run, args=(m, eval(recommender), i))
            tasks.append(p)
            i += 1
        # start the processes
        for p in tasks:
            p.start()
        # wait until all processes are completed
        for p in tasks:
            p.join()
        # compute the mean error of k-fold cross validation
        self.measure = [dict(m)[i] for i in range(1, k + 1)]
        res = []
        for i in range(len(self.measure[0])):
            measure = self.measure[0][i].split(':')[0]
            total = 0
            for j in range(k):
                total += float(self.measure[j][i].split(':')[1])
            res.append(measure + ':' + str(total / k) + '\n')
        # output result
        outDir = LineConfig(self.config['output.setup'])['-dir']
        fileName = self.config['recommender'] + '@' + str(k) + '-fold-cv' + '.txt'
        FileIO.writeFile(outDir, fileName, res)
    else:
        if self.config.contains('social'):
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData,self.relation)'
        else:
            recommender = self.config['recommender'] + '(self.config,self.trainingData,self.testData)'
        eval(recommender).execute()
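# A minimal sketch of the 'run' worker handed to each Process above. This is a
# hypothetical re-implementation, not necessarily the repo's own helper; all it
# must do is store each fold's measures under its fold index so that dict(m)[i]
# can be read back after join():
def run(measure_dict, algo, order):
    # execute one fold and record its measure list under the fold number
    measure_dict[order] = algo.execute()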
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.relation = []
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.labels = FileIO.loadLabels(config['label'])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            self.trainingData, self.testData = DataSplit. \
                dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']))
        elif self.evaluation.contains('-cv'):
            # cross validation
            self.trainingData = FileIO.loadDataSet(config, config['ratings'])
            # self.trainingData, self.testData = DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv']))
    else:
        print('Evaluation is not well configured!')
        exit(-1)
    if config.contains('social'):
        self.socialConfig = LineConfig(self.config['social.setup'])
        self.relation = FileIO.loadRelationship(config, self.config['social'])
    print('preprocessing...')
def __init__(self, config):
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.evaluation = LineConfig(config['evaluation.setup'])
    self.user = {}
    self.item = {}
    self.timestamp = {}
    self.ratingMatrix = None
    self.trainingMatrix = None
    self.validationMatrix = None
    self.testSet_u = None  # used to store the test set by hierarchy user:[item,rating]
    self.testSet_i = None  # used to store the test set by hierarchy item:[user,rating]
    self.rScale = [-9999999, 999999]
    if self.evaluation.contains('-testSet'):
        # specify testSet
        self.trainingMatrix = self.loadRatings(config['ratings'])
        self.testSet_u, self.testSet_i = self.loadRatings(self.evaluation['-testSet'], True)
    else:
        # cross validation and leave-one-out
        self.ratingMatrix = self.loadRatings(config['ratings'])
def __init__(self, config, trainingSet, testSet):
    self.config = config
    self.recordConfig = LineConfig(config['record.setup'])
    self.evalConfig = LineConfig(config['evaluation.setup'])
    self.name2id = defaultdict(dict)
    self.id2name = defaultdict(dict)
    self.listened = {}
    self.listened['artist'] = defaultdict(dict)
    self.listened['track'] = defaultdict(dict)
    self.listened['album'] = defaultdict(dict)
    self.artist2Album = defaultdict(dict)  # key: artist id, value: {album id1: 1, album id2: 1, ...}
    self.album2Track = defaultdict(dict)
    self.artist2Track = defaultdict(dict)
    self.Track2artist = defaultdict(dict)
    self.Track2album = defaultdict(dict)
    self.userRecord = defaultdict(list)  # user data in the training set. form: {user: [record1, record2]}
    self.trackRecord = defaultdict(list)  # track data in the training set. form: {track: [record1, record2]}
    self.testSet = defaultdict(dict)  # user data in the test set. form: {user: {recommendedObject1: 1, recommendedObject2: 1}}
    self.recordCount = 0
    self.columns = {}
    self.globalMean = 0
    self.userMeans = {}  # used to store the mean values of users' listening times
    self.trackListen = {}
    self.trainingData = trainingSet
    self.computeUserMean()
    self.globalAverage()
    self.PopTrack = {}
    labels = self.recordConfig['-columns'].split(',')
    for col in labels:
        label = col.split(':')
        self.columns[label[0]] = int(label[1])
    if self.evalConfig.contains('-byTime'):
        trainingSet, testSet = self.splitDataByTime(trainingSet)
    self.preprocess(trainingSet, testSet)
    self.computePop(trainingSet)
def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
    self.config = conf
    self.isSaveModel = False
    self.isLoadModel = False
    self.isOutput = True
    self.data = Record(self.config, trainingSet, testSet)
    self.foldInfo = fold
    self.evalConfig = LineConfig(self.config['evaluation.setup'])
    if self.evalConfig.contains('-target'):
        self.recType = self.evalConfig['-target']
    else:
        self.recType = 'track'
    if self.evalConfig.contains('-cold'):
        # cold-start evaluation: drop test items whose track record exceeds the threshold
        threshold = int(self.evalConfig['-cold'])
        removedTrack = defaultdict(list)
        # for user in self.data.testSet:
        #     if user in self.data.userRecord and len(self.data.userRecord[user]) > threshold:
        #         removedUser.append(user)
        for user in self.data.testSet:
            if user in self.data.userRecord:
                for item in self.data.testSet[user]:
                    if len(self.data.trackRecord[item]) > threshold:
                        removedTrack[user].append(item)
        for user in removedTrack:
            for item in removedTrack[user]:
                del self.data.testSet[user][item]
            if len(self.data.testSet[user]) == 0:
                del self.data.testSet[user]
        # for user in removedUser:
        #     del self.data.testSet[user]
    if self.evalConfig.contains('-sample'):
        userList = list(self.data.testSet.keys())
        removedUser = userList[:int(len(userList) * 0.9)]
        for user in removedUser:
            del self.data.testSet[user]
def __init__(self, config):
    self.trainingData = []  # training data
    self.testData = []  # test data
    self.measure = []
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    if self.config.contains('evaluation.setup'):
        self.evaluation = LineConfig(config['evaluation.setup'])
        if self.evaluation.contains('-testSet'):
            # specify testSet
            self.__loadDataSet(config['ratings'])
            self.__loadDataSet(self.evaluation['-testSet'], bTest=True)
        elif self.evaluation.contains('-ap'):
            # auto partition
            self.__loadDataSet(config['ratings'])
            self.trainingData, self.testData = DataSplit. \
                dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']))
        elif self.evaluation.contains('-cv'):
            # cross validation
            self.__loadDataSet(config['ratings'])
            # self.trainingData, self.testData = DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv']))
    else:
        print('Evaluation is not well configured!')
        exit(-1)
def __init__(self, config, trainingSet, testSet):
    self.config = config
    self.ratingConfig = LineConfig(config['ratings.setup'])
    self.user = {}  # map user names to identifiers (ids)
    self.item = {}  # map item names to identifiers (ids)
    self.id2user = {}
    self.id2item = {}
    self.userMeans = {}  # store the mean values of users' ratings
    self.itemMeans = {}  # store the mean values of items' ratings
    self.globalMean = 0
    self.trainSet_u = defaultdict(dict)
    self.trainSet_i = defaultdict(dict)
    self.testSet_u = defaultdict(dict)  # store the test set in the form of [user][item] = rating
    self.testSet_i = defaultdict(dict)  # store the test set in the form of [item][user] = rating
    self.rScale = []  # rating scale
    self.trainingData = trainingSet[:]
    self.testData = testSet[:]
    self.__generateSet()
    self.__computeItemMean()
    self.__computeUserMean()
    self.__globalAverage()
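# Shape of the structures built by __generateSet above (illustrative values,
# not real data):
#
#   trainSet_u['u1'] == {'i1': 4.0, 'i2': 3.0}   # user -> {item: rating}
#   testSet_i['i1'] == {'u2': 5.0}               # item -> {user: rating}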
def readConfiguration(self):
    super(FISM, self).readConfiguration()
    fismConfig = LineConfig(self.config['FISM'])
    self.rho = int(fismConfig['-rho'])
    if self.rho < 1:
        self.rho = 1
    self.alpha = float(fismConfig['-alpha'])
def readConfiguration(self):
    self.algorName = self.config['recommender']
    self.output = LineConfig(self.config['output.setup'])
    self.isOutput = self.output.isMainOn()
    self.ranking = LineConfig(self.config['item.ranking'])
class SDLib(object):
    def __init__(self, config):
        self.trainingData = []  # training data
        self.testData = []  # test data
        self.relation = []
        self.measure = []
        self.config = config
        self.ratingConfig = LineConfig(config['ratings.setup'])
        self.labels = FileIO.loadLabels(config['label'])
        if self.config.contains('evaluation.setup'):
            self.evaluation = LineConfig(config['evaluation.setup'])
            if self.evaluation.contains('-testSet'):
                # specify testSet
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)
            elif self.evaluation.contains('-ap'):
                # auto partition
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                self.trainingData, self.testData = DataSplit. \
                    dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']))
            elif self.evaluation.contains('-cv'):
                # cross validation
                self.trainingData = FileIO.loadDataSet(config, config['ratings'])
                # self.trainingData, self.testData = DataSplit.crossValidation(self.trainingData, int(self.evaluation['-cv']))
        else:
            print('Evaluation is not well configured!')
            exit(-1)
        if config.contains('social'):
            self.socialConfig = LineConfig(self.config['social.setup'])
            self.relation = FileIO.loadRelationship(config, self.config['social'])
        print('preprocessing...')

    def execute(self):
        # import the method module
        importStr = 'from method.' + self.config['methodName'] + ' import ' + self.config['methodName']
        exec(importStr)
        if self.evaluation.contains('-cv'):
            k = int(self.evaluation['-cv'])
            if k <= 1 or k > 10:
                k = 3
            # create the manager used for communication between processes
            manager = Manager()
            m = manager.dict()
            i = 1
            tasks = []
            for train, test in DataSplit.crossValidation(self.trainingData, k):
                fold = '[' + str(i) + ']'
                if self.config.contains('social'):
                    method = self.config['methodName'] + "(self.config,train,test,self.labels,self.relation,fold)"
                else:
                    method = self.config['methodName'] + "(self.config,train,test,self.labels,fold)"
                # create the process
                p = Process(target=run, args=(m, eval(method), i))
                tasks.append(p)
                i += 1
            # start the processes
            for p in tasks:
                p.start()
            # wait until all processes are completed
            for p in tasks:
                p.join()
            # average the classification reports of the k folds
            self.measure = [dict(m)[i] for i in range(1, k + 1)]
            res = []
            pattern = re.compile(r'(\d+\.\d+)')
            countPattern = re.compile(r'\d+\n')
            labelPattern = re.compile(r'\s\d{1}[^\.|\n|\d]')
            labels = re.findall(labelPattern, self.measure[0])
            values = np.array([0] * 9, dtype=float)
            count = np.array([0, 0, 0], dtype=int)
            for report in self.measure:
                values += np.array(re.findall(pattern, report), dtype=float)
                count += np.array(re.findall(countPattern, report), dtype=int)
            values /= k
            values = np.around(values, decimals=4)
            res.append('             precision    recall    f1-score    support\n\n')
            res.append('          ' + labels[0] + '    ' + '    '.join(np.array(values[0:3], dtype=str).tolist()) + '   ' + str(count[0]) + '\n')
            res.append('          ' + labels[1] + '    ' + '    '.join(np.array(values[3:6], dtype=str).tolist()) + '   ' + str(count[1]) + '\n\n')
            res.append('  avg/total    ' + '    '.join(np.array(values[6:9], dtype=str).tolist()) + '   ' + str(count[2]) + '\n')
            print('Total:')
            print(''.join(res))
            # output result
            currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
            outDir = LineConfig(self.config['output.setup'])['-dir']
            fileName = self.config['methodName'] + '@' + currentTime + '-' + str(k) + '-fold-cv' + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The results have been output to ' + abspath(LineConfig(self.config['output.setup'])['-dir']) + '\n')
        else:
            if self.config.contains('social'):
                method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
            else:
                method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
            eval(method).execute()