def getErrorOfRecMethod(recType = 0):
    """Get error of one recommender for every top-N cut-off.

    Reads the playlists with persist.readPlaylistFromFile_Session(), runs
    predict.getRecDict on the ten entries allPlaylist[0..9] and, for each
    topN in range(1, const.TOP_N), sums the values returned by
    util.getTopNIndex and util.getMAEandRMSE. The sums are divided by ten,
    printed, logged and returned.

    Input:
        recType - id of recommender; this variant only handles const.ARIMA,
                  const.SIMILAR and const.AVG.
    Output:
        recalls,precisions,f1s,maes,rmses - five lists holding one averaged
        value per topN.
    Raises:
        ValueError - if recType is not one of the handled ids.
    """
    # Fail with a clear message instead of the UnboundLocalError the loop
    # used to hit because recDict was never assigned for other ids.
    if recType not in (const.ARIMA, const.SIMILAR, const.AVG):
        raise ValueError('unsupported recType: %r' % (recType,))

    scaleCount = 10
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile_Session()

    topNs = range(1, const.TOP_N, 1)
    # One running total per topN. Starting at 0.0 keeps the final division a
    # float division even if a metric comes back as an int.
    recalls = [0.0] * len(topNs)
    precisions = [0.0] * len(topNs)
    f1s = [0.0] * len(topNs)
    maes = [0.0] * len(topNs)
    rmses = [0.0] * len(topNs)

    for scale in range(scaleCount):
        playlistDict = allPlaylist[scale]
        recDict = predict.getRecDict(playlistDict, songDict, recType, scale)
        for index, topN in enumerate(topNs):
            recall, precision, f1 = util.getTopNIndex(recDict, playlistDict, topN)
            mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict, topN)
            recalls[index] += recall
            precisions[index] += precision
            f1s[index] += f1
            maes[index] += mae
            rmses[index] += rmse

    # cal the avg value
    recalls = [total / scaleCount for total in recalls]
    precisions = [total / scaleCount for total in precisions]
    f1s = [total / scaleCount for total in f1s]
    maes = [total / scaleCount for total in maes]
    rmses = [total / scaleCount for total in rmses]

    # logging info to log
    for index, topN in enumerate(topNs):
        row = (recType, topN, recalls[index], precisions[index], f1s[index],
               maes[index], rmses[index])
        print('%d:TopN = %d:%f %f %f %f %f' % row)
        logging.info('%d>%d:%f %f %f %f %f' % row)

    end_time = time.time()
    print('Consumed:%d' % (end_time - start_time))
    return recalls, precisions, f1s, maes, rmses
def errorTest():
    """Print the 100 songs ranked first by util.similarity to one fixed song.

    Loads all songs, prints how many there are, computes util.similarity
    between the topic dict of song 672661981 and the topic dict of every
    song, sorts the (song id, value) pairs in ascending order of that value
    and prints the first 100 pairs (fewer if fewer songs are loaded).

    Input:
        None.
    Output:
        None.
    Raises:
        KeyError - if song 672661981 is not in the loaded song dict.
    """
    songDict = persist.readSongFromFile()
    print(len(songDict))

    tarDict = songDict[672661981].getTopicDict()
    disDict = {}
    for sid in songDict:
        disDict[sid] = util.similarity(tarDict, songDict[sid].getTopicDict())

    disList = sorted(disDict.items(), key=lambda x: x[1])
    # Slice instead of indexing 0..99 so that a song set with fewer than 100
    # entries no longer ends in an IndexError.
    for entry in disList[:100]:
        print(entry)
def errorTest():
    """Print the 100 songs ranked first by util.similarity to one fixed song.

    The reference is the topic dict of song 672661981. Every loaded song is
    scored with util.similarity against it; the (song id, value) pairs are
    printed in ascending order of that value, at most 100 of them.

    Input:
        None.
    Output:
        None.
    Raises:
        KeyError - if song 672661981 is not in the loaded song dict.
    """
    limit = 100
    songDict = persist.readSongFromFile()
    print(len(songDict))

    disDict = {}
    tarDict = songDict[672661981].getTopicDict()
    for sid in songDict:
        topicDict = songDict[sid].getTopicDict()
        disDict[sid] = util.similarity(tarDict, topicDict)

    disList = sorted(disDict.items(), key=lambda x: x[1])
    # The old loop indexed positions 0..99 unconditionally and raised
    # IndexError when fewer songs were available; a slice stops early.
    for pair in disList[:limit]:
        print(pair)
def testRecMethod(recType = 0):
    """Test single recommender.

    Runs the recommender on the ten entries allPlaylist[0..9], averages the
    values returned by util.getTopNIndex and util.getMAEandRMSE over those
    runs, then prints and logs the averages and the elapsed time.

    Input:
        recType - id of recommender: const.ARIMA, const.SIMILAR, const.AVG,
                  const.ARIMA_SIMILAR, const.ARIMA_AVG, const.KNN,
                  const.MARKOV, const.MARKOV_3 or const.PATTERN.
    Output:
        None.
    Raises:
        ValueError - if recType is none of the ids listed above.
    """
    info = '############%s#############' % util.getMethodName(recType)

    # Recommenders that all go through predict.getRecDict.
    genericTypes = (const.ARIMA, const.SIMILAR, const.AVG,
                    const.ARIMA_SIMILAR, const.ARIMA_AVG)
    otherTypes = (const.KNN, const.MARKOV, const.MARKOV_3, const.PATTERN)
    # Previously an unhandled id left recDict unassigned and the first metric
    # call raised UnboundLocalError; reject it explicitly instead.
    # NOTE(review): getErrorOfRecMethod also accepts const.ALL_HYBRID -
    # confirm whether leaving it out here is intentional.
    if recType not in genericTypes + otherTypes:
        raise ValueError('unsupported recType: %r' % (recType,))

    scaleCount = 10
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()
    recallTotal = 0.0
    precisionTotal = 0.0
    f1Total = 0.0
    maeTotal = 0.0
    rmseTotal = 0.0

    if recType == const.KNN:
        simFilename = '../txt/simMatrix.txt'
        # index2Pid is returned by the helper but not needed here.
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(allPlaylist, songDict)
        if os.path.exists(simFilename):
            print('......')
            # 'with' closes the file even when reading fails.
            with open(simFilename, 'r') as simFile:
                line = simFile.readline().rstrip('\n')
            # SECURITY NOTE(review): eval executes whatever the file holds.
            # Only safe while simMatrix.txt is produced by this project;
            # consider ast.literal_eval if it contains plain literals only.
            simMatrix = eval(line)
            print(len(simMatrix))
            # NOTE(review): debug output with a hard-coded key/index 1234;
            # raises if the matrix has no such entry.
            print(len(simMatrix[1234]))
        else:
            print('++++++')
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)

    for scale in range(scaleCount):
        playlistDict = allPlaylist[scale]
        if recType in genericTypes:
            recDict = predict.getRecDict(playlistDict, songDict, recType, scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict, scale,
                                                  pid2Index, countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict, scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(allPlaylist, songDict, scale)
        else:
            # Only const.PATTERN is left after the validation above.
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict, scale)
        recall, precision, f1 = util.getTopNIndex(recDict, playlistDict)
        mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict)
        recallTotal += recall
        precisionTotal += precision
        f1Total += f1
        maeTotal += mae
        rmseTotal += rmse

    print(info)
    logging.info(info)
    summary = (('Recall', recallTotal), ('Precision', precisionTotal),
               ('F1-Score', f1Total), ('MAE', maeTotal), ('RMSE', rmseTotal))
    for label, total in summary:
        average = total / scaleCount
        # Two spaces after '=' reproduce the separator that the former
        # two-argument print statement inserted between its arguments.
        print('%s =  %s' % (label, average))
        logging.info('%s = %f' % (label, average))

    # Measure once so the printed and the logged duration agree.
    elapsed = time.time() - start_time
    print('Consumed: %ds' % elapsed)
    logging.info('Consumed: %ds' % elapsed)
def getErrorOfRecMethod(recType = 0):
    """Get error of one recommender for every top-N cut-off.

    Runs the recommender on the ten entries allPlaylist[0..9] and, for each
    topN in range(1, const.TOP_N), sums the values returned by
    util.getTopNIndex and util.getMAEandRMSE. The sums are divided by ten,
    printed, logged and returned.

    Input:
        recType - id of recommender: const.ARIMA, const.SIMILAR, const.AVG,
                  const.ARIMA_SIMILAR, const.ARIMA_AVG, const.ALL_HYBRID,
                  const.KNN, const.MARKOV, const.MARKOV_3 or const.PATTERN.
    Output:
        recalls,precisions,f1s,maes,rmses - five lists holding one averaged
        value per topN.
    Raises:
        ValueError - if recType is none of the ids listed above.
    """
    # Recommenders that all go through predict.getRecDict.
    genericTypes = (const.ARIMA, const.SIMILAR, const.AVG, const.ARIMA_SIMILAR,
                    const.ARIMA_AVG, const.ALL_HYBRID)
    otherTypes = (const.KNN, const.MARKOV, const.MARKOV_3, const.PATTERN)
    # Previously an unhandled id left recDict unassigned and the first metric
    # call raised UnboundLocalError; reject it explicitly instead.
    if recType not in genericTypes + otherTypes:
        raise ValueError('unsupported recType: %r' % (recType,))

    scaleCount = 10
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()

    if recType == const.KNN:
        # index2Pid is returned by the helper but not needed here.
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(allPlaylist, songDict)
        simFilename = '../txt/simMatrix.txt'
        if os.path.exists(simFilename):
            print('......')
            # 'with' closes the file even when reading fails.
            with open(simFilename, 'r') as simFile:
                line = simFile.readline().rstrip('\n')
            # SECURITY NOTE(review): eval executes whatever the file holds.
            # Only safe while simMatrix.txt is produced by this project;
            # consider ast.literal_eval if it contains plain literals only.
            simMatrix = eval(line)
            print(len(simMatrix))
            # NOTE(review): debug output with a hard-coded key/index 1234;
            # raises if the matrix has no such entry.
            print(len(simMatrix[1234]))
        else:
            print('++++++')
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)

    topNs = range(1, const.TOP_N, 1)
    # One running total per topN. Starting at 0.0 keeps the final division a
    # float division even if a metric comes back as an int.
    recalls = [0.0] * len(topNs)
    precisions = [0.0] * len(topNs)
    f1s = [0.0] * len(topNs)
    maes = [0.0] * len(topNs)
    rmses = [0.0] * len(topNs)

    for scale in range(scaleCount):
        playlistDict = allPlaylist[scale]
        if recType in genericTypes:
            recDict = predict.getRecDict(playlistDict, songDict, recType, scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict, scale,
                                                  pid2Index, countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict, scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(allPlaylist, songDict, scale)
        else:
            # Only const.PATTERN is left after the validation above.
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict, scale)

        for index, topN in enumerate(topNs):
            recall, precision, f1 = util.getTopNIndex(recDict, playlistDict, topN)
            mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict, topN)
            recalls[index] += recall
            precisions[index] += precision
            f1s[index] += f1
            maes[index] += mae
            rmses[index] += rmse

    # cal the avg value
    recalls = [total / scaleCount for total in recalls]
    precisions = [total / scaleCount for total in precisions]
    f1s = [total / scaleCount for total in f1s]
    maes = [total / scaleCount for total in maes]
    rmses = [total / scaleCount for total in rmses]

    # logging info to log
    for index, topN in enumerate(topNs):
        row = (recType, topN, recalls[index], precisions[index], f1s[index],
               maes[index], rmses[index])
        print('%d:TopN = %d:%f %f %f %f %f' % row)
        logging.info('%d>%d:%f %f %f %f %f' % row)

    end_time = time.time()
    print('Consumed:%d' % (end_time - start_time))
    return recalls, precisions, f1s, maes, rmses
songDict, scale) print 'get trainingPatterns and testingPatterns...' #print allPattern #print len(allPattern) #S = read("../txt/PrefixSpan.txt") print 'get frequent pattern...' patterns = prefixSpan(SquencePattern([], sys.maxint), allTrainingPattern, 30) print 'Finish getting frequent pattern...' #print_patterns(patterns) #print len(patterns) #count = 0 size = len(testingPatternDict) index = 0 for pid in testingPatternDict.keys(): #print '======%d======' % pid index += 1 print 'scale=%d > %d/%d:%d' % (scale,index,size,pid) pattern = testingPatternDict[pid] predictTopic = getPredictTopic(patterns,pattern) predictTopicDict[pid] = predictTopic #count += len(predictTopic) #print 'avg = ',count*2.0/len(patternDict) print 'I am out getPredictTopicDict......' return predictTopicDict if __name__ == "__main__": songDict = persist.readSongFromFile() playlistDict = persist.readPlaylistFromFile() getPredictTopicDict(playlistDict,songDict)
def testRecMethod(recType=0):
    """Test single recommender.

    Runs the recommender on the ten entries allPlaylist[0..9], averages the
    values returned by util.getTopNIndex and util.getMAEandRMSE over those
    runs, then prints and logs the averages and the elapsed time.

    Input:
        recType - id of recommender: const.ARIMA, const.SIMILAR, const.AVG,
                  const.ARIMA_SIMILAR, const.ARIMA_AVG, const.KNN,
                  const.MARKOV, const.MARKOV_3 or const.PATTERN.
    Output:
        None.
    Raises:
        ValueError - if recType is none of the ids listed above.
    """
    info = '############%s#############' % util.getMethodName(recType)

    # Recommenders that all go through predict.getRecDict.
    genericTypes = (const.ARIMA, const.SIMILAR, const.AVG,
                    const.ARIMA_SIMILAR, const.ARIMA_AVG)
    otherTypes = (const.KNN, const.MARKOV, const.MARKOV_3, const.PATTERN)
    # Previously an unhandled id left recDict unassigned and the first metric
    # call raised UnboundLocalError; reject it explicitly instead.
    # NOTE(review): getErrorOfRecMethod also accepts const.ALL_HYBRID -
    # confirm whether leaving it out here is intentional.
    if recType not in genericTypes + otherTypes:
        raise ValueError('unsupported recType: %r' % (recType,))

    scaleCount = 10
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()
    recallTotal = 0.0
    precisionTotal = 0.0
    f1Total = 0.0
    maeTotal = 0.0
    rmseTotal = 0.0

    if recType == const.KNN:
        simFilename = '../txt/simMatrix.txt'
        # index2Pid is returned by the helper but not needed here.
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(
            allPlaylist, songDict)
        if os.path.exists(simFilename):
            print('......')
            # 'with' closes the file even when reading fails.
            with open(simFilename, 'r') as simFile:
                line = simFile.readline().rstrip('\n')
            # SECURITY NOTE(review): eval executes whatever the file holds.
            # Only safe while simMatrix.txt is produced by this project;
            # consider ast.literal_eval if it contains plain literals only.
            simMatrix = eval(line)
            print(len(simMatrix))
            # NOTE(review): debug output with a hard-coded key/index 1234;
            # raises if the matrix has no such entry.
            print(len(simMatrix[1234]))
        else:
            print('++++++')
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)

    for scale in range(scaleCount):
        playlistDict = allPlaylist[scale]
        if recType in genericTypes:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict,
                                                  scale, pid2Index,
                                                  countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict,
                                                      scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(
                allPlaylist, songDict, scale)
        else:
            # Only const.PATTERN is left after the validation above.
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict,
                                                      scale)
        recall, precision, f1 = util.getTopNIndex(recDict, playlistDict)
        mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict)
        recallTotal += recall
        precisionTotal += precision
        f1Total += f1
        maeTotal += mae
        rmseTotal += rmse

    print(info)
    logging.info(info)
    summary = (('Recall', recallTotal), ('Precision', precisionTotal),
               ('F1-Score', f1Total), ('MAE', maeTotal), ('RMSE', rmseTotal))
    for label, total in summary:
        average = total / scaleCount
        # Two spaces after '=' reproduce the separator that the former
        # two-argument print statement inserted between its arguments.
        print('%s =  %s' % (label, average))
        logging.info('%s = %f' % (label, average))

    # Measure once so the printed and the logged duration agree.
    elapsed = time.time() - start_time
    print('Consumed: %ds' % elapsed)
    logging.info('Consumed: %ds' % elapsed)
def getErrorOfRecMethod(recType=0):
    """Get error of one recommender for every top-N cut-off.

    Runs the recommender on the ten entries allPlaylist[0..9] and, for each
    topN in range(1, const.TOP_N), sums the values returned by
    util.getTopNIndex and util.getMAEandRMSE. The sums are divided by ten,
    printed, logged and returned.

    Input:
        recType - id of recommender: const.ARIMA, const.SIMILAR, const.AVG,
                  const.ARIMA_SIMILAR, const.ARIMA_AVG, const.ALL_HYBRID,
                  const.KNN, const.MARKOV, const.MARKOV_3 or const.PATTERN.
    Output:
        recalls,precisions,f1s,maes,rmses - five lists holding one averaged
        value per topN.
    Raises:
        ValueError - if recType is none of the ids listed above.
    """
    # Recommenders that all go through predict.getRecDict.
    genericTypes = (const.ARIMA, const.SIMILAR, const.AVG,
                    const.ARIMA_SIMILAR, const.ARIMA_AVG, const.ALL_HYBRID)
    otherTypes = (const.KNN, const.MARKOV, const.MARKOV_3, const.PATTERN)
    # Previously an unhandled id left recDict unassigned and the first metric
    # call raised UnboundLocalError; reject it explicitly instead.
    if recType not in genericTypes + otherTypes:
        raise ValueError('unsupported recType: %r' % (recType,))

    scaleCount = 10
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()

    if recType == const.KNN:
        # index2Pid is returned by the helper but not needed here.
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(
            allPlaylist, songDict)
        simFilename = '../txt/simMatrix.txt'
        if os.path.exists(simFilename):
            print('......')
            # 'with' closes the file even when reading fails.
            with open(simFilename, 'r') as simFile:
                line = simFile.readline().rstrip('\n')
            # SECURITY NOTE(review): eval executes whatever the file holds.
            # Only safe while simMatrix.txt is produced by this project;
            # consider ast.literal_eval if it contains plain literals only.
            simMatrix = eval(line)
            print(len(simMatrix))
            # NOTE(review): debug output with a hard-coded key/index 1234;
            # raises if the matrix has no such entry.
            print(len(simMatrix[1234]))
        else:
            print('++++++')
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)

    topNs = range(1, const.TOP_N, 1)
    # One running total per topN. Starting at 0.0 keeps the final division a
    # float division even if a metric comes back as an int.
    recalls = [0.0] * len(topNs)
    precisions = [0.0] * len(topNs)
    f1s = [0.0] * len(topNs)
    maes = [0.0] * len(topNs)
    rmses = [0.0] * len(topNs)

    for scale in range(scaleCount):
        playlistDict = allPlaylist[scale]
        if recType in genericTypes:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict,
                                                  scale, pid2Index,
                                                  countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict,
                                                      scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(
                allPlaylist, songDict, scale)
        else:
            # Only const.PATTERN is left after the validation above.
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict,
                                                      scale)

        for index, topN in enumerate(topNs):
            recall, precision, f1 = util.getTopNIndex(recDict, playlistDict,
                                                      topN)
            mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict,
                                           topN)
            recalls[index] += recall
            precisions[index] += precision
            f1s[index] += f1
            maes[index] += mae
            rmses[index] += rmse

    # cal the avg value
    recalls = [total / scaleCount for total in recalls]
    precisions = [total / scaleCount for total in precisions]
    f1s = [total / scaleCount for total in f1s]
    maes = [total / scaleCount for total in maes]
    rmses = [total / scaleCount for total in rmses]

    # logging info to log
    for index, topN in enumerate(topNs):
        row = (recType, topN, recalls[index], precisions[index], f1s[index],
               maes[index], rmses[index])
        print('%d:TopN = %d:%f %f %f %f %f' % row)
        logging.info('%d>%d:%f %f %f %f %f' % row)

    end_time = time.time()
    print('Consumed:%d' % (end_time - start_time))
    return recalls, precisions, f1s, maes, rmses
def getResultOfArimaMethod():
    """Run MTSA with different maximum lengths of time series.

    For every maximum length in 5, 10, ..., 50 the const.ARIMA recommender is
    run on the ten entries allPlaylist[0..9]; the metrics of those ten runs
    are averaged, printed and logged.

    Input:
        None.
    Output:
        result - list of five lists (recalls, precisions, f1s, maes, rmses),
                 each holding one averaged value per maximum length.
    """
    began = time.time()
    songs = persist.readSongFromFile()  # every song read from file
    playlists = persist.readPlaylistFromFile()  # every playlist read from file

    # Slots 0..4 collect recalls, precisions, f1s, maes and rmses in that order.
    result = [[], [], [], [], []]

    for length in range(5, 51, 5):  # maximum length from 5 to 50
        sums = [0.0, 0.0, 0.0, 0.0, 0.0]
        for scale in range(10):
            current = playlists[scale]  # playlists of this scale
            # predicted result for this scale and maximum length
            recDict = predict.getRecDict(current, songs, const.ARIMA, scale,
                                         const.TOP_N, length)
            recall, precision, f1 = util.getTopNIndex(recDict, current, const.TOP_N)
            mae, rmse = util.getMAEandRMSE(recDict, current, songs, const.TOP_N)
            measured = (recall, precision, f1, mae, rmse)
            # add this run to the running totals
            sums = [subtotal + value for subtotal, value in zip(sums, measured)]

        # average over the ten runs
        averages = [subtotal / 10 for subtotal in sums]

        # report to stdout and to the log file
        row = tuple([length] + averages)
        print('Length = %d:%f %f %f %f %f' % row)
        logging.info('%d:%f %f %f %f %f' % row)

        # keep the averages for the returned result
        for series, average in zip(result, averages):
            series.append(average)

    finished = time.time()
    print('Consumed:%d' % (finished - began))
    return result