Esempio n. 1
0
def getErrorOfRecMethod(recType = 0):
  """Get error of all recommender.
     Input:
       recType - id of recommender.
     Output:
       recalls,precisions,f1s,maes,rmses.
  """
  start_time = time.time()
  songDict = persist.readSongFromFile()
  allPlaylist = persist.readPlaylistFromFile_Session()
  recalls = []
  precisions = []
  f1s = []
  maes = []
  rmses = []
  for scale in range(10):
    playlistDict = allPlaylist[scale]
    if recType == const.ARIMA:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.SIMILAR:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.AVG:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    index = 0
    for topN in range(1,const.TOP_N,1):
      recall,precision,f1 = util.getTopNIndex(recDict,playlistDict,topN)
      mae,rmse = util.getMAEandRMSE(recDict,playlistDict,songDict,topN)
      if scale == 0:
        recalls.append(recall)
        precisions.append(precision)
        f1s.append(f1)
        maes.append(mae)
        rmses.append(rmse)
      else:
        recalls[index] += recall
        precisions[index] += precision
        f1s[index] += f1
        maes[index] += mae
        rmses[index] += rmse
      index += 1

  #cal the avg value
  recalls = [recall / 10 for recall in recalls]
  precisions = [precision / 10 for precision in precisions]
  f1s = [f1 / 10 for f1 in f1s]
  maes = [mae / 10 for mae in maes]
  rmses = [rmse / 10 for rmse in rmses]

  #logging info to log
  index = 0
  for topN in range(1,const.TOP_N,1):
    print '%d:TopN = %d:%f %f %f %f %f' % (recType,topN,recalls[index],precisions[index],f1s[index],maes[index],rmses[index])
    logging.info('%d>%d:%f %f %f %f %f' % (recType,topN,recalls[index],precisions[index],f1s[index],maes[index],rmses[index]))
    index += 1
  end_time = time.time()
  print 'Consumed:%d' % (end_time-start_time)
  return recalls,precisions,f1s,maes,rmses  
Esempio n. 2
0
def errorTest():
  songDict = persist.readSongFromFile()
  print len(songDict)
  disDict = {}
  tarDict = songDict[672661981].getTopicDict()
  for sid in songDict:
    song = songDict[sid]
    topicDict = song.getTopicDict()
    dis = util.similarity(tarDict,topicDict)
    disDict[sid] = dis

  disList = sorted(disDict.iteritems(),key=lambda x:x[1])
  for i in range(0,100):
    print disList[i]
Esempio n. 3
0
def errorTest():
    songDict = persist.readSongFromFile()
    print len(songDict)
    disDict = {}
    tarDict = songDict[672661981].getTopicDict()
    for sid in songDict:
        song = songDict[sid]
        topicDict = song.getTopicDict()
        dis = util.similarity(tarDict, topicDict)
        disDict[sid] = dis

    disList = sorted(disDict.iteritems(), key=lambda x: x[1])
    for i in range(0, 100):
        print disList[i]
Esempio n. 4
0
def testRecMethod(recType = 0):
  """Test single recommender.
     Input:
       recType - id of recommender.
     Output:
       None.
  """
  info = '############%s#############' % util.getMethodName(recType)
  start_time = time.time()
  songDict = persist.readSongFromFile()
  allPlaylist = persist.readPlaylistFromFile()
  recallTotal = 0.0
  precisionTotal = 0.0
  f1Total = 0.0
  maeTotal = 0.0
  rmseTotal = 0.0
  if recType == const.KNN:
    simFilename = '../txt/simMatrix.txt'
    pid2Index,index2Pid,countMatrix = util.getUserSongMatrix(allPlaylist,songDict)
    if os.path.exists(simFilename):
      print '......'
      simFile = open(simFilename,'r')
      line = simFile.readline().rstrip('\n')
      simMatrix = eval(line)
      print len(simMatrix)
      print len(simMatrix[1234])
      simFile.close()
    else:
      print '++++++'
      simMatrix = util.getUserSimMatrix(countMatrix,pid2Index)
  for scale in range(10):
    playlistDict = allPlaylist[scale]
    playlistDict = allPlaylist[scale]
    if recType == const.ARIMA:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.SIMILAR:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.AVG:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.ARIMA_SIMILAR:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.ARIMA_AVG:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.KNN:
      recDict = predict.getRecDictOfUserKNN(playlistDict,songDict,scale,pid2Index,countMatrix,simMatrix)
    elif recType == const.MARKOV:
      recDict = predict.getRecDictOfFirstMarkov(allPlaylist,songDict,scale)
    elif recType == const.MARKOV_3:
      recDict = predict.getRecDictOfThreeOrderMarkov(allPlaylist,songDict,scale)
    elif recType == const.PATTERN:
      recDict = predict.getRecDictOfMostPattern(allPlaylist,songDict,scale)
    recall,precision,f1 = util.getTopNIndex(recDict,playlistDict)
    mae,rmse = util.getMAEandRMSE(recDict,playlistDict,songDict)
    recallTotal += recall
    precisionTotal += precision
    f1Total += f1
    maeTotal += mae
    rmseTotal += rmse

  recall = recallTotal / 10
  precision = precisionTotal / 10
  f1 = f1Total / 10
  mae = maeTotal / 10
  rmse = rmseTotal / 10

  print info
  logging.info(info)
  print 'Recall = ',recall
  logging.info('Recall = %f' % recall)
  print 'Precision = ',precision
  logging.info('Precision = %f' % precision)
  print 'F1-Score = ',f1
  logging.info('F1-Score = %f' % f1)
  print 'MAE = ',mae
  logging.info('MAE = %f' % mae)
  print 'RMSE = ',rmse
  logging.info('RMSE = %f' % rmse)
  print 'Consumed: %ds' % (time.time()-start_time)
  logging.info('Consumed: %ds' % (time.time()-start_time))
Esempio n. 5
0
def getErrorOfRecMethod(recType = 0):
  """Get error of all recommender.
     Input:
       recType - id of recommender.
     Output:
       recalls,precisions,f1s,maes,rmses.
  """
  start_time = time.time()
  songDict = persist.readSongFromFile()
  allPlaylist = persist.readPlaylistFromFile()
  recalls = []
  precisions = []
  f1s = []
  maes = []
  rmses = []
  if recType == const.KNN:
    pid2Index,index2Pid,countMatrix = util.getUserSongMatrix(allPlaylist,songDict)
    simFilename = '../txt/simMatrix.txt'
    if os.path.exists(simFilename):
      print '......'
      simFile = open(simFilename,'r')
      line = simFile.readline().rstrip('\n')
      simMatrix = eval(line)
      print len(simMatrix)
      print len(simMatrix[1234])
      simFile.close()
    else:
      print '++++++'
      simMatrix = util.getUserSimMatrix(countMatrix,pid2Index)
  for scale in range(10):
    playlistDict = allPlaylist[scale]
    if recType == const.ARIMA:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.SIMILAR:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.AVG:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.ARIMA_SIMILAR:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.ARIMA_AVG:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.ALL_HYBRID:
      recDict = predict.getRecDict(playlistDict,songDict,recType,scale)
    elif recType == const.KNN:
      recDict = predict.getRecDictOfUserKNN(playlistDict,songDict,scale,pid2Index,countMatrix,simMatrix)
    elif recType == const.MARKOV:
      recDict = predict.getRecDictOfFirstMarkov(allPlaylist,songDict,scale)
    elif recType == const.MARKOV_3:
      recDict = predict.getRecDictOfThreeOrderMarkov(allPlaylist,songDict,scale)
    elif recType == const.PATTERN:
      recDict = predict.getRecDictOfMostPattern(allPlaylist,songDict,scale)
    index = 0
    for topN in range(1,const.TOP_N,1):
      recall,precision,f1 = util.getTopNIndex(recDict,playlistDict,topN)
      mae,rmse = util.getMAEandRMSE(recDict,playlistDict,songDict,topN)
      if scale == 0:
        recalls.append(recall)
        precisions.append(precision)
        f1s.append(f1)
        maes.append(mae)
        rmses.append(rmse)
      else:
        recalls[index] += recall
        precisions[index] += precision
        f1s[index] += f1
        maes[index] += mae
        rmses[index] += rmse
      index += 1

  #cal the avg value
  recalls = [recall / 10 for recall in recalls]
  precisions = [precision / 10 for precision in precisions]
  f1s = [f1 / 10 for f1 in f1s]
  maes = [mae / 10 for mae in maes]
  rmses = [rmse / 10 for rmse in rmses]

  #logging info to log
  index = 0
  for topN in range(1,const.TOP_N,1):
    print '%d:TopN = %d:%f %f %f %f %f' % (recType,topN,recalls[index],precisions[index],f1s[index],maes[index],rmses[index])
    logging.info('%d>%d:%f %f %f %f %f' % (recType,topN,recalls[index],precisions[index],f1s[index],maes[index],rmses[index]))
    index += 1
  end_time = time.time()
  print 'Consumed:%d' % (end_time-start_time)
  return recalls,precisions,f1s,maes,rmses  
Esempio n. 6
0
                                                                    songDict,
                                                                    scale)
  print 'get trainingPatterns and testingPatterns...'
  #print allPattern
  #print len(allPattern)
  #S = read("../txt/PrefixSpan.txt")
  print 'get frequent pattern...'
  patterns = prefixSpan(SquencePattern([], sys.maxint), allTrainingPattern, 30)
  print 'Finish getting frequent pattern...'
  #print_patterns(patterns)
  #print len(patterns)
  #count = 0
  size = len(testingPatternDict)
  index = 0
  for pid in testingPatternDict.keys():
    #print '======%d======' % pid
    index += 1
    print 'scale=%d > %d/%d:%d' % (scale,index,size,pid)
    pattern = testingPatternDict[pid]      
    predictTopic = getPredictTopic(patterns,pattern)
    predictTopicDict[pid] = predictTopic
    #count += len(predictTopic)
  #print 'avg = ',count*2.0/len(patternDict)
  print 'I am out getPredictTopicDict......'
  return predictTopicDict
 
# Script entry point: load the persisted songs and playlists and run
# getPredictTopicDict on them. The returned dict is discarded.
# NOTE(review): only the tail of getPredictTopicDict is visible here; confirm
# that it accepts this two-argument call (its body refers to a 'scale').
if __name__ == "__main__":
  songDict = persist.readSongFromFile()
  playlistDict = persist.readPlaylistFromFile()
  getPredictTopicDict(playlistDict,songDict)
Esempio n. 7
0
def testRecMethod(recType=0):
    """Test single recommender.
     Input:
       recType - id of recommender.
     Output:
       None.
  """
    info = '############%s#############' % util.getMethodName(recType)
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()
    recallTotal = 0.0
    precisionTotal = 0.0
    f1Total = 0.0
    maeTotal = 0.0
    rmseTotal = 0.0
    if recType == const.KNN:
        simFilename = '../txt/simMatrix.txt'
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(
            allPlaylist, songDict)
        if os.path.exists(simFilename):
            print '......'
            simFile = open(simFilename, 'r')
            line = simFile.readline().rstrip('\n')
            simMatrix = eval(line)
            print len(simMatrix)
            print len(simMatrix[1234])
            simFile.close()
        else:
            print '++++++'
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)
    for scale in range(10):
        playlistDict = allPlaylist[scale]
        playlistDict = allPlaylist[scale]
        if recType == const.ARIMA:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.SIMILAR:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.AVG:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.ARIMA_SIMILAR:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.ARIMA_AVG:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict,
                                                  scale, pid2Index,
                                                  countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict,
                                                      scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(
                allPlaylist, songDict, scale)
        elif recType == const.PATTERN:
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict,
                                                      scale)
        recall, precision, f1 = util.getTopNIndex(recDict, playlistDict)
        mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict)
        recallTotal += recall
        precisionTotal += precision
        f1Total += f1
        maeTotal += mae
        rmseTotal += rmse

    recall = recallTotal / 10
    precision = precisionTotal / 10
    f1 = f1Total / 10
    mae = maeTotal / 10
    rmse = rmseTotal / 10

    print info
    logging.info(info)
    print 'Recall = ', recall
    logging.info('Recall = %f' % recall)
    print 'Precision = ', precision
    logging.info('Precision = %f' % precision)
    print 'F1-Score = ', f1
    logging.info('F1-Score = %f' % f1)
    print 'MAE = ', mae
    logging.info('MAE = %f' % mae)
    print 'RMSE = ', rmse
    logging.info('RMSE = %f' % rmse)
    print 'Consumed: %ds' % (time.time() - start_time)
    logging.info('Consumed: %ds' % (time.time() - start_time))
Esempio n. 8
0
def getErrorOfRecMethod(recType=0):
    """Get error of all recommender.
     Input:
       recType - id of recommender.
     Output:
       recalls,precisions,f1s,maes,rmses.
  """
    start_time = time.time()
    songDict = persist.readSongFromFile()
    allPlaylist = persist.readPlaylistFromFile()
    recalls = []
    precisions = []
    f1s = []
    maes = []
    rmses = []
    if recType == const.KNN:
        pid2Index, index2Pid, countMatrix = util.getUserSongMatrix(
            allPlaylist, songDict)
        simFilename = '../txt/simMatrix.txt'
        if os.path.exists(simFilename):
            print '......'
            simFile = open(simFilename, 'r')
            line = simFile.readline().rstrip('\n')
            simMatrix = eval(line)
            print len(simMatrix)
            print len(simMatrix[1234])
            simFile.close()
        else:
            print '++++++'
            simMatrix = util.getUserSimMatrix(countMatrix, pid2Index)
    for scale in range(10):
        playlistDict = allPlaylist[scale]
        if recType == const.ARIMA:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.SIMILAR:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.AVG:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.ARIMA_SIMILAR:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.ARIMA_AVG:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.ALL_HYBRID:
            recDict = predict.getRecDict(playlistDict, songDict, recType,
                                         scale)
        elif recType == const.KNN:
            recDict = predict.getRecDictOfUserKNN(playlistDict, songDict,
                                                  scale, pid2Index,
                                                  countMatrix, simMatrix)
        elif recType == const.MARKOV:
            recDict = predict.getRecDictOfFirstMarkov(allPlaylist, songDict,
                                                      scale)
        elif recType == const.MARKOV_3:
            recDict = predict.getRecDictOfThreeOrderMarkov(
                allPlaylist, songDict, scale)
        elif recType == const.PATTERN:
            recDict = predict.getRecDictOfMostPattern(allPlaylist, songDict,
                                                      scale)
        index = 0
        for topN in range(1, const.TOP_N, 1):
            recall, precision, f1 = util.getTopNIndex(recDict, playlistDict,
                                                      topN)
            mae, rmse = util.getMAEandRMSE(recDict, playlistDict, songDict,
                                           topN)
            if scale == 0:
                recalls.append(recall)
                precisions.append(precision)
                f1s.append(f1)
                maes.append(mae)
                rmses.append(rmse)
            else:
                recalls[index] += recall
                precisions[index] += precision
                f1s[index] += f1
                maes[index] += mae
                rmses[index] += rmse
            index += 1

    #cal the avg value
    recalls = [recall / 10 for recall in recalls]
    precisions = [precision / 10 for precision in precisions]
    f1s = [f1 / 10 for f1 in f1s]
    maes = [mae / 10 for mae in maes]
    rmses = [rmse / 10 for rmse in rmses]

    #logging info to log
    index = 0
    for topN in range(1, const.TOP_N, 1):
        print '%d:TopN = %d:%f %f %f %f %f' % (recType, topN, recalls[index],
                                               precisions[index], f1s[index],
                                               maes[index], rmses[index])
        logging.info('%d>%d:%f %f %f %f %f' %
                     (recType, topN, recalls[index], precisions[index],
                      f1s[index], maes[index], rmses[index]))
        index += 1
    end_time = time.time()
    print 'Consumed:%d' % (end_time - start_time)
    return recalls, precisions, f1s, maes, rmses
Esempio n. 9
0
def getResultOfArimaMethod():
  """Run MTSA with different maximum length of time series
  and then return the average hit ratio, precision, f1, mae and rmse.
  Input:
    None.
  Output:
    result - result of experiments.
  """
  start_time = time.time()
  songDict = persist.readSongFromFile() # read total songs from file.
  allPlaylist = persist.readPlaylistFromFile() # read all playlists from file.
  result = []
  totalRecalls = []
  totalPrecisions = []
  totalF1s = []
  totalMaes = []
  totalRmses = []
  for length in range(5,51,5): # set maximum length between 5 to 50.
    recalls = 0.0
    precisions = 0.0
    f1s = 0.0
    maes = 0.0
    rmses = 0.0
    for scale in range(10):
      playlistDict = allPlaylist[scale] # get the playlist in specific scale
      recDict = predict.getRecDict(playlistDict,songDict,const.ARIMA,scale,
                                     const.TOP_N,length) # get predicted result
      #get the middle result
      recall,precision,f1 = util.getTopNIndex(recDict,playlistDict,const.TOP_N)
      mae,rmse = util.getMAEandRMSE(recDict,playlistDict,songDict,const.TOP_N)
      #add to summary
      recalls += recall
      precisions += precision
      f1s += f1
      maes += mae
      rmses += rmse

    #cal the avg value
    recalls = recalls / 10
    precisions = precisions / 10 
    f1s = f1s / 10
    maes = maes / 10
    rmses = rmses / 10

    #log info to lod file
    print 'Length = %d:%f %f %f %f %f' % (length,recalls,precisions,f1s,maes,rmses)
    logging.info('%d:%f %f %f %f %f' % (length,recalls,precisions,f1s,maes,rmses))
    #add result to list
    totalRecalls.append(recalls)
    totalPrecisions.append(precisions)
    totalF1s.append(f1s)
    totalMaes.append(maes)
    totalRmses.append(rmses)

  result.append(totalRecalls)
  result.append(totalPrecisions)
  result.append(totalF1s)
  result.append(totalMaes)
  result.append(totalRmses)
  end_time = time.time()
  print 'Consumed:%d' % (end_time-start_time)
  return result