def initialize():
  """Run the table-building steps against ``dbRepo`` in a fixed order.

  The calls are, in sequence: freqByService, freqByCategory (on
  ``dbRepo.frequency`` / ``dbRepo.freqbyCtgry``), kfirf, wordToSynset,
  frequencySynset, freqByCategory (on ``dbRepo.synsetFrequency`` /
  ``dbRepo.synsetFreqbyCtgry``), kfirf again, and finally kfidfdf.

  Takes no arguments and has no ``return`` statement. It reads the
  module-level names ``category``, ``isSynset`` and ``dbRepo``; none of
  them is assigned here. All helpers are defined outside this block.

  NOTE(review): presumably each step stores its output in ``dbRepo`` for
  the following step to consume, which is why the order matters --
  confirm against the helper implementations before reordering anything.
  """
  # `category` is only read below, never rebound, so this declaration has
  # no effect on behaviour.
  global category
  # Word-level tables first.
  freqByService(dbRepo)
  freqByCategory(dbRepo.frequency, dbRepo.freqbyCtgry)
  # NOTE(review): kfirf receives six positional arguments here but only
  # five in the second call below; confirm the trailing True maps to an
  # optional parameter. The meaning of 0.4 is not visible in this block.
  kfirf(category, 0.4, isSynset, True, dbRepo, True)
  # Same sequence again on the synset-level collections.
  wordToSynset(dbRepo)
  frequencySynset(dbRepo, True)
  freqByCategory(dbRepo.synsetFrequency, dbRepo.synsetFreqbyCtgry)
  kfirf(category, 0.4, isSynset, False, dbRepo)
  #kfidfdf is only for category's api
  kfidfdf(0.5, category, 100, True, dbRepo)
# Example #2
# 0
# Builds the word/synset tables on the training database (dbTrain) and then
# runs a reclassification pass over `db`.
#
# Takes no arguments and has no `return` statement. Reads the module-level
# names `category`, `dbRepo` and `dbTrain`. All helpers are defined outside
# this block.
#
# NOTE(review): everything from the `for line in f_test` loop onward looks
# like the tail of a different routine that was fused onto this function:
#   * f_test, f_result, lineNumToService, db and isSynset are never defined
#     in this function, so they must exist at module level or the code
#     raises NameError.
#   * lineNum and loop are assigned inside this function, which makes them
#     local names; both are read before any assignment, so the first
#     iteration of the f_test loop (or, if f_test is empty, `loop += 1`)
#     raises UnboundLocalError.
#   * kfirf is called with four positional arguments in the first half and
#     five in the second half.
# Confirm the intended setup (opening the files, `lineNum = 0`,
# `lineNumToService = {}`, the enclosing loop that owns `loop`) against the
# original source before relying on this function.
def initialize():
  # `category` is only read below, never rebound, so this declaration has
  # no effect on behaviour.
  global category
  # NOTE(review): presumably splits dbRepo into a training and a test set
  # with 0.2 as the split ratio -- confirm in consInitTrainSetAndTestSet.
  consInitTrainSetAndTestSet(category, 0.2, dbRepo)
  # Word-level tables on the training database.
  freqByService(dbTrain)
  freqByCategory(dbTrain.frequency, dbTrain.freqbyCtgry)
  kfirf(category, 0.4, True, dbTrain)
  # Same sequence on the synset-level collections.
  wordToSynset(dbTrain)
  frequencySynset(dbTrain)
  freqByCategory(dbTrain.synsetFrequency, dbTrain.synsetFreqbyCtgry)
  kfirf(category, 0.4, False, dbTrain)
  #kfidfdf is only for category's api
  kfidfdf(0.5, category, 100, True)
  kfidfdf(0.5, category, 100, False)
  # Map each line index of f_test to the first space-separated token of that
  # line, which is used as the service id (`api_id`) in the queries below.
  for line in f_test:
    ID = line.split(' ')[0]
    lineNumToService[lineNum] = ID
    lineNum += 1
  # Restart the counter so f_result is walked with the same line indices.
  lineNum = 0
  # Each f_result line is compared against '0' / '1' after stripping the
  # newline. NOTE(review): presumably a per-service classifier prediction
  # aligned line-by-line with f_test -- confirm.
  for line in f_result:
    # Looks like a pymongo-style query: take the first document whose api_id
    # matches and compare its stored 'category' with the target category.
    # Indexing with [0] fails if no document matches.
    if db.frequency.find({'api_id':lineNumToService[lineNum]}, {'category':1})[0]['category'] == category:
      originCategory = 1
    else:
      originCategory = 0
    if originCategory == 1 and line.rstrip('\n') == '0':
      #remove original travel now non-travel service, because we cannot assign a category for it
      db.frequency.remove({'api_id':lineNumToService[lineNum]})
      db.synsetFrequency.remove({'api_id':lineNumToService[lineNum]})
      print 'remove', lineNumToService[lineNum]
    # Service was outside the category but the result line says '1': relabel
    # it with the target category in both collections.
    if originCategory == 0 and line.rstrip('\n') == '1':
      db.frequency.update({'api_id':lineNumToService[lineNum]}, {'$set':{'category': category}})
      db.synsetFrequency.update({'api_id':lineNumToService[lineNum]}, {'$set':{'category': category}})
      print 'update', lineNumToService[lineNum]
    lineNum += 1
  #re-build all the table
  # freqByService is not re-run here, unlike in the first half of the function.
  freqByCategory(db.frequency, db.freqbyCtgry)
  kfirf(category, 0.4, isSynset, True, db)
  wordToSynset(db)
  frequencySynset(db)
  freqByCategory(db.synsetFrequency, db.synsetFreqbyCtgry)
  kfirf(category, 0.4, isSynset, False, db)
  # NOTE(review): `loop` is never initialised in this function; it looks
  # like an iteration counter owned by an enclosing loop that is not visible.
  loop += 1
  print loop
  checkStability(db, category, isSynset)