예제 #1
0
def mineSequence(fileName):
  """Group the sequence entries of every session in fileName and print the groups.

  Each session yielded by getSessionWithInfo(fileName) is turned into a
  sequence with createSequence. Every entry holding more than one element is
  either merged into an already collected group (when findWordVariants
  reports a matching index) or stored as a new group. The collected groups
  are printed one per line at the end.

  Args:
    fileName: path handed unchanged to getSessionWithInfo.

  Note:
    Entries are presumably sets of word variants (they are combined with
    ``union``) -- TODO confirm against createSequence.
  """
  wordVariants = []
  for user, sesId, session, sesString in getSessionWithInfo(fileName):
    for entry in createSequence(session):
      # Entries with a single element carry no variants to group.
      if len(entry) <= 1:
        continue

      ind = findWordVariants(entry, wordVariants)
      if ind > -1:
        # union() returns a new set and leaves the receiver untouched, so the
        # merged result has to be stored back; otherwise the merge is lost.
        wordVariants[ind] = wordVariants[ind].union(entry)
      else:
        wordVariants.append(entry)

  for entry in wordVariants:
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print(entry)
예제 #2
0
def main(argv):
  """Extract tasks from every session file in the input directory.

  Options are loaded through loadOptions(argv[1]). For each file in
  args.inputDir, sessions with more than five entries are scored with
  getSessionFeatures, grouped into tasks by two algorithms (getComponents for
  QCC and getHTC for HTC), compared with compareAlgos, and written out:

    * ``<taskDir>//<fileName>``      -- session id, user and the task dict
    * ``<taskDir>//Feat1<fileName>`` -- session id and the QCC task features
    * ``<taskDir>//Feat2<fileName>`` -- created but never written to

  Running statistics are accumulated in ``stats`` through updateStats and
  printed every 10000 sessions.

  Args:
    argv: command line vector; argv[1] is passed both to loadOptions and to
      the resulting parser.
  """
  argParser = loadOptions(argv[1])
  args = argParser.parse_args([argv[1]])
  print(args)

  # Per-algorithm thresholds. The DBS value is parsed but not used by any
  # code visible here; it is kept so a bad option still fails early.
  tscore = {
      QCC: float(args.qccThresh),
      HTC: float(args.htcThresh),
      DBS: float(args.dbScanThresh)
  }

  # One independent counter record per algorithm; the comprehension builds a
  # fresh 'sc' dict for each key so the two records never share state.
  stats = {
      algo: {'ttotal': 0,
             'etotal': 0,
             'sc': {},
             'scTotal': 0,
             'scCount': 0}
      for algo in (QCC, HTC)
  }

  if not os.path.exists(args.taskDir):
    os.mkdir(args.taskDir)

  i = 0  # number of sessions seen so far, across all input files
  for fileName in os.listdir(args.inputDir):
    inputPath = args.inputDir + '//' + fileName

    # All outputs are managed by one with-statement so they are flushed and
    # closed even if processing a session raises. The Feat2 file is still
    # created (empty) to keep the directory layout produced so far.
    with open(args.taskDir + '//' + fileName, 'w') as taskFile, \
        open(args.taskDir + '//Feat1' + fileName, 'w') as taskFeatures1, \
        open(args.taskDir + '//Feat2' + fileName, 'w'):

      # NOTE(review): the meaning of the 1500 argument is defined by
      # getSessionWithInfo, which is not visible here.
      for user, sId, session, sessionString in getSessionWithInfo(
          inputPath, args.inputDelim, 1500):
        if len(session) > 5:
          wxScore, jScore, featList, featString = getSessionFeatures(
              session, sId, user)

          qcc = getComponents(wxScore, tscore[QCC])
          htc = getHTC(jScore, wxScore, tscore[HTC], len(session))
          sc1, sc2, sc3 = compareAlgos(qcc, htc, wxScore)

          # Collect the tasks of both algorithms, then update the statistics.
          taskDict = {}
          formatResults(session, taskDict, QCC, qcc, sc1)
          formatResults(session, taskDict, HTC, htc, sc2)
          updateStats(taskDict, stats, HTC)
          updateStats(taskDict, stats, QCC)

          taskFile.write(str(sId) + '\t' + str(session[0][USER]) + '\t' +
                         str(taskDict) + '\n')

          # No newline is appended here; presumably the string form of
          # getTaskFeatures' result already ends with one -- TODO confirm.
          taskFeatures1.write(
              str(sId) + '\t' +
              str(getTaskFeatures(session, taskDict, qcc, sc1)))

        # Progress report; formatted so the output matches on Python 2 and 3.
        if i % 10000 == 0:
          print('STATS %d' % i)
          print(stats)

        i += 1