def mineSequence(fileName):
    """Collect groups of word variants from the sessions in a file and print them.

    Every session yielded by ``getSessionWithInfo(fileName)`` is converted
    with ``createSequence``. Each resulting entry that holds more than one
    element is looked up with ``findWordVariants``: a non-negative index
    means the entry belongs to an existing group and is merged into it, a
    negative index means it starts a new group.

    Args:
        fileName: passed unchanged to ``getSessionWithInfo``.

    Returns:
        None. Each collected group is printed on its own line.
    """
    # NOTE(review): entries are assumed to be set-like (they must support
    # len() and .union()) -- confirm against createSequence.
    wordVariants = []
    for user, sesId, session, sesString in getSessionWithInfo(fileName):
        for entry in createSequence(session):
            # Single-element entries carry no variant information.
            if len(entry) <= 1:
                continue
            ind = findWordVariants(entry, wordVariants)
            if ind > -1:
                # union() returns a NEW collection and leaves the receiver
                # untouched, so the result has to be stored back; the
                # original call discarded it and never merged anything.
                wordVariants[ind] = wordVariants[ind].union(entry)
            else:
                wordVariants.append(entry)

    for entry in wordVariants:
        # Single-argument parenthesised print behaves identically on
        # Python 2 and Python 3.
        print(entry)
def main(argv):
    """Extract tasks from every session file in the input directory.

    ``argv[1]`` is handed to ``loadOptions`` to build the argument parser and
    is also the single token given to ``parse_args``. For each file in
    ``args.inputDir`` three output files are created in ``args.taskDir``
    (``<name>``, ``Feat1<name>`` and ``Feat2<name>``). Sessions with more than
    five entries are clustered with the QCC and HTC algorithms, the results
    are formatted into a per-session task dictionary, the running statistics
    are updated, and one line per session is written to the task file and to
    the ``Feat1`` file.

    Args:
        argv: command-line style argument list; only ``argv[1]`` is read.

    Returns:
        None. Results are written to files under ``args.taskDir`` and
        progress statistics are printed.
    """
    argParser = loadOptions(argv[1])
    args = argParser.parse_args([argv[1]])
    print(args)

    # Per-algorithm thresholds. The DBS value is parsed here but the DBSCAN
    # step is currently not executed.
    tscore = {
        QCC: float(args.qccThresh),
        HTC: float(args.htcThresh),
        DBS: float(args.dbScanThresh),
    }

    # Running statistics, updated by updateStats() for each processed session.
    stats = {
        QCC: {'ttotal': 0, 'etotal': 0, 'sc': {}, 'scTotal': 0, 'scCount': 0},
        HTC: {'ttotal': 0, 'etotal': 0, 'sc': {}, 'scTotal': 0, 'scCount': 0},
    }

    if not os.path.exists(args.taskDir):
        os.mkdir(args.taskDir)

    i = 0
    for fileName in os.listdir(args.inputDir):
        # The context manager closes all three outputs even if processing a
        # session raises. The original closed only taskFile and called
        # close() on a never-opened sessionFile, which raised NameError.
        # taskFeatures2 is never written; it is still opened so the (empty)
        # Feat2 file keeps being created as before.
        with open(args.taskDir + '//' + fileName, 'w') as taskFile, \
                open(args.taskDir + '//Feat1' + fileName, 'w') as taskFeatures1, \
                open(args.taskDir + '//Feat2' + fileName, 'w') as taskFeatures2:
            sessions = getSessionWithInfo(args.inputDir + '//' + fileName,
                                          args.inputDelim, 1500)
            for user, sId, session, sessionString in sessions:
                if len(session) > 5:
                    wxScore, jScore, featList, featString = getSessionFeatures(
                        session, sId, user)

                    # Cluster the session's queries with both algorithms.
                    qcc = getComponents(wxScore, tscore[QCC])
                    htc = getHTC(jScore, wxScore, tscore[HTC], len(session))

                    sc1, sc2, sc3 = compareAlgos(qcc, htc, wxScore)

                    # Gather both results in one dict, then update stats.
                    taskDict = {}
                    formatResults(session, taskDict, QCC, qcc, sc1)
                    formatResults(session, taskDict, HTC, htc, sc2)
                    updateStats(taskDict, stats, HTC)
                    updateStats(taskDict, stats, QCC)

                    taskFile.write(str(sId) + '\t' + str(session[0][USER]) +
                                   '\t' + str(taskDict) + '\n')
                    taskFeatures1.write(
                        str(sId) + '\t' +
                        str(getTaskFeatures(session, taskDict, qcc, sc1)))

                # NOTE(review): the source had lost its indentation; the
                # progress counter is assumed to count every session, not
                # only those longer than five entries -- confirm.
                if i % 10000 == 0:
                    # Single-argument prints give the same output on
                    # Python 2 and Python 3.
                    print('STATS %d' % i)
                    print(stats)
                i += 1