def main(argv):
    """Aggregate stemmed session terms and clicked-result terms per topic.

    Positional arguments read from ``argv``:
        argv[1] -- session input handed to ``getSessionWithXML``.
        argv[2] -- argument handed to ``SearchIndex``.

    For every record yielded by ``getSessionWithXML`` the session terms
    and the clicked title/summary terms are merged into per-topic
    dictionaries. The files ``other-session-words.all`` and
    ``other-clicked-words.all`` are created (truncated) in the current
    working directory.

    NOTE(review): the aggregated dictionaries are not written anywhere
    yet; the original code opened the two output files but never wrote to
    them. The per-session retrieval-with-expansion step that used to write
    ``session-words.all`` / ``clicked-words.all`` was disabled, and this
    function previously crashed with a NameError when it tried to close
    those never-opened files.
    """
    porter = stem.porter.PorterStemmer()
    searcher = SearchIndex(argv[2])
    try:
        searcher.initializeAnalyzer()

        # topicId -> merged term dictionary
        otherSessions = {}
        otherClicked = {}

        # The context manager guarantees both handles are closed, even if
        # processing a session raises.
        with open('other-session-words.all', 'w') as oOtherSession, \
                open('other-clicked-words.all', 'w') as oOtherClicked:
            for topicId, session, _doc, _click, cTitle, cSummary in \
                    getSessionWithXML(argv[1]):
                if topicId not in otherSessions:
                    otherSessions[topicId] = {}
                    otherClicked[topicId] = {}

                # Session terms for this topic.
                sessionTerms = getSessionTerms(session, porter)
                otherSessions[topicId] = mergeDicts(otherSessions[topicId],
                                                    sessionTerms)

                # Terms from clicked titles and summaries for this topic.
                cTTerms, cSTerms = getClickedSummaryTerms(
                    session,
                    joinLists(cSummary.values()),
                    joinLists(cTitle.values()),
                    porter)
                nTerms = mergeDicts(cTTerms, cSTerms)
                otherClicked[topicId] = mergeDicts(otherClicked[topicId],
                                                   nTerms)
    finally:
        # Release the index even when session processing fails.
        searcher.close()
def _writeExpansionRuns(searcher, sessionFile, rel, outFolder, meth, expand,
                        searchRawQuery=True):
    """Write one run file per expansion size for a single expansion method.

    Args:
        searcher: object exposing ``getTopDocumentsWithExpansion``.
        sessionFile: argument handed to ``getSessionWithXML``.
        rel: container of session numbers (1-based position in the session
            stream) that should be processed; only membership is tested.
        outFolder: directory receiving ``<meth>_<noTerms>.RL1`` files.
        meth: method label used in the output file names.
        expand: callable ``expand(query, cTitle)`` returning a dict that
            maps an expansion size to the expansion terms for that size.
        searchRawQuery: when true the unstripped ``session[0]`` is sent to
            the searcher; otherwise the stripped query is used.

    Each distinct stripped first query is processed at most once. Output
    files are opened lazily and always closed, even on error.
    """
    outFiles = {}
    covered = set()
    try:
        sessions = getSessionWithXML(sessionFile)
        for i, (session, _viewDocs, _clickDocs, cTitle, _cSummary) in \
                enumerate(sessions, 1):
            query = session[0].strip()
            if i not in rel or query in covered:
                continue
            covered.add(query)

            searchQuery = session[0] if searchRawQuery else query
            for noTerms, terms in expand(query, cTitle).items():
                if noTerms not in outFiles:
                    outFiles[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                out = outFiles[noTerms]
                docList = searcher.getTopDocumentsWithExpansion(
                    searchQuery, terms, 1000, 'content', 'id')
                # NOTE(review): the last column is 'baseline' for every
                # method; kept as in the original -- confirm whether it
                # should be the method label instead.
                for rank, dtuple in enumerate(docList, 1):
                    out.write(str(i) + ' Q0 ' + dtuple[0] + ' ' + str(rank) +
                              ' ' + str(round(dtuple[1], 2)) + ' baseline\n')
    finally:
        for handle in outFiles.values():
            handle.close()


def main(argv):
    """Produce retrieval run files for several query-expansion methods.

    Positional arguments read from ``argv``:
        argv[1] -- session input handed to ``getSessionWithXML``.
        argv[2] -- argument handed to ``SearchIndex``.
        argv[3] -- argument handed to ``loadCategoryVector``.
        argv[4] -- second argument of the ``Category`` ``CategoryManager``.
        argv[5] -- second argument of the ``CategorySubcluster``
                   ``CategoryManager``.
        argv[6] -- first argument of ``CoOcManager``.
        argv[7] -- third argument of ``Dexter``.
        argv[8] -- argument handed to ``loadRelJudgements``.
        argv[9] -- output folder for the ``.RL1`` run files.

    The methods run, in order, are 'co', 'ent', 'entSub', 'qccTask' and
    'htcTask'. Each re-reads the sessions and writes
    ``<outFolder>/<method>_<noTerms>.RL1``.

    NOTE(review): disabled code for a plain baseline run, per-query
    MAP / NDCG@10 evaluation and plotting was removed; it was never
    executed.
    """
    searcher = SearchIndex(argv[2])
    try:
        searcher.initializeAnalyzer()

        # The stemmer was referenced by the 'ent'/'entSub' steps but its
        # assignment was commented out; create it here as the sibling
        # entry point does.
        porter = stem.porter.PorterStemmer()

        # Dexter entity tagger endpoints.
        ipaddress = 'localhost'
        tagURL = 'http://' + ipaddress + ':8080/rest/annotate'
        catURL = ('http://' + ipaddress +
                  ':8080/rest/graph/get-entity-categories')
        dexter = Dexter(tagURL, catURL, argv[7])

        # Category vectors and managers.
        catVect = loadCategoryVector(argv[3])
        catManage1 = CategoryManager(catVect, argv[4], Category)
        catManage2 = CategoryManager(catVect, argv[5], CategorySubcluster)

        # Category co-occurrence data.
        catCoMan = CoOcManager(argv[6], CoOccurrence(), ' ')

        ranker = Ranker()

        # Task-based expansion.
        htcTask = TaskExpansion('Indexes/htcIndex', ranker, 3000)
        qccTask = TaskExpansion('Indexes/qccIndex', ranker, 3000)

        # Entity/category expansion.
        entExp1 = CatThesExpansion(dexter, catManage1, ranker, catCoMan)
        entExp2 = CatThesExpansion(dexter, catManage2, ranker, catCoMan)

        # Term co-occurrence expansion.
        coOccExp = CoOccurExpansion(catCoMan, None, ranker)

        rel, _noRel = loadRelJudgements(argv[8])
        outFolder = argv[9]

        # The trailing 50, 55, 5 arguments are passed through unchanged;
        # presumably a (start, stop, step) range of expansion sizes --
        # TODO confirm against the expansion classes.
        def expandCo(query, cTitle):
            return coOccExp.expandTextWithStep(query, 50, 55, 5)

        def expandEnt(query, cTitle):
            cText = normalize(' '.join(cTitle[0]), porter)
            entStatus1, entExpTerms1 = entExp1.expandTextWithStep(
                query, cText, 1, 50, 55, 5)
            return entExpTerms1

        def expandEntSub(query, cTitle):
            cText = normalize(' '.join(cTitle[0]), porter)
            entStatus2, entExpTerms2 = \
                entExp2.expandTextWithStepAndSubcluster(
                    query, cText, 1, 50, 55, 5)
            return entExpTerms2

        def expandQcc(query, cTitle):
            return qccTask.expandTextWithStep(query, 50, 55, 5)

        def expandHtc(query, cTitle):
            return htcTask.expandTextWithStep(query, 50, 55, 5)

        # (method label, expansion callable, search with raw session[0]?)
        # Only 'co' searched with the stripped query in the original.
        methods = [
            ('co', expandCo, False),
            ('ent', expandEnt, True),
            ('entSub', expandEntSub, True),
            ('qccTask', expandQcc, True),
            ('htcTask', expandHtc, True),
        ]
        for meth, expand, searchRawQuery in methods:
            _writeExpansionRuns(searcher, argv[1], rel, outFolder, meth,
                                expand, searchRawQuery)
    finally:
        # Release the index even when one of the runs fails.
        searcher.close()