def predictDaysProbDens(kdeDict, maxDay):
    """Evaluate every model in ``kdeDict`` over the day offsets ``0..maxDay``.

    Args:
        kdeDict: Mapping from a key to a model object accepted by
            ``prediction`` (presumably fitted KDEs, judging by the name).
        maxDay: Largest day offset to evaluate, inclusive.

    Returns:
        A dict with the same keys as ``kdeDict``; each value is whatever
        ``prediction`` returns for that key's model on the day grid.
    """
    # One shared grid of integer day offsets, reused for every model.
    dayGrid = list(range(0, maxDay + 1))
    return {name: prediction(kdeDict[name], dayGrid) for name in kdeDict}
def dtfWeightingT(queriesEpoch, tweetsEpoch, kdeQwordDict, qid, qword, docId,
                  wordsIndex, docsLength, avgDocsLength, k1, b):
    """Compute a density-weighted, BM25-style term-frequency score.

    The raw count of ``qword`` in ``docId`` is multiplied by the density that
    ``prediction`` returns for the query/tweet time span, and the product is
    saturated with the usual ``k1`` / ``b`` length normalisation.

    Args:
        queriesEpoch: Mapping ``qid -> query time`` (comparable with tweet times).
        tweetsEpoch: Mapping ``docId -> tweet time``.
        kdeQwordDict: Mapping ``qid + '_' + qword -> model`` passed to
            ``prediction``.
        qid: Query identifier (string; concatenated into the model key).
        qword: Query word (string).
        docId: Document (tweet) identifier.
        wordsIndex: Nested mapping ``word -> docId -> count``.
        docsLength: Mapping ``docId -> document length``.
        avgDocsLength: Average document length used for normalisation.
        k1: Saturation parameter.
        b: Length-normalisation parameter.

    Returns:
        The weighted score as a float, or ``0`` when the tweet time is later
        than the query time.

    Raises:
        KeyError: If any of the mapping lookups misses. The model and both
            time lookups happen before the time comparison.
    """
    model = kdeQwordDict[qid + '_' + qword]
    queryTime = queriesEpoch[qid]
    tweetTime = tweetsEpoch[docId]

    # Tweets posted after the query contribute nothing.
    if not (queryTime >= tweetTime):
        return 0

    span = getTimeSpan(queryTime, tweetTime)
    density = prediction(model, [span])[0]

    termCount = wordsIndex[qword][docId]
    length = docsLength[docId]

    top = (k1 + 1) * termCount * density
    bottom = k1 * (1 - b + b * length / avgDocsLength) + termCount * density
    return 1.0 * top / bottom
# Per-query plotting script: for every query id in the pickled model dict,
# evaluate the model over the day offsets 0..maxDay, hand the result together
# with that query's relevant-result time spans to drawHistLine, and save the
# current matplotlib figure as <qid>.png.
# NOTE(review): `year` and `topN` are not defined in this section; they are
# assumed to be set earlier in the file -- confirm.
maxDay = 16  # 2011, 2012: 16; 2013, 2014: 58
daysList = list(range(0, maxDay + 1))

# Input locations (hard-coded Windows paths).
queryTimeFile = 'E:\\eclipse\\QueryExpansion\\data\\QueryTime\\' + year + '.MBid_query_time.txt'
tweetsEpochFile = 'E:\\eclipse\\TemporalRetrieval\\data\\pickle_data\\tweetsEpoch\\tweetsEpoch_' + year + '.pkl'
qrelFile = 'E:\\eclipse\\QueryExpansion\\data\\qrels\\' + 'qrels.microblog' + year + '_new.txt'
kdePrfTimeFile = 'E:\\eclipse\\TemporalRetrieval\\data\\pickle_data\\KDE\\' + year + '\\kde_prf' + str(topN) + '_' + year + '.pkl'

# NOTE(review): getPickleData presumably unpickles these files; only load
# pickles from trusted locations.
kdeDict = getPickleData(kdePrfTimeFile)
queriesEpoch = getQueriesEpoch(queryTimeFile, year)
tweetsEpoch = getPickleData(tweetsEpochFile)
relevantResults = relevantGet(qrelFile)
relevantTimeSpan = getResultsTimeSpan(relevantResults, tweetsEpoch, queriesEpoch)

# Builtin `float` is the dtype the removed `np.float` alias pointed to.
x1 = np.array(daysList, dtype=float)
for qid in kdeDict:
    probDens = prediction(kdeDict[qid], x1)
    y1 = probDens
    drawHistLine(relevantTimeSpan[qid], x1, y1, maxDay, qid)
    # Backslashes fully escaped; the runtime path is unchanged.
    figPath = 'E:\\eclipse\\TemporalRetrieval\\data\\img\\' + qid + '.png'
    plt.savefig(figPath)
    plt.close()  # release the figure before drawing the next query
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('draw for ' + qid)