    def calculate_score(self, doc, m):
        # Mode 'W': score the document against the probabilistic event model;
        # any other mode falls back to the similarity-based score.
        if m == 'W':
            docEnt = eventUtils.getEntities(doc)[0]
            docEnt['Topic'] = eventUtils.getTokens(doc)
            score = self.getDocProb(docEnt)
        else:
            score = self.calculate_similarity(doc)
        return score
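getDocProb and calculate_similarity are not part of this listing. A minimal sketch of what a getDocProb-style scorer could look like, assuming the event model is a dict of per-type probability tables like the one built by buildProbEventModel further down (names and values here are illustrative only):

import math

def doc_prob(doc_ent, prob_model, default=1e-6):
    # Sum log-probabilities of every observed value under the event model;
    # values the model has never seen fall back to a small default probability.
    score = 0.0
    for ent_type, values in doc_ent.items():
        table = prob_model.get(ent_type, {})
        for v in values:
            score += math.log(table.get(v, default))
    return score

model = {'Topic': {'earthquake': 0.4, 'damage': 0.2},
         'LOCATION': {'chile': 0.6},
         'DATE': {'2014': 0.5}}
doc = {'Topic': ['earthquake', 'damage', 'reported'],
       'LOCATION': ['chile'],
       'DATE': ['2014']}
print(doc_prob(doc, model))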
Example #3
    def webpageEntities(self, docText=""):
        disasters = set(self.entities["Disaster"].keys())

        sentences = eventUtils.getSentences(docText)
        webpageEnts = []
        for sent in sentences:
            sentToks = eventUtils.getTokens(sent)
            if len(sentToks) > 100:
                continue
            intersect = eventUtils.getIntersection(disasters, sentToks)
            if len(intersect) > self.intersectionTh:
                #print intersect
                sentEnts = eventUtils.getEntities(sent)[0]
                if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                    sentEnts['Disaster'] = intersect
                    webpageEnts.append((sent, sentEnts))

        return webpageEnts
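eventUtils is not shown in this listing; a rough, self-contained approximation of the sentence filter above, with plain set intersection standing in for eventUtils.getIntersection and a whitespace tokenizer standing in for eventUtils.getTokens:

def indicative_sentences(sentences, disaster_terms, threshold=1, max_tokens=100):
    # Keep sentences that share more than `threshold` tokens with the
    # disaster vocabulary and are not excessively long.
    kept = []
    for sent in sentences:
        toks = set(sent.lower().split())
        if len(toks) > max_tokens:
            continue
        overlap = disaster_terms & toks
        if len(overlap) > threshold:
            kept.append((sent, overlap))
    return kept

print(indicative_sentences(
    ["A strong earthquake and tsunami hit the coast.", "Stocks closed higher."],
    {"earthquake", "tsunami", "flood"}))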
Example #5
def getEM_Sents(wps):
    docsEntities=[]
    docsEntitiesFreq = []
    entitiesProb = {}
    
    
    collSents = []
    #for i,wp in enumerate(wps):
    for wp in wps:
        if 'text' not in wp:
            continue
        wpContent = wp['text']+wp['title']
        wpSplit = wpContent.split('\n')
        wpFiltered = filter(None,wpSplit)
        wpContentf = '\n'.join(wpFiltered)
        sents = eventUtils.getSentences(wpContentf)
        collSents.append(sents)
    allSents = []
    for sents in collSents:
        allSents.extend(sents)
    fw = eventUtils.getFreqTokens(allSents)
    fw = [w[0] for w in fw]
    
    #collFilteredSents = []
    collEventModelInsts=[]
    for sents in collSents:
        filtEvtModelInsts = []
        for s in sents:
            sentToks = eventUtils.getTokens(s)
            cw = eventUtils.getIntersection(fw, sentToks)
            if len(cw) >= 2:
                emi = {}
                emi['TOPIC'] = list(cw)
                ents = eventUtils.getEntities(s)[0]
                if 'LOCATION' in ents:
                    emi['LOCATION'] = ents['LOCATION']
                if 'DATE' in ents:
                    emi['DATE'] = ents['DATE']
                filtEvtModelInsts.append(emi)
        collEventModelInsts.append(filtEvtModelInsts)
    return collEventModelInsts
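getFreqTokens appears to return (token, frequency) pairs, since the snippet keeps only w[0]. A small stand-in using collections.Counter, with a naive whitespace tokenizer in place of eventUtils.getTokens:

from collections import Counter

def freq_tokens(sentences, top_k=50):
    # Count tokens across all sentences and return the top_k
    # (token, count) pairs, most frequent first.
    counts = Counter()
    for s in sentences:
        counts.update(s.lower().split())
    return counts.most_common(top_k)

fw = freq_tokens(["flood warning issued", "flood waters rise", "roads closed"])
print([w for w, _ in fw])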
Example #6
def extractDatesLocs(urls):
    webpagesTxt = eventUtils.getWebpageText_NoURLs(urls)
    txts = [
        webpageTxt['text'] for webpageTxt in webpagesTxt
        if 'text' in webpageTxt
    ]
    webpageEnts = eventUtils.getEntities(txts)
    #webpageEnts = eventUtils.getEntities(webpageTxt[0]['text'])
    #print webpageEnts[0]['LOCATION']
    #print webpageEnts[0]['DATE']

    locs = []
    dates = []

    for wbE in webpageEnts:
        #print wbE['LOCATION']
        #print wbE['DATE']
        #print '-----------------------'
        if 'LOCATION' in wbE:
            locs.extend(wbE['LOCATION'])
        if 'DATE' in wbE:
            dates.extend(wbE['DATE'])

    freqLocs = eventUtils.getFreq(locs)
    freqDates = eventUtils.getFreq(dates)
    '''
    freqDates_norm = normalizeDates(freqDates)
    sortedDates = eventUtils.getSorted(freqDates_norm.iteritems(),1)
    print sortedDates
    print "Most Frequent Date (i.e. most probably event's date) is: ", sortedDates[0]
    print '________________________________'
    #print freqDates_norm
    '''
    freqLocs_norm = normalizeLocs(freqLocs)
    sortedLocs = eventUtils.getSorted(freqLocs_norm.items(), 1)
    print(sortedLocs)
    print("Most frequent location (i.e. the most probable event location) is:", sortedLocs[0])
    #print freqLocs_norm
    return
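getFreq and getSorted are used here as a frequency count followed by a sort on the count column; a plain-Python equivalent of picking the most frequent location on toy data (normalizeLocs is not shown in this listing and is omitted):

from collections import Counter
from operator import itemgetter

locs = ["Chile", "Chile", "Santiago", "Valparaiso", "Chile"]
freqLocs = Counter(locs)                                   # getFreq analogue
sortedLocs = sorted(freqLocs.items(), key=itemgetter(1), reverse=True)
print(sortedLocs)
print("Most frequent location:", sortedLocs[0])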
Example #7
    def webpageEntities_old(self, docText=""):
        disasters = self.entities["Disaster"]

        sentences = eventUtils.getSentences(docText)
        #impSentences = getIndicativeSents(sentences, disasters, len(disasters), 0)
        #impSentences = []
        webpageEnts = []
        for sent in sentences:
            sentToks = eventUtils.getTokens(sent)
            if len(sentToks) > 100:
                continue
            intersect = eventUtils.getIntersection(disasters, sentToks)
            if len(intersect) > self.intersectionTh:
                #impSentences.append(sent)
                sentEnts = eventUtils.getEntities(sent)[0]
                if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                    sentEnts['Disaster'] = intersect
                    webpageEnts.append((sent, sentEnts))
        #entities = getEntities(impSentences)
        #webpageEnts = zip(impSentences,entities)

        return webpageEnts
    def buildProbEventModel(self, urlsList, topK):

        docsList = eventUtils.getWebpageText(urlsList)  #self.getCollectionDocs(urlsList)
        t = ''
        docsEntities = []
        docsEntitiesFreq = []
        entitiesProb = {}

        # Convert each doc to tokens, locations, dates lists and their corresponding frequency distributions
        # Also produces the total frequency for each document of each list (tokens, locations, and dates)
        for doc in docsList:
            if 'text' in doc:
                t = doc['text']
                if 'title' in doc:
                    t = doc['title'] + " " + t
            if t:
                ents = eventUtils.getEntities(t)[0]
                docEnt = {}
                docEnt['LOCATION'] = {}
                if 'LOCATION' in ents:
                    docEnt['LOCATION'] = ents['LOCATION']
                docEnt['DATE'] = {}
                if 'DATE' in ents:
                    docEnt['DATE'] = ents['DATE']
                toks = eventUtils.getTokens(t)
                docEnt['Topic'] = toks
                docsEntities.append(docEnt)

                docEntFreq = {}
                for k in docEnt:
                    docEntFreq[k] = eventUtils.getFreq(docEnt[k])
                docsEntitiesFreq.append(docEntFreq)

        # Collection-level frequency for each entity type (tokens, locations, dates)
        entitiesProb['LOCATION'] = {}
        entitiesProb['DATE'] = {}
        entitiesProb['Topic'] = {}

        for docEntFreq in docsEntitiesFreq:
            for entity in docEntFreq:
                for val in docEntFreq[entity]:
                    if val in entitiesProb[entity]:
                        entitiesProb[entity][val] += docEntFreq[entity][val]
                    else:
                        entitiesProb[entity][val] = docEntFreq[entity][val]

        # Probability of each item in each entity list (tokens, locations, dates):
        # (1 + freq of item in all docs) / (list size + total freq of all items in that list)
        for ent in entitiesProb:
            allvalsFreq = sum([v for _, v in entitiesProb[ent].items()])
            for k in entitiesProb[ent]:
                entitiesProb[ent][k] = (1.0 + entitiesProb[ent][k]) / (len(entitiesProb[ent]) + allvalsFreq)

        mle = self.getMLEEventEntities(entitiesProb, 10)
        for k in mle:
            print(k, mle[k])

        self.probEvtModel = {}
        for k in mle:
            self.probEvtModel[k] = dict(mle[k])  #entitiesProb[k][:topK]

        self.eDisDic = self.probEvtModel['Topic']

        locToks = self.probEvtModel['LOCATION'].keys()
        locToks = eventUtils.getStemmedWords(locToks)
        self.locDic = dict(zip(locToks, self.probEvtModel['LOCATION'].values()))

        dToks = self.probEvtModel['DATE'].keys()
        dToks = eventUtils.getStemmedWords(dToks)
        self.dDic = dict(zip(dToks, self.probEvtModel['DATE'].values()))

        return docsEntities, entitiesProb
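The probability step above is add-one (Laplace) smoothing: each value gets (1 + collection frequency) / (list size + total frequency). A worked toy example of that formula on one entity list:

freqs = {'earthquake': 6, 'damage': 3, 'aid': 1}   # toy collection-level counts
vocab = len(freqs)                                  # 3
total = sum(freqs.values())                         # 10
probs = {k: (1.0 + f) / (vocab + total) for k, f in freqs.items()}
print(probs)                 # {'earthquake': 7/13, 'damage': 4/13, 'aid': 2/13}
print(sum(probs.values()))   # ~1.0; an unseen value would get 1/13, as in the defaultdict version below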
Example #11
def buildProbEventModel(docsList):
    t = ''
    docsTotalFreqs=[]
    docsEntities=[]
    docsEntitiesFreq = []
    entitiesProb = {}
    
    # Convert each doc to tokens, locations, dates lists and their corresponding frequency distributions
    # Also produces the total frequency for each document of each list (tokens, locations, and dates)
    for doc in docsList:
        
        if 'text' in doc:
            t = doc['text']
            if 'title' in doc:
                t = doc['title'] + " " + t
        if t:
            print('Reading ' + t[:100])
            ents = eventUtils.getEntities(t)[0]
            docEnt = {}
            docEnt['LOCATION']={}
            if 'LOCATION' in ents:
                docEnt['LOCATION'] =  ents['LOCATION']
            docEnt['DATE']={}
            if 'DATE' in ents:
                docEnt['DATE'] = ents['DATE']
            toks = eventUtils.getTokens(t)
            docEnt['Topic'] = toks
            docsEntities.append(docEnt)
            
            docEntFreq = {}
            #docTotals = {}
            for k in docEnt:
                docEntFreq[k] = eventUtils.getFreq(docEnt[k])
                #totalFreq = sum([v for _,v in docEntFreq[k].items()])
                
                #docTotals[k] = totalFreq
            docsEntitiesFreq.append(docEntFreq)
            #docsTotalFreqs.append(docTotals)
    
    # Collection-level frequency for each entity(tokens, locations, dates)
    
    # Total Frequency of keywords, locations, and dates in all documents
    '''
    allDocsTotal = {}
    allDocsTotal['LOCATION'] = 0
    allDocsTotal['DATE']=0
    allDocsTotal['Topic'] = 0
    
    for docTotFreq in docsTotalFreqs:
        for k in docTotFreq:
            allDocsTotal[k]+= docTotFreq[k]
    '''
    
    #Calculating prob for each item in each entity lists (tokens, locations, and dates) as 
    # freq of item in all docs / total freq of all terms in that list
    entitiesProb['LOCATION']={}
    entitiesProb['DATE']={}
    entitiesProb['Topic']={}
    
    for docEntFreq in docsEntitiesFreq:
        for entity in docEntFreq:
            for val in docEntFreq[entity]:
                if val in entitiesProb[entity]:
                    entitiesProb[entity][val] += docEntFreq[entity][val]
                else:
                    entitiesProb[entity][val] = docEntFreq[entity][val]
    
    for ent in entitiesProb:
        allvalsFreq = sum([v for _,v in entitiesProb[ent].items()])
        for k in entitiesProb[ent]:
            #entitiesProb[ent][k] = (1.0 + (entitiesProb[ent][k] *1.0)) / (docsTotalFreqs[ent] + allDocsTotal[ent])
            
            entitiesProb[ent][k] = (1.0 + (entitiesProb[ent][k] *1.0)) / (len(entitiesProb[ent]) + allvalsFreq)
            
        
            
    return docsEntities, entitiesProb
Example #13
    def buildEventModel_wholeCollection(self, seedURLs):

        corpus = Collection(seedURLs)

        #NoTFDF
        self.toksDic = corpus.getIndicativeWords('TF')
        #sortedImptSents = corpus.getIndicativeSentences(keywordsTh,self.intersectionTh)

        # Get Event Model
        docsTexts = [d.text for d in corpus.documents]
        eventModelInstances = eventUtils.getEntities(docsTexts)
        #eventModelInstances = eventUtils.getEventModelInsts(docsTexts)

        self.entities['LOCATION'] = []
        self.entities['DATE'] = []
        self.entities['Topic'] = []

        for e in eventModelInstances:
            if 'LOCATION' in e:
                self.entities['LOCATION'].extend(e['LOCATION'])
            if 'DATE' in e:
                self.entities['DATE'].extend(e['DATE'])

        entitiesFreq = {}
        entitiesFreq['LOCATION'] = self.getEntitiesFreq(self.entities['LOCATION'])
        entitiesFreq['DATE'] = self.getEntitiesFreq(self.entities['DATE'])
        entitiesFreq['Topic'] = eventUtils.getSorted(self.toksDic.items(), 1)

        # Keep only 4-digit years and month names as candidate dates
        filteredDates = []
        months = ['jan', 'feb', 'mar', 'apr', 'aug', 'sept', 'oct', 'nov', 'dec',
                  'january', 'february', 'march', 'april', 'may', 'june', 'july',
                  'august', 'september', 'october', 'november', 'december']
        for d, v in entitiesFreq['DATE']:
            if d.isdigit() and len(d) == 4:
                filteredDates.append((d, v))
            elif d.lower() in months:
                filteredDates.append((d, v))
        entitiesFreq['DATE'] = filteredDates

        # Keep the topK most frequent locations and dates
        llen = self.topK
        dlen = self.topK
        s = len(entitiesFreq['LOCATION'])
        if llen < s:
            s = llen
        t = entitiesFreq['LOCATION'][:s]
        print(t)
        self.entities['LOCATION'] = dict(t)

        s = len(entitiesFreq['DATE'])
        if dlen < s:
            s = dlen
        self.entities['DATE'] = dict(entitiesFreq['DATE'][:s])
        print(entitiesFreq['DATE'][:s])

        # Topic keywords: top tokens that are not already locations or dates
        locDate = list(self.entities['LOCATION'].keys()) + list(self.entities['DATE'].keys())
        locDate = eventUtils.getTokens(' '.join(locDate))

        ntopToks = []
        topToks = [k for k, _ in entitiesFreq['Topic']]
        for tok in topToks:
            if tok not in locDate:
                ntopToks.append(tok)
        topToks = ntopToks

        if self.topK < len(topToks):
            topToks = topToks[:self.topK]

        topToksDic = {}
        for t in topToks:
            topToksDic[t] = self.toksDic[t]
        self.entities['Topic'] = topToksDic
        print(topToksDic)

        # Log-weight each entity's frequencies and keep the resulting vector's scalar
        self.scalars = {}
        for k in self.entities:
            ekv = self.entities[k]
            '''
            if k == 'Disaster':
                ev = [1+math.log(e*v) for e,v in ekv.values()]
            else:
                ev = [1+math.log(e) for e in ekv.values()]
            '''
            #NoTFDF
            ev = [1 + math.log(e) for e in ekv.values()]
            self.scalars[k] = self.getScalar(ev)
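getScalar is not shown in this listing; given that the per-entity vectors are used for similarity scoring, it plausibly returns the Euclidean norm of the log-weighted frequency vector. A sketch under that assumption:

import math

def get_scalar(values):
    # Euclidean norm of a weight vector (assumed to be the normalising
    # factor in a cosine-style similarity).
    return math.sqrt(sum(v * v for v in values))

freqs = {'flood': 12, 'rain': 5, 'evacuation': 2}    # toy topic frequencies
weights = [1 + math.log(f) for f in freqs.values()]  # same 1 + log(freq) weighting as above
print(weights, get_scalar(weights))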
    def buildProbEventModel(self, urlsList, topK):

        docsList = eventUtils.getWebpageText_NoURLs(urlsList)  #getWebpageText
        docsList = [d for d in docsList if 'text' in d]
        t = ''
        #docsTotalFreqs=[]
        docsEntities = []
        docsEntitiesFreq = []
        entitiesFreq = {}

        # Convert each doc to tokens, locations, dates lists and their corresponding frequency distributions
        # Also produces the total frequency for each document of each list (tokens, locations, and dates)
        for doc in docsList:
            #t = ""
            #if doc.has_key('text'):
            t = doc['text']
            #if doc.has_key('title'):
            #    t =doc['title']+ " "+t
            #if t:
            #print 'Reading ' + t[:100]
            ents = eventUtils.getEntities(t)[0]
            docEnt = {}
            docEnt['LOCATION'] = {}
            if 'LOCATION' in ents:
                docEnt['LOCATION'] = ents['LOCATION']
            docEnt['DATE'] = {}
            if 'DATE' in ents:
                docEnt['DATE'] = ents['DATE']
            toks = eventUtils.getTokens(t)
            docEnt['Topic'] = toks
            docsEntities.append(docEnt)

            docEntFreq = {}
            #docTotals = {}
            for k in docEnt:
                docEntFreq[k] = eventUtils.getFreq(docEnt[k])
                #totalFreq = sum([v for _,v in docEntFreq[k].items()])

                #docTotals[k] = totalFreq
            docsEntitiesFreq.append(docEntFreq)
            #docsTotalFreqs.append(docTotals)

        # Collection-level frequency for each entity(tokens, locations, dates)

        #Calculating prob for each item in each entity lists (tokens, locations, and dates) as
        # freq of item in all docs / total freq of all terms in that list
        entitiesFreq['LOCATION'] = defaultdict(float)  #{}
        entitiesFreq['DATE'] = defaultdict(float)  #{}
        entitiesFreq['Topic'] = defaultdict(float)  #{}

        for docEntFreq in docsEntitiesFreq:
            for entity in docEntFreq:
                for val in docEntFreq[entity]:
                    #if val in entitiesProb[entity]:
                    entitiesFreq[entity][val] += docEntFreq[entity][val]
                    #else:
                    #    entitiesProb[entity][val] = docEntFreq[entity][val]
        self.defaultProb = {}
        entitiesProb = {}
        for ent in entitiesFreq:
            allvalsFreq = sum([v for _, v in entitiesFreq[ent].items()])
            l = len(entitiesFreq[ent])
            denom = l + allvalsFreq
            self.defaultProb[ent] = 1.0 / denom
            # Bind denom now: a bare `lambda: 1.0 / denom` would close over the loop
            # variable, so every entity type would share the last denom computed.
            entitiesProb[ent] = defaultdict(lambda d=denom: 1.0 / d)
            for k in entitiesFreq[ent]:
                # Add-one smoothing over the collection-level counts
                # (the counts live in entitiesFreq, not in the fresh entitiesProb dict).
                entitiesProb[ent][k] = (1.0 + entitiesFreq[ent][k]) / denom

        #self.probEvtModel = entitiesProb

        mle = self.getMLEEventEntities(entitiesProb, 10)
        for k in mle:
            print(k, mle[k])

        self.probEvtModel = {}
        for k in mle:
            #self.probEvtModel[k] = dict(mle[k])#entitiesProb[k][:topK]
            # Bind k so each entity type keeps its own default probability.
            self.probEvtModel[k] = defaultdict(lambda kk=k: self.defaultProb[kk])
            for e, v in mle[k]:
                self.probEvtModel[k][e] = v

        #self.eDisDic = self.probEvtModel['Topic']

        locToks = self.probEvtModel['LOCATION'].keys()
        locToks = eventUtils.getStemmedWords(locToks)
        #self.locDic = dict(zip(locToks,self.probEvtModel['LOCATION'].values()))
        locDic = defaultdict(lambda: self.defaultProb['LOCATION'])
        for k, v in zip(locToks, self.probEvtModel['LOCATION'].values()):
            locDic[k] = v
        self.probEvtModel['LOCATION'] = locDic

        dToks = self.probEvtModel['DATE'].keys()
        dToks = eventUtils.getStemmedWords(dToks)
        #self.dDic = dict(zip(dToks,self.probEvtModel['DATE'].values()))
        dDic = defaultdict(lambda: self.defaultProb['DATE'])
        for k, v in zip(dToks, self.probEvtModel['DATE'].values()):
            dDic[k] = v
        self.probEvtModel['DATE'] = dDic

        return docsEntities, entitiesProb
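The defaultdicts above let lookups of unseen locations, dates, or topic words fall back to the smoothed default probability instead of raising KeyError. A small demonstration of the pattern (values are illustrative):

from collections import defaultdict

denom = 13.0
default_prob = 1.0 / denom
loc_probs = defaultdict(lambda d=denom: 1.0 / d)   # bind denom at definition time
loc_probs['chile'] = 7.0 / denom

print(loc_probs['chile'])                  # 0.538... (seen value)
print(loc_probs['peru'])                   # 0.0769... (unseen value gets the smoothed default)
print(loc_probs['peru'] == default_prob)   # True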