def getSentences(self):
    """Return this object's sentences, splitting ``self.text`` on first use.

    A truthy ``self.sentences`` is returned as is. Otherwise
    ``utils.getSentences(self.text)`` is called and its result is stored on
    ``self.sentences`` only when that result is truthy, so an empty split is
    not cached and the split is attempted again on the next call.
    """
    if not self.sentences:
        split = utils.getSentences(self.text)
        if split:
            self.sentences = split
    return self.sentences
def getSentences(self):
    """Return the cached sentence list, computing it from ``self.text`` if needed.

    Returns:
        ``self.sentences`` when it is already non-empty, or after trying to
        fill it from ``utils.getSentences(self.text)``; a new empty list
        when ``self.text`` is falsy and nothing is cached.
    """
    # Fast path: sentences were already computed.
    if len(self.sentences) > 0:
        return self.sentences

    # Nothing to split; hand back a fresh list rather than the attribute.
    if not self.text:
        return []

    found = utils.getSentences(self.text)
    if len(found) > 0:
        # Store a copy so the cache does not alias the helper's result.
        self.sentences = list(found)
    return self.sentences
def getSentences(self):
    """Lazily split ``self.text`` into sentences and remember the result.

    When ``self.sentences`` is empty and ``self.text`` is truthy, the text is
    passed to ``utils.getSentences``; a non-empty result is copied into
    ``self.sentences``. With no cached sentences and a falsy ``self.text``,
    a new empty list is returned instead of the attribute.
    """
    if len(self.sentences) == 0:
        if not self.text:
            return []
        pieces = utils.getSentences(self.text)
        if len(pieces) != 0:
            # Copy the helper's result before caching it.
            self.sentences = list(pieces)
    return self.sentences
def webpageEntities(self, docText=""):
    """Collect sentences of ``docText`` that mention known disaster terms.

    The keys of ``self.entities["Disaster"]`` are the terms looked for. A
    sentence is kept when it has at most 100 tokens, shares more than
    ``self.intersectionTh`` terms with that key set, and its extracted
    entities contain a 'LOCATION' or a 'DATE' entry.

    Args:
        docText: text of the web page; split with ``eventUtils.getSentences``.

    Returns:
        A list of ``(sentence, entities)`` tuples, where ``entities`` is the
        first element returned by ``eventUtils.getEntities(sentence)`` with
        an extra 'Disaster' key holding the matched terms.
    """
    # Iterating a dict yields its keys, so no explicit .keys() is needed.
    disasters = set(self.entities["Disaster"])
    webpageEnts = []
    for sent in eventUtils.getSentences(docText):
        sentToks = eventUtils.getTokens(sent)
        # Very long "sentences" are skipped outright.
        if len(sentToks) > 100:
            continue
        intersect = eventUtils.getIntersection(disasters, sentToks)
        if len(intersect) > self.intersectionTh:
            sentEnts = eventUtils.getEntities(sent)[0]
            # `in` replaces dict.has_key(), which no longer exists in Python 3.
            if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                sentEnts['Disaster'] = intersect
                webpageEnts.append((sent, sentEnts))
    return webpageEnts
def webpageEntities(self, docText=""):
    """Return the sentences of ``docText`` that look disaster-related.

    Each sentence is tokenized with ``eventUtils.getTokens`` and compared
    against the keys of ``self.entities["Disaster"]``. Sentences longer than
    100 tokens, sentences whose overlap does not exceed
    ``self.intersectionTh``, and sentences whose entities hold neither a
    'LOCATION' nor a 'DATE' key are dropped.

    Args:
        docText: the page text to scan.

    Returns:
        A list of ``(sentence, entities)`` pairs; ``entities`` is element 0
        of ``eventUtils.getEntities(sentence)`` with the overlap stored
        under the 'Disaster' key.
    """
    disasters = set(self.entities["Disaster"])
    webpageEnts = []
    for sent in eventUtils.getSentences(docText):
        sentToks = eventUtils.getTokens(sent)
        if len(sentToks) > 100:
            continue

        intersect = eventUtils.getIntersection(disasters, sentToks)
        if len(intersect) <= self.intersectionTh:
            continue

        sentEnts = eventUtils.getEntities(sent)[0]
        # Membership test instead of dict.has_key(), which was removed in
        # Python 3; `in` behaves the same on Python 2.
        if 'LOCATION' not in sentEnts and 'DATE' not in sentEnts:
            continue

        sentEnts['Disaster'] = intersect
        webpageEnts.append((sent, sentEnts))
    return webpageEnts
def getEM_Sents(wps): docsEntities=[] docsEntitiesFreq = [] entitiesProb = {} collSents = [] #for i,wp in enumerate(wps): for wp in wps: if 'text' not in wp: continue wpContent = wp['text']+wp['title'] wpSplit = wpContent.split('\n') wpFiltered = filter(None,wpSplit) wpContentf = '\n'.join(wpFiltered) sents = eventUtils.getSentences(wpContentf) collSents.append(sents) allSents = [] for sents in collSents: allSents.extend(sents) fw = eventUtils.getFreqTokens(allSents) fw = [w[0] for w in fw] #collFilteredSents = [] collEventModelInsts=[] for sents in collSents: filtEvtModelInsts = [] for s in sents: sentToks = eventUtils.getTokens(s) cw = eventUtils.getIntersection(fw, sentToks) if len(cw) >= 2: emi = {} emi['TOPIC'] = list(cw) ents = eventUtils.getEntities(s)[0] if ents.has_key('LOCATION'): emi['LOCATION'] = ents['LOCATION'] #filtEvtModelInsts.append(emi) if ents.has_key('DATE'): #emi['TOPIC'] = cw emi['DATE']=ents['DATE'] filtEvtModelInsts.append(emi) collEventModelInsts.append(filtEvtModelInsts) '''
def webpageEntities_old(self, docText=""):
    """Older variant of the disaster-sentence extractor.

    Unlike a set-of-keys approach, this passes ``self.entities["Disaster"]``
    itself to ``eventUtils.getIntersection``. A sentence is kept when it has
    at most 100 tokens, its overlap with the disaster terms is larger than
    ``self.intersectionTh``, and its entities contain 'LOCATION' or 'DATE'.

    Args:
        docText: text of the web page to scan.

    Returns:
        A list of ``(sentence, entities)`` tuples; ``entities`` is the first
        element of ``eventUtils.getEntities(sentence)`` with the overlap
        added under the 'Disaster' key.
    """
    disasters = self.entities["Disaster"]
    webpageEnts = []
    for sent in eventUtils.getSentences(docText):
        sentToks = eventUtils.getTokens(sent)
        # Skip sentences of more than 100 tokens.
        if len(sentToks) > 100:
            continue
        intersect = eventUtils.getIntersection(disasters, sentToks)
        if len(intersect) > self.intersectionTh:
            sentEnts = eventUtils.getEntities(sent)[0]
            # dict.has_key() is gone in Python 3; `in` works on both lines.
            if 'LOCATION' in sentEnts or 'DATE' in sentEnts:
                sentEnts['Disaster'] = intersect
                webpageEnts.append((sent, sentEnts))
    return webpageEnts
def webpageEntities_old(self, docText=""):
    """Pick out sentences of ``docText`` that mention disaster terms (old version).

    ``self.entities["Disaster"]`` is handed unchanged to
    ``eventUtils.getIntersection`` together with each sentence's tokens.
    Sentences are discarded when they exceed 100 tokens, when the overlap
    is not greater than ``self.intersectionTh``, or when their entities
    have neither a 'LOCATION' nor a 'DATE' key.

    Args:
        docText: the page text.

    Returns:
        A list of ``(sentence, entities)`` pairs, with the overlap stored in
        ``entities['Disaster']``; ``entities`` is element 0 of
        ``eventUtils.getEntities(sentence)``.
    """
    disasters = self.entities["Disaster"]
    webpageEnts = []
    for sent in eventUtils.getSentences(docText):
        sentToks = eventUtils.getTokens(sent)
        if len(sentToks) > 100:
            continue

        intersect = eventUtils.getIntersection(disasters, sentToks)
        if len(intersect) <= self.intersectionTh:
            continue

        sentEnts = eventUtils.getEntities(sent)[0]
        # Use the `in` operator: dict.has_key() does not exist in Python 3.
        if 'LOCATION' not in sentEnts and 'DATE' not in sentEnts:
            continue

        sentEnts['Disaster'] = intersect
        webpageEnts.append((sent, sentEnts))
    return webpageEnts