def __init__(self, fileName):
    '''
    Set up the reader, the helper objects and the three word lists.

    fileName is handed to Conll09FileReader unchanged. Each word list is
    read from a text file with one entry per line; only the trailing
    newline is removed from each entry.
    '''
    self.reader = Conll09FileReader(fileName)
    self.eventAttribute = EventAttribute()
    self.compoundVerb = CompoundVerb()
    self.compoundNoun = CompoundNoun()
    # Each list file is read inside a ``with`` block so its handle is closed
    # as soon as the list has been built, instead of being left open.
    with open('E:/GitRepository/EventProcessing/Corpus/NounAspectual.txt') as nounFile:
        self.AspectualNouns = [word.strip('\n') for word in nounFile]
    with open('E:/GitRepository/EventProcessing/Corpus/VerbAspectual.txt') as verbFile:
        self.AspectualVerbs = [phrase.strip('\n') for phrase in verbFile]
    with open('E:/GitRepository/EventProcessing/Corpus/AgentiveNominals.txt') as nominalFile:
        self.AgentiveNominals = [item.strip('\n') for item in nominalFile]
def __init__(self, fileName):
    '''
    Create the reader and the helper objects used while tagging.

    fileName is handed to Conll09FileReader unchanged.
    '''
    # Source of the sentences processed by the tagger.
    self.reader = Conll09FileReader(fileName)
    # Helper used to find the non-verbal parts of compound verbs.
    self.compoundVerb = CompoundVerb()
    # Collection object and single-attribute object built up front.
    self.eventAttributes = EventAttributes()
    self.eventAttribute = EventAttribute()
class NounEventTagger(object):
    '''
    This class only tags nouns in the files.

    Tag() produces one attribute object per word of a sentence, and
    BatchTag() walks every sentence returned by the reader. A noun is
    treated as an event when it is part of a compound noun, is causative
    in context, is aspectual, or is aspectual in context.
    '''

    # Directory the word lists are loaded from when __init__ gets no corpusDir.
    CORPUS_DIR = 'E:/GitRepository/EventProcessing/Corpus'

    # Lemmas that mark a causative context in IsCausativeInContext.
    CAUSATIVE_PHRASES = ('موجب', 'باعث', 'دلیل', 'سبب', 'علت')

    def __init__(self, fileName, corpusDir=None):
        '''
        Set up the reader, the helper objects and the three word lists.

        fileName  -- handed to Conll09FileReader unchanged.
        corpusDir -- directory containing NounAspectual.txt,
                     VerbAspectual.txt and AgentiveNominals.txt;
                     defaults to CORPUS_DIR.
        '''
        if corpusDir is None:
            corpusDir = self.CORPUS_DIR
        self.reader = Conll09FileReader(fileName)
        self.eventAttribute = EventAttribute()
        # BatchTag adds to and returns this collection, so it has to exist
        # before BatchTag is called.
        self.eventAttributes = EventAttributes()
        self.compoundVerb = CompoundVerb()
        self.compoundNoun = CompoundNoun()
        self.AspectualNouns = self._readLines(corpusDir + '/NounAspectual.txt')
        self.AspectualVerbs = self._readLines(corpusDir + '/VerbAspectual.txt')
        self.AgentiveNominals = self._readLines(corpusDir + '/AgentiveNominals.txt')

    @staticmethod
    def _readLines(path):
        '''
        Return the lines of the file at path with the trailing newline removed.

        The file is closed before the list is returned.
        '''
        with open(path) as handle:
            return [line.strip('\n') for line in handle]

    def Tag(self, sentence):
        '''
        Return a list with one attribute object per word of sentence.

        Words whose coarse POS is not 'N' are recorded as non-events, and so
        are nouns that belong to a compound verb or whose fine POS is 'ANM'.
        Each noun accepted as an event consumes the current globals.id and
        globals.eventID, and both counters are then incremented.
        '''
        modalityDetector = ModalityDetector()
        polarityDetector = PolarityDetector()
        eventAttributes = []
        for wordIndex in range(sentence.getSentenceLength()):
            word = sentence.getWord(wordIndex)
            if sentence.getPOSCoarse(wordIndex) != 'N':
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
                continue

            classfeature = 'STATE'
            pos = 'NOUN'
            aspect = sentence.getAspectuality(wordIndex)
            modality = modalityDetector.Detect(sentence, wordIndex)
            polarity = polarityDetector.detect(word, pos, sentence.getLemma(wordIndex))
            tense = sentence.getTense(wordIndex)
            mood = sentence.getMood(wordIndex)

            # Nouns inside a compound verb, and nouns with fine POS 'ANM',
            # are never events. These two cases also rebind
            # self.eventAttribute to the object SetAttributes returns.
            if (self.compoundVerb.IsPartOfCompoundVerb(sentence, wordIndex)
                    or sentence.getPOSFine(wordIndex) == 'ANM'):
                self.eventAttribute = self.eventAttribute.SetAttributes(
                    isEvent=False, text=word)
                eventAttributes.append(self.eventAttribute)
                continue

            # The checks run in this order and stop at the first that holds.
            isEvent = (self.compoundNoun.IsPartOfCompoundNoun(sentence, wordIndex)
                       or self.IsCausativeInContext(sentence, wordIndex)
                       or self.IsAspectual(sentence, wordIndex)
                       or self.IsAspectualInContext(sentence, wordIndex))
            if isEvent:
                eventAttributes.append(self.eventAttribute.SetAttributes(
                    True, globals.id, globals.eventID, classfeature, word,
                    aspect, modality, pos, polarity, tense, mood))
                globals.eventID += 1
                globals.id += 1
            else:
                # The word text is passed here as well, like every other
                # non-event entry produced by this method.
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
        return eventAttributes

    def BatchTag(self):
        '''
        Run the noun checks over the sentences of the reader.

        Returns self.eventAttributes after adding to it: index 0 is added
        for non-noun words and for rejected nouns, and the word index is
        added once for every event check the noun satisfies.
        '''
        numberOfSentences = self.reader.getNumberOfSentences()
        # NOTE(review): iteration starts at sentence index 1, so index 0 is
        # never requested -- confirm whether the reader is 1-based.
        for sentenceIndex in range(1, numberOfSentences):
            sentence = self.reader.getSentence(sentenceIndex)
            for wordIndex in range(sentence.getSentenceLength()):
                # NOTE(review): this non-noun branch mirrors Tag(); confirm
                # that it is the intended pairing for the placeholder add.
                if sentence.getPOSCoarse(wordIndex) != 'N':
                    self.eventAttributes.add(sentence, 0)
                    continue
                if self.compoundVerb.IsPartOfCompoundVerb(sentence, wordIndex):
                    self.eventAttributes.add(sentence, 0)
                    continue
                if sentence.getPOSFine(wordIndex) == 'ANM':
                    self.eventAttributes.add(sentence, 0)
                    continue
                # Unlike Tag(), the checks below are independent: a noun that
                # satisfies several of them is added several times.
                if self.compoundNoun.IsPartOfCompoundNoun(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsCausativeInContext(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsAspectual(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsAspectualInContext(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
        return self.eventAttributes

    def IsAgentiveNominal(self, word):
        '''Return True when word is listed in self.AgentiveNominals.'''
        return word in self.AgentiveNominals

    def IsAspectual(self, sentence, wordIndex):
        '''
        Return True when the word at wordIndex is listed as aspectual.

        For a word with coarse POS 'V' the extracted compound verb is looked
        up in self.AspectualVerbs; for any other word the lemma is looked up
        in self.AspectualNouns.
        '''
        if sentence.getPOSCoarse(wordIndex) == 'V':
            compoundVerb = self.compoundVerb.extractCompundVerb(sentence, wordIndex)
            return compoundVerb.strip('\n') in self.AspectualVerbs
        # This branch is only for noun words.
        return sentence.getLemma(wordIndex) in self.AspectualNouns

    def IsAspectualInContext(self, sentence, wordIndex):
        '''
        Return True when the word depends on an aspectual word.

        That is the case when its dependency head is aspectual, or when the
        head has relation 'NPP' and the head of the head is aspectual.
        '''
        headIndex = sentence.getDependencyParentNum(wordIndex)
        headOfHeadIndex = sentence.getDependencyParentNum(headIndex)
        if self.IsAspectual(sentence, headIndex):
            return True
        # A separate "relation is 'SBJ' and the head is aspectual" test is
        # not needed: an aspectual head has already returned True above.
        return (sentence.getDependencyRel(headIndex) == 'NPP'
                and self.IsAspectual(sentence, headOfHeadIndex))

    def IsCausativeInContext(self, sentence, wordIndex):
        '''
        Return True when the word appears in a causative context.

        That is the case when the lemma of its dependency head is one of
        CAUSATIVE_PHRASES, or when the word has relation 'SBJ' and a word at
        or after it shares its head and has such a lemma. Returns False
        otherwise.
        '''
        parentNum = sentence.getDependencyParentNum(wordIndex)
        if sentence.getLemma(parentNum) in self.CAUSATIVE_PHRASES:
            return True
        if sentence.getDependencyRel(wordIndex) != 'SBJ':
            return False
        return any(
            sentence.getDependencyParentNum(i) == parentNum
            and sentence.getLemma(i) in self.CAUSATIVE_PHRASES
            for i in range(wordIndex, sentence.getSentenceLength()))
class VerbEventTagger(object):
    '''
    This class only tags verbs as events in sentences.

    Tag() produces one attribute object per word of a sentence, and
    BatchTag() collects an infinitive-like string for every verb accepted
    as an event across the sentences of the reader.
    '''

    # Lemma parts that make IsAstBoodHast reject a verb.
    AST_BOOD_HAST = ('است', 'بود', 'هست')

    def __init__(self, fileName):
        '''
        Create the reader and the helper objects used while tagging.

        fileName is handed to Conll09FileReader unchanged.
        '''
        self.reader = Conll09FileReader(fileName)
        self.compoundVerb = CompoundVerb()
        self.eventAttributes = EventAttributes()
        self.eventAttribute = EventAttribute()

    def Tag(self, sentence):
        '''
        Return a list with one attribute object per word of sentence.

        Words whose coarse POS is not 'V' are recorded as non-events, and so
        are verbs rejected by IsAstBoodHast, IsProgressive or IsModal. For a
        compound verb, every non-verbal part and the verb itself are tagged
        with the same globals.eventID, while globals.id is incremented for
        each tagged word.
        '''
        eventAttributes = []
        modalityDetector = ModalityDetector()
        classFeatureExtractor = ClassFeatureExtraction()
        polarityDetector = PolarityDetector()
        for wordIndex in range(sentence.getSentenceLength()):
            word = sentence.getWord(wordIndex)
            posCoarse = sentence.getPOSCoarse(wordIndex)
            if posCoarse != 'V':
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
                continue

            aspect = sentence.getAspectuality(wordIndex)
            modality = modalityDetector.Detect(sentence, wordIndex)
            pos = 'VERB'
            polarity = polarityDetector.detect(
                word, posCoarse, sentence.getLemma(wordIndex))
            tense = sentence.getTense(wordIndex)
            mood = sentence.getMood(wordIndex)

            # Rejected verbs are recorded as non-events. These cases also
            # rebind self.eventAttribute to the object SetAttributes returns.
            if (self.IsAstBoodHast(sentence, wordIndex)
                    or self.IsProgressive(sentence, wordIndex)
                    or self.IsModal(sentence, wordIndex)):
                self.eventAttribute = self.eventAttribute.SetAttributes(
                    isEvent=False, text=word)
                eventAttributes.append(self.eventAttribute)
                continue

            # Extracted once per verb; an empty result means a simple verb.
            verbIdices = self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(
                sentence, wordIndex)
            if verbIdices:
                # All parts of a compound verb share one eventID.
                nonVerbalText = ' '.join(
                    sentence.getWord(index) for index in verbIdices).rstrip(' ')
                verbWord = nonVerbalText + ' ' + word
                classfeature = classFeatureExtractor.Extract(verbWord)
                for index in verbIdices:
                    # Overwrites the entry already stored for that word. The
                    # list holds exactly wordIndex entries at this point, so
                    # an index that is not below wordIndex raises IndexError.
                    eventAttributes[index] = self.eventAttribute.SetAttributes(
                        True, globals.id, globals.eventID, classfeature,
                        sentence.getWord(index), aspect, modality, pos,
                        polarity, tense, mood)
                    globals.id += 1
            else:
                classfeature = classFeatureExtractor.Extract(word)

            eventAttributes.append(self.eventAttribute.SetAttributes(
                True, globals.id, globals.eventID, classfeature, word,
                aspect, modality, pos, polarity, tense, mood))
            globals.eventID += 1
            globals.id += 1
        return eventAttributes

    def BatchTag(self):
        '''
        Return a list of strings, one per verb accepted as an event.

        Each string is the part of the verb lemma before the first '#'
        followed by 'ن'. For a compound verb it is preceded by the
        non-verbal words, separated by single spaces.
        '''
        numberOfSentences = self.reader.getNumberOfSentences()
        EventVerbs = []
        # NOTE(review): iteration starts at sentence index 1, so index 0 is
        # never requested -- confirm whether the reader is 1-based.
        for sentenceIndex in range(1, numberOfSentences):
            sentence = self.reader.getSentence(sentenceIndex)
            for wordIndex in range(sentence.getSentenceLength()):
                if sentence.getPOSCoarse(wordIndex) != 'V':
                    continue
                if (self.IsAstBoodHast(sentence, wordIndex)
                        or self.IsProgressive(sentence, wordIndex)
                        or self.IsModal(sentence, wordIndex)):
                    continue
                verbIdices = self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(
                    sentence, wordIndex)
                parts = [sentence.getWord(item) for item in verbIdices or []]
                parts.append(sentence.getLemma(wordIndex).split('#')[0] + 'ن')
                EventVerbs.append(' '.join(parts))
        return EventVerbs

    def IsAstBoodHast(self, sentence, wordIndex):
        '''
        Return True when one of the '#'-separated parts of the lemma at
        wordIndex is listed in AST_BOOD_HAST.
        '''
        lemmas = sentence.getLemma(wordIndex).split('#')
        return any(form in lemmas for form in self.AST_BOOD_HAST)

    def IsProgressive(self, sentence, wordIndex):
        '''Return True when the dependency relation at wordIndex is 'PROG'.'''
        return sentence.getDependencyRel(wordIndex) == 'PROG'

    def IsModal(self, sentence, wordIndex):
        '''Return True when the fine POS at wordIndex is 'MODL'.'''
        return sentence.getPOSFine(wordIndex) == 'MODL'

    def IsCompoundVerb(self, sentence, verbIndex):
        '''
        Return True when the verb at verbIndex has at least one non-verbal
        part according to self.compoundVerb.
        '''
        return bool(self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(
            sentence, verbIndex))

    def IsConsecutive(self, verbIdices):
        '''
        Return True when every index is exactly one more than the index
        before it. Empty and single-element sequences return True.
        '''
        return all(
            following - current == 1
            for current, following in zip(verbIdices, verbIdices[1:]))