예제 #1
0
 def __init__(self, fileName):
     '''
     Build the collaborators and load the three word lists from disk.

     fileName is passed to Conll09FileReader. The word lists are read
     from fixed paths; each line becomes one entry with its newline
     characters stripped.
     '''
     self.reader = Conll09FileReader(fileName)
     self.eventAttribute = EventAttribute()
     self.compoundVerb = CompoundVerb()
     self.compoundNoun = CompoundNoun()

     def read_entries(path):
         # "with" closes the file deterministically; iterating over a bare
         # open() call leaves the handle to the garbage collector.
         with open(path) as handle:
             return [line.strip('\n') for line in handle]

     self.AspectualNouns = read_entries('E:/GitRepository/EventProcessing/Corpus/NounAspectual.txt')
     self.AspectualVerbs = read_entries('E:/GitRepository/EventProcessing/Corpus/VerbAspectual.txt')
     self.AgentiveNominals = read_entries('E:/GitRepository/EventProcessing/Corpus/AgentiveNominals.txt')
예제 #2
0
 def __init__(self, fileName):
     '''
     Create the reader and helper objects used by this tagger.

     fileName is handed unchanged to Conll09FileReader.
     '''
     # Source of the sentences.
     self.reader = Conll09FileReader(fileName)
     # Compound-verb helper.
     self.compoundVerb = CompoundVerb()
     # Collection object plus the single-attribute helper.
     self.eventAttributes = EventAttributes()
     self.eventAttribute = EventAttribute()
예제 #3
0
class NounEventTagger(object):
    '''
    Tags the nouns of a sentence as events.

    A noun (coarse POS 'N') is tagged as an event when it is part of a
    compound noun, stands in a causative context, is an aspectual noun, or
    depends on an aspectual head. Nouns that belong to a compound verb or
    carry the fine POS 'ANM' are never events.
    '''

    # Lemmas that signal a causative construction.
    _CAUSATIVE_PHRASES = ('موجب', 'باعث', 'دلیل', 'سبب', 'علت')

    def __init__(self, fileName):
        '''
        Build the collaborators and load the three word lists from disk.

        fileName is passed to Conll09FileReader. The word lists are read
        from fixed paths, one entry per line.
        '''
        self.reader = Conll09FileReader(fileName)
        self.eventAttribute = EventAttribute()
        # BatchTag accumulates into this object; it was previously never
        # created, so BatchTag failed with AttributeError.
        self.eventAttributes = EventAttributes()
        self.compoundVerb = CompoundVerb()
        self.compoundNoun = CompoundNoun()
        self.AspectualNouns = self._read_entries('E:/GitRepository/EventProcessing/Corpus/NounAspectual.txt')
        self.AspectualVerbs = self._read_entries('E:/GitRepository/EventProcessing/Corpus/VerbAspectual.txt')
        self.AgentiveNominals = self._read_entries('E:/GitRepository/EventProcessing/Corpus/AgentiveNominals.txt')

    @staticmethod
    def _read_entries(path):
        '''Return the lines of the file at path, newline characters stripped.'''
        # "with" guarantees the handle is closed once the list is built.
        with open(path) as handle:
            return [line.strip('\n') for line in handle]

    def Tag(self, sentence):
        '''
        Return one attribute object per word of sentence, in word order.

        Non-nouns and rejected nouns get a non-event attribute carrying the
        word text. Accepted nouns get a full event attribute and advance the
        counters globals.id and globals.eventID by one each.
        '''
        sentenceLength = sentence.getSentenceLength()
        modalityDetector = ModalityDetector()
        polarityDetector = PolarityDetector()
        eventAttributes = []

        for wordIndex in range(sentenceLength):
            if sentence.getPOSCoarse(wordIndex) != 'N':
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=sentence.getWord(wordIndex)))
                continue

            word = sentence.getWord(wordIndex)
            classfeature = 'STATE'
            pos = 'NOUN'
            aspect = sentence.getAspectuality(wordIndex)
            modality = modalityDetector.Detect(sentence, wordIndex)
            polarity = polarityDetector.detect(word, pos, sentence.getLemma(wordIndex))
            tense = sentence.getTense(wordIndex)
            mood = sentence.getMood(wordIndex)

            # Exclusions are checked first and win over every event rule.
            if (self.compoundVerb.IsPartOfCompoundVerb(sentence, wordIndex)
                    or sentence.getPOSFine(wordIndex) == 'ANM'):
                # The result is kept in the list only; self.eventAttribute
                # is not rebound, matching the other branches.
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
                continue

            # Short-circuit "or" keeps the original order of the checks.
            isEvent = (self.compoundNoun.IsPartOfCompoundNoun(sentence, wordIndex)
                       or self.IsCausativeInContext(sentence, wordIndex)
                       or self.IsAspectual(sentence, wordIndex)
                       or self.IsAspectualInContext(sentence, wordIndex))
            if isEvent:
                eventAttributes.append(self.eventAttribute.SetAttributes(
                    True, globals.id, globals.eventID, classfeature, word,
                    aspect, modality, pos, polarity, tense, mood))
                globals.eventID += 1
                globals.id += 1
            else:
                # text=word is passed here as in every other non-event
                # branch; it was previously omitted for this case only.
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
        return eventAttributes

    def BatchTag(self):
        '''
        Run the noun rules over the reader's sentences and return
        self.eventAttributes after adding one or more entries per word.
        '''
        numberOfSentences = self.reader.getNumberOfSentences()
        # NOTE(review): iteration starts at 1, so index 0 is never requested
        # from the reader -- confirm whether sentence indices are 1-based.
        for sentenceIndex in range(1, numberOfSentences):
            sentence = self.reader.getSentence(sentenceIndex)
            for wordIndex in range(sentence.getSentenceLength()):
                if sentence.getPOSCoarse(wordIndex) != 'N':
                    self.eventAttributes.add(sentence, 0)
                    continue
                if (self.compoundVerb.IsPartOfCompoundVerb(sentence, wordIndex)
                        or sentence.getPOSFine(wordIndex) == 'ANM'):
                    self.eventAttributes.add(sentence, 0)
                    continue
                # Unlike Tag, the rules below are not mutually exclusive:
                # a word is added once for every rule it satisfies.
                if self.compoundNoun.IsPartOfCompoundNoun(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsCausativeInContext(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsAspectual(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
                if self.IsAspectualInContext(sentence, wordIndex):
                    self.eventAttributes.add(sentence, wordIndex)
        return self.eventAttributes

    def IsAgentiveNominal(self, word):
        '''Return True when word is listed in self.AgentiveNominals.'''
        return word in self.AgentiveNominals

    def IsAspectual(self, sentence, wordIndex):
        '''
        Return True when the word at wordIndex is aspectual.

        Verbs (coarse POS 'V') are looked up by their extracted compound
        verb in self.AspectualVerbs; every other word is looked up by lemma
        in self.AspectualNouns.
        '''
        if sentence.getPOSCoarse(wordIndex) == 'V':
            compoundVerb = self.compoundVerb.extractCompundVerb(sentence, wordIndex)
            return compoundVerb.strip('\n') in self.AspectualVerbs
        return sentence.getLemma(wordIndex) in self.AspectualNouns

    def IsAspectualInContext(self, sentence, wordIndex):
        '''
        Return True when the dependency head of the word is aspectual, or
        when the head has relation 'NPP' and the head's own head is
        aspectual.
        '''
        headIndex = sentence.getDependencyParentNum(wordIndex)
        headOfHeadIndex = sentence.getDependencyParentNum(headIndex)
        if self.IsAspectual(sentence, headIndex):
            return True
        # The former extra "SBJ and head is aspectual" check is dropped:
        # it could only succeed when the check above already had.
        return (sentence.getDependencyRel(headIndex) == 'NPP'
                and self.IsAspectual(sentence, headOfHeadIndex))

    def IsCausativeInContext(self, sentence, wordIndex):
        '''
        Return True when the word stands in a causative context.

        That is the case when the lemma of its dependency head is a
        causative phrase, or when the word has relation 'SBJ' and a word at
        or after it with the same head has a causative lemma. Returns False
        otherwise.
        '''
        parentNum = sentence.getDependencyParentNum(wordIndex)
        if sentence.getLemma(parentNum) in self._CAUSATIVE_PHRASES:
            return True
        if sentence.getDependencyRel(wordIndex) != 'SBJ':
            return False
        for i in range(wordIndex, sentence.getSentenceLength()):
            if (sentence.getDependencyParentNum(i) == parentNum
                    and sentence.getLemma(i) in self._CAUSATIVE_PHRASES):
                return True
        # Explicit False instead of the implicit None of the old fall-through.
        return False
예제 #4
0
class VerbEventTagger(object):
    '''
    Tags the verbs of a sentence as events.

    A verb (coarse POS 'V') is an event unless its lemma contains one of
    the forms 'است', 'بود' or 'هست', its dependency relation is 'PROG', or
    its fine POS is 'MODL'.
    '''

    def __init__(self, fileName):
        '''Create the reader (from fileName) and the helper objects.'''
        self.reader = Conll09FileReader(fileName)
        self.compoundVerb = CompoundVerb()
        self.eventAttributes = EventAttributes()
        self.eventAttribute = EventAttribute()

    def Tag(self, sentence):
        '''
        Return one attribute object per word of sentence, in word order.

        For a compound verb every non-verbal part and the verb itself share
        one globals.eventID while each gets its own globals.id. A simple
        verb consumes one of each. All other words get a non-event
        attribute carrying the word text.
        '''
        sentenceLength = sentence.getSentenceLength()
        eventAttributes = []
        modalityDetector = ModalityDetector()
        classFeatureExtractor = ClassFeatureExtraction()
        polarityDetector = PolarityDetector()
        for wordIndex in range(sentenceLength):
            word = sentence.getWord(wordIndex)
            if sentence.getPOSCoarse(wordIndex) != 'V':
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
                continue

            aspect = sentence.getAspectuality(wordIndex)
            modality = modalityDetector.Detect(sentence, wordIndex)
            pos = 'VERB'
            polarity = polarityDetector.detect(
                word, sentence.getPOSCoarse(wordIndex), sentence.getLemma(wordIndex))
            tense = sentence.getTense(wordIndex)
            mood = sentence.getMood(wordIndex)

            if (self.IsAstBoodHast(sentence, wordIndex)
                    or self.IsProgressive(sentence, wordIndex)
                    or self.IsModal(sentence, wordIndex)):
                # The result is kept in the list only; self.eventAttribute
                # is not rebound, matching the event branches below.
                eventAttributes.append(
                    self.eventAttribute.SetAttributes(isEvent=False, text=word))
                continue

            # One extraction serves both the compound test and the parts.
            verbIdices = self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(sentence, wordIndex)
            if verbIdices:
                nonVerbalText = ' '.join(sentence.getWord(index) for index in verbIdices)
                verbWord = nonVerbalText.rstrip(' ') + ' ' + word
                classfeature = classFeatureExtractor.Extract(verbWord)
                for index in verbIdices:
                    # NOTE(review): this overwrites an entry appended
                    # earlier, so it only works when index < wordIndex; a
                    # non-verbal part after the verb raises IndexError.
                    eventAttributes[index] = self.eventAttribute.SetAttributes(
                        True, globals.id, globals.eventID, classfeature,
                        sentence.getWord(index), aspect, modality, pos,
                        polarity, tense, mood)
                    globals.id += 1
            else:
                classfeature = classFeatureExtractor.Extract(word)

            eventAttributes.append(self.eventAttribute.SetAttributes(
                True, globals.id, globals.eventID, classfeature, word,
                aspect, modality, pos, polarity, tense, mood))
            globals.eventID += 1
            globals.id += 1
        return eventAttributes

    def BatchTag(self):
        '''
        Return a list with one string per event verb found by the reader.

        Each string is the part of the verb lemma before '#' with 'ن'
        appended, preceded by the space-separated non-verbal parts when the
        verb is compound.
        '''
        numberOfSentences = self.reader.getNumberOfSentences()
        EventVerbs = []
        # NOTE(review): iteration starts at 1, so index 0 is never requested
        # from the reader -- confirm whether sentence indices are 1-based.
        for sentenceIndex in range(1, numberOfSentences):
            sentence = self.reader.getSentence(sentenceIndex)
            for wordIndex in range(sentence.getSentenceLength()):
                if sentence.getPOSCoarse(wordIndex) != 'V':
                    continue
                if (self.IsAstBoodHast(sentence, wordIndex)
                        or self.IsProgressive(sentence, wordIndex)
                        or self.IsModal(sentence, wordIndex)):
                    continue
                verbIdices = self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(sentence, wordIndex)
                parts = [sentence.getWord(item) for item in (verbIdices or [])]
                parts.append(sentence.getLemma(wordIndex).split('#')[0] + 'ن')
                EventVerbs.append(' '.join(parts))
        return EventVerbs

    def IsAstBoodHast(self, sentence, wordIndex):
        '''Return True when a '#'-separated part of the lemma is 'است', 'بود' or 'هست'.'''
        lemmas = sentence.getLemma(wordIndex).split('#')
        return any(form in lemmas for form in ('است', 'بود', 'هست'))

    def IsProgressive(self, sentence, wordIndex):
        '''Return True when the dependency relation of the word is 'PROG'.'''
        return sentence.getDependencyRel(wordIndex) == 'PROG'

    def IsModal(self, sentence, wordIndex):
        '''Return True when the fine POS tag of the word is 'MODL'.'''
        return sentence.getPOSFine(wordIndex) == 'MODL'

    def IsCompoundVerb(self, sentence, verbIndex):
        '''Return True when the verb has any non-verbal compound parts.'''
        return bool(self.compoundVerb.extractNonVerbalIndexOfCompoundVerb(sentence, verbIndex))

    def IsConsecutive(self, verbIdices):
        '''Return True when each index is exactly one more than the previous (also for fewer than two indices).'''
        return all(later - earlier == 1
                   for earlier, later in zip(verbIdices, verbIdices[1:]))