Example #1
0
    def collinsHeadSenseExtractor(self,questions, colName,training):
        """Compute a head-word sense per question and store it as ``headSense``.

        For every document yielded by ``questions``:

        * the first entry of its ``head`` field is taken as the head word;
          if it has the form ``head1--head2`` only ``head1`` is kept;
        * the sense is the string ``"null"`` when the head is ``None``, is
          the string ``'null'``, has no WordNet synsets, or when the
          document's ``whWord`` equals ``'whWord-how'``;
        * otherwise the sense is whatever ``AdaptedLesk.wsd`` returns for the
          document's ``tokenized`` value, the head word and the document's
          ``tagged`` value;
        * the sense is written to the ``headSense`` field of every document
          in the ``'raw' + colName`` collection with the same ``qID``.

        A running counter (and, before each disambiguation, its inputs) is
        printed to stdout as progress output.

        Args:
            questions: iterable of question documents exposing ``rewind()``
                (presumably a MongoDB cursor -- confirm against callers).
                It is rewound before iteration. The documents themselves
                are not modified.
            colName: suffix appended to ``'raw'`` to name the target
                collection.
            training: not used by this method; kept so existing callers
                keep working.

        Raises:
            KeyError, IndexError, TypeError: if a document lacks one of the
                fields read above or its ``head`` field cannot be indexed.
        """
        rawQuestions = DBStore.getDB()['raw'+colName]
        # NOTE(review): the meaning of the argument 6 is defined by
        # AdaptedLesk, which is not visible here -- confirm.
        adaptedLesk = AdaptedLesk(6)
        compoundHead = re.compile('(?P<head1>.+)--(?P<head2>.+)')
        questions.rewind()
        for count, question in enumerate(questions, 1):
            print(count)
            head = question['head'][0]
            try:
                match = compoundHead.match(head)
            except TypeError:
                # head is None or otherwise not a string; the checks below
                # decide what to do with it.
                match = None
            if match:
                head = match.group('head1')

            # Cheap comparisons first so WordNet is only queried for real
            # candidate words.
            if head is None or head == 'null' or not wordnet.synsets(head):
                headSense = "null"
            elif question['whWord'] == 'whWord-how':
                headSense = 'null'
            else:
                # Single pre-formatted argument: prints the same
                # space-separated line as a multi-item print statement.
                print('%s %s %s' % (question['tokenized'], head, question['tagged']))
                headSense = adaptedLesk.wsd(question['tokenized'], head, question['tagged'])
            rawQuestions.update({'qID':question['qID']},{"$set":{"headSense":headSense}},safe=True,multi=True)
Example #2
0
 def replaceTag(tag):
     """Return ``DataRetrieval.replace(tag)``, with a result of ``'a'`` turned into ``'n'``.

     Any other value produced by ``DataRetrieval.replace`` is returned
     unchanged.
     """
     mapped = DataRetrieval.replace(tag)
     return 'n' if mapped == 'a' else mapped