Beispiel #1
0
    def collinsHeadSenseExtractor(self,questions, colName,training):
        """Compute a ``headSense`` for every question and store it in the DB.

        For each question document the first entry of ``question['head']`` is
        taken as the head word.  If it has the form ``<head1>--<head2>`` only
        ``<head1>`` is kept.  The sense is the string ``'null'`` when the head
        word is missing, equals ``'null'``, has no WordNet synsets, or the
        question's ``whWord`` is ``'whWord-how'``; otherwise it is whatever
        ``AdaptedLesk.wsd`` returns (presumably a word-sense label -- confirm
        against AdaptedLesk).  The result is written to the ``headSense`` field
        of every document in the ``'raw' + colName`` collection that has the
        same ``qID``.

        Args:
            questions: rewindable iterable of question documents (``rewind()``
                is called before iterating; presumably a MongoDB cursor).
                Each document is read for the keys ``head``, ``qID`` and,
                when a usable head word exists, ``whWord``, ``tokenized``
                and ``tagged``.
            colName: suffix of the target collection name (``'raw' + colName``).
            training: unused; kept so existing callers keep working.
        """
        rawQuestions = DBStore.getDB()['raw'+colName]
        # NOTE(review): the meaning of the constant 6 is defined by AdaptedLesk.
        adaptedLesk = AdaptedLesk(6)
        headPattern = re.compile('(?P<head1>.+)--(?P<head2>.+)')
        questions.rewind()
        for count, question in enumerate(questions, 1):
            # Progress counter; single-argument form prints the same on py2/py3.
            print(count)

            # Work on a local copy of the head word instead of rewriting the
            # question document, and treat a missing/empty head list as "no
            # head" rather than failing on the index.
            heads = question['head']
            head = heads[0] if heads else None
            if head is not None:
                try:
                    match = headPattern.match(head)
                except TypeError:
                    # Non-string head: leave it untouched, as before.
                    match = None
                if match:
                    head = match.group('head1')

            # Cheap checks first; the WordNet lookup only runs for real words.
            if head is None \
                    or head == 'null' \
                    or not wordnet.synsets(head) \
                    or question['whWord'] == 'whWord-how':
                headSense = 'null'
            else:
                # Debug trace of the disambiguator's inputs.
                print("%s %s %s" % (question['tokenized'], head, question['tagged']))
                headSense = adaptedLesk.wsd(question['tokenized'], head, question['tagged'])
            rawQuestions.update({'qID':question['qID']},{"$set":{"headSense":headSense}},safe=True,multi=True)
Beispiel #2
0
def featureInsertion():
    """Export classifier data for the current feature collection.

    Reads ``colName``, ``classType``, ``training`` and ``common`` from the
    enclosing scope.  Calls ``DataRetrieval.extractData`` twice on the
    ``'feature' + colName + '_' + classType`` collection, first with the
    fourth argument ``False`` and then with ``True``.

    The bag-of-words generation and the vector-space insertion are disabled
    in this variant; only the progress messages of the latter are printed.
    """
    # Disabled: bag-of-words generation (was guarded by ``if training:``).
    #     termExtractor.bagOfWordBuilder(questions,'words'+colName,insert)

    # Disabled: vector-space insertion -- the messages are still emitted.
    #     termExtractor.vectorSpaceBuilder(questions, colName, 'feature'+colName+'_'+classType,common,insert)
    print('Inserting bag of words to vector space')
    print('Done')

    # Generate the data for the classifier, once per flag value.
    print('Generating data for classifier')
    dataRetrieval = DataRetrieval()
    featureCollection = 'feature'+colName+'_'+classType
    for flag in (False, True):
        dataRetrieval.extractData(featureCollection, training, common, flag, classType)
    print('Done')
Beispiel #3
0
def featureInsertion():
    """Build the feature collection and export classifier data from it.

    Reads ``training``, ``questions``, ``colName``, ``classType``, ``common``,
    ``insert`` and ``termExtractor`` from the enclosing scope.  Steps:

    1. When ``training`` is truthy, call ``termExtractor.bagOfWordBuilder``
       on the ``"words" + colName`` collection.
    2. Call ``termExtractor.vectorSpaceBuilder`` targeting the
       ``"feature" + colName + "_" + classType`` collection.
    3. Call ``DataRetrieval.extractData`` on that same collection with the
       fourth argument ``False``.
    """
    # Step 1: bag of words (training runs only).
    if training:
        print("Generating bag of words")
        wordsCollection = "words" + colName
        termExtractor.bagOfWordBuilder(questions, wordsCollection, insert)
        print("Done")

    # Step 2: insert the bag of words into the vector space.
    print("Inserting bag of words to vector space")
    featureCollection = "feature" + colName + "_" + classType
    termExtractor.vectorSpaceBuilder(questions, colName, featureCollection, common, insert)
    print("Done")

    # Step 3: generate the data for the classifier.
    print("Generating data for classifier")
    dataRetrieval = DataRetrieval()
    dataRetrieval.extractData(featureCollection, training, common, False, classType)
    # The second export (fourth argument True) is disabled in this variant:
    #     dataRetrieval.extractData('feature'+colName+'_'+classType,training,common,True,classType)
    print("Done")
Beispiel #4
0
 def replaceTag(tag):
     """Map ``tag`` through ``DataRetrieval.replace``, returning 'n' instead of 'a'.

     Any other value returned by ``DataRetrieval.replace`` is passed through
     unchanged.
     """
     mapped = DataRetrieval.replace(tag)
     return 'n' if mapped == 'a' else mapped