Example #1
# nltk is the only third-party dependency; connExtractFeat (feature
# extraction) and matchConnectiveList (lookup of multi-word connectives in a
# token list) are project-local helpers this example assumes are defined or
# imported elsewhere in the module.
import nltk


def classifyConnective(sentence, wordNum, connClassifier):
    """Return (connective string, 'Y'/'N' label, tokens to skip) for the token
    at wordNum, or ('False', 'N', 0) if no candidate connective starts there."""
    parsetree = nltk.ParentedTree.fromstring(sentence['parsetree'])
    """
    wordList=parsetree.leaves()
    word=wordList[wordNum]
    wordString,connHead=connMatching.matchConnective(parsetree,word,wordNum)
    indices=[]
    for word in wordString:
        if word in wordList:
            indices.append(wordList.index(word))
    #print 'conn ',wordString
    """
    wordList = sentence['words']
    wordString, skip = matchConnectiveList(wordList, wordNum)

    # matchConnectiveList signals "no connective starts at wordNum" with the
    # string 'False'; skip is the number of extra tokens the connective spans.
    if wordString == 'False':
        return 'False', 'N', 0

    if parsetree.leaves() != []:
        # Classify the candidate from features over the tokens it covers.
        connLabel = connClassifier.classify(
            connExtractFeat.getfeatures(parsetree,
                                        range(wordNum, wordNum + skip + 1)))
        return wordString, connLabel, skip
    return 'False', 'N', 0
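

# --- Usage sketch (not from the original example; names and data invented) ---
# `sentence` is one entry of the parsed-corpus JSON: a Penn-Treebank parse
# string under 'parsetree' and a list of (token, attribute_dict) pairs under
# 'words'. `connClassifier` is assumed to be an nltk classifier trained on the
# (feature_dict, label) pairs produced by dataProcess below; the call is left
# commented out because it needs that trained model.
toy_sentence = {
    'parsetree': '(ROOT (S (NP (PRP He)) (VP (VBD left) (SBAR (IN because) '
                 '(S (NP (PRP he)) (VP (VBD was) (ADJP (JJ tired)))))) (. .)))',
    'words': [
        ('He', {'CharacterOffsetBegin': 0, 'CharacterOffsetEnd': 2}),
        ('left', {'CharacterOffsetBegin': 3, 'CharacterOffsetEnd': 7}),
        ('because', {'CharacterOffsetBegin': 8, 'CharacterOffsetEnd': 15}),
        ('he', {'CharacterOffsetBegin': 16, 'CharacterOffsetEnd': 18}),
        ('was', {'CharacterOffsetBegin': 19, 'CharacterOffsetEnd': 22}),
        ('tired', {'CharacterOffsetBegin': 23, 'CharacterOffsetEnd': 28}),
        ('.', {'CharacterOffsetBegin': 28, 'CharacterOffsetEnd': 29}),
    ],
}
# wordNum = 2 points at 'because':
# wordString, label, skip = classifyConnective(toy_sentence, 2, connClassifier)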
def dataProcess(discourseBank, treeBank, connectiveList):
    """Walk the parsed documents in step with the discourse relations and
    build (feature_dict, label) training pairs: 'Y' when a candidate
    connective belongs to an Explicit relation's connective head, 'N'
    otherwise. connectiveList is currently unused."""
    featureSets = []
    docList = sorted(treeBank.keys())
    totalDiscourses = len(discourseBank)
    lastDiscourse = totalDiscourses - 1
    dBIterator = 0                    # index of the current relation
    oldExplicitIterator = dBIterator
    explicit = 0                      # Explicit relations seen so far
    oldexplicit = explicit
    i = 0                             # positive ('Y') examples collected
    j = 0                             # tokens falling inside a connective span
    k = 0                             # Explicit relations passed without a match
    for doc in docList:

        sentenceList = treeBank[doc]['sentences']
        for sentenceOffset, sentence in enumerate(sentenceList):

            wordList = sentence['words']
            #print wordList
            lengthWordList = len(wordList)
            wordNum = 0
            while (wordNum < lengthWordList):
                wordStructure = wordList[wordNum]
                word = wordStructure[0]
                word = word.lower()
                #print word
                wordDictionary = wordStructure[1]
                #print wordDictionary
                #if not matchConnectiveList(connectiveList,word):
                #    continue
                relation = discourseBank[dBIterator]

                # Advance to the next Explicit relation; only Explicit
                # relations carry a connective to align against.
                while True:
                    if (relation['Type'] == 'Explicit'
                            or dBIterator == lastDiscourse):
                        break
                    dBIterator += 1
                    relation = discourseBank[dBIterator]
                    if relation['Type'] == 'Explicit':
                        connective = relation['ConnectiveHead']
                        explicit += 1

                # Numeric document ids (e.g. a PDTB-style 'wsj_2100' -> 2100)
                # and character offsets, used to order the current word
                # against the relation's connective span.
                docWord = int(doc[4:])
                docConnective = int(relation['DocID'][4:])
                cOBWord = wordDictionary['CharacterOffsetBegin']
                cOEWord = wordDictionary['CharacterOffsetEnd']

                if relation['Type'] == 'Explicit':
                    connective = relation['ConnectiveHead']
                    spans = relation['Connective']['CharacterSpanList']
                    cOBConnective = spans[0][0]
                    cOEConnective = spans[-1][1]

                # Case 1: the word lies entirely before the next connective,
                # so any candidate connective found here is a negative example.
                if ((docConnective > docWord) or
                    (docWord == docConnective and cOEWord < cOBConnective)):

                    result, skip = matchConnectiveList(wordList, wordNum)

                    if result != 'False':
                        label = 'N'
                        tokenNo = range(wordNum, wordNum + skip + 1)
                        parsetree = nltk.ParentedTree.fromstring(
                            sentence['parsetree'])
                        if parsetree.leaves() != []:
                            featureSets.append((connExtractFeat.getfeatures(
                                parsetree, tokenNo), label))
                    wordNum += skip

                # Case 2: the word falls inside the connective's character
                # span. Important: match potential connectives against the
                # connective head, not the connective's raw text.
                elif ((docWord == docConnective) and
                      (cOBConnective <= cOBWord and cOEWord <= cOEConnective)):
                    headTokens = [tok.lower() for tok in connective.split()]
                    if word in headTokens:
                        # (Matching the connective's character offsets directly
                        # would have been cleaner than matching head tokens.)
                        result, skip = matchConnectiveList(wordList, wordNum)

                        if result == 'if then':
                            # Debug trace for the discontinuous 'if .. then'.
                            print(1, word, headTokens, result, connective)
                        if result != 'False':
                            label = 'Y'
                            tokenNo = range(wordNum, wordNum + skip + 1)
                            parsetree = nltk.ParentedTree.fromstring(
                                sentence['parsetree'])
                            if parsetree.leaves() != []:
                                featureSets.append(
                                    (connExtractFeat.getfeatures(
                                        parsetree, tokenNo), label))

                            # Count Explicit relations that went by without a
                            # matched token (sanity check).
                            if (explicit - oldexplicit > 1):
                                k += 1
                            oldexplicit = explicit
                            oldExplicitIterator = dBIterator

                            i += 1
                        wordNum += skip

                    else:
                        # The word sits inside the connective span but before
                        # the connective head; these candidates are skipped.
                        result, skip = matchConnectiveList(wordList, wordNum)
                        wordNum += skip
                        if result != 'False':
                            label = 'N'

                    j += 1

                # Case 3: the connective lies entirely before the current
                # word, so move on to the next relation and re-examine this
                # word against it.
                elif ((docConnective < docWord) or
                      (docConnective == docWord and cOEConnective < cOBWord)):
                    if (dBIterator > totalDiscourses):
                        print('something is wrong')
                    if dBIterator != lastDiscourse:
                        dBIterator += 1
                        relation = discourseBank[dBIterator]
                        if relation['Type'] == 'Explicit':
                            explicit += 1
                        # Stay on the same word for the new relation.
                        wordNum -= 1
                #print doc,sentenceOffset,wordNum,cOBWord,word
                wordNum += 1
                #print i,dBIterator

    print(i, j, k)
    return featureSets
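

# --- Training sketch (assumption: how the pieces above fit together) ---
# featureSets is a list of (feature_dict, label) pairs with labels 'Y'/'N',
# the format nltk's trainable classifiers expect, so a plausible way to build
# the connClassifier used by classifyConnective is:
#
#     featureSets = dataProcess(trainRelations, trainParses, connectiveList)
#     connClassifier = nltk.NaiveBayesClassifier.train(featureSets)
#     wordString, label, skip = classifyConnective(sentence, wordNum, connClassifier)
#
# trainRelations / trainParses stand for the PDTB-style relations and the
# parses of the training split; both names are placeholders, not part of the
# original example.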