Python PorterStemmer.encode Examples

Programming Language: Python

Namespace/Package Name: nltk

Class/Type: PorterStemmer

Method/Function: encode

Examples at hotexamples.com: 3

Python PorterStemmer.encode - 3 examples found. These are the top-rated real-world Python examples of nltk.PorterStemmer.encode extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

stem(30)

PorterStemmer(30)

lower(9)

strip(8)

stem_word(4)

encode(3)

isdigit(3)

decode(2)

isalpha(1)

replace(1)

split(1)

Example #1

Show file

def filter(text):
    """Split *text* into tokens and return their Porter stems as a list.

    Tokens are produced by splitting on runs of spaces, newlines, ASCII
    punctuation listed in the pattern below, and digits.  Each token is
    lower-cased, stripped of non-ASCII characters, and kept only when it
    is longer than three characters and is not in ``stop``.  Surviving
    tokens are stemmed; stems that consist solely of digits are dropped.

    Relies on ``re``, ``PorterStemmer``, ``is_ascii`` and ``stop`` being
    available at module level (they are defined outside this snippet).
    Written for Python 2 (uses the ``unicode`` builtin).

    :param text: the raw string to tokenize.
    :returns: list of stemmed tokens, in order of appearance.
    """
    # One stemmer serves the whole call; building one per token is wasted work.
    stemmer = PorterStemmer()
    return_list = []
    # Raw string so that \- and \d reach the regex engine unchanged.
    for token in re.split(r"[,. \-!?:_'%$/#@&;\n\d]+", text):
        word = token.lower()
        if not is_ascii(word):
            # 'ignore' silently drops every character that is not ASCII.
            word = word.encode('ascii', 'ignore')
        # Short words (three characters or fewer) and stop words are skipped.
        if len(word) <= 3 or word in stop:
            continue
        stem = stemmer.stem_word(word)
        if isinstance(stem, unicode):
            stem = stem.encode('ascii', 'ignore')
        if not stem.isdigit():
            return_list.append(stem)
    return return_list

Example #2

Show file

def filter(text):
    """Split *text* into tokens and return their Porter stems as a list.

    Each token is lower-cased and has the substrings ``'votetrump'`` and
    ``'votehillary'`` removed.  A token is kept only when it is longer
    than one character, passes ``is_ascii`` and is not in ``stop``.
    Surviving tokens are stemmed; stems that consist solely of digits are
    dropped.

    Relies on ``re``, ``PorterStemmer``, ``is_ascii`` and ``stop`` being
    available at module level (they are defined outside this snippet).
    Written for Python 2 (uses the ``unicode`` builtin).

    :param text: the raw string to tokenize.
    :returns: list of stemmed tokens, in order of appearance.
    """
    # Separator class.  The previous non-raw pattern contained "\\\\s",
    # which the regex engine read as "a backslash, or the letter s", so
    # every word containing an "s" was cut apart.  Here the backslash
    # (\\) and whitespace (\s) are spelled out separately.  The \n-! range
    # (0x0A through 0x21: control characters, space and "!") is kept
    # exactly as the original pattern had it.
    separators = r"[,. ()\-\\\s=\n-!?#:_'%$/@\"]+"
    # One stemmer serves the whole call; building one per token is wasted work.
    stemmer = PorterStemmer()
    return_list = []
    for token in re.split(separators, text):
        # str.replace is a no-op when the substring is absent, so no
        # membership test is needed before stripping the campaign tags.
        word = token.lower().replace('votetrump', '').replace('votehillary', '')

        if len(word) <= 1 or not is_ascii(word) or word in stop:
            continue
        stem = stemmer.stem_word(word)
        if isinstance(stem, unicode):
            stem = stem.encode('ascii', 'ignore')
        if not stem.isdigit():
            return_list.append(stem)
    return return_list

Example #3

Show file

def filter(text, removeWords):
    """Split *text* into tokens and return their Porter stems as a list.

    Each token is lower-cased and every substring named in *removeWords*
    is deleted from it.  A token is kept only when it is then longer than
    one character, passes ``is_ascii`` and is not in ``stop``.  Surviving
    tokens are stemmed; stems that consist solely of digits are dropped.

    Relies on ``re``, ``PorterStemmer``, ``is_ascii`` and ``stop`` being
    available at module level (they are defined outside this snippet).
    Written for Python 2 (uses the ``unicode`` builtin).

    :param text: the raw string to tokenize.
    :param removeWords: comma-separated substrings to delete from every
        token, e.g. ``"votetrump, votehillary"``.  The string is
        lower-cased and all spaces are removed before it is split.
    :returns: list of stemmed tokens, in order of appearance.
    """
    # Separator class.  The previous non-raw pattern contained "\\\\s",
    # which the regex engine read as "a backslash, or the letter s", so
    # every word containing an "s" was cut apart.  Here the backslash
    # (\\) and whitespace (\s) are spelled out separately.  The \n-! range
    # (0x0A through 0x21: control characters, space and "!") is kept
    # exactly as the original pattern had it.
    separators = r"[,. ()\-\\\s=\n-!?#:_'%$/@\"]+"

    # The removal list depends only on the argument, so build it once
    # instead of once per token.  Empty entries (from "" or "a,,b") are
    # skipped; replacing '' with '' would leave the token unchanged anyway.
    remove_list = [
        entry
        for entry in removeWords.lower().replace(" ", "").split(",")
        if entry
    ]

    # One stemmer serves the whole call; building one per token is wasted work.
    stemmer = PorterStemmer()
    return_list = []
    for token in re.split(separators, text):
        word = token.lower()
        for remove_element in remove_list:
            word = word.replace(remove_element, '')

        if len(word) <= 1 or not is_ascii(word) or word in stop:
            continue
        # NOTE: on nltk 3.2.2 the equivalent call is stemmer.stem(word).
        stem = stemmer.stem_word(word)
        if isinstance(stem, unicode):
            stem = stem.encode('ascii', 'ignore')
        if not stem.isdigit():
            return_list.append(stem)
    return return_list