Python WordMatrix Examples

Programming Language: Python

Namespace/Package Name: matrix_management

Class/Type: WordMatrix

Examples at hotexamples.com: 5

Python WordMatrix - 5 examples found. These are the top rated real world Python examples of matrix_management.WordMatrix extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

get_tokens(2)

WordMatrix(1)

add(1)

get(1)

kn_columns(1)

kn_cooccurences(1)

Example #1

Show file

File: converter.py Project: VOVAN1993/python2012

# Replace stop words with the placeholder "*" instead of dropping them, so
# tokens_filtered always has exactly as many entries as tokens.
# The stop-word list is fetched once and kept in a set; previously
# stopwords.words('english') was called and searched again for every token.
# NOTE(review): `stopwords` is presumably NLTK's stop-word corpus -- confirm.
english_stopwords = frozenset(stopwords.words('english'))
tokens_filtered = ["*" if token in english_stopwords else token
                   for token in tokens]

# Stemming: every entry, including the "*" placeholders, goes through the
# stemmer.
normalized_tokens = [stemmer.stem(token) for token in tokens_filtered]

# Single-argument parenthesised form: prints the same text under the
# Python 2 print statement and the Python 3 print function.
print("Tokens set filtered and stemmed : %s" % (normalized_tokens,))

window_size = 10
matrix = WordMatrix()

# Slide a window of window_size tokens over the sequence one position at a
# time. Only full windows are visited; a sequence shorter than window_size
# yields an empty range and nothing is added.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    # Visit every ordered pair (first < second) inside the window.
    for first in range(len(window)):
        for second in range(first + 1, len(window)):
            # The third argument shrinks by one per extra position between
            # the two tokens: window_size for neighbours, down to 2 for the
            # first and last token of the window.
            matrix.add(window[first], window[second],
                       window_size - second + first + 1)

print("Co-occurence counted")

Example #2

Show file

# Stop words are swapped for the placeholder "*" (not removed), which keeps
# tokens_filtered the same length as tokens.
# stopwords.words('english') is loop-invariant, so it is evaluated once and
# stored in a set instead of being called and scanned for each token.
# NOTE(review): `stopwords` is presumably NLTK's stop-word corpus -- confirm.
english_stopwords = frozenset(stopwords.words('english'))
tokens_filtered = ["*" if token in english_stopwords else token
                   for token in tokens]

# Stemming is applied to every entry, placeholders included.
normalized_tokens = [stemmer.stem(token) for token in tokens_filtered]

# Written so the output is identical under Python 2 and Python 3.
print("Tokens set filtered and stemmed : %s" % (normalized_tokens,))

window_size = 10
matrix = WordMatrix()

# Move a full window of window_size tokens along the sequence, one token per
# step. If there are fewer than window_size tokens the range is empty and
# the matrix stays untouched.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    # Every pair of positions first < second within the window is recorded.
    for first in range(len(window)):
        for second in range(first + 1, len(window)):
            # Value passed with the pair: window_size for adjacent tokens,
            # decreasing by one per additional position of separation.
            matrix.add(window[first], window[second],
                       window_size - second + first + 1)

Example #3

Show file

File: converter.py Project: A-Kulikov/python2012

# Each stop word is replaced by "*" rather than deleted, so the filtered
# list has one entry per input token.
# The English stop-word list is looked up a single time and held in a set;
# the earlier code called stopwords.words('english') inside the loop, once
# per token.
# NOTE(review): `stopwords` is presumably NLTK's stop-word corpus -- confirm.
english_stopwords = frozenset(stopwords.words('english'))
tokens_filtered = ["*" if token in english_stopwords else token
                   for token in tokens]

# Stem every entry of the filtered list (the "*" placeholders as well).
normalized_tokens = [stemmer.stem(token) for token in tokens_filtered]

# One pre-formatted argument in parentheses gives the same output with the
# Python 2 print statement and the Python 3 print function.
print("Tokens set filtered and stemmed : %s" % (normalized_tokens,))

window_size = 10
matrix = WordMatrix()

# Step a window of window_size tokens through the sequence. Partial windows
# at the end are never visited, and too-short input produces no windows.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    # All pairs with first < second inside the current window.
    for first in range(len(window)):
        for second in range(first + 1, len(window)):
            # Third argument: window_size when the tokens are adjacent,
            # one less for each further position apart (minimum 2).
            matrix.add(window[first], window[second],
                       window_size - second + first + 1)

print("Co-occurence counted")

Example #4

Show file

File: converter.py Project: kimank/python2012

# Bare print, kept exactly as written (under Python 2 it emits an empty line).
print

# maybe should first do sent_tokenize, then word_tokenize
tokens = word_tokenize(text)

# i chose the one everybody knows
stemmer = PorterStemmer()

# tokenization and stemming: one stemmed entry per token, in order
normalized_tokens = [stemmer.stem(token) for token in tokens]

window_size = 5

matrix = WordMatrix()

# Visit every full window of window_size consecutive stemmed tokens; the
# range is empty when there are fewer than window_size tokens.
window_count = len(normalized_tokens) - window_size + 1
for start in range(window_count):
    window = normalized_tokens[start:start + window_size]
    # Pair each token with every token after it in the same window.
    for i, left in enumerate(window):
        for j in range(i + 1, len(window)):
            # window_size for neighbouring tokens, one less per extra
            # position between them.
            matrix.add(left, window[j], window_size - j + i + 1)

# todo: tabs stuff, cool printing

Example #5

Show file

File: converter.py Project: viosng/python2012

# Bare print, left untouched (an empty line under Python 2).
print

# maybe should first do sent_tokenize, then word_tokenize
tokens = word_tokenize(text)

# i chose the one everybody knows
stemmer = PorterStemmer()

# tokenization and stemming: stem each token, preserving order
normalized_tokens = [stemmer.stem(token) for token in tokens]

window_size = 5

matrix = WordMatrix()

# Walk over all full-length windows; nothing runs if the token list is
# shorter than window_size.
for start in range(len(normalized_tokens) - window_size + 1):
    stop = start + window_size
    window = normalized_tokens[start:stop]
    size = len(window)
    # Record each pair of positions left < right within the window.
    for left in range(size):
        for right in range(left + 1, size):
            # The value passed along falls by one for every additional
            # position separating the pair (window_size when adjacent).
            matrix.add(window[left], window[right],
                       window_size - right + left + 1)