Beispiel #1
0
# exercise 3.1.4
import numpy as np
from tmgsimple import TmgSimple

# Build the text matrix through the TmgSimple helper class, handing it the
# document file, a stop-word file, and stem=True.
tm = TmgSimple(
    filename='../Data/textDocs.txt',
    stopwords_filename='../Data/stopWords.txt',
    stem=True,
)

# Pull the data out of the helper: first the matrix, then the word list.
# Both are requested with sort=True.
X = tm.get_matrix(sort=True)
attributeNames = tm.get_words(sort=True)

# Show the word list followed by the matrix.
print(attributeNames)
print(X)

print('Ran Exercise 3.1.4')
Beispiel #2
0
# exercise 3.1.2
from tmgsimple import TmgSimple

# Build the text matrix through the TmgSimple helper class; only the document
# file is supplied here (no stop-word file and no stemming argument).
tm = TmgSimple(filename='../Data/textDocs.txt')

# Pull the data out of the helper: first the matrix, then the word list.
# Both are requested with sort=True.
X = tm.get_matrix(sort=True)
attributeNames = tm.get_words(sort=True)

# Show the word list followed by the matrix.
print(attributeNames)
print(X)
Beispiel #3
0
document contains at least 2 of your key words, i.e. the document-term matrix
should have approximately 10 columns and each row of the matrix must contain
at least 2 non-zero entries.
"""

# Key words chosen for the exercise, one entry per term.
bagOfWords = [
    'matrix',
    'Google',
    'ranking',
    'web',
    'webpage',
    'rank',
]

"""
3.1.2
"""
import numpy as np
from tmgsimple import TmgSimple
from similarity import similarity

# Generate the text matrix with the help of the simple class TmgSimple.
# Only the document file is passed in this section.
tm = TmgSimple(filename='../02450Toolbox_Python/Data/textDocs.txt')

# Extract the variables representing the data (both requested with sort=True).
# NOTE(review): the name "attributeNamesWithOutStop" suggests stop words were
# removed, but no stop-word file is passed to TmgSimple above -- confirm intent.
X = tm.get_matrix(sort=True)
attributeNamesWithOutStop = tm.get_words(sort=True)

# Display the result. The single-argument call form is used instead of the
# Python 2 print statement so the script parses on Python 3 as well, and
# matches the print(...) call used further down in this script.
print(attributeNamesWithOutStop)
print(X)

"""
3.1.3
With stopwords
"""
print('Now with stopwords !!!')
# Rebind tm to a TmgSimple that is given the stop-word file in addition to
# the document file.
tm = TmgSimple(
    filename='../02450Toolbox_Python/Data/textDocs.txt',
    stopwords_filename='../02450Toolbox_Python/Data/stopWords.txt',
)
Beispiel #4
0
# exercise 2.1.2
from tmgsimple import TmgSimple
import tmgsimple
#help(tmgsimple)
# Locations of the document file and the stop-word list.
# NOTE(review): both are absolute paths hard-coded under C:\Users\Bahram;
# adjust them to your own checkout before running.
fn = 'C:\\Users\\Bahram\\PycharmProjects\\Machine-Learning-and-Data-Mining\\02450Toolbox_Python\\Data\\textDocs.txt'
stopwords = 'C:\\Users\\Bahram\\PycharmProjects\\Machine-Learning-and-Data-Mining\\02450Toolbox_Python\\Data\\stopWords.txt'

# Build the text matrix with TmgSimple, passing the stop-word file,
# stem=True and min_term_length=5.
tm = TmgSimple(
    filename=fn,
    stopwords_filename=stopwords,
    stem=True,
    min_term_length=5,
)

# Extract the word list and the matrix (both requested with sort=True).
attributeNames = tm.get_words(sort=True)
x = tm.get_matrix(sort=True)

# Display the result. The single-argument call form replaces the Python 2
# print statement so the script also parses on Python 3.
print(attributeNames)
print(x)


"""

# Generate text matrix with help of simple class TmgSimple


# Extract variables representing data
X = tm.get_matrix(sort=True)


# Display the result
print attributeNames
print X
"""