Example #1
0
# Slide a window of `window_size` consecutive tokens over the token list and
# hand every (earlier token, later token) pair inside the window to
# matrix.add().  The third argument equals window_size for adjacent tokens
# and drops by one for each extra position separating the pair.  Windows
# overlap, so the same pair of positions is reported once per window that
# contains both.  A token list shorter than one window yields no iterations.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    for first, earlier in enumerate(window):
        for second in range(first + 1, len(window)):
            matrix.add(earlier, window[second],
                       window_size - second + first + 1)

print "Co-occurence counted"
print "Keys quantity:", len(matrix.get_tokens())

for key in matrix.get_tokens():
    if key <> "*":
        pass#print key, matrix.kn_cooccurences(key, 6)

print "Done"

def get_token_by_word(word):
    """Return the stem of the first run of ASCII letters found in *word*.

    The first maximal sequence of characters in A-Z / a-z is extracted,
    lower-cased and passed to ``stemmer.stem``; the result of that call is
    returned unchanged.

    Raises:
        IndexError: if *word* contains no ASCII letters at all.
    """
    letter_runs = re.findall(r"[A-Za-z]+", word)
    leading_run = letter_runs[0]
    return stemmer.stem(leading_run.lower())

def get_euclidean_vector_by_token(n, token):
    """Query ``matrix.kn_columns`` for *token* with ``dist_cols_euclidean``.

    Prints the incoming token, then, when *token* is a member of
    ``matrix.token_set``, returns
    ``matrix.kn_columns(token, n, matrix.dist_cols_euclidean)``.

    As written here there is no branch for a token that is missing from
    ``matrix.token_set``, so in that case the function falls off the end
    and returns ``None``.

    NOTE(review): *n* is forwarded unchanged to ``kn_columns``; presumably
    it is the number of nearest columns to return -- confirm against
    WordMatrix.
    """
    print "Incoming token:", token
    if token in matrix.token_set:
        return matrix.kn_columns(token, n, matrix.dist_cols_euclidean)
Example #2
0
    normalized_tokens += [stemmer.stem(token)]

# Width, in tokens, of the sliding window scanned below.
window_size = 5

matrix = WordMatrix()

# Slide a window of `window_size` consecutive tokens over the token list and
# hand every (earlier token, later token) pair inside the window to
# matrix.add().  The third argument equals window_size for adjacent tokens
# and drops by one for each extra position separating the pair.  Windows
# overlap, so the same pair of positions is reported once per window that
# contains both.  A token list shorter than one window yields no iterations.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    for first, earlier in enumerate(window):
        for second in range(first + 1, len(window)):
            matrix.add(earlier, window[second],
                       window_size - second + first + 1)

# todo: tabs stuff, cool printing
s = "    "

s += " " + " ".join(matrix.get_tokens())

for token0 in matrix.get_tokens():
    s += "\n" + token0
    for token1 in matrix.get_tokens():
        s += " " + str(matrix.get(token0, token1))

print s
Example #3
0
# Slide a window of `window_size` consecutive tokens over the token list and
# hand every (earlier token, later token) pair inside the window to
# matrix.add().  The third argument equals window_size for adjacent tokens
# and drops by one for each extra position separating the pair.  Windows
# overlap, so the same pair of positions is reported once per window that
# contains both.  A token list shorter than one window yields no iterations.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    for first, earlier in enumerate(window):
        for second in range(first + 1, len(window)):
            matrix.add(earlier, window[second],
                       window_size - second + first + 1)

print "Co-occurence counted"
print "Keys quantity:", len(matrix.get_tokens())

for key in matrix.get_tokens():
    if key <> "*":
        print key, matrix.kn_cooccurences(key, 6)

print "Now to more sophisticated analysis"

for key in matrix.get_tokens():
    if key <> "*":
        print key, matrix.kn_columns(key, 6, matrix.dist_cols_euclidean)

print "Done"
"""
for token0 in matrix.get_tokens():
    s += "\n" + token0
Example #4
0
    normalized_tokens += [stemmer.stem(token)]

# Width, in tokens, of the sliding window scanned below.
window_size = 5

matrix = WordMatrix()

# Slide a window of `window_size` consecutive tokens over the token list and
# hand every (earlier token, later token) pair inside the window to
# matrix.add().  The third argument equals window_size for adjacent tokens
# and drops by one for each extra position separating the pair.  Windows
# overlap, so the same pair of positions is reported once per window that
# contains both.  A token list shorter than one window yields no iterations.
for win_start in range(len(normalized_tokens) - window_size + 1):
    window = normalized_tokens[win_start:win_start + window_size]
    for first, earlier in enumerate(window):
        for second in range(first + 1, len(window)):
            matrix.add(earlier, window[second],
                       window_size - second + first + 1)

# todo: tabs stuff, cool printing
s = "    "

s += " " + " ".join(matrix.get_tokens())

for token0 in matrix.get_tokens():
    s += "\n" + token0
    for token1 in matrix.get_tokens():
        s += " " + str(matrix.get(token0, token1))

print s