win_start = 0 while win_start + window_size <= len(normalized_tokens): window = normalized_tokens[win_start:win_start + window_size] first = 0 second = 1 while first < len(window): second = first + 1 while second < len(window): matrix.add(window[first], window[second], window_size - second + first + 1) second += 1 first += 1 win_start += 1 print "Co-occurence counted" print "Keys quantity:", len(matrix.get_tokens()) for key in matrix.get_tokens(): if key <> "*": pass#print key, matrix.kn_cooccurences(key, 6) print "Done" def get_token_by_word(word): word = re.findall(r"[A-Za-z]+", word)[0] return stemmer.stem(word.lower()) def get_euclidean_vector_by_token(n, token): print "Incoming token:", token if token in matrix.token_set: return matrix.kn_columns(token, n, matrix.dist_cols_euclidean)
normalized_tokens += [stemmer.stem(token)] window_size = 5 matrix = WordMatrix() win_start = 0 while win_start + window_size <= len(normalized_tokens): window = normalized_tokens[win_start : win_start + window_size] first = 0 second = 1 while first < len(window): second = first + 1 while second < len(window): matrix.add(window[first], window[second], window_size - second + first + 1) second += 1 first += 1 win_start += 1 # todo: tabs stuff, cool printing s = " " s += " " + " ".join(matrix.get_tokens()) for token0 in matrix.get_tokens(): s += "\n" + token0 for token1 in matrix.get_tokens(): s += " " + str(matrix.get(token0, token1)) print s
win_start = 0 while win_start + window_size <= len(normalized_tokens): window = normalized_tokens[win_start:win_start + window_size] first = 0 second = 1 while first < len(window): second = first + 1 while second < len(window): matrix.add(window[first], window[second], window_size - second + first + 1) second += 1 first += 1 win_start += 1 print "Co-occurence counted" print "Keys quantity:", len(matrix.get_tokens()) for key in matrix.get_tokens(): if key <> "*": print key, matrix.kn_cooccurences(key, 6) print "Now to more sophisticated analysis" for key in matrix.get_tokens(): if key <> "*": print key, matrix.kn_columns(key, 6, matrix.dist_cols_euclidean) print "Done" """ for token0 in matrix.get_tokens(): s += "\n" + token0
normalized_tokens += [stemmer.stem(token)] window_size = 5 matrix = WordMatrix() win_start = 0 while win_start + window_size <= len(normalized_tokens): window = normalized_tokens[win_start:win_start + window_size] first = 0 second = 1 while first < len(window): second = first + 1 while second < len(window): matrix.add(window[first], window[second], window_size - second + first + 1) second += 1 first += 1 win_start += 1 # todo: tabs stuff, cool printing s = " " s += " " + " ".join(matrix.get_tokens()) for token0 in matrix.get_tokens(): s += "\n" + token0 for token1 in matrix.get_tokens(): s += " " + str(matrix.get(token0, token1)) print s