import dataprocessing
import numpy as np
import HMM
import nltk
import poetrytools

# parse_words_lines() yields the sequences handed to the HMM trainer and a
# lookup (conv_words) that turns an emitted observation id into a word.
X, conv_words = dataprocessing.parse_words_lines()

n_states = 5
n_observations = len(conv_words)

# Train the model, then draw a single emission with M=10.
hmm = HMM.unsupervised_HMM(X, n_states, n_iters=300)
emission = hmm.generate_emission(M=10)

# Emitted ids are used as direct indices into conv_words (no offset applied).
# NOTE(review): confirm generate_emission returns ids in conv_words' indexing.
translated_emission = [conv_words[obs_id] for obs_id in emission]

# Every word is followed by a single space, so the line ends with one too.
line = ''.join(word + ' ' for word in translated_emission)
print(line)
print(emission)

# Display the transition matrix rounded to four decimal places.
A_vis = np.array(hmm.A).round(4)
print(A_vis)

# For every row of the observation matrix, rank the entries and keep the
# indices of the (up to) 10 largest values, largest first.
O = np.array(hmm.O)
for i in range(O.shape[0]):
    state_emissions = O[i, :]
    # Descending order first, then take the leading 10 indices.
    top_emissions = np.argsort(state_emissions)[::-1][:10]
import dataprocessing
import numpy as np
import HMM
import pickle
import copy

# Parsed sequences plus the lookup used later to turn observation ids into words.
X_raw, conversion_list = dataprocessing.parse_words_lines()
print(X_raw)

# Build X as a reversed version of every sequence. The deep copy keeps X_raw
# in its original order while each copied sequence is reversed in place.
X = copy.deepcopy(X_raw)
for sequence in X:
    sequence.reverse()
print(X)

n_states = 50
n_observations = len(conversion_list)

# Accumulators initialised up front; nothing in the loop below appends to them.
lines = []
emissions = []

hmm = HMM.unsupervised_HMM(X, n_states, n_iters=10)

# Draw 10 emissions (each with M=10) and render each one as a line of text.
for _ in range(10):
    emission = hmm.generate_emission(M=10)

    # Emitted ids index conversion_list directly (no offset applied).
    # NOTE(review): confirm generate_emission returns ids in that indexing.
    translated_emission = [conversion_list[obs_id] for obs_id in emission]

    # Each word is followed by one space, including the last word.
    line = ''.join(word + ' ' for word in translated_emission)
import dataprocessing
import numpy as np
import pickle
import copy
import HMM

# Label the state with the part of speech.

X, word_conv = dataprocessing.parse_words_lines()
word_lists = dataprocessing.get_word_lists()

# Reverse every sequence of X in place, printing X before and after.
print(X)
for encoded_line in X:
    encoded_line.reverse()
print(X)

# Apply the same in-place reversal to word_lists, again printing both states.
print(word_lists)
for words in word_lists:
    words.reverse()
print(word_lists)

# Restore the previously saved `pos` object from its pickle file.
# NOTE: pickle.load can execute arbitrary code, so pos.txt must be trusted.
with open('pos.txt', 'rb') as file:
    pos = pickle.load(file)

# Collapse the two-level `pos` structure into one flat list, keeping order.
flattened_pos = [entry for group in pos for entry in group]

# Distinct entries together with the number of times each one occurs.
unique_pos, counts = np.unique(flattened_pos, return_counts=True)
pos_dict = {}
for i in range(len(unique_pos)):