"""Train an unsupervised HMM on the parsed corpus and inspect it.

Samples one 10-word emission, prints it together with the learned
transition matrix, then extracts the top-10 highest-probability
observation indices for each hidden state.
"""
import dataprocessing
import numpy as np
import HMM
import nltk
import poetrytools

# Parse the corpus into observation sequences plus an index -> word map.
X, conv_words = dataprocessing.parse_words_lines()

n_states = 5
n_observations = len(conv_words)

# Fit the HMM with unsupervised (Baum-Welch style) training.
hmm = HMM.unsupervised_HMM(X, n_states, n_iters=300)

# Sample a 10-token emission and map each observation index back to its
# word via conv_words (indices are used directly, no offset needed).
emission = hmm.generate_emission(M=10)
translated_emission = [conv_words[i] for i in emission]

# Trailing space kept so printed output matches the original
# word-by-word concatenation exactly.
line = ' '.join(translated_emission) + ' '
print(line)
print(emission)

# Print transition matrix, rounded for readability.
A_vis = np.around(np.array(hmm.A), 4)
print(A_vis)

# Obtain the top 10 words for each state.
O = np.array(hmm.O)
for state_emissions in O:
    # Indices of the 10 highest-probability observations, descending.
    top_emissions = state_emissions.argsort()[-10:][::-1]
    # NOTE(review): top_emissions is unused in this chunk — the
    # translation/printing step appears truncated from view.
"""Train an HMM on reversed sequences and sample lines from it.

Reversing each training sequence lets generation effectively run
right-to-left (useful e.g. for rhyme-constrained poetry generation).
"""
import dataprocessing
import numpy as np
import HMM
import pickle
import copy

# Parse corpus: raw observation sequences + index -> word conversion list.
X_raw, conversion_list = dataprocessing.parse_words_lines()
print(X_raw)

# REVERSE X — deep-copy first so X_raw itself is left untouched.
X = copy.deepcopy(X_raw)
for seq in X:
    seq.reverse()
print(X)

n_states = 50
lines = []      # NOTE(review): never appended to in this chunk — likely truncated
emissions = []  # NOTE(review): never appended to in this chunk — likely truncated
n_observations = len(conversion_list)

hmm = HMM.unsupervised_HMM(X, n_states, n_iters=10)

# Sample ten 10-token emissions; map each observation index straight
# back to its word via conversion_list (no offset needed).
for _ in range(10):
    emission = hmm.generate_emission(M=10)
    translated_emission = [conversion_list[i] for i in emission]
    # Trailing space kept to match the original concatenation exactly.
    line = ' '.join(translated_emission) + ' '
import dataprocessing
import numpy as np
import pickle
import copy
import HMM

# Label the state with the part of speech.

# Parse corpus: observation sequences + index -> word map, plus the raw
# per-line word lists.
X, word_conv = dataprocessing.parse_words_lines()
word_lists = dataprocessing.get_word_lists()
print(X)
# Reverse each training sequence in place (right-to-left generation).
for i in range(len(X)):
    X[i].reverse()
print(X)
print(word_lists)
# Keep the word lists aligned with the reversed sequences.
for i in range(len(word_lists)):
    word_lists[i].reverse()
print(word_lists)

# Load up saved pos (part-of-speech tags pickled by an earlier run).
# NOTE(review): pickle.load on a file — only safe if 'pos.txt' is trusted.
with open('pos.txt', 'rb') as file:
    pos = pickle.load(file)

# Flatten the nested per-line POS tag lists into one flat list.
flattened_pos = []
for i in range(len(pos)):
    for j in range(len(pos[i])):
        flattened_pos.append(pos[i][j])

# Distinct POS tags and their occurrence counts.
unique_pos, counts = np.unique(flattened_pos, return_counts=True)
pos_dict = {}
# NOTE(review): the loop body is truncated in this chunk — presumably it
# builds pos_dict from unique_pos; confirm against the full file.
for i in range(len(unique_pos)):