def sequence_generator(n, k, M):
    '''
    Samples emissions from the HMM described by the file
    'sequence_data<n>.txt' and prints each sampled emission on its own line.

    Arguments:
        n:          Index of the sequence data file to load.
        k:          Number of emissions to sample.
        M:          Length of every sampled emission.
    '''
    # The example sequences stored alongside the matrices are not used here.
    transitions, observations, _ = Utility.load_sequence(n)

    # Header identifying the file the model came from.
    print("File #{}:".format(n))
    print("{:30}".format('Generated Emission'))
    print('#' * 70)

    for _ in range(k):
        # A fresh model is built for every sample.
        model = HiddenMarkovModel(transitions, observations)

        # Only the emitted symbols are printed; the hidden states are dropped.
        emission, _hidden = model.generate_emission(M)
        rendered = ''.join(map(str, emission))

        print("{:30}".format(rendered))

    # Two blank lines separate this report from whatever is printed next.
    print('')
    print('')
def sequence_prediction(n):
    '''
    Decodes every sequence stored in 'sequence_data<n>.txt' with the HMM
    from the same file and prints each emission sequence next to the state
    sequence returned by the model's viterbi() method.

    Arguments:
        n:          Index of the sequence data file to load.
    '''
    transitions, observations, sequences = Utility.load_sequence(n)

    # Header identifying the file and labelling the two output columns.
    print("File #{}:".format(n))
    print("{:30}{:30}".format('Emission Sequence',
                              'Max Probability State Sequence'))
    print('#' * 70)

    for sequence in sequences:
        # A fresh model is built for every sequence.
        model = HiddenMarkovModel(transitions, observations)

        emitted = ''.join(map(str, sequence))
        decoded = model.viterbi(sequence)

        print("{:30}{:30}".format(emitted, decoded))

    # Two blank lines separate this report from whatever is printed next.
    print('')
    print('')
def sequence_probability(n):
    '''
    Prints, for every sequence stored in 'sequence_data<n>.txt', the value
    returned by the model's probability_betas() method for that sequence.

    Arguments:
        n:          Index of the sequence data file to load.
    '''
    transitions, observations, sequences = Utility.load_sequence(n)

    # Header identifying the file and labelling the two output columns.
    print("File #{}:".format(n))
    print("{:30}{:10}".format('Emission Sequence',
                              'Probability of Emitting Sequence'))
    print('#' * 70)

    for sequence in sequences:
        # A fresh model is built for every sequence.
        model = HiddenMarkovModel(transitions, observations)

        emitted = ''.join(map(str, sequence))
        probability = model.probability_betas(sequence)

        # The probability is shown left-aligned in scientific notation.
        print("{:30}{:<10.3e}".format(emitted, probability))

    # Two blank lines separate this report from whatever is printed next.
    print('')
    print('')
def supervised_learning(tokenized_lines):
    '''
    Trains an HMM with supervised learning, using part-of-speech tags as the
    hidden-state labels, and then prints 14 generated lines of 10 words each.

    Arguments:
        tokenized_lines: a list of lines tokenized as words
    '''
    # Lookup tables between part-of-speech tags and state indices, and between
    # words and observation indices. They are needed both to encode the
    # training data and to turn generated observations back into words.
    _, state_POS_map, POS_state_map = convert_POS_to_states(tokenized_lines)
    _, observation_word_map, word_observation_map = \
        convert_lines_observations(tokenized_lines)

    n_states = len(state_POS_map)
    n_observations = len(observation_word_map)

    # Zero-initialised transition (states x states) and observation
    # (states x observations) matrices for the model to be trained.
    A = [[0. for _ in range(n_states)] for _ in range(n_states)]
    O = [[0. for _ in range(n_observations)] for _ in range(n_states)]
    hmm = HiddenMarkovModel(A, O)

    # X holds one observation-index sequence per line and Y the matching
    # state-index sequence. Both start empty so that the training set holds
    # exactly one sequence per input line and no phantom empty sequence.
    X = []
    Y = []
    for line in tokenized_lines:
        x = []
        y = []
        for word, POS in tag_POS(line):
            x.append(word_observation_map[word])
            y.append(POS_state_map[POS])
        X.append(x)
        Y.append(y)

    # Train on the labelled sequences.
    hmm.supervised_learning(X, Y)

    # Generate and print 14 lines of 10 words each. Every word is followed
    # by a single space, including the last one.
    for _ in range(14):
        obs = hmm.preethi_generate_emission(10)
        print(''.join(observation_word_map[j] + " " for j in obs))
def unsupervised_generation(D, n_states, N_iters, k, sylls, endsylls, rhymedict):
    '''
    Loads two previously saved HMMs, selected by n_states and N_iters, and
    calls generations() three times to print k emissions per mode
    (rhyming, 10-syllable and regular).

    Arguments:
        D:          Not used by this function; kept so existing callers
                    keep working.
        n_states:   Number of hidden states; used in the banner and in the
                    names of the saved-model files.
        N_iters:    Number of training iterations; used in the names of the
                    saved-model files.
        k:          Number of generations requested from each call to
                    generations().
        sylls:      Passed through unchanged to generations().
        endsylls:   Passed through unchanged to generations().
        rhymedict:  Passed through unchanged to generations().
    '''
    # Banner announcing which model size is being used.
    print('')
    print('')
    print('#' * 70)
    print("{:^70}".format("Generating Emissions From HMM with %d States") % n_states)
    print('#' * 70)
    print('')
    print('')

    # Saved models are named '<n_states>_<N_iters>_1.txt' (rhyme model) and
    # '<n_states>_<N_iters>_2.txt' (regular model).
    prefix = str(n_states) + '_' + str(N_iters)
    rhymefile = prefix + '_1.txt'
    regularfile = prefix + '_2.txt'

    A, O = processing.read_saved_HMM(rhymefile)
    HMMrhyme = HiddenMarkovModel(A, O)

    A, O = processing.read_saved_HMM(regularfile)
    HMMreg = HiddenMarkovModel(A, O)

    # generates and prints "poems"
    print("RHYMING!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, True)

    # NOTE(review): this pass uses the rhyme model with the last flag off;
    # confirm that the rhyme model is the intended one here.
    print("10 SYLLABLES!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, False)

    # The regular pass generates from the model loaded from regularfile.
    print("REGULAR")
    generations(HMMreg, k, sylls, endsylls, False, rhymedict, False)
@ Description: Implement HMM_TEST """ from HMM import HiddenMarkovModel import numpy as np import time Q = np.array([0, 1]) # hot 0, cold 1 V = np.array([0, 1, 2]) O = np.array([[2, 2, 1], [0, 0, 1], [0, 1, 2]]) I = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 0]]) test = np.array([0, 1, 2]) # # supervised learning algorithm time_start1 = time.time() clf1 = HiddenMarkovModel(Q, V) clf1.train(O, I) time_end1 = time.time() print("Supervised learning parameters:") print("Transfer probability matrix\n", clf1.A) print("Observation probability matirx\n", clf1.B) print("Initial state probability \n", clf1.Pi) print("Prediction of Supervised learning", clf1.predict(test)) print("Runtime of Supervised learning:", time_end1-time_start1) print("________________BOUNDARY_______________________________________") # unsupervised learning algorithm time_start2 = time.time() clf2 = HiddenMarkovModel(Q, V) clf2.train(O) time_end2 = time.time() print("Unsupervised learning parameters:")
lower=True, split=' ') Tokenizer = keras.preprocessing.text.Tokenizer(num_words=None, filters=filters, lower=True, split=' ', char_level=False, oov_token=None, document_count=0) # fit Tokenizer Tokenizer.fit_on_texts(word_sequence) # initalize the HMM = HiddenMarkovModel(hmm_param['A'], hmm_param['O']) poem_list = "" for i in range(5): poem, syll_list = poem_that_rhymes(HMM, Tokenizer, r2w_dict, w2s_dict) poem_list += poem poem_list += "\n\n" for syll in syll_list: poem_list += str(syll) + ', ' poem_list += "\n\n" # save poems as text fname_write = 'hmm_poems_k' + str(k) + '.txt' with open(dataPath + fname_write, 'w') as f:
import numpy as np TopElem = 100 Folds = 10 Tags =10 Obs = list(set(brown.words())) Sentences = brown.sents() alphabetReverseMap = {Obs[t]:t for t in xrange(len(Obs))} number_obs = len(Obs) number_sents = len(Sentences) foldsize = number_sents/Folds for fold in xrange(Folds): train = Sentences[0:foldsize*fold] + Sentences[foldsize*(fold+1):-1] train = [alphabetReverseMap[word] for sentence in train for word in sentence] #print train[:20] hmm = HiddenMarkovModel(Tags,len(train),number_obs) I = np.random.rand(Tags) A = np.random.rand(Tags,Tags) B = np.random.rand(Tags,number_obs) I/=I.sum() for i in xrange(Tags): A[i][:] /= A[i][:].sum() B[i][:] /= B[i][:].sum() #print A[1:3] #print B[1:3] #print I I,A,B = hmm.forward_backward(I,A,B,train) print "#Fold: ", fold for i in xrange(Tags): top = B[i].argsort()[-TopElem:][::-1]
from Viterbi import viterbi if __name__ == "__main__": # Handle command line arguments parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbosity", help="Increase output verbosity", action="store_true") parser.add_argument("string_to_decode", help="The string to be decoded") args = parser.parse_args() input_string = args.string_to_decode # Define the HMM h = HiddenMarkovModel(['HOT', 'COLD'], ['1', '2', '3']) # Initial probabilities initial_probabilities = [0.8, 0.2] # Transition probabilities transition_probabilities = [[0.7, 0.3], [0.6, 0.4]] # Emission probabilities emission_probabilities = [[0.2, 0.4, 0.4], [0.5, 0.4, 0.1]] # Set up probabilites h.set_probabilities(initial_probabilities, transition_probabilities, emission_probabilities) # Run Viterbi algorithm