Example 1
def sequence_generator(n, k, M):
    '''
    Generates k emissions of length M using the HMM stored in the file
    'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
        k:          Number of sequences to generate.
        M:          Length of each emission to generate.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}".format('Generated Emission'))
    print('#' * 70)

    # Generate k input sequences.
    for i in range(k):
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Generate a single emission sequence of length M.
        emission, states = HMM.generate_emission(M)
        x = ''.join([str(e) for e in emission])

        # Print the results.
        print("{:30}".format(x))

    print('')
    print('')
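
# Hypothetical usage (file index and sizes illustrative; assumes the
# project's Utility module and 'sequence_data0.txt' are available):
#
#     sequence_generator(0, k=5, M=20)
#
# This prints five emissions of length 20 generated from the HMM saved in
# 'sequence_data0.txt'.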
Example 2
def sequence_prediction(n):
    '''
    Runs sequence prediction on the five sequences at the end of the file
    'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}{:30}".format('Emission Sequence',
                              'Max Probability State Sequence'))
    print('#' * 70)

    # For each input sequence:
    for seq in seqs:
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Make predictions.
        x = ''.join([str(xi) for xi in seq])
        y = HMM.viterbi(seq)

        # Print the results.
        print("{:30}{:30}".format(x, y))

    print('')
    print('')
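
# For reference, a minimal self-contained Viterbi sketch (an illustration,
# not the project's HMM.viterbi): it assumes a row-stochastic transition
# matrix A, emission matrix O, and a uniform initial state distribution,
# mirroring how the snippets above construct their HMMs.
def viterbi_sketch(A, O, seq):
    L = len(A)                                    # number of hidden states
    probs = [O[s][seq[0]] / L for s in range(L)]  # uniform start distribution
    paths = [[s] for s in range(L)]
    for x in seq[1:]:
        new_probs, new_paths = [], []
        for s in range(L):
            # Pick the best predecessor state for state s at this step.
            best_p, best_t = max((probs[t] * A[t][s] * O[s][x], t)
                                 for t in range(L))
            new_probs.append(best_p)
            new_paths.append(paths[best_t] + [s])
        probs, paths = new_probs, new_paths
    return paths[max(range(L), key=lambda s: probs[s])]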
Example 3
def sequence_probability(n):
    '''
    Determines the probability of emitting the five sequences at the end of
    the file 'sequence_data<n>.txt' for a given n and prints the results.

    Arguments:
        n:          File index.
    '''
    A, O, seqs = Utility.load_sequence(n)

    # Print file information.
    print("File #{}:".format(n))
    print("{:30}{:10}".format('Emission Sequence',
                              'Probability of Emitting Sequence'))
    print('#' * 70)

    # For each input sequence:
    for seq in seqs:
        # Initialize an HMM.
        HMM = HiddenMarkovModel(A, O)

        # Compute the probability of the input sequence.
        x = ''.join([str(xi) for xi in seq])
        p = HMM.probability_betas(seq)

        # Print the results.
        print("{:30}{:<10.3e}".format(x, p))

    print('')
    print('')
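
# A minimal sketch of the quantity probability_betas computes: P(x) via the
# backward algorithm, again assuming row-stochastic A and O and a uniform
# initial state distribution (an illustration, not the project's method).
def sequence_probability_sketch(A, O, seq):
    L = len(A)
    betas = [1.0] * L                 # beta at the final position is 1
    for x in reversed(seq[1:]):
        # beta_i(s) = sum_t A[s][t] * O[t][x_{i+1}] * beta_{i+1}(t)
        betas = [sum(A[s][t] * O[t][x] * betas[t] for t in range(L))
                 for s in range(L)]
    # P(x) = sum_s pi_s * O[s][x_0] * beta_0(s), with pi uniform.
    return sum(O[s][seq[0]] * betas[s] / L for s in range(L))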
Example 4
def supervised_learning(tokenized_lines):
    '''
    Generates a sonnet by training an HMM with supervised learning and then
    using the HMM to generate 14 lines of 10 words each. The dataset is
    labeled with part-of-speech tags, which serve as the hidden states.

    Arguments:
        tokenized_lines: a list of lines, each tokenized as a list of words.
    '''

    # Build the maps between hidden states and part-of-speech tags, and
    # between observation indices and words. These maps are used both for
    # training the supervised model and for generating the poem.
    states, state_POS_map, POS_state_map = convert_POS_to_states(
        tokenized_lines)
    observations, observation_word_map, word_observation_map = convert_lines_observations(
        tokenized_lines)

    # Initialize transition and observation matrices.
    A = [[0. for j in range(len(state_POS_map))]
         for i in range(len(state_POS_map))]
    O = [[0. for j in range(len(observation_word_map))]
         for i in range(len(state_POS_map))]

    # Create the HMM to be trained. X is a list of lines tokenized as
    # observation indices, and Y holds the corresponding part-of-speech
    # state labels for every word.
    hmm = HiddenMarkovModel(A, O)
    X = []
    Y = []

    # For each tokenized training line, find the part of speech of each word
    # and append the corresponding state indices to Y; likewise fill X with
    # the observation indices of the words.
    for line in tokenized_lines:
        words_and_tags = tag_POS(line)
        x = []
        y = []
        for word, POS in words_and_tags:
            x.append(word_observation_map[word])
            y.append(POS_state_map[POS])
        X.append(x)
        Y.append(y)

    # Train HMM using supervised learning with X and Y, where Y contains the part of speech
    # labels.
    hmm.supervised_learning(X, Y)

    # Generate 14 lines with 10 words each and print them out.
    for i in range(14):
        obs = hmm.preethi_generate_emission(10)
        line = ''
        for j in obs:
            line += observation_word_map[j]
            line += " "
        print(line)
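
# A sketch of what supervised training amounts to here: maximum-likelihood
# counting of transitions and emissions from the labeled (X, Y) pairs. This
# is an illustration, not the project's HiddenMarkovModel method.
def supervised_mle(X, Y, n_states, n_obs):
    A = [[0.0] * n_states for _ in range(n_states)]
    O = [[0.0] * n_obs for _ in range(n_states)]
    for x, y in zip(X, Y):
        for i in range(len(y) - 1):
            A[y[i]][y[i + 1]] += 1               # count state transitions
        for xi, yi in zip(x, y):
            O[yi][xi] += 1                       # count emissions
    for row in A + O:
        total = sum(row)
        if total > 0:
            row[:] = [c / total for c in row]    # normalize each row in place
    return A, O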
Example 5
def unsupervised_generation(D, n_states, N_iters, k, sylls, endsylls,
                            rhymedict):
    '''
    Loads HMMs previously trained with unsupervised learning on the poems and
    calls generations() to generate k emissions from each HMM, processing the
    emissions and printing them as strings.

    Arguments:
        D: the number of distinct "words" (tokens) in the training poems.
        n_states: the number of hidden states the HMMs were trained with.
        N_iters: the number of iterations the HMMs were trained for.
        k: the number of generations for each HMM.
        sylls, endsylls: syllable-count data passed through to generations().
        rhymedict: rhyming dictionary passed through to generations().
    '''
    # Print a banner identifying which HMM is being used. Not strictly
    # necessary, but it makes the output easier to read.
    print('')
    print('')
    print('#' * 70)
    print("{:^70}".format(
        "Generating Emissions From HMM with {} States".format(n_states)))
    print('#' * 70)
    print('')
    print('')
    rhymefile = str(n_states) + '_' + str(N_iters) + '_1.txt'
    regularfile = str(n_states) + '_' + str(N_iters) + '_2.txt'

    A, O = processing.read_saved_HMM(rhymefile)
    HMMrhyme = HiddenMarkovModel(A, O)
    A, O = processing.read_saved_HMM(regularfile)
    HMMreg = HiddenMarkovModel(A, O)
    # Generate and print "poems" from each HMM. Note that HMMreg, not
    # HMMrhyme, is used for the regular (non-rhyming) generation.
    print("RHYMING!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, True)
    print("10 SYLLABLES!!")
    generations(HMMrhyme, k, sylls, endsylls, True, rhymedict, False)
    print("REGULAR")
    generations(HMMreg, k, sylls, endsylls, False, rhymedict, False)
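
# Hypothetical usage (parameter values illustrative; sylls, endsylls, and
# rhymedict come from the project's preprocessing step, and saved parameter
# files such as '10_100_1.txt' and '10_100_2.txt' must already exist):
#
#     unsupervised_generation(D, n_states=10, N_iters=100, k=3, sylls=sylls,
#                             endsylls=endsylls, rhymedict=rhymedict)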
Example 6
"""
@ Description:    Implement HMM_TEST
"""

from HMM import HiddenMarkovModel
import numpy as np
import time

Q = np.array([0, 1])                              # hidden states: hot = 0, cold = 1
V = np.array([0, 1, 2])                           # observation alphabet
O = np.array([[2, 2, 1], [0, 0, 1], [0, 1, 2]])   # training observation sequences
I = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 0]])   # corresponding state sequences
test = np.array([0, 1, 2])                        # sequence to decode after training

# Supervised learning algorithm
time_start1 = time.time()
clf1 = HiddenMarkovModel(Q, V)
clf1.train(O, I)
time_end1 = time.time()
print("Supervised learning parameters:")
print("Transfer probability  matrix\n", clf1.A)
print("Observation probability  matirx\n", clf1.B)
print("Initial state probability \n", clf1.Pi)
print("Prediction of Supervised learning", clf1.predict(test))
print("Runtime of Supervised learning:", time_end1-time_start1)
print("________________BOUNDARY_______________________________________")
# unsupervised learning algorithm
time_start2 = time.time()
clf2 = HiddenMarkovModel(Q, V)
clf2.train(O)
time_end2 = time.time()
print("Unsupervised learning  parameters:")
Example 7

Tokenizer = keras.preprocessing.text.Tokenizer(num_words=None,
                                               filters=filters,
                                               lower=True,
                                               split=' ',
                                               char_level=False,
                                               oov_token=None,
                                               document_count=0)

# fit Tokenizer
Tokenizer.fit_on_texts(word_sequence)
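
# As an aside, after fit_on_texts the Tokenizer's word_index maps each word
# to a 1-based integer rank ordered by frequency. A standalone toy
# illustration (not part of this script):
#
#     from keras.preprocessing.text import Tokenizer
#     toy = Tokenizer(lower=True, split=' ')
#     toy.fit_on_texts(["the cat sat", "the cat ran"])
#     toy.word_index                        # {'the': 1, 'cat': 2, 'sat': 3, 'ran': 4}
#     toy.texts_to_sequences(["the cat"])   # [[1, 2]]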

# Initialize the HMM with the stored parameters.
HMM = HiddenMarkovModel(hmm_param['A'], hmm_param['O'])

poem_list = ""
for i in range(5):
    poem, syll_list = poem_that_rhymes(HMM, Tokenizer, r2w_dict, w2s_dict)
    poem_list += poem
    poem_list += "\n\n"
    for syll in syll_list:
        poem_list += str(syll) + ', '

    poem_list += "\n\n"

# save poems as text
fname_write = 'hmm_poems_k' + str(k) + '.txt'

with open(dataPath + fname_write, 'w') as f:
    f.write(poem_list)
Example 8
from nltk.corpus import brown

import numpy as np

TopElem = 100    # number of top words to report per tag
Folds = 10       # number of cross-validation folds
Tags = 10        # number of hidden states (tags)
Obs = list(set(brown.words()))
Sentences = brown.sents()
alphabetReverseMap = {Obs[t]: t for t in range(len(Obs))}
number_obs = len(Obs)
number_sents = len(Sentences)
foldsize = number_sents // Folds
for fold in range(Folds):
    # Hold out one fold of sentences; train on the rest.
    train = Sentences[0:foldsize * fold] + Sentences[foldsize * (fold + 1):]
    train = [alphabetReverseMap[word] for sentence in train for word in sentence]
    # print(train[:20])
    # HiddenMarkovModel is a project-local class (its import is not shown in
    # this excerpt); the constructor appears to take the number of tags, the
    # training-sequence length, and the number of observation symbols.
    hmm = HiddenMarkovModel(Tags, len(train), number_obs)
    # Random row-stochastic initializations for the forward-backward
    # (Baum-Welch) reestimation.
    I = np.random.rand(Tags)
    A = np.random.rand(Tags, Tags)
    B = np.random.rand(Tags, number_obs)
    I /= I.sum()
    for i in range(Tags):
        A[i][:] /= A[i][:].sum()
        B[i][:] /= B[i][:].sum()
    # print(A[1:3]); print(B[1:3]); print(I)
    I, A, B = hmm.forward_backward(I, A, B, train)

    print("#Fold:", fold)
    for i in range(Tags):
        # Indices of the TopElem most probable words for tag i.
        top = B[i].argsort()[-TopElem:][::-1]
Example 9
import argparse

from Viterbi import viterbi

if __name__ == "__main__":

    # Handle command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbosity",
                        help="Increase output verbosity",
                        action="store_true")
    parser.add_argument("string_to_decode", help="The string to be decoded")
    args = parser.parse_args()
    input_string = args.string_to_decode

    # Define the HMM
    h = HiddenMarkovModel(['HOT', 'COLD'], ['1', '2', '3'])

    # Initial probabilities
    initial_probabilities = [0.8, 0.2]

    # Transition probabilities
    transition_probabilities = [[0.7, 0.3], [0.6, 0.4]]

    # Emission probabilities
    emission_probabilities = [[0.2, 0.4, 0.4], [0.5, 0.4, 0.1]]

    # Set up probabilities
    h.set_probabilities(initial_probabilities, transition_probabilities,
                        emission_probabilities)

    # Run Viterbi algorithm