def __init__(self, k, txt, state):
    '''
    Constructor of Markov instance. Uses hash table or dictionary as its
    internal state to represent the Markov model.

    Params:
        k: value of succeeding chars to use
        txt: string of text to create the model with
        state: 0 - uses hash table class to represent model
               1 - uses built-in dictionary to represent model
    '''
    self._k = k
    self._txt = txt
    self._state = state

    if state == 0:
        # Create 2 models, 1 for k succeeding chars and another for k+1
        self._model_k = Hashtable(HASH_CELLS, 0, TOO_FULL, GROWTH_RATIO)
        self._model_k_1 = Hashtable(HASH_CELLS, 0, TOO_FULL, GROWTH_RATIO)
    else:
        self._model_k = {}
        self._model_k_1 = {}

    n = len(txt)
    for i in range(n):
        # Build the k+1 succeeding chars starting at i. True modulo
        # wrap-around is robust even when k >= len(txt); the original
        # single subtraction (curr_idx - len(txt)) would index out of
        # range in that case.
        k_1_succeeding = ''.join(txt[(i + j) % n] for j in range(k + 1))
        # The k succeeding chars are simply the first k of those.
        k_succeeding = k_1_succeeding[:k]

        # Insert or update each table with its key (works for both the
        # Hashtable and the dict backing stores).
        if k_succeeding in self._model_k:
            self._model_k[k_succeeding] += 1
        else:
            self._model_k[k_succeeding] = 1
        if k_1_succeeding in self._model_k_1:
            self._model_k_1[k_1_succeeding] += 1
        else:
            self._model_k_1[k_1_succeeding] = 1
def __init__(self, k, learning_text, state_variable, speaker_text):
    """
    Build the Markov model from a value of k, a learning text, a state
    variable, and a speaker text. For every k / k+1 length substring of
    the learning text, the model stores how many times that substring
    occurs among the speaker text's substrings.

    • state_variable == 0: backing store is the Hashtable data structure
      defined in hash_table.py.
    • state_variable == 1: backing store is a built-in Python dict.
    """
    self._size = k
    self._learning_text = learning_text
    self._state_variable = state_variable
    self._speaker_text = speaker_text

    # Index-aligned (k-string, k+1-string) pairs for both texts.
    learn_pairs = [self.k_strings(i, k, learning_text)
                   for i in range(len(learning_text))]
    speaker_pairs = [self.k_strings(i, k, speaker_text)
                     for i in range(len(speaker_text))]
    speaker_k = [pair[0] for pair in speaker_pairs]
    speaker_k_plus_1 = [pair[1] for pair in speaker_pairs]

    # Choose the backing store according to the state variable.
    if state_variable == 0:  # (Hashtable)
        self._markov_model = Hashtable(self.HASH_CELLS, 0, self.TOO_FULL,
                                       self.GROWTH_RATIO)
    else:  # state_variable == 1 (Dictionary)
        self._markov_model = {}

    # Record, for each learning-text substring, how often it appears in
    # the speaker text.
    for k_str, k_plus_1_str in learn_pairs:
        self._markov_model[k_str] = speaker_k.count(k_str)
        self._markov_model[k_plus_1_str] = speaker_k_plus_1.count(k_plus_1_str)
def test_rehash_2():
    '''
    Purpose: Testing rehashing on a larger set of items
    '''
    expected = {''.join(perm): idx
                for idx, perm in enumerate(permutations("abcefgh"))}
    table = Hashtable(1, None, 0.5, 2)
    helper_update(table, expected)
    helper_lookup(table, expected)
def test_rehash_3():
    '''
    Purpose: Test rehashing on a small set that wraps around
    '''
    expected = {chr(code): offset
                for offset, code in enumerate(range(ord('f'), ord('z')))}
    table = Hashtable(15, None, 0.5, 2)
    helper_update(table, expected)
    helper_lookup(table, expected)
def populate_model(self):
    """Method which populates the hashtable (or dictionary, depending on
    state) for the model.

    No inputs.
    Outputs: Hashtable or dictionary mapping each k and k+1 length
    substring of the text (with wrap-around) to its occurrence count.
    Also records each distinct character of the text in
    self._unique_chars as a side effect.
    """
    # BUGFIX: the state check was inverted (state == 1 built a Hashtable).
    # Every other Markov implementation in this file treats state == 0 as
    # the Hashtable case and state == 1 as the built-in dict case.
    if self._state == 0:
        model = Hashtable(HASH_CELLS, 0, 0.5, 2)
    else:
        model = dict()

    for i in range(len(self._text)):
        k_string = ''
        k_plus_one_string = ''

        # Collect unique characters in text
        if self._text[i] not in self._unique_chars:
            self._unique_chars.append(self._text[i])

        # Loop through and build k and k + 1 strings; modulo handles the
        # wrap-around past the end of the text.
        for j in range(self._k + 1):
            index = (i + j) % len(self._text)
            if j < self._k:
                k_string += self._text[index]
            k_plus_one_string += self._text[index]

        # Insert or increment both substrings in the same model.
        if k_string in model:
            model[k_string] += 1
        else:
            model[k_string] = 1
        if k_plus_one_string in model:
            model[k_plus_one_string] += 1
        else:
            model[k_plus_one_string] = 1
    return model
def __init__(self, k, speech, state):
    """
    __init__(self, k, speech, state)
    The initialization method for the Markov class. Takes in k, the length
    of the substrings, speech, a text sample, and a state to communicate
    whether to use a Hashtable or a dict. Creates the table using the
    _gen_table function.
    Gets:
        self, a Markov instance
        k, an int
        speech, a string
        state, an int, 0 or 1
    Raises:
        ValueError: if state is neither 0 nor 1
    Returns: nothing explicitly, but implicitly self
    """
    if state == 0:
        self.table = Hashtable(HASH_CELLS, 0, 0.5, 2)
    elif state == 1:
        self.table = {}
    else:
        # A specific exception type with a message instead of a bare
        # `raise Exception` (still caught by any `except Exception`).
        raise ValueError(f"state must be 0 or 1, got {state!r}")
    self.k = k
    # Size of the "alphabet": number of distinct characters in the sample.
    self.alphabet_len = len(set(speech))
    self._gen_table(speech)
def test_bool_21():
    """Run the simple_21 checks against a 12000-cell Hashtable."""
    simple_21(Hashtable(12000, None, 0.5, 2))
def test_del_22():
    """Run the simple_22 checks against a 1-cell Hashtable."""
    simple_22(Hashtable(1, None, 0.5, 2))
def test_len_19():
    """Run the simple_19 checks against a 12000-cell Hashtable."""
    simple_19(Hashtable(12000, None, 0.5, 2))
def test_bool_20():
    """Run the simple_20 checks against a 1-cell Hashtable."""
    simple_20(Hashtable(1, None, 0.5, 2))
def test_getset_7():
    """Run the simple_7 checks against a 50-cell Hashtable."""
    simple_7(Hashtable(50, None, 0.5, 2))
def test_len_18():
    """Run the simple_18 checks against a 1-cell Hashtable."""
    simple_18(Hashtable(1, None, 0.5, 2))
def test_values_15():
    """Run the simple_15 checks against a 12000-cell Hashtable."""
    simple_15(Hashtable(12000, None, 0.5, 2))
class Markov:
    """
    This class represents a Markov Model, which can be represented by either
    a Hashtable or a Dictionary. Text is tabulated, and a log_probability
    method is included to calculate the probability that a given unidentified
    text was spoken by the same person as the tabulated text in the Markov
    Model.
    """

    TOO_FULL = 0.5    # Recommended load factor for the assignment
    GROWTH_RATIO = 2  # Recommended growth factor for the assignment
    HASH_CELLS = 57   # Assigned initial Hashtable size

    def __init__(self, k, learning_text, state_variable, speaker_text):
        """
        This method takes in a value of "k" and a string of text to create
        the model and a state variable. For every k / k+1 length substring
        of the learning text, the model stores how many times that substring
        occurs among the speaker text's substrings.

        • state_variable == 0: A hashtable data structure, as defined in
          hash_table.py, will be used to create the Markov Model.
        • state_variable == 1: A Python dict data structure will be used to
          create the Markov Model.
        """
        # Local import keeps the fix self-contained; Counter gives O(n)
        # counting instead of the original O(n^2) list.count-in-a-loop.
        from collections import Counter

        self._size = k
        self._learning_text = learning_text
        self._state_variable = state_variable
        self._speaker_text = speaker_text

        # Index-aligned (k-string, k+1-string) pairs of the learning text.
        learn_pairs = [self.k_strings(i, k, learning_text)
                       for i in range(len(learning_text))]

        # Occurrence counts of the speaker text's substrings. A Counter
        # returns 0 for missing keys, exactly matching list.count() on an
        # absent substring.
        speaker_k_counts = Counter()
        speaker_k_plus_1_counts = Counter()
        for i in range(len(speaker_text)):
            k_str, k_plus_1_str = self.k_strings(i, k, speaker_text)
            speaker_k_counts[k_str] += 1
            speaker_k_plus_1_counts[k_plus_1_str] += 1

        if state_variable == 0:  # (Hashtable)
            self._markov_model = Hashtable(self.HASH_CELLS, 0, self.TOO_FULL,
                                           self.GROWTH_RATIO)
        else:  # state_variable == 1 (Dictionary)
            self._markov_model = {}

        # Same insertion order as the original: k-string then k+1-string,
        # for each learning-text index in turn.
        for k_str, k_plus_1_str in learn_pairs:
            self._markov_model[k_str] = speaker_k_counts[k_str]
            self._markov_model[k_plus_1_str] = speaker_k_plus_1_counts[k_plus_1_str]

    def k_strings(self, i, k, string):
        """
        This method creates strings of length k and k + 1, starting from an
        index i, from an input string, wrapping around to the front of the
        string when the end is reached.

        Inputs: integer index i, integer k, and a string
        Outputs: a string of length k and a string of length k + 1.
        """
        n = len(string)
        k_plus_1_string = ''.join(string[(i + j) % n] for j in range(k + 1))
        # The k-string is simply the k+1 string without its last character.
        return k_plus_1_string[:k], k_plus_1_string

    def log_probability(self, string):
        """
        This method accepts a string from an unidentified speaker and returns
        a log probability for the likelihood that the speaker stored in the
        current Markov Model is the unidentified speaker of the input string.

        The following variables are used to calculate the log probability:
        N is the number of times we have observed the k succeeding letters.
        M is the number of times we have observed those letters followed by
        the present letter.
        S is the size of the "alphabet" of possible characters.

        Input: a string of text from an unidentified speaker
        Output: the log probability
        """
        # Currently counting space as a character. Implement - {' '} if this
        # is wrong.
        S = len(set(string))

        sum_logs = 0
        for i in range(len(self._learning_text)):
            k_string, k_plus_1_string = self.k_strings(i, self._size,
                                                       self._learning_text)
            if self._state_variable == 0:  # (Hashtable)
                # Guard against a None result for a missing key (presumably
                # the table's miss value — confirm against hash_table.py).
                N = self._markov_model[k_string]
                if N is None:
                    N = 0
                M = self._markov_model[k_plus_1_string]
                if M is None:
                    M = 0
            else:  # self._state_variable == 1 (Dictionary)
                # dict.get is O(1); the original scanned list(keys()) for
                # every lookup, which is O(n) per key.
                N = self._markov_model.get(k_string, 0)
                M = self._markov_model.get(k_plus_1_string, 0)
            sum_logs += log((M + 1) / (N + S))  # Laplace Smoothing
        return sum_logs

    def __repr__(self):
        """
        This method returns a string that represents the keys and values from
        an instance of a Markov Model in the form of a Hashtable or
        Dictionary, depending on the instance's state variable.
        """
        if self._state_variable == 0:
            return f'Keys: {self._markov_model.keys()}, Values: {self._markov_model.values()}'
        return f'{self._markov_model.keys()}, {self._markov_model.values()}'
def test_keys_13():
    """Run the simple_13 checks against a 12000-cell Hashtable."""
    simple_13(Hashtable(12000, None, 0.5, 2))
def test_init_2():
    """Run the simple_2 checks against a 1-cell Hashtable using DEF_VAL1."""
    simple_2(Hashtable(1, DEF_VAL1, 0.5, 2))
def test_keys_12():
    """Run the simple_12 checks against a 1-cell Hashtable."""
    simple_12(Hashtable(1, None, 0.5, 2))
def test_contains_11():
    # NOTE(review): this calls simple_10, not simple_11, even though every
    # other test_X_N in this file calls simple_N. Possibly a copy-paste slip,
    # or simple_10 may be deliberately reused with a 50-cell table — confirm
    # against the simple_* helper definitions before changing.
    table = Hashtable(50, None, 0.5, 2)
    simple_10(table)
def test_getset_9():
    """Run the simple_9 checks against a 12000-cell Hashtable."""
    simple_9(Hashtable(12000, None, 0.5, 2))
def test_del_23():
    """Run the simple_23 checks against a 12000-cell Hashtable."""
    simple_23(Hashtable(12000, None, 0.5, 2))
def test_iter_16():
    """Run the simple_16 checks against a 1-cell Hashtable."""
    simple_16(Hashtable(1, None, 0.5, 2))
def test_init_1():
    """Run the simple_1 checks against a 1-cell Hashtable."""
    simple_1(Hashtable(1, None, 0.5, 2))
def test_iter_17():
    """Run the simple_17 checks against a 12000-cell Hashtable."""
    simple_17(Hashtable(12000, None, 0.5, 2))
def test_values_14():
    """Run the simple_14 checks against a 1-cell Hashtable."""
    simple_14(Hashtable(1, None, 0.5, 2))
def test_getset_4():
    """Run the simple_4 checks against a 1-cell Hashtable."""
    simple_4(Hashtable(1, None, 0.5, 2))