Example #1
    def __init__(self, k, txt, state):
        '''
        Constructor of a Markov instance. Uses a hash table or dictionary as
        its internal state to represent the Markov model.

        Params:
            k: number of succeeding characters to use
            txt: string of text to create the model with
            state: 0 - uses hash table class to represent model
                   1 - uses built-in dictionary to represent model
        '''

        self._k = k
        self._txt = txt
        self._state = state

        if state == 0:
            #Create 2 models, 1 for k succeeding chars and another for k+1
            self._model_k = Hashtable(HASH_CELLS, 0, TOO_FULL, GROWTH_RATIO)
            self._model_k_1 = Hashtable(HASH_CELLS, 0, TOO_FULL, GROWTH_RATIO)

        else:
            self._model_k = {}
            self._model_k_1 = {}

        for i in range(len(txt)):

            #Temp vars to hold succeeding chars
            k_succeeding = ''
            k_1_succeeding = ''

            #Iterate k+1 times to get k+1 succeeding chars
            for j in range(i, i + k + 1):

                #Create copy of index j
                curr_idx = j

                #If j is greater than or equal to length of txt, wrap-around
                if curr_idx >= len(txt):
                    curr_idx = curr_idx - len(txt)

                #Append succeeding characters to temp variables
                k_1_succeeding += txt[curr_idx]

                #If at k+1, don't append for k succeeding chars
                if j < i + k:
                    k_succeeding += txt[curr_idx]

            #Insert or update table with k and k+1 succeeding chars as key
            if k_succeeding in self._model_k:
                self._model_k[k_succeeding] += 1
            else:
                self._model_k[k_succeeding] = 1

            if k_1_succeeding in self._model_k_1:
                self._model_k_1[k_1_succeeding] += 1
            else:
                self._model_k_1[k_1_succeeding] = 1
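
For reference, the counting loop above can be sketched with plain dicts and the modulo operator; the function name below is hypothetical, and the expected output is easy to verify by hand for a short text such as "abab" with k = 1.

def count_k_grams(txt, k):
    # Minimal dict-only illustration of the k / k+1 frequency counting above
    model_k = {}
    model_k_1 = {}
    for i in range(len(txt)):
        # Wrap-around substrings of length k and k + 1 starting at index i
        k_succeeding = ''.join(txt[(i + j) % len(txt)] for j in range(k))
        k_1_succeeding = ''.join(txt[(i + j) % len(txt)] for j in range(k + 1))
        model_k[k_succeeding] = model_k.get(k_succeeding, 0) + 1
        model_k_1[k_1_succeeding] = model_k_1.get(k_1_succeeding, 0) + 1
    return model_k, model_k_1

print(count_k_grams("abab", 1))
# ({'a': 2, 'b': 2}, {'ab': 2, 'ba': 2})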
def test_rehash_2():
    '''
    Purpose: Testing rehashing on a larger set of items
    '''
    table = Hashtable(1, None, 0.5, 2)
    keys = [''.join(tup) for tup in permutations("abcefgh")]
    test_dict1 = dict(zip(keys, range(len(keys))))
    helper_update(table, test_dict1)
    helper_lookup(table, test_dict1)
def test_rehash_3():
    '''
    Purpose: Test rehashing on a small set that wraps around
    '''
    keys = [chr(i) for i in range(ord('f'), ord('z'))]
    test_dict1 = dict(zip(keys, range(len(keys))))
    table = Hashtable(15, None, 0.5, 2)
    helper_update(table, test_dict1)
    helper_lookup(table, test_dict1)
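
The rehash tests above assume permutations from itertools plus two helpers defined elsewhere in the test module. A plausible sketch of those helpers (an assumption, not the original test code) is:

from itertools import permutations  # used by test_rehash_2 above

def helper_update(table, d):
    # Presumed behavior: insert every key/value pair, forcing rehashes along the way
    for key, value in d.items():
        table[key] = value

def helper_lookup(table, d):
    # Presumed behavior: check that every inserted pair survived the rehashes
    for key, value in d.items():
        assert table[key] == value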
    def populate_model(self):
        """Method which populates the hashtable (or dictionary, depending on
        state) for the model.
        No inputs.
        Outputs: Hashtable or dictionary
        """
        if self._state == 1:
            model = Hashtable(HASH_CELLS, 0, 0.5, 2)
        else:
            model = dict()
        for i in range(len(self._text)):
            k_string = ''
            k_plus_one_string = ''
            # Collect unique characters in text
            if self._text[i] not in self._unique_chars:
                self._unique_chars.append(self._text[i])
            # Loop through and build k and k + 1 strings
            for j in range(self._k + 1):
                if i + j >= len(self._text):
                    index = i + j - len(self._text)
                else:
                    index = i + j

                if j < self._k:
                    k_string += self._text[index]
                k_plus_one_string += self._text[index]
            if k_string in model:
                model[k_string] += 1
            else:
                model[k_string] = 1

            if k_plus_one_string in model:
                model[k_plus_one_string] += 1
            else:
                model[k_plus_one_string] = 1
        return model
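
As an aside, the explicit wrap-around branch in populate_model is equivalent to indexing with the modulo operator, which is how the k_strings method in the Markov example further down handles the same case; a minimal check:

text = "hello"
i, j = 3, 3                       # i + j runs past the end of text
index = (i + j) % len(text)       # gives 1, same as (i + j) - len(text) above
assert text[index] == text[i + j - len(text)]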
    def __init__(self, k, speech, state):
        """
        __init__(self, k, speech, state)
        The initialization method for the Markov class. Takes in k,
        the length of the substrings, speech, a text sample, and
        a state to communicate whether to use a Hashtable or a dict.
        Creates the table using the _gen_table function.

        Gets:
            self, a Markov instance
            k, an int
            speech, a string
            state, an int, 0 or 1
        Returns: None; the instance is initialized in place
        """
        if state == 0:
            self.table = Hashtable(HASH_CELLS, 0, 0.5, 2)
        elif state == 1:
            self.table = {}
        else:
            raise ValueError("state must be 0 or 1")
        self.k = k
        self.alphabet_len = len(set(speech))
        self._gen_table(speech)
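
The _gen_table method is not shown in this snippet; judging from the other examples in this listing, it presumably counts the wrap-around substrings of length k and k + 1 into self.table. A plausible sketch (an assumption, not the original implementation):

    def _gen_table(self, speech):
        # Hypothetical reconstruction: tally k- and (k+1)-length wrap-around
        # substrings of speech, mirroring the other Markov examples above.
        for i in range(len(speech)):
            k_str = ''.join(speech[(i + j) % len(speech)] for j in range(self.k))
            k1_str = k_str + speech[(i + self.k) % len(speech)]
            for key in (k_str, k1_str):
                if key in self.table:
                    self.table[key] += 1
                else:
                    self.table[key] = 1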
def test_bool_21():
    table = Hashtable(12000, None, 0.5, 2)
    simple_21(table)
def test_del_22():
    table = Hashtable(1, None, 0.5, 2)
    simple_22(table)
def test_len_19():
    table = Hashtable(12000, None, 0.5, 2)
    simple_19(table)
Example #10
def test_bool_20():
    table = Hashtable(1, None, 0.5, 2)
    simple_20(table)
Example #11
def test_getset_7():
    table = Hashtable(50, None, 0.5, 2)
    simple_7(table)
Example #12
def test_len_18():
    table = Hashtable(1, None, 0.5, 2)
    simple_18(table)
Example #13
def test_values_15():
    table = Hashtable(12000, None, 0.5, 2)
    simple_15(table)
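Example #14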
class Markov:
    """
    This class represents a Markov model whose internal state can be backed by either a Hashtable or a Python dict.
    Training text is tabulated, and a log_probability method estimates the likelihood that a given unidentified text
    was spoken by the same person as the tabulated text in the Markov model.
    """
    TOO_FULL = 0.5  # Recommended load factor for the assignment
    GROWTH_RATIO = 2  # Recommended growth factor for the assignment
    HASH_CELLS = 57  # Assigned initial Hashtable size

    def __init__(self, k, learning_text, state_variable, speaker_text):
        """
        This method takes in a value of k, a string of learning text, a state variable, and a string of speaker
        text. The Markov class uses either a hash table or a Python dict as its internal state to represent the
        Markov model, depending on the value of state_variable:

        • state_variable == 0: the Hashtable data structure defined in hash_table.py is used to build the Markov model.
        • state_variable == 1: a Python dict is used to build the Markov model.
        """
        self._size = k
        self._learning_text = learning_text
        self._state_variable = state_variable
        self._speaker_text = speaker_text
        k_strings_lst = []
        k_plus_1_strings_lst = []
        k_strings_lst_speaker = []
        k_plus_1_strings_lst_speaker = []
        k_strings_count_lst = []
        k_plus_1_strings_count_lst = []

        for i in range(len(self._learning_text)):  # Learning Text k-strings
            k_string, k_plus_1_string = self.k_strings(i, k,
                                                       self._learning_text)
            k_strings_lst.append(k_string)
            k_plus_1_strings_lst.append(k_plus_1_string)

        for i in range(len(self._speaker_text)):  # Speaker Text k-strings
            k_string_speaker, k_plus_1_string_speaker = self.k_strings(
                i, k, self._speaker_text)
            k_strings_lst_speaker.append(k_string_speaker)
            k_plus_1_strings_lst_speaker.append(k_plus_1_string_speaker)

        if state_variable == 0:  # (Hashtable)
            self._markov_model = Hashtable(self.HASH_CELLS, 0, self.TOO_FULL,
                                           self.GROWTH_RATIO)

            for i in range(len(k_strings_lst)):
                k_strings_count_lst.append(
                    k_strings_lst_speaker.count(k_strings_lst[i]))
                k_plus_1_strings_count_lst.append(
                    k_plus_1_strings_lst_speaker.count(
                        k_plus_1_strings_lst[i]))
                self._markov_model[k_strings_lst[i]] = k_strings_count_lst[i]
                self._markov_model[
                    k_plus_1_strings_lst[i]] = k_plus_1_strings_count_lst[i]

        else:  # state_variable == 1 (Dictionary)
            self._markov_model = {}

            for i in range(len(k_strings_lst)):
                k_strings_count_lst.append(
                    k_strings_lst_speaker.count(k_strings_lst[i]))
                k_plus_1_strings_count_lst.append(
                    k_plus_1_strings_lst_speaker.count(
                        k_plus_1_strings_lst[i]))
                self._markov_model[k_strings_lst[i]] = k_strings_count_lst[i]
                self._markov_model[
                    k_plus_1_strings_lst[i]] = k_plus_1_strings_count_lst[i]

    def k_strings(self, i, k, string):
        """
        This method creates strings of length k and k + 1, starting from an index i, from an input string.

        Inputs: integer index i, integer k, and a string
        Outputs: a string of length k and a string of length k + 1.
        """
        k_string = ''
        k_plus_1_string = ''
        for j in range(k):
            k_string += string[(i + j) % len(string)]
        for m in range(k + 1):
            k_plus_1_string += string[(i + m) % len(string)]
        return k_string, k_plus_1_string

    def log_probability(self, string):
        """
        This method accepts a string from an unidentified speaker and returns a log probability for the likelihood
        that the speaker modeled by the current Markov model is the speaker of the input string.
        The following variables are used to calculate the log probability:
        N is the number of times we have observed the k succeeding letters.
        M is the number of times we have observed those letters followed by the present letter.
        S is the size of the "alphabet" of possible characters.

        Input: a string of text from an unidentified speaker
        Output: the log probability
        """
        S = len(set(string))  # Currently counting space as a character. Implement - {' '} if this is wrong.
        N_lst = []
        M_lst = []

        for i in range(len(self._learning_text)):
            k_string, k_plus_1_string = self.k_strings(i, self._size,
                                                       self._learning_text)
            if self._state_variable == 0:  # (Hashtable)
                if self._markov_model[k_string] is None:  # k string
                    N_lst.append(0)
                else:
                    N_lst.append(self._markov_model[k_string])
                if self._markov_model[k_plus_1_string] is None:  # k + 1 string
                    M_lst.append(0)
                else:
                    M_lst.append(self._markov_model[k_plus_1_string])

            else:  # self._state_variable == 1 (Dictionary)
                if k_string not in self._markov_model:  # k string
                    N_lst.append(0)
                else:
                    N_lst.append(self._markov_model[k_string])
                if k_plus_1_string not in self._markov_model:  # k + 1 string
                    M_lst.append(0)
                else:
                    M_lst.append(self._markov_model[k_plus_1_string])

        sum_logs = 0
        for i in range(len(N_lst)):
            sum_logs += log(
                (M_lst[i] + 1) / (N_lst[i] + S))  # Laplace Smoothing
        return sum_logs

    def __repr__(self):
        """
        This method returns a string that represents the keys and values from an instance of a Markov Model in the form
        of a Hashtable or Dictionary, depending on the instance's state variable.
        """
        if self._state_variable == 0:
            return f'Keys: {self._markov_model.keys()}, Values: {self._markov_model.values()}'
        else:
            return f'{self._markov_model.keys()}, {self._markov_model.values()}'
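
A minimal usage sketch for the class above, using the dict-backed state (state_variable == 1) so the hash_table module is not needed; the texts, the value of k, and the argument roles are made up for illustration, since the driver code is not shown here.

from math import log  # log_probability relies on log being in scope where Markov is defined

speaker_a = "it was the best of times it was the worst of times "
speaker_b = "call me ishmael some years ago never mind how long "
unknown = "it was the age of wisdom it was the age of foolishness "

model_a = Markov(2, unknown, 1, speaker_a)
model_b = Markov(2, unknown, 1, speaker_b)

# The higher (less negative) log probability suggests the better match
if model_a.log_probability(unknown) > model_b.log_probability(unknown):
    print("unknown text looks more like speaker A")
else:
    print("unknown text looks more like speaker B")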
Example #15
def test_keys_13():
    table = Hashtable(12000, None, 0.5, 2)
    simple_13(table)
Example #16
def test_init_2():
    table = Hashtable(1, DEF_VAL1, 0.5, 2)
    simple_2(table)
Example #17
def test_keys_12():
    table = Hashtable(1, None, 0.5, 2)
    simple_12(table)
Example #18
def test_contains_11():
    table = Hashtable(50, None, 0.5, 2)
    simple_10(table)
Example #19
def test_getset_9():
    table = Hashtable(12000, None, 0.5, 2)
    simple_9(table)
Example #20
def test_del_23():
    table = Hashtable(12000, None, 0.5, 2)
    simple_23(table)
Example #21
def test_iter_16():
    table = Hashtable(1, None, 0.5, 2)
    simple_16(table)
Example #22
def test_init_1():
    table = Hashtable(1, None, 0.5, 2)
    simple_1(table)
Example #23
def test_iter_17():
    table = Hashtable(12000, None, 0.5, 2)
    simple_17(table)
Example #24
def test_values_14():
    table = Hashtable(1, None, 0.5, 2)
    simple_14(table)
Example #25
def test_getset_4():
    table = Hashtable(1, None, 0.5, 2)
    simple_4(table)