Exemple #1
0
    def test_items(self):
        """Check the histogram's size, equality and items against the fixtures."""
        histogram = Dictogram(self.text_list)
        expected_size = 5

        # The histogram itself must match the reference dictionary fixture.
        assert len(histogram) == expected_size
        assert histogram == self.hist_dict
        self.assertItemsEqual(histogram, self.hist_dict)

        # Its items() view must match the reference list fixture.
        pairs = histogram.items()
        assert len(pairs) == expected_size
        self.assertItemsEqual(pairs, self.hist_list)
Exemple #2
0
def make_markov_model(order, data):
    """Record, for every window of `order` items in `data`, the item after it.

    Each window is stored as a tuple key in `models`; the value is a Dictogram
    that receives every item seen immediately after that window.

    NOTE(review): `models` is not created in this function, so it must come
    from an enclosing scope -- confirm where it is defined and whether it is
    meant to accumulate across calls.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable sequence of items (needs len(), indexing and slicing).

    Returns:
        The `models` mapping after it has been updated.
    """
    for start in range(len(data) - order):
        end = start + order
        state = tuple(data[start:end])
        if state not in models:
            # First sighting of this window: start a new histogram for it.
            models[state] = Dictogram([data[end]])
        else:
            models[state].update([data[end]])
    return models
Exemple #3
0
def main(corpus_path="/Users/ownernopassword/Desktop/Tweet-Generator-Course/harry.txt"):
    """Generate one sentence from the corpus file at `corpus_path`.

    The file is handed to `refinedCorpus.refineText`, the result is turned into
    a Dictogram, and a StochasticSample built on that histogram produces the
    sentence.

    Args:
        corpus_path: Path of the text file to read. Defaults to the location
            that used to be hard-coded, so calling `main()` behaves as before.

    Returns:
        Whatever `StochasticSample.generateSentence()` returns.
    """
    # The context manager guarantees the handle is closed, even on errors.
    # Everything stays inside the block in case refineText reads the file
    # lazily.
    with open(corpus_path) as txt:
        clean_text = refinedCorpus.refineText(txt)
        histogram = Dictogram(clean_text)
        sampler = StochasticSample(histogram)
        return sampler.generateSentence()
Exemple #4
0
def make_markov_model(data):
    """Record, for each item in `data`, the item that directly follows it.

    Every item except the last becomes a key in `models`; the value is a
    Dictogram that receives each item seen right after that key.

    NOTE(review): `models` is not created in this function, so it must come
    from an enclosing scope -- confirm where it is defined.

    Args:
        data: Indexable sequence of items (needs len() and indexing).

    Returns:
        The `models` mapping after it has been updated.
    """
    pair_count = len(data) - 1
    for position in range(pair_count):
        current = data[position]
        if current not in models:
            models[current] = Dictogram([data[position + 1]])
        else:
            # Simply extend the distribution that already exists for this item.
            models[current].update([data[position + 1]])
    return models
Exemple #5
0
def make_markov_model(data):
    """Build a mapping from each item in `data` to a Dictogram of its successors.

    Args:
        data: Indexable sequence of items (needs len() and indexing).

    Returns:
        A new dict. Every item except the last is a key; its value is a
        Dictogram fed with the items that appear immediately after it.
        Sequences with fewer than two items yield an empty dict.
    """
    transitions = {}
    for index in range(len(data) - 1):
        state = data[index]
        histogram = transitions.get(state)
        if histogram is None:
            # First occurrence of this state: create its histogram.
            transitions[state] = Dictogram([data[index + 1]])
        else:
            histogram.update([data[index + 1]])
    return transitions
Exemple #6
0
 def test_update(self):
     """Check type, token and per-word counts after updating the histogram."""
     histogram = Dictogram(self.text_list)
     histogram.update(['two', 'blue', 'fish', 'food'])

     assert histogram.types == 6
     assert histogram.tokens == 12

     # Expected count of every word once the extra words have been added.
     expected_counts = (
         ('one', 1),
         ('two', 2),
         ('red', 1),
         ('blue', 2),
         ('fish', 5),
         ('food', 1),
     )
     for word, expected in expected_counts:
         assert histogram.count(word) == expected
def make_markov_model(data):
    """Build a mapping from each item in `data` to a Dictogram of its successors.

    Args:
        data: Indexable sequence of items (needs len() and indexing).

    Returns:
        A new dict keyed by every item that has a successor. Each value is a
        Dictogram that was created from, and then updated with, the items
        found directly after that key.
    """
    model = {}
    pair_count = len(data) - 1
    for i in range(pair_count):
        word = data[i]
        if word not in model:
            model[word] = Dictogram([data[i + 1]])
            continue
        # The word already has a histogram, so extend it with this successor.
        model[word].update([data[i + 1]])
    return model
Exemple #8
0
def make_higher_order_markov_model(order, data):
    """Build a mapping from windows of `order` items to a Dictogram of followers.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable sequence of items (needs len(), indexing and slicing).

    Returns:
        A new dict whose keys are tuples of `order` consecutive items and whose
        values are Dictogram objects fed with the item after each window.
    """
    model = {}
    for start in range(len(data) - order):
        stop = start + order
        key = tuple(data[start:stop])
        # NOTE(review): the following item is handed to Dictogram as-is, not
        # wrapped in a one-element list -- confirm that Dictogram accepts a
        # bare item here and does not iterate over it.
        if key not in model:
            model[key] = Dictogram(data[stop])
        else:
            model[key].update(data[stop])
    return model
Exemple #9
0
def dict_o_listo(word_list):
    """Map every word in `word_list` to a Dictogram of the words that follow it.

    The last word is linked back to the first one, so every key has at least
    one successor.

    Fixes over the previous version:
      * the final transition (second-to-last word -> last word) is now
        recorded; the old loop bound of ``len - 2`` skipped it;
      * the wrap-around successor is added to the last word's Dictogram
        instead of replacing that Dictogram with the set ``{first_word, 1}``,
        which also discarded counts gathered from earlier occurrences;
      * an empty `word_list` returns an empty dict instead of raising
        IndexError.

    Args:
        word_list: Indexable sequence of words (needs len() and indexing).

    Returns:
        A dict with one key per distinct word; each value is a Dictogram.
    """
    dictionary = {}
    if not word_list:
        return dictionary

    # Give every distinct word an empty histogram up front.
    for word in word_list:
        if word not in dictionary:
            dictionary[word] = Dictogram(iterable=None)

    # Record each adjacent (word, next word) pair, including the last one.
    for i in range(len(word_list) - 1):
        dictionary[word_list[i]].update([word_list[i + 1]])

    # Wrap around: the last word is followed by the first word.
    dictionary[word_list[-1]].update([word_list[0]])
    return dictionary
def make_higher_order_markov_model(order, data):
    """Build a mapping from windows of `order` items to a Dictogram of followers.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable sequence of items (needs len(), indexing and slicing).

    Returns:
        A new dict whose keys are tuples of `order` consecutive items. Each
        value is a Dictogram fed with every item found right after that window.
    """
    model = {}
    window_count = len(data) - order
    for start in range(window_count):
        follower_index = start + order
        key = tuple(data[start:follower_index])
        histogram = model.get(key)
        if histogram is None:
            # New window: start its Dictogram with the following item.
            model[key] = Dictogram([data[follower_index]])
        else:
            # Known window: extend its existing Dictogram.
            histogram.update([data[follower_index]])
    return model
Exemple #11
0
    def update(self, iterable):
        """Feed a sequence into the model, keyed by pairs of consecutive items.

        Each pair of neighbouring items becomes a tuple key; the item right
        after the pair is added to that key's Dictogram.

        Args:
            iterable: Indexable sequence, preferably a word list (it must
                support len(), indexing and slicing).
        """
        window = 2
        for start in range(len(iterable) - window):
            follower_index = start + window
            pair = tuple(iterable[start:follower_index])

            # Create an empty histogram the first time a pair is seen.
            if pair not in self:
                self[pair] = Dictogram()

            self[pair].update([iterable[follower_index]])
Exemple #12
0
    def update(self, iterable):
        """Feed a sequence into the model, keyed by single items.

        Every item except the last becomes a key; the item right after it is
        added to that key's Dictogram.

        Args:
            iterable: Indexable sequence, preferably a word list (it must
                support len() and indexing).
        """
        pair_total = len(iterable) - 1

        for position in range(pair_total):
            current = iterable[position]

            # Create an empty histogram the first time an item is seen.
            if current not in self:
                self[current] = Dictogram()

            self[current].update([iterable[position + 1]])
Exemple #13
0
def nth_order_markov_model(order, data):
    """Build a mapping from windows of `order` items to a Dictogram of followers.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable sequence of items (needs len(), indexing and slicing).

    Returns:
        A new dict keyed by tuples of `order` consecutive items; each value is
        a Dictogram fed with the items observed right after that window.
    """
    model = {}

    for offset in range(len(data) - order):
        # The window is the `order` items starting at this offset.
        after = offset + order
        state = tuple(data[offset:after])
        if state not in model:
            # Unseen window: create its histogram from the following item.
            model[state] = Dictogram([data[after]])
            continue
        # Seen window: add the following item to its histogram.
        model[state].update([data[after]])
    return model
Exemple #14
0
def markov_chain(data):
    """Build a first-order Markov model from `data`.

    Args:
        data: Indexable sequence of items (needs len() and indexing).

    Returns:
        A new dict in which every item except the last is a key and each value
        is a Dictogram of the items that directly follow that key.
    """
    chain = {}
    # Visit every position that has a successor.
    for index in range(len(data) - 1):
        word = data[index]
        histogram = chain.get(word)
        if histogram is None:
            # Unseen word: start its histogram with the next word.
            chain[word] = Dictogram([data[index + 1]])
        else:
            # Seen word: record one more successor.
            histogram.update([data[index + 1]])
    return chain
Exemple #15
0
def make_markov_model(data):
    """Build a first-order Markov model from `data`.

    The model is a dictionary whose keys are the current states (items of
    `data`) and whose values are Dictogram objects holding the items that were
    seen next, so the possible next states of any state can be looked up.

    Args:
        data: Indexable sequence of items (needs len() and indexing).

    Returns:
        The new dict described above; empty when `data` has fewer than two
        items.
    """
    model = {}
    transition_count = len(data) - 1
    for i in range(transition_count):
        state = data[i]
        if state not in model:
            model[state] = Dictogram([data[i + 1]])
        else:
            # The state already has a histogram, so extend it.
            model[state].update([data[i + 1]])
    return model
Exemple #16
0
 def test_count(self):
     """Check type, token and per-word counts of a freshly built histogram."""
     histogram = Dictogram(self.text_list)

     assert histogram.types == 5
     assert histogram.tokens == 8

     # Expected count per word; 'food' is asserted to have a count of zero.
     expected_counts = (
         ('one', 1),
         ('two', 1),
         ('red', 1),
         ('blue', 1),
         ('fish', 4),
         ('food', 0),
     )
     for word, expected in expected_counts:
         assert histogram.count(word) == expected
Exemple #17
0
def make_higher_order_markov_model(order, data):
    """Build an Nth-order Markov model from `data`.

    This works like a first-order model, except that each key is a tuple of
    `order` consecutive items. A tuple is used rather than a list because
    dictionary keys must not change and tuples are immutable.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable sequence of items (needs len(), indexing and slicing).

    Returns:
        A new dict mapping each window tuple to a Dictogram of the items that
        directly follow that window.
    """
    model = {}
    for begin in range(len(data) - order):
        nxt = begin + order
        state = tuple(data[begin:nxt])
        if state not in model:
            # First time this window appears: create its Dictogram.
            model[state] = Dictogram([data[nxt]])
        else:
            # Window seen before: extend its existing Dictogram.
            model[state].update([data[nxt]])
    return model
    def make_markov_model(self, order, data):
        """
        Build the model.

        Prints the corpus length first, then a progress value every
        100000 windows while building.

        :param order: Number of consecutive items that make up one window
        :type order: int
        :param data: Corpus
        :type data: list
        :return: Markov model
        :rtype: dict
        """
        model = {}
        total = len(data)
        print(total)
        progress_step = 100000
        for start in range(total - order):
            end = start + order
            key = tuple(data[start:end])
            # Progress report at every `progress_step`-th window.
            if start % progress_step == 0:
                print(start / progress_step)
            if key not in model:
                model[key] = Dictogram([data[end]])
            else:
                model[key].update([data[end]])
        return model
def get_random_sentence(word_list):
    """Return a space-separated string of 5 to 15 sampled words.

    A Dictogram is built from `word_list`, a word count is drawn with
    `random.randint(5, 15)`, and `get_weighted_sample` is called on the
    histogram once per word.

    Args:
        word_list: Words passed to the Dictogram constructor.

    Returns:
        The sampled words joined with single spaces.
    """
    histogram = Dictogram(word_list)
    word_count = random.randint(5, 15)

    words = []
    for _ in range(word_count):
        words.append(get_weighted_sample(histogram))
    return " ".join(words)
Exemple #20
0
 def test_contains(self):
     """Check membership for every fixture word and for two absent words."""
     histogram = Dictogram(self.text_list)

     # Every word the histogram was built from must be reported as present.
     for present in self.text_list:
         assert present in histogram

     # These two words must be reported as absent.
     for absent in ('fishy', 'food'):
         assert absent not in histogram