def make_markov_model(order, data):
    """Add every `order`-sized window of `data` to `models` and return it.

    Each window is stored as a tuple key. The first time a window is seen a
    Dictogram is created from a one-item list holding the item that follows
    the window; on later sightings that Dictogram is updated the same way.

    NOTE(review): `models` is not defined inside this function; it is
    presumably supplied by an enclosing scope (e.g. a module-level dict).
    Confirm before relying on this function in isolation.

    Args:
        order: Number of consecutive items that make up one window.
        data: Indexable, sliceable sequence of items.

    Returns:
        The `models` mapping after all windows have been added.
    """
    window_count = len(data) - order
    for start in range(window_count):
        end = start + order
        window = tuple(data[start:end])
        follower = data[end]
        if window not in models:
            models[window] = Dictogram([follower])
        else:
            models[window].update([follower])
    return models
def main():
    """Generate one sentence from the hard-coded corpus file.

    Opens the corpus, passes the open file to ``refinedCorpus.refineText``,
    builds a Dictogram from the result, wraps it in a ``StochasticSample``
    and returns whatever ``generateSentence()`` produces.

    Returns:
        The value returned by ``StochasticSample.generateSentence()``.
    """
    corpus_path = (
        "/Users/ownernopassword/Desktop/Tweet-Generator-Course/harry.txt")
    # The context manager guarantees the file is closed, even if a later
    # step raises. All processing stays inside the block in case the cleaned
    # text is produced lazily from the open file.
    with open(corpus_path) as txt:
        clean_text = refinedCorpus.refineText(txt)
        histogram = Dictogram(clean_text)
        stochastic_sampler = StochasticSample(histogram)
        return stochastic_sampler.generateSentence()
def make_markov_model(data):
    """Add every adjacent pair of `data` to `models` and return it.

    For each item that has a successor, the item is used as a key. Its value
    is a Dictogram created from (first sighting) or updated with (later
    sightings) a one-item list holding the successor.

    NOTE(review): `models` is not defined inside this function; it is
    presumably supplied by an enclosing scope. Confirm against the caller.

    Args:
        data: Indexable sequence of items.

    Returns:
        The `models` mapping after all pairs have been added.
    """
    for position in range(len(data) - 1):
        current = data[position]
        following = data[position + 1]
        if current not in models:
            models[current] = Dictogram([following])
        else:
            # Simply add to the distribution that already exists.
            models[current].update([following])
    return models
def test_items(self):
    """Check a Dictogram's size, equality and items against the fixtures.

    NOTE(review): ``assertItemsEqual`` is the Python 2 ``unittest`` name
    (Python 3 calls it ``assertCountEqual``); confirm the target version.
    """
    expected_size = 5
    histogram = Dictogram(self.text_list)

    # The Dictogram itself should match the fixture dict.
    assert len(histogram) == expected_size
    assert histogram == self.hist_dict
    self.assertItemsEqual(histogram, self.hist_dict)

    # Its items view should match the fixture list.
    pairs = histogram.items()
    assert len(pairs) == expected_size
    self.assertItemsEqual(pairs, self.hist_list)
def make_markov_model(data):
    """Build a first-order model from adjacent pairs in `data`.

    Args:
        data: Indexable sequence of items.

    Returns:
        A dict whose keys are the items that have a successor. Each value is
        a Dictogram created from, then updated with, one-item lists holding
        each successor in turn.
    """
    markov_model = {}
    pair_count = len(data) - 1
    for index in range(pair_count):
        word, successor = data[index], data[index + 1]
        histogram = markov_model.get(word)
        if histogram is None:
            markov_model[word] = Dictogram([successor])
        else:
            histogram.update([successor])
    return markov_model
def test_count(self):
    """Check types, tokens and per-word counts for the fixture word list."""
    histogram = Dictogram(self.text_list)
    assert histogram.types == 5
    assert histogram.tokens == 8

    # (word, expected count) pairs, checked in this order.
    expected_counts = (
        ('one', 1),
        ('two', 1),
        ('red', 1),
        ('blue', 1),
        ('fish', 4),
        ('food', 0),
    )
    for word, expected in expected_counts:
        assert histogram.count(word) == expected
def make_markov_model(data):
    """Build a first-order model from adjacent pairs in `data`.

    Args:
        data: Indexable sequence of items.

    Returns:
        A dict mapping each item that has a successor to a Dictogram fed
        with those successors, one at a time.
    """
    markov_model = {}
    for position in range(len(data) - 1):
        state = data[position]
        next_state = data[position + 1]
        if state not in markov_model:
            # First sighting: start a new histogram for this state.
            markov_model[state] = Dictogram([next_state])
        else:
            # Already known: extend the histogram that is there.
            markov_model[state].update([next_state])
    return markov_model
def make_higher_order_markov_model(order, data):
    """Build an `order`-th order model from sliding windows over `data`.

    Args:
        order: Number of consecutive items that make up one window (key).
        data: Indexable, sliceable sequence of items.

    Returns:
        A dict mapping each window (a tuple of `order` items) to a Dictogram
        fed with the items that immediately follow that window.
    """
    markov_model = dict()
    for i in range(0, len(data) - order):
        window = tuple(data[i:i + order])
        next_item = data[i + order]
        # Wrap the successor in a list so it is counted as ONE item. Passing
        # it bare would hand the Dictogram the item itself as the iterable
        # (e.g. a word would be split into characters).
        if window in markov_model:
            markov_model[window].update([next_item])
        else:
            markov_model[window] = Dictogram([next_item])
    return markov_model
def dict_o_listo(word_list):
    """Map each word to a Dictogram of the words that follow it.

    Every distinct word gets its own Dictogram. Each adjacent pair
    ``(word, next_word)`` is recorded by updating ``word``'s Dictogram with
    ``[next_word]``. The list is treated as circular: the last word is also
    recorded as being followed by the first word.

    Args:
        word_list: Sequence of words (supports indexing and slicing).

    Returns:
        A dict of word -> Dictogram. Empty input yields an empty dict.
    """
    dictionary = {}
    if not word_list:
        # Nothing to record; also avoids indexing word_list[-1] below.
        return dictionary

    for word in word_list:
        if word not in dictionary:
            dictionary[word] = Dictogram(iterable=None)

    # Visit every adjacent pair, including the final (second-to-last, last)
    # pair.
    for first, second in zip(word_list, word_list[1:]):
        dictionary[first].update([second])

    # Wrap around: last word -> first word. Update the existing Dictogram
    # rather than replacing it, so earlier transitions out of the same word
    # are kept and every value stays a Dictogram.
    dictionary[word_list[-1]].update([word_list[0]])
    return dictionary
def test_update(self):
    """Check types, tokens and counts after updating with four more words."""
    histogram = Dictogram(self.text_list)
    extra_words = ['two', 'blue', 'fish', 'food']
    histogram.update(extra_words)

    assert histogram.types == 6
    assert histogram.tokens == 12

    # (word, expected count after the update) pairs, checked in this order.
    expected_counts = (
        ('one', 1),
        ('two', 2),
        ('red', 1),
        ('blue', 2),
        ('fish', 5),
        ('food', 1),
    )
    for word, expected in expected_counts:
        assert histogram.count(word) == expected
def make_higher_order_markov_model(order, data):
    """Build an `order`-th order model from sliding windows over `data`.

    Args:
        order: Number of consecutive items that make up one window (key).
        data: Indexable, sliceable sequence of items.

    Returns:
        A dict mapping each window tuple to a Dictogram fed with the items
        that immediately follow that window.
    """
    markov_model = {}
    for start in range(len(data) - order):
        stop = start + order
        # The window is the dictionary key.
        window = tuple(data[start:stop])
        try:
            histogram = markov_model[window]
        except KeyError:
            # New window: start its Dictogram with the following item.
            markov_model[window] = Dictogram([data[stop]])
        else:
            # Known window: add the following item to its Dictogram.
            histogram.update([data[stop]])
    return markov_model
def update(self, iterable):
    """Update the data within the model.

    Every adjacent pair ``(item, next_item)`` is recorded: ``item`` becomes
    a key of this mapping (with a fresh Dictogram if it is new) and its
    Dictogram is updated with ``[next_item]``.

    Args:
        iterable: Any iterable (preferably a word list). It is consumed in a
            single pass, so generators and other one-shot iterators work as
            well as sequences. Fewer than two items leaves the model
            untouched.
    """
    items = iter(iterable)
    try:
        current = next(items)
    except StopIteration:
        # Empty input: nothing to record.
        return
    for following in items:
        if current not in self:
            self[current] = Dictogram()
        self[current].update([following])
        current = following
def update(self, iterable):
    """Update the data within the model.

    Every run of three consecutive items ``(a, b, c)`` is recorded: the
    tuple ``(a, b)`` becomes a key of this mapping (with a fresh Dictogram
    if it is new) and its Dictogram is updated with ``[c]``.

    Args:
        iterable: Any iterable (preferably a word list). It is materialized
            once, so generators and other one-shot iterators work as well
            as sequences. Fewer than three items leaves the model untouched.
    """
    words = list(iterable)
    for first, second, following in zip(words, words[1:], words[2:]):
        key = (first, second)
        if key not in self:
            self[key] = Dictogram()
        self[key].update([following])
def markov_chain(data):
    """Build a first-order Markov model from `data`.

    Args:
        data: Indexable sequence of items.

    Returns:
        A dict keyed by each item that has a successor; each value is a
        Dictogram fed with the items that directly follow that key.
    """
    # Maps a window (here a single item) to the Dictogram of its successors.
    chain = {}
    # Stop one short of the end: the last item has no successor.
    for position in range(len(data) - 1):
        state = data[position]
        next_state = data[position + 1]
        if state not in chain:
            # First time this state is seen: start its Dictogram.
            chain[state] = Dictogram([next_state])
            continue
        # Known state: record one more successor.
        chain[state].update([next_state])
    return chain
def nth_order_markov_model(order, data):
    """Build an `order`-th order model from sliding windows over `data`.

    Args:
        order: Number of consecutive items that make up one window (key).
        data: Indexable, sliceable sequence of items.

    Returns:
        A dict mapping each window tuple to a Dictogram fed with the items
        that immediately follow that window.
    """
    model = {}
    window_count = len(data) - order
    for offset in range(window_count):
        boundary = offset + order
        # Build the window that serves as the key.
        window = tuple(data[offset:boundary])
        successors = model.get(window)
        if successors is None:
            # Unseen window: create its entry.
            model[window] = Dictogram([data[boundary]])
        else:
            # Window already present: extend its entry.
            successors.update([data[boundary]])
    return model
def make_markov_model(data):
    """Build a first-order Markov model from `data`.

    Markov model structure: a dictionary whose keys are windows (here,
    single items) and whose values are Dictograms. Each Dictogram acts as a
    histogram of the words seen after its window, so the possible next
    states are known for any current state.

    Args:
        data: Indexable sequence of items.

    Returns:
        The dict described above.
    """
    model = {}
    for index in range(len(data) - 1):
        current_state = data[index]
        try:
            histogram = model[current_state]
        except KeyError:
            model[current_state] = Dictogram([data[index + 1]])
        else:
            # The state already has a histogram; just add to it.
            histogram.update([data[index + 1]])
    return model
def make_higher_order_markov_model(order, data):
    """Build an Nth-order Markov model from `data`.

    Nth-order structure: like the first-order model, except that each key is
    a tuple of `order` consecutive items. A tuple is used rather than a list
    because dictionary keys must not change, and tuples are immutable.

    Args:
        order: Number of consecutive items that make up one window (key).
        data: Indexable, sliceable sequence of items.

    Returns:
        A dict mapping each window tuple to a Dictogram fed with the items
        that immediately follow that window.
    """
    model = {}
    for begin in range(len(data) - order):
        finish = begin + order
        window = tuple(data[begin:finish])
        upcoming = data[finish]
        if window not in model:
            # New window: create its Dictogram.
            model[window] = Dictogram([upcoming])
        else:
            # Existing window: add to the Dictogram already stored.
            model[window].update([upcoming])
    return model
def make_markov_model(self, order, data):
    """
    Build the model.

    Prints the corpus length up front and a progress figure every
    100000 windows.

    :param order: number of consecutive items that form one window (key)
    :type order: int
    :param data: the corpus
    :type data: list
    :return: the Markov model
    :rtype: dict
    """
    model = {}
    print(len(data))
    for position in range(len(data) - order):
        boundary = position + order
        window = tuple(data[position:boundary])
        # Progress report, emitted before the window is recorded.
        if position % 100000 == 0:
            print(position / 100000)
        histogram = model.get(window)
        if histogram is None:
            model[window] = Dictogram([data[boundary]])
        else:
            histogram.update([data[boundary]])
    return model
def get_random_sentence(word_list):
    """Return a space-joined string of 5 to 15 sampled words.

    A Dictogram is built from `word_list`, a length is drawn with
    ``random.randint(5, 15)``, and ``get_weighted_sample`` is called on the
    Dictogram once per word.

    Args:
        word_list: Words used to build the Dictogram.

    Returns:
        The sampled words joined by single spaces.
    """
    histogram = Dictogram(word_list)
    word_count = random.randint(5, 15)
    words = []
    for _ in range(word_count):
        words.append(get_weighted_sample(histogram))
    return " ".join(words)
def test_contains(self):
    """Check membership: fixture words are present, unseen words are not."""
    histogram = Dictogram(self.text_list)
    # Every word used to build the Dictogram must be reported as present.
    assert all(word in histogram for word in self.text_list)
    # Words that were never added must be reported as absent.
    assert not any(word in histogram for word in ['fishy', 'food'])