def run(c, n, length):
    """Run a Markov model given a corpus version and a look-back.

    Params:
        c - corpus version (int)
        n - lookback (int)
        length - number of words to generate (int)

    Returns:
        The generated paragraph wrapped in double quotes (string).
    """
    # set to 100,000 to avoid max recursion depth exceeded error
    sys.setrecursionlimit(100000)

    words = tokenize("corpus/clean_corpus_{}.txt".format(c))
    model_path = "models/trained_model{}_{}.p".format(c, n)
    model = None

    # If a non-empty pickled model already exists, load it.
    # EAFP: the original opened in "rb" unconditionally and crashed with
    # FileNotFoundError on a fresh setup, so training was unreachable then.
    try:
        with open(model_path, "rb") as model_file:
            if model_file.peek():
                model = pickle.load(model_file)
    except FileNotFoundError:
        pass

    # If not, train one and pickle it for next time.
    if not model:
        with open(model_path, "wb") as model_file:
            model = MarkovModel()
            model.train(words, n)
            pickle.dump(model, model_file)

    sentences = model.random_walk(length)
    paragraph = "\""
    for sentence in sentences:
        # Lowercase then capitalize so each sentence starts with exactly
        # one leading capital letter.
        paragraph += sentence.lower().capitalize() + " "
    return paragraph + "\""
# A Histogram structured as a Hash Table
import os

from flask import Flask, request

from markovmodel import MarkovModel
import cleanuptext

# Read the corpus once at startup; the original leaked the file handle
# (open() without close()) and shadowed the builtin name `file`.
with open('text.txt') as corpus_file:
    text = corpus_file.read()

# Create models
tokens = cleanuptext.tokenizetext(text)
model = MarkovModel(tokens, 3)

app = Flask(__name__)


@app.route('/')
def main():
    """Return one randomly walked sentence, capitalized and period-ended."""
    words = model.walk()
    words_string = ' '.join(words)
    # Guard against an empty walk — the original raised IndexError on ''.
    if words_string:
        words_string = words_string[0].upper() + words_string[1:] + '.'
    return words_string


if __name__ == '__main__':
    # Honor the PORT env var (e.g. set by the hosting platform), default 5000.
    port = int(os.environ.get("PORT", 5000))
    app.run(debug=True, host='0.0.0.0', port=port)
from markovmodel import MarkovModel
from tokenize import tokenize
import pickle
import sys

if __name__ == "__main__":
    # set to 100,000 to avoid max recursion depth exceeded error
    sys.setrecursionlimit(100000)

    # c = Corpus version number
    c = 5
    # NOTE(review): the stdlib `tokenize.tokenize` expects a readline
    # callable, not a filename — presumably a local tokenize.py shadows
    # it; verify against the project layout.
    words = tokenize("corpus/clean_corpus_{}.txt".format(c))
    model = None

    # n = lookback
    n = 3
    model_path = "models/trained_model{}_{}.p".format(c, n)

    # If a non-empty pickled model already exists, load it.
    # EAFP: the original crashed with FileNotFoundError on a fresh setup.
    try:
        with open(model_path, "rb") as model_file:
            if model_file.peek():
                model = pickle.load(model_file)
    except FileNotFoundError:
        pass

    # If not, train one and pickle it.  The original referenced
    # MarkovModel without importing it, raising NameError here.
    if not model:
        with open(model_path, "wb") as model_file:
            model = MarkovModel()
            model.train(words, n)
            pickle.dump(model, model_file)

    # Keep asking for a length until the user enters 0.  The original
    # compared the *builtin function* `input` to 0, which is always
    # truthy, so the loop could never terminate by entering 0.
    inp = input("Len: ")
    while inp != "0":
        sentences = model.random_walk(int(inp))
        print(sentences)
        inp = input("Len: ")
from markovmodel import MarkovModel
from tokenize import tokenize
import pickle
import sys

if __name__ == "__main__":
    # set to 100,000 to avoid max recursion depth exceeded error
    sys.setrecursionlimit(100000)

    # corpus version (c) -> list of lookback lengths (n) to train for it
    mapping = {
        1: [1, 2, 3, 4, 5, 6, 7],
        2: [2, 3, 4, 5],
        3: [2, 3],
        4: [1, 2, 3, 4, 5, 6],
        5: [1, 2, 3, 4]
    }

    # c = Corpus version number, n = lookback
    for c, lookbacks in mapping.items():
        for n in lookbacks:
            words = tokenize("corpus/clean_corpus_{}.txt".format(c))

            # Train the model and pickle it.  Opening with "wb" already
            # truncates the file, so the original's separate
            # open(...).close() "clear" step was redundant and is removed.
            with open("models/trained_model{}_{}.p".format(c, n),
                      "wb") as model_file:
                model = MarkovModel()
                model.train(words, n)
                pickle.dump(model, model_file)