def test_train(self):
    """Train a trigram model on one sentence and verify its count table.

    The expected table maps each two-token context tuple to a dict of
    ``{next_token: count}``. ``None`` appears in the contexts and
    continuations around the sentence boundaries (presumably the model's
    padding marker -- confirm against ``LanguageModel.train``).
    """
    model = lm.LanguageModel(3)
    model.train([['This', 'is', 'an', 'apple', '.']])
    expected = {
        (None, None): {'This': 1},
        (None, 'This'): {'is': 1},
        ('This', 'is'): {'an': 1},
        ('is', 'an'): {'apple': 1},
        ('an', 'apple'): {'.': 1},
        ('apple', '.'): {None: 1},
        ('.', None): {None: 1},
    }
    # assertEqual shows both values on failure; assertTrue(a == b) would
    # only report "False is not true".
    self.assertEqual(model.counts, expected)
def start():
    """Interactively create and train a language model, then return it.

    Reads an n-gram size from stdin, re-prompting until an integer between
    1 and 6 is entered. Then reads a file path, re-prompting while
    ``cp.open_file`` returns ``None``. The resulting tokens are used to
    train an ``lm.LanguageModel``, after which ``help()`` is called.

    Returns:
        The trained ``lm.LanguageModel`` instance.
    """
    print("Welcome. Let's create a language model together.\n"
          "What size n-grams do you desire?")

    n = None
    while n is None:
        # Only a failed int() conversion should trigger a re-prompt. A bare
        # ``except`` would also swallow EOFError (looping forever once stdin
        # is closed) and KeyboardInterrupt.
        try:
            n = int(input())
        except ValueError:
            print('Please enter an integer between 1 and 6.')
            continue
        if n < 1 or n > 6:
            print('Please enter an integer between 1 and 6.')
            n = None

    mdl = lm.LanguageModel(n)
    print("\nGreat choice!! You must train the model.\n"
          "Please tell us the path to a text that you'd like to use for "
          "training.")

    tokens = cp.open_file(input())
    # Identity check: a None result signals failure here.
    while tokens is None:
        print('Try again.')
        tokens = cp.open_file(input())
    mdl.train(tokens)

    print("\nYour model has been created. Here's a list of commands you can "
          "use to explore further.\n")
    # NOTE(review): presumably this module defines its own ``help`` that
    # prints the command list; the builtin would open the interactive help
    # prompt instead -- confirm.
    help()
    return mdl
def test_check(self):
    """Smoke-test a bigram model: train twice and print its state each time.

    The only assertion is that the constructed object is an
    ``lm.LanguageModel``. After each training pass the counts, pdf,
    perplexity and one generated text are printed for manual inspection.
    """
    self.langM = lm.LanguageModel(2)
    self.assertIsInstance(self.langM, lm.LanguageModel, "Belongs to same class")

    training_passes = (
        ['the', 'dog', 'runs'],
        [
            'the', 'cat', 'runs', 'the', 'cat', 'the', 'cat', 'thea', 'cat',
            'cat', 'the', 'cats'
        ],
    )
    for tokens in training_passes:
        self.langM.train(tokens)
        print(self.langM.counts)
        print(self.langM.pdf)
        print(self.langM.perplexity())
        print(self.langM.generate())
def init(n):
    """Build and return a new ``lm.LanguageModel`` constructed with ``n``."""
    model = lm.LanguageModel(n)
    return model
def test_math_funcs(self):
    """Check ``nthroot`` on three inputs whose expected result is 2."""
    # (value, degree) pairs: square root of 4, cube root of 8, fifth root of 32.
    cases = ((4, 2), (8, 3), (32, 5))
    for value, degree in cases:
        # A fresh bigram model is built for every case.
        root = lm.LanguageModel(2).nthroot(value, degree)
        self.assertEqual(root, 2, "Should be 2")
def test_generate(self):
    """Check that a generated text detokenizes to a string.

    A trigram model is trained on one sentence, and the output of
    ``generate()`` is passed through ``corpus.detokenize``.
    """
    model = lm.LanguageModel(3)
    model.train([['This', 'is', 'an', 'apple', '.']])
    text = corpus.detokenize(model.generate())
    # assertIsInstance names the actual type on failure and is the idiomatic
    # replacement for assertTrue(type(x) == str).
    self.assertIsInstance(text, str)
def _print_menu():
    """Print the numbered list of actions offered by ``main``."""
    print("Press 1 : Create a new language model with a user-specified n")
    print(
        "Press 2 : Load texts from a file, and train the language model on those texts"
    )
    print(
        "Press 3 : Generate a text from the language model, and print it to the screen"
    )
    print(
        "Press 4 : Generate a user-specified number of texts from the language model, and write them to a file"
    )
    print(
        "Press 5 : Print the predicted next word's probability distribution"
    )
    print("Press 6 : Perplexity of language model")
    print("Press 7 : Exit")
    print("Enter your choice (integer) ")


def _read_int():
    """Read one line from stdin; return it as an int, or None if not numeric."""
    try:
        return int(input())
    except ValueError:
        return None


def main():
    """Run the interactive menu loop for the language model.

    Repeatedly prints the menu, reads a choice from stdin and dispatches on
    it. Option 1 creates the model; options 2-6 operate on it and are
    refused with a message until a model exists. Option 7 terminates the
    process via ``sys.exit(0)``.
    """
    model = None
    needs_model = {"2", "3", "4", "5", "6"}

    while True:
        _print_menu()
        choice = input()

        # Options 2-6 dereference the model; without this guard, choosing
        # one of them before option 1 raised an unbound-variable error.
        if choice in needs_model and model is None:
            print()
            print("No language model exists yet. Press 1 to create one first.")
            continue

        if choice == "1":
            print()
            print("Enter the value of n(integer value)")
            n = _read_int()
            if n is None:
                print("Invalid input. Please enter an integer value for n")
                continue
            model = lm.LanguageModel(n)
            print("The value for ngram language model is ", n, "gram model")
        elif choice == "2":
            print()
            print("You have pressed 2")
            print("Enter the filename")
            filename = input()
            model.load(filename)
        elif choice == "3":
            print()
            print("You have pressed 3 ")
            print("Generate a random text")
            print(corpus.detokenize(model.generate()))
        elif choice == "4":
            print()
            print("You have pressed 4 ")
            print("Enter the number for how many random texts you want")
            number_random = _read_int()
            if number_random is None:
                print("Invalid input. Please enter an integer number of texts")
                continue
            print("Enter the filename you want to save for random text")
            filename = input()
            # ``with`` closes the file even if generation raises. range()
            # yields nothing for a negative count, where the previous
            # "decrement until == 0" loop never terminated.
            with open(filename, "w") as out:
                for _ in range(number_random):
                    out.write(corpus.detokenize(model.generate()) + "\n")
        elif choice == "5":
            print()
            print("You have pressed 5 ")
            print(
                "Enter the text and predict the next word's probability distribution"
            )
            s = input().lower()
            print(model.p_next(corpus.tokenize(s)))
        elif choice == "6":
            print()
            print("You have pressed 6 ")
            print("Perplexity of the current language model is ",
                  round(model.perplexity()))
        elif choice == "7":
            print()
            print("You have pressed 7 for exit")
            # Exiting is allowed without a model, so only report the pdf
            # size when there is one.
            if model is not None:
                print(len(model.pdf))
            print("Exiting the main program")
            sys.exit(0)
        else:
            print(
                "Incorrect input. Please enter correct input for selecting option"
            )