Esempio n. 1
0
 def test_train(self):
     """Train ``lm.LanguageModel(3)`` on one sentence and check ``counts``.

     The expected table maps each two-token context to the counts of the
     token that follows it. As the literal below shows, ``None`` stands in
     for the positions before the first token and after the last one.
     """
     model = lm.LanguageModel(3)
     model.train([['This', 'is', 'an', 'apple', '.']])
     expected = {
         (None, None): {
             'This': 1
         },
         (None, 'This'): {
             'is': 1
         },
         ('This', 'is'): {
             'an': 1
         },
         ('is', 'an'): {
             'apple': 1
         },
         ('an', 'apple'): {
             '.': 1
         },
         ('apple', '.'): {
             None: 1
         },
         ('.', None): {
             None: 1
         }
     }
     # assertEqual reports both operands on failure, whereas
     # assertTrue(a == b) would only say "False is not true".
     self.assertEqual(model.counts, expected)
Esempio n. 2
0
def start():
    """Interactively create and train a language model.

    Prompts on stdin for an n-gram size, re-asking until an integer in the
    range 1-6 is entered. It then prompts for a file path, re-asking until
    ``cp.open_file`` returns something other than ``None``. The model is
    trained on that result, ``help()`` is called (presumably this file's own
    command listing -- its definition is not visible here), and the model is
    returned.

    Returns:
        The ``lm.LanguageModel`` instance after ``train`` has been called.
    """
    n = None
    print(
        '''Welcome. Let\'s create a language model together.\nWhat size n-grams do you desire?'''
    )
    while n is None:
        try:
            n = int(input())
        except ValueError:
            # Only a non-numeric entry is retried. KeyboardInterrupt and
            # EOFError must propagate: swallowing them would make Ctrl+C
            # ineffective and turn a closed stdin into an endless re-prompt.
            print('Please enter an integer between 1 and 6.')
            continue
        if n < 1 or n > 6:
            print('Please enter an integer between 1 and 6.')
            n = None
    mdl = lm.LanguageModel(n)
    print(
        '''\nGreat choice!! You must train the model.\nPlease tell us the path to a text that you\'d like to use for training.'''
    )
    filename = input()
    tokens = cp.open_file(filename)
    # cp.open_file signals failure by returning None; keep asking until it
    # yields a value.
    while tokens is None:
        print('Try again.')
        filename = input()
        tokens = cp.open_file(filename)
    mdl.train(tokens)
    print(
        '\nYour model has been created. Here\'s a list of commands you can use to explore further.\n'
    )
    help()
    return mdl
Esempio n. 3
0
 def test_check(self):
     """Smoke-test ``lm.LanguageModel(2)``: train twice, dumping state each time.

     After each training round the model's ``counts`` and ``pdf`` attributes
     and the results of ``perplexity()`` and ``generate()`` are printed.
     Apart from the type check, nothing is asserted.
     """
     self.langM = lm.LanguageModel(2)
     self.assertIsInstance(self.langM, lm.LanguageModel,
                           "Belongs to same class")
     training_rounds = (
         ['the', 'dog', 'runs'],
         [
             'the', 'cat', 'runs', 'the', 'cat', 'the', 'cat', 'thea', 'cat',
             'cat', 'the', 'cats'
         ],
     )
     for tokens in training_rounds:
         self.langM.train(tokens)
         # Same four dumps, in the same order, after every round.
         print(self.langM.counts)
         print(self.langM.pdf)
         print(self.langM.perplexity())
         print(self.langM.generate())
Esempio n. 4
0
def init(n):
    """Construct a new ``lm.LanguageModel`` from ``n`` and return it."""
    model = lm.LanguageModel(n)
    return model
Esempio n. 5
0
 def test_math_funcs(self):
     """Check ``nthroot`` on three inputs whose expected result is 2."""
     # (value, degree) pairs; a fresh model is built for every case.
     cases = ((4, 2), (8, 3), (32, 5))
     for value, degree in cases:
         self.assertEqual(
             lm.LanguageModel(2).nthroot(value, degree), 2, "Should be 2")
Esempio n. 6
0
 def test_generate(self):
     """Check that detokenizing a generated sequence yields a ``str``."""
     model = lm.LanguageModel(3)
     model.train([['This', 'is', 'an', 'apple', '.']])
     generated = corpus.detokenize(model.generate())
     # assertIsInstance names the actual type on failure, whereas
     # assertTrue(type(x) == str) would only report "False is not true".
     self.assertIsInstance(generated, str)
Esempio n. 7
0
def main():
    """Run the interactive text menu for a language model.

    Loops forever: prints the seven menu options, reads one line from stdin
    and dispatches on it. Option 1 creates the model, options 2-6 operate on
    it, and option 7 ends the process through ``sys.exit(0)``.

    Options 2-6 are refused with a message until option 1 has created a
    model, and non-integer answers to the numeric prompts are reported and
    return the user to the menu instead of raising.
    """
    c = None  # current model; stays None until option 1 succeeds
    while True:

        print("Press 1 : Create a new language model with a user-specified n")
        print(
            "Press 2 : Load texts from a file, and train the language model on those texts"
        )
        print(
            "Press 3 : Generate a text from the language model, and print it to the screen"
        )
        print(
            "Press 4 : Generate a user-specified number of texts from the language model, and write them to a file"
        )
        print(
            "Press 5 : Print the predicted  next word's probability distribution"
        )
        print("Press 6 : Perplexity of language model")
        print("Press 7 : Exit")
        print("Enter your choice (integer) ")
        text = input()
        if text == "1":
            print()
            print("Enter the value of n(integer value)")
            try:
                n = int(input())
            except ValueError:
                print("Invalid input. Please enter an integer value for n")
                continue
            c = lm.LanguageModel(n)
            print("The value for ngram language model is ", n, "gram model")

        elif text in ("2", "3", "4", "5", "6") and c is None:
            # Every one of these options dereferences the model, so without
            # this guard the function would fail with UnboundLocalError.
            print()
            print("No language model exists yet. Press 1 to create one first")

        elif text == "2":
            print()
            print("You have pressed 2")
            print("Enter the filename")
            filename = input()
            c.load(filename)

        elif text == "3":
            print()
            print("You have pressed 3 ")
            print("Generate a random text")
            print(corpus.detokenize(c.generate()))

        elif text == "4":
            print()
            print("You have pressed 4 ")
            print("Enter the number for how many random texts you want")
            try:
                number_random = int(input())
            except ValueError:
                print("Invalid input. Please enter an integer number of texts")
                continue
            print("Enter the filename you want to save for random text")
            filename = input()
            # ``with`` closes the file even if generation raises, and
            # ``range`` terminates for negative counts, where counting down
            # to exactly zero would never stop.
            with open(filename, "w") as out_file:
                for _ in range(number_random):
                    out_file.write(corpus.detokenize(c.generate()) + "\n")

        elif text == "5":
            print()
            print("You have pressed 5 ")
            print(
                "Enter the text and predict the next word's probability distribution"
            )
            # The entered text is lower-cased before it is tokenized.
            s = input().lower()
            print(c.p_next(corpus.tokenize(s)))

        elif text == "6":
            print()
            print("You have pressed 6 ")
            print("Perplexity of the current language model is ",
                  round(c.perplexity()))

        elif text == "7":
            print()
            print("You have pressed 7 for exit")
            print("Exiting the main program")
            sys.exit(0)

        else:
            print(
                "Incorrect input. Please enter correct input for selecting option"
            )