def test_perplexity_produces_expected_values():
    """Trained 3-gram model should report perplexity == sqrt(2) on the fixture."""
    model = LanguageModel(3)
    model.train(open_file('kn_test.txt'))
    # Compare at 5 decimal places to sidestep float noise.
    observed = round(model.perplexity(2, math.log(0.5)), 5)
    assert observed == round(math.sqrt(2), 5)
def main(args):
    """
    Main function of the program; operates based on the argument provided.

    Train
        - Ask for ngram
        - Ask for training file path
        - Train language model
        - Save the trained model
    Generate
        - Load the saved model from pickle file
        - Ask for a beam search (y/n)
        - Ask beam length
        - Print one generated sentence in terminal
        - Ask for number of sentences to be generated on file
        - Save the input number of sentences in a file (Default: new_shakespeare.txt)
    Perplexity
        - Load pickle file
        - Ask the test set file path
        - Print perplexity value
    Common
        - Load pickle
        - Ask number of most common ngram
        - Print the most common ngram with their occurrence number.

    Parameters
    ----------
    args : dict
        docopt-style argument mapping with command flags ('train', 'generate',
        'perplexity', 'common') and options ('--n', '--path', '--lines',
        '--number').
    """
    if args['train']:
        # Resolve n for the n-gram, falling back to an interactive prompt.
        if not args['--n']:
            ngram = input("Please enter n for n-gram (Default: 3)-\n")
            if not ngram:
                ngram = 3
        else:
            ngram = args['--n']
        lm = LanguageModel(int(ngram))
        if not args['--path']:
            path = input("Please enter path of the file-\n")
        else:
            path = args['--path']
        lm.train(readFile(path))
        print("N-gram training completed")
        print("Saving the model")
        # Context manager guarantees the pickle handle is closed even if
        # dumping raises (original used explicit open/close).
        with open('trained_model_ngram.pkl', 'wb') as f:
            pickle.dump(lm, f)
        print("Model saved")
    if args['generate']:
        lm = loadPickle()
        if click.confirm('Do you want to generate with Beam search?', default=True):
            lm.beam_flag = True
            beam_size = input("Enter beam size (Default: 20)-\n")
            # BUG FIX: the original did `if not beam_size: lm.beam_width = beam_size`,
            # which set beam_width to the empty string when the user pressed
            # enter and silently ignored any value the user typed. Apply the
            # typed value when given, otherwise the advertised default of 20.
            lm.beam_width = int(beam_size) if beam_size else 20
        else:
            lm.beam_flag = False
        print("Generating one sentence in terminal...")
        print(detokenize(lm.generate()))
        if not args['--lines']:
            noOfText = input("Enter number of generated text you want to save (Default: 10)-\n")
            if not noOfText:
                noOfText = 10
        else:
            noOfText = args['--lines']
        # Build all sentences first, then write them in one pass.
        generated = [detokenize(lm.generate()) for _ in range(int(noOfText))]
        with open('new_shakespeare.txt', 'w') as f:
            for g in generated:
                f.write("%s\n" % g)
        print("Sentence file generated in current folder")
    if args['perplexity']:
        lm = loadPickle()
        if not args['--path']:
            path = input("Please enter path of the test file-\n")
        else:
            path = args['--path']
        print("Perplexity for {}-gram is {}".format(lm.ngram, lm.perplexity(readFile(path))))
    if args['common']:
        lm = loadPickle()
        # Default to the 5 most common n-grams when no count is supplied.
        number = args['--number'] if args['--number'] else 5
        lm.count_common_ngram(int(number))