def q4_calculate_oov_rate_for_reuters_test_with_respect_to_training():
    """Print the out-of-vocabulary rate reported for the Reuters test corpus.

    Builds a ``ReutersTrainingCorpus`` and a ``ReutersTestCorpus``, passes
    them (training first, test second) to
    ``OutOfVocabularyRateCalculator.calculate_out_of_vocabulary_rate`` and
    prints whatever that call returns. Nothing is returned.
    """
    # NOTE(review): a token-identical definition of this function follows
    # immediately in the file as shown; at import time the later one would
    # shadow this one. Confirm whether the duplicate is intentional.
    train_corpus = ReutersTrainingCorpus()
    test_corpus = ReutersTestCorpus()

    rate = OutOfVocabularyRateCalculator().calculate_out_of_vocabulary_rate(
        train_corpus,
        test_corpus,
    )
    print("Out of vocabulary rate: ", rate)
def q4_calculate_oov_rate_for_reuters_test_with_respect_to_training():
    """Compute and print the OOV rate of Reuters test against Reuters training.

    The training corpus is constructed first, then the test corpus, then the
    calculator; the rate returned by ``calculate_out_of_vocabulary_rate`` is
    printed after the label ``"Out of vocabulary rate: "``. The function
    itself returns ``None``.
    """
    # Tuple elements are evaluated left to right, so the training corpus is
    # still built before the test corpus.
    corpora = (ReutersTrainingCorpus(), ReutersTestCorpus())
    calculator = OutOfVocabularyRateCalculator()
    result = calculator.calculate_out_of_vocabulary_rate(*corpora)
    print("Out of vocabulary rate: ", result)
def q7_perplexity_oov_of_brown_corpus():
    """Print perplexity and out-of-vocabulary rate for the Brown corpus.

    A language model is built from ``ReutersTrainingCorpus`` using an
    ``InterpolatingNgramProbabilityCalculator`` and then evaluated against
    ``BrownCorpus``. Two lines are printed: the corpus perplexity and the
    out-of-vocabulary rate of Brown with respect to the training corpus.
    Nothing is returned.
    """
    # NOTE(review): the same function is defined again right after this one
    # in the file as shown (only whitespace differs); the later definition
    # would take precedence. Confirm whether both are needed.
    reuters_train = ReutersTrainingCorpus()
    brown_corpus = BrownCorpus()

    # Presumably the n-gram order and the per-order interpolation weights --
    # TODO confirm against the calculator classes.
    order = 3
    weights = [0.2518, 0.4691, 0.2791]

    container = NgramCalculatorContainer(reuters_train, order)
    interpolating = InterpolatingNgramProbabilityCalculator(container, weights)
    model = NgramLanguageModel(
        reuters_train,
        order,
        ngram_probability_calculator=interpolating,
    )

    perplexity_calculator = PerplexityCalculator()
    brown_perplexity = perplexity_calculator.calculate_corpus_perplexity(
        model, brown_corpus
    )

    brown_oov_rate = OutOfVocabularyRateCalculator().calculate_out_of_vocabulary_rate(
        reuters_train, brown_corpus
    )

    print("Brown perplexity best model: ", brown_perplexity)
    print("Brown out of vocabulary rate: ", brown_oov_rate)
def q7_perplexity_oov_of_brown_corpus():
    """Evaluate the Reuters-trained model on Brown and print the results.

    Steps, in order: build the Reuters training corpus and the Brown corpus;
    build an ``NgramLanguageModel`` over the training corpus whose
    probability calculator is an ``InterpolatingNgramProbabilityCalculator``;
    compute the Brown corpus perplexity under that model; compute the
    out-of-vocabulary rate of Brown relative to the training corpus; print
    both values. Returns ``None``.
    """
    source = ReutersTrainingCorpus()
    target = BrownCorpus()

    # The literal 3 is passed to both the container and the model, and the
    # three floats go to the interpolating calculator (presumably n-gram
    # order and interpolation weights -- verify against those classes).
    model = NgramLanguageModel(
        source,
        3,
        ngram_probability_calculator=InterpolatingNgramProbabilityCalculator(
            NgramCalculatorContainer(source, 3),
            [0.2518, 0.4691, 0.2791],
        ),
    )

    # Perplexity is computed before the OOV calculator is created, as in the
    # original statement order.
    ppl = PerplexityCalculator().calculate_corpus_perplexity(model, target)
    oov = OutOfVocabularyRateCalculator().calculate_out_of_vocabulary_rate(
        source, target
    )

    print("Brown perplexity best model: ", ppl)
    print("Brown out of vocabulary rate: ", oov)