def q4_calculate_oov_rate_for_reuters_test_with_respect_to_training():
    """Print the out-of-vocabulary rate of the Reuters test corpus.

    The rate is obtained from ``OutOfVocabularyRateCalculator``, called with
    the Reuters training corpus first and the Reuters test corpus second.
    Nothing is returned; the result is written to standard output.
    """
    reuters_train = ReutersTrainingCorpus()
    reuters_test = ReutersTestCorpus()

    rate = OutOfVocabularyRateCalculator().calculate_out_of_vocabulary_rate(
        reuters_train, reuters_test
    )
    print("Out of vocabulary rate: ", rate)
# Example #2
# 0
def q4_calculate_oov_rate_for_reuters_test_with_respect_to_training():
    """Report the Reuters test corpus out-of-vocabulary rate on stdout.

    Builds the Reuters training and test corpora, hands them (training
    first, test second) to an ``OutOfVocabularyRateCalculator`` and prints
    the value it returns. The function itself returns ``None``.
    """
    # Corpora are created in the same order they are passed to the calculator.
    corpora = (ReutersTrainingCorpus(), ReutersTestCorpus())
    rate_calculator = OutOfVocabularyRateCalculator()

    result = rate_calculator.calculate_out_of_vocabulary_rate(*corpora)
    print("Out of vocabulary rate: ", result)
def q7_perplexity_oov_of_brown_corpus():
    """Print the perplexity and out-of-vocabulary rate of the Brown corpus.

    A language model is built from the Reuters training corpus using an
    ``InterpolatingNgramProbabilityCalculator`` with fixed lambda values,
    then evaluated against the Brown corpus. Both results are written to
    standard output; nothing is returned.
    """
    reuters_train = ReutersTrainingCorpus()
    brown_corpus = BrownCorpus()

    # Presumably the interpolation weights for the three n-gram orders
    # (they sum to 1.0) -- confirm against the calculator's definition.
    weights = [0.2518, 0.4691, 0.2791]
    container = NgramCalculatorContainer(reuters_train, 3)
    interpolator = InterpolatingNgramProbabilityCalculator(container, weights)
    model = NgramLanguageModel(
        reuters_train, 3, ngram_probability_calculator=interpolator
    )
    brown_perplexity = PerplexityCalculator().calculate_corpus_perplexity(
        model, brown_corpus
    )

    brown_oov = OutOfVocabularyRateCalculator().calculate_out_of_vocabulary_rate(
        reuters_train, brown_corpus
    )

    print("Brown perplexity best model: ", brown_perplexity)
    print("Brown out of vocabulary rate: ", brown_oov)
# Example #4
# 0
def q7_perplexity_oov_of_brown_corpus():
    """Evaluate a Reuters-trained language model on the Brown corpus.

    Computes the corpus perplexity of the Brown corpus under an
    interpolating n-gram model built from the Reuters training corpus, and
    the Brown corpus out-of-vocabulary rate with respect to that training
    corpus. Both values are printed; the function returns ``None``.
    """
    # The same order value is given to the container and to the model.
    order = 3
    source_corpus = ReutersTrainingCorpus()
    target_corpus = BrownCorpus()

    probability_calculator = InterpolatingNgramProbabilityCalculator(
        NgramCalculatorContainer(source_corpus, order),
        [0.2518, 0.4691, 0.2791],
    )
    lm = NgramLanguageModel(
        source_corpus,
        order,
        ngram_probability_calculator=probability_calculator,
    )
    perplexity_calculator = PerplexityCalculator()
    ppl = perplexity_calculator.calculate_corpus_perplexity(lm, target_corpus)

    rate_calculator = OutOfVocabularyRateCalculator()
    oov = rate_calculator.calculate_out_of_vocabulary_rate(
        source_corpus, target_corpus
    )

    print("Brown perplexity best model: ", ppl)
    print("Brown out of vocabulary rate: ", oov)