Example #1
0
def main():
    """Train each language model and print its evaluation outcome on the dev data.

    Both corpora are loaded once. Then, for every model class in turn, the
    model is built from the training corpus, wrapped in a SpellCorrect
    together with the training corpus, evaluated against the dev corpus, and
    the outcome is printed under a heading naming the model.

    Change devPath if you wish to do things like test on the training data.
    """

    # load training data
    trainPath = 'data/tagged-train.dat'
    trainingCorpus = Corpus(trainPath)

    # load dev data
    devPath = 'data/tagged-dev.dat'
    devCorpus = Corpus(devPath)

    # (heading, model class) pairs, listed in the order they are reported.
    # To add or reorder a model, edit this table only.
    models = [
        ('Unigram Language Model: ', UnigramModel),
        ('Uniform Language Model: ', UniformModel),
        ('Smooth Unigram Language Model: ', SmoothUnigramModel),
        ('Smooth Bigram Language Model: ', SmoothBigramModel),
        ('Backoff Language Model: ', BackoffModel),
        ('Custom Language Model: ', CustomModel),
    ]

    for heading, modelClass in models:
        # A single parenthesised argument prints identically whether print is
        # a statement (Python 2) or a function (Python 3).
        print(heading)
        languageModel = modelClass(trainingCorpus)
        spellChecker = SpellCorrect(languageModel, trainingCorpus)
        outcome = spellChecker.evaluate(devCorpus)
        print(str(outcome))
Example #2
0
def main():
    """Trains all of the language models and tests them on the dev data. Change devPath if you
     wish to do things like test on the training data."""

    trainPath = 'data/tagged-train.dat'
    trainingCorpus = Corpus(trainPath)

    devPath = 'data/tagged-dev.dat'
    devCorpus = Corpus(devPath)

    print 'Unigram Language Model: '
    unigramLM = UnigramModel(trainingCorpus)
    unigramSpell = SpellCorrect(unigramLM, trainingCorpus)
    unigramOutcome = unigramSpell.evaluate(devCorpus)
    print str(unigramOutcome)

    print 'Uniform Language Model: '
    uniformLM = UniformModel(trainingCorpus)
    uniformSpell = SpellCorrect(uniformLM, trainingCorpus)
    uniformOutcome = uniformSpell.evaluate(devCorpus)
    print str(uniformOutcome)
    '''