def TrainSenseTagger(Pcfg, CFDist):
    """Assemble a backoff tagger for the SENSE property over STEM text.

    The chain is made of two taggers, consulted in this order:

    1. a UnigramTagger whose ``_freqdist`` is assigned directly from
       ``invertConditionalFreqDist(CFDist)`` instead of being learned
       through ``train()``;
    2. a DefaultTagger constructed with the tag 'APPEAR'.

    ``Pcfg`` is accepted but not used by this function.

    Returns the BackoffTagger wrapping both taggers.
    """
    logger.info("Training unigram tagger:")

    # Every tagger in the chain is configured with the same properties.
    properties = dict(TAG='SENSE', TEXT='STEM')

    unigram = UnigramTagger(**properties)
    # No train() call here: the frequency table is injected as-is.
    unigram._freqdist = invertConditionalFreqDist(CFDist)

    fallback = DefaultTagger('APPEAR', **properties)
    return BackoffTagger([unigram, fallback], **properties)
def test(numFiles=100,
         max_rules=200,
         min_score=2,
         ruleFile="dump.rules",
         errorOutput="errors.out",
         ruleOutput="rules.out",
         randomize=False,
         train=.8,
         trace=3):
    """Train a Brill POS tagger on WSJ tokens, report accuracy, dump results.

    Steps: load tokens with ``getWSJTokens``, split them into a training
    and a held-out part, train a unigram tagger backed by a regexp tagger,
    train a Brill tagger on top of that backoff tagger, print both
    accuracies, then write the learned rules and the tagging errors to
    disk.

    Parameters:
        numFiles    -- forwarded to ``getWSJTokens``.
        max_rules   -- forwarded to ``BrillTaggerTrainer.train``.
        min_score   -- forwarded to ``BrillTaggerTrainer.train``.
        ruleFile    -- currently unused (only referenced by a disabled
                       ``saveRules`` call); kept for interface
                       compatibility.
        errorOutput -- path of the file receiving one entry per item
                       returned by ``errorList``, separated by blank lines.
        ruleOutput  -- path of the file receiving ``str(rule)`` for every
                       learned rule, separated by blank lines.
        randomize   -- forwarded to ``getWSJTokens``.
        train       -- proportion of the data used for training; the rest
                       is reserved for testing.
        trace       -- forwarded to the ``BrillTaggerTrainer`` constructor.

    Returns the tagger produced by ``BrillTaggerTrainer.train``.
    """
    # Local import: only needed for the newline-free progress message.
    import sys

    # NOTE(review): the '.' in the CD pattern is unescaped, so it matches
    # any separator character, not just a decimal point -- confirm this is
    # intended before tightening it.
    NN_CD_tagger = RegexpTagger([(r'^[0-9]+(.[0-9]+)?$', 'CD'), (r'.*', 'NN')],
                                TAG='POS')

    # train is the proportion of data used in training; the rest is reserved
    # for testing.

    print("Loading tagged data...")
    taggedData = getWSJTokens(numFiles, randomize)

    trainCutoff = int(len(taggedData) * train)
    trainingData = Token(SUBTOKENS=taggedData[0:trainCutoff])
    goldData = Token(SUBTOKENS=taggedData[trainCutoff:])
    # presumably a copy of the gold data without its POS property, used as
    # the input to be tagged -- verify against Token.exclude
    testingData = goldData.exclude('POS')

    # Unigram tagger

    # Written without a newline so the accuracy lands on the same line;
    # flushed so the message is visible while training runs.
    sys.stdout.write("Training unigram tagger: ")
    sys.stdout.flush()
    u = UnigramTagger(TAG='POS')
    u.train(trainingData)
    backoff = BackoffTagger([u, NN_CD_tagger], TAG='POS')
    print("[accuracy: %f]" % tagger_accuracy(backoff, [goldData]))

    # Brill tagger

    templates = [
        SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 1)),
        SymmetricProximateTokensTemplate(ProximateTagsRule, (2, 2)),
        SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 2)),
        SymmetricProximateTokensTemplate(ProximateTagsRule, (1, 3)),
        #        SymmetricProximateTokensTemplate(ProximateWordsRule, (1,1)),
        #        SymmetricProximateTokensTemplate(ProximateWordsRule, (2,2)),
        #        SymmetricProximateTokensTemplate(ProximateWordsRule, (1,2)),
        #        SymmetricProximateTokensTemplate(ProximateWordsRule, (1,3)),
        ProximateTokensTemplate(ProximateTagsRule, (-1, -1), (1, 1)),
        #        ProximateTokensTemplate(ProximateWordsRule, (-1, -1), (1,1)),
    ]

    #trainer = FastBrillTaggerTrainer(backoff, templates, trace, TAG='POS')
    trainer = BrillTaggerTrainer(backoff, templates, trace, TAG='POS')
    b = trainer.train(trainingData, max_rules, min_score)

    print("")
    print("Brill accuracy: %f" % tagger_accuracy(b, [goldData]))

    print("\nRules: ")
    # The context manager guarantees the rules file is flushed and closed,
    # even if printing or writing a rule raises.
    with open(ruleOutput, 'w') as printRules:
        for rule in b.rules():
            ruleText = str(rule)
            print(ruleText)
            printRules.write(ruleText + "\n\n")
    #b.saveRules(ruleFile)

    b.tag(testingData)
    el = errorList(goldData, testingData)

    # Same guarantee for the error report.
    with open(errorOutput, 'w') as errorFile:
        for e in el:
            errorFile.write(e + "\n\n")
    print("Done.")
    return b