# Code example #1
# 0
#/////////////////////// TEXT IMPROVEMENT  /////////////////////////////

# Optional "text improvement": estimate the HMM transition model from extra
# plaintext instead of from the labelled training sequences alone.
# The explicit `== True` comparison is kept because the type of args['lm'] is
# not visible here; plain truthiness could change which runs enter the branch.
if args['lm'] == True:
    # Get additional text: Project Gutenberg text number 2554, the English
    # translation of Crime and Punishment, passed to get_bigram together
    # with train_plain.
    bigrams = get_bigram(train_plain,
                         url='http://www.gutenberg.org/files/2554/2554-0.txt')
    # Conditional frequency distribution over the bigrams.
    cfd = ConditionalFreqDist(bigrams)
    # Conditional probability distribution: Estimator applied per condition.
    # NOTE(review): assumes the bigrams are (previous_state, next_state)
    # pairs over the same labels as States -- confirm against get_bigram.
    cpd = nltk.ConditionalProbDist(cfd, Estimator)

    # HiddenMarkovModelTagger.train is a classmethod: it builds a brand-new
    # tagger from the labelled data and ignores the instance it is called
    # on. Constructing a tagger with transitions=cpd and then calling
    # .train() on it therefore threw the text-based transitions away.
    # Train first to obtain the output (emission) and prior distributions...
    supervised_tagger = nltk.tag.hmm.HiddenMarkovModelTagger.train(
        labeled_sequence=train_tagged_corpus,
        estimator=Estimator,
    )
    # ...then assemble the final tagger so that cpd really is its transition
    # model. NLTK exposes no public accessors for the trained distributions;
    # its own training code rebuilds taggers from these same attributes.
    HMM_tagger = nltk.tag.hmm.HiddenMarkovModelTagger(
        symbols=Symbols,
        states=States,
        transitions=cpd,
        outputs=supervised_tagger._outputs,
        priors=supervised_tagger._priors,
    )

    print('HMM trained on {} with text improvement and {} estimator \n'.format(
        cipher_folder, print_estimator))

# Testing. This runs whether or not the branch above was taken.
# NOTE(review): HMM_tagger is presumably defined earlier in the file for the
# case where args['lm'] is not set -- confirm.
HMM_tagger.test(test_tagged_corpus, verbose=False)
# Accuracy computed by tagger_best_path from the tagger, the test cipher text
# and the matching test plaintext.
accuracy = tagger_best_path(HMM_tagger, test_cipher, test_plain)
print('Accuracy with best paths: %.2f' % accuracy)