#/////////////////////// TEXT IMPROVEMENT /////////////////////////////
# Trains the HMM with transition probabilities estimated from bigrams that
# include additional plain text, then evaluates it on the test data.
# NOTE(review): this section was recovered from a single collapsed line, so
# the extent of the `if` body is reconstructed -- confirm that the
# evaluation statements belong inside this branch.
if args['lm'] == True:
    # Additional text: Project Gutenberg text number 2554, the English
    # translation of "Crime and Punishment".
    # Presumably get_bigram merges the bigrams of train_plain with those of
    # the downloaded text -- verify against get_bigram.
    bigrams = get_bigram(
        train_plain,
        url='http://www.gutenberg.org/files/2554/2554-0.txt',
    )

    # Conditional frequency distribution over the bigrams, turned into a
    # conditional probability distribution with the selected estimator.
    cfd = ConditionalFreqDist(bigrams)
    cpd = nltk.ConditionalProbDist(cfd, Estimator)

    # HiddenMarkovModelTagger.train is a classmethod: it always returns a
    # brand-new tagger estimated only from the labelled sequences. Calling
    # it on an instance constructed with transitions=cpd therefore throws
    # the bigram-based transitions away. Train first to obtain the
    # emission and prior distributions ...
    supervised_tagger = nltk.tag.hmm.HiddenMarkovModelTagger.train(
        labeled_sequence=train_tagged_corpus,
        estimator=Estimator,
    )

    # ... then assemble the final tagger from those distributions and the
    # text-improved transitions. NLTK exposes no public accessors for the
    # trained distributions, hence the private attributes.
    HMM_tagger = nltk.tag.hmm.HiddenMarkovModelTagger(
        symbols=supervised_tagger._symbols,
        states=supervised_tagger._states,
        transitions=cpd,
        outputs=supervised_tagger._outputs,
        priors=supervised_tagger._priors,
    )

    print('HMM trained on {} with text improvement and {} estimator \n'.format(
        cipher_folder, print_estimator))

    # Built-in NLTK evaluation on the tagged test corpus.
    HMM_tagger.test(test_tagged_corpus, verbose=False)

    # Accuracy computed from the tagger's best path over the test cipher,
    # compared against the test plain text.
    accuracy = tagger_best_path(HMM_tagger, test_cipher, test_plain)
    print('Accuracy with best paths: %.2f' % accuracy)