def tag(test_data_filename, result_filename, hmm_model_filename):
    """Tag a test file using a ViterbiTagger loaded from saved counts.

    Args:
        test_data_filename: Path of the input file passed to ``tagger.tag``.
        result_filename: Path of the output file. It is opened in ``'w'``
            mode, so any existing content is truncated.
        hmm_model_filename: Path of the counts file passed to
            ``tagger.read_counts``.
    """
    # NOTE(review): a second ``tag`` defined later in this file rebinds the
    # name, so this version is shadowed once the module is fully loaded.
    print('1. load Hmm model')
    tagger = ViterbiTagger(3)  # presumably the n-gram order -- TODO confirm
    # Context managers close every handle deterministically instead of
    # relying on garbage collection (which also guarantees the result file
    # is flushed to disk when this function returns).
    with open(hmm_model_filename) as model_file:
        tagger.read_counts(model_file)

    print('2. tag test file')
    # Nested ``with`` keeps the original open order: input first, then output.
    with open(test_data_filename) as test_file:
        with open(result_filename, 'w') as result_file:
            tagger.tag(test_file, result_file)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename,
          rare_words_rule):
    """Train a ViterbiTagger in two passes and write its counts to disk.

    The first pass trains on the raw training data. The training data is then
    rewritten through ``util.process_rare_words`` into
    ``rare_train_data_filename``, a fresh tagger is trained on that rewritten
    file, and its counts are written to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data.
        rare_train_data_filename: Path where the rare-word-processed training
            data is written (truncated if it exists) and then read back.
        hmm_model_filename: Path where the final counts are written
            (truncated if it exists).
        rare_words_rule: Value assigned to ``rare_words_rule`` on the
            first-pass tagger and forwarded to ``util.process_rare_words``.
    """
    # NOTE(review): a second ``train`` defined later in this file rebinds the
    # name, so this version is shadowed once the module is fully loaded.
    print('1. train hmm model')
    hmm_model = ViterbiTagger(3)  # presumably the n-gram order -- TODO confirm
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as train_file:
        hmm_model.train(train_file)

    print('2. process rare words')
    # The rewritten training file must be closed (and therefore flushed)
    # before step 3 reopens it for reading; ``with`` guarantees that rather
    # than depending on when the interpreter collects the file object.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_file:
            util.process_rare_words(
                train_file,
                rare_file,
                hmm_model.rare_words,
                hmm_model.rare_words_rule)

    print('3. train hmm model again using the new train data')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rare_file:
        hmm_model_rare.train(rare_file)
    # Open the model file only after training succeeded, so a failure above
    # does not truncate an existing model.
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)
def tag(test_data_filename, result_filename, hmm_model_filename):
    """Tag a test file using a ViterbiTagger loaded from saved counts.

    Progress messages are printed in Spanish.

    Args:
        test_data_filename: Path of the input file passed to ``tagger.tag``.
        result_filename: Path of the output file. It is opened in ``'w'``
            mode, so any existing content is truncated.
        hmm_model_filename: Path of the counts file passed to
            ``tagger.read_counts``.
    """
    print('1. Cargando el modelo Hmm')
    tagger = ViterbiTagger(3)  # presumably the n-gram order -- TODO confirm
    # Context managers close every handle deterministically instead of
    # relying on garbage collection (which also guarantees the result file
    # is flushed to disk when this function returns).
    with open(hmm_model_filename) as model_file:
        tagger.read_counts(model_file)

    print('2. Archivo de prueba')
    # Nested ``with`` keeps the original open order: input first, then output.
    with open(test_data_filename) as test_file:
        with open(result_filename, 'w') as result_file:
            tagger.tag(test_file, result_file)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename,
          rare_words_rule):
    """Train a ViterbiTagger in two passes and write its counts to disk.

    The first pass trains on the raw training data. The training data is then
    rewritten through ``util.process_rare_words`` into
    ``rare_train_data_filename``, a fresh tagger is trained on that rewritten
    file, and its counts are written to ``hmm_model_filename``. Progress
    messages are printed in Spanish.

    Args:
        train_data_filename: Path of the original training data.
        rare_train_data_filename: Path where the rare-word-processed training
            data is written (truncated if it exists) and then read back.
        hmm_model_filename: Path where the final counts are written
            (truncated if it exists).
        rare_words_rule: Value assigned to ``rare_words_rule`` on the
            first-pass tagger and forwarded to ``util.process_rare_words``.
    """
    print('1. Modo entrenamiento hmm')
    hmm_model = ViterbiTagger(3)  # presumably the n-gram order -- TODO confirm
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as train_file:
        hmm_model.train(train_file)

    print('2. Procesando palabras raras')
    # The rewritten training file must be closed (and therefore flushed)
    # before step 3 reopens it for reading; ``with`` guarantees that rather
    # than depending on when the interpreter collects the file object.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_file:
            util.process_rare_words(
                train_file,
                rare_file,
                hmm_model.rare_words,
                hmm_model.rare_words_rule)

    print('3. Entrenando hmm usando la nueva data de entrenamiento')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rare_file:
        hmm_model_rare.train(rare_file)
    # Open the model file only after training succeeded, so a failure above
    # does not truncate an existing model.
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)