コード例 #1
0
ファイル: p2.py プロジェクト: M4573R/Coursera_NLP_MC
def tag(test_data_filename, result_filename, hmm_model_filename):
    """Tag a test file using previously saved HMM counts.

    Args:
        test_data_filename: Path of the input file handed to the tagger.
        result_filename: Path of the output file; it is opened in 'w'
            mode, so any existing content is overwritten.
        hmm_model_filename: Path of the counts file passed to
            ``ViterbiTagger.read_counts``.

    Every file is opened in a ``with`` block so the handle is closed (and
    the result file flushed) even if the tagger raises.
    """
    print('1. load Hmm model')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # the ViterbiTagger constructor.
    tagger = ViterbiTagger(3)
    with open(hmm_model_filename) as model_file:
        tagger.read_counts(model_file)

    print('2. tag test file')
    # The input is opened before the output, so a missing test file fails
    # before the result file is created or truncated.
    with open(test_data_filename) as test_file:
        with open(result_filename, 'w') as result_file:
            tagger.tag(test_file, result_file)
コード例 #2
0
ファイル: p2.py プロジェクト: M4573R/Coursera_NLP_MC
def train(train_data_filename, rare_train_data_filename, hmm_model_filename, rare_words_rule):
    """Train an HMM in two passes and write the final counts to disk.

    Steps performed, in order:
      1. Train a first ``ViterbiTagger`` on ``train_data_filename``.
      2. Call ``util.process_rare_words`` with the original training file,
         a new output file, and the first model's ``rare_words`` and
         ``rare_words_rule`` attributes.
      3. Train a second ``ViterbiTagger`` on the file produced in step 2
         and write its counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data.
        rare_train_data_filename: Path where the rewritten training data
            is stored (opened in 'w' mode, then read back in step 3).
        hmm_model_filename: Path where the final counts are written
            (opened in 'w' mode).
        rare_words_rule: Value assigned to the first model's
            ``rare_words_rule`` attribute before training.
    """
    print('1. train hmm model')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # the ViterbiTagger constructor.
    hmm_model = ViterbiTagger(3)
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as train_file:
        hmm_model.train(train_file)

    print('2. process rare words')
    # Leaving the inner ``with`` closes (and therefore flushes) the
    # rewritten file before step 3 reopens it for reading.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_train_file:
            util.process_rare_words(
                train_file,
                rare_train_file,
                hmm_model.rare_words,
                hmm_model.rare_words_rule)

    print('3. train hmm model again using the new train data')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)
コード例 #3
0
def tag(test_data_filename, result_filename, hmm_model_filename):
    """Tag a test file using previously saved HMM counts.

    Args:
        test_data_filename: Path of the input file handed to the tagger.
        result_filename: Path of the output file; it is opened in 'w'
            mode, so any existing content is overwritten.
        hmm_model_filename: Path of the counts file passed to
            ``ViterbiTagger.read_counts``.

    Files are managed with ``with`` blocks so that each handle is closed
    (and the result file flushed) even when the tagger raises.
    """
    print('1. Cargando el modelo Hmm')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # the ViterbiTagger constructor.
    tagger = ViterbiTagger(3)
    with open(hmm_model_filename) as model_file:
        tagger.read_counts(model_file)

    print('2. Archivo de prueba')
    # Open the input first so a missing test file fails before the result
    # file is created or truncated.
    with open(test_data_filename) as test_file:
        with open(result_filename, 'w') as result_file:
            tagger.tag(test_file, result_file)
コード例 #4
0
def train(train_data_filename, rare_train_data_filename, hmm_model_filename,
          rare_words_rule):
    """Train an HMM in two passes and write the final counts to disk.

    Steps performed, in order:
      1. Train a first ``ViterbiTagger`` on ``train_data_filename``.
      2. Call ``util.process_rare_words`` with the original training file,
         a new output file, and the first model's ``rare_words`` and
         ``rare_words_rule`` attributes.
      3. Train a second ``ViterbiTagger`` on the file produced in step 2
         and write its counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data.
        rare_train_data_filename: Path where the rewritten training data
            is stored (opened in 'w' mode, then read back in step 3).
        hmm_model_filename: Path where the final counts are written
            (opened in 'w' mode).
        rare_words_rule: Value assigned to the first model's
            ``rare_words_rule`` attribute before training.
    """
    print('1. Modo entrenamiento hmm')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # the ViterbiTagger constructor.
    hmm_model = ViterbiTagger(3)
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as train_file:
        hmm_model.train(train_file)

    print('2. Procesando palabras raras')
    # The rewritten file must be closed (flushed) before step 3 reads it;
    # leaving the inner ``with`` guarantees that.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_train_file:
            util.process_rare_words(train_file, rare_train_file,
                                    hmm_model.rare_words,
                                    hmm_model.rare_words_rule)

    print('3. Entrenando hmm usando la nueva data de entrenamiento')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)