Exemple #1
0
def HMM_test(if_train=True):
    """Train or load the HMM tagger, tag the test split and report the scores.

    Args:
        if_train: When truthy the model is always retrained and saved. When
            falsy, a saved model is loaded from ``ModelPathConfig.hmm`` if
            that path exists; if it does not exist, training happens anyway.

    Side effects:
        Prints progress messages and two score tables, saves the trained
        model to ``ModelPathConfig.hmm`` (training path only), and writes
        the entity-level table to ``ResultPathConfig.hmm_entity`` and the
        single-label table to ``ResultPathConfig.hmm_model`` via ``to_csv``.
    """
    has_checkpoint = os.path.exists(ModelPathConfig.hmm)

    print("upload data!")
    word_lists, tag_lists, word2id, tag2id = build_corpus("train")
    # The fourth item of the test corpus is forwarded to ``HMM.test`` below.
    # NOTE(review): the original code passed the throwaway ``_`` at that
    # position; confirm against the ``HMM.test`` signature that this is the
    # intended argument.
    test_word_lists, test_tag_lists, _, test_fourth_item = build_corpus("test")

    hmm_model = HMM(len(tag2id), len(word2id))

    reuse_saved_model = (not if_train) and has_checkpoint
    if reuse_saved_model:
        print("load model")
        hmm_model = load_model(ModelPathConfig.hmm)
    else:
        print("start to training")
        hmm_model.train(word_lists, tag_lists, word2id, tag2id)
        print("save the model")
        save_model(hmm_model, ModelPathConfig.hmm)

    pred_tag_lists = hmm_model.test(
        test_word_lists, test_fourth_item, word2id, tag2id)
    label_tag_lists = test_tag_lists

    # Entity-level evaluation: persist the table, then pretty-print it.
    entity_units = evaluate_entity_label(
        pred_tag_lists, label_tag_lists, list(tag2id.keys()))
    entity_table = unitstopd(entity_units)
    entity_table.to_csv(ResultPathConfig.hmm_entity)
    print(tabulate(entity_table, headers='keys', tablefmt='psql'))

    # Single-label evaluation: same reporting steps with the other evaluator.
    label_units = evaluate_single_label(
        pred_tag_lists, label_tag_lists, list(tag2id.keys()))
    label_table = unitstopd(label_units)
    label_table.to_csv(ResultPathConfig.hmm_model)
    print(tabulate(label_table, headers='keys', tablefmt='psql'))
def hmm_pred(train_word_lists, train_tag_lists, test_word_lists,
             test_tag_lists, word2id, tag2id):
    """Fit an HMM on the training data, save it and tag the test sentences.

    The model is sized from ``len(tag2id)`` and ``len(word2id)`` and saved
    to ``./ckpts/hmm.pkl`` after training. ``test_tag_lists`` is accepted
    but not read by this function.

    Returns:
        The result of ``HMM.test`` on ``test_word_lists``.
    """
    n_tags = len(tag2id)
    n_words = len(word2id)

    tagger = HMM(n_tags, n_words)
    tagger.train(train_word_lists, train_tag_lists, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    return tagger.test(test_word_lists, word2id, tag2id)
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Train an HMM, save it, then predict on the test data and print results.

    Args:
        train_data: Two-item iterable ``(word_lists, tag_lists)``.
        test_data: Two-item iterable ``(word_lists, tag_lists)``.
        word2id: Mapping whose length sizes the model; also passed to the
            model's ``train`` and ``test`` calls.
        tag2id: Mapping whose length sizes the model; also passed to the
            model's ``train`` and ``test`` calls.
        remove_O: Forwarded unchanged to ``results_print``.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    # Unpack both splits up front so malformed input fails before training.
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Fit and save the model.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Predict and report.
    predicted_tags = tagger.test(test_words, word2id, tag2id)
    results_print(gold_tags, predicted_tags, remove_O=remove_O)
    return predicted_tags
Exemple #4
0
def hmm_train_eval(train_data, test_data, word2id, tag2id):
    """Train an HMM model, save it, and predict tags for the test data.

    ``train_data`` and ``test_data`` are each unpacked into a
    ``(word_lists, tag_lists)`` pair. The trained model is saved to
    ``./ckpts/hmm.pkl``.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Train the HMM model and save it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Evaluate the HMM model.
    predicted_tags = tagger.test(test_words, word2id, tag2id)

    # NOTE(review): the Metrics object is built but nothing is read from it
    # in this function; the construction is kept in case it has side effects.
    _metrics = Metrics(gold_tags, predicted_tags)

    return predicted_tags
def hmm_train_eval(train_data, test_data, word2id, tag2id):
    """Train an HMM model, save it, and print its accuracy on the test data.

    ``train_data`` and ``test_data`` are each unpacked into a
    ``(word_lists, tag_lists)`` pair. The trained model is saved to
    ``./ckpts/hmm.pkl``. The value returned by ``evaluate`` is multiplied
    by 100 and printed with two decimals.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Train the HMM model and save it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Evaluate the HMM model.
    predicted_tags = tagger.test(test_words, word2id, tag2id)
    percent = evaluate(predicted_tags, gold_tags) * 100
    report_line = "HMM 模型的准确率为:{:.2f}%".format(percent)
    print(report_line)

    return predicted_tags
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Train an HMM model, save it, and report metrics on the test data.

    Args:
        train_data: Two-item iterable ``(word_lists, tag_lists)``.
        test_data: Two-item iterable ``(word_lists, tag_lists)``.
        word2id: Mapping whose length sizes the model; also passed to the
            model's ``train`` and ``test`` calls.
        tag2id: Mapping whose length sizes the model; also passed to the
            model's ``train`` and ``test`` calls.
        remove_O: Forwarded unchanged to ``Metrics``.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    train_words, train_tags = train_data
    test_words, gold_tags = test_data

    # Train the HMM model and save it.
    tagger = HMM(len(tag2id), len(word2id))
    tagger.train(train_words, train_tags, word2id, tag2id)
    save_model(tagger, "./ckpts/hmm.pkl")

    # Evaluate the HMM model.
    predicted_tags = tagger.test(test_words, word2id, tag2id)

    scorer = Metrics(gold_tags, predicted_tags, remove_O=remove_O)
    scorer.report_scores()
    scorer.report_confusion_matrix()

    return predicted_tags
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    """Fit an HMM tagger, save it, then score it on the test split.

    Both ``train_data`` and ``test_data`` are unpacked into a
    ``(word_lists, tag_lists)`` pair. After training, the model is saved to
    ``./ckpts/hmm.pkl``; the predictions are then handed to ``Metrics``
    (with ``remove_O`` forwarded), which reports scores and a confusion
    matrix.

    Returns:
        The predicted tag lists produced by ``HMM.test``.
    """
    # Split the inputs into sentences and their gold tags.
    fit_sentences, fit_tags = train_data
    eval_sentences, eval_tags = test_data

    # Training phase.
    num_tags, vocab_size = len(tag2id), len(word2id)
    hmm = HMM(num_tags, vocab_size)
    hmm.train(fit_sentences, fit_tags, word2id, tag2id)
    save_model(hmm, "./ckpts/hmm.pkl")

    # Evaluation phase.
    predictions = hmm.test(eval_sentences, word2id, tag2id)
    report = Metrics(eval_tags, predictions, remove_O=remove_O)
    report.report_scores()
    report.report_confusion_matrix()

    return predictions
Exemple #8
0
def train(train_data, val_data, fold_idx=None):
    """Fit an HMM on the training data and print train/validation scores.

    Args:
        train_data: Data wrapped in ``MyDataset`` for training.
        val_data: Data wrapped in ``MyDataset`` for validation.
        fold_idx: Optional fold index. When given, the start message shows
            ``fold_idx + 1`` and the save path carries a ``_fold{fold_idx}``
            suffix.

    NOTE(review): the two data loaders and ``model_save_path`` are built but
    not used anywhere in this function; nothing here writes the model to
    disk. Confirm whether a save step is missing.
    """
    train_dataset = MyDataset(train_data)
    val_dataset = MyDataset(val_data)

    batch_size = config.batch_size
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size)

    from models.hmm import HMM
    word2id, id2word = load_vocab()
    model = HMM(len(config.label2id), len(word2id))

    # Announce the run and derive the checkpoint file name.
    if fold_idx is not None:
        print('start fold: {}'.format(fold_idx + 1))
        file_name = '{}_fold{}.bin'.format(model_name, fold_idx)
    else:
        print('start')
        file_name = '{}.bin'.format(model_name)
    model_save_path = os.path.join(config.model_path, file_name)

    model.train(train_dataset.x_data, train_dataset.y_data)

    # Score the training split first, then the validation split.
    train_predictions = model.predict(train_dataset.x_data)
    train_score = get_score(train_dataset.y_data, train_predictions)
    val_predictions = model.predict(val_dataset.x_data)
    val_score = get_score(val_dataset.y_data, val_predictions)

    summary = 'train score: {0:>6.2%}, val score: {1:>6.2%}'.format(
        train_score, val_score)
    print(summary)
#!/usr/bin/env python3

from pos_data import read
from models.simple import Simple
from models.hmm import HMM
from models.complex import Complex
from metrics import print_report


# Load both splits once; every model below is fitted and scored on them.
# NOTE(review): the validation file is also read with the 'train' mode
# argument — confirm that this is intended.
X_train, y_train = read('data/bc.train', 'train')
X_val, y_val = read('data/bc.val', 'train')


def _fit_and_report(title, model_cls):
    """Print ``title``, fit a fresh ``model_cls`` and report validation results.

    Returns the fitted model and its predictions on ``X_val``.
    """
    print(title)
    model = model_cls()
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)
    print_report(y_val, predictions)
    return model, predictions


sm, y_pred = _fit_and_report('Simple model', Simple)

hm, y_pred = _fit_and_report('HMM', HMM)

cp, y_pred = _fit_and_report('Complex model', Complex)
Exemple #10
0
def hmm_train(train_data, word2id, tag2id):
    """Train an HMM on ``train_data`` and save it to ``HMM_MODEL_PATH``.

    ``train_data`` is unpacked into a ``(word_lists, tag_lists)`` pair; the
    model is sized from ``len(tag2id)`` and ``len(word2id)``. Nothing is
    returned.
    """
    sentences, tags = train_data
    n_tags, n_words = len(tag2id), len(word2id)

    tagger = HMM(n_tags, n_words)
    tagger.train(sentences, tags, word2id, tag2id)
    save_model(tagger, HMM_MODEL_PATH)