예제 #1
0
def train(train_data, val_data, fold_idx=None):
    """Fit an HMM on the training data and print train/validation scores.

    Args:
        train_data: Training samples; wrapped in ``MyDataset``.
        val_data: Validation samples; wrapped in ``MyDataset``.
        fold_idx: Fold index, or ``None`` for a run without folds. The
            start message shows ``fold_idx + 1`` while the save-path file
            name uses ``fold_idx`` unchanged.

    Returns:
        None. The scores are only printed.
    """
    train_set = MyDataset(train_data)
    val_set = MyDataset(val_data)

    # The loaders are constructed but not consumed below: the HMM is fed the
    # datasets' ``x_data`` / ``y_data`` attributes directly.
    train_loader = DataLoader(train_set, batch_size=config.batch_size)
    val_loader = DataLoader(val_set, batch_size=config.batch_size)

    from models.hmm import HMM
    word2id, _id2word = load_vocab()
    model = HMM(len(config.label2id), len(word2id))

    if fold_idx is not None:
        print('start fold: {}'.format(fold_idx + 1))
        checkpoint_name = '{}_fold{}.bin'.format(model_name, fold_idx)
    else:
        print('start')
        checkpoint_name = '{}.bin'.format(model_name)
    # NOTE(review): the path is computed but nothing in this function writes
    # the model to it -- confirm whether a save step is missing.
    model_save_path = os.path.join(config.model_path, checkpoint_name)

    model.train(train_set.x_data, train_set.y_data)

    # Score the training split first, then the validation split.
    scores = []
    for dataset in (train_set, val_set):
        predictions = model.predict(dataset.x_data)
        scores.append(get_score(dataset.y_data, predictions))

    print('train score: {0:>6.2%}, val score: {1:>6.2%}'.format(*scores))
#!/usr/bin/env python3

from pos_data import read
from models.simple import Simple
from models.hmm import HMM
from models.complex import Complex
from metrics import print_report


X_train, y_train = read('data/bc.train', 'train')
# NOTE(review): the validation file is also read with mode 'train' --
# confirm this is the intended mode for 'data/bc.val'.
X_val, y_val = read('data/bc.val', 'train')


def _fit_and_report(title, model_cls):
    """Print *title*, fit a fresh ``model_cls()`` and report on the val split.

    The title is printed before the model is constructed. Returns the fitted
    model together with its predictions for ``X_val``.
    """
    print(title)
    model = model_cls()
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)
    print_report(y_val, predictions)
    return model, predictions


# Each model is trained and evaluated in turn; ``y_pred`` ends up holding the
# predictions of the last model run.
sm, y_pred = _fit_and_report('Simple model', Simple)
hm, y_pred = _fit_and_report('HMM', HMM)
cp, y_pred = _fit_and_report('Complex model', Complex)