def train(train_data, val_data, fold_idx=None):
    """Train an HMM on ``train_data`` and print train/validation scores.

    Args:
        train_data: Training samples; wrapped in ``MyDataset``.
        val_data: Validation samples; wrapped in ``MyDataset``.
        fold_idx: Fold index, or ``None`` for a single run. The progress
            message shows ``fold_idx + 1`` while the save-file name uses
            ``fold_idx`` unchanged.

    Returns:
        None. Both scores are printed rather than returned.
    """
    train_set = MyDataset(train_data)
    val_set = MyDataset(val_data)

    # NOTE(review): neither loader is read again in the visible code.
    train_loader = DataLoader(train_set, batch_size=config.batch_size)
    val_loader = DataLoader(val_set, batch_size=config.batch_size)

    from models.hmm import HMM

    word2id, id2word = load_vocab()
    model = HMM(len(config.label2id), len(word2id))

    if fold_idx is not None:
        print('start fold: {}'.format(fold_idx + 1))
        save_name = '{}_fold{}.bin'.format(model_name, fold_idx)
    else:
        print('start')
        save_name = '{}.bin'.format(model_name)
    # NOTE(review): the path is built but nothing visible here writes to it.
    model_save_path = os.path.join(config.model_path, save_name)

    # The HMM is fitted directly on the id sequences held by the dataset.
    model.train(train_set.x_data, train_set.y_data)

    train_pred = model.predict(train_set.x_data)
    train_score = get_score(train_set.y_data, train_pred)

    val_pred = model.predict(val_set.x_data)
    val_score = get_score(val_set.y_data, val_pred)

    report = 'train score: {0:>6.2%}, val score: {1:>6.2%}'
    print(report.format(train_score, val_score))
#!/usr/bin/env python3
# Fit the Simple, HMM and Complex models on the training split and print a
# report for each one on the validation split.
from pos_data import read
from models.simple import Simple
from models.hmm import HMM
from models.complex import Complex
from metrics import print_report

X_train, y_train = read('data/bc.train', 'train')
# NOTE(review): the validation file is also read with 'train' as the second
# argument -- confirm that this is intended.
X_val, y_val = read('data/bc.val', 'train')

# Each entry pairs the heading printed before the report with the model class;
# the models are run in the listed order.
for heading, model_cls in (
    ('Simple model', Simple),
    ('HMM', HMM),
    ('Complex model', Complex),
):
    print(heading)
    model = model_cls()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print_report(y_val, y_pred)