def test_eval(self): test_path = os.path.join(DATA_ROOT, 'test.txt') x_test, y_test = load_data_and_labels(test_path) p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl')) config = ModelConfig() config.vocab_size = len(p.vocab_word) config.char_vocab_size = len(p.vocab_char) model = SeqLabeling(config, ntags=len(p.vocab_tag)) model.load(filepath=os.path.join(SAVE_ROOT, 'model_weights.h5')) evaluator = anago.Evaluator(model, preprocessor=p) evaluator.eval(x_test, y_test)
def test_eval(self): DATA_ROOT = os.path.join(os.path.dirname(__file__), '../data/conll2003/en/tagging') SAVE_ROOT = os.path.join(os.path.dirname(__file__), '../models') model_config = ModelConfig() test_path = os.path.join(DATA_ROOT, 'test.txt') x_test, y_test = load_data_and_labels(test_path) p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl')) model_config.vocab_size = len(p.vocab_word) model_config.char_vocab_size = len(p.vocab_char) weights = 'model_weights.h5' evaluator = anago.Evaluator(model_config, weights, save_path=SAVE_ROOT, preprocessor=p) evaluator.eval(x_test, y_test)
def test_anago(keras_model_name="WCP", hand_features=None, task_name="ATEPC2", data_name="laptops"): DATA_ROOT = 'data' SAVE_ROOT = './models' # trained models LOG_ROOT = './logs' # checkpoint, tensorboard w_embedding_path = '/home/s1610434/Documents/Data/Vector/glove.twitter.27B.100d.txt' c_embedding_path = '/home/s1610434/Documents/Data/Vector/AmaYelp/GloVe/glove.char.100.txt' pos_embedding_path = '/home/s1610434/Documents/Data/Vector/AmaYelp/GloVe/glove.pos.100.txt' unipos_embedding_path = '/home/s1610434/Documents/Data/Vector/AmaYelp/GloVe/glove.unipos.100.txt' model_config = prepare_modelconfig(keras_model_name) training_config = TrainingConfig() training_config.max_epoch = 100 training_config.early_stopping = 20 print("-----{0}-----{1}-----{2}-----{3}-----".format( task_name, data_name, keras_model_name, hand_features)) save_path = SAVE_ROOT + "/{0}/{1}".format(data_name, task_name) train_path = os.path.join(DATA_ROOT, '{0}.{1}.train.tsv'.format(data_name, task_name)) test_path = os.path.join(DATA_ROOT, '{0}.{1}.test.tsv'.format(data_name, task_name)) train_dep_path = os.path.join( DATA_ROOT, '{0}.{1}.train.dep.tsv'.format(data_name, task_name)) test_dep_path = os.path.join( DATA_ROOT, '{0}.{1}.test.dep.tsv'.format(data_name, task_name)) # train set x_train_valid, y_train_valid, _ = collect_data_from_tsv(train_path) x_train_valid_dep = collect_dept_data_from_tsv(train_dep_path) # test set X_test, Y_test, _ = collect_data_from_tsv(test_path) X_test_dep = collect_dept_data_from_tsv(test_dep_path) # TODO Kfold split kf = KFold(n_splits=10) i_fold = 0 results = [] atepc_evaluator = ATEPCEvaluator() for train_index, valid_index in kf.split(x_train_valid): model_name = "{0}.{1}.{2}".format(keras_model_name, "{0}".format(hand_features), i_fold) X_train, X_valid = x_train_valid[train_index], x_train_valid[ valid_index] X_train_dep, X_valid_dep = x_train_valid_dep[ train_index], x_train_valid_dep[valid_index] Y_train, Y_valid = y_train_valid[train_index], y_train_valid[ valid_index] print("Data train: ", X_train.shape, Y_train.shape) print("Data valid: ", X_valid.shape, Y_valid.shape) print("Data test: ", X_test.shape, Y_test.shape) p = prepare_preprocessor(list(zip(X_train, X_train_dep)), Y_train, keras_model_name=keras_model_name, hand_features=hand_features) model_config.vocab_size = len(p.vocab_word) model_config.char_vocab_size = len(p.vocab_char) if keras_model_name.find("P") != -1: if hand_features is not None: if "UNIPOS" in hand_features: pos_embedding_path = unipos_embedding_path model_config.pos_vocab_size = len(p.pos_extractor.features_dict) if keras_model_name.find("H") != -1: # model_config.hand_feature_size = gen_no_hand_dimension(data_name, hand_features, keras_model_name) model_config.hand_feature_size = 53 print("model_config.hand_feature_size: ", str(model_config.hand_feature_size)) filepath = os.path.join(save_path, model_name) if os.path.isfile(filepath) is False: continue evaluator = anago.Evaluator(model_config, weights=model_name, save_path=save_path, preprocessor=p, keras_model_name=keras_model_name) print("--- Test phrase --- " + model_name) # print("Train ") # f1_score_train = evaluator.eval(list(zip(X_train, X_train_dep)), Y_train) # print("Validation ") # f1_score_valid = evaluator.eval(list(zip(X_valid, X_valid_dep)), Y_valid) # print("Test ") f1_score_test = evaluator.eval(list(zip(X_test, X_test_dep)), Y_test) print("---") i_fold += 1 # Kfold cross validation f_out_name = "data/{0}.{1}.test.pred.tsv".format(data_name, task_name) f_out = open(f_out_name, "w") ## Tagging tagger = anago.Tagger(model_config, model_name, save_path=save_path, preprocessor=p, keras_model_name=keras_model_name) for x, y in zip(list(zip(X_test, X_test_dep)), Y_test): result = tagger.predict(x) for word, label, pred in zip(x[0], y, result): f_out.write("{0}\t{1}\t{2}\n".format(word, label, pred)) f_out.write("\n") f_out.close() ate_f1, apc_acc, c_apc_acc = atepc_evaluator.evaluate(f_out_name) results.append([ate_f1, apc_acc, c_apc_acc]) print(results[-1]) print("-----All-----{0}--{1}".format(keras_model_name, data_name)) for result in results: print(result) print("-----AVG-----") results_np = np.array(results, dtype=np.float32) print(results_np.mean(axis=0)) print("-------------")
DATA_ROOT = 'data/conll2003/en/ner' SAVE_ROOT = './models' # trained model LOG_ROOT = './logs' # checkpoint, tensorboard embedding_path = '/media/jan/OS/Dataset/WordEmbeddings/wiki.en.vec' model_config = ModelConfig() training_config = TrainingConfig() model_path = os.path.join(SAVE_ROOT, 'mymodel.h5') train_path = os.path.join(DATA_ROOT, 'train.small.txt') valid_path = os.path.join(DATA_ROOT, 'valid.small.txt') x_train, y_train = load_data_and_labels(train_path) x_valid, y_valid = load_data_and_labels(valid_path) p = prepare_preprocessor(x_train, y_train) embeddings = load_word_embeddings(p.vocab_word, embedding_path, model_config.word_embedding_size) model_config.vocab_size = len(p.vocab_word) model_config.char_vocab_size = len(p.vocab_char) model = SeqLabeling(model_config, embeddings, len(p.vocab_tag)) trainer = anago.Trainer(model, training_config, checkpoint_path=LOG_ROOT, save_path=SAVE_ROOT, preprocessor=p) trainer.train(x_train, y_train, x_valid, y_valid) evaluator = anago.Evaluator(model, preprocessor=p) model.save(model_path)