# Fit the deep model and record how long training took.
# NOTE(review): the matching tic() is not visible in this chunk; presumably
# it is called just before this point - confirm.
pos_model_history = pos_model.fit(train_X, train_y)
deeplearn_eval = {'train_time': toc()}
print(deeplearn_eval['train_time'])

# review training results
plot_model_performance(pos_model_history)
plot_model(
    pos_model.model,
    to_file=RESULTS_DIR + 'Greek_POS_deep_model.png',
    show_shapes=True,
)

""" 4. save and test the model """
# temporarily save the trained model, history and details
# (the training history goes to RESULTS_DIR; the model, vectorizer and
# label encoder go to RESOURCES_DIR)
pos_model.model.save(RESOURCES_DIR + 'Greek_POS_DL.h5')
save_tagger(
    RESULTS_DIR + 'Greek_POS_DL_History.pkl',
    pos_model_history.history,
)
save_tagger(
    RESOURCES_DIR + 'Greek_POS_DL_DictVectorizer.pkl',
    dict_vectorizer,
)
save_tagger(
    RESOURCES_DIR + 'Greek_POS_DL_LabelEncoder.pkl',
    label_encoder,
)

# test the results: only the tagging call sits between tic() and toc(),
# so 'test_time' does not include the metric computation below
tic()
val_tagged = deep_learning_tag(
    untag(val_sents),
    'Greek_POS_DL',
    RESOURCES_DIR,
)
deeplearn_eval['test_time'] = toc()
# the second value returned by compute_metrics is stored as the accuracy
_, deeplearn_eval['test_accuracy'] = compute_metrics(val_sents, val_tagged)
display_training_metrics(deeplearn_eval)

""" 5. look at errors and metrics """
# NOTE(review): compute_metrics is evaluated a second time on the same
# inputs; this call keeps the first returned value, the one above kept the
# second. Left as two calls in case compute_metrics also prints.
metrics, _ = compute_metrics(val_sents, val_tagged)
output, comparison = compare_results(
    val_sents,
    val_tagged,
    tags=['NO', 'AJ', 'PN'],
)
'Accuracy (%)', [d[1] for d in tag3_eval_opt], 'Scoring Time (s)', [d[2] for d in tag3_eval_opt]) """ 1. re-run tagger with changed corpus size """ # load the corpus as tagged sentences train_sents, val_sents, test_sents = read_corpus('INTERA', role='train', proportion=70, tag_length=TAG_LENGTH) # Naive Bayes tagger tag3_eval = dict() # train tic() tag3_tagger = ClassifierBasedTagger(train=train_sents, feature_detector=add_features) tag3_eval['train_time'] = toc() # test tic() tag3_eval['test_accuracy'] = tag3_tagger.evaluate(val_sents) tag3_eval['test_time'] = toc() # display results display_training_metrics(tag3_eval) """ 2. look at errors and metrics """ tag3_res = tag3_tagger.tag_sents(untag(val_sents)) metrics, _ = compute_metrics(val_sents, tag3_res) output, comparison = compare_results(val_sents, tag3_res, tags=['NO', 'AJ', 'AD', 'PN']) """ 3. save the tagger """ save_tagger(RESOURCES_DIR + 'Greek_POS_class.pkl', tag3_tagger)
# sentence level: tag each test corpus one sentence at a time with the
# classifier tagger and score the predictions against the gold sentences
pred_int = list(map(class_tag.tag, untag(test_int)))
class_eval['sent_evaluate'] = compute_sent_acc(test_int, pred_int)

pred_ud = list(map(class_tag.tag, untag(test_ud)))
class_eval['sent_ud_greek'] = compute_sent_acc(test_ud, pred_ud)

pred_tt = list(map(class_tag.tag, untag(test_tt)))
class_eval['sent_tagged_text'] = compute_sent_acc(test_tt, pred_tt)

print('\n')
print(class_eval)

""" 3. deep learning """
deep_eval = {}

# word level: the second value returned by compute_metrics is kept as the
# score for each corpus
tag_val = deep_learning_tag(
    untag(val_100),
    'Greek_POS_DL',
    RESOURCES_DIR,
)
_, deep_eval['verification'] = compute_metrics(val_100, tag_val)

# NOTE(review): the timed region below spans both tagging and
# compute_metrics, so 'evaluate_time' includes the metric computation.
tic()
tag_int = deep_learning_tag(
    untag(test_int),
    'Greek_POS_DL',
    RESOURCES_DIR,
)
_, deep_eval['evaluate'] = compute_metrics(test_int, tag_int)
deep_eval['evaluate_time'] = toc()

tag_ud = deep_learning_tag(
    untag(test_ud),
    'Greek_POS_DL',
    RESOURCES_DIR,
)
_, deep_eval['ud_greek'] = compute_metrics(test_ud, tag_ud)

tag_tt = deep_learning_tag(
    untag(test_tt),
    'Greek_POS_DL',
    RESOURCES_DIR,
)
_, deep_eval['tagged_text'] = compute_metrics(test_tt, tag_tt)

# sentence level: reuse the taggings produced above
deep_eval['sent_evaluate'] = compute_sent_acc(test_int, tag_int)
deep_eval['sent_ud_greek'] = compute_sent_acc(test_ud, tag_ud)
deep_eval['sent_tagged_text'] = compute_sent_acc(test_tt, tag_tt)

print('\n')
print(deep_eval)