# Fit the model on the training data and keep the object returned by fit();
# its `.history` attribute is saved further below.
pos_model_history = pos_model.fit(train_X, train_y)
# Collects the timing/accuracy figures for the deep-learning tagger.
deeplearn_eval = dict()
# NOTE(review): toc() is presumably paired with a tic() issued before this
# chunk, so that the value covers the fit() call above -- confirm.
deeplearn_eval['train_time'] = toc()
print(deeplearn_eval['train_time'])

# review training results
plot_model_performance(pos_model_history)
# Render the underlying model to an image file under RESULTS_DIR, with
# show_shapes enabled.
plot_model(pos_model.model, to_file=RESULTS_DIR+'Greek_POS_deep_model.png', 
           show_shapes=True)


""" 4. save and test the model """

# temporarily save the trained model, history and details
# The model, the dict vectorizer and the label encoder are written to
# RESOURCES_DIR under the 'Greek_POS_DL' prefix; the training history is
# written to RESULTS_DIR instead.
pos_model.model.save(RESOURCES_DIR+'Greek_POS_DL.h5')
save_tagger(RESULTS_DIR+'Greek_POS_DL_History.pkl', pos_model_history.history)   
save_tagger(RESOURCES_DIR+'Greek_POS_DL_DictVectorizer.pkl', dict_vectorizer)   
save_tagger(RESOURCES_DIR+'Greek_POS_DL_LabelEncoder.pkl', label_encoder)   

# test the results
# Only the tagging call sits between tic() and toc(). The tagger is addressed
# by name and directory.
# NOTE(review): presumably deep_learning_tag reloads the artefacts saved just
# above, which is why the save step has to come first -- confirm.
tic()
val_tagged = deep_learning_tag(untag(val_sents), 'Greek_POS_DL', RESOURCES_DIR)
deeplearn_eval['test_time'] = toc()
# compute_metrics returns two values; only the second is kept here, stored as
# the test accuracy.
_, deeplearn_eval['test_accuracy'] = compute_metrics(val_sents, val_tagged)
display_training_metrics(deeplearn_eval)


""" 5. look at errors and metrics """
# Keep the first value returned by compute_metrics this time.
# NOTE(review): compute_metrics is also called with these same arguments in
# the testing step earlier in the script; if the helper has no side effects
# (e.g. printing), a single call could supply both values -- confirm.
metrics, _ = compute_metrics(val_sents, val_tagged)
# Compare the reference sentences with the predicted ones, passing the tags
# of interest via `tags`.
# NOTE(review): the meaning of the tag codes is not visible here -- confirm.
output, comparison = compare_results(val_sents, val_tagged, tags=['NO', 'AJ', 'PN'])
Example #2
0
              'Accuracy (%)', [d[1] for d in tag3_eval_opt],
              'Scoring Time (s)', [d[2] for d in tag3_eval_opt])
""" 1. re-run tagger with changed corpus size """

# load the corpus as tagged sentences
# read_corpus returns three values, unpacked as the train / validation / test
# sentence sets.
# NOTE(review): proportion=70 is presumably a percentage of the corpus and
# TAG_LENGTH presumably truncates the tags -- confirm against read_corpus.
train_sents, val_sents, test_sents = read_corpus('INTERA',
                                                 role='train',
                                                 proportion=70,
                                                 tag_length=TAG_LENGTH)

# Naive Bayes tagger
# Collects the timing/accuracy figures for this tagger.
tag3_eval = dict()
# train
# The tic()/toc() pair brackets the tagger's construction, which receives the
# training sentences and the add_features feature detector.
tic()
tag3_tagger = ClassifierBasedTagger(train=train_sents,
                                    feature_detector=add_features)
tag3_eval['train_time'] = toc()
# test
# The timed span here is the evaluate() call on the validation sentences; its
# return value is stored as the test accuracy.
tic()
tag3_eval['test_accuracy'] = tag3_tagger.evaluate(val_sents)
tag3_eval['test_time'] = toc()
# display results
display_training_metrics(tag3_eval)
""" 2. look at errors and metrics """
# Tag the validation sentences again (after stripping them with untag) so the
# predictions themselves are available for inspection.
tag3_res = tag3_tagger.tag_sents(untag(val_sents))
# Keep the first value returned by compute_metrics; the second is discarded.
metrics, _ = compute_metrics(val_sents, tag3_res)
# Compare the reference sentences with the predicted ones, passing the tags
# of interest via `tags`.
# NOTE(review): the meaning of the tag codes is not visible here -- confirm.
output, comparison = compare_results(val_sents,
                                     tag3_res,
                                     tags=['NO', 'AJ', 'AD', 'PN'])
""" 3. save the tagger """
# Persist the trained tagger object under RESOURCES_DIR.
save_tagger(RESOURCES_DIR + 'Greek_POS_class.pkl', tag3_tagger)
# sentence level
# For each test set: strip the sentences with untag, tag them one sentence at
# a time with class_tag, then store the value returned by compute_sent_acc
# under that set's key in class_eval.
pred_int = list(map(class_tag.tag, untag(test_int)))
class_eval['sent_evaluate'] = compute_sent_acc(test_int, pred_int)

pred_ud = list(map(class_tag.tag, untag(test_ud)))
class_eval['sent_ud_greek'] = compute_sent_acc(test_ud, pred_ud)

pred_tt = list(map(class_tag.tag, untag(test_tt)))
class_eval['sent_tagged_text'] = compute_sent_acc(test_tt, pred_tt)

# Blank separator, then dump every figure collected for this tagger.
print('\n')
print(class_eval)


""" 3. deep learning """
# Collects the evaluation figures for the deep-learning tagger.
deep_eval = dict()
# word level
# Each data set is stripped with untag and tagged through the model addressed
# as 'Greek_POS_DL' in RESOURCES_DIR; the second value returned by
# compute_metrics is stored under that set's key.
tag_val = deep_learning_tag(untag(val_100), 'Greek_POS_DL', RESOURCES_DIR)
_, deep_eval['verification'] = compute_metrics(val_100, tag_val)
# Only the test_int run is timed.
# NOTE(review): the tic()/toc() span covers both the tagging call and
# compute_metrics -- confirm that including the metric computation in
# 'evaluate_time' is intended.
tic()
tag_int = deep_learning_tag(untag(test_int), 'Greek_POS_DL', RESOURCES_DIR)
_, deep_eval['evaluate'] = compute_metrics(test_int, tag_int)
deep_eval['evaluate_time'] = toc()
tag_ud = deep_learning_tag(untag(test_ud), 'Greek_POS_DL', RESOURCES_DIR)
_, deep_eval['ud_greek'] = compute_metrics(test_ud, tag_ud)
tag_tt = deep_learning_tag(untag(test_tt), 'Greek_POS_DL', RESOURCES_DIR)
_, deep_eval['tagged_text'] = compute_metrics(test_tt, tag_tt)
# sentence level
# The predictions produced in the word-level step are reused here; nothing is
# tagged a second time.
deep_eval['sent_evaluate'] = compute_sent_acc(test_int, tag_int)
deep_eval['sent_ud_greek'] = compute_sent_acc(test_ud, tag_ud)
deep_eval['sent_tagged_text'] = compute_sent_acc(test_tt, tag_tt)
# Blank separator, then dump every figure collected for this tagger.
print('\n')
print(deep_eval)