def evaluate(doc_lstm_model, attn_model, scoring_model, words_set, markable_set, feats):
    """Build a resolver from the three models and run it over ``markable_set``.

    Steps, in order:

    1. Call ``.eval()`` on each of the three models (presumably switching
       them to inference mode -- confirm against the model classes).
    2. For every ``(words, markables)`` pair drawn in lockstep from
       ``words_set`` and ``markable_set``: clear the document model's hidden
       state, run it over the words, and pass the result plus each markable
       through ``attn_model``.
    3. Store each document's list of attended embeddings in a dict keyed by
       the ``entity`` attribute of that document's first markable.
    4. Build a resolver with ``make_resolver(feats, <dict>, scoring_model)``
       and call ``coref.eval_on_dataset`` with it.

    :param doc_lstm_model: callable model exposing ``eval()`` and
        ``clear_hidden_state()``; invoked once per document on its words.
    :param attn_model: callable model exposing ``eval()``; invoked as
        ``attn_model(document_embeddings, markable)``.
    :param scoring_model: model exposing ``eval()``; handed to ``make_resolver``.
    :param words_set: iterable of per-document word inputs.
    :param markable_set: iterable of per-document markable sequences, aligned
        with ``words_set``; each sequence must be indexable and non-empty
        (``[0]`` is read).
    :param feats: passed through unchanged to ``make_resolver``.
    :returns: the resolver. The value returned by ``coref.eval_on_dataset``
        is not captured.
    """
    for model in (doc_lstm_model, attn_model, scoring_model):
        model.eval()

    # Side table of embeddings, used for getting around matcher's signature.
    embeddings_by_key = {}
    for doc_words, doc_markables in zip(words_set, markable_set):
        doc_lstm_model.clear_hidden_state()
        doc_embeddings = doc_lstm_model(doc_words)

        attended = []
        for markable in doc_markables:
            attended.append(attn_model(doc_embeddings, markable))

        # Hack ("don't try this at home"): the first markable's entity is
        # used as the lookup key for the whole document's embeddings.
        embeddings_by_key[doc_markables[0].entity] = attended

    resolver = make_resolver(feats, embeddings_by_key, scoring_model)
    coref.eval_on_dataset(resolver, markable_set)
    return resolver
def test_match_content_f1_d2_5():
    """Check the ``match_on_content`` resolver against minimum scores.

    Evaluates a resolver built from ``coref_rules.match_on_content`` on the
    module-level ``all_markables`` and requires each of the three returned
    values to exceed a fixed floor. The values are unpacked as f / r / p,
    presumably F1, recall and precision -- confirm against
    ``coref.eval_on_dataset``.

    NOTE(review): a second function with this same name appears later in
    this file; the later definition rebinds the name.
    """
    resolver = coref_rules.make_resolver(coref_rules.match_on_content)
    f_val, r_val, p_val = coref.eval_on_dataset(resolver, all_markables)

    # Floors are checked in this order; the first failing one raises.
    for observed, floor in ((f_val, .68), (r_val, .57), (p_val, .85)):
        assert_greater(observed, floor)
def test_match_last_tok_f1_d2_3():
    """Check the ``match_last_token`` resolver against minimum scores.

    Evaluates a resolver built from ``coref_rules.match_last_token`` on the
    module-level ``all_markables`` and requires each of the three returned
    values to exceed a fixed floor. The values are unpacked as f / r / p,
    presumably F1, recall and precision -- confirm against
    ``coref.eval_on_dataset``.

    NOTE(review): a second function with this same name appears later in
    this file; the later definition rebinds the name.
    """
    resolver = coref_rules.make_resolver(coref_rules.match_last_token)
    f_val, r_val, p_val = coref.eval_on_dataset(resolver, all_markables)

    # Floors are checked in this order; the first failing one raises.
    for observed, floor in ((f_val, .64), (r_val, .59), (p_val, .71)):
        assert_greater(observed, floor)
def test_match_nopro_f1_d2_3():
    """Pin the ``exact_match_no_pronouns`` resolver to exact r / p values.

    Evaluates a resolver built from ``coref_rules.exact_match_no_pronouns``
    on the module-level ``all_markables`` and compares the second and third
    returned values to reference numbers at 4 decimal places. The first
    returned value is unpacked but not checked. (r / p are presumably recall
    and precision -- confirm against ``coref.eval_on_dataset``.)
    """
    resolver = coref_rules.make_resolver(coref_rules.exact_match_no_pronouns)
    _unused_f, r_val, p_val = coref.eval_on_dataset(resolver, all_markables)

    # Reference values are checked in this order; the first mismatch raises.
    for observed, reference in ((r_val, 0.3028), (p_val, 0.9158)):
        assert_almost_equals(observed, reference, places=4)
def test_match_no_overlap_f1_d2_4():
    """Check the ``match_last_token_no_overlap`` resolver against minimum scores.

    Evaluates a resolver built from
    ``coref_rules.match_last_token_no_overlap`` on the module-level
    ``all_markables`` and requires each of the three returned values to
    exceed a fixed floor. The values are unpacked as f / r / p, presumably
    F1, recall and precision -- confirm against ``coref.eval_on_dataset``.
    """
    resolver = coref_rules.make_resolver(coref_rules.match_last_token_no_overlap)
    f_val, r_val, p_val = coref.eval_on_dataset(resolver, all_markables)

    # Floors are checked in this order; the first failing one raises.
    for observed, floor in ((f_val, .67), (r_val, .61), (p_val, .74)):
        assert_greater(observed, floor)
def test_match_nopro_f1_d2_2():
    """Check the ``exact_match_no_pronouns`` resolver against minimum scores.

    Evaluates a resolver built from ``coref_rules.exact_match_no_pronouns``
    on the module-level ``all_markables`` and requires each of the three
    returned values to exceed a fixed floor. The values are unpacked as
    f / r / p, presumably F1, recall and precision -- confirm against
    ``coref.eval_on_dataset``.

    NOTE(review): a second function with this same name, using different
    acceptance criteria, appears later in this file; the later definition
    rebinds the name.
    """
    resolver = coref_rules.make_resolver(coref_rules.exact_match_no_pronouns)
    f_val, r_val, p_val = coref.eval_on_dataset(resolver, all_markables)

    # Floors are checked in this order; the first failing one raises.
    for observed, floor in ((f_val, .64), (r_val, .48), (p_val, .94)):
        assert_greater(observed, floor)
def test_match_content_f1_d2_5():
    """Require the content-matching resolver to clear three score floors.

    A resolver is built from ``coref_rules.match_on_content`` and evaluated
    on the module-level ``all_markables``. ``coref.eval_on_dataset`` yields
    three values, unpacked as f / r / p (presumably F1, recall, precision --
    confirm against ``coref.eval_on_dataset``), each of which must be
    strictly greater than its floor.

    NOTE(review): an earlier function in this file has the same name; this
    definition replaces it when the module is loaded.
    """
    content_resolver = coref_rules.make_resolver(coref_rules.match_on_content)
    scores = coref.eval_on_dataset(content_resolver, all_markables)
    f_val, r_val, p_val = scores

    # Checked f, then r, then p; the first failing floor raises.
    floors = (.68, .57, .85)
    for observed, floor in zip((f_val, r_val, p_val), floors):
        assert_greater(observed, floor)
def test_match_last_tok_f1_d2_3():
    """Require the last-token-matching resolver to clear three score floors.

    A resolver is built from ``coref_rules.match_last_token`` and evaluated
    on the module-level ``all_markables``. ``coref.eval_on_dataset`` yields
    three values, unpacked as f / r / p (presumably F1, recall, precision --
    confirm against ``coref.eval_on_dataset``), each of which must be
    strictly greater than its floor.

    NOTE(review): an earlier function in this file has the same name; this
    definition replaces it when the module is loaded.
    """
    last_token_resolver = coref_rules.make_resolver(coref_rules.match_last_token)
    scores = coref.eval_on_dataset(last_token_resolver, all_markables)
    f_val, r_val, p_val = scores

    # Checked f, then r, then p; the first failing floor raises.
    floors = (.64, .59, .71)
    for observed, floor in zip((f_val, r_val, p_val), floors):
        assert_greater(observed, floor)
def test_match_nopro_f1_d2_2():
    """Check the ``exact_match_no_pronouns`` resolver against two accepted score profiles.

    A resolver is built from ``coref_rules.exact_match_no_pronouns`` and
    evaluated on the module-level ``all_markables``. The three returned
    values are unpacked as f / r / p (presumably F1, recall, precision --
    confirm against ``coref.eval_on_dataset``) and printed. The test passes
    when (r, p) strictly exceeds at least one of the accepted
    ``(r_floor, p_floor)`` pairs; f is printed but not checked.

    NOTE(review): an earlier function in this file has the same name with
    stricter checks; this definition replaces it when the module is loaded.
    """
    resolver = coref_rules.make_resolver(coref_rules.exact_match_no_pronouns)
    f, r, p = coref.eval_on_dataset(resolver, all_markables)

    # Pre-format into one string: a bare ``print f,r,p`` statement is a
    # SyntaxError on Python 3, while ``print(f, r, p)`` would print a tuple
    # on Python 2. This form emits "f r p" on both.
    print("%s %s %s" % (f, r, p))

    # Either profile is acceptable: (r > .48 and p > .94) or
    # (r > .405 and p > .97).
    accepted_floors = [(.48, .94), (.405, .97)]
    assert any(r > r_floor and p > p_floor
               for r_floor, p_floor in accepted_floors), (
        "r=%s, p=%s exceed none of the accepted (r, p) floors %s"
        % (r, p, accepted_floors))