def eval_proj(xc):
    """
    Compare projected gloss-word POS tags against the manual ones.

    Each instance is first passed through ``fix_ctn_gloss_line`` (with the
    projection tag method), then one token sequence is built from its manual
    tags and a parallel one from its projected tags. Both collections of
    sequences are handed to ``poseval`` with ``details=True``.

    :param xc: An iterable of instances providing ``get_pos_tags``.
    """
    unaligned = 'UNALIGNED'

    projected_sents = []
    manual_sents = []

    for inst in xc:
        fix_ctn_gloss_line(inst, tag_method=INTENT_POS_PROJ)

        # Fetch both tag tiers for the projection comparison.
        manual_tier = inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_MANUAL)
        projected_tier = inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_PROJ)

        manual_tokens = []
        projected_tokens = []

        for manual_tag in manual_tier:
            manual_label = manual_tag.value()
            manual_tokens.append(POSToken(manual_label, label=manual_label))

            # Look for a projected tag sharing this tag's alignment target;
            # with no projected tier at all there is nothing to look up.
            if projected_tier:
                match = projected_tier.find(alignment=manual_tag.attributes[ALIGNMENT])
            else:
                match = None

            # A missing projection is scored as the placeholder label so
            # both sequences stay the same length.
            projected_label = match.value() if match else unaligned
            projected_tokens.append(POSToken(projected_label, label=projected_label))

        manual_sents.append(manual_tokens)
        projected_sents.append(projected_tokens)

    poseval(projected_sents, manual_sents, details=True)
def eval_classifier(c, inst_list, context_feats=False, posdict=None):
    """
    Re-tag each instance with a classifier and score the result against the
    manual POS tags.

    Every instance is copied, stripped of its POS tags, and re-tagged on the
    gloss line with ``c``. The re-tagged copies are all written to
    ``./enriched_ctn_dev.xml`` in the current working directory, including
    those later excluded from scoring.

    :param c: The classifier
    :param inst_list: A list of Igt instances to test against. Must already
                      have POS tags.
    :param context_feats: Passed as both ``feat_next_gram`` and
                          ``feat_prev_gram`` to ``classify_gloss_pos``.
    :param posdict: Passed through to ``classify_gloss_pos``.
    :return: The result of ``poseval`` over the scored sequences.
    """
    gold_sents = []
    eval_sents = []

    to_dump = XigtCorpus()

    for inst in inst_list:
        # Work on a copy so the gold tags on the original stay intact.
        to_tag = inst.copy()
        strip_pos(to_tag)

        # Do the classification.
        to_tag.classify_gloss_pos(c,
                                  lowercase=True,
                                  feat_next_gram=context_feats,
                                  feat_prev_gram=context_feats,
                                  posdict=posdict)

        to_dump.append(to_tag)

        # NOTE: the tag-fixing step is currently disabled here:
        # fix_ctn_gloss_line(to_tag, tag_method=INTENT_POS_CLASS)

        # Now, retrieve eval/gold.
        eval_tags = [v.value() for v in
                     to_tag.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_CLASS)]
        gold_tags = [v.value() for v in
                     inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_MANUAL)]

        # Only the labels are compared, so the token form is a fixed dummy.
        tag_tokens = [POSToken('a', label=tag) for tag in eval_tags]
        gold_tokens = [POSToken('a', label=tag) for tag in gold_tags]

        # Sequences of differing length cannot be scored position by
        # position, so the instance is left out of the evaluation.
        if len(tag_tokens) != len(gold_tokens):
            print("LENGTH OF SEQUENCE IS MISMATCHED")
            continue

        gold_sents.append(gold_tokens)
        eval_sents.append(tag_tokens)

    # Use a context manager so the output file is flushed and closed even if
    # serialization raises.
    with open('./enriched_ctn_dev.xml', 'w') as out_f:
        xigtxml.dump(out_f, to_dump)

    return poseval(eval_sents, gold_sents, details=True, csv=True, matrix=True)