def evaluate_classifier_on_instances(inst_list, classifier, feat_list, pos_class_matrix, gold_tagmap=None):
    """
    Run the classifier over each instance's gloss line and score it against the manual tags.

    Instances without a manually tagged gloss POS tier are skipped. For the
    remaining instances the classifier is applied, and every classifier tag
    that has a manual tag with the same alignment is compared to it and
    recorded in ``pos_class_matrix``.

    :param inst_list: Iterable of instances to evaluate.
    :param classifier: Classifier handed through to ``classify_gloss_pos``.
    :param feat_list: Collection of ``CLASS_FEATS_*`` constants selecting which
                      features ``classify_gloss_pos`` should use.
    :param pos_class_matrix: Object whose ``add(gold_tag, classifier_tag)`` is
                             called once per comparison.
    :param gold_tagmap: Optional tag map; when given, its ``get`` is used to
                        remap each manual tag before comparison.
    :return: ``(matches, compares, accuracy)`` with accuracy as a percentage.
             Accuracy is ``0.0`` when nothing could be compared.
    """
    # The POS dictionary is only loaded when one of the dictionary-based
    # features is requested; otherwise False is passed along in its place.
    dict_feats = (CLASS_FEATS_DICT, CLASS_FEATS_PDICT, CLASS_FEATS_NDICT)
    pd = load_posdict() if any(feat in feat_list for feat in dict_feats) else False

    matches = 0
    compares = 0

    for inst in inst_list:
        sup_postier = gloss_tag_tier(inst, tag_method=INTENT_POS_MANUAL)
        if sup_postier is None:
            continue

        # NOTE(review): the result is no longer used here, but the call is kept
        # in case gloss() prepares the gloss tier on the instance -- confirm.
        gloss(inst)

        classify_gloss_pos(inst, classifier,
                           posdict=pd,
                           feat_prev_gram=CLASS_FEATS_PRESW in feat_list,
                           feat_next_gram=CLASS_FEATS_NEXSW in feat_list,
                           feat_dict=CLASS_FEATS_DICT in feat_list,
                           feat_prev_gram_dict=CLASS_FEATS_PDICT in feat_list,
                           feat_next_gram_dict=CLASS_FEATS_NDICT in feat_list,
                           feat_suffix=CLASS_FEATS_SUF in feat_list,
                           feat_prefix=CLASS_FEATS_PRE in feat_list,
                           feat_morph_num=CLASS_FEATS_NUMSW in feat_list,
                           feat_has_number=CLASS_FEATS_NUM in feat_list,
                           feat_basic=CLASS_FEATS_SW in feat_list)

        cls_postier = gloss_tag_tier(inst, tag_method=INTENT_POS_CLASS)

        for cls_tag in cls_postier:
            sup_tag = xigt_find(sup_postier, alignment=cls_tag.alignment)
            if sup_tag is None:
                # No manual tag for this word, so there is nothing to score.
                continue

            sup_tag_v = sup_tag.value()
            if gold_tagmap is not None:
                sup_tag_v = gold_tagmap.get(sup_tag_v)

            cls_tag_v = cls_tag.value()
            pos_class_matrix.add(sup_tag_v, cls_tag_v)
            if cls_tag_v == sup_tag_v:
                matches += 1
            compares += 1

    # Guard against ZeroDivisionError when no tag pair was ever compared
    # (e.g. no instance carries manual gloss tags).
    accuracy = matches / compares * 100 if compares else 0.0
    return matches, compares, accuracy
def gather_gloss_pos_stats(inst, subword_dict, feat_list):
    """
    Collect (word, tag) statistics from the gloss line of a single instance.

    When the instance has language-line POS tags but no gloss-line POS tags,
    the language tags are projected onto the gloss line first. Each gloss word
    for which an aligned item is found is then reported to ``subword_dict``
    together with its lowercased neighbouring words.

    :param inst: Instance to process.
    :type inst: RGIgt
    :param subword_dict: Receives one ``add_word_tag`` call per reported word.
    :type subword_dict: SubwordDict
    :param feat_list: Collection of ``CLASS_FEATS_*`` constants; only
                      ``CLASS_FEATS_ALN`` is consulted in this function.
    """
    gloss_pos = gloss_tag_tier(inst)
    lang_pos = lang_tag_tier(inst)
    gloss_words = gloss(inst)

    use_alignment = CLASS_FEATS_ALN in feat_list

    if use_alignment:
        heur_align_inst(inst)
        get_trans_glosses_alignment(inst, aln_method=INTENT_ALN_HEUR)

    # POS tags exist on the language line only: project them to the gloss line.
    if gloss_pos is None and lang_pos is not None:
        add_gloss_lang_alignments(inst)
        project_lang_to_gloss(inst)
        gloss_pos = gloss_tag_tier(inst)

    # Without a gloss POS tier there is nothing to count.
    if gloss_pos is None:
        return

    for idx, gloss_word in enumerate(gloss_words):
        # NOTE(review): this searches the whole instance rather than the gloss
        # POS tier fetched above -- confirm that is intended.
        tag = xigt_find(inst, alignment=gloss_word.id)
        if tag is None:
            continue

        prev_word = None
        if idx > 0:
            prev_word = gloss_words[idx - 1].value().lower()

        next_word = None
        if idx < len(gloss_words) - 1:
            next_word = gloss_words[idx + 1].value().lower()

        # NOTE(review): counts are only recorded when CLASS_FEATS_ALN is
        # requested; verify this guard is meant to cover the add_word_tag call.
        if use_alignment:
            subword_dict.add_word_tag(gloss_word.value().lower(), tag.value(), prev_word, next_word)
def test_proj_method(method):
    """
    Project translation POS tags onto the gloss line with ``method`` and verify the result.

    ``self`` and ``inst`` are not parameters; they are resolved from the
    surrounding scope.

    :param method: Alignment method passed to ``project_trans_pos_to_gloss``
                   and expected to be recorded on the projected tier.
    """
    project_trans_pos_to_gloss(inst, aln_method=method)
    gtt = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)

    # Assert on the projected tier itself. The previous call passed ``inst`` as
    # the object under test and ``gtt`` as the failure message, so a missing
    # tier was never detected by this assertion.
    self.assertIsNotNone(gtt)
    self.assertEqual(get_intent_proj_aln_method(gtt), method)
def evaluate_pos_projections_on_file(lang, xc, plma, pos_proj_matrix, tagger, gold_tagmap=None, trans_tagmap=None, outstream=sys.stdout):
    """
    Score projected gloss POS tags against manual gloss tags for every usable instance.

    An instance is used only if it has manual gloss tags, manual translation
    tags and a manual translation-gloss alignment. For each such instance the
    translation tags are projected to the gloss line under every combination
    of alignment method (manual, heuristic) and translation tag source
    (manual, tagger), and the projected tags are compared with the gold tags.

    :param lang: Language key under which results are added to ``plma``.
    :param xc: Corpus to iterate over; its ``id`` seeds the returned corpus.
    :type plma: PerLangMethodAccuracies
    :type pos_proj_matrix: POSMatrix
    :param tagger: Tagger handed to ``tag_trans_pos``.
    :param gold_tagmap: Optional tag map applied to gold tags.
    :param trans_tagmap: Optional tag map applied to projected tags.
    :param outstream: Stream that receives the formatted ``plma`` at the end.
    :return: A new ``XigtCorpus`` created from ``xc.id``; nothing is added to
             it within this function.
    """
    def _remap(tagmap, tag):
        # Remap the tag if a map was supplied, keeping the original tag when
        # the map cannot translate it.
        if tagmap is None:
            return tag
        try:
            return tagmap.get(tag)
        except TagMapException:
            return tag

    new_xc = XigtCorpus(xc.id)

    for inst in xc:
        gold_tier = gloss_tag_tier(inst, INTENT_POS_MANUAL)
        trans_tier = trans_tag_tier(inst, INTENT_POS_MANUAL)
        manual_aln = get_trans_gloss_alignment(inst, INTENT_ALN_MANUAL)

        # Manual gloss tags, manual trans tags and manual alignment are all required.
        if gold_tier is None or manual_aln is None or trans_tier is None:
            continue

        # Produce the heuristic alignment, then tag the translation line.
        heur_align_inst(inst)
        tag_trans_pos(inst, tagger=tagger)

        # Evaluate every pairing of alignment method and translation tag source.
        for aln_method in (INTENT_ALN_MANUAL, INTENT_ALN_HEUR):
            for trans_tag_method in (INTENT_POS_MANUAL, INTENT_POS_TAGGER):
                project_trans_pos_to_gloss(inst, aln_method=aln_method, trans_tag_method=trans_tag_method)
                projected_tier = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)

                matches = 0
                compares = 0

                for gloss_word in gloss(inst):
                    gold_tag = xigt_find(gold_tier, alignment=gloss_word.id)
                    proj_tag = xigt_find(projected_tier, alignment=gloss_word.id)

                    # Only words carrying a gold tag take part in the comparison.
                    if gold_tag is None:
                        continue

                    gold_tag_v = _remap(gold_tagmap, gold_tag.value())

                    if proj_tag is None:
                        proj_str = '**UNK'
                    else:
                        proj_str = _remap(trans_tagmap, proj_tag.value())

                    pos_proj_matrix.add(gold_tag_v, proj_str)

                    if proj_tag is not None and proj_str == gold_tag_v:
                        matches += 1
                    compares += 1

                plma.add(lang, '{}:{}'.format(aln_method, trans_tag_method), matches, compares)

    outstream.write('{}\n'.format(plma))
    return new_xc