def evaluate_instance(inst, classifier, tagger):
    """
    Pair the supervised gloss-line POS tags of an instance with the tags the
    classifier assigns to the same gloss words.

    If the instance has no supervised tags on the gloss line but does have
    them on the language line, the language-line tags are first projected
    onto the gloss line. A failure of that projection (``RGXigtException``)
    is tolerated; the function then returns two empty lists.

    :param inst: Instance to evaluate. The helpers called here appear to add
                 alignments and tag tiers to it in place -- TODO confirm.
    :type inst: RGIgt
    :param classifier: Classifier handed to ``classify_gloss_pos``.
    :type classifier: MalletMaxent
    :param tagger: Not referenced by this function; kept so existing callers
                   keep working.
    :type tagger: StanfordPOSTagger
    :return: ``(sup_tags, cls_tags)``: two equally long lists of ``POSToken``.
             Entry *i* of each list is built from the same word; the first
             carries the supervised label, the second the classifier label
             (``'UNK'`` when the classifier tier has no item for that word).
    """
    # We will incrementally build up the tag sequences...
    sup_tags = []
    cls_tags = []

    # Get the supervised POS tags...
    sup_gloss_tier = pos_tag_tier(inst, GLOSS_WORD_ID)
    sup_lang_tier = pos_tag_tier(inst, LANG_WORD_ID)

    # If there are no supervised tags on the gloss line, but there are on the
    # language line, project them onto the gloss line.
    if sup_gloss_tier is None and sup_lang_tier is not None:
        try:
            add_gloss_lang_alignments(inst)
            project_lang_to_gloss(inst)
            sup_gloss_tier = pos_tag_tier(inst, GLOSS_WORD_ID)
        except RGXigtException:
            # Best effort: if projection fails, sup_gloss_tier stays None and
            # the instance contributes no tokens to the evaluation.
            pass

    # Truthiness (not an "is None" test) is deliberate here so that the
    # original behaviour for a missing or falsy tier is preserved.
    if not sup_gloss_tier:
        return sup_tags, cls_tags

    # Do the classification, then fetch the tier holding its output.
    classify_gloss_pos(inst, classifier)
    cls_tier = pos_tag_tier(inst, GLOSS_WORD_ID, tag_method=INTENT_POS_CLASS)

    for sup_item in sup_gloss_tier:
        # Resolve the word this supervised tag is aligned with; tags whose
        # target cannot be found are skipped.
        word_item = xigt_find(inst, id=sup_item.alignment)
        if not word_item:
            continue
        word = word_item.value()

        # Look up the classifier's tag for the same word.
        cls_item = xigt_find(cls_tier, alignment=sup_item.alignment)
        cls_tag = 'UNK' if cls_item is None else cls_item.value()

        sup_tags.append(POSToken(word, label=sup_item.value()))
        cls_tags.append(POSToken(word, label=cls_tag))

    return sup_tags, cls_tags
def gather_gloss_pos_stats(inst, subword_dict, feat_list):
    """
    Given an instance, look for the gloss POS tags and save statistics about
    them, so that we can later filter by the number of times each kind was
    seen.

    :param inst: Instance to process.
    :type inst: RGIgt
    :param subword_dict: This dictionary will record the number of times each
                         (word, TAG) pair has been seen.
    :type subword_dict: SubwordDict
    :param feat_list: Collection of enabled feature identifiers. When it
                      contains ``CLASS_FEATS_ALN`` the instance is
                      heuristically aligned first; in the code visible here
                      that is also the only case in which counts are added
                      to ``subword_dict``.
    :return: None; results are accumulated in ``subword_dict``.
    """
    # Grab the POS tiers and the gloss words (same call order as before).
    gloss_pos = gloss_tag_tier(inst)
    lang_pos = lang_tag_tier(inst)
    gloss_words = gloss(inst)

    if CLASS_FEATS_ALN in feat_list:
        heur_align_inst(inst)
        get_trans_glosses_alignment(inst, aln_method=INTENT_ALN_HEUR)

    # If there are POS tags on the language line but not the gloss line,
    # project them over and re-read the gloss tier.
    if gloss_pos is None and lang_pos is not None:
        add_gloss_lang_alignments(inst)
        project_lang_to_gloss(inst)
        gloss_pos = gloss_tag_tier(inst)

    # Without a gloss POS tier there is nothing to count.
    if gloss_pos is None:
        return

    # Walk the gloss words and pick up the tag aligned with each one.
    for idx, gloss_word in enumerate(gloss_words):
        # NOTE(review): the lookup searches the whole instance rather than
        # gloss_pos -- confirm this cannot match an item from another tier.
        tag_item = xigt_find(inst, alignment=gloss_word.id)
        if tag_item is None:
            continue

        # Lower-cased neighbours, or None at either end of the gloss line.
        prev_word = None
        if idx > 0:
            prev_word = gloss_words[idx - 1].value().lower()

        next_word = None
        if idx < len(gloss_words) - 1:
            next_word = gloss_words[idx + 1].value().lower()

        # NOTE(review): counts are only recorded when alignment features are
        # enabled -- confirm that is intended.
        if CLASS_FEATS_ALN in feat_list:
            subword_dict.add_word_tag(
                gloss_word.value().lower(),
                tag_item.value(),
                prev_word,
                next_word,
            )