def test_gloss_projection_unaligned(self):
    """
    Project gloss POS tags onto the language tier of the first instance in
    the test corpus, using ``unk_handling='keep'``, and check that the last
    projected language-tier tag has the value ``'UNK'``.
    """
    corpus_path = os.path.join(testfile_dir, "xigt/project_gloss_lang_tests.xml")
    corpus = xc_load(corpus_path)
    first_inst = corpus[0]

    project_gloss_pos_to_lang(first_inst, tag_method=INTENT_POS_PROJ, unk_handling='keep')

    # Look up the tags that the projection attached to the language tier.
    lang_tier_id = lang(first_inst).id
    projected_tags = pos_tag_tier(first_inst, lang_tier_id, INTENT_POS_PROJ)
    last_tag_value = projected_tags[-1].value()

    self.assertEqual('UNK', last_tag_value)
def filter_xc(xc, require_lang=False, require_gloss=False, require_trans=False,
              require_aln=False, require_gloss_pos=False, require_grammatical=False,
              max_instances=0, prev_good_instances=0):
    """
    Build a new corpus containing only the instances of *xc* that pass every
    requested requirement.

    Each instance is logged to ``FILTER_LOG`` as either a failure (with the
    reason in brackets) or a success. Accepted instances are appended to the
    new corpus and have ``sort_tiers()`` called on them.

    :param xc: Iterable of ``Igt`` instances to examine.
    :param require_lang: Reject instances for which ``lang(inst)`` raises
        ``NoNormLineException``.
    :param require_gloss: Same as above, for ``gloss(inst)``.
    :param require_trans: Same as above, for ``trans(inst)``.
    :param require_aln: Reject instances that lack a gloss or language tier,
        or for which ``word_align(gloss, lang)`` raises
        ``GlossLangAlignException``.
    :param require_gloss_pos: Reject instances that have no gloss tier or for
        which ``pos_tag_tier(inst, gloss.id)`` is ``None``.
    :param require_grammatical: Reject instances where any available language,
        gloss or translation line carries a truthy ``ODIN_JUDGMENT_ATTRIBUTE``.
    :param max_instances: When non-zero, stop as soon as
        ``prev_good_instances`` plus the successes so far reaches this value.
    :param prev_good_instances: Number of good instances already collected
        before this call; counted toward *max_instances*.
    :return: Tuple ``(new_corpus, examined, successes, failures)``.
    """
    new_corp = XigtCorpus()

    examined = 0
    failures = 0
    successes = 0

    my_filter = ''

    # The helpers below read ``inst`` when they are *called*, so defining them
    # once outside the loop still makes them act on the current instance.
    def fail(reason):
        nonlocal failures, my_filter
        my_filter = filter_string(inst).format("FAIL", '['+reason+']')
        failures += 1
        FILTER_LOG.info(my_filter)

    def success():
        nonlocal successes, my_filter
        my_filter = filter_string(inst).format("SUCCESS", "")
        successes += 1

    def trytier(f):
        # A missing normalized line is not a failure by itself; whether it
        # matters is decided by the require_* checks below.
        try:
            return f(inst)
        except NoNormLineException:
            return None

    for inst in xc:
        examined += 1
        assert isinstance(inst, Igt)

        lt = trytier(lang)
        gt = trytier(gloss)
        tt = trytier(trans)

        if require_lang and lt is None:
            fail("LANG")
            continue

        if require_gloss and gt is None:
            fail("GLOSS")
            continue

        if require_trans and tt is None:
            fail("TRANS")
            continue

        if require_aln:
            if gt is None:
                fail("ALIGN-GLOSS")
                continue
            if lt is None:
                fail("ALIGN-LANG")
                continue

            try:
                word_align(gt, lt)
            except GlossLangAlignException:
                fail("ALIGN")
                continue

        if require_grammatical:
            # Reset for every instance: otherwise a missing tier would either
            # raise UnboundLocalError (first instance) or silently reuse the
            # judgments collected for a previous instance.
            grammatical_ll = []
            grammatical_gl = None
            grammatical_tl = []

            if lt:
                grammatical_ll = [l for l in lang_lines(inst) if l.get_attribute(ODIN_JUDGMENT_ATTRIBUTE)]
            if gt:
                grammatical_gl = gloss_line(inst).get_attribute(ODIN_JUDGMENT_ATTRIBUTE)
            if tt:
                grammatical_tl = [l for l in trans_lines(inst) if l.get_attribute(ODIN_JUDGMENT_ATTRIBUTE)]

            if grammatical_ll or grammatical_gl or grammatical_tl:
                fail("UNGRAMMATICAL")
                continue

        if require_gloss_pos:
            # Without a gloss tier there can be no gloss POS tags; report that
            # as a filter failure instead of crashing on ``gt.id``.
            if gt is None or pos_tag_tier(inst, gt.id) is None:
                fail("GLOSS_POS")
                continue

        # Otherwise, attach to the new corpus.
        new_corp.append(inst)

        success()
        FILTER_LOG.info(my_filter)
        inst.sort_tiers()

        # -------------------------------------------
        # Break out of the loop if we've hit the maximum
        # number of good instances.
        # -------------------------------------------
        if max_instances and prev_good_instances+successes >= max_instances:
            break

    return new_corp, examined, successes, failures
def test_add_pos_tags(self):
    """
    Add ``self.tags`` as POS tags on the ``'gw'`` tier of ``self.igt`` and
    check that the tokens of the resulting POS tag tier equal ``self.tags``.
    """
    add_pos_tags(self.igt, 'gw', self.tags)
    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling.
    self.assertEqual(tier_tokens(pos_tag_tier(self.igt, 'gw')), self.tags)