def non_deriv_dom(inf, outf, proc):
    """
    Dominance filter: when a token has an annotation based on a non-derived
    transfer, remove its annotations which are based on derived transfers.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    `NonDerivTournament`; `inf` and `outf` are handed to its `proc_stream`.
    """
    tournament_args = decode_dom_arg(proc)
    tournament = NonDerivTournament(*tournament_args)
    return tournament.proc_stream(inf, outf)
def filter_align_dom(inf, outf, proc):
    """
    Dominance filter: when a token has an annotation based on aligned
    transfers, remove its annotations which are based on unaligned transfers.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    `AlignTournament`; `inf` and `outf` are handed to its `proc_stream`.
    """
    tournament_args = decode_dom_arg(proc)
    tournament = AlignTournament(*tournament_args)
    return tournament.proc_stream(inf, outf)
def finnpos_naive_lemma_dom(inf, outf, proc):
    """
    FinnPOS Dominance filter: Use FinnPOS annotations to support certain
    annotations over others, in terms of POS or lemma.

    Naive lemma filter: based on matching the lemma exactly, applied either
    as a requirement or as a dominance filter.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    `NaiveLemmaTournament`; `inf` and `outf` are handed to its `proc_stream`.
    """
    tournament_args = decode_dom_arg(proc)
    tournament = NaiveLemmaTournament(*tournament_args)
    return tournament.proc_stream(inf, outf)
def test_filter_deriv_dom():
    """Check `NonDerivTournament` in "dom" mode on two two-annotation sentences."""
    # NOTE(review): both fixtures are named for the alignment test; confirm
    # that they also encode the deriv/non-deriv distinction described below.
    deriv_filter = NonDerivTournament(*decode_dom_arg("dom"))

    # Case 1: one annotation has deriv and non-deriv support, the other has
    # only non-deriv support -- neither should dominate, so both survive.
    tied_sent = etree.fromstring(FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_ALIGNED)
    deriv_filter.proc_sent(tied_sent)
    tied_survivors = tied_sent.xpath("//annotation")
    assert len(tied_survivors) == 2

    # Case 2: one annotation has deriv and non-deriv support, the other has
    # only deriv support -- the first should dominate and be the sole survivor.
    dominated_sent = etree.fromstring(
        FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_UNALIGNED
    )
    deriv_filter.proc_sent(dominated_sent)
    all_survivors = dominated_sent.xpath("//annotation")
    assert len(all_survivors) == 1
    first_survivors = dominated_sent.xpath("//annotation[@id='0']")
    assert len(first_survivors) == 1
def test_filter_align_dom():
    """Check `AlignTournament` in "dom" mode on two two-annotation sentences."""
    align_filter = AlignTournament(*decode_dom_arg("dom"))

    # Case 1: one annotation has aligned and unaligned support, the other has
    # aligned support -- neither should dominate, so both survive.
    tied_sent = etree.fromstring(FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_ALIGNED)
    align_filter.proc_sent(tied_sent)
    tied_survivors = tied_sent.xpath("//annotation")
    assert len(tied_survivors) == 2

    # Case 2: one annotation has aligned and unaligned support, the other has
    # only unaligned support -- the first should dominate and be the sole
    # survivor.
    dominated_sent = etree.fromstring(
        FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_UNALIGNED
    )
    align_filter.proc_sent(dominated_sent)
    all_survivors = dominated_sent.xpath("//annotation")
    assert len(all_survivors) == 1
    first_survivors = dominated_sent.xpath("//annotation[@id='0']")
    assert len(first_survivors) == 1
def test_filter_has_support():
    """Check `HasSupportTournament` in "dom" mode with and without support."""
    support_filter = HasSupportTournament(*decode_dom_arg("dom"))

    # Case 1: of two annotations, one has no support -- the supported one
    # dominates and is the only annotation left.
    mixed_annotations = "".join(
        [ANNOTATION_MURHA_0_BOTH, ANNOTATION_MURHA_1_NO_SUPPORT]
    )
    mixed_xml = FILTER_ALIGN_DOM_TEST_CORPUS.substitute(
        {"annotations": mixed_annotations}
    )
    mixed_sent = etree.fromstring(mixed_xml)
    support_filter.proc_sent(mixed_sent)
    all_survivors = mixed_sent.xpath("//annotation")
    assert len(all_survivors) == 1
    first_survivors = mixed_sent.xpath("//annotation[@id='0']")
    assert len(first_survivors) == 1

    # Case 2: both annotations have support -- neither dominates, so both
    # are kept.
    supported_annotations = "".join(
        [ANNOTATION_MURHA_0_BOTH, ANNOTATION_MURHA_1_ALIGNED]
    )
    supported_xml = FILTER_ALIGN_DOM_TEST_CORPUS.substitute(
        {"annotations": supported_annotations}
    )
    supported_sent = etree.fromstring(supported_xml)
    support_filter.proc_sent(supported_sent)
    tied_survivors = supported_sent.xpath("//annotation")
    assert len(tied_survivors) == 2
def non_wiki_trg(inf, outf, proc):
    """
    Run the `PreferNonWikiTargetDom` filter from `inf` to `outf`.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    the filter; the result of its `proc_stream` is returned.
    """
    # NOTE(review): going by the class name, this presumably favours
    # annotations whose target is not wiki-derived -- confirm against
    # PreferNonWikiTargetDom itself.
    dom_args = decode_dom_arg(proc)
    dom_filter = PreferNonWikiTargetDom(*dom_args)
    return dom_filter.proc_stream(inf, outf)
def non_wiki_src(inf, outf, proc):
    """
    Run the `PreferNonWikiSourceDom` filter from `inf` to `outf`.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    the filter; the result of its `proc_stream` is returned.
    """
    # NOTE(review): going by the class name, this presumably favours
    # annotations whose source is not wiki-derived -- confirm against
    # PreferNonWikiSourceDom itself.
    dom_args = decode_dom_arg(proc)
    dom_filter = PreferNonWikiSourceDom(*dom_args)
    return dom_filter.proc_stream(inf, outf)
def filter_support(inf, outf, proc):
    """
    Remove annotations which have no support at all.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    `HasSupportTournament`; `inf` and `outf` are handed to its `proc_stream`.
    """
    tournament_args = decode_dom_arg(proc)
    tournament = HasSupportTournament(*tournament_args)
    return tournament.proc_stream(inf, outf)
def finnpos_non_recurs_dom(inf, outf, proc):
    """
    Remove annotations where one part of the lemma is supported by only a
    recurs.

    `proc` is decoded with `decode_dom_arg` into the constructor arguments of
    `LemmaPathTournament`; `inf` and `outf` are handed to its `proc_stream`.
    """
    tournament_args = decode_dom_arg(proc)
    tournament = LemmaPathTournament(*tournament_args)
    return tournament.proc_stream(inf, outf)