Beispiel #1
0
def non_deriv_dom(inf, outf, proc):
    """
    Dominance filter favouring non-derived transfers.

    When a token has an annotation based on a non-derived transfer, the
    annotations of that same token which are based on derived transfers are
    removed.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to `NonDerivTournament`; the result of its `proc_stream(inf,
    outf)` call is returned.
    """
    dom_args = decode_dom_arg(proc)
    tournament = NonDerivTournament(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #2
0
def filter_align_dom(inf, outf, proc):
    """
    Dominance filter favouring aligned transfers.

    When a token has an annotation based on aligned transfers, the annotations
    of that same token which are based on unaligned transfers are removed.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to `AlignTournament`; the result of its `proc_stream(inf,
    outf)` call is returned.
    """
    dom_args = decode_dom_arg(proc)
    tournament = AlignTournament(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #3
0
def finnpos_naive_lemma_dom(inf, outf, proc):
    """
    FinnPOS dominance filter: FinnPOS annotations are used to support certain
    annotations over others, in terms of POS or lemma.

    This is the naive lemma variant, which is based on matching the lemma
    exactly. It can act either as a requirement or as a dominance filter.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to `NaiveLemmaTournament`; the result of its
    `proc_stream(inf, outf)` call is returned.
    """
    dom_args = decode_dom_arg(proc)
    tournament = NaiveLemmaTournament(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #4
0
def test_filter_deriv_dom():
    """
    Run a `NonDerivTournament` built from `decode_dom_arg("dom")` over two
    fixture sentences and count the annotations left in each.
    """
    # NOTE(review): the fixtures are named for alignment rather than
    # derivation; presumably they encode both properties -- confirm.
    deriv_tournament = NonDerivTournament(*decode_dom_arg("dom"))

    # Case 1: one annotation mixes deriv and non-deriv, the other is purely
    # non-deriv. Neither dominates, so both annotations remain.
    mixed_vs_non_deriv = etree.fromstring(
        FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_ALIGNED
    )
    deriv_tournament.proc_sent(mixed_vs_non_deriv)
    remaining = mixed_vs_non_deriv.xpath("//annotation")
    assert len(remaining) == 2

    # Case 2: one annotation mixes deriv and non-deriv, the other is purely
    # deriv. The first dominates, so only annotation id 0 remains.
    mixed_vs_deriv = etree.fromstring(
        FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_UNALIGNED
    )
    deriv_tournament.proc_sent(mixed_vs_deriv)
    remaining = mixed_vs_deriv.xpath("//annotation")
    assert len(remaining) == 1
    winners = mixed_vs_deriv.xpath("//annotation[@id='0']")
    assert len(winners) == 1
Beispiel #5
0
def test_filter_align_dom():
    """
    Run an `AlignTournament` built from `decode_dom_arg("dom")` over two
    fixture sentences and count the annotations left in each.
    """
    align_tournament = AlignTournament(*decode_dom_arg("dom"))

    # Case 1: one annotation has aligned and unaligned support, the other has
    # aligned support. Neither dominates, so both annotations remain.
    both_aligned = etree.fromstring(FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_ALIGNED)
    align_tournament.proc_sent(both_aligned)
    remaining = both_aligned.xpath("//annotation")
    assert len(remaining) == 2

    # Case 2: one annotation has aligned and unaligned support, the other has
    # only unaligned support. The first dominates, so only annotation id 0
    # remains.
    one_unaligned = etree.fromstring(
        FILTER_ALIGN_DOM_TEST_CORPUS_BOTH_UNALIGNED
    )
    align_tournament.proc_sent(one_unaligned)
    remaining = one_unaligned.xpath("//annotation")
    assert len(remaining) == 1
    winners = one_unaligned.xpath("//annotation[@id='0']")
    assert len(winners) == 1
Beispiel #6
0
def test_filter_has_support():
    """
    Run a `HasSupportTournament` built from `decode_dom_arg("dom")` over two
    sentences assembled from the shared corpus template, and count the
    annotations left in each.
    """
    support_tournament = HasSupportTournament(*decode_dom_arg("dom"))

    # Case 1: the second annotation has no support, so the supported one
    # dominates and only annotation id 0 remains.
    unsupported_pair = "".join(
        [ANNOTATION_MURHA_0_BOTH, ANNOTATION_MURHA_1_NO_SUPPORT]
    )
    unsupported_xml = FILTER_ALIGN_DOM_TEST_CORPUS.substitute(
        {"annotations": unsupported_pair}
    )
    one_unsupported = etree.fromstring(unsupported_xml)
    support_tournament.proc_sent(one_unsupported)
    remaining = one_unsupported.xpath("//annotation")
    assert len(remaining) == 1
    winners = one_unsupported.xpath("//annotation[@id='0']")
    assert len(winners) == 1

    # Case 2: both annotations have support, so neither dominates and both
    # remain.
    supported_pair = "".join(
        [ANNOTATION_MURHA_0_BOTH, ANNOTATION_MURHA_1_ALIGNED]
    )
    supported_xml = FILTER_ALIGN_DOM_TEST_CORPUS.substitute(
        {"annotations": supported_pair}
    )
    both_supported = etree.fromstring(supported_xml)
    support_tournament.proc_sent(both_supported)
    remaining = both_supported.xpath("//annotation")
    assert len(remaining) == 2
Beispiel #7
0
def non_wiki_trg(inf, outf, proc):
    """
    Process the stream from `inf` to `outf` with `PreferNonWikiTargetDom`.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to the class; the result of its `proc_stream(inf, outf)`
    call is returned.

    NOTE(review): going by the class name, this presumably prefers
    annotations with a non-wiki target -- confirm against the class.
    """
    dom_args = decode_dom_arg(proc)
    tournament = PreferNonWikiTargetDom(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #8
0
def non_wiki_src(inf, outf, proc):
    """
    Process the stream from `inf` to `outf` with `PreferNonWikiSourceDom`.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to the class; the result of its `proc_stream(inf, outf)`
    call is returned.

    NOTE(review): going by the class name, this presumably prefers
    annotations with a non-wiki source -- confirm against the class.
    """
    dom_args = decode_dom_arg(proc)
    tournament = PreferNonWikiSourceDom(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #9
0
def filter_support(inf, outf, proc):
    """
    Drop annotations which have no support whatsoever.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to `HasSupportTournament`; the result of its
    `proc_stream(inf, outf)` call is returned.
    """
    dom_args = decode_dom_arg(proc)
    tournament = HasSupportTournament(*dom_args)
    return tournament.proc_stream(inf, outf)
Beispiel #10
0
def finnpos_non_recurs_dom(inf, outf, proc):
    """
    Remove annotations in which one part of the lemma is supported by only a
    recurs.

    `proc` is decoded with `decode_dom_arg` and the decoded values are passed
    positionally to `LemmaPathTournament`; the result of its
    `proc_stream(inf, outf)` call is returned.
    """
    dom_args = decode_dom_arg(proc)
    tournament = LemmaPathTournament(*dom_args)
    return tournament.proc_stream(inf, outf)