예제 #1
0
def test_concordance_slots_regions(germaparl):
    """Verify the column names returned by ``Concordance.slots`` for slot regions.

    The query result (with anchors 1, 2 and 3 set in the query string) is
    passed to ``slots`` together with two explicit regions, ``['match', 1]``
    and ``[2, 3]``, and the single p-attribute ``word``. The resulting frame
    must have exactly the columns ``word``, ``match..1_word`` and
    ``2..3_word``.
    """
    corpus = get_corpus(germaparl)

    cqp_query = (
        r'[pos="NE"]? @1[pos="NE"] @2"\[" ([word="[A-Z]+"]+ "/"?)+ @3"\]"')
    # Keys 2 and 3 match the @2 / @3 markers in the query; presumably these
    # are position offsets applied to those anchors -- confirm against the
    # query API.
    anchor_corrections = {2: +1, 3: -1}

    dump = corpus.query(
        cqp_query,
        context=10,
        context_break='s',
        match_strategy='longest',
        corrections=anchor_corrections,
    )
    df_dump = dump.df

    conc = Concordance(corpus, df_dump)
    slot_regions = [['match', 1], [2, 3]]
    lines = conc.slots(df_dump, ['word'], slots=slot_regions)

    expected_columns = {"word", "match..1_word", "2..3_word"}
    assert set(lines.columns) == expected_columns
예제 #2
0
def test_concordance_slots_singletons(germaparl):
    """Verify the column names returned by ``Concordance.slots`` without explicit slots.

    ``slots`` is called with the p-attributes ``word`` and ``lemma`` and no
    ``slots`` argument. The resulting frame must have exactly one column per
    p-attribute for the plain attribute, for each of the anchors 1, 2 and 3,
    and for the ``match..matchend`` region.
    """
    corpus = get_corpus(germaparl)

    cqp_query = (
        r'[pos="NE"]? @1[pos="NE"] @2"\[" ([word="[A-Z]+"]+ "/"?)+ @3"\]"')
    # Keys 2 and 3 match the @2 / @3 markers in the query; presumably these
    # are position offsets applied to those anchors -- confirm against the
    # query API.
    anchor_corrections = {2: +1, 3: -1}

    dump = corpus.query(
        cqp_query,
        context=2,
        context_break='s',
        match_strategy='longest',
        corrections=anchor_corrections,
    )
    df_dump = dump.df

    conc = Concordance(corpus, df_dump)
    p_atts = ['word', 'lemma']
    lines = conc.slots(df_dump, p_atts)

    expected_columns = {
        "word",
        "lemma",
        "1_word",
        "1_lemma",
        "2_word",
        "2_lemma",
        "3_word",
        "3_lemma",
        "match..matchend_word",
        "match..matchend_lemma",
    }
    assert set(lines.columns) == expected_columns