def test_concordance_slots_regions(germaparl):
    """Check the columns produced when slots are passed as explicit regions.

    Two regions are requested -- ``match``..``1`` and ``2``..``3`` -- and the
    resulting frame must contain exactly one ``<start>..<end>_word`` column
    per region next to the plain ``word`` column.
    """
    # The query sets three numbered anchors (@1, @2, @3).
    cqp_query = (
        r'[pos="NE"]? @1[pos="NE"] @2"\[" ([word="[A-Z]+"]+ "/"?)+ @3"\]"'
    )
    # NOTE(review): the corrections presumably move anchors 2 and 3 off the
    # bracket tokens they are set on -- confirm against the query API.
    anchor_corrections = {2: 1, 3: -1}

    corpus = get_corpus(germaparl)
    dump = corpus.query(
        cqp_query,
        context=10,
        context_break='s',
        match_strategy='longest',
        corrections=anchor_corrections,
    )
    df_dump = dump.df

    slot_regions = [['match', 1], [2, 3]]
    result = Concordance(corpus, df_dump).slots(
        df_dump, ['word'], slots=slot_regions
    )

    expected_columns = {"word", "match..1_word", "2..3_word"}
    assert set(result.columns) == expected_columns
def test_concordance_slots_singletons(germaparl):
    """Check the columns produced when no explicit ``slots`` are passed.

    With the p-attributes ``word`` and ``lemma`` requested, the resulting
    frame must contain, for each attribute, the plain column, one column per
    numbered anchor (1, 2, 3) and one ``match..matchend`` column.
    """
    # The query sets three numbered anchors (@1, @2, @3).
    cqp_query = (
        r'[pos="NE"]? @1[pos="NE"] @2"\[" ([word="[A-Z]+"]+ "/"?)+ @3"\]"'
    )
    # NOTE(review): the corrections presumably move anchors 2 and 3 off the
    # bracket tokens they are set on -- confirm against the query API.
    anchor_corrections = {2: 1, 3: -1}

    corpus = get_corpus(germaparl)
    dump = corpus.query(
        cqp_query,
        context=2,
        context_break='s',
        match_strategy='longest',
        corrections=anchor_corrections,
    )
    df_dump = dump.df

    result = Concordance(corpus, df_dump).slots(df_dump, ['word', 'lemma'])

    expected_columns = {
        "word",
        "lemma",
        "1_word",
        "1_lemma",
        "2_word",
        "2_lemma",
        "3_word",
        "3_lemma",
        "match..matchend_word",
        "match..matchend_lemma",
    }
    assert set(result.columns) == expected_columns