# --- Concordance example on the "SZ_2009_14" corpus -------------------------
corpus = Corpus(corpus_name="SZ_2009_14")

# The @0 / @1 / @2 markers sit in front of the (optional) "Angela" token,
# the "Merkel" token and the "CDU" token; the parentheses are matched
# literally via escaped word forms.
query = (
    r'@0[lemma="Angela"]? @1[lemma="Merkel"] [word="\("] @2[lemma="CDU"] [word="\)"]'
)
result = corpus.query(query)
concordance = corpus.concordance(result)

# Print the breakdown, the size, and the lines requested for id 567792.
print(concordance.breakdown)
print(concordance.size)
print(concordance.lines([567792]))


# --- Collocates example on the same corpus ----------------------------------
corpus = Corpus(corpus_name="SZ_2009_14")

# Same token pattern as above but without markers; this time the query also
# asks for the 'text_id' attribute via s_meta.
query = '[lemma="Angela"]? [lemma="Merkel"] [word="\\("] [lemma="CDU"] [word="\\)"]'
result = corpus.query(
    query,
    s_meta=['text_id'],
)
collocates = corpus.collocates(result)

# Show the head of the collocate table for window=5, ordered by
# "log_likelihood".
print(
    collocates.show(window=5, order="log_likelihood").head()
)


@pytest.mark.readme_keywords
def test_keywords_sz():
    """Read the SZ metadata table and select one ressort/month slice of ids.

    The table is read from a tab-separated file with its first column as
    the index and every column typed as ``str``.  The index values of all
    rows whose ``ressort`` is "Panorama" and whose ``month`` is '201103'
    are collected into a set, and the index is copied into an ``s_id``
    column.

    NOTE(review): ``ids`` and the ``s_id`` column are not used in the
    visible part of this function -- confirm whether the remainder of the
    test is missing here.
    """
    meta = read_csv(
        "/home/ausgerechnet/corpora/cwb/upload/efe/sz-2009-14.tsv.gz",
        sep="\t",
        index_col=0,
        dtype=str,
    )

    # Build the two row filters separately, then combine them.
    in_ressort = meta['ressort'] == "Panorama"
    in_month = meta['month'] == '201103'
    ids = set(meta.loc[in_ressort & in_month].index.values)

    meta['s_id'] = meta.index
from ccc import Corpus

# Open the corpus named "BREXIT_V20190522_DEDUP".
corpus = Corpus("BREXIT_V20190522_DEDUP")

# Find occurrences of the lemma "Merkel"; in CQP, the %c and %d flags make
# the match ignore case and diacritics.  The query is run with context=20
# and s_context='tweet'.
matches = corpus.query(
    '[lemma="Merkel"%cd]',
    context=20,
    s_context='tweet',
)

# Compute collocates of the matches with p_query="lemma".
collocates = corpus.collocates(
    matches,
    p_query="lemma",
)

# Order the collocate table by 'log_likelihood' and write it out as a
# tab-separated file.
df = collocates.show(order='log_likelihood')
df.to_csv(
    "BREXIT_merkel-ll.tsv",
    sep="\t",
)