예제 #1
0
파일: demo-ccc.py 프로젝트: dokempf/cwb-ccc
# Open the corpus this demo queries.
corpus = Corpus(corpus_name="SZ_2009_14")

# One query token per line: an optional "Angela", then "Merkel", then "CDU"
# wrapped in literal parentheses. The @0/@1/@2 prefixes label three of the
# tokens (presumably anchor markers -- confirm against the query-language
# documentation).
query = (
    r'@0[lemma="Angela"]? '
    r'@1[lemma="Merkel"] '
    r'[word="\("] '
    r'@2[lemma="CDU"] '
    r'[word="\)"]'
)
result = corpus.query(query)
concordance = corpus.concordance(result)

# Print the concordance's breakdown, its size, and the lines requested for
# the single id 567792 (presumably a match position -- verify).
print(concordance.breakdown)
print(concordance.size)
print(concordance.lines([567792]))

# Open the corpus again so this snippet does not rely on earlier state.
corpus = Corpus(corpus_name="SZ_2009_14")

# Same token sequence as a plain query without anchor labels; raw-string
# pieces keep the escaped parentheses readable.
query = (
    r'[lemma="Angela"]? '
    r'[lemma="Merkel"] '
    r'[word="\("] '
    r'[lemma="CDU"] '
    r'[word="\)"]'
)
result = corpus.query(query, s_meta=['text_id'])
collocates = corpus.collocates(result)

# Show collocates for a window of 5 ordered by log-likelihood, then print
# only the head of that table.
ranked = collocates.show(window=5, order="log_likelihood")
print(ranked.head())


@pytest.mark.readme_keywords
def test_keywords_sz():
    """Read the sz-2009-14 metadata table and collect the ids of selected rows.

    The visible part loads the metadata, gathers the index labels of the
    rows whose 'ressort' is "Panorama" and whose 'month' is '201103', and
    copies the index into an 's_id' column.

    NOTE(review): only this setup is visible here; the keyword computation
    the test name suggests is not shown -- confirm against the full file.
    """
    # NOTE(review): absolute, machine-specific path -- the test can only run
    # where this file exists.
    # Tab-separated input; column 0 becomes the index and values are read
    # as strings (dtype=str).  read_csv is presumably pandas.read_csv --
    # verify against the file's imports.
    meta = read_csv(
        "/home/ausgerechnet/corpora/cwb/upload/efe/sz-2009-14.tsv.gz",
        sep="\t",
        index_col=0,
        dtype=str)
    # Index labels of the rows matching both conditions; 'month' is compared
    # against the string '201103' because the columns were read as str.
    ids = set(meta.loc[(meta['ressort'] == "Panorama")
                       & (meta['month'] == '201103')].index.values)
    # Make the index available as an ordinary column named 's_id'.
    meta['s_id'] = meta.index
예제 #2
0
from ccc import Corpus

# Open the tweet corpus used for this example.
corpus = Corpus("BREXIT_V20190522_DEDUP")

# Query the lemma "Merkel" with the %cd flags (presumably case- and
# diacritics-insensitive matching -- confirm in the CQP manual), passing
# context=20 and s_context='tweet'.
matches = corpus.query(
    '[lemma="Merkel"%cd]',
    context=20,
    s_context='tweet',
)

# Compute collocates on the "lemma" attribute for those matches.
collocates = corpus.collocates(matches, p_query="lemma")

# Order the table by log-likelihood and write it out tab-separated.
df = collocates.show(order='log_likelihood')
df.to_csv("BREXIT_merkel-ll.tsv", sep="\t")