예제 #1
0
파일: demo-ccc.py 프로젝트: dokempf/cwb-ccc
def test_argmin():
    """Run the gold example query on the BREXIT corpus and print its argmin output.

    Loads a query definition from a JSON file, executes its ``query`` entry
    against the ``BREXIT_V20190522`` corpus, builds a concordance from the
    query result and calls ``show_argmin`` with the definition's ``anchors``
    and ``regions`` entries. The result's keys, its ``nr_matches`` entry and
    the ``df`` of the first match (wrapped in a DataFrame) are printed.

    All corpus, library and query-file paths are hard-coded absolute paths.
    """
    brexit = Corpus(
        corpus_name="BREXIT_V20190522",
        registry_path="/home/ausgerechnet/corpora/cwb/registry/",
        lib_path="/home/ausgerechnet/projects/spheroscope/app/instance-stable/lib/",
        s_meta="tweet_id",
    )

    # The JSON file supplies the 'query', 'anchors' and 'regions' entries used below.
    gold_path = "/home/ausgerechnet/projects/cwb-ccc/tests/gold/query-example.json"
    with open(gold_path, "rt") as handle:
        gold = json.load(handle)

    hits = brexit.query(
        gold['query'],
        context=None,
        s_break='tweet',
        match_strategy='longest',
    )
    conc = brexit.concordance(hits)
    argmin = conc.show_argmin(gold['anchors'], gold['regions'])

    print(argmin.keys())
    print(argmin['nr_matches'])

    # Imported locally, right before its only use, as in the original demo.
    from pandas import DataFrame
    first_match = argmin['matches'][0]
    print(DataFrame(first_match['df']))
예제 #2
0
파일: demo-ccc.py 프로젝트: dokempf/cwb-ccc
from ccc import Corpus, Corpora
from pandas import read_csv
import json

from .conftest import LOCAL
import pytest

# Demo 1: run a plain query on the SZ_2009_14 corpus and inspect the concordance.
corpus = Corpus(corpus_name="SZ_2009_14")

# Token pattern: an optional token with lemma "Angela", then lemma "Merkel",
# a literal "(" token, lemma "CDU" and a literal ")" token. The raw string
# keeps the backslashes that escape the parentheses inside the pattern.
query = r'[lemma="Angela"]? [lemma="Merkel"] [word="\("] [lemma="CDU"] [word="\)"]'
result = corpus.query(query)
concordance = corpus.concordance(result)

# Print the concordance's breakdown, its size, the head of its meta table,
# and the output of lines() for the single id 567792
# (presumably a match position in this corpus — TODO confirm).
print(concordance.breakdown)
print(concordance.size)
print(concordance.meta.head())
print(concordance.lines([567792]))

# Demo 2: same corpus and token pattern, but three tokens carry @0/@1/@2 markers.
corpus = Corpus(corpus_name="SZ_2009_14")

# The @0, @1 and @2 prefixes tag the "Angela", "Merkel" and "CDU" tokens
# (presumably as anchor points for the query — TODO confirm against the
# cwb-ccc / CQP documentation).
query = r'@0[lemma="Angela"]? @1[lemma="Merkel"] [word="\("] @2[lemma="CDU"] [word="\)"]'
result = corpus.query(query)
concordance = corpus.concordance(result)

# Print the breakdown, the size, and the lines() output for id 567792.
# Unlike the first demo, the meta table is not printed here.
print(concordance.breakdown)
print(concordance.size)
print(concordance.lines([567792]))

# Demo 3: the corpus object is created once more for the next example.
corpus = Corpus(corpus_name="SZ_2009_14")

# Same pattern as the first (unmarked) query, written as a regular string with
# doubled backslashes; the resulting string value is identical to the raw-string form.
query = '[lemma="Angela"]? [lemma="Merkel"] [word="\\("] [lemma="CDU"] [word="\\)"]'