Exemple #1
0
def test_dumps(brexit):
    """Dump a fixed set of tweets by id and print their concordance.

    The test makes no assertions; it passes as long as building the dump
    from the ``tweet_id`` s-attribute and rendering its concordance do not
    raise.
    """
    corpus_name = brexit['corpus_name']
    brexit_corpus = Corpus(corpus_name)

    # Values of the 'tweet_id' s-attribute whose regions are dumped.
    tweet_ids = {
        't740982320711249920',
        't731037753241112576',
        't729363812802039814',
        't733648546881277953',
        't741216447595220992',
        't705780723018539012',
        't745930343627243520',
        't730870826178904065',
        't745691821477605377',
        't730419966818783232',
        't746069538693750784',
    }

    tweet_dump = brexit_corpus.dump_from_s_att('tweet_id', tweet_ids)
    concordance = tweet_dump.concordance()
    print(concordance)
def test_count_items_subcorpora(germaparl):
    """Count multi-word units on an activated subcorpus with all strategies.

    A named query result ``presidency`` is built from the regions whose
    ``text_role`` s-attribute equals ``"presidency"`` and then activated,
    so that the subsequent ``counts.mwus`` calls run against that
    subcorpus. The test checks that strategy 1 yields a positive total
    frequency and that strategies 2 and 3 return equal results.

    Args:
        germaparl: fixture mapping providing ``'corpus_name'`` and
            ``'registry_path'``.
    """
    corpus = Corpus(germaparl['corpus_name'],
                    registry_path=germaparl['registry_path'])
    cqp = corpus.start_cqp()

    # Always shut the CQP session down, even when an assertion fails or a
    # call raises; otherwise the session started above would be left behind.
    try:
        # subcorpus
        dump = corpus.dump_from_s_att("text_role", ["presidency"])
        cqp.nqr_from_dump(dump.df, 'presidency')
        cqp.nqr_activate(corpus.corpus_name, 'presidency')

        # NOTE(review): "CSU" appears twice -- presumably on purpose, to
        # exercise duplicate items; confirm before de-duplicating.
        items = ["Horst Seehofer", r"( CSU )", "CSU", "WES324", "CSU"]
        queries = [formulate_cqp_query([item]) for item in items]

        counts1 = corpus.counts.mwus(cqp, queries, strategy=1,
                                     fill_missing=False)
        assert sum(counts1['freq']) > 0

        counts2 = corpus.counts.mwus(cqp, queries, strategy=2,
                                     fill_missing=False)
        counts3 = corpus.counts.mwus(cqp, queries, strategy=3,
                                     fill_missing=False)
        assert counts2.equals(counts3)
    finally:
        cqp.__kill__()