예제 #1
0
파일: demo-ccc.py 프로젝트: dokempf/cwb-ccc
def test_keywords(germaparl):
    """Print the first 50 rows of the keyword result for a CDU/CSU dump.

    ``germaparl`` is a mapping indexed here with ``'corpus_name'`` and
    ``'registry_path'``; both values are handed to ``Corpus``.
    """
    union_parties = {"CDU", "CSU"}
    germaparl_corpus = Corpus(
        germaparl['corpus_name'],
        registry_path=germaparl['registry_path'],
    )

    # Build the dump from the 'text_party' attribute and the party set
    # (presumably selecting regions annotated with one of these parties --
    # confirm against the cwb-ccc documentation).
    party_dump = germaparl_corpus.dump_from_s_att('text_party', union_parties)
    keyword_table = party_dump.keywords()

    # No assertion: this test only checks that the pipeline runs and shows
    # the head of the result.
    print(keyword_table.head(50))
예제 #2
0
def test_keywords_from_meta(brexit):
    """Check keyword extraction on a dump selected through a metadata file.

    ``brexit`` is a mapping indexed here with ``'meta_path'`` (a
    tab-separated file read with every column as ``str``) and
    ``'corpus_name'``.
    """
    # Collect the ids of all rows whose 'in_reply_status' is the string "1".
    meta_table = read_csv(brexit['meta_path'], dtype=str, sep="\t")
    is_reply = meta_table['in_reply_status'] == "1"
    reply_ids = set(meta_table.loc[is_reply]['id'])

    # Build the dump from the 'tweet_id' attribute and the collected ids.
    brexit_corpus = Corpus(corpus_name=brexit['corpus_name'])
    reply_dump = brexit_corpus.dump_from_s_att('tweet_id', reply_ids)

    # Compute keywords on the 'lemma' layer and request them ordered by
    # 'log_ratio'; the expected item must appear in the result's index.
    keyword_analysis = Keywords(brexit_corpus, reply_dump.df, p_query='lemma')
    ranked = keyword_analysis.show(order='log_ratio')
    assert '@pama1969' in ranked.index