def test_keywords(germaparl):
    """Smoke-test keyword extraction on a sub-corpus selected by party.

    Opens the corpus described by the ``germaparl`` fixture, dumps the
    ``text_party`` regions whose value is one of the two parties below,
    computes keywords on that dump and prints the first 50 rows.
    No assertion is made; the test passes as long as nothing raises.
    """
    selected_parties = {"CDU", "CSU"}

    germaparl_corpus = Corpus(
        germaparl['corpus_name'],
        registry_path=germaparl['registry_path'],
    )

    # Restrict the corpus to regions annotated with the selected parties.
    party_dump = germaparl_corpus.dump_from_s_att('text_party', selected_parties)

    keyword_table = party_dump.keywords()
    top_rows = keyword_table.head(50)
    print(top_rows)
def test_keywords_from_meta(brexit):
    """Check keyword extraction on a sub-corpus selected through metadata.

    Reads the tab-separated metadata file named by the ``brexit`` fixture,
    collects the ids of rows flagged as replies, dumps the matching
    ``tweet_id`` regions and runs ``Keywords`` on the lemma layer.  The test
    asserts that ``'@pama1969'`` appears in the index of the result ordered
    by ``log_ratio``.
    """
    # Collect the ids of all rows whose reply flag is the string "1"
    # (every column is read as str, so the comparison is textual).
    metadata = read_csv(brexit['meta_path'], dtype=str, sep="\t")
    is_reply = metadata['in_reply_status'] == "1"
    reply_ids = set(metadata.loc[is_reply]['id'])

    # Build the sub-corpus dump covering exactly those tweets.
    brexit_corpus = Corpus(corpus_name=brexit['corpus_name'])
    reply_dump = brexit_corpus.dump_from_s_att('tweet_id', reply_ids)

    # Compute keywords on the lemma attribute and rank them by log-ratio.
    keyword_analysis = Keywords(brexit_corpus, reply_dump.df, p_query='lemma')
    ranked = keyword_analysis.show(order='log_ratio')

    assert '@pama1969' in ranked.index