def test_count_matches(brexit):
    """Run a saved, named query and check that "Nigel" appears in the match counts."""
    brexit_corpus = Corpus(brexit['corpus_name'])
    # The result is saved under the name 'Test'; the same name is passed to
    # counts.matches() below.
    brexit_corpus.query(
        cqp_query='[lemma="nigel"]',
        context=10,
        context_break='tweet',
        name='Test',
        save=True,
    )
    engine = brexit_corpus.start_cqp()
    match_counts = brexit_corpus.counts.matches(engine, 'Test')
    assert "Nigel" in match_counts.index
def test_concordance_options(germaparl):
    """Print the concordance of a '"SPD"' query once for each output form."""
    parl = Corpus(
        germaparl['corpus_name'],
        registry_path=germaparl['registry_path'],
    )
    spd_dump = parl.query('"SPD"')
    # Forms are exercised in the same order as the original explicit calls.
    for form_name in ('raw', 'simple', 'kwic', 'dataframes', 'extended'):
        print(spd_dump.concordance(form=form_name))
def test_argmin_query(brexit):
    """Run the fixture's 'query_argmin' query and print its extended concordance.

    All query and display settings are read from ``brexit['query_argmin']``;
    the whole concordance is printed, followed by the first row of its
    ``'df'`` entry.
    """
    brexit_corpus = Corpus(brexit['corpus_name'], lib_path=brexit['lib_path'])
    spec = brexit['query_argmin']

    # 'context' and 's_context' are optional in the fixture; missing keys
    # are passed on as None.
    query_args = {
        'cqp_query': spec['cqp'],
        'context': spec.get('context'),
        'context_break': spec.get('s_context'),
        'corrections': spec['corrections'],
        'match_strategy': spec['match_strategy'],
    }
    result_dump = brexit_corpus.query(**query_args)

    display_args = {
        'p_show': spec['p_show'],
        's_show': spec['s_show'],
        'p_text': spec['p_text'],
        'p_slots': spec['p_slots'],
        'slots': spec['slots'],
        'order': 'first',
        'cut_off': None,
        'form': 'extended',
    }
    lines = result_dump.concordance(**display_args)

    print(lines)
    first_row = lines['df'].iloc[0]
    print(first_row)
def test_context_matches(germaparl):
    """Print the matches and then the context of a '"SPD"' query dump."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    spd_dump = parl.query('"SPD"')

    found = spd_dump.matches()
    print(found)

    surrounding = spd_dump.context()
    print(surrounding)
def test_keywords_options(germaparl):
    """Print keywords of an '"SPD" expand to s' query with explicit order and cut-off."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    sentence_dump = parl.query('"SPD" expand to s')
    table = sentence_dump.keywords(order='log_ratio', cut_off=200)
    print(table)
def test_query2dump(germaparl):
    """Print the object returned by querying the germaparl corpus for '"SPD"'."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    spd_dump = parl.query('"SPD"')
    print(spd_dump)
def test_keywords(germaparl):
    """Print keywords of an '"SPD" expand to s' query using default arguments."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    sentence_dump = parl.query('"SPD" expand to s')
    table = sentence_dump.keywords()
    print(table)
def test_collocates_options(germaparl):
    """Print collocates of a '"SPD"' query with explicit order and cut-off."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    spd_dump = parl.query('"SPD"')
    table = spd_dump.collocates(order='log_likelihood', cut_off=200)
    print(table)
def test_collocates(germaparl):
    """Print collocates of a '"SPD"' query using default arguments."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    spd_dump = parl.query('"SPD"')
    table = spd_dump.collocates()
    print(table)
def test_breakdown(germaparl):
    """Print the breakdown of a '"SPD"' query dump."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    parl = Corpus(name, registry_path=registry)
    spd_dump = parl.query('"SPD"')
    summary = spd_dump.breakdown()
    print(summary)