예제 #1
0
def test_dump_from_query_lib(brexit):
    """Check that a query dump built with a custom library path is well-formed.

    The ``brexit`` fixture supplies the corpus name, the library path, the
    query string and the s-attribute query. The dump is requested with the
    ``'longest'`` match strategy and must be a DataFrame with at least
    100 rows.
    """
    corpus = Corpus(brexit['corpus_name'], lib_path=brexit['lib_path'])
    df_dump = corpus.dump_from_query(
        query=brexit['query_lib'],
        s_query=brexit['s_query'],
        match_strategy='longest',
    )
    # isinstance is the idiomatic type check (and tolerates DataFrame subclasses)
    assert isinstance(df_dump, pd.DataFrame)
    assert df_dump.shape[0] > 99
예제 #2
0
def test_dump_from_query_1(brexit):
    """Check that a query with one anchor point yields a well-formed dump.

    The query matches the lemma "angela" followed by the lemma "merkel",
    with anchor ``1`` set on the second token. The dump is requested with
    the ``'longest'`` match strategy and must be a DataFrame with at least
    100 rows.
    """
    corpus = Corpus(brexit['corpus_name'])
    df_dump = corpus.dump_from_query(
        query='[lemma="angela"] @1[lemma="merkel"]',
        anchors=[1],
        match_strategy='longest',
    )
    # isinstance is the idiomatic type check (and tolerates DataFrame subclasses)
    assert isinstance(df_dump, pd.DataFrame)
    assert df_dump.shape[0] > 99
예제 #3
0
def test_keywords_from_dump(germaparl):
    """Check that 'CDU' appears in the keyword table computed from a dump.

    A dump for the query ``"und" expand to s`` is created under the name
    'test_keywords', handed to ``Keywords`` with the "lemma" p-attribute,
    and the resulting table (ordered by log-likelihood, minimum frequency
    10) must list 'CDU' in its index.
    """
    dump_name = 'test_keywords'

    # open the corpus described by the fixture and dump the matching regions
    germaparl_corpus = Corpus(
        corpus_name=germaparl['corpus_name'],
        registry_path=germaparl['registry_path'],
    )
    und_regions = germaparl_corpus.dump_from_query(
        '"und" expand to s',
        name=dump_name,
    )

    # keyword table for the dumped regions
    keyword_table = Keywords(
        germaparl_corpus,
        df_dump=und_regions,
        p_query="lemma",
    ).show(order='log_likelihood', min_freq=10)

    assert 'CDU' in keyword_table.index