Ejemplo n.º 1
0
def test_vector(text_corpora: TextCorpora,
                corpus_collection: Dict[str, Corpus]) -> None:
    """Check ``TextCorpora.vectors`` against the vectors of each corpus.

    The per-label result must match the ``vectors`` attribute of the
    corresponding corpus in ``corpus_collection``; selecting no label or
    both labels must match the two arrays stacked row-wise, 'cat1' first.
    """
    expected_cat1 = corpus_collection['cat1'].vectors
    expected_cat2 = corpus_collection['cat2'].vectors
    # Reference for the "all corpora" case: cat1 rows on top of cat2 rows.
    expected_all = np.vstack((expected_cat1, expected_cat2))

    # Single-label selections.
    assert np.allclose(text_corpora.vectors('cat1'), expected_cat1)
    assert np.allclose(text_corpora.vectors('cat2'), expected_cat2)

    # Implicit and explicit selection of both labels.
    assert np.allclose(text_corpora.vectors(), expected_all)
    assert np.allclose(text_corpora.vectors(['cat1', 'cat2']), expected_all)
Ejemplo n.º 2
0
def test_add_with_records(
    spacy_lang: Language, record_collection: Dict[str, Sequence[str]]
) -> None:
    """Check that ``TextCorpora.add`` registers records under their label.

    Starting from a ``TextCorpora`` built with only ``spacy_lang``, the
    'cat1' and 'cat2' entries of ``record_collection`` are added in that
    order; afterwards the length must be 4 and the labels must be
    ``['cat1', 'cat2']``.
    """
    corpora = TextCorpora(spacy_lang)

    # Add label by label, 'cat1' before 'cat2', so label order is fixed.
    for label in ('cat1', 'cat2'):
        corpora.add(label, record_collection[label])

    assert len(corpora) == 4
    assert corpora.labels == ['cat1', 'cat2']
Ejemplo n.º 3
0
def test_can_add_docs(
    spacy_lang: Language, doc_collection: Dict[str, Sequence[Doc]]
) -> None:
    """Check that ``TextCorpora.add_docs`` registers docs under their label.

    The 'cat1' and 'cat2' entries of ``doc_collection`` are added, in that
    order, to a ``TextCorpora`` built with only ``spacy_lang``; afterwards
    the length must be 4 and the labels must be ``['cat1', 'cat2']``.
    """
    corpora = TextCorpora(spacy_lang)

    # Add label by label, 'cat1' before 'cat2', so label order is fixed.
    for label in ('cat1', 'cat2'):
        corpora.add_docs(label, doc_collection[label])

    assert len(corpora) == 4
    assert corpora.labels == ['cat1', 'cat2']
Ejemplo n.º 4
0
def test_add_labelled_texts(
    spacy_lang: Language, text_collection: Dict[str, Sequence[str]]
) -> None:
    """Check that ``TextCorpora.add_labelled`` accepts labelled input.

    ``text_collection`` is converted with ``to_labelled`` and handed to
    ``add_labelled`` in a single call; afterwards the length must be 4 and
    the labels must be ``['cat1', 'cat2']``.
    """
    corpora = TextCorpora(spacy_lang)

    # Convert the label -> texts mapping into the form add_labelled takes.
    labelled_texts = to_labelled(text_collection)
    corpora.add_labelled(labelled_texts)

    assert len(corpora) == 4
    assert corpora.labels == ['cat1', 'cat2']
Ejemplo n.º 5
0
def test_word_counts_attributes(text_corpora: TextCorpora,
                                corpus_collection: Dict[str, Corpus]) -> None:
    """Check ``TextCorpora.word_counts`` against per-corpus word counts.

    Per-label counts must equal those of the matching corpus in
    ``corpus_collection``; counts over all corpora (implicit, or with both
    labels listed) must equal the per-label counts merged by summation.
    """
    counts_cat1 = corpus_collection['cat1'].word_counts(as_strings=True)
    counts_cat2 = corpus_collection['cat2'].word_counts(as_strings=True)
    # Reference for the combined case: sum the counts of shared keys.
    counts_all = tlz.merge_with(sum, counts_cat1, counts_cat2)

    only_cat1 = text_corpora.word_counts(corpora='cat1', as_strings=True)
    assert only_cat1 == counts_cat1

    only_cat2 = text_corpora.word_counts(corpora='cat2', as_strings=True)
    assert only_cat2 == counts_cat2

    everything = text_corpora.word_counts(as_strings=True)
    assert everything == counts_all

    # Labels are deliberately listed as 'cat2' then 'cat1' here.
    both_listed = text_corpora.word_counts(corpora=['cat2', 'cat1'],
                                           as_strings=True)
    assert both_listed == counts_all
Ejemplo n.º 6
0
def test_can_add_text(
    spacy_lang: Language, text_collection: Dict[str, Sequence[str]]
) -> None:
    """Check that ``TextCorpora.add_text`` adds texts one at a time.

    Every text under 'cat1', then every text under 'cat2', is added
    individually to a ``TextCorpora`` built with only ``spacy_lang``;
    afterwards the length must be 4 and the labels must be
    ``['cat1', 'cat2']``.
    """
    corpora = TextCorpora(spacy_lang)

    # Outer loop fixes the label order; inner loop feeds texts singly.
    for label in ('cat1', 'cat2'):
        for text in text_collection[label]:
            corpora.add_text(label, text)

    assert len(corpora) == 4
    assert corpora.labels == ['cat1', 'cat2']
Ejemplo n.º 7
0
def test_can_add_corpus(spacy_lang: Language,
                        corpus_collection: Dict[str, Corpus]) -> None:
    """Check that a corpus added via ``add_corpus`` can be read back.

    The 'cat1' corpus from ``corpus_collection`` is added under the label
    'cat1'; indexing the ``TextCorpora`` with that label must compare equal
    to the corpus that was added.
    """
    source_corpus = corpus_collection['cat1']

    corpora = TextCorpora(spacy_lang)
    corpora.add_corpus('cat1', source_corpus)

    assert corpora['cat1'] == source_corpus
Ejemplo n.º 8
0
def test_n_tokens_attributes(text_corpora: TextCorpora) -> None:
    """Check the token counts reported by ``TextCorpora.n_tokens``.

    Expected values: 51 over everything, 28 for 'cat1', 23 for 'cat2', and
    51 again when both labels are listed explicitly.
    """
    # (positional arguments for n_tokens, expected token count)
    cases = (
        ((), 51),
        (('cat1',), 28),
        (('cat2',), 23),
        ((['cat2', 'cat1'],), 51),
    )
    for selection, expected in cases:
        assert text_corpora.n_tokens(*selection) == expected
Ejemplo n.º 9
0
def test_n_sents_attributes(text_corpora: TextCorpora) -> None:
    """Check the sentence counts reported by ``TextCorpora.n_sents``.

    Expected values: 6 over everything, 2 for 'cat1', 4 for 'cat2', and
    6 again when both labels are listed explicitly.
    """
    # (positional arguments for n_sents, expected sentence count)
    cases = (
        ((), 6),
        (('cat1',), 2),
        (('cat2',), 4),
        ((['cat2', 'cat1'],), 6),
    )
    for selection, expected in cases:
        assert text_corpora.n_sents(*selection) == expected
Ejemplo n.º 10
0
def test_can_create_corpora(spacy_lang: Language,
                            corpus_collection: Dict[str, Corpus]) -> None:
    """Check construction of ``TextCorpora`` from a mapping of corpora.

    The instance built from ``spacy_lang`` and ``corpus_collection`` must
    be a ``TextCorpora`` with labels ``['cat1', 'cat2']`` and an
    ``n_corpora`` of 2.
    """
    built = TextCorpora(spacy_lang, corpus_collection)

    assert isinstance(built, TextCorpora)
    assert built.labels == ['cat1', 'cat2']
    assert built.n_corpora == 2
Ejemplo n.º 11
0
def test_can_create_empty_corpora(spacy_lang: Language) -> None:
    """Check that ``TextCorpora`` can be built from ``spacy_lang`` alone."""
    assert isinstance(TextCorpora(spacy_lang), TextCorpora)
Ejemplo n.º 12
0
def text_corpora(spacy_lang: Language,
                 corpus_collection: Dict[str, Corpus]) -> TextCorpora:
    """Build a ``TextCorpora`` from a language and a mapping of corpora.

    Args:
        spacy_lang: Passed as the first argument to ``TextCorpora``.
        corpus_collection: Label-to-corpus mapping passed as the second
            argument to ``TextCorpora``.

    Returns:
        The newly constructed ``TextCorpora``.
    """
    corpora = TextCorpora(spacy_lang, corpus_collection)
    return corpora