# Example 1
def grp_vectorizer_and_gtm(tokenized_docs, groups):
    """Build a linear-tf / smooth-idf group vectorizer and fit it.

    Returns the fitted vectorizer together with the resulting
    group-term matrix for ``tokenized_docs`` grouped by ``groups``.
    """
    vectorizer = vsm.GroupVectorizer(
        tf_type='linear',
        idf_type='smooth',
        norm=None,
        min_df=1,
        max_df=1.0,
        max_n_terms=None,
    )
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    return vectorizer, matrix
# Example 2
def test_grp_vectorizer_fixed_vocab(tokenized_docs, groups):
    """A user-supplied vocabulary fixes both the term and group axes."""
    terms = ['lamb', 'snow', 'school', 'rule', 'teacher']
    grps = ['a', 'b']
    vectorizer = vsm.GroupVectorizer(vocabulary_terms=terms,
                                     vocabulary_grps=grps)
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    # Term axis (columns) must mirror the fixed term vocabulary.
    assert len(vectorizer.vocabulary_terms) == len(terms)
    assert matrix.shape[1] == len(terms)
    assert sorted(vectorizer.terms_list) == sorted(terms)
    # Group axis (rows) must mirror the fixed group vocabulary.
    assert len(vectorizer.vocabulary_grps) == len(grps)
    assert matrix.shape[0] == len(grps)
    assert sorted(vectorizer.grps_list) == sorted(grps)
# Example 3
def grp_vectorizer_and_gtm_2(tokenized_docs, groups):
    """Fit a BM25-weighted group vectorizer (with document-length scaling).

    Returns the fitted vectorizer and its group-term matrix.
    """
    settings = dict(
        tf_type="bm25",
        idf_type="smooth",
        norm=None,
        apply_dl=True,
        min_df=1,
        max_df=1.0,
        max_n_terms=None,
    )
    vectorizer = vsm.GroupVectorizer(**settings)
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    return vectorizer, matrix
# Example 4
def test_grp_vectorizer_fixed_vocab(tokenized_docs, groups):
    """Fixed vocabularies determine the group-term matrix's shape and labels."""
    vocabulary_terms = ["lamb", "snow", "school", "rule", "teacher"]
    vocabulary_grps = ["a", "b"]
    n_terms = len(vocabulary_terms)
    n_grps = len(vocabulary_grps)
    grp_vectorizer = vsm.GroupVectorizer(
        vocabulary_terms=vocabulary_terms,
        vocabulary_grps=vocabulary_grps,
    )
    gtm = grp_vectorizer.fit_transform(tokenized_docs, groups)
    assert gtm.shape == (n_grps, n_terms)
    assert len(grp_vectorizer.vocabulary_terms) == n_terms
    assert sorted(grp_vectorizer.terms_list) == sorted(vocabulary_terms)
    assert len(grp_vectorizer.vocabulary_grps) == n_grps
    assert sorted(grp_vectorizer.grps_list) == sorted(vocabulary_grps)
# Example 5
def test_grp_vectorizer_bad_transform(tokenized_docs, groups):
    """Transforming with an unfitted vectorizer must raise ValueError."""
    unfitted = vsm.GroupVectorizer()
    with pytest.raises(ValueError):
        unfitted.transform(tokenized_docs, groups)
# Example 6
def grp_vectorizer_and_gtm(tokenized_docs, groups):
    """Fit a plain-tf group vectorizer and return it with its matrix.

    NOTE(review): these keyword arguments (``weighting``, ``normalize``,
    ``sublinear_tf``, ``smooth_idf``, ``min_ic``) belong to an older
    textacy GroupVectorizer API than the ``tf_type``/``idf_type`` style
    used elsewhere in this file — confirm the installed textacy version
    before reusing this snippet.
    """
    vectorizer = vsm.GroupVectorizer(
        weighting='tf',
        normalize=False,
        sublinear_tf=False,
        smooth_idf=True,
        min_df=1,
        max_df=1.0,
        min_ic=0.0,
        max_n_terms=None,
    )
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    return vectorizer, matrix