def grp_vectorizer_and_gtm(tokenized_docs, groups):
    """Build a linear-tf/smooth-idf group vectorizer and fit it.

    Returns the fitted ``GroupVectorizer`` together with the resulting
    group-term matrix for ``tokenized_docs`` grouped by ``groups``.
    """
    vectorizer = vsm.GroupVectorizer(
        tf_type="linear",
        idf_type="smooth",
        norm=None,
        min_df=1,
        max_df=1.0,
        max_n_terms=None,
    )
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    return vectorizer, matrix
def test_grp_vectorizer_fixed_vocab(tokenized_docs, groups):
    """A user-supplied vocabulary must fully determine the matrix axes."""
    terms = ['lamb', 'snow', 'school', 'rule', 'teacher']
    grps = ['a', 'b']
    vectorizer = vsm.GroupVectorizer(
        vocabulary_terms=terms, vocabulary_grps=grps)
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    # term axis: size and contents come straight from the fixed vocabulary
    assert len(vectorizer.vocabulary_terms) == len(terms)
    assert matrix.shape[1] == len(terms)
    assert sorted(vectorizer.terms_list) == sorted(terms)
    # group axis: likewise pinned to the supplied group vocabulary
    assert len(vectorizer.vocabulary_grps) == len(grps)
    assert matrix.shape[0] == len(grps)
    assert sorted(vectorizer.grps_list) == sorted(grps)
def grp_vectorizer_and_gtm_2(tokenized_docs, groups):
    """Build a bm25-weighted group vectorizer (with doc-length scaling) and fit it.

    Returns the fitted ``GroupVectorizer`` and the group-term matrix
    produced from ``tokenized_docs`` grouped by ``groups``.
    """
    vectorizer = vsm.GroupVectorizer(
        tf_type="bm25", idf_type="smooth", norm=None, apply_dl=True,
        min_df=1, max_df=1.0, max_n_terms=None)
    matrix = vectorizer.fit_transform(tokenized_docs, groups)
    return vectorizer, matrix
# NOTE(review): this function originally repeated the name
# ``test_grp_vectorizer_fixed_vocab`` already defined earlier in the file;
# the redefinition shadowed the first copy (flake8 F811), so pytest only
# ever collected one of the two. Renamed so both are collected and run.
def test_grp_vectorizer_fixed_vocab_2(tokenized_docs, groups):
    """A user-supplied vocabulary must fully determine the matrix axes."""
    vocabulary_terms = ["lamb", "snow", "school", "rule", "teacher"]
    vocabulary_grps = ["a", "b"]
    grp_vectorizer = vsm.GroupVectorizer(
        vocabulary_terms=vocabulary_terms, vocabulary_grps=vocabulary_grps)
    grp_term_matrix = grp_vectorizer.fit_transform(tokenized_docs, groups)
    # term axis is pinned to the fixed term vocabulary
    assert len(grp_vectorizer.vocabulary_terms) == len(vocabulary_terms)
    assert grp_term_matrix.shape[1] == len(vocabulary_terms)
    assert sorted(grp_vectorizer.terms_list) == sorted(vocabulary_terms)
    # group axis is pinned to the fixed group vocabulary
    assert len(grp_vectorizer.vocabulary_grps) == len(vocabulary_grps)
    assert grp_term_matrix.shape[0] == len(vocabulary_grps)
    assert sorted(grp_vectorizer.grps_list) == sorted(vocabulary_grps)
def test_grp_vectorizer_bad_transform(tokenized_docs, groups):
    """Calling ``transform`` before ``fit`` must raise ``ValueError``."""
    unfitted = vsm.GroupVectorizer()
    with pytest.raises(ValueError):
        unfitted.transform(tokenized_docs, groups)
def grp_vectorizer_and_gtm(tokenized_docs, groups):
    """Build a linear-tf/smooth-idf group vectorizer and fit it.

    Returns the fitted ``GroupVectorizer`` and the group-term matrix
    for ``tokenized_docs`` grouped by ``groups``.

    FIX(review): the original used the legacy textacy Vectorizer kwargs
    (``weighting='tf'``, ``normalize``, ``sublinear_tf``, ``smooth_idf``,
    ``min_ic``), which no longer exist on ``GroupVectorizer`` and raise
    ``TypeError`` at construction. Translated to the current API:
    weighting='tf' + sublinear_tf=False -> tf_type='linear';
    smooth_idf=True -> idf_type='smooth'; normalize=False -> norm=None;
    min_ic=0.0 was a no-op default and has no modern counterpart.
    NOTE(review): this redefines the earlier ``grp_vectorizer_and_gtm`` in
    this file; they are now behaviorally identical — consider deduplicating.
    """
    grp_vectorizer = vsm.GroupVectorizer(
        tf_type='linear',
        idf_type='smooth',
        norm=None,
        min_df=1,
        max_df=1.0,
        max_n_terms=None,
    )
    grp_term_matrix = grp_vectorizer.fit_transform(tokenized_docs, groups)
    return grp_vectorizer, grp_term_matrix