def dispersion():
    """
    Fit a DispersionPlot on the hobbies corpus and pass the fitted
    visualizer to ``savefig`` under the name "dispersion".
    """
    hobbies = load_hobbies()

    # Terms handed to the DispersionPlot constructor
    search_terms = ["Game", "player", "score", "oil", "Man"]
    visualizer = DispersionPlot(search_terms, ax=newfig())

    # Each document is whitespace-split into a token list before fitting
    tokenized_docs = [text.split() for text in hobbies.data]
    visualizer.fit(tokenized_docs)

    savefig(visualizer, "dispersion")
def tsne():
    """
    Fit a TSNEVisualizer on the TF-IDF vectorized hobbies corpus and pass
    the fitted visualizer to ``savefig`` under the name "corpus_tsne".
    """
    hobbies = load_hobbies()

    # Vectorize the raw documents with a default-configured TfidfVectorizer
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(hobbies.data)

    # The corpus targets are supplied as y alongside the vectorized documents
    visualizer = TSNEVisualizer(ax=newfig())
    visualizer.fit(tfidf_matrix, hobbies.target)

    savefig(visualizer, "corpus_tsne")
def freqdist():
    """
    Fit a FreqDistVisualizer on the count-vectorized hobbies corpus and
    pass the fitted visualizer to ``savefig`` under the name "freqdist".
    """
    corpus = load_hobbies()

    vectorizer = CountVectorizer()
    docs = vectorizer.fit_transform(corpus.data)

    # scikit-learn deprecated ``get_feature_names`` in 1.0 and removed it in
    # 1.2 in favour of ``get_feature_names_out``. Prefer the new accessor and
    # only fall back to the legacy one on installations that predate it.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    # The legacy accessor returned a list; keep passing a list so the
    # visualizer receives the same container type as before.
    oz = FreqDistVisualizer(features=list(feature_names), ax=newfig())
    oz.fit(docs)

    savefig(oz, "freqdist")
def test_quick_method(self):
    """
    Run the tsne quick method on the TF-IDF vectorized hobbies corpus and
    check the result with ``assert_images_similar``.
    """
    hobbies = load_hobbies()
    features = TfidfVectorizer().fit_transform(hobbies.data)

    # The corpus targets are passed as y; show=False is forwarded unchanged
    visualizer = tsne(features, hobbies.target, show=False)

    self.assert_images_similar(visualizer, tol=50)
def test_topic_modeling_k_means(self):
    """
    Fit a KElbowVisualizer wrapping KMeans on the TF-IDF vectorized hobbies
    corpus and check the finalized result with ``assert_images_similar``.
    """
    hobbies = load_hobbies()
    tfidf_docs = TfidfVectorizer().fit_transform(hobbies.data)

    # k=(4, 8) is handed to the visualizer exactly as given
    elbow = KElbowVisualizer(KMeans(), k=(4, 8))
    elbow.fit(tfidf_docs)
    elbow.finalize()

    self.assert_images_similar(elbow)
def test_quick_method(self):
    """
    Run the umap quick method on the TF-IDF vectorized hobbies corpus,
    verify it returns a UMAPVisualizer, and check the result with
    ``assert_images_similar``.
    """
    hobbies = load_hobbies()

    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform(hobbies.data)
    labels = hobbies.target

    result = umap(vectors, labels, show=False)

    # The quick method is expected to hand back the visualizer instance
    assert isinstance(result, UMAPVisualizer)
    self.assert_images_similar(result, tol=50)
## Imports ########################################################################## import pytest import matplotlib.pyplot as plt from yellowbrick.exceptions import YellowbrickValueError from yellowbrick.datasets import load_hobbies from yellowbrick.text.dispersion import DispersionPlot, dispersion from tests.base import VisualTestCase ########################################################################## ## Data ########################################################################## corpus = load_hobbies() ########################################################################## ## DispersionPlot Tests ########################################################################## class TestDispersionPlot(VisualTestCase): def test_quick_method(self): """ Assert no errors occur when using the quick method """ _, ax = plt.subplots() text = [doc.split() for doc in corpus.data] search_terms = ["Game", "player", "score", "oil", "Man"]
def load_document_representation():
    """
    Load the hobbies corpus and vectorize its documents with a
    default-configured TfidfVectorizer.

    Returns
    -------
    tuple
        The result of ``fit_transform`` on the corpus documents, followed
        by the corpus ``target`` attribute.
    """
    hobbies = load_hobbies()
    tfidf_matrix = TfidfVectorizer().fit_transform(hobbies.data)
    return tfidf_matrix, hobbies.target