def test_exceptions(self):
    """Check that create() and predict() reject malformed input."""
    # NOTE(review): this method is re-defined later in this file with an
    # identical body (only quote style differs); Python keeps only the
    # last definition, so this copy is dead code — consider removing one
    # of the duplicates.
    # Well-formed inputs: a bag-of-words SArray, the same wrapped in a
    # single-column SFrame, and an SArray of empty dicts.
    good1 = turicreate.SArray([{'a': 5, 'b': 7}])
    good2 = turicreate.SFrame({'bow': good1})
    good3 = turicreate.SArray([{}])
    # Malformed inputs: non-dict column, two dict columns, a None row,
    # and a None value inside a dict.
    bad1 = turicreate.SFrame({'x': [0, 1, 2, 3]})
    bad2 = turicreate.SFrame({'x': [{'0': 3}], 'y': [{'3': 5}]})
    bad3 = turicreate.SArray([{'a': 5, 'b': 3}, None, {'a': 10}])
    bad4 = turicreate.SArray([{'a': 5, 'b': None}, {'a': 3}])
    for d in [good1, good2, good3]:
        m = topic_model.create(d)
        self.assertTrue(m is not None)
    # Test that create() throws on bad input
    with self.assertRaises(Exception):
        m = topic_model.create(bad1)
    with self.assertRaises(Exception):
        m = topic_model.create(bad2)
    with self.assertRaises(ToolkitError):
        m = topic_model.create(bad3)
    with self.assertRaises(ToolkitError):
        m = topic_model.create(bad4)
    # An already-fitted model must also reject malformed prediction input.
    m = self.models[0]
    with self.assertRaises(Exception):
        pr = m.predict(bad1)
    with self.assertRaises(Exception):
        pr = m.predict(bad2)
    with self.assertRaises(Exception):
        pr = m.predict(bad3)
def setUpClass(self):
    """Build the shared synthetic corpus and a set of fitted topic models.

    Populates ``self.docs`` and ``self.models`` (and the module-level
    ``examples`` cache) that the other tests in this class read.
    """
    # NOTE(review): unittest's setUpClass hook is conventionally a
    # @classmethod taking `cls`; this one takes `self` — confirm how the
    # suite invokes it before changing the signature.
    # Create an example containing synthetic data along
    # with fitted models (with default parameters).
    docs = generate_bar_example(num_documents=1000, seed=12345)
    models = []
    # Test a model that used CGS
    m = topic_model.create(docs, num_topics=10)
    models.append(m)
    # Test a model with many topics
    m = topic_model.create(docs, method='cgs', num_topics=100, num_iterations=2)
    models.append(m)
    m = topic_model.create(docs, method='alias', num_topics=100, num_iterations=2)
    models.append(m)
    # Test a model serialized after using CGS
    # (round-trips the most recently fitted model through save/load).
    with test_util.TempDirectory() as f:
        m.save(f)
        m2 = turicreate.load_model(f)
        models.append(m2)
    # Save
    examples['synthetic'] = {'docs': docs, 'models': models}
    self.docs = examples['synthetic']['docs']
    self.models = examples['synthetic']['models']
def test_exceptions(self):
    """Verify that create() and predict() reject malformed input."""
    # Well-formed inputs: a bag-of-words SArray, the same data wrapped
    # in a single-column SFrame, and an SArray of empty dicts.
    ok_sarray = turicreate.SArray([{"a": 5, "b": 7}])
    ok_sframe = turicreate.SFrame({"bow": ok_sarray})
    ok_empty = turicreate.SArray([{}])

    # Malformed inputs: non-dict column, two dict columns, a None row,
    # and a None value inside a dict.
    not_bags = turicreate.SFrame({"x": [0, 1, 2, 3]})
    two_columns = turicreate.SFrame({"x": [{"0": 3}], "y": [{"3": 5}]})
    none_row = turicreate.SArray([{"a": 5, "b": 3}, None, {"a": 10}])
    none_value = turicreate.SArray([{"a": 5, "b": None}, {"a": 3}])

    # Every well-formed input must yield a model.
    for data in (ok_sarray, ok_sframe, ok_empty):
        fitted = topic_model.create(data)
        self.assertTrue(fitted is not None)

    # Test that create() throws on bad input
    with self.assertRaises(Exception):
        topic_model.create(not_bags)
    with self.assertRaises(Exception):
        topic_model.create(two_columns)
    with self.assertRaises(ToolkitError):
        topic_model.create(none_row)
    with self.assertRaises(ToolkitError):
        topic_model.create(none_value)

    # An already-fitted model must also reject malformed prediction input.
    fitted = self.models[0]
    with self.assertRaises(Exception):
        fitted.predict(not_bags)
    with self.assertRaises(Exception):
        fitted.predict(two_columns)
    with self.assertRaises(Exception):
        fitted.predict(none_row)
def test_validation_set(self):
    """A validation set may be passed as an SArray or wrapped in an SFrame."""
    fitted = topic_model.create(self.docs, validation_set=self.docs)
    self.assertTrue('validation_perplexity' in fitted._list_fields())

    # Test that an SFrame can be used
    wrapped = turicreate.SFrame({'text': self.docs})
    fitted = topic_model.create(self.docs, validation_set=wrapped)
    self.assertTrue('validation_perplexity' in fitted._list_fields())
def test_no_validation_print(self):
    """print_interval=0 must not break training, and num_burnin is kept."""
    fitted = topic_model.create(
        self.docs,
        num_burnin=25,
        num_iterations=2,
        print_interval=0,
    )
    self.assertTrue(fitted is not None)
    self.assertEqual(fitted.num_burnin, 25)
def test_initialize(self):
    """
    The initial_topics argument allows one to fit a model from a
    particular set of parameters.
    """
    # Fix: the original loop was `for m in self.models:` but `m` was
    # immediately rebound on the next line, so the loop variable was
    # never used. Rename it to `_` to make the pure repetition explicit
    # (same number of iterations, identical behavior).
    for _ in self.models:
        # Fit a small reference model on the last three documents.
        start_docs = turicreate.SArray(self.docs.tail(3))
        m = topic_model.create(
            start_docs,
            num_topics=20,
            method="cgs",
            alpha=0.1,
            beta=0.01,
            num_iterations=1,
            print_interval=1,
        )
        start_topics = turicreate.SFrame(m.topics.head(100))
        # Initialize a new model from those topics and run 0 iterations,
        # so m2 reflects initialization only.
        m2 = topic_model.create(
            self.docs,
            num_topics=20,
            initial_topics=start_topics,
            method="cgs",
            alpha=0.1,
            beta=0.01,
            num_iterations=0,
            print_interval=1,
        )
        # Check that the vocabulary of the new model is the same as
        # the one we used to initialize the model.
        self.assertTrue(
            (start_topics["vocabulary"] == m2.topics["vocabulary"]).all())
        # Check that the previously most probable word is still the most
        # probable after 0 iterations, i.e. just initialization.
        old_prob = start_topics["topic_probabilities"].vector_slice(0)
        new_prob = m2.topics["topic_probabilities"].vector_slice(0)
        self.assertTrue(
            np.argmax(list(old_prob)) == np.argmax(list(new_prob)))
def test_set_associations(self):
    """Words pinned to topic 0 via `associations` remain most likely there."""
    pinned = turicreate.SFrame()
    pinned['word'] = ['1,1', '1,2', '1,3']
    pinned['topic'] = [0, 0, 0]
    fitted = topic_model.create(self.docs, associations=pinned)

    # In this context, the "words" '1,1', '1,2', '1,3' should be
    # the first three words in the vocabulary.
    leading_vocab = list(fitted.topics['vocabulary'].head(3))
    self.assertEqual(leading_vocab, ['1,1', '1,2', '1,3'])

    # For each of these words, the probability of topic 0 should
    # be largest.
    topic_probs = fitted.topics['topic_probabilities']
    top_topic = topic_probs.apply(lambda row: np.argmax(row))
    self.assertEqual(list(top_topic.head(3)), [0, 0, 0])
def test_set_burnin(self):
    """The num_burnin argument must be stored on the fitted model."""
    m = topic_model.create(self.docs, num_burnin=25, num_iterations=1)
    # Fix: assertEqual instead of assertTrue(x == y) — on failure it
    # reports both values rather than just "False is not true".
    self.assertEqual(m.num_burnin, 25)