Esempio n. 1
0
    def test_exceptions(self):
        """Check that create() and predict() reject malformed input.

        Three well-formed datasets must each produce a model. Four malformed
        datasets must make ``topic_model.create`` raise, and the first three
        of them must also make ``predict`` raise on an already fitted model.
        """

        good1 = turicreate.SArray([{'a': 5, 'b': 7}])
        good2 = turicreate.SFrame({'bow': good1})
        good3 = turicreate.SArray([{}])
        # SFrame whose only column holds integers instead of dictionaries.
        bad1 = turicreate.SFrame({'x': [0, 1, 2, 3]})
        # SFrame with two dictionary columns.
        bad2 = turicreate.SFrame({'x': [{'0': 3}], 'y': [{'3': 5}]})
        # SArray with a None entry between two dictionaries.
        bad3 = turicreate.SArray([{'a': 5, 'b': 3}, None, {'a': 10}])
        # SArray whose first dictionary maps a word to None.
        bad4 = turicreate.SArray([{'a': 5, 'b': None}, {'a': 3}])

        for d in [good1, good2, good3]:
            m = topic_model.create(d)
            self.assertIsNotNone(m)

        # Test that create() throws on bad input. The return values are
        # irrelevant here, so they are not bound to a name.
        with self.assertRaises(Exception):
            topic_model.create(bad1)
        with self.assertRaises(Exception):
            topic_model.create(bad2)
        with self.assertRaises(ToolkitError):
            topic_model.create(bad3)
        with self.assertRaises(ToolkitError):
            topic_model.create(bad4)

        # Test that predict() on a fitted fixture model throws as well.
        m = self.models[0]
        with self.assertRaises(Exception):
            m.predict(bad1)
        with self.assertRaises(Exception):
            m.predict(bad2)
        with self.assertRaises(Exception):
            m.predict(bad3)
Esempio n. 2
0
    def setUpClass(self):
        """Build the shared fixtures: a synthetic corpus and four models.

        The models are, in order: a 10-topic fit with no explicit method, a
        100-topic 'cgs' fit, a 100-topic 'alias' fit, and a copy of the
        'alias' fit that was saved to a temporary directory and loaded back.
        Corpus and models are stored in ``examples['synthetic']`` and exposed
        as ``self.docs`` and ``self.models``.
        """
        # NOTE(review): unittest invokes setUpClass on the class itself, so
        # this presumably carries a @classmethod decorator outside this
        # excerpt -- confirm.
        corpus = generate_bar_example(num_documents=1000, seed=12345)

        # Baseline fit: only the number of topics is specified.
        fitted = [topic_model.create(corpus, num_topics=10)]

        # Many topics and two iterations, once per explicitly named method.
        for sampler in ('cgs', 'alias'):
            fitted.append(topic_model.create(corpus,
                                             method=sampler,
                                             num_topics=100,
                                             num_iterations=2))

        # Round-trip the most recently fitted model (the 'alias' one) through
        # save/load and keep the reloaded copy as a fourth fixture.
        with test_util.TempDirectory() as tmp_dir:
            fitted[-1].save(tmp_dir)
            reloaded = turicreate.load_model(tmp_dir)
        fitted.append(reloaded)

        # Publish the fixtures in the shared registry and on the class.
        examples['synthetic'] = {'docs': corpus, 'models': fitted}
        self.docs = examples['synthetic']['docs']
        self.models = examples['synthetic']['models']
Esempio n. 3
0
    def test_exceptions(self):
        """Check that create() and predict() reject malformed input.

        Three well-formed datasets must each produce a model. Four malformed
        datasets must make ``topic_model.create`` raise, and the first three
        of them must also make ``predict`` raise on an already fitted model.
        """

        good1 = turicreate.SArray([{"a": 5, "b": 7}])
        good2 = turicreate.SFrame({"bow": good1})
        good3 = turicreate.SArray([{}])
        # SFrame whose only column holds integers instead of dictionaries.
        bad1 = turicreate.SFrame({"x": [0, 1, 2, 3]})
        # SFrame with two dictionary columns.
        bad2 = turicreate.SFrame({"x": [{"0": 3}], "y": [{"3": 5}]})
        # SArray with a None entry between two dictionaries.
        bad3 = turicreate.SArray([{"a": 5, "b": 3}, None, {"a": 10}])
        # SArray whose first dictionary maps a word to None.
        bad4 = turicreate.SArray([{"a": 5, "b": None}, {"a": 3}])

        for d in [good1, good2, good3]:
            m = topic_model.create(d)
            self.assertIsNotNone(m)

        # Test that create() throws on bad input. The return values are
        # irrelevant here, so they are not bound to a name.
        with self.assertRaises(Exception):
            topic_model.create(bad1)
        with self.assertRaises(Exception):
            topic_model.create(bad2)
        with self.assertRaises(ToolkitError):
            topic_model.create(bad3)
        with self.assertRaises(ToolkitError):
            topic_model.create(bad4)

        # Test that predict() on a fitted fixture model throws as well.
        m = self.models[0]
        with self.assertRaises(Exception):
            m.predict(bad1)
        with self.assertRaises(Exception):
            m.predict(bad2)
        with self.assertRaises(Exception):
            m.predict(bad3)
Esempio n. 4
0
    def test_validation_set(self):
        """Check that fitting with a validation set adds a perplexity field.

        The validation set is passed first as ``self.docs`` itself and then
        wrapped in a single-column SFrame; in both cases the fitted model
        must list a ``validation_perplexity`` field.
        """
        m = topic_model.create(self.docs, validation_set=self.docs)
        # assertIn reports the actual field list when the check fails.
        self.assertIn('validation_perplexity', m._list_fields())

        # Test that an SFrame can be used
        sf = turicreate.SFrame({'text': self.docs})
        m = topic_model.create(self.docs, validation_set=sf)
        self.assertIn('validation_perplexity', m._list_fields())
Esempio n. 5
0
 def test_no_validation_print(self):
     """Check that create() succeeds with print_interval=0.

     The fitted model must exist and report the num_burnin value it was
     given. (print_interval=0 presumably suppresses progress output --
     confirm against the topic_model documentation.)
     """
     m = topic_model.create(self.docs,
                            num_burnin=25,
                            num_iterations=2,
                            print_interval=0)
     self.assertIsNotNone(m)
     self.assertEqual(m.num_burnin, 25)
Esempio n. 6
0
    def test_initialize(self):
        """
        The initial_topics argument allows one to fit a model from a
        particular set of parameters.

        A first model is fitted on the last three documents. Its topics are
        then passed as ``initial_topics`` to a second model that runs zero
        iterations, so the second model's topics come from initialization
        alone.
        """

        # The seed documents do not depend on the loop, so build them once.
        start_docs = turicreate.SArray(self.docs.tail(3))

        # The body never reads the fixture models; the loop only repeats the
        # check once per entry in self.models. The loop variable is therefore
        # discarded instead of being shadowed by the freshly fitted model.
        for _ in self.models:
            m = topic_model.create(
                start_docs,
                num_topics=20,
                method="cgs",
                alpha=0.1,
                beta=0.01,
                num_iterations=1,
                print_interval=1,
            )
            start_topics = turicreate.SFrame(m.topics.head(100))
            m2 = topic_model.create(
                self.docs,
                num_topics=20,
                initial_topics=start_topics,
                method="cgs",
                alpha=0.1,
                beta=0.01,
                num_iterations=0,
                print_interval=1,
            )

            # Check that the vocabulary of the new model is the same as
            # the one we used to initialize the model.
            self.assertTrue(
                (start_topics["vocabulary"] == m2.topics["vocabulary"]).all())

            # Check that the previously most probable word is still the most
            # probable after 0 iterations, i.e. just initialization.
            old_prob = start_topics["topic_probabilities"].vector_slice(0)
            new_prob = m2.topics["topic_probabilities"].vector_slice(0)
            # assertEqual reports both indices when they differ.
            self.assertEqual(
                np.argmax(list(old_prob)), np.argmax(list(new_prob)))
Esempio n. 7
0
    def test_set_associations(self):
        """Check that word/topic associations passed to create() take effect.

        Three words are each associated with topic 0. After fitting, those
        words must be the first three vocabulary entries, and topic 0 must
        have the highest probability for each of them.
        """
        pinned_words = ['1,1', '1,2', '1,3']
        pinned_topics = [0, 0, 0]

        word_topic_pairs = turicreate.SFrame()
        word_topic_pairs['word'] = pinned_words
        word_topic_pairs['topic'] = pinned_topics
        model = topic_model.create(self.docs, associations=word_topic_pairs)

        # The associated words should lead the fitted vocabulary.
        leading_vocab = model.topics['vocabulary'].head(3)
        self.assertEqual(list(leading_vocab), pinned_words)

        # Per word, find the index of the most probable topic; it should be
        # topic 0 for every associated word.
        best_topic = model.topics['topic_probabilities'].apply(
            lambda row: np.argmax(row))
        self.assertEqual(list(best_topic.head(3)), pinned_topics)
Esempio n. 8
0
 def test_set_burnin(self):
     """Check that the num_burnin passed to create() is kept on the model."""
     m = topic_model.create(self.docs, num_burnin=25, num_iterations=1)
     # assertEqual reports the actual value when the check fails.
     self.assertEqual(m.num_burnin, 25)