Exemple #1
0
    def test_correct_cached_predict(self):
        model = Classifier(**self.default_config())
        train_sample = self.dataset.sample(n=self.n_sample)
        valid_sample = self.dataset.sample(n=self.n_sample)

        # Test with different sizes to make sure we handle cases where
        # the data doesn't divide evenly into batches
        half_sample = int(self.n_sample / 2)
        quarter_sample = int(half_sample / 2)

        model.fit(train_sample.Text.values, train_sample.Target.values)

        # Predictions w/o cached predict
        preds = [
            model.predict_proba(valid_sample.Text.values[:half_sample]),
            model.predict_proba(valid_sample.Text.values[half_sample:]),
            model.predict_proba(valid_sample.Text.values[:quarter_sample]),
            model.predict_proba(valid_sample.Text.values[quarter_sample:])
        ]

        # Predictions w/ cached predict
        with model.cached_predict():
            cached_preds = [
                model.predict_proba(valid_sample.Text.values[:half_sample]),
                model.predict_proba(valid_sample.Text.values[half_sample:]),
                model.predict_proba(valid_sample.Text.values[:quarter_sample]),
                model.predict_proba(valid_sample.Text.values[quarter_sample:])
            ]

        for batch_preds, batch_cached_preds in zip(preds, cached_preds):
            for pred, cached_pred in zip(batch_preds, batch_cached_preds):
                assert list(pred.keys()) == list(cached_pred.keys())
                for pred_val, cached_pred_val in zip(pred.values(), cached_pred.values()):
                    np.testing.assert_almost_equal(pred_val, cached_pred_val, decimal=4)
Exemple #2
0
 def test_correct_cached_predict(self):
     model = Classifier(**self.default_config())
     train_sample = self.dataset.sample(n=self.n_sample)
     valid_sample = self.dataset.sample(n=self.n_sample)
     model.fit(train_sample.Text.values, train_sample.Target.values)
     predictions = model.predict_proba(valid_sample.Text[:1].values)
     predictions2 = model.predict_proba(valid_sample.Text[1:2].values)
     with model.cached_predict():
         np.testing.assert_allclose(list(
             model.predict_proba(valid_sample.Text[:1].values)[0].values()),
                                    list(predictions[0].values()),
                                    rtol=1e-4)
         np.testing.assert_allclose(list(
             model.predict_proba(
                 valid_sample.Text[1:2].values)[0].values()),
                                    list(predictions2[0].values()),
                                    rtol=1e-4)
Exemple #3
0
    def test_cached_predict(self):
        """
        Ensure second call to predict is faster than first
        """

        model = Classifier(**self.default_config())
        train_sample = self.dataset.sample(n=self.n_sample)
        valid_sample = self.dataset.sample(n=self.n_sample)
        model.fit(train_sample.Text.values, train_sample.Target.values)

        with model.cached_predict():
            start = time.time()
            model.predict(valid_sample.Text[:1].values)
            first = time.time()
            model.predict(valid_sample.Text[:1].values)
            second = time.time()

        first_prediction_time = first - start
        second_prediction_time = second - first
        self.assertLess(second_prediction_time, first_prediction_time / 2.0)