예제 #1
0
 def test_comparison_regressor_auxiliary(self):
     """
     Smoke-test ComparisonRegressor when auxiliary context is supplied.

     Fits on four copies of one text pair, using every ordered pairing of
     the first two entries of ``self.train_context`` as context, and then
     predicts on the same inputs. No assertion is made on the predictions;
     the test passes as long as neither call raises.
     """
     config = self.default_config(
         chunk_long_sequences=False, max_length=50, batch_size=4)
     model = ComparisonRegressor(**config)

     pair = ['i like apples', 'i like apples']
     texts = [pair] * 4
     targets = [0, .5, .5, 1]
     # One context pair per training example: (0,0), (0,1), (1,0), (1,1).
     contexts = [
         [self.train_context[first], self.train_context[second]]
         for first in (0, 1)
         for second in (0, 1)
     ]

     model.fit(texts, targets, context=contexts)
     # The return value is intentionally unused; only a clean run is checked.
     model.predict(texts, context=contexts)
    def test_reasonable_predictions(self):
        """
        Check that ComparisonRegressor beats a constant baseline on a toy task.

        Builds a synthetic dataset of word pairs: pairs drawn from a single
        vocabulary (both animals or both numbers) are labelled 1, and pairs
        mixing an animal with a number are labelled 0. The model is
        finetuned on 70% of the pairs and evaluated on the remaining 30%.

        Asserts that the predictions are an ``np.ndarray`` whose first
        element is an ``np.float32``, and that the test-set mean squared
        error is lower than that of a constant prediction.
        """
        model = ComparisonRegressor(**self.default_config())

        # Two vocabularies used to synthesise the dataset.
        animals = [
            "dog", "cat", "horse", "cow", "pig", "sheep", "goat", "chicken",
            "guinea pig", "donkey", "turkey", "duck", "camel", "goose",
            "llama", "rabbit", "fox"
        ]
        numbers = [
            "one", "two", "three", "four", "five", "six", "seven", "eight",
            "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen"
        ]

        n_per = 150
        # Same-vocabulary pairs: n_per // 2 drawn from each vocabulary.
        same_vocab_pairs = [
            [random.choice(vocab), random.choice(vocab)]
            for vocab in (animals, numbers)
            for _ in range(n_per // 2)
        ]
        # Cross-vocabulary pairs: always (animal, number), n_per of them.
        cross_vocab_pairs = [
            [random.choice(animals), random.choice(numbers)]
            for _ in range(n_per)
        ]

        data = same_vocab_pairs + cross_vocab_pairs
        targets = np.asarray(
            [1] * len(same_vocab_pairs) + [0] * len(cross_vocab_pairs))

        x_tr, x_te, t_tr, t_te = train_test_split(
            data, targets, test_size=0.3, random_state=42)
        model.finetune(x_tr, t_tr)
        predictions = model.predict(x_te)

        squared_errors = [
            (predicted - actual)**2
            for predicted, actual in zip(predictions, t_te)
        ]
        mse = np.mean(squared_errors)

        # Constant baseline: predict the larger class share for every pair.
        positive_share = np.mean(targets == 1)
        negative_share = np.mean(targets == 0)
        naive_baseline = max(positive_share, negative_share)
        naive_baseline_mse = np.mean(
            [(naive_baseline - actual)**2 for actual in t_te])

        self.assertIsInstance(predictions, np.ndarray)
        self.assertIsInstance(predictions[0], np.float32)
        self.assertGreater(naive_baseline_mse, mse)