def test_preprocessor_normalization(self):
        """Check ``StandardPreprocessor`` output on rescaled ``new_line`` batches.

        Eight batches are drawn from the dataset. In each one, the features
        and the targets are shifted and stretched, then fitted and
        transformed by their own preprocessor. The transformed mean must
        round to zero, while the transformed minimum and maximum must not be
        (almost) zero.
        """
        line_dataset = new_line(random_seed=0)
        target_preprocessor = StandardPreprocessor(continuous=[0],
                                                   threshold=-1)
        feature_preprocessor = StandardPreprocessor(continuous=[0],
                                                    threshold=-1)

        batch_count = 8
        stretch = 100
        for _ in range(batch_count):
            features, targets = line_dataset.input_fn()

            # Both halves of a batch are expected to be one-dimensional.
            self.assertEqual(1, len(shape_of_array(features)))
            self.assertEqual(1, len(shape_of_array(targets)))

            # Shift and stretch the raw values so that the normalization
            # step is actually put under pressure; the resulting spread
            # must be at least 1.
            features = (features - 0.5) * stretch
            targets = (targets - 0.5) * stretch
            self.assertGreaterEqual(features.max() - features.min(), 1)
            self.assertGreaterEqual(targets.max() - targets.min(), 1)

            # Features: re-fit on this batch, then transform the same batch.
            fitted_features = feature_preprocessor.fit(features)
            features_norm = fitted_features.transform(features)
            self.assertEqual(1, features_norm.ndim)
            features_mean = features_norm.mean()
            # The raw mean doubles as the failure message.
            self.assertEqual(round(features_mean), 0, features_mean)
            self.assertNotAlmostEqual(features_norm.min(), 0)
            self.assertNotAlmostEqual(features_norm.max(), 0)

            # Targets: same checks, minus the dimensionality assertion.
            fitted_targets = target_preprocessor.fit(targets)
            targets_norm = fitted_targets.transform(targets)
            targets_mean = targets_norm.mean()
            self.assertEqual(round(targets_mean), 0, targets_mean)
            self.assertNotAlmostEqual(targets_norm.min(), 0)
            self.assertNotAlmostEqual(targets_norm.max(), 0)
 def test_learner_synthetic(self):
     """Train linear/logistic learners on synthetic data up to a target score.

     Each case pairs a learner class with a synthetic dataset and the
     minimum score that the best entry of the training history must reach.
     """
     # NOTE(review): other methods named ``test_learner_synthetic`` appear
     # nearby; if they live in the same class, a later definition shadows
     # this one and it never runs -- confirm.
     seed_kwargs = {"random_seed": 0}
     cases = [
         # Fit a straight line.
         (LinearRegressor, new_line(**seed_kwargs), 0.95),
         # Guess labels correctly.
         (LogisticRegression, new_labels(**seed_kwargs), 0.55),
         # Fit a 4th-degree polynomial.
         (LinearRegressor, new_poly(**seed_kwargs), 0.85),
         # Fit a fuzzy 3x3 matrix.
         (LinearRegressor, new_3x3(**seed_kwargs), 0.95),
     ]
     for learner_cls, synthetic, min_score in cases:
         model = learner_cls(verbose=False, **seed_kwargs)
         run = model.train(synthetic.input_fn,
                           max_score=min_score,
                           progress=True)
         best_score = max(run.scores)
         # The dataset name identifies the failing case.
         self.assertGreaterEqual(best_score, min_score, synthetic.name)
 def test_learner_synthetic(self):
     """Train CNN learners on synthetic data up to a target score.

     Every learner is built with ``kernel_size=1``, ``padding=0`` and
     ``maxpool_size=1`` plus the shared random seed. Each case names the
     minimum score that the best entry of the training history must reach.
     """
     # NOTE(review): other methods named ``test_learner_synthetic`` appear
     # nearby; if they live in the same class, only the last definition
     # is kept -- confirm.
     seed_kwargs = {"random_seed": 0}
     cnn_kwargs = {
         "kernel_size": 1,
         "padding": 0,
         "maxpool_size": 1,
         **seed_kwargs,
     }
     cases = [
         # Fit a straight line.
         (CNNRegressor, new_line(**seed_kwargs), 0.95),
         # Fit a sine curve.
         (CNNRegressor, new_trig(**seed_kwargs), 0.60),
         # Fit a 4th-degree polynomial.
         (CNNRegressor, new_poly(**seed_kwargs), 0.85),
         # Guess labels correctly.
         (CNNClassifier, new_labels(**seed_kwargs), 0.80),
         # Fit a fuzzy 3x3 matrix.
         (CNNRegressor, new_3x3(**seed_kwargs), 0.90),
     ]
     for learner_cls, synthetic, min_score in cases:
         model = learner_cls(verbose=False, **cnn_kwargs)
         run = model.train(synthetic.input_fn,
                           max_score=min_score,
                           progress=True)
         best_score = max(run.scores)
         # The dataset name identifies the failing case.
         self.assertGreaterEqual(best_score, min_score, synthetic.name)
 def test_learner_synthetic(self):
     """Run ``BruteForce`` over learner families on synthetic datasets.

     Each case hands a list of candidate learner classes and a dataset to
     ``BruteForce`` (with ``n_jobs=4``), trains it, and requires the best
     score in the training history to reach the case's target.
     """
     # NOTE(review): other methods named ``test_learner_synthetic`` appear
     # nearby; if they live in the same class, this definition shadows
     # the earlier ones -- confirm.
     seed_kwargs = {"random_seed": 0}
     classifier_family = [
         LogisticRegression, MLPClassifier, CNNClassifier
     ]
     regressor_family = [LinearRegressor, MLPRegressor, CNNRegressor]
     cases = [
         # Fit a straight line.
         (regressor_family, new_line(**seed_kwargs), 0.95),
         # Fit a sine curve.
         (regressor_family, new_trig(**seed_kwargs), 0.75),
         # Fit a 4th-degree polynomial.
         (regressor_family, new_poly(**seed_kwargs), 0.85),
         # Guess labels correctly.
         (classifier_family, new_labels(**seed_kwargs), 0.80),
         # Fit a fuzzy 3x3 matrix.
         (regressor_family, new_3x3(**seed_kwargs), 0.90),
     ]
     for candidates, synthetic, min_score in cases:
         search = BruteForce(synthetic, candidates, n_jobs=4)
         run = search.train(synthetic.input_fn,
                            max_score=min_score,
                            progress=True)
         best_score = max(run.scores)
         # The dataset name identifies the failing case.
         self.assertGreaterEqual(best_score, min_score, synthetic.name)