def sklearn_comp(array):
    """Compare CNormalizerMinMax with scikit-learn's MinMaxScaler.

    Both normalizers are fitted on `array`. Their transforms are then
    compared twice: on the fitting data itself, and on the data multiplied
    by 2 (the out-of-range case).

    NOTE(review): `self` is not a parameter of this function; it is
    resolved from the enclosing scope (presumably a test-case method that
    defines this helper - confirm against the full file).

    Parameters
    ----------
    array
        Data to normalize. Must support `.astype(float).tondarray()`
        (presumably a CArray - confirm against callers).

    """

    def _log_and_check(expected, obtained):
        # Log both outputs, then require them to be almost equal
        self.logger.info("Correct result is:\n{:}".format(expected))
        self.logger.info("Our result is:\n{:}".format(obtained))
        self.assert_array_almost_equal(expected, obtained)

    self.logger.info("Original array is:\n{:}".format(array))

    # Reference: sklearn scaler, which requires float dtype input
    dense_float = array.astype(float).tondarray()
    reference_scaler = MinMaxScaler().fit(dense_float)
    expected = CArray(reference_scaler.transform(dense_float))

    # Normalizer under test, fitted on the original array
    scaler = CNormalizerMinMax().fit(array)
    obtained = scaler.transform(array)

    _log_and_check(expected, obtained)

    # Same comparison, on data outside the range seen during fit
    self.logger.info("Testing out of range normalization")

    expected = CArray(reference_scaler.transform(dense_float * 2))
    obtained = scaler.transform(array * 2)

    _log_and_check(expected, obtained)
# Code example #2
# 0
    def _dataset_creation(self):
        """Create the normalized train/test blob datasets.

        Loads `n_tr + n_ts` random-blob samples with 2 features around the
        centers (-1, -1) and (+1, +1), splits them with a single-fold
        shuffle splitter (`train_size=n_tr`), and rescales the features
        with a min-max normalizer (feature range (-1, 1)) that is fitted
        on the training features and then applied to the test features.

        Attributes set on `self`: `n_features`, `seed`, `n_tr`, `n_ts`,
        `n_classes`, `tr`, `ts`, `lb`, `ub`, `grid_limits`.

        """
        self.n_features = 2  # Number of dataset features
        self.seed = 42
        self.n_tr, self.n_ts, self.n_classes = 50, 100, 2

        blob_params = dict(
            n_samples=self.n_tr + self.n_ts,
            n_features=self.n_features,
            centers=[(-1, -1), (1, 1)],
            center_box=(-2, 2),
            cluster_std=0.8,
            random_state=self.seed,
        )
        blobs_loader = CDLRandomBlobs(**blob_params)

        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(self.seed))

        full_ds = blobs_loader.load()

        shuffle_split = CDataSplitterShuffle(
            num_folds=1, train_size=self.n_tr, random_state=3)
        shuffle_split.compute_indices(full_ds)

        # Only one fold was requested: use its indices for train and test
        train_rows = shuffle_split.tr_idx[0]
        self.tr = full_ds[train_rows, :]
        test_rows = shuffle_split.ts_idx[0]
        self.ts = full_ds[test_rows, :]

        # Fit the scaler on training features, then reuse it on test ones
        scaler = CNormalizerMinMax(feature_range=(-1, 1))
        self.tr.X = scaler.fit_transform(self.tr.X)
        self.ts.X = scaler.transform(self.ts.X)

        # Bounds (same values as the normalizer feature range)
        self.lb, self.ub = -1, 1

        # Per-feature limits: the bounds widened by 0.1 on each side
        padded_limits = (self.lb - 0.1, self.ub + 0.1)
        self.grid_limits = [padded_limits, padded_limits]