def test_transform(self):
     """Compare RaslMinMaxScaler.transform with SkMinMaxScaler.transform.

     Both scalers are fitted on, and applied to, the same three columns of
     ``self.go_sales[0]``; a 3x3 sample of cells is then compared.
     """
     selected = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][selected]
     sk_trained = SkMinMaxScaler().fit(frame)
     rasl_trained = RaslMinMaxScaler().fit(frame)
     expected = sk_trained.transform(frame)
     actual = rasl_trained.transform(frame)
     # Spot-check rows 0, 10 and 20 across all three selected columns.
     # The reference result is indexed as [row, col], the Rasl result
     # positionally through .iloc.
     for row in (0, 10, 20):
         for col in range(len(selected)):
             self.assertAlmostEqual(expected[row, col],
                                    actual.iloc[row, col])
 def test_transform_range(self):
     """Compare transform results when feature_range is (-5, 5).

     SkMinMaxScaler works on the pandas selection, while RaslMinMaxScaler
     is fitted on and applied to the output of
     ``lale.datasets.pandas2spark``; its result is brought back with
     ``toPandas()`` before a 3x3 sample of cells is compared.
     """
     selected = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][selected]
     spark_frame = lale.datasets.pandas2spark(frame)
     bounds = (-5, 5)
     sk_scaler = SkMinMaxScaler(feature_range=bounds)
     rasl_scaler = RaslMinMaxScaler(feature_range=bounds)
     sk_trained = sk_scaler.fit(frame)
     rasl_trained = rasl_scaler.fit(spark_frame)
     expected = sk_trained.transform(frame)
     actual = rasl_trained.transform(spark_frame).toPandas()
     # Spot-check rows 0, 10 and 20 across all three selected columns.
     for row in (0, 10, 20):
         for col in range(len(selected)):
             self.assertAlmostEqual(expected[row, col],
                                    actual.iloc[row, col])
 def test_fit(self):
     """Fit both scalers on the same pandas selection and compare them.

     The comparison itself is delegated to ``self._check_trained``.
     """
     selected = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][selected]
     sk_trained = SkMinMaxScaler().fit(frame)
     rasl_trained = RaslMinMaxScaler().fit(frame)
     self._check_trained(sk_trained, rasl_trained)
 def test_fit_range(self):
     """Fit both scalers with feature_range=(-5, 5) and compare them.

     SkMinMaxScaler is fitted on the pandas selection; RaslMinMaxScaler is
     fitted on the output of ``lale.datasets.pandas2spark`` for the same
     selection. The comparison is delegated to ``self._check_trained``.
     """
     selected = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][selected]
     spark_frame = lale.datasets.pandas2spark(frame)
     bounds = (-5, 5)
     sk_trained = SkMinMaxScaler(feature_range=bounds).fit(frame)
     rasl_trained = RaslMinMaxScaler(feature_range=bounds).fit(spark_frame)
     self._check_trained(sk_trained, rasl_trained)
 def test_partial_fit(self):
     """Feed both scalers the same three batches through partial_fit.

     After every batch the objects returned by the two ``partial_fit``
     calls are compared with ``self._check_trained``.
     """
     selected = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][selected]
     # Three consecutive slices: first 10 rows, rows 10-99, and the rest.
     batches = (frame[:10], frame[10:100], frame[100:])
     sk_scaler = SkMinMaxScaler()
     rasl_scaler = RaslMinMaxScaler()
     for batch in batches:
         sk_trained = sk_scaler.partial_fit(batch)
         rasl_trained = rasl_scaler.partial_fit(batch)
         self._check_trained(sk_trained, rasl_trained)
Exemple #6
0
    def test_fit_transform(self, feature_range):
        """ Tests fit_transform against scikit-learn.

        Builds a 1500-sample blob data set, applies a fixed linear
        transformation, and scales it both with SkMinMaxScaler (on the
        numpy array) and with MinMaxScaler (on a ds-array with 300x2
        blocks). The scaled values, the fitted data_min_/data_max_ and the
        ds-array layout metadata must agree.

        :param feature_range: value forwarded to both scalers as their
            ``feature_range`` argument.
        """
        n_samples = 1500
        # The labels returned by make_blobs are not needed here.
        x, _ = make_blobs(n_samples=n_samples, random_state=170)
        transformation = [[0.6, -0.6], [-0.4, 0.8]]
        x = np.dot(x, transformation)
        ds_arr = ds.array(x, block_size=(300, 2))

        sc1 = SkMinMaxScaler(feature_range=feature_range)
        scaled_x = sc1.fit_transform(x)
        sc2 = MinMaxScaler(feature_range=feature_range)
        ds_scaled = sc2.fit_transform(ds_arr)

        self.assertTrue(np.allclose(scaled_x, ds_scaled.collect()))
        self.assertTrue(np.allclose(sc1.data_min_, sc2.data_min_.collect()))
        self.assertTrue(np.allclose(sc1.data_max_, sc2.data_max_.collect()))
        # Wait on the block before reading its shape, as test_irregular
        # does for the same check.
        # NOTE(review): presumably a block can be a not-yet-materialised
        # COMPSs object without a usable .shape -- confirm.
        self.assertEqual(ds_scaled._top_left_shape,
                         compss_wait_on(ds_scaled._blocks[0][0]).shape)
        self.assertEqual(ds_arr._reg_shape, ds_scaled._reg_shape)
        self.assertEqual(ds_arr._top_left_shape, ds_scaled._top_left_shape)
        self.assertEqual(ds_arr.shape, ds_scaled.shape)
        self.assertEqual(ds_arr._n_blocks, ds_scaled._n_blocks)
Exemple #7
0
    def test_irregular(self, feature_range):
        """ Test fit_transform on a ds-array slice that ignores block edges.

        The ds-array is created with 300-row blocks and then cut to rows
        297..601, so the slice does not line up with the block size. The
        same rows of the numpy array are scaled with SkMinMaxScaler as the
        reference.
        """
        row_window = slice(297, 602)
        points, _ = make_blobs(n_samples=1500, random_state=170)
        points = np.dot(points, [[0.6, -0.6], [-0.4, 0.8]])
        ds_arr = ds.array(points, block_size=(300, 2))[row_window]
        points = points[row_window]

        reference = SkMinMaxScaler(feature_range=feature_range)
        expected = reference.fit_transform(points)
        scaler = MinMaxScaler(feature_range=feature_range)
        ds_scaled = scaler.fit_transform(ds_arr)

        # Scaled values and fitted minima/maxima must match the reference.
        self.assertTrue(np.allclose(expected, ds_scaled.collect()))
        for fitted in ("data_min_", "data_max_"):
            self.assertTrue(np.allclose(getattr(reference, fitted),
                                        getattr(scaler, fitted).collect()))

        # The recorded top-left shape must match the actual first block.
        first_block = compss_wait_on(ds_scaled._blocks[0][0])
        self.assertEqual(ds_scaled._top_left_shape, first_block.shape)

        # Layout metadata of the output must equal that of the input.
        for meta in ("_reg_shape", "_top_left_shape", "shape", "_n_blocks"):
            self.assertEqual(getattr(ds_arr, meta), getattr(ds_scaled, meta))
 def test_get_params(self):
     """Check that RaslMinMaxScaler exposes every SkMinMaxScaler parameter.

     Every key/value pair returned by ``SkMinMaxScaler().get_params()``
     must also appear, with an equal value, in
     ``RaslMinMaxScaler().get_params()``; extra Rasl-only keys are allowed.
     """
     sk_params = SkMinMaxScaler().get_params()
     rasl_params = RaslMinMaxScaler().get_params()
     # assertDictContainsSubset was deprecated in Python 3.2 and removed in
     # 3.12. Overlaying sk_params on rasl_params leaves rasl_params
     # unchanged exactly when sk_params is a subset of it.
     self.assertEqual(rasl_params, {**rasl_params, **sk_params})