Esempio n. 1
0
 def test_transform_range(self):
     """Compare transform output of both scalers with feature_range=(-5, 5).

     SkMinMaxScaler is fitted and applied on the frame selected from
     ``self.go_sales[0]``; RaslMinMaxScaler is fitted and applied on the
     result of ``lale.datasets.pandas2spark`` for that same frame, and its
     output is brought back with ``toPandas()``. Rows 0, 10 and 20 are
     spot-checked across all three columns.
     """
     columns = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][columns]
     spark_frame = lale.datasets.pandas2spark(frame)
     bounds = (-5, 5)
     sk_scaler = SkMinMaxScaler(feature_range=bounds)
     rasl_scaler = RaslMinMaxScaler(feature_range=bounds)
     sk_trained = sk_scaler.fit(frame)
     rasl_trained = rasl_scaler.fit(spark_frame)
     expected = sk_trained.transform(frame)
     actual = rasl_trained.transform(spark_frame).toPandas()
     # expected is indexed like an array, actual positionally via .iloc.
     for row in (0, 10, 20):
         for col in (0, 1, 2):
             self.assertAlmostEqual(expected[row, col],
                                    actual.iloc[row, col])
Esempio n. 2
0
 def test_transform(self):
     """Compare transform output of both scalers built with default args.

     Both SkMinMaxScaler and RaslMinMaxScaler are fitted and applied on
     the same frame selected from ``self.go_sales[0]``. Rows 0, 10 and 20
     are spot-checked across all three columns.
     """
     columns = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][columns]
     sk_scaler = SkMinMaxScaler()
     rasl_scaler = RaslMinMaxScaler()
     sk_trained = sk_scaler.fit(frame)
     rasl_trained = rasl_scaler.fit(frame)
     expected = sk_trained.transform(frame)
     actual = rasl_trained.transform(frame)
     # expected is indexed like an array, actual positionally via .iloc.
     for row in (0, 10, 20):
         for col in (0, 1, 2):
             self.assertAlmostEqual(expected[row, col],
                                    actual.iloc[row, col])
Esempio n. 3
0
 def test_fit(self):
     """Fit both scalers on the same frame and compare their trained state.

     The comparison itself is delegated to ``self._check_trained``.
     """
     columns = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][columns]
     reference = SkMinMaxScaler()
     candidate = RaslMinMaxScaler()
     fitted_reference = reference.fit(frame)
     fitted_candidate = candidate.fit(frame)
     self._check_trained(fitted_reference, fitted_candidate)
Esempio n. 4
0
 def test_fit_range(self):
     """Fit both scalers with feature_range=(-5, 5) and compare them.

     SkMinMaxScaler is fitted on the frame selected from
     ``self.go_sales[0]``; RaslMinMaxScaler is fitted on the result of
     ``lale.datasets.pandas2spark`` for that frame. The comparison is
     delegated to ``self._check_trained``.
     """
     columns = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][columns]
     spark_frame = lale.datasets.pandas2spark(frame)
     bounds = (-5, 5)
     reference = SkMinMaxScaler(feature_range=bounds)
     candidate = RaslMinMaxScaler(feature_range=bounds)
     fitted_reference = reference.fit(frame)
     fitted_candidate = candidate.fit(spark_frame)
     self._check_trained(fitted_reference, fitted_candidate)
Esempio n. 5
0
 def test_partial_fit(self):
     """Feed three consecutive slices to partial_fit on both scalers.

     After every slice the two partial_fit results are compared through
     ``self._check_trained``.
     """
     columns = ["Product number", "Quantity", "Retailer code"]
     frame = self.go_sales[0][columns]
     # Slices are taken up front: [0, 10), [10, 100) and the remainder.
     batches = (frame[:10], frame[10:100], frame[100:])
     sk_scaler = SkMinMaxScaler()
     rasl_scaler = RaslMinMaxScaler()
     for batch in batches:
         sk_trained = sk_scaler.partial_fit(batch)
         rasl_trained = rasl_scaler.partial_fit(batch)
         self._check_trained(sk_trained, rasl_trained)
Esempio n. 6
0
 def test_get_params(self):
     """Check that every SkMinMaxScaler param appears in RaslMinMaxScaler.

     Each key/value pair returned by ``SkMinMaxScaler().get_params()``
     must be present, with an equal value, in
     ``RaslMinMaxScaler().get_params()``; extra keys on the Rasl side are
     allowed.
     """
     sk_scaler = SkMinMaxScaler()
     rasl_scaler = RaslMinMaxScaler()
     sk_params = sk_scaler.get_params()
     rasl_params = rasl_scaler.get_params()
     # assertDictContainsSubset was deprecated in Python 3.2 and removed in
     # 3.12. Overlaying sk_params on rasl_params leaves the dict unchanged
     # exactly when sk_params is a subset of rasl_params.
     self.assertEqual(rasl_params, {**rasl_params, **sk_params})
Esempio n. 7
0
 def test_pipeline_pandas(self):
     """Smoke-test a RaslMinMaxScaler >> LogisticRegression pipeline.

     The pipeline is fitted on ``self.X_train`` / ``self.y_train`` and
     asked to predict on ``self.X_test``; the predictions are not
     inspected, so the test only verifies that no step raises.
     """
     scaler_then_classifier = RaslMinMaxScaler() >> LogisticRegression()
     fitted = scaler_then_classifier.fit(self.X_train, self.y_train)
     _ = fitted.predict(self.X_test)