def test_transform_range(self):
    """Compare transformed values of both scalers with feature_range=(-5, 5).

    SkMinMaxScaler is fitted and applied on the pandas frame, while
    RaslMinMaxScaler is fitted and applied on the same columns after
    passing them through ``lale.datasets.pandas2spark``. The Rasl output
    is brought back with ``toPandas()`` and rows 0, 10 and 20 are
    compared cell by cell across the three selected columns.
    """
    selected = ["Product number", "Quantity", "Retailer code"]
    frame = self.go_sales[0][selected]
    spark_frame = lale.datasets.pandas2spark(frame)

    bounds = (-5, 5)
    reference = SkMinMaxScaler(feature_range=bounds)
    candidate = RaslMinMaxScaler(feature_range=bounds)
    reference_fitted = reference.fit(frame)
    candidate_fitted = candidate.fit(spark_frame)

    expected = reference_fitted.transform(frame)
    actual = candidate_fitted.transform(spark_frame).toPandas()

    # Spot-check three rows, every selected column, in row-major order.
    for row in (0, 10, 20):
        for col in range(len(selected)):
            self.assertAlmostEqual(expected[row, col], actual.iloc[row, col])
def test_transform(self):
    """Compare transformed values of both scalers with default settings.

    Both SkMinMaxScaler and RaslMinMaxScaler are fitted and applied on
    the same pandas columns; rows 0, 10 and 20 are compared cell by cell
    across the three selected columns.
    """
    selected = ["Product number", "Quantity", "Retailer code"]
    frame = self.go_sales[0][selected]

    reference = SkMinMaxScaler()
    candidate = RaslMinMaxScaler()
    reference_fitted = reference.fit(frame)
    candidate_fitted = candidate.fit(frame)

    expected = reference_fitted.transform(frame)
    actual = candidate_fitted.transform(frame)

    # Spot-check three rows, every selected column, in row-major order.
    for row in (0, 10, 20):
        for col in range(len(selected)):
            self.assertAlmostEqual(expected[row, col], actual.iloc[row, col])
def test_fit(self):
    """Fit both scalers on the same pandas columns and compare them.

    The comparison itself is delegated to ``self._check_trained``.
    """
    frame = self.go_sales[0][["Product number", "Quantity", "Retailer code"]]
    reference = SkMinMaxScaler()
    candidate = RaslMinMaxScaler()
    reference_fitted = reference.fit(frame)
    candidate_fitted = candidate.fit(frame)
    self._check_trained(reference_fitted, candidate_fitted)
def test_fit_range(self):
    """Fit both scalers with feature_range=(-5, 5) and compare them.

    SkMinMaxScaler is fitted on the pandas frame; RaslMinMaxScaler is
    fitted on the same columns after ``lale.datasets.pandas2spark``.
    The comparison itself is delegated to ``self._check_trained``.
    """
    frame = self.go_sales[0][["Product number", "Quantity", "Retailer code"]]
    spark_frame = lale.datasets.pandas2spark(frame)

    bounds = (-5, 5)
    reference = SkMinMaxScaler(feature_range=bounds)
    candidate = RaslMinMaxScaler(feature_range=bounds)
    reference_fitted = reference.fit(frame)
    candidate_fitted = candidate.fit(spark_frame)
    self._check_trained(reference_fitted, candidate_fitted)
def test_partial_fit(self):
    """Feed both scalers the same three batches via partial_fit.

    The data is split into rows [0, 10), [10, 100) and [100, end). After
    each batch the two scalers are compared with ``self._check_trained``.
    """
    frame = self.go_sales[0][["Product number", "Quantity", "Retailer code"]]
    batches = (frame[:10], frame[10:100], frame[100:])

    reference = SkMinMaxScaler()
    candidate = RaslMinMaxScaler()
    for batch in batches:
        # Same scaler objects across iterations, so state accumulates.
        reference_fitted = reference.partial_fit(batch)
        candidate_fitted = candidate.partial_fit(batch)
        self._check_trained(reference_fitted, candidate_fitted)
def test_get_params(self):
    """Check that every SkMinMaxScaler param appears in RaslMinMaxScaler.

    The params returned by ``SkMinMaxScaler().get_params()`` must be a
    subset (same keys, equal values) of those returned by
    ``RaslMinMaxScaler().get_params()``; extra Rasl-only keys are allowed.
    """
    sk_scaler = SkMinMaxScaler()
    rasl_scaler = RaslMinMaxScaler()
    sk_params = sk_scaler.get_params()
    rasl_params = rasl_scaler.get_params()
    # assertDictContainsSubset was deprecated in Python 3.2 and removed in
    # 3.12. Overlaying sk_params on rasl_params leaves the dict unchanged
    # exactly when sk_params is a subset of it.
    self.assertEqual(rasl_params, {**rasl_params, **sk_params})
def test_pipeline_pandas(self):
    """Fit a RaslMinMaxScaler >> LogisticRegression pipeline and predict.

    Fits on ``self.X_train``/``self.y_train`` and calls ``predict`` on
    ``self.X_test``. The predictions are not inspected, so the test only
    fails if one of the calls raises.
    """
    scale_then_classify = RaslMinMaxScaler() >> LogisticRegression()
    fitted = scale_then_classify.fit(self.X_train, self.y_train)
    fitted.predict(self.X_test)