def test_init_fit_predict_spark_pandas(self):
    """Concatenate a Spark-converted table with a pandas table.

    ``A`` is converted with ``pandas2spark(..., add_index=True)`` while ``B``
    is left as a pandas DataFrame; both are tagged with table names and
    passed to ``ConcatFeatures``. Every expected column of the result is
    compared value by value.

    The test is reported as skipped (not passed) when Spark is unavailable.
    """
    from lale.datasets import pandas2spark
    from lale.datasets.util import spark_installed

    if not spark_installed:
        # An explicit skip keeps the missing coverage visible in the test
        # report instead of counting an unexecuted test as a success.
        self.skipTest("Spark is not installed")

    trainable_cf = ConcatFeatures()
    A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
    B = [[14, 15], [24, 25], [34, 35]]
    A = pd.DataFrame(A, columns=["a", "b", "c"])
    B = pd.DataFrame(B, columns=["d", "e"])
    # Only A is converted to Spark; B remains a pandas DataFrame.
    A = pandas2spark(A, add_index=True)
    A = add_table_name(A, "A")
    B = add_table_name(B, "B")

    trained_cf = trainable_cf.fit(X=[A, B])
    transformed = trained_cf.transform([A, B])

    expected = [
        [11, 12, 13, 14, 15],
        [21, 22, 23, 24, 25],
        [31, 32, 33, 34, 35],
    ]
    expected = pd.DataFrame(expected, columns=["a", "b", "c", "d", "e"])
    for c in expected.columns:
        self.assertEqual(list(transformed[c]), list(expected[c]))
def test_init_fit_predict(self):
    """Fit and apply ``ConcatFeatures`` on two nested lists.

    Checks that the transformed result has the same number of rows and the
    same row widths as the expected side-by-side concatenation, and that
    every cell matches.
    """
    trainable_cf = ConcatFeatures()
    A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
    B = [[14, 15], [24, 25], [34, 35]]
    trained_cf = trainable_cf.fit(X=[A, B])
    transformed = trained_cf.transform([A, B])
    expected = [
        [11, 12, 13, 14, 15],
        [21, 22, 23, 24, 25],
        [31, 32, 33, 34, 35],
    ]

    # Drive the comparison from `expected` and check the dimensions first:
    # looping over the dimensions of `transformed` alone would let an empty
    # or truncated result pass without a single assertion being executed.
    self.assertEqual(len(transformed), len(expected))
    for i_sample, expected_row in enumerate(expected):
        self.assertEqual(len(transformed[i_sample]), len(expected_row))
        for i_feature, expected_value in enumerate(expected_row):
            self.assertEqual(transformed[i_sample][i_feature], expected_value)
def test_init_fit_predict_pandas_series(self):
    """Concatenate a pandas DataFrame with a named pandas Series.

    The DataFrame (columns ``a``, ``b``, ``c``) and the Series (named ``d``)
    are tagged with table names, passed through ``ConcatFeatures`` fit and
    transform, and each expected column is compared against the result.
    """
    concat_op = ConcatFeatures()

    frame = add_table_name(
        pd.DataFrame(
            [[11, 12, 13], [21, 22, 23], [31, 32, 33]],
            columns=["a", "b", "c"],
        ),
        "A",
    )
    series = add_table_name(pd.Series([14, 24, 34], name="d"), "B")

    fitted = concat_op.fit(X=[frame, series])
    result = fitted.transform([frame, series])

    wanted = pd.DataFrame(
        {
            "a": [11, 21, 31],
            "b": [12, 22, 32],
            "c": [13, 23, 33],
            "d": [14, 24, 34],
        }
    )
    # Column-wise comparison; extra columns in the result are not inspected.
    for column_name, wanted_values in wanted.items():
        self.assertEqual(list(result[column_name]), list(wanted_values))