def test_init_fit_predict_spark_pandas(self):
        """Fit and apply ConcatFeatures on a mixed pair of input tables.

        Table ``A`` is passed through ``pandas2spark(..., add_index=True)``
        while table ``B`` stays a pandas DataFrame.  Each column of the
        transformed result must equal the matching column of ``A`` and ``B``
        placed side by side.

        The test is reported as skipped when ``spark_installed`` is false,
        rather than passing without having exercised anything.
        """
        from lale.datasets import pandas2spark
        from lale.datasets.util import spark_installed

        if not spark_installed:
            self.skipTest("Spark is not installed")

        trainable_cf = ConcatFeatures()
        A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
        B = [[14, 15], [24, 25], [34, 35]]
        A = pd.DataFrame(A, columns=["a", "b", "c"])
        B = pd.DataFrame(B, columns=["d", "e"])
        # Only A is converted; B is deliberately left as a pandas DataFrame
        # so that the two inputs are of different kinds.
        A = pandas2spark(A, add_index=True)
        A = add_table_name(A, "A")
        B = add_table_name(B, "B")

        trained_cf = trainable_cf.fit(X=[A, B])
        transformed = trained_cf.transform([A, B])
        expected = [
            [11, 12, 13, 14, 15],
            [21, 22, 23, 24, 25],
            [31, 32, 33, 34, 35],
        ]
        expected = pd.DataFrame(expected, columns=["a", "b", "c", "d", "e"])
        # Compare column by column as plain lists.
        for c in expected.columns:
            self.assertEqual(list(transformed[c]), list(expected[c]))
# Example #2
# 0
    def test_init_fit_predict(self):
        """Fit and apply ConcatFeatures on two tables given as nested lists.

        ``A`` has three rows of three values and ``B`` three rows of two
        values.  Each row of the transformed result must hold the values of
        ``A`` followed by the values of ``B`` for that row.
        """
        trainable_cf = ConcatFeatures()
        A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
        B = [[14, 15], [24, 25], [34, 35]]

        trained_cf = trainable_cf.fit(X=[A, B])
        transformed = trained_cf.transform([A, B])
        expected = [
            [11, 12, 13, 14, 15],
            [21, 22, 23, 24, 25],
            [31, 32, 33, 34, 35],
        ]
        # Check the dimensions explicitly: iterating only over the indices of
        # ``transformed`` would pass vacuously for an empty or truncated
        # result.
        self.assertEqual(len(transformed), len(expected))
        for actual_row, expected_row in zip(transformed, expected):
            self.assertEqual(len(actual_row), len(expected_row))
            for actual, wanted in zip(actual_row, expected_row):
                self.assertEqual(actual, wanted)
 def test_init_fit_predict_pandas_series(self):
     """Fit and apply ConcatFeatures on a pandas DataFrame plus a Series.

     The DataFrame has columns ``a``, ``b``, ``c`` and the Series is named
     ``d``; both are tagged via ``add_table_name`` before being handed to
     the operator.  Each of the columns ``a`` to ``d`` of the transformed
     result is compared, as a list, against the expected values.
     """
     concat_op = ConcatFeatures()

     frame = add_table_name(
         pd.DataFrame(
             [[11, 12, 13], [21, 22, 23], [31, 32, 33]],
             columns=["a", "b", "c"],
         ),
         "A",
     )
     series = add_table_name(pd.Series([14, 24, 34], name="d"), "B")

     fitted_op = concat_op.fit(X=[frame, series])
     result = fitted_op.transform([frame, series])

     # Expected table, spelled out per column.
     wanted = pd.DataFrame(
         {
             "a": [11, 21, 31],
             "b": [12, 22, 32],
             "c": [13, 23, 33],
             "d": [14, 24, 34],
         }
     )
     for column_name, wanted_values in wanted.items():
         self.assertEqual(list(result[column_name]), list(wanted_values))