Exemple #1
0
    def test_overlay(self):
        """Check how ``overlay`` columns render for each way of passing pos/len."""
        import re

        from pyspark.sql.functions import col, lit, overlay

        # One column per argument style: Column vs. name operands, Python int
        # vs. ``lit`` positions, and with or without an explicit length.
        columns_under_test = [
            overlay(col("foo"), col("bar"), 1),
            overlay("x", "y", 3),
            overlay(col("x"), col("y"), 1, 3),
            overlay("x", "y", 2, 5),
            overlay("x", "y", lit(11)),
            overlay("x", "y", lit(2), lit(5)),
        ]

        # Collect every "overlay(...)" fragment found in each column's str() form.
        actual = []
        for column in columns_under_test:
            rendered = str(column)
            actual.extend(re.findall("(overlay\\(.*\\))", rendered))

        # The expected strings show -1 wherever no length was passed.
        expected = [
            "overlay(foo, bar, 1, -1)",
            "overlay(x, y, 3, -1)",
            "overlay(x, y, 1, 3)",
            "overlay(x, y, 2, 5)",
            "overlay(x, y, 11, -1)",
            "overlay(x, y, 2, 5)",
        ]

        self.assertListEqual(actual, expected)
Exemple #2
0
    def test_overlay(self):
        """Verify ``overlay`` column rendering and its evaluated result.

        The first half checks the string form of the Column built for each
        supported argument style (Column vs. name operands, Python int vs.
        ``lit`` positions, with or without an explicit length).  The second
        half evaluates ``overlay`` on a one-row DataFrame and checks that
        int, ``lit`` and column-name arguments all give the same row.
        """
        from pyspark.sql.functions import col, lit, overlay
        from itertools import chain
        import re

        rendered_columns = [
            overlay(col("foo"), col("bar"), 1),
            overlay("x", "y", 3),
            overlay(col("x"), col("y"), 1, 3),
            overlay("x", "y", 2, 5),
            overlay("x", "y", lit(11)),
            overlay("x", "y", lit(2), lit(5)),
        ]

        # Extract the "overlay(...)" fragment from each Column's str() form.
        overlay_call = re.compile(r"(overlay\(.*\))")
        actual = list(
            chain.from_iterable(overlay_call.findall(str(c)) for c in rendered_columns)
        )

        # The expected strings show -1 wherever no length was passed.
        expected = [
            "overlay(foo, bar, 1, -1)",
            "overlay(x, y, 3, -1)",
            "overlay(x, y, 1, 3)",
            "overlay(x, y, 2, 5)",
            "overlay(x, y, 11, -1)",
            "overlay(x, y, 2, 5)",
        ]

        self.assertListEqual(actual, expected)

        df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len"))

        # NOTE(review): ``Row`` is not imported in this method; it is expected
        # to come from the enclosing module's imports.
        exp = [Row(ol="SPARK_CORESQL")]

        # The same overlay expressed three ways; each must produce ``exp``.
        cases = {
            "int pos/len": overlay(df.x, df.y, 7, 0),
            "lit pos/len": overlay(df.x, df.y, lit(7), lit(0)),
            "column-name pos/len": overlay("x", "y", "pos", "len"),
        }
        for label, column in cases.items():
            # subTest + assertEqual reports which variant failed and the rows
            # it returned, instead of a bare "False is not true".
            with self.subTest(args=label):
                self.assertEqual(df.select(column.alias("ol")).collect(), exp)