Esempio n. 1
0
    def it_renames_columns_based_on_a_map(spark):
        """Check that columns listed in a lookup map are renamed via quinn.

        Builds a one-row DataFrame with the columns ``chips``, ``hi`` and
        ``petrol``, passes it through ``quinn.with_some_columns_renamed`` and
        compares the outcome with a DataFrame whose columns are
        ``french_fries``, ``hi`` and ``gas``.

        Args:
            spark: Object used to create the DataFrames (presumably a Spark
                session test fixture -- confirm against the test setup).
        """
        american_names = {"chips": "french_fries", "petrol": "gas"}

        def to_american(column_name):
            # Plain dict lookup: raises KeyError for names missing from the map.
            return american_names[column_name]

        def has_translation(column_name):
            return column_name in american_names

        row = ("potato", "hola!", "disel")

        source_fields = [
            StructField(name, StringType(), True)
            for name in ("chips", "hi", "petrol")
        ]
        source_df = spark.createDataFrame([row], StructType(source_fields))

        rename_translated = quinn.with_some_columns_renamed(
            to_american,
            has_translation,
        )
        actual_df = rename_translated(source_df)

        # Same row as the source; only the column names are expected to differ.
        expected_columns = [
            (name, StringType(), True)
            for name in ("french_fries", "hi", "gas")
        ]
        expected_df = spark.create_df([row], expected_columns)

        chispa.assert_df_equality(actual_df, expected_df)
Esempio n. 2
0
    def it_renames_some_columns_with_dots(spark):
        """Check that only columns starting with "a" get their dots replaced.

        Builds a one-row DataFrame with the columns ``a.person``, ``a.thing``
        and ``b.person``, passes it through
        ``quinn.with_some_columns_renamed`` and compares the outcome with a
        DataFrame whose columns are ``a_person``, ``a_thing`` and ``b.person``.

        Args:
            spark: Object used to create the DataFrames (presumably a Spark
                session test fixture -- confirm against the test setup).
        """

        def underscore_dots(column_name):
            return column_name.replace(".", "_")

        def starts_with_a(column_name):
            return column_name.startswith("a")

        row = ("frank", "hot dog", "mia")

        source_fields = [
            StructField(name, StringType(), True)
            for name in ("a.person", "a.thing", "b.person")
        ]
        source_df = spark.createDataFrame([row], StructType(source_fields))

        rename_a_columns = quinn.with_some_columns_renamed(
            underscore_dots,
            starts_with_a,
        )
        actual_df = rename_a_columns(source_df)

        # "b.person" keeps its dot because it fails the starts-with-"a" check.
        expected_columns = [
            (name, StringType(), True)
            for name in ("a_person", "a_thing", "b.person")
        ]
        expected_df = spark.create_df([row], expected_columns)

        chispa.assert_df_equality(actual_df, expected_df)
Esempio n. 3
0
import quinn
def spaces_to_underscores(s):
    """Return *s* with every space replaced by an underscore.

    Used as the renaming function handed to ``quinn.with_columns_renamed``.

    Args:
        s: Column name to convert.

    Returns:
        The column name with each ``" "`` turned into ``"_"``; names without
        spaces are returned unchanged.
    """
    # Replace spaces (not underscores), matching what the function name says.
    return s.replace(" ", "_")
# Build a transform from spaces_to_underscores and call it on df3.
# NOTE(review): df3 is not defined in this snippet; it must already exist.
actual_df = quinn.with_columns_renamed(spaces_to_underscores)(df3)
# Display the resulting DataFrame.
actual_df.show()

#####################################################################
toDF for renaming columns
######################################################################

# Alternative without quinn: call toDF with df3's column names, each having
# its spaces replaced by underscores.
# NOTE(review): the result of this expression is not assigned to a name here.
df3.toDF(*(c.replace(' ', '_') for c in df3.columns))
#####################################################################
Renaming some columns from a map

The with_some_columns_renamed function takes two arguments:

The first argument is a function that specifies how the strings should be modified.
The second argument is a function that returns True if the string should be modified and False otherwise.
######################################################################

import quinn
# Old column name -> new column name.
# NOTE(review): "salaryww" looks like a typo for "salary" -- confirm against
# df2's actual columns.
mapping = {"id": "new_id", "name": "new_name","salaryww": "new_salary"}
def british_to_american(s):
    """Return the replacement name for column *s* from ``mapping``.

    Despite its name, this is a plain lookup in ``mapping``; it raises
    ``KeyError`` when *s* is not a key.
    """
    return mapping[s]
def change_col_name(s):
    """Return True when column *s* is a key of ``mapping``."""
    return s in mapping
# Build the transform from the two functions above and call it on df2.
# NOTE(review): df2 is not defined in this snippet; it must already exist.
actual_df = quinn.with_some_columns_renamed(british_to_american, change_col_name)(df2)
# Display the resulting DataFrame.
actual_df.show()

################################