def it_renames_columns_based_on_a_map(spark):
    """Rename only the columns that appear as keys in a lookup dict.

    Builds a one-row DataFrame with columns ``chips``, ``hi`` and ``petrol``,
    runs it through ``quinn.with_some_columns_renamed`` and asserts that the
    result equals a DataFrame whose columns are ``french_fries``, ``hi`` and
    ``gas`` with the same row.
    """
    american_names = {"chips": "french_fries", "petrol": "gas"}

    def to_american(col_name):
        # Plain indexing on purpose: a name missing from the dict raises KeyError.
        return american_names[col_name]

    def has_translation(col_name):
        return col_name in american_names

    row = ("potato", "hola!", "disel")

    # Every column is a nullable string; only the names differ.
    source_fields = [
        StructField(col_name, StringType(), True)
        for col_name in ("chips", "hi", "petrol")
    ]
    source_df = spark.createDataFrame([row], StructType(source_fields))

    rename = quinn.with_some_columns_renamed(to_american, has_translation)
    actual_df = rename(source_df)

    expected_specs = [
        (col_name, StringType(), True)
        for col_name in ("french_fries", "hi", "gas")
    ]
    expected_df = spark.create_df([row], expected_specs)

    chispa.assert_df_equality(actual_df, expected_df)
def it_renames_some_columns_with_dots(spark):
    """Replace dots with underscores only in columns whose name starts with "a".

    Builds a one-row DataFrame with columns ``a.person``, ``a.thing`` and
    ``b.person``, runs it through ``quinn.with_some_columns_renamed`` and
    asserts that the result has columns ``a_person``, ``a_thing`` and the
    unchanged ``b.person`` with the same row.
    """

    def undot(col_name):
        return col_name.replace(".", "_")

    def starts_with_a(col_name):
        return col_name.startswith("a")

    row = ("frank", "hot dog", "mia")

    # Every column is a nullable string; only the names differ.
    source_fields = [
        StructField(col_name, StringType(), True)
        for col_name in ("a.person", "a.thing", "b.person")
    ]
    source_df = spark.createDataFrame([row], StructType(source_fields))

    rename = quinn.with_some_columns_renamed(undot, starts_with_a)
    actual_df = rename(source_df)

    expected_specs = [
        (col_name, StringType(), True)
        for col_name in ("a_person", "a_thing", "b.person")
    ]
    expected_df = spark.create_df([row], expected_specs)

    chispa.assert_df_equality(actual_df, expected_df)
import quinn


def spaces_to_underscores(s):
    """Return ``s`` with every space replaced by an underscore."""
    # Fixed: this previously replaced "_" with "--", which contradicted the
    # function's name and the equivalent toDF example below.
    return s.replace(" ", "_")


# Rename df3's columns by handing the renaming function to quinn.
actual_df = quinn.with_columns_renamed(spaces_to_underscores)(df3)
actual_df.show()

#####################################################################
# toDF for renaming columns
######################################################################
df3.toDF(*(c.replace(' ', '_') for c in df3.columns))

#####################################################################
# Renaming some columns from a map
#
# The with_some_columns_renamed function takes two arguments:
#   - The first argument is a function that specifies how the strings
#     should be modified.
#   - The second argument is a function that returns True if the string
#     should be modified and False otherwise.
######################################################################
import quinn

mapping = {"id": "new_id", "name": "new_name", "salaryww": "new_salary"}


def british_to_american(s):
    """Return the replacement name stored in ``mapping`` for ``s``.

    Raises KeyError when ``s`` is not a key of ``mapping``.
    """
    return mapping[s]


def change_col_name(s):
    """Return True when ``s`` is a key of ``mapping``."""
    return s in mapping


actual_df = quinn.with_some_columns_renamed(british_to_american, change_col_name)(df2)
actual_df.show()

################################