Esempio n. 1
0
def change_df_schema(spark: SparkSession, schema: Unischema, df: pyspark.sql.DataFrame) -> pyspark.sql.DataFrame:
    rows_rdd = (
        df
            .rdd
            .map(row_generator)
            .map(lambda x: dict_to_spark_row(schema, x))
    )

    df = spark.createDataFrame(
        rows_rdd,
        schema.as_spark_schema()
    )

    return df