def process_rdd(spark_session, rdd):
    """Turn one RDD of group/series records into ACL rows and hand them off.

    The RDD is filtered with ``is_not_empty``, mapped through
    ``process_group_series`` and converted to a DataFrame using
    ``group_series_schema``. That DataFrame is joined with the group/user
    DataFrame on ``group_id`` and with the series/entities DataFrame on
    ``series_id``; the columns ``user_id``, ``series_id`` and ``module_id``
    are then passed to ``write_to_mysql`` with target ``"acl"`` and mode
    ``"append"``.

    Args:
        spark_session: Session forwarded to ``series_entities_df`` and
            ``group_user_df`` to build the two lookup DataFrames.
        rdd: Input RDD; its ``context`` is used to build the session that
            creates the group/series DataFrame.
    """
    # NOTE(review): the group/series DataFrame is built from a session derived
    # from the RDD's own context, not from `spark_session` -- confirm whether
    # that is intentional (e.g. for streaming callbacks).
    rdd_session = SparkSession(rdd.context)
    group_series_rows = rdd.filter(is_not_empty).map(process_group_series)
    group_series = rdd_session.createDataFrame(
        group_series_rows, group_series_schema
    )

    # NOTE(review): looks like debug output; head() runs a Spark job just to
    # fetch the first row -- confirm whether it is still wanted.
    print(group_series.head())

    series_entities = series_entities_df(spark_session)
    group_users = group_user_df(spark_session)

    with_users = group_series.join(group_users, ["group_id"])
    with_entities = with_users.join(series_entities, ["series_id"])
    acl_rows = with_entities.select("user_id", "series_id", "module_id")

    write_to_mysql(acl_rows, "acl", "append")