Example #1
0
def process_rdd(spark_session, rdd):
	"""Turn one RDD of group/series records into ACL rows and persist them.

	The RDD is filtered with ``is_not_empty``, mapped with
	``process_group_series`` and converted to a DataFrame using
	``group_series_schema``. That DataFrame is joined with the group/user
	DataFrame on ``group_id`` and with the series/entities DataFrame on
	``series_id``; the ``user_id``, ``series_id`` and ``module_id`` columns
	of the result are handed to ``write_to_mysql``.

	Args:
		spark_session: Session used to build every DataFrame in this
			function, so that all sides of the joins share one session.
		rdd: RDD of raw records; elements rejected by ``is_not_empty`` are
			dropped before conversion.
	"""
	rows = rdd.filter(is_not_empty).map(process_group_series)
	# Build the DataFrame from the session the caller supplied instead of
	# constructing a fresh SparkSession around rdd.context on every call.
	gs_df = spark_session.createDataFrame(rows, group_series_schema)

	# Debug output. NOTE(review): head() is a Spark action, so this evaluates
	# the filter/map pipeline once before the join below runs it again.
	print(gs_df.head())
	se_df = series_entities_df(spark_session)
	gu_df = group_user_df(spark_session)

	joined = gs_df.join(gu_df, ["group_id"]).join(se_df, ["series_id"])
	df = joined.select("user_id", "series_id", "module_id")
	# Presumably "acl" is the target table and "append" the save mode;
	# confirm against write_to_mysql.
	write_to_mysql(df, "acl", "append")