def create_keywords_showlog_table(spark, table_name):
    """Build the keywords show-log DataFrame and pass it to write_to_table.

    Starts from the DataFrame returned by ``create_keywords_raw_log(spark)``,
    renames three columns, prints the resulting schema, and hands the result
    to ``write_to_table`` together with ``table_name``.

    Args:
        spark: Forwarded unchanged to ``create_keywords_raw_log``.
        table_name: Forwarded unchanged to ``write_to_table``.
    """
    # (existing column, new column) pairs, applied in this order.
    column_renames = (
        ('media', 'adv_type'),
        ('price_model', 'adv_bill_mode_cd'),
        ('action_time', 'show_time'),
    )

    showlog_df = create_keywords_raw_log(spark)
    for old_name, new_name in column_renames:
        showlog_df = showlog_df.withColumnRenamed(old_name, new_name)

    showlog_df.printSchema()
    write_to_table(showlog_df, table_name)
def create_matrix_table(self, data, table_name):
    """Create a DataFrame from ``data`` with a fixed schema and write it out.

    ``data`` is parallelized through ``self.spark.sparkContext`` and turned
    into a DataFrame with the four-column schema declared below; the result
    is passed to ``util.write_to_table`` along with ``table_name``.

    Args:
        data: Collection handed to ``sparkContext.parallelize``; presumably
            one entry per row matching the schema below (verify against
            callers).
        table_name: Forwarded unchanged to ``util.write_to_table``.
    """
    # NOTE(review): the type constructors are presumably the ones from
    # pyspark.sql.types -- the import is not visible in this chunk.
    # The second positional argument of ArrayType / third of StructField
    # is passed as False exactly where the original did so.
    matrix_fields = [
        StructField("did_list", ArrayType(StringType(), False)),
        StructField(
            "score_matrix",
            ArrayType(ArrayType(FloatType(), False)),
            False,
        ),
        StructField("c1_list", ArrayType(FloatType(), False)),
        StructField("did_bucket", IntegerType(), False),
    ]
    matrix_schema = StructType(matrix_fields)

    rows_rdd = self.spark.sparkContext.parallelize(data)
    matrix_df = self.spark.createDataFrame(rows_rdd, matrix_schema)
    util.write_to_table(matrix_df, table_name)
def create_effective_keywords_table(spark, table_name):
    """Pass the effective-keywords DataFrame to write_to_table.

    Args:
        spark: Forwarded unchanged to ``create_effective_keywords``.
        table_name: Forwarded unchanged to ``write_to_table``.
    """
    effective_keywords_df = create_effective_keywords(spark)
    write_to_table(effective_keywords_df, table_name)
def create_unified_log_table(spark, table_name):
    """Pass the unified-log DataFrame to write_to_table.

    Args:
        spark: Forwarded unchanged to ``create_unified_log``.
        table_name: Forwarded unchanged to ``write_to_table``.
    """
    unified_log_df = create_unified_log(spark)
    write_to_table(unified_log_df, table_name)
def create_log_table(spark, table_name):
    """Pass the cleaned-log DataFrame to write_to_table.

    Args:
        spark: Forwarded unchanged to ``create_cleaned_log``.
        table_name: Forwarded unchanged to ``write_to_table``.
    """
    cleaned_log_df = create_cleaned_log(spark)
    write_to_table(cleaned_log_df, table_name)
def create_persona_table(spark, table_name):
    """Build the persona DataFrame and pass it to write_to_table.

    Starts from the DataFrame returned by ``create_raw_persona(spark)``,
    renames two columns, prints the resulting schema, and hands the result
    to ``write_to_table`` together with ``table_name``.

    Args:
        spark: Forwarded unchanged to ``create_raw_persona``.
        table_name: Forwarded unchanged to ``write_to_table``.
    """
    # (existing column, new column) pairs, applied in this order.
    column_renames = (
        ('gender', 'gender_new_dev'),
        ('age', 'forecast_age_dev'),
    )

    persona_df = create_raw_persona(spark)
    for old_name, new_name in column_renames:
        persona_df = persona_df.withColumnRenamed(old_name, new_name)

    persona_df.printSchema()
    write_to_table(persona_df, table_name)