def valid_redemption(self, grouping_set, column_set, measure):
    """Count redemptions that have a matching exposure for the same offer.

    Rows of ``self.base_redemption_df`` with contact stage ``'RDM'`` are
    kept only when the same (identity, ``offer_code``) pair also occurs
    with contact stage ``'EXP'``. The surviving rows are passed to
    ``utils.count`` with the configured identity column as the final
    argument (presumably the column being counted -- confirm against
    ``utils.count``).

    :param grouping_set: forwarded unchanged to ``utils.count``.
    :param column_set: forwarded unchanged to ``utils.count``.
    :param measure: forwarded unchanged to ``utils.count``.
    :return: the result of ``utils.count`` for the matched rows.
    """
    source_df = self.base_redemption_df
    identity_col = self.config_dict['identity_type_code']
    match_keys = [identity_col, 'offer_code']

    # Distinct (identity, offer) pairs that were exposed.
    exposed_pairs = source_df.filter(col('contact_stage_code') == 'EXP')
    exposed_pairs = exposed_pairs.select(identity_col, 'offer_code')
    exposed_pairs = exposed_pairs.dropDuplicates()

    # Redemption rows restricted to those exposed pairs.
    redeemed_rows = source_df.filter(col('contact_stage_code') == 'RDM')
    matched_rows = redeemed_rows.join(exposed_pairs, match_keys, 'inner')

    return utils.count(
        self.sqlContext,
        matched_rows,
        grouping_set,
        column_set,
        measure,
        identity_col
    )
def digital_trigered(self, grouping_set, column_set, measure):
    """Count ``'ACT'``-stage rows whose channel code mentions "digital".

    A row of ``self.base_redemption_df`` qualifies when its lower-cased
    ``channel_code`` contains the substring ``digital`` and its
    ``contact_stage_code`` equals ``'ACT'``. Qualifying rows are passed to
    ``utils.count`` with ``'offer_code'`` as the final argument.

    :param grouping_set: forwarded unchanged to ``utils.count``.
    :param column_set: forwarded unchanged to ``utils.count``.
    :param measure: forwarded unchanged to ``utils.count``.
    :return: the result of ``utils.count`` for the qualifying rows.
    """
    # Case-insensitive substring match on the channel code.
    channel_is_digital = lower(col('channel_code')).like('%digital%')
    stage_is_act = col('contact_stage_code') == 'ACT'

    digital_rows = self.base_redemption_df.filter(
        channel_is_digital & stage_is_act
    )

    return utils.count(
        self.sqlContext,
        digital_rows,
        grouping_set,
        column_set,
        measure,
        'offer_code'
    )
def number_of_redemptions(self, grouping_set, column_set, measure):
    """Return the redemption counts, computing them only when not cached.

    ``self.df_redemptions`` acts as a cache: when it already holds at
    least one row it is returned as-is. Otherwise ``utils.count`` is run
    over ``self.df_redeem`` with ``'offer_code'`` as the final argument,
    and the result is stored in ``self.df_redemptions`` before being
    returned.

    Note that the cache is not keyed by the arguments: once populated,
    the stored frame is returned whatever ``grouping_set``,
    ``column_set`` and ``measure`` are passed.

    :param grouping_set: forwarded to ``utils.count`` on a cache miss.
    :param column_set: forwarded to ``utils.count`` on a cache miss.
    :param measure: forwarded to ``utils.count`` on a cache miss.
    :return: the cached or freshly computed result frame.
    """
    # head(1) yields an empty list for an empty frame, hence falsy.
    if self.df_redemptions.head(1):
        return self.df_redemptions

    fresh_counts = utils.count(
        self.sqlContext,
        self.df_redeem,
        grouping_set,
        column_set,
        measure,
        'offer_code'
    )
    self.df_redemptions = fresh_counts
    return fresh_counts
def mis_redemptions(self, grouping_set, column_set, measure):
    """Count redemptions whose (product, offer) pair is absent from table3.

    The distinct (``prod_code``, ``offer_code``) pairs of
    ``self.df_redeem`` that do not occur in ``self.df_dict('table3')`` are
    joined back onto ``self.df_redeem`` to recover the full redemption
    rows, which are then passed to ``utils.count`` with the configured
    identity column as the final argument.

    :param grouping_set: forwarded unchanged to ``utils.count``.
    :param column_set: forwarded unchanged to ``utils.count``.
    :param measure: forwarded unchanged to ``utils.count``.
    :return: the result of ``utils.count`` for the unmatched redemptions.
    """
    pair_cols = ['prod_code', 'offer_code']

    redeemed_pairs = self.df_redeem.select(*pair_cols).dropDuplicates()
    known_pairs = self.df_dict('table3').select(*pair_cols).dropDuplicates()

    # Pairs that were redeemed but are not listed in table3.
    unexpected_pairs = redeemed_pairs.subtract(known_pairs)
    unexpected_rows = unexpected_pairs.join(self.df_redeem, pair_cols)

    # Replace null customers with a literal marker, because a count over
    # the column would otherwise skip them.
    # NOTE(review): the fill targets 'prsn_code' while the count uses the
    # configured identity column -- confirm these are meant to coincide.
    unexpected_rows = unexpected_rows.fillna('null', ['prsn_code'])

    return utils.count(
        self.sqlContext,
        unexpected_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code']
    )
def redemption_rate(self, grouping_set, column_set, measure):
    """Compute redeemed / mailed coupon counts per grouping, as a string.

    ``self.base_redemption_df`` is split into ``'RDM'`` and ``'EXP'``
    contact-stage rows. Each half goes through ``utils.count`` (final
    argument ``'offer_code'``), producing the temporary measures
    ``redem_coupon`` and ``mailed_coupon``. The two results are joined on
    ``column_set`` plus ``'grouping_level'``, the ratio is stored under
    ``measure``, the temporary columns are dropped and ``measure`` is cast
    to a string.

    :param grouping_set: forwarded unchanged to both ``utils.count`` calls.
    :param column_set: list of column names; forwarded to ``utils.count``
        and used (with ``'grouping_level'`` appended) as the join keys.
    :param measure: name of the output ratio column.
    :return: the joined frame with the ratio in ``measure`` as a string.
    """
    redeemed_rows = self.base_redemption_df.filter(
        col('contact_stage_code') == 'RDM'
    )
    mailed_rows = self.base_redemption_df.filter(
        col('contact_stage_code') == 'EXP'
    )

    redeemed_counts = utils.count(
        self.sqlContext,
        redeemed_rows,
        grouping_set,
        column_set,
        'redem_coupon',
        'offer_code'
    )
    mailed_counts = utils.count(
        self.sqlContext,
        mailed_rows,
        grouping_set,
        column_set,
        'mailed_coupon',
        'offer_code'
    )

    # Default join type: groupings missing from either side are dropped.
    join_keys = column_set + ['grouping_level']
    joined = redeemed_counts.join(mailed_counts, join_keys)

    ratio = joined.redem_coupon / joined.mailed_coupon
    rated = joined.withColumn(measure, ratio)

    # Only the ratio is kept; the two helper counts are removed.
    rated = rated.drop('redem_coupon').drop('mailed_coupon')

    return rated.withColumn(measure, rated[measure].cast(StringType()))
def coupons_allocated(self, grouping_set, column_set, measure):
    """Count allocation rows whose contact stage is ``'DLV'``.

    Rows of ``self.base_allocation_df`` with ``contact_stage_code`` equal
    to ``'DLV'`` are passed to ``utils.count`` with ``'offer_code'`` as
    the final argument.

    :param grouping_set: forwarded unchanged to ``utils.count``.
    :param column_set: forwarded unchanged to ``utils.count``.
    :param measure: forwarded unchanged to ``utils.count``.
    :return: the result of ``utils.count`` for the ``'DLV'`` rows.
    """
    stage_is_dlv = col('contact_stage_code') == 'DLV'
    delivered_rows = self.base_allocation_df.filter(stage_is_dlv)

    return utils.count(
        self.sqlContext,
        delivered_rows,
        grouping_set,
        column_set,
        measure,
        'offer_code'
    )
def buy_in_category_but_not_product(self, grouping_set, column_set, measure):
    """Count identities buying in a featured product's category, not the product.

    Steps, as implemented:

    1. ``self.detail_offer_prod`` is left-joined to the ``prod_dim`` frame
       on ``prod_code`` and that column is renamed ``featured_prod_code``.
    2. The ``pre_period`` and ``post_period`` frames are each left-joined
       to ``prod_dim`` on ``prod_code``.
    3. The (identity, ``prod_hier_l20_code``) pairs present in both
       periods are found with an intersect.
    4. The frame from step 1 is joined to those pairs, rows where
       ``featured_prod_code`` differs from ``prod_code`` are kept, and the
       result is passed to ``utils.count`` with the configured identity
       column as the final argument.

    ``prod_hier_l20_code`` is presumably the product-category level of the
    hierarchy -- confirm against the ``prod_dim`` schema.

    :param grouping_set: forwarded unchanged to ``utils.count``.
    :param column_set: forwarded unchanged to ``utils.count``.
    :param measure: forwarded unchanged to ``utils.count``.
    :return: the result of ``utils.count`` for the filtered rows.
    """
    identity_col = self.config_dict['identity_type_code']
    category_keys = [identity_col, 'prod_hier_l20_code']

    exp_df = self.detail_offer_prod
    prod_dim_df = self.df_dict('prod_dim')

    # Fix: the join key and join type used to be written without a comma
    # between them, so the two adjacent literals were concatenated into a
    # single bogus key ('prod_codeleft_outer') and the join type was lost.
    exp_cat_df = exp_df.join(prod_dim_df, 'prod_code', 'left_outer')
    exp_cat_df = exp_cat_df.withColumnRenamed(
        'prod_code', 'featured_prod_code'
    )

    pre_prod_df = self.df_dict('pre_period').join(
        self.df_dict('prod_dim'), 'prod_code', 'left_outer'
    )
    post_prod_df = self.df_dict('post_period').join(
        self.df_dict('prod_dim'), 'prod_code', 'left_outer'
    )

    # Identity/category pairs seen in both the pre and the post period.
    pre_post_df = pre_prod_df.select(*category_keys).intersect(
        post_prod_df.select(*category_keys)
    )

    pre_post_exp_cat_df = exp_cat_df.join(pre_post_df, category_keys)

    # NOTE(review): 'prod_code' was renamed to 'featured_prod_code' above
    # and pre_post_df carries only the identity and category columns, so
    # unless another input contributes a 'prod_code' column this lookup
    # may fail -- confirm where the purchased product code should come
    # from.
    df = pre_post_exp_cat_df.filter(
        pre_post_exp_cat_df.featured_prod_code
        != pre_post_exp_cat_df.prod_code
    )

    return utils.count(
        self.sqlContext,
        df,
        grouping_set,
        column_set,
        measure,
        identity_col
    )