def response_rate(self, grouping_set, column_set, measure):
    """Return redem_coupon / mailed_coupon per group, stored as a string in ``measure``.

    Rows of ``self.base_redemption_df`` with contact stage 'RDM' and 'EXP'
    are each passed to ``utils.distinct_count``; the two results are
    inner-joined on ``column_set + ['grouping_level']`` and divided.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: list of column names; also used as the join key.
        measure: name of the output ratio column.

    Returns:
        DataFrame with the join-key columns plus ``measure`` cast to string.
    """
    stage = col('contact_stage_code')
    redeemed_rows = self.base_redemption_df.filter(stage == 'RDM')
    mailed_rows = self.base_redemption_df.filter(stage == 'EXP')

    id_col = self.config_dict['identity_type_code']
    redeemed_counts = utils.distinct_count(
        self.sqlContext, redeemed_rows, grouping_set, column_set,
        'redem_coupon', id_col
    )
    mailed_counts = utils.distinct_count(
        self.sqlContext, mailed_rows, grouping_set, column_set,
        'mailed_coupon', id_col
    )

    join_keys = column_set + ['grouping_level']
    joined = redeemed_counts.join(mailed_counts, join_keys)

    # Compute the ratio, then discard the two intermediate count columns.
    rate = joined.withColumn(
        measure, joined['redem_coupon'] / joined['mailed_coupon']
    ).drop('redem_coupon').drop('mailed_coupon')

    return rate.withColumn(measure, rate[measure].cast(StringType()))
def household_allocated(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over allocation rows in contact stage 'DLV'.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    delivered_rows = self.base_allocation_df.filter(
        col('contact_stage_code') == 'DLV'
    )
    return utils.distinct_count(
        self.sqlContext,
        delivered_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def control_redeemers(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over ``self.df_redeem`` rows flagged as control.

    Only rows whose ``event_control_flag`` equals 'Y' are kept.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    redeem_df = self.df_redeem
    control_rows = redeem_df.filter(redeem_df.event_control_flag == 'Y')
    return utils.distinct_count(
        self.sqlContext,
        control_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def active_loyal_customer(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over 'ALC' allocation rows with loyalty 'PR' or 'VL'.

    ``loyalty_level`` is trimmed before comparison, and rows with a null
    ``loyalty_level`` are excluded.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    allocated_rows = self.base_allocation_df.filter(
        col('contact_stage_code') == 'ALC'
    )

    trimmed_level = trim(col('loyalty_level'))
    level_matches = (trimmed_level == 'PR') | (trimmed_level == 'VL')
    loyal_rows = allocated_rows.filter(level_matches)
    loyal_rows = loyal_rows.filter(col('loyalty_level').isNotNull())

    return utils.distinct_count(
        self.sqlContext,
        loyal_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def digital_redemeers(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over redemptions whose channel mentions "digital".

    The match is case-insensitive: ``channel_code`` is lower-cased and
    tested against the SQL LIKE pattern ``%digital%``.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    is_digital = lower(col('channel_code')).like("%digital%")
    digital_rows = self.df_redeem.filter(is_digital)
    return utils.distinct_count(
        self.sqlContext,
        digital_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def hh_redemeers(self, grouping_set, column_set, measure):
    """Return the ``utils.distinct_count`` result for ``self.df_redeem``, memoised.

    The result is stored on ``self.df_prsn``. While that DataFrame has at
    least one row it is returned as-is; otherwise it is recomputed and
    stored again.

    NOTE(review): the stored result is reused regardless of the arguments
    of later calls -- confirm callers always pass the same arguments.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        The cached or freshly computed DataFrame.
    """
    # head(1) is an empty list when the stored DataFrame has no rows.
    if self.df_prsn.head(1):
        return self.df_prsn

    computed = utils.distinct_count(
        self.sqlContext,
        self.df_redeem,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
    self.df_prsn = computed
    return computed
def trialist(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over rows with no match in ``self.post_count_df``.

    ``self.detail_offer_prod_dur`` is left-outer-joined to
    ``self.post_count_df`` on (identity column, 'prod_code'); only rows
    whose joined ``count`` is null are kept.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    join_keys = [self.config_dict['identity_type_code'], 'prod_code']
    with_post_counts = self.detail_offer_prod_dur.join(
        self.post_count_df, join_keys, 'left_outer'
    )
    # A null count means the left-hand row had no partner in post_count_df.
    unmatched_rows = with_post_counts.filter(col('count').isNull())

    return utils.distinct_count(
        self.sqlContext,
        unmatched_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def mailed_participation(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over 'EXP' rows whose identity appears in product transactions.

    Steps:
      1. distinct ``prod_code`` values from ``self.table3``;
      2. distinct (identity, prod_code) pairs from ``self.dur_period``;
      3. inner join of 1 and 2 on ``prod_code``;
      4. ``self.base_redemption_df`` without ``prod_code``, restricted to
         contact stage 'EXP';
      5. inner join of 3 and 4 on the identity column.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the joined rows.
    """
    products = self.table3.select('prod_code').dropDuplicates()
    buyer_products = self.dur_period.select(
        self.config_dict['identity_type_code'], 'prod_code'
    ).dropDuplicates()
    matched_buyers = products.join(buyer_products, ['prod_code'])

    # prod_code is removed first so the join below does not carry it twice.
    mailed_rows = self.base_redemption_df.drop('prod_code').filter(
        col('contact_stage_code') == 'EXP'
    )

    participating = matched_buyers.join(
        mailed_rows, self.config_dict['identity_type_code']
    )

    return utils.distinct_count(
        self.sqlContext,
        participating,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def adopters(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over rows whose ``post_count_df`` count exceeds 1.

    ``self.post_count_df`` is restricted to ``count > 1`` (the ``count``
    column is then dropped) and inner-joined to
    ``self.detail_offer_prod_dur`` on (identity column, 'prod_code').

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the joined rows.
    """
    repeat_keys = self.post_count_df.filter(col('count') > 1).drop('count')
    join_keys = [self.config_dict['identity_type_code'], 'prod_code']
    repeat_rows = self.detail_offer_prod_dur.join(repeat_keys, join_keys)

    return utils.distinct_count(
        self.sqlContext,
        repeat_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def retained_hhs(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over rows present in both the pre and post periods.

    ``self.detail_offer_prod`` is inner-joined on (identity column,
    'prod_code') first with the 'pre_period' frame and then with the
    'post_period' frame, both obtained by calling ``self.df_dict(...)``.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the joined rows.
    """
    matched = self.detail_offer_prod
    for period in ('pre_period', 'post_period'):
        period_keys = self.df_dict(period).select(
            self.config_dict['identity_type_code'], 'prod_code'
        )
        matched = matched.join(
            period_keys,
            [self.config_dict['identity_type_code'], 'prod_code']
        )

    return utils.distinct_count(
        self.sqlContext,
        matched,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def app_dm_multi_redeemers(self, grouping_set, column_set, measure):
    """Return the memoised count for (identity, offer) pairs redeemed via paper and digital.

    The result is stored on ``self.df_multi_appdm`` and returned as-is
    while that DataFrame has at least one row. Otherwise:

      1. distinct (identity, offer_code) pairs are taken from the rows of
         ``self.df_redeem`` whose lower-cased ``channel_code`` matches
         ``%paper%``, and separately ``%digital%``;
      2. the two sets of pairs are intersected;
      3. the intersection is left-outer-joined back to ``self.df_redeem``;
      4. the joined rows go to ``utils.distinct_count``.

    NOTE(review): the stored result is reused regardless of the arguments
    of later calls -- confirm callers always pass the same arguments.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        The cached or freshly computed DataFrame.
    """
    if self.df_multi_appdm.head(1):
        return self.df_multi_appdm

    channel = lower(col('channel_code'))
    paper_rows = self.df_redeem.filter(channel.like("%paper%"))
    digital_rows = self.df_redeem.filter(channel.like("%digital%"))

    paper_pairs = paper_rows.select(
        self.config_dict['identity_type_code'], 'offer_code'
    ).dropDuplicates()
    digital_pairs = digital_rows.select(
        self.config_dict['identity_type_code'], 'offer_code'
    ).dropDuplicates()

    both_channels = paper_pairs.intersect(digital_pairs)
    redemption_rows = both_channels.join(
        self.df_redeem,
        [self.config_dict['identity_type_code'], 'offer_code'],
        'left_outer'
    )

    computed = utils.distinct_count(
        self.sqlContext,
        redemption_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
    self.df_multi_appdm = computed
    return computed
def correct_redemeers(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over redemptions that have a matching 'EXP' row.

    Distinct (identity, offer_code) pairs are taken from the 'EXP' rows of
    ``self.base_redemption_df`` and inner-joined to ``self.df_redeem``.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the joined rows.
    """
    exposed_rows = self.base_redemption_df.filter(
        col('contact_stage_code') == 'EXP'
    )
    exposed_pairs = exposed_rows.select(
        self.config_dict['identity_type_code'], 'offer_code'
    ).dropDuplicates()

    matched_redemptions = self.df_redeem.join(
        exposed_pairs,
        [self.config_dict['identity_type_code'], 'offer_code'],
        'inner'
    )

    return utils.distinct_count(
        self.sqlContext,
        matched_redemptions,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def dm_multi_redeemers(self, grouping_set, column_set, measure):
    """Return the memoised count for (identity, offer) pairs with several paper redemptions.

    The result is stored on ``self.df_multi_dm`` and returned as-is while
    that DataFrame has at least one row. Otherwise the rows of
    ``self.df_redeem`` whose lower-cased ``channel_code`` matches
    ``%paper%`` are grouped by (identity, offer_code), groups with
    ``count > 1`` are kept and left-outer-joined back to the paper rows,
    and the joined rows go to ``utils.distinct_count``.

    NOTE(review): the stored result is reused regardless of the arguments
    of later calls -- confirm callers always pass the same arguments.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        The cached or freshly computed DataFrame.
    """
    if self.df_multi_dm.head(1):
        return self.df_multi_dm

    paper_rows = self.df_redeem.filter(
        lower(col('channel_code')).like("%paper%")
    )
    pair_counts = paper_rows.groupby(
        [self.config_dict['identity_type_code'], 'offer_code']
    ).count()
    repeated_pairs = pair_counts.filter('count > 1')

    repeated_rows = repeated_pairs.join(
        paper_rows,
        [self.config_dict['identity_type_code'], 'offer_code'],
        'left_outer'
    )

    computed = utils.distinct_count(
        self.sqlContext,
        repeated_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
    self.df_multi_dm = computed
    return computed
def lapsed_buyers(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over rows present pre-period but absent post-period.

    ``self.detail_offer_prod`` is inner-joined to the 'pre_period' frame
    and then left-outer-joined to the 'post_period' frame, both on
    (identity column, 'prod_code'). Only rows whose post-period
    ``transaction_fid`` is null are kept.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: forwarded unchanged to ``utils.distinct_count``.
        measure: name handed to ``utils.distinct_count`` for the count column.

    Returns:
        Whatever ``utils.distinct_count`` returns for the filtered rows.
    """
    pre_keys = self.df_dict('pre_period').select(
        self.config_dict['identity_type_code'], 'prod_code'
    )
    bought_before = self.detail_offer_prod.join(
        pre_keys, [self.config_dict['identity_type_code'], 'prod_code']
    )

    post_rows = self.df_dict('post_period').select(
        self.config_dict['identity_type_code'], 'prod_code', 'transaction_fid'
    )
    with_post = bought_before.join(
        post_rows,
        [self.config_dict['identity_type_code'], 'prod_code'],
        'left_outer'
    )
    # A null transaction_fid means the left join found no post-period row.
    lapsed_rows = with_post.filter(col('transaction_fid').isNull())

    return utils.distinct_count(
        self.sqlContext,
        lapsed_rows,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def control_participation(self, grouping_set, column_set, measure):
    """Run ``utils.distinct_count`` over control rows that appear in product transactions.

    Control rows are the rows of ``self.df`` with contact stage 'ALC' and
    ``event_control_flag`` 'Y', with ``prod_code`` dropped.

    * If there are no control rows, a one-row placeholder DataFrame is
      returned. Its columns are ``column_set + [measure, 'grouping_level']``
      and every cell is the empty string.
    * Otherwise the control rows are inner-joined to the 'dur_period'
      frame on the identity column, then to the 'table3' frame on
      ``prod_code``, and the result goes to ``utils.distinct_count``.

    Changes from the previous implementation:
      * the placeholder is built with ``self.sqlContext`` (as every other
        method does) instead of a bare ``sqlContext`` name;
      * ``column_set`` is no longer temporarily mutated with append/remove,
        so the caller's list is untouched even if DataFrame creation fails.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: list of column names; never modified here.
        measure: name of the output column.

    Returns:
        The placeholder DataFrame or the ``utils.distinct_count`` result.
    """
    is_control = (
        (col('contact_stage_code') == 'ALC')
        & (col('event_control_flag') == 'Y')
    )
    df_control = self.df.where(is_control).drop('prod_code')

    if not df_control.head(1):
        # Same column order the old append-then-concatenate code produced.
        placeholder_cols = list(column_set) + [measure, 'grouping_level']
        return self.sqlContext.createDataFrame(
            [[''] * len(placeholder_cols)], placeholder_cols
        )

    buyer_products = self.df_dict('dur_period').select(
        self.config_dict['identity_type_code'], 'prod_code'
    )
    df_trans = df_control.join(
        buyer_products, self.config_dict['identity_type_code']
    )
    df_prod = df_trans.join(
        self.df_dict('table3').select('prod_code'), ['prod_code']
    )

    return utils.distinct_count(
        self.sqlContext,
        df_prod,
        grouping_set,
        column_set,
        measure,
        self.config_dict['identity_type_code'],
    )
def index_vs_mailed(self, grouping_set, column_set, measure):
    """Return, per group, the redeemed share divided by the mailed share, as a string.

    For each group produced by ``utils.distinct_count``:

        mailed_index   = mailed_coupon / total_mailed
        redemeed_index = redem_coupon  / total_redem
        measure        = redemeed_index / mailed_index

    ``mailed_coupon`` and ``redem_coupon`` are the per-group results for
    the 'EXP' and 'RDM' rows of ``self.base_redemption_df``.
    ``total_mailed`` and ``total_redem`` are the overall distinct identity
    counts of those same rows.

    The previous implementation called ``.cache()`` and ``.show()`` on an
    intermediate DataFrame. That was debug residue: it ran an extra Spark
    job, printed to stdout on every call, and pinned a DataFrame that is
    used once and never unpersisted. Both calls are removed; the computed
    result is unchanged.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: list of column names; also used as the join key.
        measure: name of the output index column.

    Returns:
        DataFrame with the join-key columns plus ``measure`` cast to string.
    """
    id_col = self.config_dict['identity_type_code']
    stage = col('contact_stage_code')
    redeemed_rows = self.base_redemption_df.filter(stage == 'RDM')
    mailed_rows = self.base_redemption_df.filter(stage == 'EXP')

    redem_coupon = utils.distinct_count(
        self.sqlContext, redeemed_rows, grouping_set, column_set,
        'redem_coupon', id_col
    )
    mailed_coupon = utils.distinct_count(
        self.sqlContext, mailed_rows, grouping_set, column_set,
        'mailed_coupon', id_col
    )

    # The constant 'flag' column is a join key that attaches the single-row
    # totals to every per-group row.
    total_mailed = mailed_rows.agg(
        func.countDistinct(id_col).alias('total_mailed')
    ).withColumn('flag', lit(1))
    total_redem = redeemed_rows.agg(
        func.countDistinct(id_col).alias('total_redem')
    ).withColumn('flag', lit(1))

    group_set = column_set + ['grouping_level']
    per_group = redem_coupon.join(mailed_coupon, group_set).withColumn(
        'flag', lit(1)
    )
    combined = per_group.join(total_mailed, ['flag']).join(
        total_redem, ['flag']
    )

    combined = combined.withColumn(
        'mailed_index', combined['mailed_coupon'] / combined['total_mailed']
    )
    combined = combined.withColumn(
        'redemeed_index', combined['redem_coupon'] / combined['total_redem']
    )
    result = combined.withColumn(
        measure, combined['redemeed_index'] / combined['mailed_index']
    )

    # Drop the helper columns one at a time, in the original order.
    helper_columns = (
        'redem_coupon', 'mailed_coupon', 'flag', 'total_mailed',
        'total_redem', 'mailed_index', 'redemeed_index',
    )
    for helper in helper_columns:
        result = result.drop(helper)

    return result.withColumn(measure, result[measure].cast(StringType()))
def mailed_penetration(self, grouping_set, column_set, measure):
    """Return purchase / exposed per group, stored as a string in ``measure``.

    * ``exposed`` is the ``utils.distinct_count`` result for the 'EXP' rows
      of ``self.base_redemption_df``.
    * ``purchase`` is the ``utils.distinct_count`` result for those same
      rows inner-joined, on the identity column, to the identities that
      appear in ``self.dur_period`` with a ``prod_code`` listed in
      ``self.table3``.

    The two results are inner-joined on ``column_set + ['grouping_level']``.

    Args:
        grouping_set: forwarded unchanged to ``utils.distinct_count``.
        column_set: list of column names; also used as the join key.
        measure: name of the output ratio column.

    Returns:
        DataFrame with the join-key columns plus ``measure`` cast to string.
    """
    products = self.table3.select('prod_code').dropDuplicates()
    buyer_products = self.dur_period.select(
        self.config_dict['identity_type_code'], 'prod_code'
    ).dropDuplicates()
    buyers = products.join(buyer_products, ['prod_code']).select(
        self.config_dict['identity_type_code']
    )

    exposed_rows = self.base_redemption_df.filter(
        col('contact_stage_code') == 'EXP'
    )
    exposed_buyers = buyers.join(
        exposed_rows, self.config_dict['identity_type_code']
    )

    purchase_counts = utils.distinct_count(
        self.sqlContext, exposed_buyers, grouping_set, column_set,
        'purchase', self.config_dict['identity_type_code']
    )
    exposed_counts = utils.distinct_count(
        self.sqlContext, exposed_rows, grouping_set, column_set,
        'exposed', self.config_dict['identity_type_code']
    )

    join_keys = column_set + ['grouping_level']
    joined = exposed_counts.join(purchase_counts, join_keys)

    # Compute the ratio, then discard the two intermediate count columns.
    penetration = joined.withColumn(
        measure, joined['purchase'] / joined['exposed']
    ).drop('purchase').drop('exposed')

    return penetration.withColumn(
        measure, penetration[measure].cast(StringType())
    )