def response_rate(self, grouping_set, column_set, measure):
    """Compute ``measure`` as the redeemed count divided by the mailed count.

    Rows of ``self.base_redemption_df`` are split on ``contact_stage_code``:
    'RDM' feeds the numerator (``redem_coupon``) and 'EXP' the denominator
    (``mailed_coupon``). Each subset goes through ``utils.distinct_count``
    and the two results are joined on ``column_set + ['grouping_level']``
    with the default join type, so only keys present on both sides survive.

    :param grouping_set: passed through to ``utils.distinct_count``.
    :param column_set: list of column names that, together with
        'grouping_level', form the join key.
    :param measure: name of the ratio column in the result.
    :return: DataFrame with the join-key columns plus ``measure`` cast to
        string; the two intermediate count columns are dropped.
    """
    id_col = self.config_dict['identity_type_code']
    stage = col('contact_stage_code')

    def _stage_count(stage_code, alias):
        # Run the shared counting helper on one contact stage, storing the
        # count under ``alias``.
        subset = self.base_redemption_df.filter(stage == stage_code)
        return utils.distinct_count(
            self.sqlContext, subset, grouping_set, column_set, alias, id_col
        )

    redeemed_counts = _stage_count('RDM', 'redem_coupon')
    mailed_counts = _stage_count('EXP', 'mailed_coupon')

    join_keys = column_set + ['grouping_level']
    combined = redeemed_counts.join(mailed_counts, join_keys)

    with_rate = combined.withColumn(
        measure, combined['redem_coupon'] / combined['mailed_coupon']
    )

    # Only the ratio is reported; the raw counts are helper columns.
    rate_only = with_rate.drop('redem_coupon').drop('mailed_coupon')

    return rate_only.withColumn(measure, rate_only[measure].cast(StringType()))
Beispiel #2
0
    def household_allocated(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over the 'DLV' allocation rows.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: whatever ``utils.distinct_count`` returns for the rows of
            ``self.base_allocation_df`` whose ``contact_stage_code`` is 'DLV'.
        """
        id_col = self.config_dict['identity_type_code']
        dlv_rows = self.base_allocation_df.filter(
            col('contact_stage_code') == 'DLV'
        )
        return utils.distinct_count(
            self.sqlContext,
            dlv_rows,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
Beispiel #3
0
    def control_redeemers(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over redemptions flagged as control.

        Keeps the rows of ``self.df_redeem`` whose ``event_control_flag``
        equals 'Y' and hands them to the shared counting helper.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the helper's result for the control-flagged redemptions.
        """
        redemptions = self.df_redeem
        is_control = redemptions['event_control_flag'] == 'Y'
        control_rows = redemptions.filter(is_control)

        id_col = self.config_dict['identity_type_code']
        return utils.distinct_count(
            self.sqlContext,
            control_rows,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
Beispiel #4
0
    def active_loyal_customer(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over 'ALC' rows with loyalty PR or VL.

        A row of ``self.base_allocation_df`` is kept when its
        ``contact_stage_code`` is 'ALC', its ``loyalty_level`` is not null,
        and the whitespace-trimmed ``loyalty_level`` is 'PR' or 'VL'.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the helper's result for the selected rows.
        """
        loyalty = col('loyalty_level')

        # All three conditions are ANDed, which is equivalent to applying
        # them as successive filters.
        keep_row = (
            (col('contact_stage_code') == 'ALC')
            & trim(loyalty).isin('PR', 'VL')
            & loyalty.isNotNull()
        )
        loyal_rows = self.base_allocation_df.filter(keep_row)

        id_col = self.config_dict['identity_type_code']
        return utils.distinct_count(
            self.sqlContext,
            loyal_rows,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
 def digital_redemeers(self, grouping_set, column_set, measure):
     """Run ``utils.distinct_count`` over digital-channel redemptions.

     A row of ``self.df_redeem`` qualifies when its lower-cased
     ``channel_code`` contains the substring "digital".

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the helper's result for the digital redemptions.
     """
     channel = lower(col('channel_code'))
     digital_rows = self.df_redeem.filter(channel.like("%digital%"))

     id_col = self.config_dict['identity_type_code']
     return utils.distinct_count(
         self.sqlContext, digital_rows, grouping_set, column_set, measure, id_col
     )
 def hh_redemeers(self, grouping_set, column_set, measure):
     """Return the distinct-count result for all redemptions, memoised.

     The result is stored on ``self.df_prsn``. When that frame already
     holds at least one row it is returned as-is; otherwise the count is
     computed from ``self.df_redeem`` and stored before returning.

     NOTE(review): the stored frame is reused regardless of the arguments
     of later calls - confirm callers always pass the same ones.

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the cached or freshly computed result frame.
     """
     # A non-empty head(1) means a previous call already filled the cache.
     if self.df_prsn.head(1):
         return self.df_prsn

     counted = utils.distinct_count(
         self.sqlContext,
         self.df_redeem,
         grouping_set,
         column_set,
         measure,
         self.config_dict['identity_type_code'],
     )
     self.df_prsn = counted
     return counted
 def trialist(self, grouping_set, column_set, measure):
     """Run ``utils.distinct_count`` over rows with no match in ``post_count_df``.

     ``self.detail_offer_prod_dur`` is left-outer-joined to
     ``self.post_count_df`` on the identity column and ``prod_code``; only
     rows whose ``count`` column is null after the join (i.e. no match on
     the right side, or a null count there) are kept.

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the helper's result for the unmatched rows.
     """
     id_col = self.config_dict['identity_type_code']
     join_keys = [id_col, 'prod_code']

     joined = self.detail_offer_prod_dur.join(
         self.post_count_df, join_keys, 'left_outer'
     )
     unmatched = joined.filter(col('count').isNull())

     return utils.distinct_count(
         self.sqlContext, unmatched, grouping_set, column_set, measure, id_col
     )
    def mailed_participation(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over 'EXP' rows of identities that
        appear in ``dur_period`` with a product listed in ``table3``.

        Steps:
          1. distinct ``prod_code`` values from ``self.table3``;
          2. distinct (identity, ``prod_code``) pairs from ``self.dur_period``;
          3. join 1 and 2 on ``prod_code``;
          4. join the result, on the identity column, to the 'EXP' rows of
             ``self.base_redemption_df`` (with its own ``prod_code`` removed).

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the helper's result for the joined rows.
        """
        id_col = self.config_dict['identity_type_code']

        products = self.table3.select('prod_code').dropDuplicates()
        buyer_products = self.dur_period.select(
            id_col, 'prod_code'
        ).dropDuplicates()
        product_buyers = products.join(buyer_products, ['prod_code'])

        # prod_code is dropped before the join; presumably to avoid a second
        # prod_code column next to the one from product_buyers - confirm.
        mailed_rows = self.base_redemption_df.drop('prod_code').filter(
            col('contact_stage_code') == 'EXP'
        )

        mailed_buyers = product_buyers.join(mailed_rows, id_col)

        return utils.distinct_count(
            self.sqlContext,
            mailed_buyers,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
 def adopters(self, grouping_set, column_set, measure):
     """Run ``utils.distinct_count`` over rows whose ``post_count_df`` count exceeds 1.

     Rows of ``self.post_count_df`` with ``count > 1`` (the ``count``
     column itself is then discarded) are joined to
     ``self.detail_offer_prod_dur`` on the identity column and ``prod_code``.

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the helper's result for the joined rows.
     """
     id_col = self.config_dict['identity_type_code']

     repeat_keys = self.post_count_df.filter(col('count') > 1).drop('count')
     repeat_rows = self.detail_offer_prod_dur.join(
         repeat_keys, [id_col, 'prod_code']
     )

     return utils.distinct_count(
         self.sqlContext, repeat_rows, grouping_set, column_set, measure, id_col
     )
 def retained_hhs(self, grouping_set, column_set, measure):
     """Run ``utils.distinct_count`` over rows present in both the pre and post period.

     ``self.detail_offer_prod`` is joined, on the identity column and
     ``prod_code``, first to the 'pre_period' frame and then to the
     'post_period' frame obtained from ``self.df_dict``. Both joins use the
     default join type, so a row must match in both periods to remain.

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the helper's result for the rows matched in both periods.
     """
     id_col = self.config_dict['identity_type_code']
     join_keys = [id_col, 'prod_code']

     matched = self.detail_offer_prod
     for period in ('pre_period', 'post_period'):
         period_keys = self.df_dict(period).select(id_col, 'prod_code')
         matched = matched.join(period_keys, join_keys)

     return utils.distinct_count(
         self.sqlContext, matched, grouping_set, column_set, measure, id_col
     )
Beispiel #11
0
    def app_dm_multi_redeemers(self, grouping_set, column_set, measure):
        """Return the distinct-count result for offers redeemed on both the
        paper and the digital channel, memoised on ``self.df_multi_appdm``.

        When ``self.df_multi_appdm`` already holds at least one row it is
        returned unchanged. Otherwise:
          1. distinct (identity, ``offer_code``) pairs are taken from the
             rows of ``self.df_redeem`` whose lower-cased ``channel_code``
             contains "paper", and likewise for "digital";
          2. the two pair sets are intersected;
          3. the surviving pairs are left-outer-joined back to
             ``self.df_redeem`` and passed to ``utils.distinct_count``.

        NOTE(review): the stored frame is reused regardless of the arguments
        of later calls - confirm callers always pass the same ones.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the cached or freshly computed result frame.
        """
        if self.df_multi_appdm.head(1):
            return self.df_multi_appdm

        id_col = self.config_dict['identity_type_code']
        channel = lower(col('channel_code'))

        def _channel_pairs(pattern):
            # Distinct (identity, offer) pairs redeemed through one channel.
            rows = self.df_redeem.filter(channel.like(pattern))
            return rows.select(id_col, 'offer_code').dropDuplicates()

        paper_pairs = _channel_pairs("%paper%")
        digital_pairs = _channel_pairs("%digital%")
        both_channels = paper_pairs.intersect(digital_pairs)

        redemptions = both_channels.join(
            self.df_redeem, [id_col, 'offer_code'], 'left_outer'
        )

        counted = utils.distinct_count(
            self.sqlContext,
            redemptions,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
        self.df_multi_appdm = counted
        return counted
    def correct_redemeers(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over redemptions that have a matching 'EXP' row.

        Distinct (identity, ``offer_code``) pairs are taken from the 'EXP'
        rows of ``self.base_redemption_df`` and inner-joined to
        ``self.df_redeem`` on those two columns.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the helper's result for the matched redemptions.
        """
        id_col = self.config_dict['identity_type_code']

        exp_rows = self.base_redemption_df.filter(
            col('contact_stage_code') == 'EXP'
        )
        exp_pairs = exp_rows.select(id_col, 'offer_code').dropDuplicates()

        matched = self.df_redeem.join(
            exp_pairs, [id_col, 'offer_code'], 'inner'
        )

        return utils.distinct_count(
            self.sqlContext,
            matched,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
Beispiel #13
0
    def dm_multi_redeemers(self, grouping_set, column_set, measure):
        """Return the distinct-count result for paper offers redeemed more
        than once, memoised on ``self.df_multi_dm``.

        When ``self.df_multi_dm`` already holds at least one row it is
        returned unchanged. Otherwise the rows of ``self.df_redeem`` whose
        lower-cased ``channel_code`` contains "paper" are grouped by
        identity and ``offer_code``; groups with more than one row are
        left-outer-joined back to the paper rows and passed to
        ``utils.distinct_count``.

        NOTE(review): the stored frame is reused regardless of the arguments
        of later calls - confirm callers always pass the same ones.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: passed through to ``utils.distinct_count``.
        :param measure: name handed to the helper for its output column.
        :return: the cached or freshly computed result frame.
        """
        if self.df_multi_dm.head(1):
            return self.df_multi_dm

        id_col = self.config_dict['identity_type_code']
        pair_cols = [id_col, 'offer_code']

        paper_rows = self.df_redeem.filter(
            lower(col('channel_code')).like("%paper%")
        )

        # Pairs seen on more than one paper row; the frame still carries
        # the 'count' column produced by the aggregation.
        pair_counts = paper_rows.groupby(pair_cols).count()
        repeated_pairs = pair_counts.filter('count > 1')

        repeated_rows = repeated_pairs.join(paper_rows, pair_cols, 'left_outer')

        counted = utils.distinct_count(
            self.sqlContext,
            repeated_rows,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
        self.df_multi_dm = counted
        return counted
 def lapsed_buyers(self, grouping_set, column_set, measure):
     """Run ``utils.distinct_count`` over rows present in the pre period
     but without a post-period transaction.

     ``self.detail_offer_prod`` is joined (default join type) to the
     'pre_period' frame on the identity column and ``prod_code``, then
     left-outer-joined to the 'post_period' frame on the same keys. Rows
     whose ``transaction_fid`` is null after the second join are kept.

     :param grouping_set: passed through to ``utils.distinct_count``.
     :param column_set: passed through to ``utils.distinct_count``.
     :param measure: name handed to the helper for its output column.
     :return: the helper's result for the retained rows.
     """
     id_col = self.config_dict['identity_type_code']
     join_keys = [id_col, 'prod_code']

     pre_keys = self.df_dict('pre_period').select(id_col, 'prod_code')
     bought_before = self.detail_offer_prod.join(pre_keys, join_keys)

     post_rows = self.df_dict('post_period').select(
         id_col, 'prod_code', 'transaction_fid'
     )
     with_post = bought_before.join(post_rows, join_keys, 'left_outer')

     # A null transaction_fid marks a pre-period row with no post-period match.
     lapsed = with_post.filter(col('transaction_fid').isNull())

     return utils.distinct_count(
         self.sqlContext, lapsed, grouping_set, column_set, measure, id_col
     )
Beispiel #15
0
    def control_participation(self, grouping_set, column_set, measure):
        """Run ``utils.distinct_count`` over control rows that have a
        ``dur_period`` transaction on a ``table3`` product.

        Control rows are the rows of ``self.df`` with ``contact_stage_code``
        'ALC' and ``event_control_flag`` 'Y' (their ``prod_code`` column is
        dropped). If there are none, a single placeholder row of empty
        strings is returned instead of calling the helper.

        Changes from the previous version:
          * the placeholder frame is built with ``self.sqlContext`` (as in
            the other branch) rather than a bare ``sqlContext`` name;
          * ``column_set`` is no longer temporarily mutated with
            append/remove, so the caller's list is left untouched even if
            it already contains ``measure`` or frame creation fails.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: list of column names; passed through to
            ``utils.distinct_count`` and used to name the placeholder columns.
        :param measure: name handed to the helper for its output column.
        :return: the helper's result, or a one-row frame with columns
            ``column_set + [measure, 'grouping_level']`` all set to ''.
        """
        control_rows = self.df.where(
            (col('contact_stage_code') == 'ALC')
            & (col('event_control_flag') == 'Y')
        ).drop('prod_code')

        if not control_rows.head(1):
            # Same column order the old append-then-concatenate code produced.
            placeholder_cols = column_set + [measure, 'grouping_level']
            return self.sqlContext.createDataFrame(
                [[''] * len(placeholder_cols)], placeholder_cols
            )

        id_col = self.config_dict['identity_type_code']

        transactions = self.df_dict('dur_period').select(id_col, 'prod_code')
        control_transactions = control_rows.join(transactions, id_col)

        products = self.df_dict('table3').select('prod_code')
        control_products = control_transactions.join(products, ['prod_code'])

        return utils.distinct_count(
            self.sqlContext,
            control_products,
            grouping_set,
            column_set,
            measure,
            id_col,
        )
    def index_vs_mailed(self, grouping_set, column_set, measure):
        """Compute ``measure`` as the redeemed share divided by the mailed share.

        For each group (keyed by ``column_set + ['grouping_level']``):

            mailed_index   = mailed_coupon / total_mailed
            redemeed_index = redem_coupon  / total_redem
            measure        = redemeed_index / mailed_index

        ``redem_coupon`` and ``mailed_coupon`` come from
        ``utils.distinct_count`` applied to the 'RDM' and 'EXP' rows of
        ``self.base_redemption_df``. ``total_redem`` and ``total_mailed``
        are the overall distinct identity counts of those same two subsets.

        Change from the previous version: the leftover debugging
        ``cache()`` / ``show()`` on the intermediate frame was removed. It
        triggered an extra Spark action and printed a table to stdout on
        every call.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: list of column names that, together with
            'grouping_level', form the join key.
        :param measure: name of the index column in the result.
        :return: DataFrame with the join-key columns plus ``measure`` cast
            to string; all intermediate columns are dropped.
        """
        id_col = self.config_dict['identity_type_code']
        stage = col('contact_stage_code')

        redeemed = self.base_redemption_df.filter(stage == 'RDM')
        mailed = self.base_redemption_df.filter(stage == 'EXP')

        redem_coupon = utils.distinct_count(
            self.sqlContext, redeemed, grouping_set, column_set,
            'redem_coupon', id_col
        )
        mailed_coupon = utils.distinct_count(
            self.sqlContext, mailed, grouping_set, column_set,
            'mailed_coupon', id_col
        )

        # One-row frames holding the overall totals. The constant 'flag'
        # column lets them be attached to every group row with a plain
        # equi-join.
        total_mailed = mailed.agg(
            func.countDistinct(id_col).alias('total_mailed')
        ).withColumn('flag', lit(1))
        total_redeemed = redeemed.agg(
            func.countDistinct(id_col).alias('total_redem')
        ).withColumn('flag', lit(1))

        group_set = column_set + ['grouping_level']
        per_group = redem_coupon.join(
            mailed_coupon, group_set
        ).withColumn('flag', lit(1))

        with_totals = per_group.join(
            total_mailed, ['flag']
        ).join(total_redeemed, ['flag'])

        with_mailed_index = with_totals.withColumn(
            'mailed_index',
            with_totals['mailed_coupon'] / with_totals['total_mailed']
        )
        with_both_indexes = with_mailed_index.withColumn(
            'redemeed_index',
            with_mailed_index['redem_coupon'] / with_mailed_index['total_redem']
        )
        result = with_both_indexes.withColumn(
            measure,
            with_both_indexes['redemeed_index'] / with_both_indexes['mailed_index']
        )

        # Strip every helper column so only the keys and the measure remain.
        helper_columns = (
            'redem_coupon',
            'mailed_coupon',
            'flag',
            'total_mailed',
            'total_redem',
            'mailed_index',
            'redemeed_index',
        )
        for helper_column in helper_columns:
            result = result.drop(helper_column)

        return result.withColumn(measure, result[measure].cast(StringType()))
    def mailed_penetration(self, grouping_set, column_set, measure):
        """Compute ``measure`` as the purchase count divided by the exposed count.

        * ``exposed``: ``utils.distinct_count`` over the 'EXP' rows of
          ``self.base_redemption_df``.
        * ``purchase``: ``utils.distinct_count`` over those same 'EXP' rows
          restricted (by a join on the identity column) to identities that
          appear in ``self.dur_period`` with a ``prod_code`` listed in
          ``self.table3``.

        The two results are joined on ``column_set + ['grouping_level']``
        with the default join type.

        :param grouping_set: passed through to ``utils.distinct_count``.
        :param column_set: list of column names that, together with
            'grouping_level', form the join key.
        :param measure: name of the ratio column in the result.
        :return: DataFrame with the join-key columns plus ``measure`` cast
            to string; the two intermediate count columns are dropped.
        """
        id_col = self.config_dict['identity_type_code']

        products = self.table3.select('prod_code').dropDuplicates()
        transactions = self.dur_period.select(
            id_col, 'prod_code'
        ).dropDuplicates()

        # Identities with a transaction on a listed product. This is not
        # de-duplicated, so an identity may repeat once per product.
        purchasers = products.join(transactions, ['prod_code']).select(id_col)

        exposed_rows = self.base_redemption_df.filter(
            col('contact_stage_code') == 'EXP'
        )
        exposed_purchasers = purchasers.join(exposed_rows, id_col)

        purchase_counts = utils.distinct_count(
            self.sqlContext,
            exposed_purchasers,
            grouping_set,
            column_set,
            'purchase',
            id_col,
        )
        exposed_counts = utils.distinct_count(
            self.sqlContext,
            exposed_rows,
            grouping_set,
            column_set,
            'exposed',
            id_col,
        )

        join_keys = column_set + ['grouping_level']
        combined = exposed_counts.join(purchase_counts, join_keys)

        with_rate = combined.withColumn(
            measure, combined['purchase'] / combined['exposed']
        )
        rate_only = with_rate.drop('purchase').drop('exposed')

        return rate_only.withColumn(
            measure, rate_only[measure].cast(StringType())
        )