Example #1
    def get_booker_cc(self):
        """Get the booker country for an account.

        Returns:
            a pandas DataFrame with the booker country per campaign
        """
        # get the campaign id, campaign name correspondence
        campaign_query = """
        SELECT id AS campaign_id,
               name AS campaign_name
          FROM mrktctl.account_1Bid_Campaign
        """
        msql = mu.getConnSql()
        msql.execute("USE mrktctl")
        campaigns = pd.read_sql_query(campaign_query, con=msql)
        msql.close()

        # get the pos / campaign id correspondence; in account_1 a campaign
        # can cover a single pos or multiple pos
        cid = spark.table("spmeta.account_1_cc_campaign").withColumnRenamed(
            "cc", "pos")
        pd_cid = cid.toPandas()
        # override the campaign id for the TR point of sale
        pd_cid.loc[pd_cid.pos == "TR", "campaign_id"] = 105
        # merge on the shared campaign_id column to attach campaign names
        booker_cc_df = campaigns.merge(pd_cid)

        return booker_cc_df
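
For context, pandas `DataFrame.merge` with no `on=` argument joins on every column name the two frames share, so the merge above relies on `campaign_id` being the only common column. A minimal standalone sketch of that merge and the TR override, using invented campaign ids and names:

import pandas as pd

# invented stand-ins for the two frames built above
campaigns = pd.DataFrame({"campaign_id": [101, 105],
                          "campaign_name": ["campaign_nl", "campaign_tr"]})
pd_cid = pd.DataFrame({"campaign_id": [101, 103],
                       "pos": ["NL", "TR"]})

# replicate the TR override: point the TR pos at campaign 105
pd_cid.loc[pd_cid.pos == "TR", "campaign_id"] = 105

# with no `on=`, merge joins on the shared column, here campaign_id
booker_cc_df = campaigns.merge(pd_cid)
print(booker_cc_df)
#    campaign_id campaign_name pos
# 0          101   campaign_nl  NL
# 1          105   campaign_tr  TR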
Example #2
    def get_id_pos(self):
        """Get the campaign id / pos correspondence from the MySQL database and
        filter it down to the selected points of sale.

        Returns:
            a Spark DataFrame with campaign_id and pos
        """
        # get the campaign id and campaign name correspondence from mysql
        msql = mu.getConnSql()
        msql.execute('USE mrktctl')
        qs = """
        SELECT id campaign_id,
               name pos
          FROM account_1Bid_Campaign"""
        account_1_campaign = pd.read_sql_query(qs, con=msql)
        msql.close()
        account_1_campaign = spark.createDataFrame(account_1_campaign)

        # filter the campaign list down to the selected points of sale
        if self.pos == ['All']:
            return account_1_campaign
        else:
            filtered_pos = spark.createDataFrame(
                pd.DataFrame(data=self.pos, columns=["pos"]))
            account_1_campaign = account_1_campaign.join(filtered_pos,
                                                         on="pos",
                                                         how="inner")
            return account_1_campaign
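
The pos filter in `get_id_pos` is expressed as an inner join against a one-column DataFrame built from the selected pos list. A self-contained sketch of the same pattern on a local SparkSession, with an invented campaign list standing in for the MySQL table:

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# invented stand-in for the account_1Bid_Campaign table
account_1_campaign = spark.createDataFrame(
    pd.DataFrame({"campaign_id": [101, 102, 103],
                  "pos": ["NL", "DE", "TR"]}))

# inner join against the selected pos, as in get_id_pos
selected_pos = ["NL", "TR"]  # stands in for self.pos
filtered_pos = spark.createDataFrame(
    pd.DataFrame(data=selected_pos, columns=["pos"]))
account_1_campaign.join(filtered_pos, on="pos", how="inner").show()

An `isin` filter (`account_1_campaign.filter(pyspark.sql.functions.col("pos").isin(selected_pos))`) would be an equivalent, arguably simpler alternative; the join form is what the original code uses.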
Example #3
    def get_cancellations(self):
        """Get cancellation data from MySQL at the desired aggregation dimensions
        and filter for the selected points of sale.

        Returns:
            a Spark DataFrame with cancelled commission, cancelled bookings and
            cancelled roomnights at the desired aggregation dimensions
        """
        msql = mu.getConnSql()
        msql.execute('USE ppc_sp')
        cancellation_query = """
            SELECT r.date_cancelled yyyy_mm_dd
                 , a.distribution pos
                 , CAST(coalesce(r.dest_id, r.hotel_id) AS INT) hotel_id
                 , CAST(sum(1) AS INT) cancellations
                 , sum(commission_amount_euro) cancelled_commission
                 , CAST(sum(roomnights) AS INT) cancelled_roomnights
             FROM {reservation_table} r force index (cancel)
             JOIN {affiliate_table} a on (a.affiliate_id = r.affiliate_id)
            WHERE r.date_cancelled >= '{start_date}'
              AND r.date_cancelled < '{end_date}'
              AND r.status not in ('fraudulent','test','unknown')
              AND r.partner_id = {account_4_partner_id}
            GROUP BY yyyy_mm_dd, pos, coalesce(r.dest_id, r.hotel_id)
            """.format(reservation_table=self.reservation_table,
                       affiliate_table=self.affiliate_table,
                       start_date=self.start_date,
                       end_date=self.end_date,
                       account_4_partner_id=self.partner_id)

        cancellations = pd.read_sql_query(cancellation_query, con=msql)
        msql.close()
        cancellations = spark.createDataFrame(cancellations)

        cancellations_agg = cancellations.groupBy(*self.agg_on).agg(
            f.sum("cancelled_commission").alias("cancelled_commission"),
            f.sum("cancelled_roomnights").alias("cancelled_roomnights"),
            f.sum("cancellations").alias("cancellations"))

        # filter for the selected points of sale
        if self.pos == ['All']:
            return cancellations_agg
        else:
            filtered_pos = spark.createDataFrame(
                pd.DataFrame(data=self.pos, columns=["pos"]))
            cancellations_agg = cancellations_agg.join(filtered_pos,
                                                       on="pos",
                                                       how="inner")
            return cancellations_agg
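
Finally, a self-contained sketch of the aggregation step in `get_cancellations`, assuming `self.agg_on` is `["yyyy_mm_dd", "pos"]` (the real dimensions depend on how the class is configured) and using invented cancellation rows:

import pandas as pd
import pyspark.sql.functions as f
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# invented cancellation rows at (date, pos, hotel) granularity
cancellations = spark.createDataFrame(pd.DataFrame({
    "yyyy_mm_dd": ["2021-01-01", "2021-01-01", "2021-01-02"],
    "pos": ["NL", "NL", "DE"],
    "hotel_id": [1, 2, 3],
    "cancellations": [1, 2, 1],
    "cancelled_commission": [10.0, 5.5, 7.0],
    "cancelled_roomnights": [2, 3, 1],
}))

agg_on = ["yyyy_mm_dd", "pos"]  # assumed value of self.agg_on
cancellations_agg = cancellations.groupBy(*agg_on).agg(
    f.sum("cancelled_commission").alias("cancelled_commission"),
    f.sum("cancelled_roomnights").alias("cancelled_roomnights"),
    f.sum("cancellations").alias("cancellations"))
cancellations_agg.show()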