def get_booker_cc(self):
    """Build the booker-country mapping for an account.

    Joins the campaign id/name correspondence from MySQL
    (mrktctl.account_1Bid_Campaign) with the pos/campaign-id
    correspondence from Spark (spmeta.account_1_cc_campaign).

    Returns:
        pandas.DataFrame with one row per (campaign_id, pos) pair,
        carrying campaign_id, campaign_name and pos (booker country).
    """
    # Campaign id -> campaign name correspondence from MySQL.
    campaign_query = """
        SELECT id AS campaign_id, name AS campaign_name
        FROM mrktctl.account_1Bid_Campaign
    """
    msql = mu.getConnSql()
    try:
        # try/finally so the connection is closed even when the query
        # raises (previously it leaked on failure).
        msql.execute("USE mrktctl")
        campaigns = pd.read_sql_query(campaign_query, con=msql)
    finally:
        msql.close()

    # pos <-> campaign id correspondence: in account_1 a campaign can
    # cover one pos or multiple pos.
    cid = spark.table("spmeta.account_1_cc_campaign").withColumnRenamed(
        "cc", "pos")
    pd_cid = cid.toPandas()
    # NOTE(review): TR is force-mapped to campaign 105 — looks like a
    # deliberate data override; confirm with the table owner.
    pd_cid.loc[pd_cid.pos == "TR", "campaign_id"] = 105

    # Merge explicitly on campaign_id instead of the implicit
    # common-column merge, which silently changes keys if schemas drift.
    booker_cc_df = campaigns.merge(pd_cid, on="campaign_id")
    return booker_cc_df
def get_id_pos(self):
    """Get the campaign id / pos correspondence from MySQL and restrict
    it to the configured point-of-sale list.

    Returns:
        Spark DataFrame with columns (campaign_id, pos); unfiltered when
        self.pos is the ['All'] sentinel, otherwise inner-joined against
        the requested pos values.
    """
    # Campaign id and campaign name (used as pos) from MySQL.
    qs = """
        SELECT id campaign_id, name pos
        FROM account_1Bid_Campaign
    """
    msql = mu.getConnSql()
    try:
        # try/finally so the connection is closed even when the query
        # raises (previously it leaked on failure).
        msql.execute('USE mrktctl')
        account_1_campaign = pd.read_sql_query(qs, con=msql)
    finally:
        msql.close()
    account_1_campaign = spark.createDataFrame(account_1_campaign)

    # ['All'] is the sentinel meaning "no pos filtering".
    if self.pos == ['All']:
        return account_1_campaign
    filtered_pos = spark.createDataFrame(
        pd.DataFrame(data=self.pos, columns=["pos"]))
    return account_1_campaign.join(filtered_pos, on="pos", how="inner")
def get_cancellations(self):
    """Get cancellation data from MySQL, aggregate it to the desired
    dimension, and filter for the selected points of sale.

    Returns:
        Spark DataFrame with cancelled_commission, cancelled_roomnights
        and cancellations, grouped by self.agg_on; pos-filtered unless
        self.pos is the ['All'] sentinel.
    """
    # NOTE(review): table names and dates are interpolated with
    # str.format — they come from instance configuration, not user
    # input, and table names cannot be bound as SQL parameters anyway;
    # confirm the configuration source is trusted.
    cancellation_query = """
        SELECT r.date_cancelled yyyy_mm_dd
             , a.distribution pos
             , CAST(coalesce(r.dest_id, r.hotel_id) AS INT) hotel_id
             , CAST(sum(1) AS INT) cancellations
             , sum(commission_amount_euro) cancelled_commission
             , CAST(sum(roomnights) AS INT) cancelled_roomnights
        FROM {reservation_table} r force index (cancel)
        JOIN {affiliate_table} a on (a.affiliate_id = r.affiliate_id)
        WHERE r.date_cancelled >= '{start_date}'
          AND r.date_cancelled < '{end_date}'
          AND r.status not in ('fraudulent','test','unknown')
          AND r.partner_id = {account_4_partner_id}
        GROUP BY yyyy_mm_dd, pos, coalesce(r.dest_id, r.hotel_id)
    """.format(reservation_table=self.reservation_table,
               affiliate_table=self.affiliate_table,
               start_date=self.start_date,
               end_date=self.end_date,
               account_4_partner_id=self.partner_id)

    msql = mu.getConnSql()
    try:
        # try/finally so the connection is closed even when the query
        # raises (previously it leaked on failure).
        msql.execute('USE ppc_sp')
        cancellations = pd.read_sql_query(cancellation_query, con=msql)
    finally:
        msql.close()

    cancellations = spark.createDataFrame(cancellations)
    # Re-aggregate to the caller-chosen dimension (self.agg_on), since
    # the SQL groups at day/pos/hotel granularity.
    cancellations_agg = (
        cancellations.groupBy(*self.agg_on)
        .agg(f.sum("cancelled_commission").alias("cancelled_commission"),
             f.sum("cancelled_roomnights").alias("cancelled_roomnights"),
             f.sum("cancellations").alias("cancellations")))

    # ['All'] is the sentinel meaning "no pos filtering".
    if self.pos == ['All']:
        return cancellations_agg
    filtered_pos = spark.createDataFrame(
        pd.DataFrame(data=self.pos, columns=["pos"]))
    return cancellations_agg.join(filtered_pos, on="pos", how="inner")