Example #1
    def response_handler(self, data_df):
        db_obj = PyHdbWrapper()
        cursor, connection = db_obj.connect_hana(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]),
            'HANA_ENV')
        ''' Truncate staging table before inserting records'''
        delete_page_url = db_obj.get_delete_query(self.schema +
                                                  '.STG_PAGE_URL_METRICS')
        db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

        # Extract Date will be used as a bookmark for loading data into HANA
        extract_date = datetime.datetime.today()

        for index, row in data_df.iterrows():  # Outer loop for day-specific data
            table = row.iloc[0]  # Table is of type Dictionary
            source_date = str(
                datetime.date(table['year'], table['month'], table['day']))
            source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
            breakdown = (table['breakdown'])  # Breakdown is type of list
            for country_entry in breakdown:
                if 'breakdown' in country_entry:
                    country = country_entry['name']
                    # print(country_entry['breakdownTotal'])
                    temp = country_entry['breakdown']
                    for i in temp:
                        counts = i['counts']
                        pageviews = counts[0]
                        visits = counts[1]
                        uniquevisitors = counts[2]
                        bouncerate = counts[3]
                        averageTimeSpentOnSite = counts[4]
                        url = i['name']
                        print(source_date, country, url, pageviews,
                              visits, uniquevisitors, bouncerate)
                        column_name = [
                            "PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                            "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT",
                            "UNIQUE_VISITOR_COUNT", "BOUNCE_RATE_%%",
                            "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"
                        ]
                        insert_query = db_obj.get_insert_query(
                            self.schema + ".STG_PAGE_URL_METRICS", column_name)

                        values = [
                            source_date, self.date_granularity, country, url,
                            pageviews, visits, uniquevisitors, bouncerate,
                            averageTimeSpentOnSite, extract_date
                        ]
                        print(values)

                        #db_obj.execute_sql(cursor, connection, insert_query, values, 'INSERT')

        # Upsert the staged rows into the target table (built here but not executed)
        upsert_statement = ('UPSERT "' + self.schema + '"."PAGE_URL_METRICS" '
                            'SELECT * FROM "' + self.schema + '"."STG_PAGE_URL_METRICS"')
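
A note on Example #1: the INSERT is commented out and upsert_statement is built but never
executed, so this variant only prints the rows. A minimal sketch of the final staging-to-target
step, assuming PyHdbWrapper.execute_sql supports an 'UPSERT' mode as the commented-out code in
the later examples suggests:

# Hypothetical helper; db_obj, cursor and connection come from connect_hana as above.
def upsert_from_staging(db_obj, cursor, connection, schema):
    # Push everything staged in STG_PAGE_URL_METRICS into the target table in one statement.
    upsert_statement = ('UPSERT "{0}"."PAGE_URL_METRICS" '
                        'SELECT * FROM "{0}"."STG_PAGE_URL_METRICS"'.format(schema))
    db_obj.execute_sql(cursor, connection, upsert_statement, '', 'UPSERT')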
Example #2
    def response_handler(self, data_df):
        """
        Parse the JSON report data and load it into the HANA database.

        :param data_df: DataFrame containing the JSON report data to load into HANA
        """
        db_obj = PyHdbWrapper()
        cursor, connection = db_obj.connect_hana(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]),
            'HANA_ENV')

        # Extract Date will be used as a bookmark for loading data into HANA
        extract_date = datetime.datetime.today()

        for index, row in data_df.iterrows():  # Outer loop for day-specific data
            table = row.iloc[0]  # Table is of type Dictionary
            source_date = str(
                datetime.date(table['year'], table['month'], table['day']))
            source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
            breakdown = (table['breakdown'])  # Breakdown is type of list
            for country_entry in breakdown:
                if 'breakdown' in country_entry:
                    country = country_entry['name']
                    # print(country_entry['breakdownTotal'])
                    temp = country_entry['breakdown']
                    for i in temp:
                        counts = i['counts']
                        pageviews = counts[0]
                        visits = counts[1]
                        uniquevisitors = counts[2]
                        bouncerate = counts[3]
                        averageTimeSpentOnSite = counts[4]
                        url = i['name']
                        print(source_date, country, url, pageviews,
                              visits, uniquevisitors, bouncerate)
                        column_name = [
                            "PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                            "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT",
                            "UNIQUE_VISITOR_COUNT", "BOUNCE_RATE_%%",
                            "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"
                        ]
                        # Note: the staging table schema is hard-coded here
                        insert_query = db_obj.get_insert_query(
                            "SAMEER_RATHOD.STG_PAGE_URL_METRICS", column_name)

                        values = [
                            source_date, self.date_granularity, country, url,
                            pageviews, visits, uniquevisitors, bouncerate,
                            averageTimeSpentOnSite, extract_date
                        ]

                        db_obj.execute_sql(cursor, connection, insert_query,
                                           values, 'INSERT')
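
Every example calls db_obj.get_insert_query(table, columns) without showing it. A plausible
sketch of such a helper, offered as an assumption about the wrapper rather than its actual
implementation: it builds a placeholder-style INSERT so the values passed to execute_sql are
bound rather than concatenated into the SQL string.

def get_insert_query(table_name, column_names):
    # e.g. get_insert_query("SAMEER_RATHOD.STG_PAGE_URL_METRICS", column_name)
    # -> 'INSERT INTO SAMEER_RATHOD.STG_PAGE_URL_METRICS ("PERIOD_DATE", ...) VALUES (?, ..., ?)'
    columns = ', '.join('"{}"'.format(col) for col in column_names)
    placeholders = ', '.join('?' for _ in column_names)
    return 'INSERT INTO {} ({}) VALUES ({})'.format(table_name, columns, placeholders)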
Example #3
    def main(self):
        """
        Queue an Adobe Analytics report, poll until it is ready, and hand the
        resulting data off to response_handler for loading into HANA.
        """
        utils_object = Utils()
        """
        from_ini reads the configuration file for the given section and keys.
        get_file_path resolves the Adobe_Analytics config file path; the section
        supplies 'username' and 'api_secret'.
        """
        adobe_config = utils_object.from_ini(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
            'Adobe_Analytics', ('username', 'api_secret'))
        """
        get_endpoint_url builds the endpoint URL for the Report.Queue API method.
        """
        query_url = self.get_endpoint_url('method=Report.Queue')
        """
        get_payload builds the report definition that is sent as the JSON body.
        """
        payload = json.dumps(
            self.get_payload(self.date_from, self.date_to,
                             self.date_granularity))

        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])

        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)
        """
        Send the POST request to queue the report.
        """
        report_queue_api_response = utils_object.send_request(
            'POST', query_url, payload, head)

        report_queue_response_body = report_queue_api_response.text.encode(
            'ascii')
        """
        The queue response is ASCII-encoded and the report ID is pulled out of the
        JSON text (the body looks like {"reportID":12345}).
        """
        temp_var = report_queue_response_body.split(b':')
        report_id = temp_var[1].replace(b'}', b'')
        print(report_id)

        ######## Section - 2: Get data based on the queued report and save the JSON reply in the shared folder ########
        # Developing the API URL for retrieving the report
        query_url = self.get_endpoint_url('method=Report.Get')

        # The body of the API request is enclosed as post_params
        bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
        payload = json.dumps(bodydata)

        counter_error = 0
        while counter_error == 0:
            """
            Poll the API until the report is ready: if the error is 'report_not_ready'
            keep polling; on any other error, break out of the loop.
            """
            # Using sleep method to give enough time to get the report ready to pull the data else it will throw
            # "Report not ready"
            print("Start sleep time " + time.strftime("%X"))
            time.sleep(self.sleep_time)

            nonce_b, iso_time, digest = self.get_unique_connection_parameters(
                adobe_config['api_secret'])
            head = self.get_header(adobe_config['username'], digest, nonce_b,
                                   iso_time)
            # logger
            api_response = utils_object.send_request('POST', query_url,
                                                     payload, head)
            response_body = json.loads(api_response.text)
            if 'error' in response_body.keys():
                if 'report_not_ready' in response_body['error']:
                    pass
                else:
                    break
            elif 'report' in response_body.keys():
                counter_error = 1

        # Using the pandas library to load the JSON data and transpose it for easier manipulation
        adobe_ana_pd = pd.DataFrame.from_dict(response_body)
        adobe_ana_pd = adobe_ana_pd.T

        # Removing unwanted index from the dataFrame
        adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])

        # The Adobe Analytics metrics are in the 'data' column, so parse it
        data_df = pd.read_json((adobe_ana_pd['data']).to_json())

        # datetime.datetime.strptime(str(datetime.date.today()),"%Y-%m-%d")

        # Iterating over the JSON file to extract metrics
        self.response_handler(data_df)
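
Splitting the queue response on b':' and stripping b'}' only works while the body is exactly
{"reportID":12345}. Since the rest of the code already treats the responses as JSON, a sketch
of a sturdier extraction:

import json

def extract_report_id(response_text):
    # Parse the Report.Queue response and return the report ID as a string.
    body = json.loads(response_text)
    return str(body['reportID'])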
Example #4
    def main(self):
        '''
        This function is called from the main.py file and contains the logic to
        fetch data from the source and save it to the destination.
        :return:
        '''
        '''
        from_ini reads the configuration file for the given section and key names
        and returns a dict of configuration parameters.
        '''
        #print ("Date granularity is: {}".format(self.date_granularity))
        adobe_config = Utils.from_ini(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
            'Adobe_Analytics', ('username', 'api_secret'))
        '''
        Getting the endpoint URL
        '''
        query_url = self.get_endpoint_url('method=Report.Queue')
        print("\n")
        print(query_url)
        print("\n")
        '''
        Getting payload to be passed with the api
        '''
        payload = json.dumps(
            self.get_payload(self.date_from, self.date_to,
                             self.date_granularity))
        print("------------------------------------")
        print("Payload is:")
        print(payload)
        print("------------------------------------")
        print("\n")
        '''
        Preparing parameters for passing in header with api for authentication
        '''
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        '''
        Get header
        '''
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        print("------------------------------------")
        print("Header is:")
        print(head)
        print("------------------------------------")
        print("\n")
        '''
        Calling api for preparing reports
        '''
        report_queue_api_response = Utils.send_request('POST', query_url,
                                                       payload, head)

        if report_queue_api_response.status_code != 200:
            logger.error(report_queue_api_response.text)
            raise Exception(report_queue_api_response.reason)

        report_queue_response_body = report_queue_api_response.text.encode(
            'ascii')
        temp_var = report_queue_response_body.split(b':')
        report_id = temp_var[1].replace(b'}', b'')
        # print(report_id)
        '''
        Section - 2: Get data based on report developed and save the JSON reply in shared folder 
        '''
        '''
        Developing API URL for retrieving
        '''
        query_url = self.get_endpoint_url(
            'method=Report.Get')  # 'method=Report.GetMetrics'

        # The body of the API url is enclosed as post_params
        bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
        payload = json.dumps(bodydata)

        counter_error = 0
        while (counter_error == 0):
            # Sleep to give the report enough time to become ready before pulling the
            # data, otherwise the API responds with "Report not ready"
            print("Start sleep time " + time.strftime("%X"))
            print("\n")
            time.sleep(self.sleep_time)
            '''
            Get connection parameter for getting reports data
            and get header
            '''
            nonce_b, iso_time, digest = self.get_unique_connection_parameters(
                adobe_config['api_secret'])
            head = self.get_header(adobe_config['username'], digest, nonce_b,
                                   iso_time)

            # logger
            '''
            Call api to get reports
            '''
            api_response = Utils.send_request('POST', query_url, payload, head)
            if api_response.status_code != 200:
                print(api_response)
                continue

            try:
                response_body = json.loads(api_response.text)
                if 'error' in response_body.keys():
                    if 'report_not_ready' in response_body['error']:
                        # Report not ready yet; skip the processing below and poll again
                        continue
                    else:
                        logger.error(api_response.text)
                        raise Exception(api_response.reason)
                elif 'report' in response_body.keys():
                    counter_error = 1
            except Exception as e:
                logger.error(e)
                raise

            try:
                # response_body = json.loads(api_response.text)
                print("------------------------------------")
                print("API Response is:")
                print("\n")
                print("Response is: {}".format(response_body))
                print("------------------------------------")
                print("\n")

                adobe_ana_pd = pd.DataFrame.from_dict(response_body)
                adobe_ana_pd = adobe_ana_pd.T

                # Removing unwanted index from the dataFrame
                adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])

                # The Adobe Analytics metrics are in the 'data' column, so parse it
                data_df = pd.read_json((adobe_ana_pd['data']).to_json())
                self.response_handler(data_df)

                # metricsdf = pd.DataFrame(response_body["report"]["metrics"])
                # datadf = pd.DataFrame(response_body["report"]["data"])
                # outerdf = pd.DataFrame()
                # #breakdowndf_2 = pd.DataFrame()
                #
                # # for breakdown_data in response_body["report"]["data"]:
                # #     for element in breakdown_data["breakdown"]:
                # #         innerdf = pd.DataFrame()
                # #         final_data = {}
                # #         final_data["ITEM"] = []
                # #         final_data["ACCOUNT_NUMBER"] = []
                # #         final_data["UNIQUE_VISITORS"] = []
                # #         final_data["VISITS"] = []
                # #         final_data["PAGE_VIEWS"] = []
                # #         final_data["BOUNCES"] = []
                # #         final_data["TIME_SPENT_ON_PAGE_(MIN)"] = []
                # #         final_data["E89_VIDEO_VIEWS"] = []
                # #         final_data["E17_FORM_SUCCESS"] = []
                # #         final_data["FORM_SUBMISSIONS"] = []
                # #         final_data["TOTAL_WEEKLY_UNIQUE_VISITORS"] = []
                # #         final_data["ENTRIES"] = []
                # #         final_data["TOTAL_TIME_SPENT"] = []
                # #         final_data["START_DATE_OF_WEEK"] = []
                # #         final_data["GRANULARITY"] = []
                # #         final_data["START_DATE_OF_WEEK"].append(breakdown_data["name"])
                # #         final_data["GRANULARITY"].append(self.date_granularity)
                # #         final_data["ACCOUNT_NUMBER"].append(element["name"])
                # #         if "breakdown" in element:
                # #             for pageurl in element["breakdown"]:
                # #                 final_data["ITEM"].append(pageurl["name"])
                # #                 final_data["UNIQUE_VISITORS"].append(pageurl["counts"][0])
                # #                 final_data["VISITS"].append(pageurl["counts"][1])
                # #                 final_data["PAGE_VIEWS"].append(pageurl["counts"][2])
                # #                 final_data["BOUNCES"].append(pageurl["counts"][3])
                # #                 final_data["TIME_SPENT_ON_PAGE_(MIN)"].append(pageurl["counts"][4])
                # #                 final_data["E89_VIDEO_VIEWS"].append(pageurl["counts"][5])
                # #                 final_data["E17_FORM_SUCCESS"].append(pageurl["counts"][6])
                # #                 final_data["FORM_SUBMISSIONS"].append(pageurl["counts"][7])
                # #                 final_data["TOTAL_WEEKLY_UNIQUE_VISITORS"].append(pageurl["counts"][8])
                # #                 final_data["ENTRIES"].append(pageurl["counts"][9])
                # #                 final_data["TOTAL_TIME_SPENT"].append(pageurl["counts"][10])
                # #
                # #         innerdf["ITEM"] = final_data["ITEM"]
                # #         innerdf["UNIQUE_VISITORS"] = final_data["UNIQUE_VISITORS"]
                # #         innerdf["VISITS"] = final_data["VISITS"]
                # #         innerdf["PAGE_VIEWS"] = final_data["PAGE_VIEWS"]
                # #         innerdf["BOUNCES"] = final_data["BOUNCES"]
                # #         innerdf["TIME_SPENT_ON_PAGE_(MIN)"] = final_data["TIME_SPENT_ON_PAGE_(MIN)"]
                # #         innerdf["E89_VIDEO_VIEWS"] = final_data["E89_VIDEO_VIEWS"]
                # #         innerdf["E17_FORM_SUCCESS"] = final_data["E17_FORM_SUCCESS"]
                # #         innerdf["FORM_SUBMISSIONS"] = final_data["FORM_SUBMISSIONS"]
                # #         innerdf["TOTAL_WEEKLY_UNIQUE_VISITORS"] = final_data["TOTAL_WEEKLY_UNIQUE_VISITORS"]
                # #         innerdf["ENTRIES"] = final_data["ENTRIES"]
                # #         innerdf["TOTAL_TIME_SPENT"] = final_data["TOTAL_TIME_SPENT"]
                # #         innerdf["ACCOUNT_NUMBER"] = pd.Series(final_data["ACCOUNT_NUMBER"])
                # #         #innerdf.fillna(method='ffill', inplace=True)
                # #         innerdf["ACCOUNT_NUMBER"]  = innerdf["ACCOUNT_NUMBER"].fillna(method='ffill')
                # #         innerdf["START_DATE_OF_WEEK"] = pd.Series(final_data["START_DATE_OF_WEEK"])
                # #         innerdf["START_DATE_OF_WEEK"] = innerdf["START_DATE_OF_WEEK"].fillna(method='ffill')
                # #         innerdf["GRANULARITY"] = pd.Series(final_data["GRANULARITY"])
                # #         innerdf["GRANULARITY"] = innerdf["GRANULARITY"].fillna(method='ffill')
                # #         # outerdf['ETL_EXTRACT_DATE'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                # #         #print (outerdf)
                # #         #outerdf = pd.concat([outerdf,innerdf], ignore_index=True)
                # #         outerdf = pd.concat([outerdf, innerdf], axis=0, ignore_index=True)
                # #         outerdf = outerdf.drop_duplicates()
                #
                # columns = ["ITEM", "UNIQUE_VISITORS", "VISITS", "PAGE_VIEWS", "BOUNCES", "TIME_SPENT_ON_PAGE_(MIN)",
                #            "E89_VIDEO_VIEWS", "E17_FORM_SUCCESS", "FORM_SUBMISSIONS", "TOTAL_WEEKLY_UNIQUE_VISITORS",
                #            "ENTRIES","TOTAL_TIME_SPENT"	, "ACCOUNT_NUMBER", "START_DATE_OF_WEEK", "GRANULARITY", "SEGMENT_ID",
                #            "GROUP", "ETL_EXTRACT_DATE"]
                #
                # # outerdf['SEGMENT_ID'] = "SUCCESS_PAGE"
                # outerdf['SEGMENT_ID'] = "VALUE_CALCULATOR"
                # outerdf['GROUP'] = "CUSTOMER_SUCCESS"
                # outerdf['ETL_EXTRACT_DATE'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                #
                # outerdf["START_DATE_OF_WEEK"] = outerdf["START_DATE_OF_WEEK"].map(self.date_conversion)
                # print (outerdf.columns)
                # outerdf = outerdf[columns]
                # outerdf = outerdf.replace([np.inf, -np.inf], np.nan)
                # outerdf = outerdf.replace('', np.NaN)
                # outerdf = outerdf.replace('None', np.NaN)
                # outerdf = outerdf.replace('nan', np.NaN)
                # outerdf = outerdf.where((pd.notnull(outerdf)), None)
                # db_obj = PyHdbWrapper()
                # cursor, connection = db_obj.connect_hana(
                #     Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]), 'HANA_ENV')
                # # delete_page_url = db_obj.get_delete_query(self.schema + '.STG_ADOBE_CSC_DAILY')
                # # db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')
                # for record in outerdf.to_dict("records"):
                #     insert_query = db_obj.get_insert_query(self.schema + ".STG_ADOBE_CSC_WEEKLY",
                #                                            record)
                #
                #     values = list(record.values())
                #     #print(values)
                #     print ("Inserting into Staging table")
                #     db_obj.execute_sql(cursor, connection, insert_query, tuple(values), 'INSERT')
                #     print ("Completed inserting into Staging table")
                #
                # print("All records are inserted into Staging table")
                # # print ("Upserting into Target table")
                # #
                # # upsert_statement = "UPSERT \"" + self.schema + "\".\"ADOBE_CSC_DAILY\"  \
                # #                                     SELECT * FROM \"" + self.schema + "\".\"STG_ADOBE_CSC_DAILY\""
                # # db_obj.execute_sql(cursor, connection, upsert_statement, '', 'UPSERT')
                # #
                # # print ("Completed upserting to target table")
                #
                #
                #
                #
                #
                # #print(outerdf)
                #
                # outerdf.to_csv(r'C:\Users\chanukya.konduru\Documents\testing.csv', index=False)

                #     breakdowndf = pd.concat([breakdowndf, pd.DataFrame(element["breakdown"])], ignore_index=True)
                #     # breakdowndf_2 = pd.concat([breakdowndf_2, pd.DataFrame(element["breakdown"][0]["breakdown"])], ignore_index=True)
                #
                #
                #     breakdowndf_2 = pd.DataFrame(element["breakdown"][0]["breakdown"])
                #
                # #print (breakdowndf_2)
                # names = metricsdf['name'].tolist()
                # for i,name in enumerate(names):
                #     breakdowndf[name] = [metricname[i] for metricname in list(breakdowndf['counts'].tolist())]
                #
                # #print (breakdowndf.head())
                # breakdowndf.drop(['counts', 'url'], axis=1, inplace=True)
                # #breakdowndf = breakdowndf[['name', 'e17 Form Success', 'e89 Video Views']]
                # breakdown_length = len(response_body["report"]["data"][0]["breakdown"])
                # breakdowndf["Granularity"] = self.date_granularity
                # #print (breakdowndf)
                # # breakdowndf.to_csv(r'C:\Users\rajkiran.reddy\Desktop\SNow-Projects\Framework\master_\Git_Repositories\adobe_analytics\testing.csv')

                counter_error = counter_error + 1

        #         # Using Pandas library to load json data and transpose it for easy manuplation
        #         adobe_ana_pd = pd.DataFrame.from_dict(response_body)
        #         adobe_ana_pd = adobe_ana_pd.T
        #         # Removing unwanted index from the dataFrame
        #         adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])
        #
        #         # The metrics for Adobe Analytics is in 'data' column, so parsing it
        #         final_data_df = pd.DataFrame(adobe_ana_pd['data'][0][0]['breakdown'])
        #         #print (final_data_df.columns)
        #         final_data_df = final_data_df.rename(columns={"counts": "Counts",
        #                                         "name": "Surf ID",
        #                                         "url": "URL"})
        #
        #         final_data_df = final_data_df[["Counts", "Surf ID"]]
        #         final_data_df['Granularity'] = adobe_ana_pd['data'][0][0]['name']
        #
        #         if len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']) > 1:
        #             final_data_df['Visits'] = final_data_df['Counts'].map(get_visits)
        #             final_data_df['e17 Form Success'] = final_data_df['Counts'].map(get_form_success)
        #             final_data_df['e89 Video Views'] = final_data_df['Counts'].map(get_video_views)
        #         else:
        #             final_data_df['Return Visits + Visits'] = final_data_df['Counts'].map(get_return_visits)
        #
        #         final_data_df_columns = list(final_data_df.columns)
        #         final_data_df_columns.remove('Counts')
        #         final_data_df = final_data_df[final_data_df_columns]
        #         print (final_data_df)
        #
        #         if 'error' in response_body.keys():
        #             if 'report_not_ready' in response_body['error']:
        #                 pass
        #             else:
        #                 logger.error(api_response.text)
        #                 raise Exception(api_response.reason)
        #         elif 'report' in response_body.keys():
        #             counter_error = 1
            except Exception as e:
                logger.error(e)
                raise
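
get_unique_connection_parameters and get_header are referenced throughout but never shown.
The Adobe Analytics 1.4 API authenticates with a WSSE UsernameToken header; the sketch below
is an assumption about what those two helpers produce together, not their actual code.

import base64
import datetime
import hashlib
import uuid

def build_wsse_header(username, secret):
    # nonce and created feed the SHA1 digest; the header carries the base64-encoded nonce.
    nonce = str(uuid.uuid4())
    created = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    digest = base64.b64encode(
        hashlib.sha1((nonce + created + secret).encode('utf-8')).digest()).decode('ascii')
    nonce_b64 = base64.b64encode(nonce.encode('utf-8')).decode('ascii')
    return {'X-WSSE': 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"'
                      % (username, digest, nonce_b64, created)}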
Example #5
    def response_handler(self, data_df):
        db_obj = PyHdbWrapper()
        cursor, connection = db_obj.connect_hana(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]),
            'HANA_ENV')
        ''' Truncate staging table before inserting records'''
        # delete_page_url = db_obj.get_delete_query(self.schema + '.STG_ADOBE_CSC_DAILY')
        # db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

        # Extract Date will be used as a bookmark for loading data into HANA
        extract_date = datetime.datetime.today()

        for index, row in data_df.iterrows():  # Outer loop for day-specific data
            table = row.iloc[0]  # Table is of type Dictionary
            source_date = str(
                datetime.date(table['year'], table['month'], table['day']))
            source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
            breakdown = (table['breakdown'])  # Breakdown is type of list
            for j in breakdown:
                if 'breakdown' in j.keys():
                    account_name = j['name']
                    # print(i['breakdownTotal'])
                    temp = j['breakdown']
                    # for i in breakdown:
                    #     if 'breakdown' in i.keys():
                    #         country = i['name']
                    #         # print(i['breakdownTotal'])
                    #         temp = i['breakdown']
                    for i in temp:
                        counts = i['counts']
                        pageviews = counts[0]
                        visits = counts[1]
                        uniquevisitors = counts[2]
                        bouncerate = counts[3]
                        averageTimeSpentOnSite = counts[4]
                        url = i['name']
                        # print(source_date, account_name, url + '\n' + pageviews, visits, uniquevisitors, bouncerate)
                        # column_name = ["PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL"
                        #     , "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT", "UNIQUE_VISITOR_COUNT"
                        #     , "BOUNCE_RATE_%%", "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"]
                        column_name = [
                            "ITEM", "UNIQUE_VISITORS", "VISITS", "PAGE_VIEWS",
                            "BOUNCES", "TIME_SPENT_ON_PAGE_(MIN)",
                            "E89_VIDEO_VIEWS", "E17_FORM_SUCCESS",
                            "FORM_SUBMISSIONS", "TOTAL_WEEKLY_UNIQUE_VISITORS",
                            "ENTRIES", "TOTAL_TIME_SPENT", "ACCOUNT_NUMBER",
                            "DATE", "GRANULARITY", "SEGMENT_ID", "GROUP",
                            "ETL_EXTRACT_DATE"
                        ]

                        # insert_query = db_obj.get_insert_query(self.schema + ".STG_ADOBE_CSC_DAILY",
                        #                                        column_name)
                        insert_query = db_obj.get_insert_query(
                            self.schema + ".STG_ADOBE_SAMPLE", column_name)
                        values = []
                        # values = [source_date, self.date_granularity, country, url, pageviews, visits,
                        #           uniquevisitors, bouncerate, averageTimeSpentOnSite, extract_date]

                        values.append(url)
                        # Replace a literal 'INF' count with 0 before loading
                        if counts[4] == 'INF':
                            counts[4] = 0
                        values.extend(counts)
                        values.append(account_name)
                        values.append(source_date)
                        values.append(self.date_granularity)
                        values.append(self.segment_name)
                        values.append(self.group)
                        values.append(extract_date)
                        # print(values)

                        print('insert')
                        db_obj.execute_sql(cursor, connection, insert_query,
                                           values, 'INSERT')
Example #6
    def main(self):
        '''
        This function is called from the main.py file and contains the logic to
        fetch data from the source and save it to the destination.
        :return:
        '''

        # metrics = self.metrics
        # elements = self.elements
        # element_names = self.element_names
        # segements = self.segments
        #
        # if metrics is not None:
        #     metrics = metrics.split(",")
        # else:
        #     metrics = []
        #
        # if elements is not None:
        #     elements = elements.split(",")
        # else:
        #     elements = ["eVar48"]
        #
        # if element_names is not None:
        #     element_names = element_names.split(",")
        # else:
        #     element_names = ["DemandBase Custom 2"]
        #
        # if segements is not None:
        #     segements = segements.split(",")
        # else:
        #     segements = "s300007365_5b1ee51fbef0d34e1bda4081"
        #
        # element_name_map_list = []
        # for i, ele in enumerate(elements):
        #     name = element_names[i]
        #     name = list(str(name))
        #     ele = str(ele)
        #     ele = list(ele)
        #     element_name_map_list.append(dict(zip(ele, name)))
        #
        # segment_element_map_list = []
        # for i, seg in enumerate(segments):
        #     element_name_map = element_name_map_list[i]
        #     element_name_map = list(str(name))
        #     ele = str(ele)
        #     ele = list(ele)
        #     element_name_map_list.append(dict(zip(ele, name)))
        '''
        from_ini reads the configuration file for the given section and key names
        and returns a dict of configuration parameters.
        '''
        #print ("Date granularity is: {}".format(self.date_granularity))
        adobe_config = Utils.from_ini(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
            'Adobe_Analytics', ('username', 'api_secret'))
        '''
        Getting the endpoint URL
        '''
        query_url = self.get_endpoint_url('method=Report.Queue')
        print("\n")
        print(query_url)
        print("\n")
        '''
        Getting payload to be passed with the api
        '''
        payload = json.dumps(
            self.get_payload(self.date_from, self.date_to,
                             self.date_granularity))
        print("------------------------------------")
        print("Payload is:")
        print(payload)
        print("------------------------------------")
        print("\n")
        '''
        Preparing parameters for passing in header with api for authentication
        '''
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        '''
        Get header
        '''
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        print("------------------------------------")
        print("Header is:")
        print(head)
        print("------------------------------------")
        print("\n")
        '''
        Calling api for preparing reports
        '''
        report_queue_api_response = Utils.send_request('POST', query_url,
                                                       payload, head)

        if report_queue_api_response.status_code != 200:
            logger.error(report_queue_api_response.text)
            raise Exception(report_queue_api_response.reason)

        report_queue_response_body = report_queue_api_response.text.encode(
            'ascii')
        temp_var = report_queue_response_body.split(b':')
        report_id = temp_var[1].replace(b'}', b'')
        # print(report_id)
        '''
        Section - 2: Get data based on report developed and save the JSON reply in shared folder 
        '''
        '''
        Developing API URL for retrieving
        '''
        query_url = self.get_endpoint_url(
            'method=Report.Get')  # 'method=Report.GetMetrics'

        # The body of the API url is enclosed as post_params
        bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
        payload = json.dumps(bodydata)

        counter_error = 0
        while (counter_error == 0):
            # Sleep to give the report enough time to become ready before pulling the
            # data, otherwise the API responds with "Report not ready"
            print("Start sleep time " + time.strftime("%X"))
            print("\n")
            time.sleep(self.sleep_time)
            '''
            Get connection parameter for getting reports data
            and get header
            '''
            nonce_b, iso_time, digest = self.get_unique_connection_parameters(
                adobe_config['api_secret'])
            head = self.get_header(adobe_config['username'], digest, nonce_b,
                                   iso_time)

            # logger
            '''
            Call api to get reports
            '''
            api_response = Utils.send_request('POST', query_url, payload, head)

            try:
                response_body = json.loads(api_response.text)
                if 'error' in response_body:
                    if 'report_not_ready' in response_body['error']:
                        # Report not ready yet; poll again
                        continue
                    logger.error(api_response.text)
                    raise Exception(api_response.reason)
                #print ("Response Body is: {}".format(response_body))
                print("------------------------------------")
                print("API Response is:")
                print("\n")
                print("Response is: {}".format(response_body))
                print("------------------------------------")
                print("\n")

                metricsdf = pd.DataFrame(response_body["report"]["metrics"])
                datadf = pd.DataFrame(response_body["report"]["data"])
                granularity_list = datadf['name'].tolist()
                breakdowndf = pd.DataFrame()
                for element in response_body["report"]["data"]:
                    breakdowndf = pd.concat(
                        [breakdowndf,
                         pd.DataFrame(element["breakdown"])])

                names = metricsdf['name'].tolist()
                for i, name in enumerate(names):
                    breakdowndf[name] = [
                        metricname[i]
                        for metricname in list(breakdowndf['counts'].tolist())
                    ]

                #print (breakdowndf.head())
                breakdowndf = breakdowndf.drop(['counts', 'url'], axis=1)
                #breakdowndf = breakdowndf[['name', 'e17 Form Success', 'e89 Video Views']]
                breakdown_length = len(
                    response_body["report"]["data"][0]["breakdown"])
                granularity = {
                    i: (i + ";") * breakdown_length
                    for i in granularity_list
                }
                final_list = []
                for key, value in granularity.items():
                    final_list.extend(value[:-1].split(";"))
                breakdowndf["Granularity"] = final_list
                breakdowndf = breakdowndf.reset_index()
                print(breakdowndf)
                breakdowndf.to_csv(
                    r'C:\Users\chanukya.konduru\Documents\testing.csv')

                counter_error = counter_error + 1

        #         # Using Pandas library to load json data and transpose it for easy manuplation
        #         adobe_ana_pd = pd.DataFrame.from_dict(response_body)
        #         adobe_ana_pd = adobe_ana_pd.T
        #         # Removing unwanted index from the dataFrame
        #         adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])
        #
        #         # The metrics for Adobe Analytics is in 'data' column, so parsing it
        #         final_data_df = pd.DataFrame(adobe_ana_pd['data'][0][0]['breakdown'])
        #         #print (final_data_df.columns)
        #         final_data_df = final_data_df.rename(columns={"counts": "Counts",
        #                                         "name": "Surf ID",
        #                                         "url": "URL"})
        #
        #         final_data_df = final_data_df[["Counts", "Surf ID"]]
        #         final_data_df['Granularity'] = adobe_ana_pd['data'][0][0]['name']
        #
        #         if len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']) > 1:
        #             final_data_df['Visits'] = final_data_df['Counts'].map(get_visits)
        #             final_data_df['e17 Form Success'] = final_data_df['Counts'].map(get_form_success)
        #             final_data_df['e89 Video Views'] = final_data_df['Counts'].map(get_video_views)
        #         else:
        #             final_data_df['Return Visits + Visits'] = final_data_df['Counts'].map(get_return_visits)
        #
        #         final_data_df_columns = list(final_data_df.columns)
        #         final_data_df_columns.remove('Counts')
        #         final_data_df = final_data_df[final_data_df_columns]
        #         print (final_data_df)
        #
        #         if 'error' in response_body.keys():
        #             if 'report_not_ready' in response_body['error']:
        #                 pass
        #             else:
        #                 logger.error(api_response.text)
        #                 raise Exception(api_response.reason)
        #         elif 'report' in response_body.keys():
        #             counter_error = 1
            except Exception as e:
                logger.error(e)
                raise
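
The granularity labels are expanded above by building ';'-joined strings in a dict and splitting
them again, which also collapses duplicate labels because they become dict keys. A sketch of a
direct equivalent, assuming (as the code above does) that every period has the same number of
breakdown rows:

def repeat_granularity(granularity_list, breakdown_length):
    # One copy of each period label per breakdown row, in order, duplicates preserved.
    return [label for label in granularity_list for _ in range(breakdown_length)]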
Example #7
    def main(self):
        '''
        This function is called from the main.py file and contains the logic to
        fetch data from the source and save it to the destination.
        :return:
        '''

        '''
        from_ini reads the configuration file for the given section and key names
        and returns a dict of configuration parameters.
        '''
        #print ("Date granularity is: {}".format(self.date_granularity))
        adobe_config = Utils.from_ini(
            Utils.get_file_path(
                self.da_path,
                [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
            'Adobe_Analytics',
            ('username', 'api_secret'))

        '''
        Getting the endpoint URL
        '''
        query_url = self.get_endpoint_url('method=Report.Queue')
        print ("\n")
        print (query_url)
        print ("\n")

        # Small helpers that pull individual metrics out of the 'counts' list by position
        def get_visits(x):
            y = x[0]
            return y

        def get_form_success(x):
            y = x[0]
            return y

        def get_video_views(x):
            y = x[1]
            return y

        def get_return_visits(x):
            y = x[0]
            return y

        '''
        Getting payload to be passed with the api
        '''
        payload = json.dumps(self.get_payload(self.date_from, self.date_to, self.date_granularity))
        print ("------------------------------------")
        print ("Payload is:")
        print (payload)
        print ("------------------------------------")
        print ("\n")
        '''
        Preparing parameters for passing in header with api for authentication
        '''
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(adobe_config['api_secret'])

        '''
        Get header
        '''
        head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)

        print ("------------------------------------")
        print ("Header is:")
        print (head)
        print ("------------------------------------")
        print ("\n")


        '''
        Calling api for preparing reports
        '''
        report_queue_api_response = Utils.send_request('POST', query_url, payload, head)

        if report_queue_api_response.status_code != 200:
            logger.error(report_queue_api_response.text)
            raise Exception(report_queue_api_response.reason)

        report_queue_response_body = report_queue_api_response.text.encode('ascii')
        temp_var = report_queue_response_body.split(b':')
        report_id = temp_var[1].replace(b'}', b'')
        # print(report_id)

        '''
        Section - 2: Get data based on report developed and save the JSON reply in shared folder 
        '''

        '''
        Developing API URL for retrieving
        '''
        query_url = self.get_endpoint_url('method=Report.Get')

        # The body of the API url is enclosed as post_params
        bodydata = {
            'reportID': '' + report_id.decode('ascii') + ''
        }
        payload = json.dumps(bodydata)

        counter_error = 0
        while (counter_error == 0):
            # Sleep to give the report enough time to become ready before pulling the
            # data, otherwise the API responds with "Report not ready"
            print("Start sleep time " + time.strftime("%X"))
            print ("\n")
            time.sleep(self.sleep_time)

            '''
            Get connection parameter for getting reports data
            and get header
            '''
            nonce_b, iso_time, digest = self.get_unique_connection_parameters(adobe_config['api_secret'])
            head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)

            # logger

            '''
            Call api to get reports
            '''
            api_response = Utils.send_request('POST', query_url, payload, head)

            try:
                response_body = json.loads(api_response.text)
                #print ("Response Body is: {}".format(response_body))
                print ("------------------------------------")
                print ("API Response is:")
                print ("\n")
                print ("Response is: {}".format(response_body))
                print ("------------------------------------")
                print ("\n")
                with open(r'C:\Users\rajkiran.reddy\Desktop\SNow-Projects\AdobeAnalytics\Adobe_data.json', 'w') as f:
                    json.dump(response_body, f)
                # Using the pandas library to load the JSON data and transpose it for easier manipulation
                adobe_ana_pd = pd.DataFrame.from_dict(response_body)
                adobe_ana_pd = adobe_ana_pd.T
                # Removing unwanted index from the dataFrame
                adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])

                # The Adobe Analytics metrics are in the 'data' column, so parse it
                #print (adobe_ana_pd['data'][0][0]['name'])
                #print (len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']))
                final_data_df = pd.DataFrame(adobe_ana_pd['data'][0][0]['breakdown'])
                #print (final_data_df.columns)
                final_data_df = final_data_df.rename(columns={"counts": "Counts",
                                                "name": "Surf ID",
                                                "url": "URL"})

                final_data_df = final_data_df[["Counts", "Surf ID"]]
                final_data_df['Granularity'] = adobe_ana_pd['data'][0][0]['name']

                if len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']) > 1:
                    final_data_df['Visits'] = final_data_df['Counts'].map(get_visits)
                    final_data_df['e17 Form Success'] = final_data_df['Counts'].map(get_form_success)
                    final_data_df['e89 Video Views'] = final_data_df['Counts'].map(get_video_views)
                else:
                    final_data_df['Return Visits + Visits'] = final_data_df['Counts'].map(get_return_visits)

                final_data_df_columns = list(final_data_df.columns)
                final_data_df_columns.remove('Counts')
                final_data_df = final_data_df[final_data_df_columns]
                print (final_data_df)

                if 'error' in response_body.keys():
                    if 'report_not_ready' in response_body['error']:
                        pass
                    else:
                        logger.error(api_response.text)
                        raise Exception(api_response.reason)
                elif 'report' in response_body.keys():
                    counter_error = 1
            except Exception as e:
                logger.error(e)
                raise
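
The positional helpers above (get_visits, get_form_success, get_video_views) hard-code where
each metric sits in 'counts', and two of them read the same index. Example #6 instead names the
columns from the report's own metrics list; a sketch of that approach, assuming the usual 1.4
report shape where report['metrics'][i]['name'] lines up with counts[i]:

import pandas as pd

def counts_to_columns(report):
    # One DataFrame row per breakdown item, with a named column per metric.
    metric_names = [m['name'] for m in report['metrics']]
    rows = []
    for period in report['data']:
        for item in period.get('breakdown', []):
            row = dict(zip(metric_names, item['counts']))
            row['name'] = item['name']
            row['granularity'] = period['name']
            rows.append(row)
    return pd.DataFrame(rows)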
Example #8
    def main(self):
        '''
        This function is called from the main.py file and contains the logic to
        fetch data from the source and save it to the destination.
        :return:
        '''
        '''
        from_ini reads the configuration file for the given section and key names
        and returns a dict of configuration parameters.
        '''
        print("Date granularity is: {}".format(self.date_granularity))
        adobe_config = Utils.from_ini(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
            'Adobe_Analytics', ('username', 'api_secret'))

        #Getting end point url
        query_url = self.get_endpoint_url('method=Report.Queue')
        print(query_url)

        #Getting payload to be passed with the api
        metric_payload = json.dumps(
            self.get_payload(self.date_from, self.date_to,
                             self.date_granularity))
        #print (metric_payload)

        segment_payload = json.dumps(
            self.get_segment_payload(self.date_from, self.date_to,
                                     self.date_granularity))
        #print (segment_payload)

        # #Preparing parameters for passing in header with api for authentication
        # nonce_b, iso_time, digest = self.get_unique_connection_parameters(adobe_config['api_secret'])
        #
        # #Get header
        # head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)
        # print (head)

        payloads = [metric_payload, segment_payload]

        for payload in payloads:

            time.sleep(10)

            print(payload)

            # Preparing parameters for passing in header with api for authentication
            nonce_b, iso_time, digest = self.get_unique_connection_parameters(
                adobe_config['api_secret'])

            # Get header
            head = self.get_header(adobe_config['username'], digest, nonce_b,
                                   iso_time)
            print(head)

            #Calling api for preparing reports
            report_queue_api_response = Utils.send_request(
                'POST', query_url, payload, head)

            if report_queue_api_response.status_code != 200:
                logger.error(report_queue_api_response.text)
                raise Exception(report_queue_api_response.reason)

            report_queue_response_body = report_queue_api_response.text.encode(
                'ascii')
            temp_var = report_queue_response_body.split(b':')
            report_id = temp_var[1].replace(b'}', b'')
            # print(report_id)
            '''
            Section - 2: Get data based on report developed and save the JSON reply in shared folder 
            '''
            '''
            Developing API URL for retrieving
            '''
            # Use a separate variable so the Report.Queue URL is kept for the next payload
            report_get_url = self.get_endpoint_url('method=Report.Get')

            # The body of the API url is enclosed as post_params
            bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
            payload = json.dumps(bodydata)

            counter_error = 0
            while (counter_error == 0):
                # Sleep to give the report enough time to become ready before pulling
                # the data, otherwise the API responds with "Report not ready"
                print("Start sleep time " + time.strftime("%X"))
                time.sleep(self.sleep_time)
                '''
                Get connection parameter for getting reports data
                and get header
                '''
                nonce_b, iso_time, digest = self.get_unique_connection_parameters(
                    adobe_config['api_secret'])
                head = self.get_header(adobe_config['username'], digest,
                                       nonce_b, iso_time)

                # logger
                '''
                Call api to get reports
                '''
                api_response = Utils.send_request('POST', report_get_url,
                                                  payload, head)

                try:
                    response_body = json.loads(api_response.text)
                    print(response_body)
                    if 'error' in response_body.keys():
                        if 'report_not_ready' in response_body['error']:
                            pass
                        else:
                            logger.error(api_response.text)
                            raise Exception(api_response.reason)
                    elif 'report' in response_body.keys():
                        counter_error = 1
                except Exception as e:
                    logger.error(e)
                    raise
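
Because the queue-then-poll cycle runs once per payload, keeping the Report.Queue and
Report.Get URLs in separate variables avoids reusing the Get URL for the next queue request.
A sketch of the cycle as a helper, with Utils.send_request and the header builder passed in as
callables since their exact behavior is assumed from the examples above:

import json
import time

def queue_and_poll(send_request, make_header, queue_url, get_url, payload, sleep_time):
    # Queue the report, then poll Report.Get until it is ready or a real error comes back.
    queue_reply = send_request('POST', queue_url, payload, make_header())
    report_id = json.loads(queue_reply.text)['reportID']
    get_payload = json.dumps({'reportID': str(report_id)})
    while True:
        time.sleep(sleep_time)
        reply = send_request('POST', get_url, get_payload, make_header())
        body = json.loads(reply.text)
        if 'report' in body:
            return body
        if 'error' in body and 'report_not_ready' not in body['error']:
            raise Exception(reply.text)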
Example #9
    def response_handler(self, data_df):
        db_obj = PyHdbWrapper()
        cursor, connection = db_obj.connect_hana(
            Utils.get_file_path(self.da_path,
                                [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]),
            'HANA_ENV')
        ''' Truncate staging table before inserting records'''
        delete_page_url = db_obj.get_delete_query(self.schema + '.STG_' +
                                                  str(self.table_name).upper())
        db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

        # Extract Date will be used as a bookmark for loading data into HANA
        extract_date = datetime.datetime.today()

        for index, row in data_df.iterrows():  # Outer loop for day-specific data
            table = row.iloc[0]  # Table is of type Dictionary
            source_date = str(
                datetime.date(table['year'], table['month'], table['day']))
            source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
            breakdown = (table['breakdown'])  # Breakdown is type of list
            for country_entry in breakdown:
                if 'breakdown' in country_entry:
                    country = country_entry['name']
                    # print(country_entry['breakdownTotal'])
                    temp = country_entry['breakdown']
                    for i in temp:
                        counts = i['counts']
                        pageviews = counts[0]
                        visits = counts[1]
                        uniquevisitors = counts[2]
                        bouncerate = counts[3]
                        averageTimeSpentOnSite = counts[4]
                        url = i['name']
                        print(source_date, country, url, pageviews,
                              visits, uniquevisitors, bouncerate)
                        # Column list for STG_PAGE_URL_METRICS; used by the
                        # get_insert_query call further down in this loop
                        column_name = [
                            "PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                            "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT",
                            "UNIQUE_VISITOR_COUNT", "BOUNCE_RATE_%%",
                            "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"
                        ]

                        # Getting HANA Table definition
                        column_names, column_datatypes = get_hana_table_definition(
                            cursor,
                            str(self.schema).upper() + "." + "STG_" +
                            str(self.table_name).upper())

                        resp_dict = {key: None for key in column_names}

                        # Note: sales_enablement_dataframe and schema_name are not
                        # defined anywhere in this function
                        if len(sales_enablement_dataframe.to_dict()) > 0:

                            stg_table_name = "STG_" + str(
                                self.table_name).upper()

                            # Sync the source and target structure
                            target_query = "SCHEMA_NAME = '%s' AND TABLE_NAME = '%s'" % (
                                str(self.schema).upper(), stg_table_name)
                            structure_target_query = db_obj.get_select_query(
                                'COLUMN_NAME', 'SYS.M_CS_COLUMNS',
                                target_query)
                            structure_target = db_obj.execute_sql(
                                cursor, connection, structure_target_query, '',
                                'SELECT')

                            # converting a list of tuples to a simple list of elements
                            structure_target = [i[0] for i in structure_target]
                            sales_enablement_dataframe = self.source_target_structure_sync(
                                sales_enablement_dataframe, structure_target)

                            sales_enablement_columns = sales_enablement_dataframe.columns.tolist(
                            )
                            sales_enablement_columns = [
                                col.upper() for col in sales_enablement_columns
                            ]
                            sales_enablement_dataframe.columns = sales_enablement_columns
                            sales_enablement_dataframe = sales_enablement_dataframe.where(
                                (pd.notnull(sales_enablement_dataframe)), None)

                            print("Inserting the records into {}".format(
                                schema_name + '.' + stg_table_name))
                            for record in sales_enablement_dataframe.to_dict(
                                    'records'):
                                result_dict = generate_hana_store_dict(
                                    column_names, column_datatypes, record)
                                insert_table_name = schema_name + '.' + stg_table_name
                                table_values = list(result_dict.values())
                                insert_query = get_insert_query(
                                    insert_table_name, result_dict)

                                ## Inserting the records in staging table
                                db_obj.execute_sql(cursor, connection,
                                                   insert_query,
                                                   tuple(table_values),
                                                   'INSERT')
                            logger.info("Records have been Inserted")

                        insert_query = db_obj.get_insert_query(
                            self.schema + ".STG_PAGE_URL_METRICS", column_name)

                        values = [
                            source_date, self.date_granularity, country, url,
                            pageviews, visits, uniquevisitors, bouncerate,
                            averageTimeSpentOnSite, extract_date
                        ]
                        print(values)

                        #db_obj.execute_sql(cursor, connection, insert_query, values, 'INSERT')

        # upsert_statement = "UPSERT \"" + self.schema + "\".\"PAGE_URL_METRICS\"  \
        #                                 SELECT * FROM \"" + self.schema + "\".\"STG_PAGE_URL_METRICS\""
        stg_table_name = "STG_" + str(self.table_name).upper()
        upsert_query = ''' UPSERT %s.%s SELECT * FROM %s.%s ''' % (
            str(self.schema).upper(), str(self.table_name).upper(),
            str(self.schema).upper(), stg_table_name)
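
Example #9 reads the staging table's definition through get_hana_table_definition, which is not
shown. A hypothetical reconstruction against HANA's TABLE_COLUMNS system view, assuming a
DB-API cursor like the one returned by connect_hana; the real helper may differ:

def get_hana_table_definition(cursor, full_table_name):
    # full_table_name is '<SCHEMA>.<TABLE>'; returns parallel lists of column names and types.
    schema, table = full_table_name.split('.', 1)
    cursor.execute(
        "SELECT COLUMN_NAME, DATA_TYPE_NAME FROM SYS.TABLE_COLUMNS "
        "WHERE SCHEMA_NAME = '%s' AND TABLE_NAME = '%s' ORDER BY POSITION" % (schema, table))
    rows = cursor.fetchall()
    column_names = [row[0] for row in rows]
    column_datatypes = [row[1] for row in rows]
    return column_names, column_datatypes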