def response_handler(self, data_df):
    db_obj = PyHdbWrapper()
    cursor, connection = db_obj.connect_hana(
        Utils.get_file_path(self.da_path,
                            [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]), 'HANA_ENV')

    # Truncate the staging table before inserting records
    delete_page_url = db_obj.get_delete_query(self.schema + '.STG_PAGE_URL_METRICS')
    db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

    # Extract date is used as a bookmark for loading data into HANA
    extract_date = datetime.datetime.today()

    for index, row in data_df.iterrows():  # Outer loop over day-specific data
        table = row.iloc[0]  # Each row holds a dictionary for one day
        source_date = str(
            datetime.date(table['year'], table['month'], table['day']))
        source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
        breakdown = table['breakdown']  # Breakdown is a list (one entry per country)

        for i in breakdown:
            if 'breakdown' in i.keys():
                country = i['name']
                # print(i['breakdownTotal'])
                temp = i['breakdown']
                for i in temp:  # Inner loop over the page URLs for this country
                    counts = i['counts']
                    pageviews = counts[0]
                    visits = counts[1]
                    uniquevisitors = counts[2]
                    bouncerate = counts[3]
                    averageTimeSpentOnSite = counts[4]
                    url = i['name']
                    print(source_date, country, url + '\n' + pageviews,
                          visits, uniquevisitors, bouncerate)

                    column_name = [
                        "PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                        "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT",
                        "UNIQUE_VISITOR_COUNT", "BOUNCE_RATE_%%",
                        "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"
                    ]
                    insert_query = db_obj.get_insert_query(
                        self.schema + ".STG_PAGE_URL_METRICS", column_name)
                    values = [
                        source_date, self.date_granularity, country, url,
                        pageviews, visits, uniquevisitors, bouncerate,
                        averageTimeSpentOnSite, extract_date
                    ]
                    print(values)
                    # db_obj.execute_sql(cursor, connection, insert_query, values, 'INSERT')

    upsert_statement = "UPSERT \"" + self.schema + "\".\"PAGE_URL_METRICS\" " \
                       "SELECT * FROM \"" + self.schema + "\".\"STG_PAGE_URL_METRICS\""
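
# Illustrative sketch only (not part of the connector): the shape of one day
# entry that response_handler above walks. Field names mirror the parsing
# logic; the literal values are made up for illustration.
_example_day_entry = {
    'year': 2019, 'month': 5, 'day': 20,
    'breakdown': [            # one entry per country
        {
            'name': 'United States',
            'breakdown': [    # one entry per page URL
                {
                    'name': 'https://www.example.com/page',
                    # counts order assumed by the parser: page views, visits,
                    # unique visitors, bounce rate, average time spent on site
                    'counts': ['120', '95', '80', '0.4', '35'],
                },
            ],
        },
    ],
}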
def response_handler(self, data_df): """ This is used for Parsing JSON data and saving it into HANA DB ######### :param data_df: it include json data and loading json data in hana database """ db_obj = PyHdbWrapper() cursor, connection = db_obj.connect_hana( Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]), 'HANA_ENV') # Extract Date will be used as a bookmark for loading data into HANA extract_date = datetime.datetime.today() for index, row in data_df.iterrows( ): # Outer Loop for Day Specific data table = row.iloc[0] # Table is of type Dictionary source_date = str( datetime.date(table['year'], table['month'], table['day'])) source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d") breakdown = (table['breakdown']) # Breakdown is type of list for i in breakdown: if 'breakdown' in i.keys(): country = i['name'] # print(i['breakdownTotal']) temp = i['breakdown'] for i in temp: counts = i['counts'] pageviews = counts[0] visits = counts[1] uniquevisitors = counts[2] bouncerate = counts[3] averageTimeSpentOnSite = counts[4] url = i['name'] print(source_date, country, url + '\n' + pageviews, visits, uniquevisitors, bouncerate) column_name = [ "PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL", "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT", "UNIQUE_VISITOR_COUNT", "BOUNCE_RATE_%%", "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE" ] insert_query = db_obj.get_insert_query( "SAMEER_RATHOD.STG_PAGE_URL_METRICS", column_name) values = [ source_date, self.date_granularity, country, url, pageviews, visits, uniquevisitors, bouncerate, averageTimeSpentOnSite, extract_date ] db_obj.execute_sql(cursor, connection, insert_query, values, 'INSERT')
def main(self): """ calling utils() Method Make connection to database by reading connection parameters from an ini file. """ utils_object = Utils() """ from_ini= It will make the database connection. get_file_path= it will Read the file path of Adobe_Analytics and return us a "username " and " api_secret" """ adobe_config = utils_object.from_ini( Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, CONFIG_FILE]), 'Adobe_Analytics', ('username', 'api_secret')) """ get_endpoint_url= calling get_endpoint_url to generate endpoint url """ query_url = self.get_endpoint_url('method=Report.Queue') """ get_payload= calling get_payload to get payload which we will use in json body """ payload = json.dumps( self.get_payload(self.date_from, self.date_to, self.date_granularity)) nonce_b, iso_time, digest = self.get_unique_connection_parameters( adobe_config['api_secret']) head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time) """ Api Method 'POST' = It will send the request """ report_queue_api_response = utils_object.send_request( 'POST', query_url, payload, head) report_queue_response_body = report_queue_api_response.text.encode( 'ascii') """ After API POST Request we are encoding the response in ascii format """ temp_var = report_queue_response_body.split(b':') report_id = temp_var[1].replace(b'}', b'') print(report_id) ######## Section - 2: Get data based on report developed and save the JSON reply in shared folder ######## # Develoiping API URL for retriving query_url = self.get_endpoint_url('method=Report.Get') # The body of the API url is enlcosed as post_params bodydata = {'reportID': '' + report_id.decode('ascii') + ''} payload = json.dumps(bodydata) counter_error = 0 while (counter_error == 0): """" While Loop is used to check the api response if any error is there loop will pass else break. """ # Using sleep method to give enough time to get the report ready to pull the data else it will throw # "Report not ready" print("Start sleep time " + time.strftime("%X")) time.sleep(self.sleep_time) nonce_b, iso_time, digest = self.get_unique_connection_parameters( adobe_config['api_secret']) head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time) # logger api_response = utils_object.send_request('POST', query_url, payload, head) response_body = json.loads(api_response.text) if 'error' in response_body.keys(): if 'report_not_ready' in response_body['error']: pass else: break elif 'report' in response_body.keys(): counter_error = 1 # Using Pandas library to load json data and transpose it for easy manuplation adobe_ana_pd = pd.DataFrame.from_dict(response_body) adobe_ana_pd = adobe_ana_pd.T # Removing unwanted index from the dataFrame adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:]) # The metrics for Adobe Analytics is in 'data' column, so parsing it data_df = pd.read_json((adobe_ana_pd['data']).to_json()) # datetime.datetime.strptime(str(datetime.date.today()),"%Y-%m-%d") # Iterating over the JSON file to extract metrics self.response_handler(data_df)
def main(self):
    '''
    Called from main.py; contains the logic to fetch data from the source
    and save it to the destination.
    :return:
    '''
    # from_ini reads the configuration file for the given section and keys
    # and returns a dict of configuration parameters.
    # print("Date granularity is: {}".format(self.date_granularity))
    adobe_config = Utils.from_ini(
        Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
        'Adobe_Analytics', ('username', 'api_secret'))

    # Get the endpoint URL
    query_url = self.get_endpoint_url('method=Report.Queue')
    print("\n")
    print(query_url)
    print("\n")

    # Get the payload to be passed with the API call
    payload = json.dumps(
        self.get_payload(self.date_from, self.date_to, self.date_granularity))
    print("------------------------------------")
    print("Payload is:")
    print(payload)
    print("------------------------------------")
    print("\n")

    # Prepare the authentication parameters passed in the request header
    nonce_b, iso_time, digest = self.get_unique_connection_parameters(
        adobe_config['api_secret'])
    # Get the header
    head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)
    print("------------------------------------")
    print("Header is:")
    print(head)
    print("------------------------------------")
    print("\n")

    # Call the API to queue the report
    report_queue_api_response = Utils.send_request('POST', query_url, payload,
                                                   head)
    if report_queue_api_response.status_code != 200:
        logger.error(report_queue_api_response.text)
        raise Exception(report_queue_api_response.reason)

    report_queue_response_body = report_queue_api_response.text.encode('ascii')
    temp_var = report_queue_response_body.split(b':')
    report_id = temp_var[1].replace(b'}', b'')
    # print(report_id)

    # Section - 2: Get data for the queued report and save the JSON reply in
    # the shared folder.
    # Build the API URL for retrieving the report
    query_url = self.get_endpoint_url('method=Report.Get')  # 'method=Report.GetMetrics'
    # The body of the API request is enclosed as post_params
    bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
    payload = json.dumps(bodydata)

    counter_error = 0
    while counter_error == 0:
        # Sleep to give the report enough time to be ready; otherwise the API
        # returns "Report not ready".
        print("Start sleep time " + time.strftime("%X"))
        print("\n")
        time.sleep(self.sleep_time)

        # Get connection parameters and the header for retrieving the report
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        # Call the API to get the report
        api_response = Utils.send_request('POST', query_url, payload, head)
        if api_response.status_code != 200:
            print(api_response)
            continue
        try:
            response_body = json.loads(api_response.text)
            if 'error' in response_body.keys():
                if 'report_not_ready' in response_body['error']:
                    pass
                else:
                    logger.error(api_response.text)
                    raise Exception(api_response.reason)
            elif 'report' in response_body.keys():
                counter_error = 1
        except Exception as e:
            logger.error(e)
            raise

        if counter_error == 0:
            # The report is not ready yet; wait and poll again instead of
            # trying to parse an error body.
            continue

        try:
            # response_body = json.loads(api_response.text)
            print("------------------------------------")
            print("API Response is:")
            print("\n")
            print("Response is: {}".format(response_body))
            print("------------------------------------")
            print("\n")

            # Load the JSON data with pandas and transpose it for easier manipulation
            adobe_ana_pd = pd.DataFrame.from_dict(response_body)
            adobe_ana_pd = adobe_ana_pd.T
            # Remove unwanted rows from the DataFrame
            adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])
            # The Adobe Analytics metrics live in the 'data' column, so parse it
            data_df = pd.read_json((adobe_ana_pd['data']).to_json())
            self.response_handler(data_df)
            counter_error = counter_error + 1
        except Exception as e:
            logger.error(e)
            raise
def response_handler(self, data_df):
    db_obj = PyHdbWrapper()
    cursor, connection = db_obj.connect_hana(
        Utils.get_file_path(self.da_path,
                            [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]), 'HANA_ENV')

    # Truncate the staging table before inserting records
    # delete_page_url = db_obj.get_delete_query(self.schema + '.STG_ADOBE_CSC_DAILY')
    # db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

    # Extract date is used as a bookmark for loading data into HANA
    extract_date = datetime.datetime.today()

    for index, row in data_df.iterrows():  # Outer loop over day-specific data
        table = row.iloc[0]  # Each row holds a dictionary for one day
        source_date = str(
            datetime.date(table['year'], table['month'], table['day']))
        source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
        breakdown = table['breakdown']  # Breakdown is a list

        for j in breakdown:
            if 'breakdown' in j.keys():
                account_name = j['name']
                # print(j['breakdownTotal'])
                temp = j['breakdown']
                for i in temp:
                    counts = i['counts']
                    pageviews = counts[0]
                    visits = counts[1]
                    uniquevisitors = counts[2]
                    bouncerate = counts[3]
                    averageTimeSpentOnSite = counts[4]
                    url = i['name']
                    # print(source_date, account_name, url + '\n' + pageviews,
                    #       visits, uniquevisitors, bouncerate)

                    # column_name = ["PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                    #                "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT", "UNIQUE_VISITOR_COUNT",
                    #                "BOUNCE_RATE_%%", "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"]
                    column_name = [
                        "ITEM", "UNIQUE_VISITORS", "VISITS", "PAGE_VIEWS",
                        "BOUNCES", "TIME_SPENT_ON_PAGE_(MIN)",
                        "E89_VIDEO_VIEWS", "E17_FORM_SUCCESS",
                        "FORM_SUBMISSIONS", "TOTAL_WEEKLY_UNIQUE_VISITORS",
                        "ENTRIES", "TOTAL_TIME_SPENT", "ACCOUNT_NUMBER",
                        "DATE", "GRANULARITY", "SEGMENT_ID", "GROUP",
                        "ETL_EXTRACT_DATE"
                    ]
                    # insert_query = db_obj.get_insert_query(
                    #     self.schema + ".STG_ADOBE_CSC_DAILY", column_name)
                    insert_query = db_obj.get_insert_query(
                        self.schema + ".STG_ADOBE_SAMPLE", column_name)

                    # Build the values in the same order as column_name:
                    # the URL (ITEM), the metric counts, then the descriptive
                    # fields.
                    values = []
                    # values = [source_date, self.date_granularity, country, url, pageviews,
                    #           visits, uniquevisitors, bouncerate, averageTimeSpentOnSite, extract_date]
                    values.append(url)
                    if counts[4] == 'INF':
                        counts[4] = 0
                    values.extend(counts)
                    values.append(account_name)
                    values.append(source_date)
                    values.append(self.date_granularity)
                    values.append(self.segment_name)
                    values.append(self.group)
                    values.append(extract_date)
                    # print(values)
                    print('insert')
                    db_obj.execute_sql(cursor, connection, insert_query,
                                       values, 'INSERT')
def main(self):
    '''
    Called from main.py; contains the logic to fetch data from the source
    and save it to the destination.
    :return:
    '''
    # from_ini reads the configuration file for the given section and keys
    # and returns a dict of configuration parameters.
    # print("Date granularity is: {}".format(self.date_granularity))
    adobe_config = Utils.from_ini(
        Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
        'Adobe_Analytics', ('username', 'api_secret'))

    # Get the endpoint URL
    query_url = self.get_endpoint_url('method=Report.Queue')
    print("\n")
    print(query_url)
    print("\n")

    # Get the payload to be passed with the API call
    payload = json.dumps(
        self.get_payload(self.date_from, self.date_to, self.date_granularity))
    print("------------------------------------")
    print("Payload is:")
    print(payload)
    print("------------------------------------")
    print("\n")

    # Prepare the authentication parameters passed in the request header
    nonce_b, iso_time, digest = self.get_unique_connection_parameters(
        adobe_config['api_secret'])
    # Get the header
    head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)
    print("------------------------------------")
    print("Header is:")
    print(head)
    print("------------------------------------")
    print("\n")

    # Call the API to queue the report
    report_queue_api_response = Utils.send_request('POST', query_url, payload,
                                                   head)
    if report_queue_api_response.status_code != 200:
        logger.error(report_queue_api_response.text)
        raise Exception(report_queue_api_response.reason)

    report_queue_response_body = report_queue_api_response.text.encode('ascii')
    temp_var = report_queue_response_body.split(b':')
    report_id = temp_var[1].replace(b'}', b'')
    # print(report_id)

    # Section - 2: Get data for the queued report and save the JSON reply in
    # the shared folder.
    # Build the API URL for retrieving the report
    query_url = self.get_endpoint_url('method=Report.Get')  # 'method=Report.GetMetrics'
    # The body of the API request is enclosed as post_params
    bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
    payload = json.dumps(bodydata)

    counter_error = 0
    while counter_error == 0:
        # Sleep to give the report enough time to be ready; otherwise the API
        # returns "Report not ready".
        print("Start sleep time " + time.strftime("%X"))
        print("\n")
        time.sleep(self.sleep_time)

        # Get connection parameters and the header for retrieving the report
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        # Call the API to get the report
        api_response = Utils.send_request('POST', query_url, payload, head)
        try:
            response_body = json.loads(api_response.text)
            # print("Response Body is: {}".format(response_body))
            print("------------------------------------")
            print("API Response is:")
            print("\n")
            print("Response is: {}".format(response_body))
            print("------------------------------------")
            print("\n")

            metricsdf = pd.DataFrame(response_body["report"]["metrics"])
            datadf = pd.DataFrame(response_body["report"]["data"])
            granularity_list = datadf['name'].tolist()

            # Collect every breakdown entry into a single DataFrame
            breakdowndf = pd.DataFrame()
            for element in response_body["report"]["data"]:
                breakdowndf = pd.concat(
                    [breakdowndf, pd.DataFrame(element["breakdown"])])

            # One column per metric, filled from the positional 'counts' list
            names = metricsdf['name'].tolist()
            for i, name in enumerate(names):
                breakdowndf[name] = [
                    metricname[i]
                    for metricname in list(breakdowndf['counts'].tolist())
                ]
            # print(breakdowndf.head())
            breakdowndf = breakdowndf.drop(['counts', 'url'], axis=1)
            # breakdowndf = breakdowndf[['name', 'e17 Form Success', 'e89 Video Views']]

            # Repeat each granularity label once per breakdown row
            breakdown_length = len(response_body["report"]["data"][0]["breakdown"])
            granularity = {
                i: (i + ";") * breakdown_length
                for i in granularity_list
            }
            final_list = []
            for key, value in granularity.items():
                final_list.extend(value[:-1].split(";"))
            breakdowndf["Granularity"] = final_list
            breakdowndf = breakdowndf.reset_index()
            print(breakdowndf)
            breakdowndf.to_csv(
                r'C:\Users\chanukya.konduru\Documents\testing.csv')

            counter_error = counter_error + 1
        except Exception as e:
            logger.error(e)
            raise
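
# Illustrative note on the granularity expansion in the function above: the
# dict comprehension plus split(";") simply repeats each granularity label once
# per breakdown row. Assuming the labels are unique, this helper is equivalent:
def _expand_granularity(granularity_list, breakdown_length):
    return [label
            for label in granularity_list
            for _ in range(breakdown_length)]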
def main(self):
    '''
    Called from main.py; contains the logic to fetch data from the source
    and save it to the destination.
    :return:
    '''
    # from_ini reads the configuration file for the given section and keys
    # and returns a dict of configuration parameters.
    # print("Date granularity is: {}".format(self.date_granularity))
    adobe_config = Utils.from_ini(
        Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
        'Adobe_Analytics', ('username', 'api_secret'))

    # Get the endpoint URL
    query_url = self.get_endpoint_url('method=Report.Queue')
    print("\n")
    print(query_url)
    print("\n")

    # Helpers for pulling individual metrics out of the positional 'counts' list
    def get_visits(x):
        y = x[0]
        return y

    def get_form_success(x):
        y = x[0]
        return y

    def get_video_views(x):
        y = x[1]
        return y

    def get_return_visits(x):
        y = x[0]
        return y

    # Get the payload to be passed with the API call
    payload = json.dumps(
        self.get_payload(self.date_from, self.date_to, self.date_granularity))
    print("------------------------------------")
    print("Payload is:")
    print(payload)
    print("------------------------------------")
    print("\n")

    # Prepare the authentication parameters passed in the header for authentication
    nonce_b, iso_time, digest = self.get_unique_connection_parameters(
        adobe_config['api_secret'])
    # Get the header
    head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)
    print("------------------------------------")
    print("Header is:")
    print(head)
    print("------------------------------------")
    print("\n")

    # Call the API to queue the report
    report_queue_api_response = Utils.send_request('POST', query_url, payload,
                                                   head)
    if report_queue_api_response.status_code != 200:
        logger.error(report_queue_api_response.text)
        raise Exception(report_queue_api_response.reason)

    report_queue_response_body = report_queue_api_response.text.encode('ascii')
    temp_var = report_queue_response_body.split(b':')
    report_id = temp_var[1].replace(b'}', b'')
    # print(report_id)

    # Section - 2: Get data for the queued report and save the JSON reply in
    # the shared folder.
    # Build the API URL for retrieving the report
    query_url = self.get_endpoint_url('method=Report.Get')
    # The body of the API request is enclosed as post_params
    bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
    payload = json.dumps(bodydata)

    counter_error = 0
    while counter_error == 0:
        # Sleep to give the report enough time to be ready; otherwise the API
        # returns "Report not ready".
        print("Start sleep time " + time.strftime("%X"))
        print("\n")
        time.sleep(self.sleep_time)

        # Get connection parameters and the header for retrieving the report
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        # Call the API to get the report
        api_response = Utils.send_request('POST', query_url, payload, head)
        try:
            response_body = json.loads(api_response.text)
            # print("Response Body is: {}".format(response_body))
            print("------------------------------------")
            print("API Response is:")
            print("\n")
            print("Response is: {}".format(response_body))
            print("------------------------------------")
            print("\n")

            # Save the raw JSON reply for reference
            with open(r'C:\Users\rajkiran.reddy\Desktop\SNow-Projects\AdobeAnalytics\Adobe_data.json', 'w') as f:
                json.dump(response_body, f)

            # Load the JSON data with pandas and transpose it for easier manipulation
            adobe_ana_pd = pd.DataFrame.from_dict(response_body)
            adobe_ana_pd = adobe_ana_pd.T
            # Remove unwanted rows from the DataFrame
            adobe_ana_pd = adobe_ana_pd.drop(adobe_ana_pd.index[1:])

            # The Adobe Analytics metrics live in the 'data' column, so parse it
            # print(adobe_ana_pd['data'][0][0]['name'])
            # print(len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']))
            final_data_df = pd.DataFrame(adobe_ana_pd['data'][0][0]['breakdown'])
            # print(final_data_df.columns)
            final_data_df = final_data_df.rename(columns={"counts": "Counts",
                                                          "name": "Surf ID",
                                                          "url": "URL"})
            final_data_df = final_data_df[["Counts", "Surf ID"]]
            final_data_df['Granularity'] = adobe_ana_pd['data'][0][0]['name']

            if len(adobe_ana_pd['data'][0][0]['breakdown'][0]['counts']) > 1:
                final_data_df['Visits'] = final_data_df['Counts'].map(get_visits)
                final_data_df['e17 Form Success'] = final_data_df['Counts'].map(get_form_success)
                final_data_df['e89 Video Views'] = final_data_df['Counts'].map(get_video_views)
            else:
                final_data_df['Return Visits + Visits'] = final_data_df['Counts'].map(get_return_visits)

            final_data_df_columns = list(final_data_df.columns)
            final_data_df_columns.remove('Counts')
            final_data_df = final_data_df[final_data_df_columns]
            print(final_data_df)

            if 'error' in response_body.keys():
                if 'report_not_ready' in response_body['error']:
                    pass
                else:
                    logger.error(api_response.text)
                    raise Exception(api_response.reason)
            elif 'report' in response_body.keys():
                counter_error = 1
        except Exception as e:
            logger.error(e)
            raise
def main(self):
    '''
    Called from main.py; contains the logic to fetch data from the source
    and save it to the destination.
    :return:
    '''
    # from_ini reads the configuration file for the given section and keys
    # and returns a dict of configuration parameters.
    print("Date granularity is: {}".format(self.date_granularity))
    adobe_config = Utils.from_ini(
        Utils.get_file_path(self.da_path, [SCRIPT_FOLDER_NAME, CONFIG_FILE]),
        'Adobe_Analytics', ('username', 'api_secret'))

    # Get the endpoint URL
    query_url = self.get_endpoint_url('method=Report.Queue')
    print(query_url)

    # Get the payloads to be passed with the API calls
    metric_payload = json.dumps(
        self.get_payload(self.date_from, self.date_to, self.date_granularity))
    # print(metric_payload)
    segment_payload = json.dumps(
        self.get_segment_payload(self.date_from, self.date_to,
                                 self.date_granularity))
    # print(segment_payload)

    # # Prepare the authentication parameters passed in the request header
    # nonce_b, iso_time, digest = self.get_unique_connection_parameters(adobe_config['api_secret'])
    # # Get the header
    # head = self.get_header(adobe_config['username'], digest, nonce_b, iso_time)
    # print(head)

    # Queue one report per payload
    payloads = [metric_payload, segment_payload]
    for payload in payloads:
        time.sleep(10)
        print(payload)

        # Prepare the authentication parameters passed in the request header
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        # Get the header
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)
        print(head)

        # Call the API to queue the report
        report_queue_api_response = Utils.send_request(
            'POST', query_url, payload, head)
        if report_queue_api_response.status_code != 200:
            logger.error(report_queue_api_response.text)
            raise Exception(report_queue_api_response.reason)

        report_queue_response_body = report_queue_api_response.text.encode(
            'ascii')
        temp_var = report_queue_response_body.split(b':')
        report_id = temp_var[1].replace(b'}', b'')
        # print(report_id)

    # Section - 2: Get data for the queued report and save the JSON reply in
    # the shared folder.
    # Build the API URL for retrieving the report
    query_url = self.get_endpoint_url('method=Report.Get')
    # The body of the API request is enclosed as post_params
    bodydata = {'reportID': '' + report_id.decode('ascii') + ''}
    payload = json.dumps(bodydata)

    counter_error = 0
    while counter_error == 0:
        # Sleep to give the report enough time to be ready; otherwise the API
        # returns "Report not ready".
        print("Start sleep time " + time.strftime("%X"))
        time.sleep(self.sleep_time)

        # Get connection parameters and the header for retrieving the report
        nonce_b, iso_time, digest = self.get_unique_connection_parameters(
            adobe_config['api_secret'])
        head = self.get_header(adobe_config['username'], digest, nonce_b,
                               iso_time)

        # Call the API to get the report
        api_response = Utils.send_request('POST', query_url, payload, head)
        try:
            response_body = json.loads(api_response.text)
            print(response_body)
            if 'error' in response_body.keys():
                if 'report_not_ready' in response_body['error']:
                    pass
                else:
                    logger.error(api_response.text)
                    raise Exception(api_response.reason)
            elif 'report' in response_body.keys():
                counter_error = 1
        except Exception as e:
            logger.error(e)
            raise
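
# Illustrative sketch of a bounded version of the Report.Get polling loop used
# throughout this module. Assumptions: send_request behaves like
# Utils.send_request above, build_header is a callable that regenerates the
# nonce/digest header for each attempt, and the API signals a pending report
# with "report_not_ready" as seen in the responses handled above. A sketch of
# the idea, not a drop-in replacement.
def _poll_report(send_request, query_url, payload, build_header, sleep_time,
                 max_attempts=10):
    import json
    import time
    for _ in range(max_attempts):
        time.sleep(sleep_time)
        response = send_request('POST', query_url, payload, build_header())
        body = json.loads(response.text)
        if 'report' in body:
            return body
        if 'report_not_ready' not in str(body.get('error', '')):
            raise Exception(response.reason)
    raise Exception('Report not ready after %d attempts' % max_attempts)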
def response_handler(self, data_df):
    db_obj = PyHdbWrapper()
    cursor, connection = db_obj.connect_hana(
        Utils.get_file_path(self.da_path,
                            [SCRIPT_FOLDER_NAME, HANA_CONFIG_FILE]), 'HANA_ENV')

    # Truncate the staging table before inserting records
    delete_page_url = db_obj.get_delete_query(self.schema + '.STG_' +
                                              str(self.table_name).upper())
    db_obj.execute_sql(cursor, connection, delete_page_url, '', 'DELETE')

    # Extract date is used as a bookmark for loading data into HANA
    extract_date = datetime.datetime.today()

    for index, row in data_df.iterrows():  # Outer loop over day-specific data
        table = row.iloc[0]  # Each row holds a dictionary for one day
        source_date = str(
            datetime.date(table['year'], table['month'], table['day']))
        source_date = datetime.datetime.strptime(source_date, "%Y-%m-%d")
        breakdown = table['breakdown']  # Breakdown is a list

        for i in breakdown:
            if 'breakdown' in i.keys():
                country = i['name']
                # print(i['breakdownTotal'])
                temp = i['breakdown']
                for i in temp:
                    counts = i['counts']
                    pageviews = counts[0]
                    visits = counts[1]
                    uniquevisitors = counts[2]
                    bouncerate = counts[3]
                    averageTimeSpentOnSite = counts[4]
                    url = i['name']
                    print(source_date, country, url + '\n' + pageviews,
                          visits, uniquevisitors, bouncerate)

                    # column_name = ["PERIOD_DATE", "GRANULARITY", "COUNTRY", "URL",
                    #                "PAGE_VIEWS_COUNT", "PAGE_VISITS_COUNT", "UNIQUE_VISITOR_COUNT",
                    #                "BOUNCE_RATE_%%", "AVG_TIME_SPENT_ON_PAGE", "EXTRACT_DATE"]

                    # Read the HANA staging table definition so values can be
                    # mapped to the table's own column names and datatypes.
                    column_names, column_datatypes = get_hana_table_definition(
                        cursor,
                        str(self.schema).upper() + "." + "STG_" +
                        str(self.table_name).upper())
                    resp_dict = {key: None for key in column_names}

                    # TODO: sales_enablement_dataframe and schema_name are not
                    # defined in this function; they must be supplied before
                    # this block can run.
                    if len(sales_enablement_dataframe.to_dict()) > 0:
                        stg_table_name = "STG_" + str(self.table_name).upper()

                        # Sync the source and target structure
                        target_query = "SCHEMA_NAME = '%s' AND TABLE_NAME = '%s'" % (
                            str(self.schema).upper(), stg_table_name)
                        structure_target_query = db_obj.get_select_query(
                            'COLUMN_NAME', 'SYS.M_CS_COLUMNS', target_query)
                        structure_target = db_obj.execute_sql(
                            cursor, connection, structure_target_query, '',
                            'SELECT')
                        # Convert the list of tuples into a flat list of column names
                        structure_target = [i[0] for i in structure_target]
                        sales_enablement_dataframe = self.source_target_structure_sync(
                            sales_enablement_dataframe, structure_target)

                        sales_enablement_columns = sales_enablement_dataframe.columns.tolist()
                        sales_enablement_columns = [
                            col.upper() for col in sales_enablement_columns
                        ]
                        sales_enablement_dataframe.columns = sales_enablement_columns
                        sales_enablement_dataframe = sales_enablement_dataframe.where(
                            (pd.notnull(sales_enablement_dataframe)), None)

                        print("Inserting the records into {}".format(
                            schema_name + '.' + stg_table_name))
                        for record in sales_enablement_dataframe.to_dict('records'):
                            result_dict = generate_hana_store_dict(
                                column_names, column_datatypes, record)
                            insert_table_name = schema_name + '.' + stg_table_name
                            table_values = list(result_dict.values())
                            insert_query = get_insert_query(insert_table_name,
                                                            result_dict)
                            # Insert the record into the staging table
                            db_obj.execute_sql(cursor, connection, insert_query,
                                               tuple(table_values), 'INSERT')
                        logger.info("Records have been Inserted")

                    # Earlier insert path for STG_PAGE_URL_METRICS; it relies
                    # on the commented-out column_name list above, so it no
                    # longer runs.
                    # insert_query = db_obj.get_insert_query(
                    #     self.schema + ".STG_PAGE_URL_METRICS", column_name)
                    # values = [
                    #     source_date, self.date_granularity, country, url,
                    #     pageviews, visits, uniquevisitors, bouncerate,
                    #     averageTimeSpentOnSite, extract_date
                    # ]
                    # print(values)
                    # db_obj.execute_sql(cursor, connection, insert_query, values, 'INSERT')

                    # upsert_statement = "UPSERT \"" + self.schema + "\".\"PAGE_URL_METRICS\" " \
                    #                    "SELECT * FROM \"" + self.schema + "\".\"STG_PAGE_URL_METRICS\""

    # Upsert everything from the staging table into the target table
    stg_table_name = "STG_" + str(self.table_name).upper()
    upsert_query = '''
        UPSERT %s.%s
        SELECT * FROM %s.%s
    ''' % (str(self.schema).upper(), str(self.table_name).upper(),
           str(self.schema).upper(), stg_table_name)
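
# Illustrative sketch: executing the staging-to-target upsert built above
# through the same wrapper, mirroring the execute_sql(cursor, connection,
# <statement>, '', <operation>) call pattern used for the DELETE and INSERT
# statements in this module. The helper name is hypothetical.
def _run_upsert(db_obj, cursor, connection, upsert_statement):
    db_obj.execute_sql(cursor, connection, upsert_statement, '', 'UPSERT')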