def sanity_check_counts(): """ Execute the sanity check queries and report the failed cases Args: No Arguments Returns: No return Arguments """ subtaskid = createSubTask("sanity check queries", getMaintaskId()) try: conn = dbConnection() cur = conn.cursor() cur.execute("select query, name, expected_value from birepusr.etl_data_sanity_queries where project_name='servicedesk'") rows = cur.fetchall() for row in rows: conn = dbConnection() cur = conn.cursor() cur.execute(row[0]) x = cur.fetchall() if x == []: actual_value = 0 else: actual_value = x[0][0] condition = row[1] expected_value = row[2] status = 'Success' if not expected_value == actual_value: status = 'Fail' message = '''Expected vs Actual values not matching for check '{0}': expected {1} rows but found {2}'''.format(condition, expected_value, actual_value) send_custom_mail('Service-Desk ETL : DATA SANITY TEST FAILED', message) executeQuery('''insert into birepusr.etl_data_sanity_test_results(condition, created_date, expected_value, actual_value, status, task_id, project_name) values ('{0}', now(), {1}, {2}, '{3}', '{4}', 'servicedesk')'''.format(condition, expected_value, actual_value, status, subtaskid)) updateSubTask(subtaskid, "SUCCESS") except (Exception, psycopg2.Error) as error: insertErrorLog(subtaskid, error)
def stageJiraTickets(): """ Insert JIRA issue data into staging db schema Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("extract JIRA data to staging", getMaintaskId()) try: df = getJiraTickets() executeQuery('DELETE FROM birepusr.jira_sd_stage;') if not df.empty: # Arranging the columns of pandas dataframe to insert into the DB jiraData = df[['assignee', 'jira_case', 'components', 'created', 'creator', 'crm_account', 'crm_contact', 'description', 'fixVersions', 'issuetype', 'key', 'priority', 'reporter', 'resolution', 'resolution_date', 'status_description', 'status_name', 'subtask', 'summary', 'updated', 'versions', 'watches', 'Reporter_Email', 'channel', 'product_name', 'request_type', 'assignee_name']] engine = dbConnection() cur = engine.cursor() # Truncating the existing staging data output = io.StringIO() jiraData.to_csv(output, sep='\t', header=True, index=False) output.seek(0) copy_query = "COPY birepusr.jira_sd_stage FROM STDOUT csv DELIMITER '\t' NULL '' ESCAPE '\\' HEADER " # inserting data into the database cur.copy_expert(copy_query, output) engine.commit() updateSubTask(subtaskid, "SUCCESS") except (Exception, psycopg2.Error) as error: insertErrorLog(subtaskid, error)
def insertSprintIssues(df_sprint_issues, sprint_number, team_dim_id): """ this method perform database insert operation in idw.sprint_issues, it insert issues completed in a sprint by given team Args: dataFrame: first argument is pandas dataframe which contain list of issues for given sprint for respective team sprint_number: second argument sprint number to map issues in destination table team_dim_id: team dim id to which sprint and issues belong Returns: No Return """ subtaskid = createSubTask( "insert sprint issues in datawarehouse staging table birepusr.stage_sprint_issues for sprint {0}" .format(sprint_number), getMaintaskId()) try: df_sprint_issues.apply(lambda sprint: executeQuery( dq.insert_issues_in_stage.format( team_dim_id, sprint_number, sprint['jira_ticket_id'], sprint[ 'jira_ticket_key'], str(sprint['ticket_details']).replace("\'", ""))), axis=1) executeQuery( dq.update_issue_load_status.format(sprint_number, team_dim_id)) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def insertEpics(df_epics, project_dim_id, product_dim_id): """ this method perform database insert operation in idw.epic_dim, it insert epics returned by getProjectEpic() for given project Args: dataFrame: first argument is pandas dataframe which contain list of epics for given project project_dim_id: second argument is project dim id for which epics will be isnerted in idw.epic_dim product_dim_id: product dim id to which epics belong Returns: No Return """ subtaskid = createSubTask( "insert epic worked on by project in warehouse table in idw.epic_dim", getMaintaskId()) try: df_epics.apply(lambda epic: executeQuery((dq.insert_epics.format( re.sub('[^A-Za-z0-9 ]+', '', str(epic['epic_name'])), epic[ 'epic_id'], epic['epic_key'], epic['aha_score'], epic[ 'is_closed'], epic['date_created'], None if epic['date_updated'] is None else "'" + epic['date_updated'] + "'", None if epic['date_resolved'] is None else "'" + epic['date_resolved'] + "'", project_dim_id, "'" + epic['aha_reference'] + "'")).replace( 'None', 'null').replace('nan', 'null')), axis=1) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def insertProjects(): """ db insert method for persisting new projects in warehouse table idw.jira_projects_dim Args: No Arguments Returns: No return """ subtaskid = createSubTask( "pull projects from JIRA and insert in warehouse table idw.jira_project_dim", getMaintaskId()) try: db_projects = getdbProjects() jira_projects = getProjects() jira_projects["project_id"] = jira_projects["project_id"].astype(int) comparison_df = jira_projects.merge(db_projects, indicator=True, how='outer', on='project_id') comparison_df[comparison_df['_merge'] == 'left_only'].apply( lambda project: executeQuery( dq.insert_projects.format(project['project_id'], project[ 'project_key'], project['project_name'])), axis=1) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def handler(event, context): createMainTask("JIRA data extraction", "JIRA") stageJiraTickets() executeQuery(backup_raw_data) generateContactDim() updateRequestTypeDim() updateDimCaseOrigin() upsertJiraProduct() getworkflowStatus() generateAssigneeDimension() generateFacttable() persistJiraProductMap() initSecondaryFactGenerator() user_department_mapping() executeQuery(remove_duplicate_transition_log) sanity_check_counts() updateMainTask(getMaintaskId(), "SUCCESS", "JIRA data extraction")
def updateMeasuresInTransitionMap(): """ This method initiate multiple update statements to update time_spent measure for status and assignee It also insert missing assignee for those jira which never went trough assignee transition It update resolution date for all jira in transition map Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("calculate and update measures in transition map table", getMaintaskId()) returnVal = executeQuery(update_resolution_date_intransition_map) returnVal = executeQuery(insert_unchanged_assignee_in_transition_map) if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal) returnVal = executeQuery(workflow_transition_time + " " + update_workflow_transition_time + " " + workflow_assignee_transition_time + " " + update_assignee_transition_time + " " + update_status_change_date_dim_id) if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def updateEpicDimId(): """ this method update epic_dim_id in idw.sprint_issues Args: No argument Returns: No Return """ subtaskid = createSubTask("update epic_dim_id in idw.sprint_issues", getMaintaskId()) try: executeQuery(dq.update_epic_dim_id) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def generate_mapper(): """ This method inserts data into the fact tables and map project and epic dim id Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("pull epics worked on by project from JIRA", getMaintaskId()) try: executeQuery(insert_sprint_issues) executeQuery(update_issue_status_on_sprint_closure) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def updateProductId(): """ update product dim id using product name matching with project name Args: No Arguments Returns: No return """ subtaskid = createSubTask( "update product id for projects in idw.jira_project_dim", getMaintaskId()) try: executeQuery(dq.update_product_dim_id_for_project) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def updateSubTask(subtaskid, status): """ Method to update data extraction start entry whcih corresponds to respective sub task Args: 1. integer : accpets subtask id as first argument for which audit entry needs to be updated 2. string : accepts staus as second argument for respective subtask id which needs to be updated Returns: No return variable """ subtaskupdatequery = ('''update birepusr.etl_audit_details set end_date=now(), status='{status}' where audit_details_id={taskid}''').format(status=status, taskid=subtaskid) print('''Subtask Ended with status {0}, taskid :{1}'''.format( status, subtaskid)) executeQuery(subtaskupdatequery)
def updateIsCurrentFlag(): """ Update is current workflow status and assignee flag for each jira key in secondary map Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("update is current workflow status and assignee flag for each jira key in secondary map", getMaintaskId()) returnVal = executeQuery(update_is_current_status_flag + " " + update_is_current_assignee_flag) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def generateJiraWorkflowMap(): """ Execute the database insert to add jira transition map for each workflow status per jira key Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("generate workflow mapping for each active jira key", getMaintaskId()) returnVal = executeQuery(insert_jira_workflow_map) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def updateMainTask(taskid, status, taskname): """ Method to update data extraction start entry whcih corresponds to main task Args: 1. integer : accpets task id as first argument for which audit entry needs to be updated 2. string : accepts staus as second argument for respective task id which needs to be updated Returns: No return variable """ maintaskupdatequery = ('''update birepusr.etl_audit set end_date=now(), status='{status}' where audit_id={taskid} and end_date IS NULL''').format(status=status, taskid=taskid) executeQuery(maintaskupdatequery) print('''Main task Ended with status {0},Maintaskid : {1}'''.format( status, taskid)) if status == "SUCCESS": send_completion_mail(None, status, taskname)
def upsertJiraProduct(): """ Execute the database insert and update query to insert new product or update existing ones based on response form jira rest api Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("upsert service desk products in jira_product map table", getMaintaskId()) try: MyJira = Jira(**jira_api) df_products = MyJira.getServiceDeskProducts() df_product_dim = executeQueryAndReturnDF("select jira_product_name as product_name, jira_product_api_id as api_id from idw.jira_product_dim") comparison_df = df_products.merge(df_product_dim, indicator=True, on=['api_id'], how='outer') comparison_df[comparison_df['_merge'] == 'left_only'].apply(lambda product: executeQuery(insert_new_product.format(product['product_name_x'], product['api_id'])), axis=1) comparison_df = comparison_df[comparison_df['_merge'] == 'both'].query('product_name_x != product_name_y') comparison_df.apply(lambda product: executeQuery(update_product_name.format(product['product_name_x'], product['api_id'])), axis=1) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def persistJiraProductMap(): """ get list of products tagged to a jira key and persist in jira_product map table so that user can generate time_to_close metric for each product Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("calculate and update time to close measures in jira_product map table", getMaintaskId()) returnVal = executeQuery(insert_jira_product_map) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def generateContactDim(): """ Execute the database insert and update query to populate contacts from mscrm.contact_entity table, Args: No Arguments Returns: No return variable """ subtaskid = createSubTask("copy data from mscrm.contact_entity to idw.contact_dim", getMaintaskId()) returnVal = executeQuery(contact_dim_insert + " " + contact_dim_update) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def putSprintSummary(sprints_for_team): """ This method insert sprint summary for team in idw.sprint_summary table Args: Dataframe: pandas dataframe with all the latest sprints for team Returns: No Return """ subtaskid = createSubTask("initialize database insert of sprint summary", getMaintaskId()) try: sprints_for_team.apply(lambda sprint: executeQuery(dq.insert_sprints.format(sprint['sprint_serial_id'], sprint['sprint_number'], sprint['sprint_name'], sprint['team_dim_id'], sprint['sprint_start_date'], sprint['sprint_end_date'], sprint['sprint_complete_date'])), axis=1) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def updateDimCaseOrigin(): """ Execute the database insert and update query to populate missing case origin identified from source table in mscrm Args: No Arguments Returns: No return variable """ subtaskid = createSubTask( "add missing case origin from source data into idw.jira_origin_dim ", getMaintaskId()) returnVal = executeQuery(load_dim_case_origin) updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog( subtaskid, returnVal)
def executeStaticQueries(): executeQuery(queries)
def truncateStaging(): executeQuery('delete from birepusr.stage_sprint_issues;')
def backupStaging(maintaskId): executeQuery(backupQuery.format(maintaskId))