Beispiel #1
0
def sanity_check_counts():
    """ Execute the sanity check queries and report the failed cases
    Args:
    No Arguments
    Returns:
    No return Arguments
    """
    subtaskid = createSubTask("sanity check queries", getMaintaskId())
    try:
        conn = dbConnection()
        cur = conn.cursor()
        cur.execute("select query, name, expected_value from birepusr.etl_data_sanity_queries where project_name='servicedesk'")
        rows = cur.fetchall()
        for row in rows:
            conn = dbConnection()
            cur = conn.cursor()
            cur.execute(row[0])
            x = cur.fetchall()
            if x == []:
                actual_value = 0
            else:
                actual_value = x[0][0]

            condition = row[1]
            expected_value = row[2]
            status = 'Success'
            if not expected_value == actual_value:
                status = 'Fail'
                message = '''Expected vs Actual values not matching for check '{0}': expected {1} rows but found {2}'''.format(condition, expected_value, actual_value)
                send_custom_mail('Service-Desk ETL : DATA SANITY TEST FAILED', message)

            executeQuery('''insert into birepusr.etl_data_sanity_test_results(condition, created_date, expected_value, actual_value, status, task_id, project_name) values ('{0}', now(), {1}, {2}, '{3}', '{4}', 'servicedesk')'''.format(condition, expected_value, actual_value, status, subtaskid))
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception, psycopg2.Error) as error:
        insertErrorLog(subtaskid, error)
Beispiel #2
0
def stageJiraTickets():
    """ Insert JIRA issue data into staging db schema
    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask("extract JIRA data to staging", getMaintaskId())
    try:
        df = getJiraTickets()
        executeQuery('DELETE FROM birepusr.jira_sd_stage;')
        if not df.empty:
            # Arranging the columns of pandas dataframe to insert into the DB
            jiraData = df[['assignee', 'jira_case', 'components', 'created',
                           'creator', 'crm_account', 'crm_contact',
                           'description', 'fixVersions', 'issuetype', 'key',
                           'priority', 'reporter', 'resolution',
                           'resolution_date', 'status_description',
                           'status_name', 'subtask', 'summary', 'updated',
                           'versions', 'watches', 'Reporter_Email', 'channel',
                           'product_name', 'request_type', 'assignee_name']]
            engine = dbConnection()
            cur = engine.cursor()
            # Truncating the existing staging data
            output = io.StringIO()
            jiraData.to_csv(output, sep='\t', header=True, index=False)
            output.seek(0)
            copy_query = "COPY birepusr.jira_sd_stage FROM STDOUT csv DELIMITER '\t' NULL ''  ESCAPE '\\' HEADER "
            # inserting data into the database
            cur.copy_expert(copy_query, output)
            engine.commit()
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception, psycopg2.Error) as error:
        insertErrorLog(subtaskid, error)
Beispiel #3
0
def insertSprintIssues(df_sprint_issues, sprint_number, team_dim_id):
    """
    this method perform database insert operation in idw.sprint_issues,
    it insert issues completed in a sprint by given team

    Args:
    dataFrame: first argument is pandas dataframe which contain list of issues for given sprint for respective team
    sprint_number: second argument sprint number to map issues in destination table
    team_dim_id: team dim id to which sprint and issues belong

    Returns:
    No Return
    """
    subtaskid = createSubTask(
        "insert sprint issues in datawarehouse staging table birepusr.stage_sprint_issues for sprint {0}"
        .format(sprint_number), getMaintaskId())
    try:
        df_sprint_issues.apply(lambda sprint: executeQuery(
            dq.insert_issues_in_stage.format(
                team_dim_id, sprint_number, sprint['jira_ticket_id'], sprint[
                    'jira_ticket_key'],
                str(sprint['ticket_details']).replace("\'", ""))),
                               axis=1)
        executeQuery(
            dq.update_issue_load_status.format(sprint_number, team_dim_id))
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
def insertEpics(df_epics, project_dim_id, product_dim_id):
    """
    this method perform database insert operation in idw.epic_dim,
    it insert epics returned by getProjectEpic() for given project

    Args:
    dataFrame: first argument is pandas dataframe which contain list of epics for given project
    project_dim_id: second argument is project dim id for which epics will be isnerted in idw.epic_dim
    product_dim_id: product dim id to which epics belong

    Returns:
    No Return
    """
    subtaskid = createSubTask(
        "insert epic worked on by project in warehouse table in idw.epic_dim",
        getMaintaskId())
    try:
        df_epics.apply(lambda epic: executeQuery((dq.insert_epics.format(
            re.sub('[^A-Za-z0-9 ]+', '', str(epic['epic_name'])), epic[
                'epic_id'], epic['epic_key'], epic['aha_score'], epic[
                    'is_closed'], epic['date_created'], None
            if epic['date_updated'] is None else "'" + epic['date_updated'] +
            "'", None
            if epic['date_resolved'] is None else "'" + epic['date_resolved'] +
            "'", project_dim_id, "'" + epic['aha_reference'] + "'")).replace(
                'None', 'null').replace('nan', 'null')),
                       axis=1)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
def insertProjects():
    """
    db insert method for persisting new projects in warehouse table idw.jira_projects_dim

    Args:
    No Arguments
    Returns:
    No return
    """
    subtaskid = createSubTask(
        "pull projects from JIRA and insert in warehouse table idw.jira_project_dim",
        getMaintaskId())
    try:
        db_projects = getdbProjects()
        jira_projects = getProjects()
        jira_projects["project_id"] = jira_projects["project_id"].astype(int)
        comparison_df = jira_projects.merge(db_projects,
                                            indicator=True,
                                            how='outer',
                                            on='project_id')
        comparison_df[comparison_df['_merge'] == 'left_only'].apply(
            lambda project: executeQuery(
                dq.insert_projects.format(project['project_id'], project[
                    'project_key'], project['project_name'])),
            axis=1)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
Beispiel #6
0
def handler(event, context):
    createMainTask("JIRA data extraction", "JIRA")
    stageJiraTickets()
    executeQuery(backup_raw_data)
    generateContactDim()
    updateRequestTypeDim()
    updateDimCaseOrigin()
    upsertJiraProduct()
    getworkflowStatus()
    generateAssigneeDimension()
    generateFacttable()
    persistJiraProductMap()
    initSecondaryFactGenerator()
    user_department_mapping()
    executeQuery(remove_duplicate_transition_log)
    sanity_check_counts()
    updateMainTask(getMaintaskId(), "SUCCESS", "JIRA data extraction")
def updateMeasuresInTransitionMap():
    """
     This method initiate multiple update statements to update time_spent measure for status and assignee
     It also insert missing assignee for those jira which  never went trough assignee transition
     It update resolution date for all jira in transition map


     Args:
     No Arguments
     Returns:
     No return variable
    """
    subtaskid = createSubTask("calculate and  update measures in transition map table", getMaintaskId())
    returnVal = executeQuery(update_resolution_date_intransition_map)
    returnVal = executeQuery(insert_unchanged_assignee_in_transition_map) if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
    returnVal = executeQuery(workflow_transition_time + " " + update_workflow_transition_time + " " + workflow_assignee_transition_time + " " + update_assignee_transition_time + " " + update_status_change_date_dim_id) if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
    updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def updateEpicDimId():
    """
    this method update epic_dim_id in idw.sprint_issues

    Args:
    No argument

    Returns:
    No Return
    """
    subtaskid = createSubTask("update epic_dim_id in idw.sprint_issues",
                              getMaintaskId())
    try:
        executeQuery(dq.update_epic_dim_id)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
Beispiel #9
0
def generate_mapper():
    """
     This method inserts data into the fact tables and map project and epic dim id

    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask("pull epics worked on by project from JIRA",
                              getMaintaskId())
    try:
        executeQuery(insert_sprint_issues)
        executeQuery(update_issue_status_on_sprint_closure)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
def updateProductId():
    """
    update product dim id using product name matching with project name

    Args:
    No Arguments
    Returns:
    No return
    """
    subtaskid = createSubTask(
        "update product id for projects in idw.jira_project_dim",
        getMaintaskId())
    try:
        executeQuery(dq.update_product_dim_id_for_project)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
Beispiel #11
0
def updateSubTask(subtaskid, status):
    """
    Method to update data extraction start entry whcih corresponds to respective sub task

    Args:
    1. integer : accpets subtask id as first argument for which audit entry needs to be updated
    2. string  : accepts staus as second argument for respective subtask id which needs to be updated

    Returns:
    No return variable
    """
    subtaskupdatequery = ('''update birepusr.etl_audit_details
    set end_date=now(),
    status='{status}'
    where audit_details_id={taskid}''').format(status=status, taskid=subtaskid)
    print('''Subtask Ended with status {0}, taskid :{1}'''.format(
        status, subtaskid))
    executeQuery(subtaskupdatequery)
def updateIsCurrentFlag():
    """
     Update is current workflow status and assignee flag for each jira key in secondary map

     Args:
     No Arguments
     Returns:
     No return variable
    """
    subtaskid = createSubTask("update is current workflow status and assignee flag for each jira key in secondary map", getMaintaskId())
    returnVal = executeQuery(update_is_current_status_flag + " " + update_is_current_assignee_flag)
    updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def generateJiraWorkflowMap():
    """
     Execute the database insert to add jira transition map for each workflow status per jira key

     Args:
     No Arguments
     Returns:
     No return variable
    """
    subtaskid = createSubTask("generate workflow mapping for each active jira key", getMaintaskId())
    returnVal = executeQuery(insert_jira_workflow_map)
    updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
Beispiel #14
0
def updateMainTask(taskid, status, taskname):
    """
    Method to update data extraction start entry whcih corresponds to main task

    Args:
    1. integer : accpets task id as first argument for which audit entry needs to be updated
    2. string  : accepts staus as second argument for respective task id which needs to be updated

    Returns:
    No return variable
    """
    maintaskupdatequery = ('''update birepusr.etl_audit
    set end_date=now(),
    status='{status}'
    where audit_id={taskid} and end_date IS NULL''').format(status=status,
                                                            taskid=taskid)
    executeQuery(maintaskupdatequery)
    print('''Main task Ended with status {0},Maintaskid : {1}'''.format(
        status, taskid))
    if status == "SUCCESS":
        send_completion_mail(None, status, taskname)
def upsertJiraProduct():
    """
    Execute the database insert and update query to insert
    new product or update existing ones based on response form jira rest api

    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask("upsert service desk products in jira_product map table", getMaintaskId())
    try:
        MyJira = Jira(**jira_api)
        df_products = MyJira.getServiceDeskProducts()
        df_product_dim = executeQueryAndReturnDF("select jira_product_name as product_name, jira_product_api_id as api_id from idw.jira_product_dim")
        comparison_df = df_products.merge(df_product_dim, indicator=True, on=['api_id'], how='outer')
        comparison_df[comparison_df['_merge'] == 'left_only'].apply(lambda product: executeQuery(insert_new_product.format(product['product_name_x'], product['api_id'])), axis=1)
        comparison_df = comparison_df[comparison_df['_merge'] == 'both'].query('product_name_x != product_name_y')
        comparison_df.apply(lambda product: executeQuery(update_product_name.format(product['product_name_x'], product['api_id'])), axis=1)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
def persistJiraProductMap():
    """
    get list of products tagged to a jira key and persist in jira_product map table
    so that user can generate time_to_close metric for each product

    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask("calculate and  update time to close measures in jira_product map table", getMaintaskId())
    returnVal = executeQuery(insert_jira_product_map)
    updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def generateContactDim():
    """
     Execute the database insert and update query to populate
     contacts from mscrm.contact_entity table,

    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask("copy data from mscrm.contact_entity to idw.contact_dim", getMaintaskId())
    returnVal = executeQuery(contact_dim_insert + " " + contact_dim_update)
    updateSubTask(subtaskid, "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(subtaskid, returnVal)
def putSprintSummary(sprints_for_team):
    """
    This method insert sprint summary for team in idw.sprint_summary table

    Args:
    Dataframe: pandas dataframe with all the latest sprints for team

    Returns:
    No Return
    """
    subtaskid = createSubTask("initialize database insert of sprint summary", getMaintaskId())
    try:
        sprints_for_team.apply(lambda sprint: executeQuery(dq.insert_sprints.format(sprint['sprint_serial_id'], sprint['sprint_number'], sprint['sprint_name'], sprint['team_dim_id'], sprint['sprint_start_date'], sprint['sprint_end_date'], sprint['sprint_complete_date'])), axis=1)
        updateSubTask(subtaskid, "SUCCESS")
    except (Exception) as error:
        insertErrorLog(subtaskid, error)
def updateDimCaseOrigin():
    """
     Execute the database insert and update query to populate
     missing case origin identified from source table in mscrm

    Args:
    No Arguments
    Returns:
    No return variable
    """
    subtaskid = createSubTask(
        "add missing case origin from source data into idw.jira_origin_dim ",
        getMaintaskId())
    returnVal = executeQuery(load_dim_case_origin)
    updateSubTask(subtaskid,
                  "SUCCESS") if returnVal == 'SUCCESS' else insertErrorLog(
                      subtaskid, returnVal)
Beispiel #20
0
def executeStaticQueries():
    executeQuery(queries)
Beispiel #21
0
def truncateStaging():
    executeQuery('delete from birepusr.stage_sprint_issues;')
Beispiel #22
0
def backupStaging(maintaskId):
    executeQuery(backupQuery.format(maintaskId))