def test_df(df, column_list): subtaskid = createSubTask( "Create dataframe of planview and temp table string", getMaintaskId()) try: val = [] val_list = [] for i in df: val = [] for v in i: x = list(v.values())[0] val.append(x) val_list.append(val) df1 = pd.DataFrame(val_list) df1.columns = [column_list] global dfObj dfObj = dfObj.append(df1) df1.columns = [column_list] entityname = 'temp_table' tablestring = " create temp table " + entityname + " (" for columnname in column_list: tablestring = tablestring + '''{0} varchar(100)'''.format( columnname) if column_list.index(columnname) < len(column_list) - 1: tablestring = tablestring + "," global temp_table tablestring = tablestring + ");" temp_table = tablestring except (Exception) as error: insertErrorLog(subtaskid, error)
def sanity_check_counts(): """ Execute the sanity check queries and report the failed cases Args: No Arguments Returns: No return Arguments """ subtaskid = createSubTask("sanity check queries", getMaintaskId()) try: conn = dbConnection() cur = conn.cursor() cur.execute("select query, name, expected_value from birepusr.etl_data_sanity_queries where project_name='planview'") rows = cur.fetchall() for row in rows: conn = dbConnection() cur = conn.cursor() cur.execute(row[0]) x = cur.fetchall() if x == []: actual_value = 0 else: actual_value = x[0][0] condition = row[1] expected_value = row[2] status = 'Success' if not expected_value == actual_value: status = 'Fail' message = '''Expected vs Actual values not matching for check '{0}': expected {1} rows but found {2}'''.format(condition, expected_value, actual_value) send_custom_mail('Service-Desk ETL : DATA SANITY TEST FAILED', message) executeQuery('''insert into birepusr.etl_data_sanity_test_results(condition, created_date, expected_value, actual_value, status, task_id,project_name) values ('{0}', now(), {1}, {2}, '{3}', '{4}','planview')'''.format(condition, expected_value, actual_value, status, subtaskid)) updateSubTask(subtaskid, "SUCCESS") except (Exception, psycopg2.Error) as error: insertErrorLog(subtaskid, error)
def getPlanviewData(mylist, column_list, ppm_id): subtaskid = createSubTask( "Create dataframe of planview and temp table string", getMaintaskId()) try: entityname = 'temp_table' strmy = "{" tablestring = " create temp table " + entityname + " (" for columnname in column_list: strmy = strmy + '''"{0}": "{1}"'''.format( columnname, str(mylist[column_list.index(columnname)] ['elementValue']).replace("\"", "").replace("\\", "\\\\")) tablestring = tablestring + '''{0} varchar(100)'''.format( columnname) if column_list.index(columnname) < len(column_list) - 1: strmy = strmy + ',' tablestring = tablestring + "," strmy = strmy + '}' global temp_table tablestring = tablestring + ");" temp_table = tablestring global dfObj dfObj = dfObj.append(dict(json.loads(strmy)), ignore_index=True) dfObj = dfObj[column_list] dfObj.fillna(value='NULL', inplace=True) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def loadEntityData(): subtaskid = createSubTask("Load planview data into tables", getMaintaskId()) try: conn = dbConnection() cur = conn.cursor() cur.execute( "select entity_ppm_id from idw.planview_entity_dim order by load_sequence" ) rows1 = cur.fetchall() # rows1 = [5] for row in rows1: ppm_id = row[0] #ppm_id=row cur.execute( "select column_select_list from idw.planview_entity_dim where entity_ppm_id={0}" .format(ppm_id)) rows = cur.fetchall() json_dict = rows[0][0].values() get_list = ",".join([str(e) for e in json_dict]) mList = [int(e) if e.isdigit() else e for e in get_list.split(",")] json_keys = rows[0][0].keys() get_Cols = ",".join([str(e) for e in json_keys]) column_list = [ int(e) if e.isdigit() else e for e in get_Cols.split(",") ] data = getData(ppm_id, mList) df = data['methodValues'] print(df) test_df(df, column_list) ##df.apply(lambda row: getPlanviewData(row, column_list, ppm_id)) createDf(ppm_id, dfObj, temp_table) print(dfObj) dfObj.drop(columns=column_list, inplace=True) df.drop(df.index, inplace=True) updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error) subtaskid = createSubTask("Load planview data into tables", getMaintaskId()) try: updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def initUpdateSprintMetrics(): """ this method initialize metrics update queries for all newly loaded sprints whose metrics is not Args: No Arguments Returns: No Return """ subtaskid = createSubTask( "update sprint merics in idw.jira_sprint_summary", getMaintaskId()) try: updateSprintMetrics() updateSubTask(subtaskid, "SUCCESS") except (Exception, psycopg2.Error) as error: insertErrorLog(subtaskid, error)
def generateTimeSpentMetrics(): """ this method initialize metrics update queries for all newly loaded sprints Args: No Arguments Returns: No Return """ subtaskid = createSubTask( "update time-spent merics by project feature in idw.task_time_spent_fact", getMaintaskId()) try: execute_proc('idw.feature_time_spent_calculation_proc') updateSubTask(subtaskid, "SUCCESS") except (Exception, psycopg2.Error) as error: insertErrorLog(subtaskid, error)
def updateSprintMetrics(): """ This method trigger metrics update queries for a given sprint for given team mertics includes estimated_points,completed points, active team members working on team etc. Args: No Arguments Returns: No Return """ subtaskid = createSubTask("update metrics for newly added sprints", getMaintaskId()) executeQuery(dq.update_estimate_sprint_points) executeQuery(dq.update_actual_completed_sprint_points) executeQuery(dq.update_completed_sprint_points) executeQuery(dq.update_team_members) executeQuery(dq.update_actual_sprint_date) executeQuery(dq.update_sprint_days) updateSubTask(subtaskid, "SUCCESS")
def loadEntityData(): """ This function initiate data extraction from planview for all major entities like Project, Tasks, Resources, Unit etc. Args: No Argument Returns: No Return """ subtaskid = al.createSubTask("Initiate data extraction from planview", al.getMaintaskId()) planview_entities = executeQueryAndReturnDF( "select entity_ppm_id,column_select_list,entity_name from idw.planview_entity_dim order by load_sequence" ) for entity in planview_entities.iterrows(): # get entity details required to pull data from planview ppm_id = int(entity[1]['entity_ppm_id']) column_select_list = entity[1]['column_select_list'] entity_name = entity[1]['entity_name'] get_list = ",".join([str(e) for e in column_select_list.values()]) mList = [int(e) if e.isdigit() else e for e in get_list.split(",")] json_keys = column_select_list.keys() get_Cols = ",".join([str(e) for e in json_keys]) column_list = [ int(e) if e.isdigit() else e for e in get_Cols.split(",") ] data = planview.getData(ppm_id, mList) df = data['methodValues'] dfObj = process_entity_data(df, column_list) temp_table = get_table_schema(column_list, 'pv_entity_temp_table') createDf(dfObj, temp_table, entity_name) dfObj.drop(columns=column_list, inplace=True) df.drop(df.index, inplace=True) al.updateSubTask(subtaskid, "SUCCESS")
def createDf(ppm_id, df, temp_table): subtaskid = createSubTask( "Create temp table to load the planview data into db tables", getMaintaskId()) try: conn = dbConnection() cur = conn.cursor() cur.execute('''{0}'''.format(temp_table)) output = io.StringIO() df.to_csv(output, sep='|', header=True, index=False) output.seek(0) copy_query = "COPY temp_table FROM STDOUT csv DELIMITER '|' NULL '' ESCAPE '\\' HEADER " cur.copy_expert(copy_query, output) cur.execute( '''select entity_name from idw.planview_entity_dim where entity_ppm_id = {0}''' .format(ppm_id)) ppm = cur.fetchall() proc_name = ("idw.proc_" + ppm[0][0]).lower() cur.callproc(proc_name) conn.commit() updateSubTask(subtaskid, "SUCCESS") except (Exception) as error: insertErrorLog(subtaskid, error)
def process_entity_data(df, column_list): """ This function process raw data received from planview. Planview provide data as ordered dict, this function convert ordered dict in dataframe so that data can be imported to table Args: dataframe : take dataframe with raw data as input string : list of columns for whcih data needs to be populated in dataframe Returns: dataframe : return formatted dataframe with required columns """ subtaskid = al.createSubTask( "Create dataframe of planview and temp table string", al.getMaintaskId()) dfObj = pd.DataFrame() try: val = [] val_list = [] for i in df: val = [] for v in i: x = list(v.values())[0] val.append(x) val_list.append(val) df1 = pd.DataFrame(val_list) df1.columns = [column_list] dfObj = dfObj.append(df1) df1.columns = [column_list] al.updateSubTask(subtaskid, "SUCCESS") return dfObj except (Exception) as error: al.insertErrorLog(subtaskid, error)
def timesheet_entry_user(): """ This function process generates data using timesheet and sprint data, to insert timesheet entry into planview. Args: No Arguments Returns: No Return """ subtaskid = al.createSubTask( "Populate timesheet for latest sprints in planview", al.getMaintaskId()) try: #Fetch user ids for processing planview_entities = executeQueryAndReturnDF(dq.get_userlist_to_populate_timesheet) users = planview_entities['user_list'] ts_columns = {'companyid':'companyId','entrytypeid':'entryTypeId','timesheetid':'timesheetId','userid':'userId', 'level1id':'level1Id','level2id':'level2Id','entrydate':'entryDate','entryid':'entryId','level3id':'level3Id','entryhours':'entryHours'} if users[0] is not None: mList = [int(e) if e.isdigit() else int(e) for e in users[0].split(',')] startDate = planview_entities['min'] #Fetch timesheet ids for above user list sheet = planview.getTimesheet_users(mList,startDate[0]) column_names = ['timesheetId','userId','startDate','endDate','entryId','entryDate','entryHours','project_ppm_id','task_ppm_id'] user_timesheet_data = pd.DataFrame(columns=column_names) for tsrow in sheet.itertuples(index=True, name='Pandas'): if not tsrow.entriesProject: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'userId':tsrow.userId, 'startDate':tsrow.startDate, 'endDate':tsrow.endDate,'entryId': None,'entryDate': None,'entryHours': None,'project_ppm_id': None,'task_ppm_id': None}, ignore_index=True) else: for entry in tsrow.entriesProject: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId, 'userId':tsrow.userId, 'startDate':tsrow.startDate, 'endDate':tsrow.endDate,'entryId': entry['entryId'],'entryDate': entry['entryDate'],'entryHours': entry['entryHours'],'project_ppm_id': entry['level1Id'],'task_ppm_id': entry['level2Id']}, ignore_index=True) sprint_number = executeQueryAndReturnDF(dq.get_sprints_for_timesheet) sprint_list = sprint_number['sprint_number'][0] conn = dbConnection() cur = conn.cursor() #export timesheet details into temporary table cur.execute('''create temp table timesheet_data (timesheetId bigint, userId bigint, startDate date, endDate date, entryId bigint, entryDate date,entryHours numeric, project_ppm_id bigint, task_id bigint) ''') output = io.StringIO() user_timesheet_data.to_csv(output, sep='|', header=True, index=False) output.seek(0) copy_query = "COPY timesheet_data FROM STDOUT csv DELIMITER '|' NULL '' ESCAPE '\\' HEADER " cur.copy_expert(copy_query, output) #Joining timesheet data and sprint data query = dq.timesheet_population_query.format(sprint_list) timesheet_entry = pd.read_sql_query(query, coerce_float=False, con=conn) conn.commit() timesheet_entry.rename(columns=ts_columns,inplace=True) init_timesheet_insert(timesheet_entry,'New') cur.execute('''update idw.jira_sprint_summary set is_timesheet_populated = true where sprint_number in ({0})'''.format(sprint_list)) conn.commit() else: print("User list is Empty") #Processing failed timesheet entries failed_entries_df = executeQueryAndReturnDF("select companyId,entryTypeId,timesheetId,userId,level1Id,level2Id,to_char(entryDate,'YYYY/MM/DD') entryDate,entryId,level3Id,entryHours from idw.planview_failed_timesheetID") failed_entries_df.rename(columns=ts_columns,inplace=True) init_timesheet_insert(failed_entries_df,'Existing') al.updateSubTask(subtaskid, "SUCCESS") except Exception as error: al.insertErrorLog(subtaskid, error)
def delete_entries(): subtaskid = al.createSubTask("Delete timesheet entries for a team", al.getMaintaskId()) try: timesheet_team = executeQueryAndReturnDF(dq.delete_timesheet_entries) for team in timesheet_team.itertuples(index=True, name='Pandas'): print(team.resource_list) users = team.resource_list if users is not None: mList = [ int(e) if e.isdigit() else int(e) for e in users.split(',') ] team_id = team.team_dim_id startDate = team.from_date endDate = team.to_date #Fetch timesheet ids for above user list sheet = planview.getTimesheet_users(mList, startDate) column_names = [ 'timesheetId', 'userId', 'startDate', 'endDate', 'entryId', 'entryDate', 'entryHours', 'project_ppm_id', 'task_ppm_id' ] user_timesheet_data = pd.DataFrame(columns=column_names) for tsrow in sheet.itertuples(index=True, name='Pandas'): if not tsrow.entriesProject: user_timesheet_data = user_timesheet_data.append( { 'timesheetId': tsrow.timesheetId, 'userId': tsrow.userId, 'startDate': tsrow.startDate, 'endDate': tsrow.endDate, 'entryId': None, 'entryDate': None, 'entryHours': None, 'project_ppm_id': None, 'task_ppm_id': None }, ignore_index=True) else: for entry in tsrow.entriesProject: user_timesheet_data = user_timesheet_data.append( { 'timesheetId': tsrow.timesheetId, 'userId': tsrow.userId, 'startDate': tsrow.startDate, 'endDate': tsrow.endDate, 'entryId': entry['entryId'], 'entryDate': entry['entryDate'], 'entryHours': entry['entryHours'], 'project_ppm_id': entry['level1Id'], 'task_ppm_id': entry['level2Id'] }, ignore_index=True) print(user_timesheet_data.to_string()) #export timesheet details into temporary table conn = dbConnection() cur = conn.cursor() cur.execute( '''create temp table timesheet_data_delete (timesheetId bigint, userId bigint, startDate date, endDate date, entryId bigint, entryDate date,entryHours numeric, project_ppm_id bigint, task_id bigint) ''' ) output = io.StringIO() user_timesheet_data.to_csv(output, sep='|', header=True, index=False) output.seek(0) copy_query = "COPY timesheet_data_delete FROM STDOUT csv DELIMITER '|' NULL '' ESCAPE '\\' HEADER " cur.copy_expert(copy_query, output) query = '''select timesheetid,userid,string_agg(entryid::varchar(15), ', ') AS entryid from timesheet_data_delete where entrydate between '{0}' and '{1}' group by timesheetid,userid'''.format( startDate, endDate) entries = pd.read_sql_query(query, con=conn) print(entries) conn.commit() for i in entries.itertuples(index=True, name='Pandas'): print(i.timesheetid) print(i.entryid) entries = i.entryid if entries is not None: entry_List = [ int(e) if e.isdigit() else int(e) for e in entries.split(',') ] print(entry_List) timesheet_id = i.timesheetid #delete timesheet entries for timesheetid planview.deleteTimesheet_entries(timesheet_id, entry_List) cur.execute( '''update idw.timesheet_refresh_log set is_timesheet_refreshed = true where team_dim_id = {0}''' .format(team_id)) conn.commit() except (Exception) as error: al.insertErrorLog(subtaskid, error)
def timesheet_entry_all_user(): """ This function pull timesheet entries from planview for users and insert into planview_timesheet_entries table Args: No Arguments Returns: No Return """ subtaskid = al.createSubTask( "Pull timesheet entries from planview for users and insert into planview_timesheet_entries table", al.getMaintaskId()) try: #Fetch user ids for processing planview_entities = executeQueryAndReturnDF(dq.get_user_list_to_pull_timesheet_entries) conn = dbConnection() cur = conn.cursor() for index, row in planview_entities.iterrows(): users = row['user_list'] if users is not None: mList = [int(e) if e.isdigit() else int(e) for e in users.split(',')] else: raise Exception("User list is Empty") startDate = row['min'] #Fetch timesheet entries for user list sheet = planview.getTimesheet_users(mList,startDate) column_names = ['timesheetId', 'companyId', 'entryDate', 'entryHours', 'entryId', 'entryTypeId', 'externalId', 'internalRate', 'billableRate', 'isBillable', 'isProductive', 'level1Id', 'level2Id', 'level3Id', 'locationId', 'notes', 'state', 'userId'] user_timesheet_data = pd.DataFrame(columns=column_names) for tsrow in sheet.itertuples(index=True, name='Pandas'): for entry in tsrow.entriesProject: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry['companyId'], 'entryDate': entry['entryDate'],'entryHours': entry['entryHours'],'entryId': entry['entryId'],'entryTypeId': entry['entryTypeId'] ,'externalId': entry['externalId'] ,'internalRate': entry['internalRate'] ,'billableRate': entry['billableRate'] ,'isBillable': entry['isBillable'] ,'isProductive': entry['isProductive'] ,'level1Id': entry['level1Id'],'level2Id': entry['level2Id'],'level3Id': entry['level3Id'],'locationId': entry['locationId'],'notes': entry['notes'],'state': entry['state'],'userId':tsrow.userId}, ignore_index=True) for entry1 in tsrow.entriesOther: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry1['companyId'], 'entryDate': entry1['entryDate'],'entryHours': entry1['entryHours'],'entryId': entry1['entryId'],'entryTypeId': entry1['entryTypeId'] ,'externalId': entry1['externalId'] ,'internalRate': entry1['internalRate'] ,'billableRate': entry1['billableRate'] ,'isBillable': entry1['isBillable'] ,'isProductive': entry1['isProductive'] ,'level1Id': entry1['level1Id'],'level2Id': entry1['level2Id'],'level3Id': entry1['level3Id'],'locationId': entry1['locationId'],'notes': entry1['notes'],'state': entry1['state'],'userId':tsrow.userId}, ignore_index=True) for entry2 in tsrow.entriesAccount: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry2['companyId'], 'entryDate': entry2['entryDate'],'entryHours': entry2['entryHours'],'entryId': entry2['entryId'],'entryTypeId': entry2['entryTypeId'] ,'externalId': entry2['externalId'] ,'internalRate': entry2['internalRate'] ,'billableRate': entry2['billableRate'] ,'isBillable': entry2['isBillable'] ,'isProductive': entry2['isProductive'] ,'level1Id': entry2['level1Id'],'level2Id': entry2['level2Id'],'level3Id': entry2['level3Id'],'locationId': entry2['locationId'],'notes': entry2['notes'],'state': entry2['state'],'userId':tsrow.userId}, ignore_index=True) for entry3 in tsrow.entriesPortfolio: user_timesheet_data = user_timesheet_data.append({'timesheetId': tsrow.timesheetId,'companyId': entry3['companyId'], 'entryDate': entry3['entryDate'],'entryHours': entry3['entryHours'],'entryId': entry3['entryId'],'entryTypeId': entry3['entryTypeId'] ,'externalId': entry3['externalId'] ,'internalRate': entry3['internalRate'] ,'billableRate': entry3['billableRate'] ,'isBillable': entry3['isBillable'] ,'isProductive': entry3['isProductive'] ,'level1Id': entry3['level1Id'],'level2Id': entry3['level2Id'],'level3Id': entry3['level3Id'],'locationId': entry3['locationId'],'notes': entry3['notes'],'state': entry3['state'],'userId':tsrow.userId}, ignore_index=True) user_timesheet_data['isBillable'] = user_timesheet_data['isBillable'].astype('bool') user_timesheet_data['isProductive'] = user_timesheet_data['isProductive'].astype('bool') #export timesheet details into temporary table cur.execute('''drop table if exists timesheet_data;''') cur.execute('''create temp table timesheet_data (timesheetid numeric, companyid numeric, entrydate date, entryhours numeric, entryid numeric, entrytypeid numeric, externalid varchar(100), internalrate numeric, billablerate numeric, isbillable boolean, isproductive boolean, level1id numeric, level2id numeric, level3id numeric, locationid numeric, notes text, state text, userid numeric);''') output = io.StringIO() user_timesheet_data.to_csv(output, sep='~', header=True, index=False) output.seek(0) copy_query = "COPY timesheet_data FROM STDOUT csv DELIMITER '~' NULL '' ESCAPE '\\' HEADER " cur.copy_expert(copy_query, output) conn.commit() cur.execute(dq.update_is_active_flag_for_ts_entries) cur.execute(dq.insert_timesheet_entries) cur.execute(dq.update_timesheet_dim_ids) conn.commit() al.updateSubTask(subtaskid, "SUCCESS") except Exception as error: print(error) send_custom_mail("Failed to fetch timesheet entries from planview", error) al.insertErrorLog(subtaskid, error)