class Excel_01(): def __init__(self, file_name, mode='r'): if mode == 'r': self.workbook = load_workbook(file_name) elif mode == 'w': self.workbook = Workbook() #.Workbook(file_name) else: raise Exception('Error: init Excel class with error mode: %s' % mode) def get_sheet(self, sheet_name): names = [] if isinstance(sheet_name, str): if sheet_name.endswith('*'): for name in self.workbook.sheet_names(): if sheet_name[:-1] in name: names.append(name) else: names.append(sheet_name) elif isinstance(sheet_name, list): names = sheet_name else: raise Exception('Error: invalidity sheet_name: %s' % sheet_name) return names def read(self, sheet_name): ''' sheet_name:Excel 中标签页名称 return:[[],[]……] ''' sheet = self.workbook.sheet_by_name(sheet_name) nrows = sheet.nrows data = [] for i in range(nrows): data.append(sheet.row_values(i)) return data def write(self, data, sheet_name): sheet = self.workbook.add_worksheet(sheet_name) red = self.workbook.add_format({'bg_color': 'red', 'color': 'white'}) gray = self.workbook.add_format({'bg_color': 'gray', 'color': 'white'}) green = self.workbook.add_format({ 'bg_color': 'green', 'color': 'white' }) blue = self.workbook.add_format({'bg_color': 'blue', 'color': 'white'}) orange = self.workbook.add_format({ 'bg_color': 'orange', 'color': 'white' }) for i in range(len(data)): for j in range(len(data[i])): if str(data[i][j]) == 'failure': sheet.write(i, j, str(data[i][j]), red) elif str(data[i][j]) == 'NO': sheet.write(i, j, str(data[i][j]), gray) elif str(data[i][j]) == 'blocked': sheet.write(i, j, str(data[i][j]), orange) elif str(data[i][j]) == 'skipped': sheet.write(i, j, str(data[i][j]), blue) elif str(data[i][j]) == 'success': sheet.write(i, j, str(data[i][j]), green) else: sheet.write(i, j, data[i][j]) def close(self): self.workbook.close()
def Dashboard(self, filename): """ Creating Three Dashboards 1. IBRD LOARN KPI Dashboard, 2. IBRD DATA QUALITY STATISTICS Dashboard & 3. IBRD DATA AGGREGATION Dashboard """ df = pd.read_csv(filename) dashboardpath = "./data/excel_dashboard/final_dashboard1.xlsx" wbk = Workbook(dashboardpath) ### 1. IBRD LOARN KPI Dashboard ##### # Total Number of Projects dash1 = pd.read_sql( "select count(distinct Project_ID) No_of_projects from Project", self.engine) # Loans Per Loan Status dash2 = pd.read_sql( """select Loan_Status, count(distinct Loan_Number) No_of_loans from loan group by Loan_Status Order by count(Loan_Number) Desc""", self.engine) # Total Disbursed Loans / Loans Held dash3 = pd.read_sql( """select country, sum(Disbursed_Amount) loans_held from (select distinct country, Disbursed_Amount from country c inner join loan l on c.country_code = l.country_code)s group by country order by sum(Disbursed_Amount) desc limit 10""", self.engine) # Repaid Portion dash4 = pd.read_sql( """select Sum(Repaid_to_IBRD+Repaid_3rd_Party)/Sum(Disbursed_Amount) Repaid_portion from loan""", self.engine) # Total Loans dash5 = pd.read_sql( """select COUNT(distinct loan_Number) loans from loan""", self.engine) # Approved Loans dash6 = pd.read_sql( """select count(distinct Loan_Number) Approved_loans from loan where Loan_Status = 'Approved' """, self.engine) # Percentage of Approved Loans dash10 = pd.DataFrame() dash10['Percent_Approved_Loans'] = dash6['Approved_loans'] / dash5[ 'loans'] # Repaid Loans dash7 = pd.read_sql( """select count(distinct Loan_Number) Repaid_loans from loan where Loan_Status like 'Repaid%' """, self.engine) # Cancelled Loans dash8 = pd.read_sql( """select count(distinct Loan_Number) Cancelled_loans from loan where Loan_Status like 'cancel%' """, self.engine) # Aggregated Totals dash9 = pd.read_sql( "select sum(Original_Principal_Amount) Total_Principal,sum(Cancelled_Amount) Total_Cancelled,sum(Undisbursed_Amount) Total_Undisbursed,sum(Disbursed_Amount) Total_Disbursed,sum(Repaid_to_IBRD) Total_Repaid_IBRD,sum(Due_to_IBRD) Total_Due_to_IBRD,sum(Borrowers_Obligation) Total_Borrowers_Obligation,Sum(Sold_3rd_Party) Total_Sold_3rd_Party,sum(Repaid_3rd_Party) Total_Repaid_3rd_Party,sum(Due_3rd_Party) Total_Due_3rd_Party,Sum(Loans_Held) Total_Loans_Held from loan", self.engine) # Aggregated Averages dash11 = pd.read_sql( "select avg(Original_Principal_Amount) avg_Principal,avg(Cancelled_Amount) avg_Cancelled,avg(Undisbursed_Amount) avg_Undisbursed,avg(Disbursed_Amount) avg_Disbursed,avg(Repaid_to_IBRD) avg_Repaid_IBRD,avg(Due_to_IBRD) avg_Due_to_IBRD,avg(Borrowers_Obligation) avg_Borrowers_Obligation,avg(Sold_3rd_Party) avg_Sold_3rd_Party,avg(Repaid_3rd_Party) avg_Repaid_3rd_Party,avg(Due_3rd_Party) avg_Due_3rd_Party,avg(Loans_Held) avg_Loans_Held from loan", self.engine) # Create Worksheets in the Workbook ws4 = wbk.add_worksheet('Loan_KPI_Dashboard') ws3 = wbk.add_worksheet('Data_Qlty_Stat_Dashboard') ws2 = wbk.add_worksheet('Data_Aggregation_Dashboard') # Format cell borders via a configurable RxC box def draw_frame_border(workbook, worksheet, first_row, first_col, rows_count, cols_count, thickness=1): if cols_count == 1 and rows_count == 1: # whole cell worksheet.conditional_format( first_row, first_col, first_row, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'bottom': thickness, 'left': thickness, 'right': thickness }) }) elif rows_count == 1: # left cap worksheet.conditional_format( first_row, first_col, first_row, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'left': thickness, 'bottom': thickness }) }) # top and bottom sides worksheet.conditional_format( first_row, first_col + 1, first_row, first_col + cols_count - 2, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'bottom': thickness }) }) # right cap worksheet.conditional_format( first_row, first_col + cols_count - 1, first_row, first_col + cols_count - 1, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'right': thickness, 'bottom': thickness }) }) elif cols_count == 1: # top cap worksheet.conditional_format( first_row, first_col, first_row, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'left': thickness, 'right': thickness }) }) # left and right sides worksheet.conditional_format( first_row + 1, first_col, first_row + rows_count - 2, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'left': thickness, 'right': thickness }) }) # bottom cap worksheet.conditional_format( first_row + rows_count - 1, first_col, first_row + rows_count - 1, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'bottom': thickness, 'left': thickness, 'right': thickness }) }) else: # top left corner worksheet.conditional_format( first_row, first_col, first_row, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'left': thickness }) }) # top right corner worksheet.conditional_format( first_row, first_col + cols_count - 1, first_row, first_col + cols_count - 1, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'top': thickness, 'right': thickness }) }) # bottom left corner worksheet.conditional_format( first_row + rows_count - 1, first_col, first_row + rows_count - 1, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'bottom': thickness, 'left': thickness }) }) # bottom right corner worksheet.conditional_format( first_row + rows_count - 1, first_col + cols_count - 1, first_row + rows_count - 1, first_col + cols_count - 1, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({ 'bottom': thickness, 'right': thickness }) }) # top worksheet.conditional_format( first_row, first_col + 1, first_row, first_col + cols_count - 2, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({'top': thickness}) }) # left worksheet.conditional_format( first_row + 1, first_col, first_row + rows_count - 2, first_col, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({'left': thickness}) }) # bottom worksheet.conditional_format( first_row + rows_count - 1, first_col + 1, first_row + rows_count - 1, first_col + cols_count - 2, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({'bottom': thickness}) }) # right worksheet.conditional_format( first_row + 1, first_col + cols_count - 1, first_row + rows_count - 2, first_col + cols_count - 1, { 'type': 'formula', 'criteria': 'True', 'format': workbook.add_format({'right': thickness}) }) # Formating objects to be used in the Dashboard. bold = wbk.add_format({'bold': 1}) heading_format = wbk.add_format({'bold': 1}) heading_format.set_font_size(25) number_format = wbk.add_format({'bold': 1, 'num_format': '#,##0'}) percentage_format = wbk.add_format({'bold': 1, 'num_format': '0.0%'}) merge_format = wbk.add_format({ 'bold': 1, 'border': 1, 'align': 'center', 'valign': 'vcenter', 'fg_color': 'yellow' }) # Merge 3 cells. ws4.merge_range('E1:H1', 'Merged Range', merge_format) # Write to IBRD LOARN KPI Dashboard ws4.write_row('B3', ['Total Projects'], bold) ws4.write_row('B4', dash1.iloc[:, 0], number_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 1, 1, 1, 2) ws4.write_row('D3', ['Total Loans'], bold) ws4.write_row('D4', dash5.iloc[:, 0], number_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 3, 1, 1, 2) ws4.write_row('F3', ['Repaid Percentage'], bold) ws4.write_row('F4', dash4.iloc[:, 0], percentage_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 5, 1, 1, 2) ws4.write_row('H3', ['Approved Percentage'], bold) ws4.write_row('H4', dash10.iloc[:, 0], percentage_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 7, 1, 1, 2) ws4.write_row('J3', ['Repaid Loans'], bold) ws4.write_row('J4', dash7.iloc[:, 0], number_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 9, 1, 1, 2) ws4.write_row('L3', ['Cancelled Loans'], bold) ws4.write_row('L4', dash8.iloc[:, 0], number_format) for row in range(2, 4): draw_frame_border(wbk, ws4, row, 11, 1, 1, 2) ws4.write_row('D6', ['Total Principal'], bold) ws4.write_row('D7', dash9.iloc[:, 0], number_format) for row in range(5, 7): draw_frame_border(wbk, ws4, row, 3, 1, 1, 2) ws4.write_row('H6', ['Total Cancelled'], bold) ws4.write_row('H7', dash9.iloc[:, 1], number_format) for row in range(5, 7): draw_frame_border(wbk, ws4, row, 7, 1, 1, 2) ws4.write_row('J6', ['Total UnDisbursed'], bold) ws4.write_row('J7', dash9.iloc[:, 2], number_format) for row in range(5, 7): draw_frame_border(wbk, ws4, row, 9, 1, 1, 2) ws4.write_row('F6', ['Total Disbursed'], bold) ws4.write_row('F7', dash9.iloc[:, 3], number_format) for row in range(5, 7): draw_frame_border(wbk, ws4, row, 5, 1, 1, 2) ws4.write_row('F9', ['Repaid to IBRD'], bold) ws4.write_row('F10', dash9.iloc[:, 4], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 5, 1, 1, 2) ws4.write_row('D9', ['Due to IBRD'], bold) ws4.write_row('D10', dash9.iloc[:, 5], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 3, 1, 1, 2) ws4.write_row('B9', ['Borrowers Obligation'], bold) ws4.write_row('B10', dash9.iloc[:, 6], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 1, 1, 1, 2) ws4.write_row('J9', ['Sold 3rd_Party'], bold) ws4.write_row('J10', dash9.iloc[:, 7], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 9, 1, 1, 2) ws4.write_row('H9', ['Repaid 3rd_Party'], bold) ws4.write_row('H10', dash9.iloc[:, 8], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 7, 1, 1, 2) ws4.write_row('L9', ['Due 3rd_Party'], bold) ws4.write_row('L10', dash9.iloc[:, 9], number_format) for row in range(8, 10): draw_frame_border(wbk, ws4, row, 11, 1, 1, 2) ws4.write_row('B6', ['Total Loans Held'], bold) ws4.write_row('B7', dash9.iloc[:, 10], number_format) for row in range(5, 7): draw_frame_border(wbk, ws4, row, 1, 1, 1, 2) ws4.write_row('D13', ['AVG Principal'], bold) ws4.write_row('D14', dash11.iloc[:, 0], number_format) for row in range(12, 14): draw_frame_border(wbk, ws4, row, 3, 1, 1, 2) ws4.write_row('H13', ['AVG Cancelled'], bold) ws4.write_row('H14', dash11.iloc[:, 1], number_format) for row in range(12, 14): draw_frame_border(wbk, ws4, row, 7, 1, 1, 2) ws4.write_row('J13', ['AVG UnDisbursed'], bold) ws4.write_row('J14', dash11.iloc[:, 2], number_format) for row in range(12, 14): draw_frame_border(wbk, ws4, row, 9, 1, 1, 2) ws4.write_row('F13', ['AVG Disbursed'], bold) ws4.write_row('F14', dash11.iloc[:, 3], number_format) for row in range(12, 14): draw_frame_border(wbk, ws4, row, 5, 1, 1, 2) ws4.write_row('F17', ['AVG Repaid to IBRD'], bold) ws4.write_row('F18', dash11.iloc[:, 4], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 5, 1, 1, 2) ws4.write_row('D17', ['AVG Due to IBRD'], bold) ws4.write_row('D18', dash11.iloc[:, 5], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 3, 1, 1, 2) ws4.write_row('B17', ['AVG Borrowers Obligation'], bold) ws4.write_row('B18', dash11.iloc[:, 6], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 1, 1, 1, 2) ws4.write_row('J17', ['AVG Sold 3rd_Party'], bold) ws4.write_row('J18', dash11.iloc[:, 7], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 9, 1, 1, 2) ws4.write_row('H17', ['AVG Repaid 3rd_Party'], bold) ws4.write_row('H18', dash11.iloc[:, 8], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 7, 1, 1, 2) ws4.write_row('L17', ['AVG Due 3rd_Party'], bold) ws4.write_row('L18', dash11.iloc[:, 9], number_format) for row in range(16, 18): draw_frame_border(wbk, ws4, row, 11, 1, 1, 2) ws4.write_row('B13', ['AVG Loans Held'], bold) ws4.write_row('B14', dash11.iloc[:, 10], number_format) for row in range(12, 14): draw_frame_border(wbk, ws4, row, 1, 1, 1, 2) #making the heading ws4.write_row('E1', ['IBRD LOAN KPI Dashboard'], heading_format) #Removing gridlines ws4.hide_gridlines(2) # create a data headings. headings = ['Loan Status', 'number'] heading2 = ['Country', ' Loan Held'] # Write a row of data . ws4.write_row('B38', heading2, bold) # Write a column of data ws4.write_column('B39', dash3.iloc[:, 0], number_format) ws4.write_column('C39', dash3.iloc[:, 1], number_format) for col in range(1, 3): for row in range(37, 48): draw_frame_border(wbk, ws4, row, col, 1, 1, 2) #Adding a chart for Loan Status Vs Loans chart2 = wbk.add_chart({'type': 'column'}) # Define Series chart2.add_series({ 'name': '= Loan_KPI_Dashboard !$A$3', 'categories': '=Loan_KPI_Dashboard!$B$39:$B$49', 'values': '=Loan_KPI_Dashboard!$C$39:$C$49' }) # Insert the chart into the worksheet. ws4.insert_chart('E38', chart2) # Add a chart title and some axis labels. chart2.set_title({'name': 'Loans Held per Country'}) chart2.set_x_axis({'name': 'Country'}) chart2.set_y_axis({'name': 'Total Loan Held'}) # Write a row of data starting from 'A1' # with bold format . ws4.write_row('B21', headings, bold) # Write column data into the Top 10 Countries with Loans table ws4.write_column('B22', dash2.iloc[:, 0], number_format) ws4.write_column('C22', dash2.iloc[:, 1], number_format) for col in range(1, 3): for row in range(20, 32): draw_frame_border(wbk, ws4, row, col, 1, 1, 2) #Adding a chart chart1 = wbk.add_chart({'type': 'column'}) # Add Series chart1.add_series({ 'name': '= Loan_KPI_Dashboard !$A$3', 'categories': '=Loan_KPI_Dashboard!$B$22:$B$32', 'values': '=Loan_KPI_Dashboard!$C$22:$C$32' }) # Insert the chart into the worksheet ws4.insert_chart('E21', chart1) # Add a chart title and some axis labels. chart1.set_title({'name': 'Number of loans per loan status'}) chart1.set_x_axis({'name': 'Loan_Status'}) chart1.set_y_axis({'name': 'No_of_loans'}) # Apply a conditional format to the cell range. ws4.conditional_format('C22:C32', {'type': '3_color_scale'}) ws4.conditional_format('C39:C49', {'type': '3_color_scale'}) ### 2. IBRD DATA QUALITY STATISTICS Dashboard ##### # Data Quality, Missing Values, Data Accuracy, Statistical Calculations # values = int(len(df['Loan Number'])) stats = pd.DataFrame() stats['count'] = df.count(0) stats['missing_values'] = [(values - x) for x in stats['count']] stats.reset_index(inplace=True) #getting summary statistics stat = df.describe() #transpose the data stat = stat.T #drop count column and reset index stat.drop(['count'], inplace=True, axis=1) stat.reset_index(inplace=True) stats_final = pd.merge(stats, stat, how='left', on='index') stats_final.fillna(0, inplace=True) ###### 3. IBRD DATA AGGREGATION Dashboard ####### #######Total, Average, Minimum, Maximum ######### data = pd.read_sql( "select processed_date,Original_Principal_Amount,Cancelled_Amount,Undisbursed_Amount,Disbursed_Amount,Repaid_to_IBRD,Due_to_IBRD,Borrowers_Obligation,Sold_3rd_Party,Repaid_3rd_Party,Due_3rd_Party,Loans_Held from loan", self.engine) #introducing the month column data['processed_month'] = data['processed_date'].dt.to_period( 'M').astype('str') #drop processed date data.drop(['processed_date'], inplace=True, axis=1) #Make data agrregations total = data.groupby(['processed_month']).sum() total['category'] = 'Total' total.reset_index(inplace=True) mean = data.groupby(['processed_month']).mean() mean['category'] = 'Average' mean.reset_index(inplace=True) min_ = data.groupby(['processed_month']).min() min_['category'] = 'Min' min_.reset_index(inplace=True) median = data.groupby(['processed_month']).median() median['category'] = 'Median' median.reset_index(inplace=True) max_ = data.groupby(['processed_month']).max() max_['category'] = 'Max' max_.reset_index(inplace=True) #appending the various dataframes final = pd.concat([total, mean], ignore_index=True) final = pd.concat([final, min_], ignore_index=True) final = pd.concat([final, median], ignore_index=True) final = pd.concat([final, max_], ignore_index=True) ##set category as index and stack dataframe final.set_index('category', inplace=True) final = pd.DataFrame(final.stack()) final.reset_index(inplace=True) #creating the column names names = final.iloc[0].to_list() names[0:0] = ['Category'] names[1:1] = ['Field'] names.pop(2) names.pop(2) final.columns = names final = final.loc[~final['Field'].isin(['processed_month'])] #making the headings for the Data Aggregation and Data Quality Dashboards ws2.write_row('E1', ['IBRD DATA AGGREGATION Dashboard'], heading_format) ws3.write_row('E1', ['IBRD DATA QUALITY STATISTICS Dashboard'], heading_format) ## Writing IBRD DATA AGGREGATION Dashboard ## # Removing gridlines ws2.hide_gridlines(2) # create a data list . heading3 = final.columns # Write a row of data starting from 'A1' # with bold format . ws2.write_row(1, 0, heading3, bold) # Write a column of data starting from # 'A2', 'B2', 'C2' respectively . for col in range(0, len(heading3)): ws2.write_column(2, col, final.iloc[:, col], number_format) for col in range(0, len(heading3)): for row in range(1, 57): draw_frame_border(wbk, ws2, row, col, 1, 1, 2) ### Writing to IBRD DATA QUALITY STATISTICS Dashboard ### heading4 = stats_final.columns # Write a row of data ws3.write_row(1, 0, heading4, bold) # Write a column of data for col in range(0, len(heading4)): ws3.write_column(2, col, stats_final.iloc[:, col], number_format) for col in range(0, len(heading4)): for row in range(1, 38): draw_frame_border(wbk, ws3, row, col, 1, 1, 2) # Close Workbook wbk.close() return dashboardpath