def printexcel(self, filepath):
    """Write the QC report to an Excel workbook and save it at `filepath`.

    The workbook is made of four sheets:

    * ``General`` -- dataset file/source, tool version, run date, column
      and row totals, metadata usage, missing/extra headers and the
      number of invalid rows.
    * ``Row Statistics`` -- number of rows per "filled columns" and per
      "valid columns" percentage bucket, each with a bar chart.
    * ``Column Statistics`` -- the dataframe returned by
      ``self.__column_stats_2_df()``, one row per dataframe row.
    * ``Cleaned values`` (when ``self.corrected`` is truthy) or
      ``Cleaning suggestions`` -- for every column report, a two-column
      table of invalid values and their corrected value (or ``'Null'``).

    :param filepath: destination passed unchanged to ``Workbook.save``.
    """
    wb = Workbook()

    # ---- sheet "General" ----
    ws1 = wb.active
    ws1.title = 'General'
    col = ws1.column_dimensions['A']
    col.font = Font(bold=True)
    col.width = 35

    daterun = datetime.now()
    # use a pretty value for the boolean
    use_metadata = 'Yes' if self.__table.with_metadata else 'No'

    general_rows = [
        ['Dataset file', self.__table.filename],
        ['Dataset filepath', self.__table.source],
        ['QC tool version', __version__],
        ['Date qc run', daterun.strftime("%d/%m/%Y %H:%M:%S")],
        ['Total columns', self.total_columns],
        ['Total rows', self.total_rows],
        ['Metadata used', use_metadata],
        # one header name per cell, to the right of the label
        ['Missing columns'] + self.missing_headers,
        ['Extra columns'] + self.invalid_headers,
        ['Invalid rows', self.__total_invalid_rows],
    ]
    for row in general_rows:
        ws1.append(row)

    # ---- sheet "Row Statistics" ----
    ws2 = wb.create_sheet("Row Statistics")
    col2 = ws2.column_dimensions['A']
    col2.width = 40
    col2.font = Font(bold=True)

    # (label text, stats key suffix) for every percentage bucket
    buckets = (
        ('0-24', '0_24'),
        ('25-49', '25_49'),
        ('50-74', '50_74'),
        ('75-99', '75_99'),
        ('100', '100'),
    )
    # The "filled" rows come first (rows 1..n), then the "valid" rows
    # (rows n+1..2n); the chart ranges below rely on this layout.
    for kind, stats in (('filled', self.filled_rows_stats),
                        ('valid', self.valid_rows_stats)):
        for label, suffix in buckets:
            ws2.append([
                'rows with {}% of the columns {}'.format(label, kind),
                stats.get('{}_{}'.format(kind, suffix)),
            ])

    def add_bar_chart(title, style, first_row, last_row, anchor):
        """Add a horizontal bar chart of ws2 rows first_row..last_row."""
        chart = BarChart()
        chart.type = 'bar'
        chart.style = style
        chart.y_axis.title = '# of rows'
        chart.title = title
        # column B holds the counts, column A the bucket labels; the
        # labels are categories, not a data series
        counts = Reference(ws2, min_col=2, min_row=first_row,
                           max_row=last_row)
        labels = Reference(ws2, min_col=1, min_row=first_row,
                           max_row=last_row)
        chart.add_data(counts)
        chart.set_categories(labels)
        chart.shape = 4
        ws2.add_chart(chart, anchor)

    n_buckets = len(buckets)
    add_bar_chart('Number of rows per filled columns', 11,
                  1, n_buckets, 'D1')
    add_bar_chart('Number of rows per valid columns', 12,
                  n_buckets + 1, 2 * n_buckets, 'D20')

    # ---- sheet "Column Statistics" ----
    ws3 = wb.create_sheet("Column Statistics")
    # make bold the first row (the dataframe header)
    title_row = ws3.row_dimensions[1]
    title_row.font = Font(bold=True)
    # get the columns stats and fill the rows
    df_cstats = self.__column_stats_2_df()
    for r in dataframe_to_rows(df_cstats, index=False, header=True):
        ws3.append(r)
    for i in range(1, len(df_cstats.columns) + 1):
        ws3.column_dimensions[get_column_letter(i)].width = 20

    # ---- sheet "Cleaned values" / "Cleaning suggestions" ----
    if self.corrected:
        cleaning_sheetname = 'Cleaned values'
    else:
        cleaning_sheetname = 'Cleaning suggestions'
    ws4 = wb.create_sheet(cleaning_sheetname)

    left_border = Border(left=Side(style='thick'))
    right_border = Border(right=Side(style='thick'))
    header_border = Border(left=Side(style='thick'),
                           right=Side(style='thick'))
    center_alignment = Alignment(horizontal='center')

    # every column report takes two adjacent sheet columns:
    # 1-2 for the first report, 3-4 for the second and so on
    for index, colreport in enumerate(self.__columnreports.values()):
        start_col = 1 + 2 * index
        end_col = start_col + 1
        ws4.column_dimensions[get_column_letter(start_col)].width = 15
        ws4.column_dimensions[get_column_letter(end_col)].width = 15

        # merge the two cells in the first row and put the variable
        # name as title
        ws4.merge_cells(start_row=1, start_column=start_col,
                        end_row=1, end_column=end_col)
        header = ws4.cell(row=1, column=start_col,
                          value=colreport.qcfield.name)
        header.border = header_border
        header.alignment = center_alignment
        header.font = Font(bold=True)

        # add also the subtitles
        ws4.cell(row=2, column=start_col,
                 value='Invalid Value').border = left_border
        ws4.cell(row=2, column=end_col,
                 value='Corrected Value').border = right_border

        # collect all the correction suggestions for invalid values:
        # constraint corrections, datatype corrections, then the
        # values suggested to become null (constraint, then datatype)
        corrections = list(colreport.ccorrections)
        corrections.extend(colreport.dcorrections)
        corrections.extend((value, 'Null') for value in colreport.cnulls)
        corrections.extend((value, 'Null') for value in colreport.dnulls)

        # fill the rows for that variable, below title and subtitles
        for row_idx, pair in enumerate(corrections, start=3):
            left = ws4.cell(row=row_idx, column=start_col, value=pair[0])
            left.border = left_border
            right = ws4.cell(row=row_idx, column=end_col, value=pair[1])
            right.border = right_border

    wb.save(filepath)