コード例 #1
0
    def printexcel(self, filepath):
        """Write the QC report to an Excel workbook saved at ``filepath``.

        The workbook contains four sheets:

        * ``General`` - dataset file/source, tool version, run date, column
          and row totals, metadata usage, missing/extra headers and the
          number of invalid rows.
        * ``Row Statistics`` - number of rows per "filled columns" and per
          "valid columns" percentage bucket, plus one bar chart for each
          of the two groups.
        * ``Column Statistics`` - the dataframe returned by
          ``self.__column_stats_2_df()``, header row in bold.
        * ``Cleaned values`` (when ``self.corrected`` is truthy) or
          ``Cleaning suggestions`` (otherwise) - for every column report, a
          two-column block of (invalid value, corrected value) pairs.

        :param filepath: destination passed unchanged to ``Workbook.save``.
        """
        wb = Workbook()

        ## spreadsheet "General" ##
        ws_general = wb.active
        ws_general.title = 'General'
        label_col = ws_general.column_dimensions['A']
        label_col.font = Font(bold=True)
        label_col.width = 35

        daterun = datetime.now()
        # use a pretty value for the boolean
        use_metadata = 'Yes' if self.__table.with_metadata else 'No'

        general_rows = [
            ['Dataset file', self.__table.filename],
            ['Dataset filepath', self.__table.source],
            ['QC tool version', __version__],
            ['Date qc run', daterun.strftime("%d/%m/%Y %H:%M:%S")],
            ['Total columns', self.total_columns],
            ['Total rows', self.total_rows],
            ['Metadata used', use_metadata],
            # one header name per cell, to the right of the label
            ['Missing columns'] + self.missing_headers,
            ['Extra columns'] + self.invalid_headers,
            ['Invalid rows', self.__total_invalid_rows],
        ]
        for row in general_rows:
            ws_general.append(row)

        ## spreadsheet "Row Statistics" ##
        ws_rows = wb.create_sheet("Row Statistics")
        stats_label_col = ws_rows.column_dimensions['A']
        stats_label_col.width = 40
        stats_label_col.font = Font(bold=True)

        # (label written in column A, key looked up in the stats mapping)
        filled_buckets = [
            ('rows with 0-24% of the columns filled', 'filled_0_24'),
            ('rows with 25-49% of the columns filled', 'filled_25_49'),
            ('rows with 50-74% of the columns filled', 'filled_50_74'),
            ('rows with 75-99% of the columns filled', 'filled_75_99'),
            ('rows with 100% of the columns filled', 'filled_100'),
        ]
        valid_buckets = [
            ('rows with 0-24% of the columns valid', 'valid_0_24'),
            ('rows with 25-49% of the columns valid', 'valid_25_49'),
            ('rows with 50-74% of the columns valid', 'valid_50_74'),
            ('rows with 75-99% of the columns valid', 'valid_75_99'),
            ('rows with 100% of the columns valid', 'valid_100'),
        ]
        for label, key in filled_buckets:
            ws_rows.append([label, self.filled_rows_stats.get(key)])
        for label, key in valid_buckets:
            ws_rows.append([label, self.valid_rows_stats.get(key)])

        # Row ranges are derived from the bucket lists so that the charts
        # keep pointing at the right cells if rows are added or removed
        # above them.
        filled_first = 1
        filled_last = len(filled_buckets)
        valid_first = filled_last + 1
        valid_last = filled_last + len(valid_buckets)

        def add_bar_chart(title, style, first_row, last_row, anchor):
            """Anchor a bar chart of rows first_row..last_row at ``anchor``."""
            chart = BarChart()
            chart.type = 'bar'
            chart.style = style
            chart.y_axis.title = '# of rows'
            chart.title = title
            # Column B holds the counts (the plotted series); column A
            # holds the bucket labels (used as the categories).
            counts = Reference(ws_rows, min_col=2,
                               min_row=first_row, max_row=last_row)
            labels = Reference(ws_rows, min_col=1,
                               min_row=first_row, max_row=last_row)
            chart.add_data(counts)
            # categories must be set after the series exist
            chart.set_categories(labels)
            chart.shape = 4
            ws_rows.add_chart(chart, anchor)

        add_bar_chart('Number of rows per filled columns', 11,
                      filled_first, filled_last, 'D1')
        add_bar_chart('Number of rows per valid columns', 12,
                      valid_first, valid_last, 'D20')

        ## Column Statistics Sheet ##
        ws_cols = wb.create_sheet("Column Statistics")
        # make the first (header) row bold
        ws_cols.row_dimensions[1].font = Font(bold=True)
        # get the columns stats and fill the rows
        df_cstats = self.__column_stats_2_df()
        for r in dataframe_to_rows(df_cstats, index=False, header=True):
            ws_cols.append(r)
        for i in range(1, len(df_cstats.columns) + 1):
            ws_cols.column_dimensions[get_column_letter(i)].width = 20

        ## Cleaning suggestions / Cleaned values sheet ##
        if self.corrected:
            cleaning_sheetname = 'Cleaned values'
        else:
            cleaning_sheetname = 'Cleaning suggestions'
        ws_clean = wb.create_sheet(cleaning_sheetname)
        left_border = Border(left=Side(style='thick'))
        right_border = Border(right=Side(style='thick'))
        header_border = Border(left=Side(style='thick'),
                               right=Side(style='thick'))
        center_alignment = Alignment(horizontal='center')

        # every column report occupies two adjacent sheet columns
        start_col = 1
        for _name, colreport in self.__columnreports.items():
            end_col = start_col + 1
            for col_idx in (start_col, end_col):
                ws_clean.column_dimensions[
                    get_column_letter(col_idx)].width = 15

            # merge the two cells in the first row and put the variable
            # name as title
            ws_clean.merge_cells(start_row=1,
                                 start_column=start_col,
                                 end_row=1,
                                 end_column=end_col)
            header = ws_clean.cell(row=1,
                                   column=start_col,
                                   value=colreport.qcfield.name)
            header.border = header_border
            header.alignment = center_alignment
            header.font = Font(bold=True)
            # add also the subtitles
            ws_clean.cell(row=2, column=start_col,
                          value='Invalid Value').border = left_border
            ws_clean.cell(row=2, column=end_col,
                          value='Corrected Value').border = right_border

            # collect all the correction suggestions for invalid values:
            # constraint violations first, then datatype violations, then
            # the values suggested to become 'Null' (same two groups)
            corrections = list(colreport.ccorrections)
            corrections.extend(colreport.dcorrections)
            corrections.extend(
                (value, 'Null') for value in colreport.cnulls)
            corrections.extend(
                (value, 'Null') for value in colreport.dnulls)

            # fill the rows for that variable, starting below the subtitles
            for row_idx, pair in enumerate(corrections, start=3):
                left = ws_clean.cell(row=row_idx, column=start_col,
                                     value=pair[0])
                left.border = left_border
                right = ws_clean.cell(row=row_idx, column=end_col,
                                      value=pair[1])
                right.border = right_border
            start_col += 2

        wb.save(filepath)