コード例 #1
0
ファイル: data_compare.py プロジェクト: janice5tian/pydqc
def _insert_compare_string_results(string_results, ws, row_height):
    """
    Insert the comparison results of string type columns into a worksheet

    Parameters
    ----------
    string_results: iterable of dict
        One entry per column. Every entry has a 'column' key, plus either
        'result_df' (indexable: [0] is the summary dataframe, [1] is the
        value counts dataframe) or 'error_msg' when 'result_df' is absent
    ws: Excel worksheet instance
    row_height: float
        Height of the row, forwarded to _adjust_ws
    """

    # construct thin border
    thin = Side(border_style="thin", color="000000")
    border = Border(top=thin, left=thin, right=thin, bottom=thin)

    # per-row height overrides, keyed by worksheet row index
    row_heights = {}

    # loop and output result
    for result in string_results:
        column = result['column']

        # no result for this column: output the error message and move on
        if 'result_df' not in result:
            ws.append([column, result['error_msg']])
            error_row = ws.max_row
            for col in ('A', 'B'):
                ws['%s%d' % (col, error_row)].style = 'Bad'
            ws.append([''])
            continue

        result_df = result['result_df'][0][['feature', 'value', 'graph']]
        value_counts_df = result['result_df'][1]
        # NOTE(review): the return value of _insert_df is used below as the
        # worksheet row index where the inserted block starts - confirm
        head_row = _insert_df(result_df, ws)

        # if there is value counts result
        if len(value_counts_df) > 0:
            value_counts_df = value_counts_df.rename(
                columns={'value': 'top 10 values', 'count_x': 'count_1', 'count_y': 'count_2'})
            databar_head = _insert_df(value_counts_df, ws, header=True, head_style='60 % - Accent5')

            # databar_head is treated as the header row of the value counts
            # table, so the data rows span databar_head+1 .. last_row
            first_row = databar_head + 1
            last_row = databar_head + len(value_counts_df)

            # fixed height for the header row and every value counts row
            for row_idx in range(databar_head, last_row + 1):
                row_heights[row_idx] = 25

            # add conditional formatting: data bar
            # both bars share the same 0..max scale so that they are comparable
            first = FormatObject(type='num', val=0)
            second = FormatObject(
                type='num',
                val=np.max([value_counts_df['count_1'].max(), value_counts_df['count_2'].max()]))
            data_bar1 = DataBar(cfvo=[first, second], color=TABLE1_DARK.replace('#', ''),
                                showValue=True, minLength=None, maxLength=None)
            data_bar2 = DataBar(cfvo=[first, second], color=TABLE2_DARK.replace('#', ''),
                                showValue=True, minLength=None, maxLength=None)

            # assign the data bar to a rule
            rule1 = Rule(type='dataBar', dataBar=data_bar1)
            ws.conditional_formatting.add('B%d:B%d' % (first_row, last_row), rule1)
            rule2 = Rule(type='dataBar', dataBar=data_bar2)
            ws.conditional_formatting.add('C%d:C%d' % (first_row, last_row), rule2)

            # draw the outline border around both tables
            _style_range(ws, 'A%d:C%d' % (head_row, last_row), border=border)
        else:
            # draw the outline border around the summary table only
            _style_range(ws, 'A%d:C%d' % (head_row, head_row + result_df.shape[0] - 1), border=border)

        # add gap
        ws.append([''])

    _adjust_ws(ws, row_height=row_height, row_heights=row_heights, adjust_type='str')
コード例 #2
0
def _insert_string_results(string_results, ws, row_height):
    """
    Write the results of string type columns into a worksheet

    Parameters
    ----------
    string_results: iterable of dict
        One entry per column. Every entry has a 'column' key, plus either
        'result_df' (indexable: [0] is the summary dataframe, [1] is the
        value counts dataframe) or 'error_msg' when 'result_df' is absent
    ws: Excel worksheet instance
    row_height: float
        Height of the rows, forwarded to _adjust_ws
    """

    # thin black line used on all four sides of the outline
    side = Side(border_style="thin", color="000000")
    outline = Border(top=side, left=side, right=side, bottom=side)

    for entry in string_results:
        col_name = entry['column']

        # entries without a result only carry an error message
        if 'result_df' not in entry:
            ws.append([col_name, entry['error_msg']])
            for letter in ('A', 'B'):
                ws['%s%d' % (letter, ws.max_row)].style = 'Bad'
            ws.append([''])
            continue

        summary_df = entry['result_df'][0]
        counts_df = entry['result_df'][1]
        top_row = _insert_df(summary_df, ws)

        if len(counts_df) == 0:
            # no value counts: the outline covers the summary table only
            bottom_row = top_row + summary_df.shape[0] - 1
            _style_range(ws, 'A%d:B%d' % (top_row, bottom_row), border=outline)
        else:
            counts_df = counts_df.rename(columns={col_name: 'top 10 values'})
            counts_head = _insert_df(counts_df, ws, header=True, head_style='60 % - Accent5')
            bottom_row = counts_head + len(counts_df)

            # data bar scaled from 0 to the largest count
            low = FormatObject(type='num', val=0)
            high = FormatObject(type='num', val=counts_df['count'].max())
            bar = DataBar(
                cfvo=[low, high],
                color=DIS_LINE.replace('#', ''),
                showValue=True,
                minLength=None,
                maxLength=None,
            )

            # attach the data bar to the count cells below the header row
            bar_rule = Rule(type='dataBar', dataBar=bar)
            ws.conditional_formatting.add('B%d:B%d' % (counts_head + 1, bottom_row), bar_rule)

            # the outline covers the summary table and the value counts table
            _style_range(ws, 'A%d:B%d' % (top_row, bottom_row), border=outline)

        # blank row between two columns' results
        ws.append([''])

    # adjust the worksheet
    _adjust_ws(ws=ws, row_height=row_height)