Example #1
def data_out(df, batch=''):

    if not df.empty:
        outfile = 'C:/Users/xcxg109/NonDriveFiles/Delta Attributes_' + str(
            batch) + '.xlsx'

        df = df[['Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_ID', \
                 'Category_Name', 'Grainger_Attr_ID', 'Grainger_Attribute_Name', \
                 'GWS_Attribute_Name', 'GWS_Attr_ID', 'GWS_PIM_Path', 'GWS_Category_ID', \
                 'GWS_Category_Name', 'GWS_Node_ID', 'GWS_Node_Name', 'STEP_Category_ID', \
                 'STEP_Attr_ID']]

        df = df.sort_values(
            ['Segment_Name', 'Category_Name', 'Grainger_Attribute_Name'],
            ascending=[True, True, True])
        df_filter = df[df['GWS_Node_ID'].isna()]

        writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

        df_filter.to_excel(writer,
                           sheet_name="STEP ONLY Attributes",
                           startrow=0,
                           startcol=0,
                           index=False)
        df.to_excel(writer,
                    sheet_name="ALL Attributes",
                    startrow=0,
                    startcol=0,
                    index=False)

        worksheet1 = writer.sheets['STEP ONLY Attributes']
        worksheet2 = writer.sheets['ALL Attributes']

        col_widths = fd.get_col_widths(df_filter)
        col_widths = col_widths[1:]

        for i, width in enumerate(col_widths):
            if width > 40:
                width = 40
            elif width < 10:
                width = 10
            worksheet1.set_column(i, i, width)

        col_widths = fd.get_col_widths(df)
        col_widths = col_widths[1:]

        for i, width in enumerate(col_widths):
            if width > 40:
                width = 40
            elif width < 10:
                width = 10
            worksheet2.set_column(i, i, width)

        writer.save()
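Every example on this page calls fd.get_col_widths(df) and then discards the first element before sizing the worksheet columns. The fd module itself is not shown here, so the helper below is only a sketch of what it presumably does: one width per column, taken as the longer of the header text and the longest cell value, with the index width in slot zero (which is why the callers slice it off with col_widths[1:]).

# Hypothetical sketch of fd.get_col_widths; the module name 'fd' and the
# exact sizing rule are assumptions, not code taken from this page.
import pandas as pd

def get_col_widths(df: pd.DataFrame):
    # index width first, so callers can discard it with col_widths[1:]
    idx_width = max([len(str(v)) for v in df.index.values] + [len(str(df.index.name or ''))])
    # for each column: the longer of the header text and the longest cell value
    col_widths = [
        max([len(str(v)) for v in df[col].values] + [len(str(col))])
        for col in df.columns
    ]
    return [idx_width] + col_widths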
Example #2
def data_out(df, atts_df, batch=''):
    # output for sku-based pivot table
    sku_data = df[['Supplier_Parent_Group', 'Supplier_ID', 'Supplier_Name', 'Segment_ID', 'Segment_Name', \
                 'Family_ID', 'Family_Name', 'Category_ID', 'Category_Name', 'Grainger_SKU', 'PM_Code', \
                 'Sales_Status', 'Relationship_Mgr_Code', '2019_COGS']]        
    sku_data = sku_data.drop_duplicates(subset=['Grainger_SKU'])
    sku_data = sku_data.rename(columns={'Grainger_SKU':'Material_No'})

    fill = atts_df[['Supplier_Parent_Group', 'Supplier_ID', 'Supplier_Name', 'Segment_ID', 'Segment_Name', \
               'Family_ID', 'Family_Name', 'Category_ID', 'Category_Name', 'Attr_ID', 'Attribute_Name', \
               'Endeca_Ranking', 'Supplier_Fill_Rate_%', 'Category_Fill_Rate_%']] 
    fill = fill.drop_duplicates(subset=['Category_ID', 'Attr_ID'])
    fill = fill.sort_values(by=['Segment_Name', 'Family_Name', 'Category_Name', 'Endeca_Ranking'])
        
    outfile = 'C:/Users/xcxg109/NonDriveFiles/SUPPLIER_REPORT_'+str(batch)+'_.xlsx'

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    sku_data.to_excel(writer, sheet_name="SKU Data", startrow=0, startcol=0, index=False)
    fill.to_excel(writer, sheet_name='Attribute Fill Rates', startrow=0, startcol=0, index=False)

    worksheet1 = writer.sheets['Attribute Fill Rates']
    worksheet2 = writer.sheets['SKU Data']

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')

    col_widths = fd.get_col_widths(fill)
    col_widths = col_widths[1:]
    
    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    col_widths = fd.get_col_widths(sku_data)
    col_widths = col_widths[1:]
    
    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet2.set_column(i, i, width)

    writer.save()
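Every example also finishes with writer.save(). That call works on the older pandas these snippets target, but ExcelWriter.save() was deprecated and later removed in favour of close(), and a with-block sidesteps the question entirely. A minimal sketch of the modern spelling, with placeholder data standing in for the frames used above:

# Sketch only: the with-block writes and closes the workbook on exit,
# replacing the explicit writer.save() used throughout this page.
import pandas as pd

outfile = 'example_output.xlsx'  # stand-in for the output paths above
df = pd.DataFrame({'Category_ID': [1, 2], 'Category_Name': ['A', 'B']})

with pd.ExcelWriter(outfile, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='DATA', index=False)
    # column-width tweaks on writer.sheets['DATA'] go here, before the block exits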
Example #3
def data_out(df, quer, batch=''):

    if not df.empty:
        outfile = 'C:/Users/xcxg109/NonDriveFiles/STEP-PIM_' + str(
            batch) + '_HIER.xlsx'

        writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
        df.to_excel(writer,
                    sheet_name="DATA",
                    startrow=0,
                    startcol=0,
                    index=False)
        worksheet = writer.sheets['DATA']
        col_widths = fd.get_col_widths(df)
        col_widths = col_widths[1:]

        for i, width in enumerate(col_widths):
            if width > 40:
                width = 40
            elif width < 10:
                width = 10
            worksheet.set_column(i, i, width)
        writer.save()
    else:
        print('EMPTY DATAFRAME')
Example #4
def data_out(df):

    if not df.empty:
        outfile = 'C:/Users/xcxg109/NonDriveFiles/STEP_WS_Cats.xlsx'

        writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

        df.to_excel(writer,
                    sheet_name="Category",
                    startrow=0,
                    startcol=0,
                    index=False)

        worksheet1 = writer.sheets['Category']

        col_widths = fd.get_col_widths(df)
        col_widths = col_widths[1:]

        for i, width in enumerate(col_widths):
            if width > 40:
                width = 40
            elif width < 10:
                width = 10
            worksheet1.set_column(i, i, width)

        writer.save()

    else:
        print('EMPTY DATAFRAME')
Example #5
def data_out(df):

    outfile = 'C:/Users/xcxg109/NonDriveFiles/ALLTIMEHIGH_FINAL_Rankings_WED.xlsx'

    df = df.sort_values(['GWS_Leaf_Node_ID', 'New_Rank', 'GWS_Attribute_Name'],
                        ascending=[True, True, True])

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    df.to_excel(writer,
                sheet_name="STEP Attributes",
                startrow=0,
                startcol=0,
                index=False)
    worksheet1 = writer.sheets['STEP Attributes']

    col_widths = fd.get_col_widths(df)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')

    writer.save()
Example #6
def data_out(df, batch=''):

    if not df.empty:
        outfile = 'C:/Users/xcxg109/NonDriveFiles/Delta_Project_STEP_Values_' + str(
            batch) + '_.xlsx'

        writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

        df.to_excel(writer,
                    sheet_name="ALL STEP Att_Values",
                    startrow=0,
                    startcol=0,
                    index=False)

        worksheet1 = writer.sheets['ALL STEP Att_Values']

        col_widths = fd.get_col_widths(df)
        col_widths = col_widths[1:]

        for i, width in enumerate(col_widths):
            if width > 40:
                width = 40
            elif width < 10:
                width = 10
            worksheet1.set_column(i, i, width)

        writer.save()

    else:
        print('EMPTY DATAFRAME')
Example #7
def data_out(final_df, node):
    final_df = final_df.sort_values(
        ['WS_Category_Name', 'WS_Node_Name', 'WS_Attribute_Name'],
        ascending=[True, True, True])

    final_no_dupes = final_df.drop_duplicates(
        subset=['WS_Node_ID', 'WS_Attr_ID', 'Normalized_Unit'])
    final_no_dupes = final_no_dupes[['WS_Category_ID', 'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name', \
                                'WS_SKU', 'WS_Attr_ID', 'WS_Attribute_Name', 'Attribute_Definition', \
                                'Numeric_Display_Type', 'Unit_Group_ID', 'Normalized_Unit', 'Attribute_Values', \
                                'UOMs in Attribute']]
    final_no_dupes = final_no_dupes.rename(columns={'WS_SKU': 'Example SKU'})

    outfile = 'C:/Users/xcxg109/NonDriveFiles/' + str(
        node) + '_multi-UOMs.xlsx'
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer,
                            sheet_name="UOMs",
                            startrow=0,
                            startcol=0,
                            index=False)

    worksheet1 = writer.sheets['UOMs']

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')

    col_widths = fd.get_col_widths(final_no_dupes)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    writer.save()
Example #8
def data_out(final_df, node, node_name, batch=''):
    final_df['concat'] = final_df['Grainger_Attribute_Name'].map(
        str) + final_df['Grainger_Attribute_Value'].map(str)

    final_df['Group_ID'] = final_df.groupby(
        final_df['concat']).grouper.group_info[0] + 1

    final_df = final_df[['Group_ID', 'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_ID', \
                'Category_Name', 'PM_Code', 'Sales_Status', 'Relationship_MGR_Code', 'WS_Category_ID', \
                'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name', 'Grainger_SKU', 'WS_SKU', 'Grainger_Attr_ID', \
                'WS_Attr_ID', 'WS_Attr_Value_ID', 'Multivalue?', 'Data_Type', 'Numeric_Display_Type', \
                'WS_Attribute_Name', 'WS_Attribute_Definition', 'Normalized_Value', 'Normalized_Unit', \
                'Numerator', 'Denominator',  'Grainger_Attribute_Name', 'Grainger_Attribute_Definition', \
                'Grainger_Category_Specific_Definition', 'Grainger_Attribute_Value', 'WS_Value',\
                'STEP-WS_Match?', 'Potential_Replaced_Values', 'Revised_Value']]

    final_no_dupes = final_df.drop_duplicates(subset=[
        'Grainger_Attribute_Name', 'Grainger_Attribute_Value', 'Data_Type'
    ])

    final_no_dupes = final_no_dupes[['Group_ID', 'Category_ID', 'Category_Name', 'Grainger_SKU', 'Data_Type', \
               'Numeric_Display_Type', 'WS_Attribute_Name', 'WS_Attribute_Definition', 'Grainger_Attribute_Name', \
               'Grainger_Attribute_Definition', 'Grainger_Category_Specific_Definition', 'Grainger_Attribute_Value', \
               'WS_Value', 'STEP-WS_Match?', 'Potential_Replaced_Values', 'Revised_Value']]
    final_no_dupes = final_no_dupes.rename(
        columns={'Grainger_SKU': 'Example SKU'})

    outfile = 'C:/Users/xcxg109/NonDriveFiles/' + str(node) + '_' + str(
        node_name) + '_' + str(batch) + '_STEP-WS_Analysis.xlsx'

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer,
                            sheet_name="Uniques",
                            startrow=0,
                            startcol=0,
                            index=False)
    final_df.to_excel(writer,
                      sheet_name="All Text UOMs",
                      startrow=0,
                      startcol=0,
                      index=False)

    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All Text UOMs']

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')
    col_widths = fd.get_col_widths(final_no_dupes)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    worksheet1.set_column('G:G', 50, layout)
    worksheet1.set_column('H:H', 30, layout)
    worksheet1.set_column('J:J', 50, layout)

    col_widths = fd.get_col_widths(final_df)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet2.set_column(i, i, width)

    worksheet2.set_column('V:V', 50, layout)
    worksheet2.set_column('Y:Y', 50, layout)
    worksheet2.set_column('AA:AA', 50, layout)

    writer.save()
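Examples #8, #10, and #11 build Group_ID by reaching into final_df.groupby(...).grouper.group_info[0], a private pandas attribute that can change between versions. The public GroupBy.ngroup() method should yield the same 1-based IDs once you add one; the tiny frame below is purely illustrative, with column names borrowed from the example:

# Hedged sketch: public-API alternative to .grouper.group_info[0] + 1.
import pandas as pd

df = pd.DataFrame({
    'Grainger_Attribute_Name': ['Color', 'Color', 'Width'],
    'Grainger_Attribute_Value': ['Red', 'Red', '5 in'],
})
df['concat'] = df['Grainger_Attribute_Name'].map(str) + df['Grainger_Attribute_Value'].map(str)

# ngroup() numbers each row by its group; +1 reproduces the 1-based Group_ID
df['Group_ID'] = df.groupby('concat').ngroup() + 1
print(df[['concat', 'Group_ID']])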
Example #9
        count += 1

# if original df < 30K rows, process the entire thing at once
else:
    data_out(final_df, quer)

outfile = 'C:/Users/xcxg109/NonDriveFiles/STEP_only_SKUs.xlsx'

writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
no_match_df.to_excel(writer,
                     sheet_name="DATA",
                     startrow=0,
                     startcol=0,
                     index=False)
worksheet = writer.sheets['DATA']
col_widths = fd.get_col_widths(no_match_df)
col_widths = col_widths[1:]

for i, width in enumerate(col_widths):
    if width > 40:
        width = 40

    elif width < 10:
        width = 10

    worksheet.set_column(i, i, width)

writer.save()

print("--- {} minutes ---".format(round((time.time() - start_time) / 60, 2)))
Example #10
def data_out(final_df, node, node_name):
    # NOTE: nonconforming rows of df were dropped and df was sorted before being passed here

    final_df['concat'] = final_df['WS_Attribute_Name'].map(
        str) + final_df['Normalized_Value'].map(str)
    final_df['Group_ID'] = final_df.groupby(
        final_df['concat']).grouper.group_info[0] + 1

    final_df = final_df[['Group_ID', 'PIM_Path', 'WS_Category_ID', 'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name', \
                   'STEP_Category_ID', 'WS_SKU', 'STEP_Attr_ID', 'WS_Attr_ID', 'WS_Attr_Value_ID', 'Multivalue', \
                   'Data_Type', 'Numeric_Display_Type', 'WS_Attribute_Name', 'Original_Value', 'Original_Unit', \
                   'Grainger_Attribute_Value', 'Normalized_Value', 'Normalized_Unit', 'Potential_Issue']]

    final_no_dupes = final_df.drop_duplicates(
        subset=['WS_Attribute_Name', 'Normalized_Value', 'Data_Type'])
    final_no_dupes = final_no_dupes[['Group_ID', 'PIM_Path', 'WS_Category_ID', 'WS_Category_Name', 'WS_Node_ID', \
                                     'WS_Node_Name', 'WS_SKU', 'Data_Type', 'WS_Attribute_Name', \
                                     'Grainger_Attribute_Value', 'Normalized_Value', 'Normalized_Unit', \
                                     'Potential_Issue']]

    outfile = 'C:/Users/xcxg109/NonDriveFiles/' + str(node) + '_' + str(
        node_name) + '_MULTIVALUE_ISSUES.xlsx'

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer,
                            sheet_name="Uniques",
                            startrow=0,
                            startcol=0,
                            index=False)
    final_df.to_excel(writer,
                      sheet_name="All MultiValue Issues",
                      startrow=0,
                      startcol=0,
                      index=False)

    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All MultiValue Issues']

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')

    col_widths = fd.get_col_widths(final_no_dupes)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    worksheet1.set_column('J:J', 50, layout)
    worksheet1.set_column('K:K', 50, layout)
    worksheet1.set_column('M:M', 30, layout)

    col_widths = fd.get_col_widths(final_df)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet2.set_column(i, i, width)

    worksheet2.set_column('P:P', 50, layout)
    worksheet2.set_column('R:R', 50, layout)
    worksheet2.set_column('S:S', 50, layout)
    worksheet2.set_column('U:U', 50, layout)

    writer.save()
Example #11
def data_out(final_df, node, batch=''):
#    final_df = final_df.drop(final_df[(final_df['STEP-WS_Match?'] == 'Y' or final_df['Potential_Replaced_Values'] == '')])
#    final_df = final_df[final_df.Potential_Replaced_Values != '']
    final_df = final_df[final_df.Grainger_Attribute_Name != 'Item']
    
    final_df = final_df.sort_values(['Potential_Replaced_Values'], ascending=[True])
    
    final_df['concat'] = final_df['Grainger_Attribute_Name'].map(str) + final_df['Grainger_Attribute_Value'].map(str)
    final_df['Group_ID'] = final_df.groupby(final_df['concat']).grouper.group_info[0] + 1
    final_df = final_df[['Group_ID', 'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_ID', \
                'Category_Name', 'WS_Category_ID', 'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name', 'PM_Code', \
                'Sales_Status', 'RELATIONSHIP_MANAGER_CODE', 'Grainger_SKU', 'WS_SKU', 'WS_Attr_ID', \
                'WS_Attr_Value_ID', 'WS_Attribute_Name', 'WS_Original_Value', 'Grainger_Attr_ID', \
                'Grainger_Attribute_Name', 'Grainger_Attribute_Value']]

    final_no_dupes = final_df.drop_duplicates(subset=['Grainger_Attribute_Name', 'Grainger_Attribute_Value'])
    final_no_dupes = final_no_dupes[['Group_ID', 'Category_ID', 'Category_Name', 'Grainger_SKU', 'Grainger_Attr_ID', \
                                      'Grainger_Attribute_Name', 'Grainger_Attribute_Value']]
    final_no_dupes = final_no_dupes.rename(columns={'Grainger_SKU':'Example SKU'})

    outfile = 'C:/Users/xcxg109/NonDriveFiles/'+str(node)+'_'+str(batch)+'_text_UOMs.xlsx'  
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer, sheet_name="Uniques", startrow=0, startcol=0, index=False)
    final_df.to_excel(writer, sheet_name="All Text UOMs", startrow=0, startcol=0, index=False)

    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All Text UOMs']

    layout = workbook.add_format()
    layout.set_text_wrap()
    layout.set_align('left')

    col_widths = fd.get_col_widths(final_no_dupes)
    col_widths = col_widths[1:]
    
    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

    worksheet1.set_column('G:G', 50, layout)
    worksheet1.set_column('H:H', 30, layout)
    worksheet1.set_column('J:J', 50, layout)

    col_widths = fd.get_col_widths(final_df)
    col_widths = col_widths[1:]
    
    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet2.set_column(i, i, width)

    worksheet2.set_column('V:V', 50, layout)
    worksheet2.set_column('Y:Y', 50, layout)
    worksheet2.set_column('AA:AA', 50, layout)

    writer.save()
Example #12
no_match_df = no_match_df.drop(
    ['GWS_Category_ID', 'GWS_Attr_ID', 'GWS_Attribute_Name'], axis=1)

outfile = 'C:/Users/xcxg109/NonDriveFiles/Delta_Project_Attribute_Breakdown.xlsx'

writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

if not no_match_df.empty:
    no_match_df.to_excel(writer,
                         sheet_name="Delta Atts not in WS",
                         startrow=0,
                         startcol=0,
                         index=False)
    worksheet1 = writer.sheets['Delta Atts not in WS']

    col_widths = fd.get_col_widths(no_match_df)
    col_widths = col_widths[1:]

    for i, width in enumerate(col_widths):
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)

gws_atts.to_excel(writer,
                  sheet_name="Delta ALL Atts",
                  startrow=0,
                  startcol=0,
                  index=False)
worksheet2 = writer.sheets['Delta ALL Atts']