def attribute_process(grainger_df, uom_df, lov_df, lov_list, gamut_dict: Dict, att_process_node):
    attribute_df = pd.DataFrame()
    grainger_att_vals = pd.DataFrame()

    # remove "-" values from attribute value field
    grainger_df.loc[grainger_df.Grainger_Attribute_Value == '-', 'Grainger_Attribute_Value'] = np.NaN
   
    grainger_att_vals = q.grainger_values(grainger_df)

    temp_df, gamut_dict = grainger_process(grainger_df, grainger_att_vals, uom_df, lov_df, lov_list, gamut_dict, att_process_node)
    attribute_df = pd.concat([attribute_df, temp_df], axis=0, sort=False)
    
    attribute_df = attribute_df.drop_duplicates(subset=['Grainger_Attr_ID'])
    

    if 'Grainger_Attribute_Name' in attribute_df.columns:
        # drop rows whose attribute name exactly matches "Item" or "Series", or contains the green attributes
        attribute_df = attribute_df[~attribute_df['Grainger_Attribute_Name'].str.contains(r'^Item$', regex=True)]
        attribute_df = attribute_df[~attribute_df['Grainger_Attribute_Name'].str.contains(r'^Series$', regex=True)]
        attribute_df = attribute_df[~attribute_df['Grainger_Attribute_Name'].str.contains('Green Certification')]
        attribute_df = attribute_df[~attribute_df['Grainger_Attribute_Name'].str.contains('Green Environmental')]

        attribute_df['STEP Attribute Name'] = attribute_df['Grainger_Attribute_Name']
    
    attribute_df = attribute_df.rename(columns={'Segment_ID':'Segment ID', 'Segment_Name':'Segment Name', \
                'Family_ID':'Family ID', 'Family_Name':'Family Name', 'Category_ID':'STEP Category ID', \
                'Category_Name':'Category Name', 'Grainger_Attr_ID':'STEP Attribute ID', 'Percent_Numeric':'%_Numeric', \
                'Potential_UOMs':'Potential UOMs', 'Grainger_Attribute_Name':'Attribute Name', \
                'Recommended_Data_Type':'Recommended Data Type', 'Sample_Values':'Sample Values', \
                'Restricted_Attribute_Value_Domain':'Restricted Attribute Value Domain'})
    
    return attribute_df, gamut_dict
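

# A minimal usage sketch for attribute_process (hypothetical wiring: grainger_df
# comes from the category query, uom_df/lov_df/lov_list from the reference files
# loaded later in this file, and node is one STEP category ID):
#
#   gamut_dict = dict()
#   attribute_df, gamut_dict = attribute_process(
#       grainger_df, uom_df, lov_df, lov_list, gamut_dict, node)
#   attribute_df.to_csv('attribute_report.csv', index=False)
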
def gamut_process(gnode, gamut_dict):
    """if gamut node has not been preiously process (in gamut_dict), process and add it to the dictionary"""
    gamut_df = q.gamut_definition(gnode, 'tax_att."categoryId"')
        
    if not gamut_df.empty:
        gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])  #prep att name for merge
        gamut_dict[gnode] = gamut_df #store the processed df in dict for future reference

    else:
        print('{} EMPTY Gamut DATAFRAME'.format(gnode))    
        
    return gamut_dict, gamut_df

def gws_process(gws_node, gws_dict: Dict):
    """if gws node has not been previously processed (in gws_dict), process and add it to the dictionary"""
    gws_df = q.gws_atts(ws_attr_query, gws_node, 'tax.id')  #get gws attribute values for each gamut_l3 node
    
    if not gws_df.empty:
        gws_df = gws_df.drop_duplicates(subset='GWS_Attr_ID')  #gws attribute IDs are unique, so no need to group by pim node before getting unique
        gws_df['alt_gws_name'] = process.process_att(gws_df['GWS_Attribute_Name'])  #prep att name for merge
        gws_dict[gws_node] = gws_df #store the processed df in dict for future reference

    else:
        print('{} EMPTY GWS DATAFRAME'.format(gws_node))    
        
    return gws_dict, gws_df
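

# gamut_process and gws_process share a cache-aside pattern: check the dict
# before querying and store non-empty results for reuse. A generic, runnable
# sketch of that pattern (cached_node_df and fetch are illustrative names, not
# part of this project):
def cached_node_df(node, cache: Dict, fetch):
    """Return (cache, df) for node, calling fetch only on a cache miss."""
    if node in cache:
        return cache, cache[node]  # hit: reuse the previously processed frame
    df = fetch(node)               # miss: e.g. q.gamut_definition or q.gws_atts
    if not df.empty:
        cache[node] = df           # cache non-empty results for future lookups
    return cache, df
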
def grainger_process(grainger_df, grainger_all, uom_df, lov_df, lov_list,
                     gamut_dict, grainger_node):
    """create a list of grainger skus, run through through the gws_skus query and pull gws attribute data if skus are present
        concat both dataframs and join them on matching attribute names"""

    df = pd.DataFrame()
    grainger_new = pd.DataFrame()

    cat_name = grainger_df['Category_Name'].unique()
    cat_name = list(cat_name)
    cat_name = cat_name.pop()
    print('cat name = {} {}'.format(grainger_node, cat_name))

    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gws query
    grainger_sku_count = len(grainger_skus)
    print('grainger sku count = ', grainger_sku_count)

    if len(grainger_skus) > 7000:
        num_split = round(len(grainger_skus) / 7000, 0)
        num_split = int(num_split)

        if num_split == 1:
            num_split = 2

        print('splitting grainger_df into {} batches'.format(num_split))
        grainger_split = np.array_split(grainger_df, num_split)

        for i in range(0, num_split):
            loop_time = time.time()
            print('batch no: ', i + 1)

            temp_df = analyze(grainger_split[i], uom_df, lov_df, lov_list)
            grainger_new = pd.concat([grainger_new, temp_df],
                                     axis=0,
                                     sort=False)

            print("--- grainger loop time = {} minutes ---".format(
                round((time.time() - loop_time) / 60, 2)))

        grainger_df = grainger_new

    else:
        grainger_df = analyze(grainger_df, uom_df, lov_df, lov_list)

    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute name and keep unique
    grainger_df['STEP Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                                                        ' > ' + grainger_df['Category_Name']

    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns

    if not grainger_all.empty:

        # for non-text rows, clean up UOMs in sample value column
        for row in grainger_df.itertuples():
            potential_uoms = str(row.Potential_UOMs)
            dt = str(row.Recommended_Data_Type)

            if dt != 'text':
                grainger_df = process_sample_vals(grainger_df, row,
                                                  potential_uoms)

    grainger_df['alt_grainger_name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge

    gamut_skus = q.gws_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull

    # if gws skus are present, go get the gamut attribute definition for the node
    if not gamut_skus.empty:
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique()

        print('gamut L3s ', gamut_l3)

        for gamut_node in gamut_l3:
            if gamut_node in gamut_dict:
                gamut_df = gamut_dict[gamut_node]
                print('node {} in Gamut dict'.format(gamut_node))
            else:
                gamut_dict, gamut_df = gamut_process(gamut_node, gamut_dict)

            if not gamut_df.empty:
                node_name = gamut_df['Gamut_Node_Name'].unique()
                node_name = list(node_name)
                node_name = node_name.pop()
                print('node name = {} {}'.format(gamut_node, node_name))

                #add correlating grainger and gamut data to opposite dataframes
                grainger_df = q.grainger_assign_nodes(grainger_df, gamut_df)
                gamut_df = q.gamut_assign_nodes(grainger_df, gamut_df)

                temp_df = pd.merge(grainger_df, gamut_df, left_on=['alt_grainger_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                                   'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                                                   'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                                right_on=['alt_gamut_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                          'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                                          'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'], how='outer')

                temp_df = match_category(
                    temp_df
                )  #compare grainger and gamut atts and create column to say whether they match

                df = pd.concat(
                    [df, temp_df], axis=0, sort=False
                )  #add prepped df for this gamut node to the final df
                df['Matching'] = df['Matching'].str.replace(
                    'no', 'Potential Match')
                df = df[df.Matching != 'GWS only']
            else:
                print('Gamut {} EMPTY DATAFRAME'.format(gamut_node))
                df = grainger_df
                df['Gamut_Attribute_Definition'] = ''

    else:
        df = grainger_df
        df['Gamut_Attribute_Definition'] = ''

        print('No Gamut SKUs for Grainger node {}'.format(grainger_node))

    df.reset_index(drop=True, inplace=True)
    df = choose_definition(df)

    return df, gamut_dict
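

# grainger_process pushes SKU sets larger than 7000 rows through analyze() in
# batches via np.array_split. A self-contained sketch of that idiom
# (batched_apply and batch_size are illustrative names, not part of this
# project):
def batched_apply(df, func, batch_size=7000):
    """Apply func to df in roughly batch_size-row chunks and recombine."""
    if len(df) <= batch_size:
        return func(df)
    num_split = max(2, int(round(len(df) / batch_size)))  # same split rule as above
    pieces = np.array_split(df, num_split)
    return pd.concat([func(p) for p in pieces], axis=0, sort=False)
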
# read in uom and LOV files
uom_df = pd.DataFrame()

#uom_groups_url = 'https://raw.githubusercontent.com/gamut-code/attribute_mapping/master/UOM_data_sheet.csv'
# create df of the uom groupings (ID and UOMs for each group)
#data_file = requests.get(uom_groups_url).content
#uom_df = pd.read_csv(io.StringIO(data_file.decode('utf-8')))

# get uom list
filename = 'C:/Users/xcxg109/NonDriveFiles/reference/UOM_data_sheet.csv'
uom_df = pd.read_csv(filename)
# create df of the lovs and their concat values

filename = 'C:/Users/xcxg109/NonDriveFiles/reference/LOV_list.csv'
lov_df, lov_list = q.get_LOVs(filename)

data_type = 'grainger_query'
print('working...')

search_level = fd.WS_search_type()

if search_level == 'cat.CATEGORY_ID':
    search_data = fd.data_in(data_type, settings.directory_name)

    while True:
        try:
            query_level = input(
                "Query Level: \n1. ETL query (L1)\n2. ALL (no restrictions) Query "
            )
            if query_level in ['1']:
                break  # assumed: option 1 keeps the default ETL query
            elif query_level in ['2', 's', 'S']:
                search_level = 'single'
                break
            elif query_level in ['3', 'sku', 'SKU']:
                data_type = 'sku'
                break
        except ValueError:
            print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

print('working...')
start_time = time.time()

# read in grainger data
allCATS_df = q.get_att_values()

if data_type == 'gws_query':
    for node in search_data:
        start_time = time.time()

        if search_level == 'single':
            init_ws_df = gws.gws_q(gws_values_single, 'tprod."categoryId"',
                                   node)

        elif search_level == 'group':
            init_ws_df = gws.gws_q(gws_values_group, 'tprod."categoryId"',
                                   node)

        print('k = ', node)


def process_LOV_vals(df, row):
    """Build the 'Potential_UOMs' string for one LOV row.

    NOTE: only the tail of this helper survived; the guard below is an
    assumed sketch, and row.Values is an illustrative column name."""
    search_string = ''
    if 'dia.' in str(row.Values):  # assumed condition for appending a UOM token
        search_string = search_string + '; ' + 'dia.'

    search_string = search_string[2:]  # strip the leading '; ' separator

    df.at[row.Index, 'Potential_UOMs'] = search_string

    return df


# get uom list
filename = 'C:/Users/xcxg109/NonDriveFiles/reference/UOM_data_sheet.csv'
uom_df = pd.read_csv(filename)
# create df of the lovs and their concat values

filename = 'C:/Users/xcxg109/NonDriveFiles/reference/LOV_Categories.csv'
lov_df, lov_list = q.get_LOVs(filename)

lov_df['Potential_UOMs'] = ''

for row in lov_df.itertuples():
    lov_df = process_LOV_vals(lov_df, row)

lov_df = lov_df.replace(r'^\s*$', np.NaN, regex=True)
lov_df = lov_df.sort_values(['Potential_UOMs'], ascending=[True])

outfile = 'C:/Users/xcxg109/NonDriveFiles/reference/LOV_UOMs.xlsx'
writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
workbook = writer.book

lov_df.to_excel(writer, sheet_name="LOVs", startrow=0, startcol=0, index=False)
writer.close()  # flush the workbook to disk; without this the file is never finalized

ws_df = pd.DataFrame()
gamut_df = pd.DataFrame()
attribute_df = pd.DataFrame()
gamut_dict = dict()

# read in WS data
print('Choose WS PIM file')
allCATS_df = q.get_att_values()

# read in and clean WS data
print('\nChoose Gamut file')
Gamut_allCATS_df = q.get_att_values()

print('working...')
start_time = time.time()

node_ids = allCATS_df['WS_Node_ID'].unique().tolist()
print('number of nodes = ', len(node_ids))


for k in node_ids:
    ws_df = allCATS_df.loc[allCATS_df['WS_Node_ID']== k]
    
# read in WS data
print('Choose WS PIM file')
allCATS_df = q.get_att_values()

# read in and clean WS data
print('\nChoose Gamut file')
Gamut_allCATS_df = q.get_att_values()

print('working...')
start_time = time.time()

node_ids = allCATS_df['Category_ID'].unique().tolist()
print('number of nodes = ', len(node_ids))


for node in node_ids:
    temp_df = allCATS_df.loc[allCATS_df['Category_ID']== node]