def gamut_process(gnode, gamut_dict):
    """Fetch, prep, and cache the Gamut attribute definition for one node.

    If the node has not been previously processed (i.e. is not yet in
    ``gamut_dict``), query its definition, add the merge-ready attribute
    name column, and store the result in the dictionary.

    Parameters:
        gnode      -- Gamut node id passed to the definition query
        gamut_dict -- cache of already-processed node DataFrames (mutated)

    Returns:
        (gamut_dict, gamut_df) -- the (possibly updated) cache and the
        node's DataFrame; the DataFrame is empty if the query found nothing.
    """
    gamut_df = q.gamut_definition(gnode, 'tax_att."categoryId"')
    # idiom fix: `if not df.empty` instead of `df.empty == False`
    if not gamut_df.empty:
        # prep att name for merge
        gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])
        # store the processed df in dict for future reference
        gamut_dict[gnode] = gamut_df
    else:
        print('{} EMPTY Gamut DATAFRAME'.format(gnode))
    return gamut_dict, gamut_df
def gws_process(gws_node, gws_dict: Dict):
    """Fetch, prep, and cache the GWS attribute definition for one node.

    If the node has not been previously processed (i.e. is not yet in
    ``gws_dict``), query its attribute values, deduplicate by attribute id,
    add the merge-ready attribute name column, and cache the result.

    Parameters:
        gws_node -- GWS node id passed to the attribute query
        gws_dict -- cache of already-processed node DataFrames (mutated)

    Returns:
        (gws_dict, gws_df) -- the (possibly updated) cache and the node's
        DataFrame; empty if the query found nothing.
    """
    # get gws attribute values for each gamut_l3 node
    gws_df = q.gws_atts(ws_attr_query, gws_node, 'tax.id')  #tprod."categoryId"')
    # idiom fix: `if not df.empty` instead of `df.empty == False`
    if not gws_df.empty:
        # gws attribute IDs are unique, so no need to group by pim node before getting unique
        gws_df = gws_df.drop_duplicates(subset='GWS_Attr_ID')
        # prep att name for merge
        gws_df['alt_gws_name'] = process.process_att(gws_df['GWS_Attribute_Name'])
        # store the processed df in dict for future reference
        gws_dict[gws_node] = gws_df
    else:
        print('{} EMPTY GWS DATAFRAME'.format(gws_node))
    return gws_dict, gws_df
def gamut_process(node, Gamut_allCATS_df, gamut_dict: Dict, k):
    """Build, prep, and cache the attribute definition for one Gamut node.

    Slices the node's rows out of ``Gamut_allCATS_df``, joins in merch-usage
    data, deduplicates per attribute id, attaches sample/all normalized
    values, and caches the prepped frame in ``gamut_dict``.

    Parameters:
        node            -- Gamut node id
        Gamut_allCATS_df -- all-categories attribute frame to slice from
        gamut_dict      -- cache of processed node DataFrames (mutated)
        k               -- unused here; kept for signature parity with callers

    Returns:
        (gamut_dict, gamut_df). NOTE: if the node has no rows in
        Gamut_allCATS_df, gamut_df is never assigned and the return raises
        UnboundLocalError -- pre-existing behavior, left unchanged.
    """
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()
    # get gamut attribute values for each gamut_l3 node
    gamut_df_1 = Gamut_allCATS_df.loc[Gamut_allCATS_df['Gamut_Node_ID'] == node]
    # idiom fix throughout: `not df.empty` instead of `df.empty == False`
    if not gamut_df_1.empty:
        gamut_df_2 = gamut.gamut_q(gamut_usage_query, 'tax_att."categoryId"', node)
        print('Gamut ', node)
        if not gamut_df_2.empty:
            # collapse multiple usage rows per attribute into one ';'-joined string
            gamut_df_2 = gamut_df_2.groupby(['Gamut_Attr_ID'])['Gamut_MERCH_Usage'].apply('; '.join).reset_index()
            gamut_df = pd.merge(gamut_df_1, gamut_df_2, how='outer', on='Gamut_Attr_ID')
        else:
            gamut_df = gamut_df_1
            gamut_df['Gamut_MERCH_Usage'] = ""
        # normalize empty usage strings to NaN so the sort/dedup below prefers real values
        gamut_df.loc[gamut_df['Gamut_MERCH_Usage'] == '', 'Gamut_MERCH_Usage'] = np.nan
        gamut_df = gamut_df.sort_values(['Gamut_Attr_ID', 'Gamut_MERCH_Usage']).drop_duplicates(subset='Gamut_Attr_ID')
        # gamut_values exports a list of --all-- normalized values and sample_values
        gamut_att_vals, gamut_sample_vals = gamut_values(gamut_df, node)
        if not gamut_att_vals.empty:
            gamut_sample_vals = gamut_sample_vals.rename(
                columns={'Gamut_Normalized Value': 'Gamut Attribute Sample Values'})
            # add top 10 normalized values to report
            gamut_df = pd.merge(gamut_df, gamut_sample_vals, on=['Gamut_Attribute_Name'])
            gamut_df = pd.merge(gamut_df, gamut_att_vals, on=['Gamut_Attr_ID'])
            # gamut attribute IDs are unique, so no need to group by pim node
            # before getting unique
            gamut_df = gamut_df.drop_duplicates(subset='Gamut_Attr_ID')
            # prep att name for merge
            gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])
            # store the processed df in dict for future reference
            # NOTE(review): nesting reconstructed from flattened source -- the
            # dedup/prep/cache steps may have sat one level out; confirm intent
            gamut_dict[node] = gamut_df
    else:
        print('{} EMPTY DATAFRAME'.format(node))
    return gamut_dict, gamut_df
def ws_process(ws_df, ws_sample, ws_all, fill_rate, Gamut_allCATS_df, gamut_dict: Dict, k):
    """Prep one WS node's attribute frame and load Gamut data for its matching nodes.

    Creates a list of WS SKUs, finds the Gamut SKUs/nodes that correspond to
    them in ``Gamut_allCATS_df``, and processes (or reuses from cache) each
    matching Gamut node's definition via gamut_process.

    Parameters:
        ws_df, ws_sample, ws_all, fill_rate -- WS attribute frames to merge
        Gamut_allCATS_df -- all-categories Gamut frame used for SKU matching
        gamut_dict -- cache of processed Gamut node DataFrames (mutated)
        k -- node id, used for log messages and passed to gamut_process

    NOTE(review): the visible body ends inside the node loop with no return
    statement (returns None) -- looks truncated; confirm against history.
    """
    df = pd.DataFrame()
    gamut_skus = pd.DataFrame()
    node_name = ws_df['WS_Node_Name'].unique().tolist()
    # BUG FIX: original read `node_name = cat_name.pop()` -- `cat_name` is
    # never defined in this function, so this raised NameError. Pop the
    # single node name off the list we just built instead.
    node_name = node_name.pop()
    print('node name = {} {}'.format(k, node_name))
    # ws_skus = ws_df.drop_duplicates(subset='WS_SKU')
    # create list of unique WS skus that feed into gamut query
    ws_skus = ws_df['WS_SKU'].unique().tolist()
    print('ws sku count = ', len(ws_skus))
    # group by WS_Node_ID and attribute name and keep unique
    ws_df = ws_df.drop_duplicates(subset=['WS_Category_ID', 'WS_Attr_ID'])
    # remove unneeded columns
    ws_df = ws_df.drop(['WS_SKU', 'WS_Attribute_Value'], axis=1)
    ws_df = pd.merge(ws_df, ws_sample, on=['WS_Attribute_Name'])
    ws_df = pd.merge(ws_df, ws_all, on=['WS_Attr_ID'])
    ws_df = pd.merge(ws_df, fill_rate, on=['WS_Attribute_Name'])
    # prep att name for merge
    ws_df['alt_ws_name'] = process.process_att(ws_df['WS_Attribute_Name'])
    gamut_skus = Gamut_allCATS_df[Gamut_allCATS_df['Gamut_SKU'].isin(ws_skus)]
    if not gamut_skus.empty:
        # create a dictionary of the unique gamut nodes that correspond to the grainger node
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique()  # list of pim nodes to pull
        print('GWS L3s ', gamut_l3)
        for node in gamut_l3:
            if node in gamut_dict:
                gamut_df = gamut_dict[node]  # cache hit: reuse processed definition
                print('node {} in gamut dict'.format(node))
            else:
                gamut_dict, gamut_df = gamut_process(node, Gamut_allCATS_df, gamut_dict, k)
def gamut_process(node, gamut_dict: Dict, k):
    """Query, prep, and cache the Gamut attribute definition for one node.

    If the node has not been previously processed (i.e. is not yet in
    ``gamut_dict``), query its attributes and normalized values, attach the
    sample-value columns, deduplicate, and cache the result.

    Parameters:
        node       -- Gamut node id passed to the queries
        gamut_dict -- cache of processed node DataFrames (mutated)
        k          -- unused here; kept for signature parity with callers

    Returns:
        (gamut_dict, gamut_df); gamut_df is empty if the query found nothing.
    """
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()
    # get gamut attribute values for each gamut_l3 node
    gamut_df = q.gamut_atts(gamut_attr_query, node, 'tax.id')  #tprod."categoryId"')
    # idiom fix throughout: `not df.empty` instead of `df.empty == False`
    if not gamut_df.empty:
        # gamut_values exports a list of --all-- normalized values and sample_values
        gamut_att_vals, gamut_sample_vals = q.gamut_values(gamut_attr_values, node, 'tax.id')
        if not gamut_att_vals.empty:
            gamut_sample_vals = gamut_sample_vals.rename(
                columns={'Normalized Value': 'Gamut Attribute Sample Values'})
            # add top 5 normalized values to report
            gamut_df = pd.merge(gamut_df, gamut_sample_vals, on=['Gamut_Attribute_Name'])
            gamut_df = pd.merge(gamut_df, gamut_att_vals, on=['Gamut_Attr_ID'])
            # gamut attribute IDs are unique, so no need to group by pim node
            # before getting unique
            gamut_df = gamut_df.drop_duplicates(subset='Gamut_Attr_ID')
            # prep att name for merge
            gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])
            # store the processed df in dict for future reference
            # NOTE(review): nesting reconstructed from flattened source -- the
            # dedup/prep/cache steps may have sat one level out; confirm intent
            gamut_dict[node] = gamut_df
    else:
        print('{} EMPTY DATAFRAME'.format(node))
    return gamut_dict, gamut_df
def grainger_process(grainger_df, grainger_all, uom_df, lov_df, lov_list, gamut_dict, grainger_node):
    """Analyze one Grainger node's attributes and merge them with Gamut attribute data.

    Builds the unique Grainger SKU list, runs it through q.gws_skus to find the
    matching Gamut nodes, pulls (and caches in gamut_dict) each Gamut node's
    attribute definition, and outer-merges the two attribute tables on the
    prepped attribute name plus shared hierarchy columns.

    Parameters (inferred from usage -- TODO confirm against callers):
        grainger_df   -- per-SKU attribute rows for one Grainger node
        grainger_all  -- used only as a non-empty flag gating the UOM cleanup pass
        uom_df, lov_df, lov_list -- passed straight through to analyze()
        gamut_dict    -- cache of already-processed Gamut node definitions (mutated)
        grainger_node -- Grainger node id, used only in log messages

    Returns (df, gamut_dict) where df is the merged comparison report.
    """
    df = pd.DataFrame()
    grainger_new = pd.DataFrame()
    # grab the (single expected) category name for log output
    cat_name = grainger_df['Category_Name'].unique()
    cat_name = list(cat_name)
    cat_name = cat_name.pop()
    print('cat name = {} {}'.format(grainger_node, cat_name))
    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gws query
    grainger_sku_count = len(grainger_skus)
    print('grainger sku count = ', grainger_sku_count)
    # run analyze() in batches of roughly 7000 SKUs to keep each pass manageable
    if len(grainger_skus) > 7000:
        num_split = round(len(grainger_skus) / 7000, 0)
        num_split = int(num_split)
        if num_split == 1:
            num_split = 2  # round() can yield 1 just above the threshold; force a real split
        print('splitting grainger_df into {} batches'.format(num_split))
        grainger_split = np.array_split(grainger_df, num_split)
        for i in range(0, num_split):
            loop_time = time.time()
            print('batch no: ', i + 1)
            temp_df = analyze(grainger_split[i], uom_df, lov_df, lov_list)
            grainger_new = pd.concat([grainger_new, temp_df], axis=0, sort=False)
            print("--- grainger loop time = {} minutes ---".format(
                round((time.time() - loop_time) / 60, 2)))
        grainger_df = grainger_new
    else:
        grainger_df = analyze(grainger_df, uom_df, lov_df, lov_list)
    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute name and keep unique
    grainger_df['STEP Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                        ' > ' + grainger_df['Category_Name']
    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns
    if grainger_all.empty == False:
        # for non-text rows, clean up UOMs in sample value column
        # NOTE(review): grainger_df is reassigned inside this loop while
        # itertuples() iterates the original frame -- presumably
        # process_sample_vals preserves shape/order; confirm.
        for row in grainger_df.itertuples():
            potential_uoms = str(row.Potential_UOMs)
            dt = str(row.Recommended_Data_Type)
            if dt != 'text':
                grainger_df = process_sample_vals(grainger_df, row, potential_uoms)
    grainger_df['alt_grainger_name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge
    gamut_skus = q.gws_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull
    # if gws skus are present, go get the gamut attribute definition for the node
    if gamut_skus.empty == False:
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique()
        print('gamut L3s ', gamut_l3)
        for gamut_node in gamut_l3:
            if gamut_node in gamut_dict:
                gamut_df = gamut_dict[gamut_node]  # cache hit: reuse processed definition
                print('node {} in Gamut dict'.format(gamut_node))
            else:
                # NOTE(review): two-argument call matches the (gnode, gamut_dict)
                # gamut_process variant, not the (node, gamut_dict, k) one defined
                # elsewhere in this file -- confirm which definition is live here.
                gamut_dict, gamut_df = gamut_process(gamut_node, gamut_dict)
            if gamut_df.empty == False:
                node_name = gamut_df['Gamut_Node_Name'].unique()
                node_name = list(node_name)
                node_name = node_name.pop()
                print('node name = {} {}'.format(gamut_node, node_name))
                #add correlating grainger and gamut data to opposite dataframes
                grainger_df = q.grainger_assign_nodes(grainger_df, gamut_df)
                gamut_df = q.gamut_assign_nodes(grainger_df, gamut_df)
                # outer merge on the prepped name plus all shared hierarchy columns
                temp_df = pd.merge(grainger_df, gamut_df,
                                   left_on=['alt_grainger_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                            'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                            'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                   right_on=['alt_gamut_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                             'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                             'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                   how='outer')
                temp_df = match_category(
                    temp_df
                )  #compare grainger and gamut atts and create column to say whether they match
                df = pd.concat(
                    [df, temp_df], axis=0, sort=False
                )  #add prepped df for this gamut node to the final df
                df['Matching'] = df['Matching'].str.replace(
                    'no', 'Potential Match')
                df = df[df.Matching != 'GWS only']  # drop rows present only on the GWS side
                # temp_df.to_csv('C:/Users/xcxg109/NonDriveFiles/graingerProcessDF.csv')
            else:
                print('Gamut {} EMPTY DATAFRAME'.format(gamut_node))
                df = grainger_df
                df['Gamut_Attribute_Definition'] = ''
    else:
        df = grainger_df
        df['Gamut_Attribute_Definition'] = ''
        print('No Gamut SKUs for Grainger node {}'.format(grainger_node))
    # NOTE: a long commented-out GWS variant of the merge above (GWS_* columns in
    # place of Gamut_*) was removed here; recover it from version control if needed.
    df.reset_index(drop=True, inplace=True)
    df = choose_definition(df)
    return df, gamut_dict
    #where gamut_att_temp is the list of all normalized values for gamut attributes
def grainger_process(grainger_df, grainger_sample, grainger_all, gamut_dict: Dict, k):
    """Merge one Grainger node's attribute summary with its matching Gamut node definitions.

    Builds the unique Grainger SKU list, runs it through q.gamut_skus to find the
    matching Gamut nodes, pulls (and caches in gamut_dict) each node's attribute
    definition via gamut_process, and outer-merges the two attribute tables,
    adding per-node SKU-count and node-mapping report columns.

    Parameters (inferred from usage -- TODO confirm against callers):
        grainger_df     -- per-SKU attribute rows for one Grainger node
        grainger_sample -- sample-values frame merged on Grainger_Attribute_Name
        grainger_all    -- all-values frame merged on Grainger_Attr_ID
        gamut_dict      -- cache of processed Gamut node definitions (mutated)
        k               -- Grainger node/category id, used in logs

    Returns (df, gamut_dict) where df is the merged comparison report
    (empty DataFrame when no Gamut SKUs match).
    """
    df = pd.DataFrame()
    # grab the (single expected) category name for log output and report columns
    cat_name = grainger_df['Category_Name'].unique()
    cat_name = list(cat_name)
    cat_name = cat_name.pop()
    print('cat name = {} {}'.format(k, cat_name))
    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gamut query
    grainger_sku_count = len(grainger_skus)
    print('grainger sku count = ', grainger_sku_count)
    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute name and keep unique
    grainger_df['Grainger Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                        ' > ' + grainger_df['Category_Name']
    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns
    grainger_df = pd.merge(grainger_df,
                           grainger_sample,
                           on=['Grainger_Attribute_Name'])
    grainger_df = pd.merge(grainger_df, grainger_all, on=['Grainger_Attr_ID'])
    grainger_df['alt_grainger_name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge
    #grainger_df.to_csv ("F:/CGabriel/Grainger_Shorties/OUTPUT/grainger_test.csv")
    gamut_skus = q.gamut_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull
    if gamut_skus.empty == False:
        #create a dictionary of the unique gamut nodes that corresponde to the grainger node
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique(
        )  #create list of pim nodes to pull
        print('GAMUT L3s ', gamut_l3)
        for node in gamut_l3:
            if node in gamut_dict:
                gamut_df = gamut_dict[node]  # cache hit: reuse processed definition
            else:
                gamut_dict, gamut_df = gamut_process(node, gamut_dict, k)
            if gamut_df.empty == False:
                node_name = gamut_df['Gamut_Node_Name'].unique()
                node_name = list(node_name)
                node_name = node_name.pop()
                print('node name = {} {}'.format(node, node_name))
                #add correlating grainger and gamut data to opposite dataframes
                grainger_df = grainger_assign_nodes(grainger_df, gamut_df)
                gamut_df = gamut_assign_nodes(grainger_df, gamut_df)
                # SKUs belonging to just this Gamut node, for the count column below
                skus = gamut_skus[gamut_skus['Gamut_Node_ID'] == node]
                # outer merge on the prepped name plus all shared hierarchy columns
                temp_df = pd.merge(grainger_df, gamut_df,
                                   left_on=['alt_grainger_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                            'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'Grainger Blue Path', \
                                            'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                   right_on=['alt_gamut_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                             'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'Grainger Blue Path', \
                                             'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                   how='outer')
                temp_df = match_category(
                    temp_df
                )  #compare grainger and gamut atts and create column to say whether they match
                # report columns: raw counts, node mapping, and a combined count string
                temp_df['grainger_sku_count'] = grainger_sku_count
                temp_df['gamut_sku_count'] = len(skus)  #temp_skus['Gamut_SKU']
                temp_df[
                    'Grainger-Gamut Terminal Node Mapping'] = cat_name + ' -- ' + node_name
                temp_df['Gamut/Grainger SKU Counts'] = temp_df[
                    'gamut_sku_count'].map(
                        str) + ' / ' + temp_df['grainger_sku_count'].map(str)
                df = pd.concat(
                    [df, temp_df], axis=0, sort=False
                )  #add prepped df for this gamut node to the final df
                df['Matching'] = df['Matching'].str.replace(
                    'no', 'Potential Match')
            else:
                print('Gamut Node {} EMPTY DATAFRAME'.format(node))
    else:
        print('No Gamut SKUs for Grainger node {}'.format(k))
    return df, gamut_dict
    #where gamut_att_temp is the list of all normalized values for gamut attributes
def grainger_process(grainger_df, grainger_sample, grainger_all, k):
    """Merge one Grainger node's attribute summary with Gamut attribute data (early, uncached variant).

    Builds the unique Grainger SKU list, queries the matching Gamut SKUs/nodes,
    pulls each node's attributes and normalized values directly (no cache dict),
    and outer-merges the two attribute tables on the processed attribute name.

    Parameters (inferred from usage -- TODO confirm against callers):
        grainger_df     -- per-SKU attribute rows for one Grainger node
        grainger_sample -- sample-values frame merged on Grainger_Attribute_Name
        grainger_all    -- all-values frame, also merged on Grainger_Attribute_Name
        k               -- Grainger Category_ID stamped onto the Gamut frame

    Returns df, the merged comparison report (empty if no Gamut SKUs match).

    NOTE(review): this variant overwrites 'Grainger_Attribute_Name' in place
    (instead of adding an alt_* column like the other variants) and writes
    debug CSVs to hard-coded F:/ paths -- confirm this version is still in use.
    """
    df = pd.DataFrame()
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()
    # gamut_l3 = dict()
    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gamut query
    grainger_sku_count = len(grainger_skus)
    print('Grainger SKU count = ', grainger_sku_count)
    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute name and keep unique
    grainger_df['Grainger Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                        ' > ' + grainger_df['Category_Name']
    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns
    grainger_df = pd.merge(grainger_df,
                           grainger_sample,
                           on=['Grainger_Attribute_Name'])
    grainger_df = pd.merge(grainger_df,
                           grainger_all,
                           on=['Grainger_Attribute_Name'])
    grainger_df['Grainger_Attribute_Name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge
    # debug dump -- hard-coded local path
    grainger_df.to_csv(
        "F:/CGabriel/Grainger_Shorties/OUTPUT/grainger_test.csv")
    gamut_skus = q.gamut_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull
    gamut_skus = gamut_skus.drop_duplicates(subset='Gamut_SKU')
    # gamut_sku_counts = gamut_sku_list.groupby('Gamut_SKU')['Gamut_SKU']).count())
    if gamut_skus.empty == False:
        #create a dictionary of the unique gamut nodes that corresponde to the grainger node
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique(
        )  #create list of pim nodes to pull
        for node in gamut_l3:
            # get gamut attribute values for each gamut_l3 node
            gamut_df = q.gamut_atts(node, 'tax.id')  #tprod."categoryId"')
            # gamut_values exports a list of --all-- normalized values (temp_df) and sample_values
            gamut_att_vals, gamut_sample_vals = q.gamut_values(gamut_df)
            gamut_sample_vals = gamut_sample_vals.rename(
                columns={'Normalized Value': 'Gamut Attribute Sample Values'})
            gamut_att_vals = gamut_att_vals.rename(
                columns={'Normalized Value': 'Gamut ALL Values'})
            gamut_df = gamut_df.drop_duplicates(
                subset='Gamut_Attr_ID'
            )  #gamut attribute IDs are unique, so no need to group by pim node before getting unique
            gamut_df = gamut_df.drop(
                [
                    'Gamut_SKU', 'Grainger_SKU', 'Original Value',
                    'Normalized Value'
                ],
                axis=1)  #normalized values are collected as sample_value
            grainger_df['Gamut_Node_ID'] = int(
                node)  #add correlating gamut node to grainger_df
            gamut_df = pd.merge(gamut_df,
                                gamut_sample_vals,
                                on=['Gamut_Attribute_Name'
                                    ])  #add top 5 normalized values to report
            gamut_df = pd.merge(gamut_df,
                                gamut_att_vals,
                                on=['Gamut_Attribute_Name'
                                    ])  #add top 5 normalized values to report
            gamut_df['Category_ID'] = int(
                k)  #add grainger Category_ID column for gamut attributes
            gamut_df['Gamut_Attribute_Name'] = process.process_att(
                gamut_df['Gamut_Attribute_Name'])  #prep att name for merge
            #create df based on names that match exactly
            # debug dump -- hard-coded local path
            gamut_df.to_csv(
                "F:/CGabriel/Grainger_Shorties/OUTPUT/gamut_test.csv")
            temp_df = pd.merge(grainger_df,
                               gamut_df,
                               left_on=[
                                   'Grainger_Attribute_Name', 'Category_ID',
                                   'Gamut_Node_ID'
                               ],
                               right_on=[
                                   'Gamut_Attribute_Name', 'Category_ID',
                                   'Gamut_Node_ID'
                               ],
                               how='outer')
            temp_df = match_category(
                temp_df
            )  #compare grainger and gamut atts and create column to say whether they match
            temp_df['Grainger-Gamut Terminal Node Mapping'] = temp_df[
                'Category_Name'] + ' -- ' + temp_df['Gamut_Node_Name']
            df = pd.concat(
                [df, temp_df],
                axis=0)  #add prepped df for this gamut node to the final df
    return df
    #where gamut_att_temp is the list of all normalized values for gamut attributes