class HEINZCRToolBox:
    """Session-level KPI calculation toolbox for the HEINZ CR project.

    Calculates distribution/OOS, PowerSKU assortment, SOS, price adherence,
    extra-spaces and 'Perfect Store' KPIs from the session's scene item facts
    (scif) and writes results to the DB through ``common_v2``.

    NOTE(review): this module uses Python-2-only idioms (``dict.keys()[0]``
    indexing, ``pd.read_excel(sheetname=...)``, ``pd.np``,
    ``DataFrame.append``) — it will not run unmodified on Python 3 / modern
    pandas; confirm the target runtime before porting.
    """

    # Column layouts for the three assortment result levels (SKU / group / total).
    LVL3_HEADERS = ['assortment_group_fk', 'assortment_fk', 'target', 'product_fk', 'in_store',
                    'kpi_fk_lvl1', 'kpi_fk_lvl2', 'kpi_fk_lvl3', 'group_target_date',
                    'assortment_super_group_fk']
    LVL2_HEADERS = ['assortment_group_fk', 'assortment_fk', 'target', 'passes', 'total',
                    'kpi_fk_lvl1', 'kpi_fk_lvl2', 'group_target_date']
    LVL1_HEADERS = ['assortment_group_fk', 'target', 'passes', 'total', 'kpi_fk_lvl1']
    ASSORTMENT_FK = 'assortment_fk'
    ASSORTMENT_GROUP_FK = 'assortment_group_fk'
    ASSORTMENT_SUPER_GROUP_FK = 'assortment_super_group_fk'
    BRAND_VARIENT = 'brand_varient'
    # Keys of the JSON sos_policy blobs stored in the policy table.
    NUMERATOR = 'numerator'
    DENOMINATOR = 'denominator'
    DISTRIBUTION_KPI = 'Distribution - SKU'
    OOS_SKU_KPI = 'OOS - SKU'
    OOS_KPI = 'OOS'

    def __init__(self, data_provider, output):
        """Cache data-provider frames, load Excel config templates and
        pre-compute the assortment frames used by the KPI methods.

        :param data_provider: Trax session data provider.
        :param output: Trax output object (passed through to PsDataProvider/Survey).
        """
        self.output = output
        self.data_provider = data_provider
        self.common = CommonV2  # remove later
        self.common_v2 = CommonV2(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.kpi_results_queries = []
        self.ps_data_provider = PsDataProvider(self.data_provider, self.output)
        self.survey = Survey(self.data_provider, output=self.output,
                             ps_data_provider=self.ps_data_provider, common=self.common_v2)
        self.store_sos_policies = self.ps_data_provider.get_store_policies()
        self.labels = self.ps_data_provider.get_labels()
        self.store_info = self.data_provider[Data.STORE_INFO]
        # Enrich the raw store_info frame with PS-specific store attributes.
        self.store_info = self.ps_data_provider.get_ps_store_info(self.store_info)
        self.country = self.store_info['country'].iloc[0]
        self.current_date = datetime.now()
        # Excel-based configuration templates (paths come from the project Const).
        self.extra_spaces_template = pd.read_excel(Const.EXTRA_SPACES_RELEVANT_SUB_CATEGORIES_PATH)
        self.store_targets = pd.read_excel(Const.STORE_TARGETS_PATH)
        # NOTE(review): 'sheetname' is the pre-0.21 pandas kwarg — confirm pinned pandas version.
        self.sub_category_weight = pd.read_excel(Const.SUB_CATEGORY_TARGET_PATH,
                                                 sheetname='category_score')
        self.kpi_weights = pd.read_excel(Const.SUB_CATEGORY_TARGET_PATH, sheetname='max_weight')
        self.targets = self.ps_data_provider.get_kpi_external_targets()
        self.store_assortment = PSAssortmentDataProvider(
            self.data_provider).execute(policy_name=None)
        self.supervisor_target = self.get_supervisor_target()
        # PowerSKU assortment: rows whose assortment_name does NOT contain 'ASSORTMENT',
        # enriched with sub_category and the per-country weight sheet.
        # KeyError (e.g. missing 'assortment_name' when the store has no assortment)
        # degrades to an empty frame so downstream methods can bail out early.
        try:
            self.sub_category_assortment = pd.merge(self.store_assortment,
                                                    self.all_products.loc[:, ['product_fk',
                                                                              'sub_category',
                                                                              'sub_category_fk']],
                                                    how='left', on='product_fk')
            self.sub_category_assortment = \
                self.sub_category_assortment[~self.sub_category_assortment['assortment_name'].str.contains(
                    'ASSORTMENT')]
            self.sub_category_assortment = pd.merge(self.sub_category_assortment,
                                                    self.sub_category_weight, how='left',
                                                    left_on='sub_category', right_on='Category')
        except KeyError:
            self.sub_category_assortment = pd.DataFrame()
        self.update_score_sub_category_weights()
        # Plain distribution assortment: the complement of the PowerSKU split above.
        try:
            self.store_assortment_without_powerskus = \
                self.store_assortment[self.store_assortment['assortment_name'].str.contains('ASSORTMENT')]
        except KeyError:
            self.store_assortment_without_powerskus = pd.DataFrame()
        # Accumulator frames filled by heinz_global_price_adherence / heinz_global_extra_spaces.
        self.adherence_results = pd.DataFrame(columns=['product_fk', 'trax_average',
                                                       'suggested_price', 'into_interval',
                                                       'min_target', 'max_target',
                                                       'percent_range'])
        self.extra_spaces_results = pd.DataFrame(
            columns=['sub_category_fk', 'template_fk', 'count'])
        # Per-sub_category score dicts, filled by the individual KPI methods and
        # summed by calculate_perfect_sub_category().
        self.powersku_scores = {}
        self.powersku_empty = {}
        self.powersku_bonus = {}
        self.powersku_price = {}
        self.powersku_sos = {}

    def main_calculation(self, *args, **kwargs):
        """
        This function calculates the KPI results.

        Ordering matters: the price-adherence and extra-spaces "global"
        functions populate the accumulator frames consumed by the
        Perfect-Store methods further down.
        """
        if self.scif.empty:
            return
        # these function must run first
        # self.adherence_results = self.heinz_global_price_adherence(pd.read_excel(Const.PRICE_ADHERENCE_TEMPLATE_PATH,
        #                                                                          sheetname="Price Adherence"))
        self.adherence_results = self.heinz_global_price_adherence(self.targets)
        self.extra_spaces_results = self.heinz_global_extra_spaces()
        self.set_relevant_sub_categories()
        # this isn't relevant to the 'Perfect Score' calculation
        self.heinz_global_distribution_per_category()
        self.calculate_assortment()
        self.calculate_powersku_assortment()
        self.main_sos_calculation()
        self.calculate_powersku_price_adherence()
        self.calculate_perfect_store_extra_spaces()
        self.check_bonus_question()
        self.calculate_perfect_sub_category()

    def calculate_assortment(self):
        """Distribution / OOS for the non-PowerSKU assortment.

        Writes one 'Distribution - SKU' and one 'OOS - SKU' row per assortment
        product (result 1/0, mutually inverse), then store-level 'Distribution'
        and 'OOS' totals as percentages of the assortment size.
        """
        if self.store_assortment_without_powerskus.empty:
            return
        # A product counts as distributed if it has any facings in the visit.
        products_in_store = self.scif[self.scif['facings'] > 0]['product_fk'].unique().tolist()
        pass_count = 0
        total_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type('Distribution')
        identifier_dict = self.common_v2.get_dictionary(kpi_fk=total_kpi_fk)
        oos_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type('OOS')
        oos_identifier_dict = self.common_v2.get_dictionary(kpi_fk=oos_kpi_fk)
        for row in self.store_assortment_without_powerskus.itertuples():
            result = 0
            if row.product_fk in products_in_store:
                result = 1
                pass_count += 1
            sku_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type('Distribution - SKU')
            self.common_v2.write_to_db_result(sku_kpi_fk,
                                              numerator_id=row.product_fk,
                                              denominator_id=row.assortment_fk,
                                              result=result,
                                              identifier_parent=identifier_dict,
                                              should_enter=True)
            # OOS is the exact inverse of distribution at SKU level.
            oos_result = 0 if result else 1
            oos_sku_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type('OOS - SKU')
            self.common_v2.write_to_db_result(oos_sku_kpi_fk,
                                              numerator_id=row.product_fk,
                                              denominator_id=row.assortment_fk,
                                              result=oos_result,
                                              identifier_parent=oos_identifier_dict,
                                              should_enter=True)
        number_of_products_in_assortment = len(self.store_assortment_without_powerskus)
        if number_of_products_in_assortment:
            total_result = (pass_count / float(number_of_products_in_assortment)) * 100
            oos_products = number_of_products_in_assortment - pass_count
            oos_result = (oos_products / float(number_of_products_in_assortment)) * 100
        else:
            # Unreachable in practice (guarded by the .empty check above); kept defensively.
            total_result = 0
            oos_products = number_of_products_in_assortment
            oos_result = number_of_products_in_assortment
        self.common_v2.write_to_db_result(total_kpi_fk,
                                          numerator_id=Const.OWN_MANUFACTURER_FK,
                                          denominator_id=self.store_id,
                                          numerator_result=pass_count,
                                          denominator_result=number_of_products_in_assortment,
                                          result=total_result,
                                          identifier_result=identifier_dict)
        self.common_v2.write_to_db_result(oos_kpi_fk,
                                          numerator_id=Const.OWN_MANUFACTURER_FK,
                                          denominator_id=self.store_id,
                                          numerator_result=oos_products,
                                          denominator_result=number_of_products_in_assortment,
                                          result=oos_result,
                                          identifier_result=oos_identifier_dict)

    def calculate_powersku_assortment(self):
        """PowerSKU presence: per-SKU pass/fail plus a per-sub_category
        completion ratio; stores each sub_category score in
        ``self.powersku_scores`` for the Perfect-Store rollup."""
        if self.sub_category_assortment.empty:
            return 0
        sub_category_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.POWER_SKU_SUB_CATEGORY)
        sku_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.POWER_SKU)
        target_kpi_weight = float(
            self.kpi_weights['Score'][self.kpi_weights['KPIs'] == Const.KPI_WEIGHTS['POWERSKU']].iloc[
                0])
        kpi_weight = self.get_kpi_weight('POWERSKU')
        products_in_session = self.scif[self.scif['facings'] > 0]['product_fk'].unique().tolist()
        self.sub_category_assortment['in_session'] = \
            self.sub_category_assortment.loc[:, 'product_fk'].isin(products_in_session)
        # save PowerSKU results at SKU level
        for sku in self.sub_category_assortment[
                ['product_fk', 'sub_category_fk', 'in_session', 'sub_category']].itertuples():
            parent_dict = self.common_v2.get_dictionary(
                kpi_fk=sub_category_kpi_fk, sub_category_fk=sku.sub_category_fk)
            relevant_sub_category_df = self.sub_category_assortment[
                self.sub_category_assortment['sub_category'] == sku.sub_category]
            if relevant_sub_category_df.empty:
                # NOTE(review): cannot actually be 0 here (sku comes from the same
                # frame), which also protects the division below.
                sub_category_count = 0
            else:
                sub_category_count = len(relevant_sub_category_df)
            result = 1 if sku.in_session else 0
            # Each SKU contributes an equal share of the sub_category's target weight.
            score = result * (target_kpi_weight / float(sub_category_count))
            self.common_v2.write_to_db_result(sku_kpi_fk,
                                              numerator_id=sku.product_fk,
                                              denominator_id=sku.sub_category_fk,
                                              score=score,
                                              result=result,
                                              identifier_parent=parent_dict,
                                              should_enter=True)
        # save PowerSKU results at sub_category level
        aggregated_results = self.sub_category_assortment.groupby('sub_category_fk').agg(
            {'in_session': 'sum', 'product_fk': 'count'}).reset_index().rename(
            columns={'product_fk': 'product_count'})
        aggregated_results['percent_complete'] = \
            aggregated_results.loc[:, 'in_session'] / aggregated_results.loc[:, 'product_count']
        aggregated_results['result'] = aggregated_results['percent_complete']
        for sub_category in aggregated_results.itertuples():
            identifier_dict = self.common_v2.get_dictionary(kpi_fk=sub_category_kpi_fk,
                                                            sub_category_fk=sub_category.sub_category_fk)
            result = sub_category.result
            score = result * kpi_weight
            self.powersku_scores[sub_category.sub_category_fk] = score
            self.common_v2.write_to_db_result(sub_category_kpi_fk,
                                              numerator_id=sub_category.sub_category_fk,
                                              denominator_id=self.store_id,
                                              identifier_parent=sub_category.sub_category_fk,
                                              identifier_result=identifier_dict,
                                              result=result * 100,
                                              score=score,
                                              weight=target_kpi_weight,
                                              target=target_kpi_weight,
                                              should_enter=True)

    def heinz_global_distribution_per_category(self):
        """Write category-level distribution targets (kpi fk 12) for policies
        whose store_policy matches this store and is currently valid.

        NOTE(review): indentation of this method was reconstructed from a
        collapsed source; the placement of the ``df_1.empty`` check after the
        inner policy loop is the most plausible reading — verify against VCS.
        """
        relevant_stores = pd.DataFrame(columns=self.store_sos_policies.columns)
        for row in self.store_sos_policies.itertuples():
            policies = json.loads(row.store_policy)
            df = self.store_info
            for key, value in policies.items():
                try:
                    df_1 = df[df[key].isin(value)]
                except KeyError:
                    # Policy key not a store_info column — ignore this criterion.
                    continue
            # NOTE(review): df_1 may be unbound if every key raised KeyError,
            # and only the LAST policy key actually filters — presumably relies
            # on single-key policies; confirm.
            if not df_1.empty:
                stores = self.store_sos_policies[(self.store_sos_policies['store_policy']
                                                  == row.store_policy.encode('utf-8')) & (
                    self.store_sos_policies[
                        'target_validity_start_date'] <= datetime.date(
                        self.current_date))]
                if stores.empty:
                    relevant_stores = stores
                else:
                    relevant_stores = relevant_stores.append(stores, ignore_index=True)
        relevant_stores = relevant_stores.drop_duplicates(subset=['kpi', 'sku_name', 'target',
                                                                  'sos_policy'], keep='last')
        for row in relevant_stores.itertuples():
            sos_policy = json.loads(row.sos_policy)
            # Python 2 idiom: dict.keys() is a list, so [0] picks the single key.
            numerator_key = sos_policy[self.NUMERATOR].keys()[0]
            denominator_key = sos_policy[self.DENOMINATOR].keys()[0]
            numerator_val = sos_policy[self.NUMERATOR][numerator_key]
            denominator_val = sos_policy[self.DENOMINATOR][denominator_key]
            target = row.target * 100
            if numerator_key == 'manufacturer':
                numerator_key = numerator_key + '_name'
            # Only single-store_type sub_category policies (excluding 'all') are written.
            if denominator_key == 'sub_category' \
                    and denominator_val.lower() != 'all' \
                    and json.loads(row.store_policy).get('store_type') \
                    and len(json.loads(row.store_policy).get('store_type')) == 1:
                try:
                    denominator_id = self.all_products[self.all_products[denominator_key]
                                                       == denominator_val][
                        denominator_key + '_fk'].values[0]
                    numerator_id = self.all_products[self.all_products[numerator_key]
                                                     == numerator_val][
                        numerator_key.split('_')[0] + '_fk'].values[0]
                    # self.common.write_to_db_result_new_tables(fk=12, numerator_id=numerator_id,
                    #                                           numerator_result=None,
                    #                                           denominator_id=denominator_id,
                    #                                           denominator_result=None,
                    #                                           result=target)
                    self.common_v2.write_to_db_result(fk=12, numerator_id=numerator_id,
                                                      numerator_result=None,
                                                      denominator_id=denominator_id,
                                                      denominator_result=None,
                                                      result=target)
                except Exception as e:
                    # Lookup failed (value not in all_products) — log and skip the row.
                    Log.warning(denominator_key + ' - - ' + denominator_val)

    def calculate_perfect_store(self):
        # Placeholder — the Perfect Store rollup lives in calculate_perfect_sub_category().
        pass

    def calculate_perfect_sub_category(self):
        """Perfect-Store rollup: sum the per-sub_category scores collected by
        the PowerSKU / extra-spaces / price / SOS KPIs, weight them by the
        country column of the weight sheet, add the bonus, and write both the
        per-sub_category rows and the store total."""
        kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.PERFECT_STORE_SUB_CATEGORY)
        parent_kpi = self.common_v2.get_kpi_fk_by_kpi_type(Const.PERFECT_STORE)
        total_score = 0
        sub_category_fk_list = []
        kpi_type_dict_scores = [self.powersku_scores, self.powersku_empty, self.powersku_price,
                                self.powersku_sos]
        for kpi_dict in kpi_type_dict_scores:
            sub_category_fk_list.extend(kpi_dict.keys())
        kpi_weight_perfect_store = 0
        if self.country in self.sub_category_weight.columns.to_list():
            kpi_weight_perfect_store = self.sub_category_weight[self.country][
                self.sub_category_weight['Category'] == Const.PERFECT_STORE_KPI_WEIGHT]
            if not kpi_weight_perfect_store.empty:
                kpi_weight_perfect_store = kpi_weight_perfect_store.iloc[0]
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique_sub_cat_fks = list(dict.fromkeys(sub_category_fk_list))
        sub_category_fks = self.sub_category_weight.sub_category_fk.unique().tolist()
        relevant_sub_cat_list = [x for x in sub_category_fks if str(x) != 'nan']
        # relevant_sub_cat_list = self.sub_category_assortment['sub_category_fk'][
        #     self.sub_category_assortment['Category'] != pd.np.nan].unique().tolist()
        for sub_cat_fk in unique_sub_cat_fks:
            if sub_cat_fk in relevant_sub_cat_list:
                bonus_score = 0
                try:
                    bonus_score = self.powersku_bonus[sub_cat_fk]
                except:
                    # No bonus recorded for this sub_category — keep 0.
                    pass
                sub_cat_weight = self.get_weight(sub_cat_fk)
                sub_cat_score = self.calculate_sub_category_sum(kpi_type_dict_scores, sub_cat_fk)
                result = sub_cat_score
                score = (result * sub_cat_weight) + bonus_score
                total_score += score
                self.common_v2.write_to_db_result(kpi_fk, numerator_id=sub_cat_fk,
                                                  denominator_id=self.store_id,
                                                  result=result, score=score,
                                                  identifier_parent=parent_kpi,
                                                  identifier_result=sub_cat_fk,
                                                  weight=sub_cat_weight * 100,
                                                  should_enter=True)
        self.common_v2.write_to_db_result(parent_kpi, numerator_id=Const.OWN_MANUFACTURER_FK,
                                          denominator_id=self.store_id,
                                          result=total_score, score=total_score,
                                          identifier_result=parent_kpi,
                                          target=kpi_weight_perfect_store,
                                          should_enter=True)

    def main_sos_calculation(self):
        """Share-of-shelf: evaluate every valid SOS policy for this store
        against Primary Shelf facings, write the per-policy rows, and collect
        sub_category scores into ``self.powersku_sos`` for Perfect Store.

        NOTE(review): reconstructed indentation — see note on
        heinz_global_distribution_per_category; same ``df1.empty`` caveat.
        """
        relevant_stores = pd.DataFrame(columns=self.store_sos_policies.columns)
        for row in self.store_sos_policies.itertuples():
            policies = json.loads(row.store_policy)
            df = self.store_info
            for key, value in policies.items():
                try:
                    # additional_attribute_3 (supervisor) is not a store-matching criterion here.
                    if key != 'additional_attribute_3':
                        df1 = df[df[key].isin(value)]
                except KeyError:
                    continue
            if not df1.empty:
                stores = \
                    self.store_sos_policies[(self.store_sos_policies['store_policy'].str.encode(
                        'utf-8') == row.store_policy.encode('utf-8')) &
                                            (self.store_sos_policies['target_validity_start_date']
                                             <= datetime.date(self.current_date))]
                if stores.empty:
                    relevant_stores = stores
                else:
                    relevant_stores = relevant_stores.append(stores, ignore_index=True)
        relevant_stores = relevant_stores.drop_duplicates(subset=['kpi', 'sku_name', 'target',
                                                                  'sos_policy'], keep='last')
        results_df = pd.DataFrame(columns=['sub_category', 'sub_category_fk', 'score'])
        sos_sub_category_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.SOS_SUB_CATEGORY)
        for row in relevant_stores.itertuples():
            sos_policy = json.loads(row.sos_policy)
            # Python 2 idiom: dict.keys() indexing assumes a single-key policy dict.
            numerator_key = sos_policy[self.NUMERATOR].keys()[0]
            denominator_key = sos_policy[self.DENOMINATOR].keys()[0]
            numerator_val = sos_policy[self.NUMERATOR][numerator_key]
            denominator_val = sos_policy[self.DENOMINATOR][denominator_key]
            json_policy = json.loads(row.store_policy)
            kpi_fk = row.kpi
            # This is to assign the KPI to SOS_manufacturer_category_GLOBAL
            if json_policy.get('store_type') and len(json_policy.get('store_type')) > 1:
                kpi_fk = 8
            if numerator_key == 'manufacturer':
                numerator_key = numerator_key + '_name'
                # we need to include 'Philadelphia' as a manufacturer for all countries EXCEPT Chile
                if self.country == 'Chile':
                    numerator_values = [numerator_val]
                else:
                    numerator_values = [numerator_val, 'Philadelphia']
            else:
                # if the numerator isn't 'manufacturer', we just need to convert the value to a list
                numerator_values = [numerator_val]
            if denominator_key == 'sub_category':
                # These sub_categories count stacked facings; all others ignore stacking.
                include_stacking_list = ['Nuts', 'DRY CHEESE', 'IWSN', 'Shredded', 'SNACK']
                if denominator_val in include_stacking_list:
                    facings_field = 'facings'
                else:
                    facings_field = 'facings_ign_stack'
            else:
                facings_field = 'facings_ign_stack'
            if denominator_key == 'sub_category' and denominator_val.lower() == 'all':
                # Here we are talkin on a KPI when the target have no denominator,
                # the calculation should be done on Numerator only
                numerator = self.scif[(self.scif[numerator_key] == numerator_val) &
                                      (self.scif['location_type'] == 'Primary Shelf')
                                      ][facings_field].sum()
                kpi_fk = 9
                denominator = None
                denominator_id = None
            else:
                numerator = self.scif[(self.scif[numerator_key].isin(numerator_values)) &
                                      (self.scif[denominator_key] == denominator_val) &
                                      (self.scif['location_type'] == 'Primary Shelf')][facings_field].sum()
                denominator = self.scif[(self.scif[denominator_key] == denominator_val) &
                                        (self.scif['location_type'] == 'Primary Shelf')][facings_field].sum()
            try:
                if denominator is not None:
                    denominator_id = self.all_products[self.all_products[denominator_key]
                                                       == denominator_val][
                        denominator_key + '_fk'].values[0]
                if numerator is not None:
                    numerator_id = self.all_products[self.all_products[numerator_key]
                                                     == numerator_val][
                        numerator_key.split('_')[0] + '_fk'].values[0]
                sos = 0
                if numerator and denominator:
                    sos = np.divide(float(numerator), float(denominator)) * 100
                score = 0
                target = row.target * 100
                if sos >= target:
                    score = 100
                identifier_parent = None
                should_enter = False
                if denominator_key == 'sub_category' and kpi_fk == row.kpi:
                    # if this a sub_category result, save it to the results_df for 'Perfect Store' store results
                    results_df.loc[len(results_df)] = [denominator_val, denominator_id,
                                                       score / 100]
                    identifier_parent = self.common_v2.get_dictionary(
                        kpi_fk=sos_sub_category_kpi_fk, sub_category_fk=denominator_id)
                    should_enter = True
                manufacturer = None
                # NOTE(review): result=target / score=sos looks swapped relative to the
                # usual convention — confirm this is intentional before changing.
                self.common_v2.write_to_db_result(kpi_fk, numerator_id=numerator_id,
                                                  numerator_result=numerator,
                                                  denominator_id=denominator_id,
                                                  denominator_result=denominator,
                                                  result=target, score=sos,
                                                  target=target,
                                                  score_after_actions=manufacturer,
                                                  identifier_parent=identifier_parent,
                                                  should_enter=should_enter)
            except Exception as e:
                Log.warning(denominator_key + ' - - ' + denominator_val)
        # if there are no sub_category sos results, there's no perfect store information to be saved
        if len(results_df) == 0:
            return 0
        # save aggregated results for each sub category
        kpi_weight = self.get_kpi_weight('SOS')
        for row in results_df.itertuples():
            identifier_result = \
                self.common_v2.get_dictionary(kpi_fk=sos_sub_category_kpi_fk,
                                              sub_category_fk=row.sub_category_fk)
            # sub_cat_weight = self.get_weight(row.sub_category_fk)
            result = row.score
            score = result * kpi_weight
            self.powersku_sos[row.sub_category_fk] = score
            # limit results so that aggregated results can only add up to 3
            self.common_v2.write_to_db_result(sos_sub_category_kpi_fk,
                                              numerator_id=row.sub_category_fk,
                                              denominator_id=self.store_id,
                                              result=row.score, score=score,
                                              identifier_parent=row.sub_category_fk,
                                              identifier_result=identifier_result,
                                              weight=kpi_weight, target=kpi_weight,
                                              should_enter=True)

    def calculate_powersku_price_adherence(self):
        """Join adherence results onto the PowerSKU assortment and write
        per-SKU Pass/Fail/'No Price'/'Not Present' rows plus a per-sub_category
        completion score (stored in ``self.powersku_price``)."""
        adherence_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.POWER_SKU_PRICE_ADHERENCE)
        adherence_sub_category_kpi_fk = \
            self.common_v2.get_kpi_fk_by_kpi_type(Const.POWER_SKU_PRICE_ADHERENCE_SUB_CATEGORY)
        if self.sub_category_assortment.empty:
            return False
        results = pd.merge(self.sub_category_assortment, self.adherence_results,
                           how='left', on='product_fk')
        results['into_interval'].fillna(0, inplace=True)
        for row in results.itertuples():
            parent_dict = self.common_v2.get_dictionary(kpi_fk=adherence_sub_category_kpi_fk,
                                                        sub_category_fk=row.sub_category_fk)
            score_value = 'Not Present'
            in_session = row.in_session
            if in_session:
                if not pd.isna(row.trax_average) and row.suggested_price:
                    price_in_interval = 1 if row.into_interval == 1 else 0
                    if price_in_interval == 1:
                        score_value = 'Pass'
                    else:
                        score_value = 'Fail'
                else:
                    score_value = 'No Price'
            # Map the textual outcome to its numeric DB value.
            score = Const.PRESENCE_PRICE_VALUES[score_value]
            self.common_v2.write_to_db_result(adherence_kpi_fk, numerator_id=row.product_fk,
                                              denominator_id=row.sub_category_fk,
                                              result=row.trax_average,
                                              score=score,
                                              target=row.suggested_price,
                                              numerator_result=row.min_target,
                                              denominator_result=row.max_target,
                                              weight=row.percent_range,
                                              identifier_parent=parent_dict,
                                              should_enter=True)
        aggregated_results = results.groupby('sub_category_fk').agg(
            {'into_interval': 'sum', 'product_fk': 'count'}).reset_index().rename(
            columns={'product_fk': 'product_count'})
        aggregated_results['percent_complete'] = \
            aggregated_results.loc[:, 'into_interval'] / aggregated_results.loc[:, 'product_count']
        for row in aggregated_results.itertuples():
            identifier_result = self.common_v2.get_dictionary(kpi_fk=adherence_sub_category_kpi_fk,
                                                              sub_category_fk=row.sub_category_fk)
            kpi_weight = self.get_kpi_weight('PRICE')
            result = row.percent_complete
            score = result * kpi_weight
            self.powersku_price[row.sub_category_fk] = score
            self.common_v2.write_to_db_result(adherence_sub_category_kpi_fk,
                                              numerator_id=row.sub_category_fk,
                                              denominator_id=self.store_id,
                                              result=result, score=score,
                                              numerator_result=row.into_interval,
                                              denominator_result=row.product_count,
                                              identifier_parent=row.sub_category_fk,
                                              identifier_result=identifier_result,
                                              weight=kpi_weight, target=kpi_weight,
                                              should_enter=True)

    def heinz_global_price_adherence(self, config_df):
        """Compare observed shelf prices against external suggested-price
        targets for this store type.

        :param config_df: external-targets frame (start/end date, ean_code,
            store_type, suggested_price, percentage_weight per row).
        :return: DataFrame with self.adherence_results columns, one row per
            evaluated product.
        """
        # Keep only the most recently received target per (dates, ean, store_type).
        config_df = config_df.sort_values(by=["received_time"], ascending=False).drop_duplicates(
            subset=['start_date', 'end_date', 'ean_code', 'store_type'], keep="first")
        if config_df.empty:
            Log.warning("No external_targets data found - Price Adherence will not be calculated")
            return self.adherence_results
        # Fall back to the promotion price where no regular price was captured.
        self.match_product_in_scene.loc[self.match_product_in_scene['price'].isna(), 'price'] = \
            self.match_product_in_scene.loc[self.match_product_in_scene['price'].isna(),
                                            'promotion_price']
        # =============== remove after updating logic to support promotional pricing ===============
        results_df = self.adherence_results
        my_config_df = \
            config_df[config_df['store_type'].str.encode('utf-8')
                      == self.store_info.store_type[0].encode('utf-8')]
        products_in_session = self.scif['product_ean_code'].unique().tolist()
        products_in_session = [ean for ean in products_in_session
                               if ean is not pd.np.nan and ean is not None]
        my_config_df = my_config_df[my_config_df['ean_code'].isin(products_in_session)]
        for row in my_config_df.itertuples():
            product_pk = \
                self.all_products[self.all_products['product_ean_code']
                                  == row.ean_code]['product_fk'].iloc[0]
            # Prices for the product or any of its substitution products.
            mpisc_df_price = \
                self.match_product_in_scene[(self.match_product_in_scene['product_fk']
                                             == product_pk) |
                                            (self.match_product_in_scene[
                                                'substitution_product_fk'] == product_pk)]['price']
            try:
                suggested_price = float(row.suggested_price)
            except Exception as e:
                Log.error("Product with ean_code {} is not in the configuration file for customer type {}"
                          .format(row.ean_code, self.store_info.store_type[0].encode('utf-8')))
                # NOTE(review): 'break' aborts ALL remaining products on one bad row —
                # 'continue' may have been intended; confirm before changing.
                break
            # Allowed band: suggested price +/- percentage_weight percent.
            percentage_weight = int(row.percentage_weight)
            upper_percentage = (100 + percentage_weight) / float(100)
            lower_percentage = (100 - percentage_weight) / float(100)
            min_price = suggested_price * lower_percentage
            max_price = suggested_price * upper_percentage
            percentage_sku = percentage_weight
            into_interval = 0
            prices_sum = 0
            count = 0
            trax_average = None
            for price in mpisc_df_price:
                if price and pd.notna(price):
                    prices_sum += price
                    count += 1
            if prices_sum > 0:
                trax_average = prices_sum / count
                into_interval = 0
                if not np.isnan(suggested_price):
                    if min_price <= trax_average <= max_price:
                        into_interval = 100
            results_df.loc[len(results_df)] = [product_pk, trax_average, suggested_price,
                                               into_interval / 100, min_price, max_price,
                                               percentage_sku]
            self.common_v2.write_to_db_result(10, numerator_id=product_pk,
                                              numerator_result=suggested_price,
                                              denominator_id=product_pk,
                                              denominator_result=trax_average,
                                              result=row.percentage_weight,
                                              score=into_interval)
            if trax_average:
                # Mark-up vs suggested price, net of 13% (presumably tax) — confirm the 1.13 factor.
                mark_up = (np.divide(np.divide(float(trax_average), float(1.13)),
                                     float(suggested_price)) - 1) * 100
                self.common_v2.write_to_db_result(11, numerator_id=product_pk,
                                                  numerator_result=suggested_price,
                                                  denominator_id=product_pk,
                                                  denominator_result=trax_average,
                                                  score=mark_up,
                                                  result=mark_up)
        return results_df

    def calculate_perfect_store_extra_spaces(self):
        """Write extra-spaces presence per sub_category (1 where an extra
        space exists, 0 for the remaining relevant sub_categories) and store
        the scores in ``self.powersku_empty``."""
        extra_spaces_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(
            Const.PERFECT_STORE_EXTRA_SPACES_SUB_CATEGORY)
        sub_cats_for_store = self.relevant_sub_categories
        # NOTE(review): 'pass' on empty results looks like a leftover no-op
        # (the method still proceeds) — confirm whether an early return was intended.
        if self.extra_spaces_results.empty:
            pass
        try:
            relevant_sub_categories = [x.strip() for x in self.extra_spaces_template[
                self.extra_spaces_template['country'].str.encode('utf-8')
                == self.country.encode('utf-8')][
                'sub_category'].iloc[0].split(',')]
        except IndexError:
            # NOTE(review): relevant_sub_categories stays unbound here, so the
            # .isin() below would raise NameError for such countries — verify.
            Log.warning(
                'No relevant sub_categories for the Extra Spaces KPI found for the following country: {}'.format(
                    self.country))
        self.extra_spaces_results = pd.merge(self.extra_spaces_results,
                                             self.all_products.loc[:, [
                                                 'sub_category_fk',
                                                 'sub_category']].dropna().drop_duplicates(),
                                             how='left', on='sub_category_fk')
        relevant_extra_spaces = \
            self.extra_spaces_results[self.extra_spaces_results['sub_category'].isin(
                relevant_sub_categories)]
        kpi_weight = self.get_kpi_weight('EXTRA')
        for row in relevant_extra_spaces.itertuples():
            self.powersku_empty[row.sub_category_fk] = 1 * kpi_weight
            score = result = 1
            if row.sub_category_fk in sub_cats_for_store:
                sub_cats_for_store.remove(row.sub_category_fk)
            self.common_v2.write_to_db_result(extra_spaces_kpi_fk,
                                              numerator_id=row.sub_category_fk,
                                              denominator_id=row.template_fk,
                                              result=result, score=score,
                                              identifier_parent=row.sub_category_fk,
                                              target=1,
                                              should_enter=True)
        # Sub_categories with no extra space found get an explicit zero row.
        for sub_cat_fk in sub_cats_for_store:
            result = score = 0
            self.powersku_empty[sub_cat_fk] = 0
            self.common_v2.write_to_db_result(extra_spaces_kpi_fk,
                                              numerator_id=sub_cat_fk,
                                              denominator_id=0,
                                              result=result, score=score,
                                              identifier_parent=sub_cat_fk,
                                              target=1,
                                              should_enter=True)

    def heinz_global_extra_spaces(self):
        """Detect extra (secondary) spaces: for every secondary-location scene,
        attribute the scene to the sub_category with the most facings and write
        a kpi-13 row carrying the supervisor's store target.

        :return: DataFrame['sub_category_fk', 'template_fk', 'count'].
        """
        try:
            supervisor = self.store_info['additional_attribute_3'][0]
            store_target = -1  # sentinel: no matching supervisor target found
            # for row in self.store_sos_policies.itertuples():
            #     policies = json.loads(row.store_policy)
            #     for key, value in policies.items():
            #         try:
            #             if key == 'additional_attribute_3' and value[0] == supervisor:
            #                 store_target = row.target
            #                 break
            #         except KeyError:
            #             continue
            for row in self.supervisor_target.itertuples():
                try:
                    if row.supervisor == supervisor:
                        store_target = row.target
                        break
                except:
                    continue
        except Exception as e:
            Log.error("Supervisor target is not configured for the extra spaces report ")
            raise e
        results_df = self.extra_spaces_results
        # limit to only secondary scenes
        relevant_scif = self.scif[(self.scif['location_type_fk'] == float(2)) &
                                  (self.scif['facings'] > 0)]
        if relevant_scif.empty:
            return results_df
        # aggregate facings for every scene/sub_category combination in the visit
        relevant_scif = \
            relevant_scif.groupby(['scene_fk', 'template_fk', 'sub_category_fk'],
                                  as_index=False)['facings'].sum()
        # sort sub_categories by number of facings, largest first
        relevant_scif = relevant_scif.sort_values(['facings'], ascending=False)
        # drop all but the sub_category with the largest number of facings for each scene
        relevant_scif = relevant_scif.drop_duplicates(subset=['scene_fk'], keep='first')
        for row in relevant_scif.itertuples():
            results_df.loc[len(results_df)] = [row.sub_category_fk, row.template_fk, row.facings]
            self.common_v2.write_to_db_result(13, numerator_id=row.template_fk,
                                              numerator_result=row.facings,
                                              denominator_id=row.sub_category_fk,
                                              denominator_result=row.facings,
                                              context_id=row.scene_fk,
                                              result=store_target)
        return results_df

    def check_bonus_question(self):
        """Score the bonus survey question (yes/no) once per relevant
        sub_category, weighted by the sub_category weight; fills
        ``self.powersku_bonus`` for the Perfect-Store rollup."""
        bonus_kpi_fk = self.common_v2.get_kpi_fk_by_kpi_type(Const.BONUS_QUESTION_SUB_CATEGORY)
        bonus_weight = self.kpi_weights['Score'][self.kpi_weights['KPIs']
                                                 == Const.KPI_WEIGHTS['Bonus']].iloc[0]
        sub_category_fks = self.sub_category_weight.sub_category_fk.unique().tolist()
        sub_category_fks = [x for x in sub_category_fks if str(x) != 'nan']
        # Accepted affirmative answers in English/Spanish, case variants included.
        if self.survey.check_survey_answer(('question_fk', Const.BONUS_QUESTION_FK),
                                           'Yes,yes,si,Si'):
            result = 1
        else:
            result = 0
        for sub_cat_fk in sub_category_fks:
            sub_cat_weight = self.get_weight(sub_cat_fk)
            score = result * sub_cat_weight
            target_weight = bonus_weight * sub_cat_weight
            self.powersku_bonus[sub_cat_fk] = score
            self.common_v2.write_to_db_result(bonus_kpi_fk, numerator_id=sub_cat_fk,
                                              denominator_id=self.store_id,
                                              result=result, score=score,
                                              identifier_parent=sub_cat_fk,
                                              weight=target_weight,
                                              target=target_weight,
                                              should_enter=True)

    def commit_results_data(self):
        # Flush all accumulated KPI results to the DB in one transaction.
        self.common_v2.commit_results_data()

    def update_score_sub_category_weights(self):
        """Attach sub_category_fk to the weight sheet by joining its
        'Category' column onto the products catalogue."""
        all_sub_category_fks = self.all_products[['sub_category',
                                                  'sub_category_fk']].drop_duplicates()
        self.sub_category_weight = pd.merge(self.sub_category_weight, all_sub_category_fks,
                                            left_on='Category', right_on='sub_category',
                                            how='left')

    def get_weight(self, sub_category_fk):
        """Return this country's weight for *sub_category_fk* as a fraction
        (sheet value is a percentage); 0 when missing/NaN/unknown country."""
        weight_value = 0
        if self.country in self.sub_category_weight.columns.to_list():
            weight_df = self.sub_category_weight[self.country][
                (self.sub_category_weight.sub_category_fk == sub_category_fk)]
            if weight_df.empty:
                return 0
            weight_value = weight_df.iloc[0]
            if pd.isna(weight_value):
                weight_value = 0
        weight = weight_value * 0.01
        return weight

    def get_kpi_weight(self, kpi_name):
        """Return the 'Score' weight for *kpi_name* (a key of Const.KPI_WEIGHTS)
        from the max_weight sheet. Raises IndexError if the KPI row is absent."""
        weight = self.kpi_weights['Score'][self.kpi_weights['KPIs']
                                           == Const.KPI_WEIGHTS[kpi_name]].iloc[0]
        return weight

    def get_supervisor_target(self):
        # Extra-spaces supervisor targets come from the external targets table.
        supervisor_target = self.targets[self.targets['kpi_type'] == 'Extra Spaces']
        return supervisor_target

    def calculate_sub_category_sum(self, dict_list, sub_cat_fk):
        """Sum *sub_cat_fk*'s entries across the score dicts in *dict_list*,
        treating a missing key as 0."""
        total_score = 0
        for item in dict_list:
            try:
                total_score += item[sub_cat_fk]
            except:
                pass
        return total_score

    def set_relevant_sub_categories(self):
        """Populate self.relevant_sub_categories with the sub_category_fks
        that have a (non-NaN) weight for this country; empty list otherwise."""
        if self.country in self.sub_category_weight.columns.to_list():
            df = self.sub_category_weight[['Category', 'sub_category_fk', self.country]].dropna()
            self.relevant_sub_categories = df.sub_category_fk.to_list()
        else:
            self.relevant_sub_categories = []
class ToolBox:
    """Session-level KPI calculator driven by an Excel template.

    Reads the template's KPIS sheet, resolves KPI dependencies, dispatches each
    KPI row to its type-specific calculator, and writes results through `common`.
    """

    def __init__(self, data_provider, output, common):
        self.common = common
        self.output = output
        self.data_provider = data_provider
        # ----------- fix for nan types in dataprovider -----------
        # all_products = self.data_provider._static_data_provider.all_products.where(
        #     (pd.notnull(self.data_provider._static_data_provider.all_products)), None)
        # self.data_provider._set_all_products(all_products)
        # self.data_provider._init_session_data(None, True)
        # self.data_provider._init_report_data(self.data_provider.session_uid)
        # self.data_provider._init_reporting_data(self.data_provider.session_id)
        # ----------- fix for nan types in dataprovider -----------
        self.block = Block(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.templates = self.data_provider.all_templates
        self.ps_data_provider = PsDataProvider(self.data_provider, self.output)
        self.result_values_dict = self.make_result_values_dict()
        self.store_assortment = self.ps_data_provider.get_store_assortment()
        self.store_sos_policies = self.ps_data_provider.get_store_policies()
        self.labels = self.ps_data_provider.get_labels()
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.products = self.data_provider[Data.PRODUCTS]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        # matches enriched with product, scene and template attributes
        self.full_mpis = self.match_product_in_scene.merge(self.products, on='product_fk', suffixes=['', '_p'])\
            .merge(self.scene_info, on='scene_fk', suffixes=['', '_s'])\
            .merge(self.templates, on='template_fk', suffixes=['', '_t'])
        # working copy with Irrelevant products and SOS-excluded rows removed
        self.mpis = self.full_mpis[self.full_mpis['product_type'] != 'Irrelevant']
        self.mpis = self.filter_df(self.mpis, Const.SOS_EXCLUDE_FILTERS, exclude=1)
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scenes = self.scene_info['scene_fk'].tolist()
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.tmb_map = pd.read_excel(Const.TMB_MAP_PATH).set_index('Num Shelves').to_dict('index')
        self.blockchain = {}       # cache of block/region computations, keyed by base KPI name
        self.template = {}         # {sheet_name: DataFrame} loaded in main_calculation
        self.dependencies = {}     # {kpi_name: last result} for dependency gating
        self.dependency_lookup = {}
        self.base_measure = None
        self.global_fail = 0       # set when a KPI fails due to missing data (soft failure)

    # main functions:

    def main_calculation(self, template_path):
        """
        This function gets all the scene results from the SceneKPI, after that calculates every session's KPI,
        and in the end it calls "filter results" to choose every KPI and scene and write the results in DB.
        """
        self.template = pd.read_excel(template_path, sheetname=None)
        self.dependencies = {key: None for key in self.template[Const.KPIS][Const.KPI_NAME]}
        self.dependency_reorder()
        main_template = self.template[Const.KPIS]
        self.dependency_lookup = main_template.set_index(Const.KPI_NAME)[Const.DEPENDENT].to_dict()
        self.shun()
        for i, main_line in main_template.iterrows():
            self.global_fail = 0
            self.calculate_main_kpi(main_line)
        # self.flag_failures()

    def calculate_main_kpi(self, main_line):
        """Run one KPIS-sheet row: apply scene filters, check dependencies, dispatch, and write results.

        Any exception from the calculator is converted to a zero-score result so
        a single bad KPI cannot abort the session.
        """
        kpi_name = main_line[Const.KPI_NAME]
        kpi_type = main_line[Const.TYPE]
        scene_types = self.read_cell_from_line(main_line, Const.SCENE_TYPE)
        general_filters = {}
        relevant_scif = self.filter_df(self.scif.copy(), Const.SOS_EXCLUDE_FILTERS, exclude=1)
        if scene_types:
            relevant_scif = relevant_scif[relevant_scif['template_name'].isin(scene_types)]
            general_filters['template_name'] = scene_types
        if relevant_scif.empty:
            return
        # NOTE(review): removed a leftover debugging gate that skipped every KPI
        # except 'Are PFC shelved between Canned and Squeezers?' (it also tested
        # membership in a plain string, i.e. a substring check, not a tuple).
        dependent_kpis = self.read_cell_from_line(main_line, Const.DEPENDENT)
        dependent_results = self.read_cell_from_line(main_line, Const.DEPENDENT_RESULT)
        if dependent_kpis:
            # skip this KPI unless every dependency produced an acceptable result
            for dependent_kpi in dependent_kpis:
                if self.dependencies[dependent_kpi] not in dependent_results:
                    if dependent_results or self.dependencies[dependent_kpi] is None:
                        return
        kpi_line = self.template[kpi_type].set_index(Const.KPI_NAME).loc[kpi_name]
        function = self.get_kpi_function(kpi_type)
        try:
            all_kwargs = function(kpi_name, kpi_line, relevant_scif, general_filters)
        except Exception as e:
            if self.global_fail:
                # soft failure: calculator found no relevant data
                all_kwargs = [{'score': 0, 'result': None, 'failed': 0}]
                Log.warning('kpi "{}" failed to calculate'.format(kpi_name))
            else:
                all_kwargs = [{'score': 0, 'result': None, 'failed': 1}]
                Log.error('kpi "{}" failed error: "{}"'.format(kpi_name, e))
        finally:
            if not isinstance(all_kwargs, list) or not all_kwargs:
                all_kwargs = [all_kwargs]
            for kwargs in all_kwargs:
                if not kwargs or kwargs['score'] is None:
                    kwargs = {'score': 0, 'result': 0, 'failed': 0}
                self.write_to_db(kpi_name, **kwargs)
                self.dependencies[kpi_name] = kwargs['result']

    def flag_failures(self):
        """Log a warning for every KPI that never produced a result this session."""
        for kpi, val in self.dependencies.items():
            if val is None:
                Log.warning('Warning: KPI "{}" not run for session "{}"'.format(
                    kpi, self.session_uid))

    def calculate_sos(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Return linear footage (net length ignoring stacking, mm -> ft) of the filtered products."""
        num = self.filter_df(relevant_scif, self.get_kpi_line_filters(kpi_line))[
            'net_len_ign_stack'].sum() / 304.8  # 304.8 mm per foot
        return {'score': 1, 'result': num}

    def calculate_same_aisle(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Pass (1) when all filtered products appear in a single scene, else 0."""
        filters = self.get_kpi_line_filters(kpi_line)
        relevant_scif = self.filter_df(self.scif, filters)
        if relevant_scif.empty:
            return
        result = 0
        if len(relevant_scif.scene_fk.unique()) == 1:
            result = 1
        return {'score': 1, 'result': result}

    def calculate_shelf_placement(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Percent of filtered facings found on the Top/Middle/Bottom band named in 'Shelf Placement'.

        Shelf bands come from the TMB map template, keyed by each bay's shelf count;
        shelf numbers are normalized per bay via an offset so partial bays line up.
        """
        location = kpi_line['Shelf Placement'].lower()
        # reshape the TMB template into (max_shelves, shelf_number_from_bottom) -> band
        tmb_map = pd.read_excel(Const.TMB_MAP_PATH).melt(id_vars=['Num Shelves'], var_name=['Shelf'])\
            .set_index(['Num Shelves', 'Shelf']).reset_index()
        tmb_map.columns = ['max_shelves', 'shelf_number_from_bottom', 'TMB']
        tmb_map['TMB'] = tmb_map['TMB'].str.lower()
        filters = self.get_kpi_line_filters(kpi_line)
        mpis = self.filter_df(self.mpis, filters)
        mpis = self.filter_df(mpis, {'stacking_layer': 1})
        if mpis.empty:
            return
        filters.update(general_filters)
        mpis = self.filter_df(mpis, {'scene_fk': list(relevant_scif.scene_id.unique())})
        bay_shelf = self.filter_df(self.full_mpis, general_filters).set_index(['scene_fk', 'bay_number'])\
            .groupby(level=[0, 1])[['shelf_number', 'shelf_number_from_bottom']].max()
        bay_shelf['shelf_offset'] = bay_shelf['shelf_number_from_bottom'] - \
            bay_shelf['shelf_number']
        bay_shelf = bay_shelf.drop('shelf_number_from_bottom', axis=1).rename(
            columns={'shelf_number': 'max_shelves'})
        mpis = mpis.merge(bay_shelf, on=['bay_number', 'scene_fk'])
        mpis['true_shelf'] = mpis['shelf_number_from_bottom'] + mpis['shelf_offset']
        mpis = mpis.merge(tmb_map, on=['max_shelves', 'shelf_number_from_bottom'])
        result = self.safe_divide(self.filter_df(mpis, {'TMB': location}).shape[0], mpis.shape[0])
        return {'score': 1, 'result': result}

    def calulate_shelf_region(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Percent of numerator facings falling in the left/center/right third of the bays.

        Results are cached in self.blockchain per base KPI name so the three
        region variants of the same KPI share one pass over the data.
        (NOTE(review): method name typo 'calulate' kept — it is referenced by
        get_kpi_function.)
        """
        base = self.get_base_name(kpi_name, Const.REGIONS)
        location = kpi_line['Shelf Placement'].lower()
        if base not in self.blockchain:
            num_filters = self.get_kpi_line_filters(kpi_line, 'numerator')
            den_filters = self.get_kpi_line_filters(kpi_line, 'denominator')
            mpis = self.filter_df(self.mpis, den_filters)
            reg_list = ['left', 'center', 'right']
            self.blockchain[base] = {reg: 0 for reg in reg_list}
            self.blockchain[base]['den'] = 0
            for scene in mpis.scene_fk.unique():
                smpis = self.filter_df(mpis, {'scene_fk': scene})
                num_df = self.filter_df(smpis, num_filters)
                bays = sorted(list(smpis.bay_number.unique()))
                size = len(bays) / Const.NUM_REG  # integer division under Python 2
                mod = len(bays) % Const.NUM_REG
                # find start points for center and right groups (left is always 0); these index `bays`
                center = size
                right = size * 2
                if mod == 1:
                    right += 1  # if there is one odd bay we expand center
                elif mod == 2:
                    center += 1  # If 2, we expand left and right by one
                    right += 1
                self.blockchain[base]['den'] += num_df.shape[0]
                regions = [0, center, right, len(bays)]
                for i, reg in enumerate(reg_list):
                    self.blockchain[base][reg] += self.filter_df(
                        num_df, {'bay_number': bays[regions[i]:regions[i+1]]}).shape[0]
        result = self.safe_divide(self.blockchain[base][location], self.blockchain[base]['den'])
        return {'score': 1, 'result': result}

    def calculate_sequence(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Pass (1) when the segments' max blocks appear in template order along 'Vector'.

        Each segment must also overlap its neighbor by >= 10% of facings on the
        orthogonal axis for the order to count as a flow.
        """
        vector = kpi_line['Vector']
        orth = (set(['x', 'y']) - set(vector)).pop()
        Segment = namedtuple('Segment', 'seg position facings orth_min orth_max matches')
        segments = [i.strip() for i in self.splitter(kpi_line['Sequence'])]
        result = 0
        for scene in relevant_scif.scene_fk.unique():
            scene_scif = relevant_scif[relevant_scif['scene_fk'] == scene]
            seg_list = []
            for seg in segments:
                seg_filters = self.get_kpi_line_filters(kpi_line, seg)
                _, _, mpis_dict, _, results = self.base_block(kpi_name, kpi_line, scene_scif, general_filters,
                                                              filters=seg_filters, check_orient=0)
                cluster = results.sort_values('facing_percentage', ascending=False).iloc[0, 0]
                df = pd.DataFrame([(n['polygon'].centroid.x, n['polygon'].centroid.y, n['facings'],
                                    list(n['match_fk'].values)) + n['polygon'].bounds
                                   for i, n in cluster.nodes(data=True)
                                   if n['block_key'].value not in Const.ALLOWED_FLAGS],
                                  columns=['x', 'y', 'facings', 'matches', 'x_min', 'y_min', 'x_max', 'y_max'])
                facings = df.facings.sum()
                # position is the facing-weighted centroid of the block along `vector`
                seg_list.append(Segment(seg=seg, position=(df[vector]*df['facings']).sum()/facings,
                                        facings=facings,
                                        orth_min=mpis_dict[scene]['rect_{}'.format(orth)].min(),
                                        orth_max=mpis_dict[scene]['rect_{}'.format(orth)].max(),
                                        matches=df['matches'].sum()))
            order = [x.seg for x in sorted(seg_list, key=lambda x: x.position)]
            if '_'.join(order) == '_'.join(segments) or \
                    (kpi_line['Reversible'] in ['Y', 'y'] and '_'.join(order) == '_'.join(segments[::-1])):
                flow_count = 1  # 1 is intentional, since loop is smaller than list by 1
                for i in range(1, len(order)):
                    if self.safe_divide(self.seq_axis_engulfs_df(i, seg_list, orth),
                                        seg_list[i].facings) >= .1 and\
                            self.safe_divide(self.seq_axis_engulfs_df(i, seg_list, orth, r=1),
                                             seg_list[i-1].facings) >= .1:
                        flow_count += 1
                if flow_count == len(order):
                    result = 1
        return {'result': result, 'score': 1}

    def seq_axis_engulfs_df(self, i, seg_list, orth, r=0):
        """Count segment i's matches whose orthogonal coordinate falls inside segment i-1's span (swap with r=1)."""
        j = i - 1
        if r:
            i, j = j, i
        return self.mpis[(self.mpis['scene_match_fk'].isin(seg_list[i].matches)) &
                         (seg_list[j].orth_min <= self.mpis['rect_{}'.format(orth)]) &
                         (self.mpis['rect_{}'.format(orth)] <= seg_list[j].orth_max)].shape[0]

    def calculate_max_block_adj_base(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Return ({'score', 'result'}, directions): result=1 when group A's max block touches group B.

        Adjacency is taken from the scene adjacency graph, restricted to the edge
        directions allowed by the template.
        """
        allowed_edges = [x.upper() for x in self.read_cell_from_line(kpi_line, Const.EDGES)]
        d = {'A': {}, 'B': {}}
        for k, v in d.items():
            filters = self.get_kpi_line_filters(kpi_line, k)
            _, _, mpis_dict, _, results = self.base_block(kpi_name, kpi_line, relevant_scif,
                                                          general_filters, filters=filters,
                                                          check_orient=0)
            v['row'] = results.sort_values('facing_percentage', ascending=False).iloc[0, :]
            v['items'] = sum([list(n['match_fk']) for i, n in v['row']['cluster'].nodes(data=True)
                              if n['block_key'].value not in Const.ALLOWED_FLAGS], [])
            scene_graph = self.block.adj_graphs_by_scene[d[k]['row']['scene_fk']]
            matches = [(edge, scene_graph[item][edge]['direction']) for item in v['items']
                       for edge in scene_graph[item].keys()
                       if scene_graph[item][edge]['direction'] in allowed_edges]
            v['edge_matches'], v['directions'] = zip(*matches) if matches else ([], [])
        result = 0
        if set(d['A']['edge_matches']) & set(d['B']['items']):
            result = 1
        return {'score': 1, 'result': result}, set(d['A']['directions'])

    def calculate_max_block_adj(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Adjacency KPI: 1 when group A's max block is adjacent to group B."""
        result, _ = self.calculate_max_block_adj_base(kpi_name, kpi_line, relevant_scif, general_filters)
        return result

    def calculate_integrated_core(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Like max-block adjacency, but requires contact in at least two directions."""
        result, dirs = self.calculate_max_block_adj_base(kpi_name, kpi_line, relevant_scif, general_filters)
        if len(dirs) < 2:
            result['result'] = 0
        return result

    def calculate_block_together(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Inverse adjacency KPI: 1 when the groups are NOT adjacent."""
        result, _ = self.calculate_max_block_adj_base(kpi_name, kpi_line, relevant_scif, general_filters)
        result['result'] = result['result'] ^ 1  # this kpi is reversed (is not blocked together?) so we xor
        return result

    def calculate_serial_adj(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Adjacency KPI repeated per unit (e.g. brand) and per size; pass when >75% of checks pass.

        'FAMILY LARGE' only needs one passing size per unit; multipacks must pass
        every size. Units/sizes with no data are exempted from the target.
        """
        result = {'score': 0, 'result': 0}
        scif = self.filter_df(relevant_scif, self.get_kpi_line_filters(kpi_line, 'A'))
        sizes = self.get_kpi_line_filters(kpi_line, 'A')['DLM_ VEGSZ(C)']
        num_count_sizes = 0 if sizes == [u'FAMILY LARGE'] else 1
        if scif.empty:
            return
        subsets = scif[kpi_line['Unit']].unique()
        tally = 0
        skip = 0
        for subset in subsets:
            size_pass = 0
            size_skip = 0
            for size in sizes:
                sub_kpi_line = kpi_line.copy()
                for i in sub_kpi_line.index:
                    if sub_kpi_line[i] == ','.join(sizes):
                        # BUGFIX: was `sub_kpi_line[i] == size` — a no-op comparison;
                        # the cell must be overwritten with the single size under test.
                        sub_kpi_line[i] = size
                general_filters[kpi_line['Unit']] = [subset]
                try:
                    result, _ = self.calculate_max_block_adj_base(kpi_name, sub_kpi_line,
                                                                  relevant_scif, general_filters)
                    tally += result['result']
                    size_pass += 1
                except TypeError:  # raised by base_block when no data is found
                    size_skip += 1  # we will ignore subsets that are missing either A group or B group
            # BUGFIX: the branches below were independent `if`s, so a passing family
            # unit fell into the final else and was wrongly skipped; chained with elif.
            if size_pass and not num_count_sizes:
                skip += 0  # family passed (even if one size failed), so we don't increment skip
            elif not size_pass and not num_count_sizes:
                skip += 1  # family unit failed entirely, so we increment by one
            else:
                skip += size_skip  # multipack route: each missing size reduces the target
        # family only needs to pass one size, multipack needs every size
        target = len(subsets)*len(sizes) - skip if num_count_sizes else len(subsets) - skip
        result['result'] = 0 if target else None
        if self.safe_divide(tally, target) > 75:
            result['result'] = 1
        return result

    def calculate_adjacency_list(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Return one result kwargs dict per adjacent entity (plus END_OF_CAT when anchored)."""
        max_block = self.read_cell_from_line(kpi_line, Const.MAX_BLOCK)
        item_filters = {}
        kwargs_list = []
        if max_block:
            # restrict adjacency to the members of the largest block
            _, _, _, _, blocks = self.base_block(
                kpi_name, kpi_line, relevant_scif, general_filters, check_orient=False)
            block = blocks.sort_values('facing_percentage').reset_index().iloc[-1, :]['cluster']
            ids = sum([node['group_attributes']['match_fk_list']
                       for i, node in block.node(data=True)], [])
            item_filters = {'scene_match_fk': ids}
        if Const.END_OF_CAT in self.get_results_value(kpi_line):
            anchor_filters = item_filters if item_filters else self.get_kpi_line_filters(kpi_line)
            anchor = self.anchor_base(general_filters, anchor_filters,
                                      relevant_scif['scene_fk'].unique(), 1)
            if sum(anchor.values()) > 0:
                kwargs_list.append({'score': 1, 'result': Const.END_OF_CAT, 'target': 1})
        all_results = self.base_adjacency(
            kpi_name, kpi_line, relevant_scif, general_filters, item_filters=item_filters)
        for result in sum([x for x, y in all_results.values()], []):
            kwargs_list.append({'score': 1, 'result': result})
        return kwargs_list

    def anchor_base(self, general_filters, potential_end, scenes, min_shelves, ratio=False):
        """Return {'left': 0/1, 'right': 0/1}: whether `potential_end` products sit at the end bays.

        A side passes when, on at least `min_shelves` shelves of the edge bay, the
        candidate products sit beyond all other products. With ratio=True the
        threshold becomes half the edge bay's shelf count.
        """
        results = {}
        cat_filters = dict(general_filters)
        # per-side: (aggregator for outermost facing, comparison, fill for missing rivals)
        func_dict = {'left': [min, op.lt, float('inf')], 'right': [max, op.gt, 0]}
        results['left'], results['right'] = 0, 0
        for scene in scenes:
            cat_filters['scene_fk'] = scene
            cat_mpis = self.filter_df(self.mpis, cat_filters)
            cat_mpis = self.filter_df(cat_mpis, {'product_type': 'Empty'}, exclude=1)
            cat_mpis = self.filter_df(cat_mpis, {'stacking_layer': 1})
            bays = {'left': cat_mpis['bay_number'].min(), 'right': cat_mpis['bay_number'].max()}
            for dir, bay in bays.items():
                agg_func, operator, fill_val = func_dict[dir]
                bay_mpis = self.filter_df(cat_mpis, {'bay_number': bay})
                smpis = self.filter_df(bay_mpis, potential_end).groupby(
                    ['scene_fk', 'bay_number', 'shelf_number'])['facing_sequence_number'].agg(agg_func)
                if smpis.empty:
                    continue
                rmpis = self.filter_df(bay_mpis, potential_end, exclude=1) \
                    .groupby(['scene_fk', 'bay_number', 'shelf_number'])['facing_sequence_number'].agg(agg_func)
                locs = pd.concat([smpis, rmpis], axis=1)
                locs.columns = ['A', 'B']
                locs.dropna(subset=['A'], inplace=True)
                if ratio:
                    min_shelves = max(self.filter_df(
                        self.mpis, {'scene_fk': scene, 'bay_number': bay})['shelf_number'])
                    min_shelves = round(min_shelves / 2.0)
                locs.fillna(fill_val, inplace=True)
                if sum(operator(locs['A'], locs['B'])) >= min_shelves:
                    results[dir] = 1
        return results

    def calculate_anchor(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Pass (1) when the numerator products anchor any of the template's allowed edges."""
        scenes = relevant_scif['scene_fk'].unique()
        potential_end = self.get_kpi_line_filters(kpi_line, 'numerator')
        general_filters.update(self.get_kpi_line_filters(kpi_line, 'denominator'))
        results = self.anchor_base(general_filters, potential_end, scenes, 0, ratio=True)
        edges = self.splitter(kpi_line[Const.EDGES].strip())
        result = 0
        for edge in edges:
            if results[edge]:
                result = 1
        return {'score': 1, 'result': result}

    def base_block(self, kpi_name, kpi_line, relevant_scif, general_filters_base, check_orient=1, other=1,
                   filters=None, multi=0):
        """Run block detection per scene; return (score, orientation, mpis_dict, blocks, result).

        Stops at the first scene with a block unless multi=1. Raises TypeError when
        no scene has any matching products (sets self.global_fail for soft handling).
        """
        # BUGFIX: `filters` was a mutable default ({}), leaking state across calls
        # and mutating callers' dicts via update(); copy defensively instead.
        filters = dict(filters) if filters else {}
        general_filters = dict(general_filters_base)
        blocks = pd.DataFrame()
        result = pd.DataFrame()
        orientation = 'Not Blocked'
        score = 0          # also covers the no-scenes case (previously unbound)
        empty_check = 0
        scenes = self.filter_df(relevant_scif, general_filters).scene_fk.unique()
        if 'template_name' in general_filters:
            del general_filters['template_name']
        if 'scene_fk' in general_filters:
            del general_filters['scene_fk']
        mpis_dict = {}
        valid_scene_found = 0
        for scene in scenes:
            score = 0
            empty_check = 0
            scene_filter = {'scene_fk': scene}
            if not filters:
                filters = self.get_kpi_line_filters(kpi_line)
            filters.update(general_filters)
            # mpis is only here for debugging purposes
            mpis = self.filter_df(self.mpis, scene_filter)
            mpis = self.filter_df(mpis, filters)
            mpis = self.filter_df(mpis, {'stacking_layer': 1})
            mpis_dict[scene] = mpis
            if mpis.empty:
                empty_check = -1
                continue
            allowed_filter = Const.ALLOWED_FILTERS
            if not other:
                allowed_filter = {'product_type': 'Empty'}
            result = pd.concat([result,
                                self.block.network_x_block_together(
                                    filters, location=scene_filter,
                                    additional={'allowed_products_filters': allowed_filter,
                                                'include_stacking': False,
                                                'check_vertical_horizontal': check_orient,
                                                'minimum_facing_for_block': 1})])
            blocks = result[result['is_block'] == True]
            valid_scene_found = 1
            if not blocks.empty and not multi:
                score = 1
                orientation = blocks.loc[0, 'orientation']
                break
        if empty_check == -1 and not valid_scene_found:
            self.global_fail = 1
            # BUGFIX: format string previously had no placeholder, so the KPI name was dropped
            raise TypeError('No Data Found fo kpi "{}"'.format(kpi_name))
        return score, orientation, mpis_dict, blocks, result

    def calculate_block(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Blocking KPI: 1 when the filtered products form a block (inverted when AntiBlock is 'Y')."""
        base = self.get_base_name(kpi_name, Const.ORIENTS)
        if base in self.blockchain:
            # Data exists. Get it.
            result, orientation, mpis_dict, blocks = self.blockchain[base]
        else:
            # Data doesn't exist, so create and add it
            result, orientation, mpis_dict, blocks, _ = self.base_block(
                kpi_name, kpi_line, relevant_scif, general_filters)
            self.blockchain[base] = result, orientation, mpis_dict, blocks
        if kpi_line['AntiBlock'] in ['Y', 'y']:
            result = result ^ 1
        kwargs = {'score': 1, 'result': result}
        return kwargs

    def calculate_block_orientation(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Blocking KPI restricted to the orientation named in the template's 'Orientation' cell."""
        allowed_orientation = kpi_line['Orientation'].strip()
        # Check if data for this kpi already exists
        base = self.get_base_name(kpi_name, Const.ORIENTS)
        if base in self.blockchain:
            # Data exists. Get it.
            result, orientation, mpis_dict, blocks = self.blockchain[base]
        else:
            # Data doesn't exist, so create and add it
            result, orientation, mpis_dict, blocks, _ = self.base_block(
                kpi_name, kpi_line, relevant_scif, general_filters)
            self.blockchain[base] = result, orientation, mpis_dict, blocks
        if allowed_orientation.upper() != orientation:
            result = 0
        return {'score': 1, 'result': result}

    def calculate_multi_block(self, kpi_name, kpi_line, relevant_scif, general_filters):
        """Pass when every (non-exempt) numerator group forms its own block in one scene.

        With ALL_SCENES_REQUIRED = 'Y', every scene must pass; otherwise one
        passing scene is enough.
        """
        den_filter = self.get_kpi_line_filters(kpi_line, 'denominator')
        num_filter = self.get_kpi_line_filters(kpi_line, 'numerator')
        if kpi_line[Const.ALL_SCENES_REQUIRED] in ('Y', 'y'):
            all_scenes_required = True
        else:
            all_scenes_required = False
        groups = list(*num_filter.values())
        result = 0
        scenes = self.filter_df(relevant_scif, general_filters).scene_fk.unique()
        if 'template_name' in general_filters:
            del general_filters['template_name']
        for scene in scenes:
            groups_exempt = 0
            score = 0
            scene_general_filters = general_filters.copy()
            scene_general_filters.update({'scene_fk': scene})
            for group in groups:
                # check all the groups in the current scene
                sub_filters = {list(num_filter.keys())[0]: [group]}
                sub_filters.update(den_filter)
                sub_score = 0
                try:
                    sub_score, _, _, _, _ = self.base_block(kpi_name, kpi_line, relevant_scif,
                                                            scene_general_filters, check_orient=0,
                                                            filters=sub_filters)
                except TypeError as e:
                    # match base_block's no-data error by prefix (message now includes the kpi name)
                    if str(e).startswith('No Data Found fo kpi'):
                        # no relevant products found, so this group is exempt
                        groups_exempt += 1
                    else:
                        raise e
                score += sub_score
            if score and score == len(groups) - groups_exempt:
                # all non-exempt groups were blocked in this scene
                result += 1
                if not all_scenes_required:
                    # we already found one passing scene so we don't need to continue
                    break
        if all_scenes_required:
            final_result = 1 if result == len(scenes) else 0  # make sure all scenes have a passing result
        else:
            final_result = 1 if result > 0 else 0
        return {'score': 1, 'result': final_result}

    def make_mpis(self, kpi_line, general_filters, ign_stacking=1, use_full_mpis=0):
        """Return matches filtered by the KPI line's filters (optionally from full_mpis, ignoring stacking)."""
        mpis = self.full_mpis if use_full_mpis else self.mpis
        filters = self.get_kpi_line_filters(kpi_line)
        filters.update(general_filters)
        if ign_stacking:
            filters.update(Const.IGN_STACKING)
        # BUGFIX: previously filtered self.mpis regardless, so use_full_mpis had no effect
        return self.filter_df(mpis, filters)

    def shun(self):
        """Apply the template's 'Exclude' sheet filters to mpis, full_mpis and scif."""
        exclude = self.template['Exclude']
        filters = {}
        for i, row in exclude.iterrows():
            filters.update(self.get_kpi_line_filters(row))
        self.mpis = self.filter_df(self.mpis, filters, exclude=1)
        self.full_mpis = self.filter_df(self.full_mpis, filters, exclude=1)
        self.scif = self.filter_df(self.scif, filters, exclude=1)

    @staticmethod
    def filter_df(df, filters, exclude=0):
        """Filter df by {column: value(s)}; exclude=1 inverts. Missing column -> empty DataFrame."""
        cols = set(df.columns)
        for key, val in filters.items():
            if key not in cols:
                return pd.DataFrame()
            if not isinstance(val, list):
                val = [val]
            if exclude:
                df = df[~df[key].isin(val)]
            else:
                df = df[df[key].isin(val)]
        return df

    @staticmethod
    def filter_mask(df, filters, exclude=0):
        """Return a boolean mask for df matching (or excluding) the given {column: value(s)} filters."""
        mask = []
        for key, val in filters.items():
            if not isinstance(val, list):
                val = [val]
            if exclude:
                mask.append(~df[key].isin(val))
            else:
                mask.append(df[key].isin(val))
        return reduce((lambda x, y: x & y), mask)

    @staticmethod
    def filter_join(filters):
        """Merge a sequence of filter dicts into one {key: [values...]} defaultdict."""
        final_filter = defaultdict(list)
        filters = reduce((lambda x, y: x + y.items() if isinstance(x, list) else x.items() + y.items()),
                         filters)
        for (key, val) in filters:
            final_filter[key].append(val)
        return final_filter

    @staticmethod
    def ratio_score(num, den, target=None):
        """Return (percent ratio rounded to 2dp, 1 if ratio meets a truthy target else 0)."""
        ratio = 0
        if den:
            ratio = round(num*100.0/den, 2)
        score = 1 if ratio >= target and target else 0
        return ratio, score

    @staticmethod
    def read_cell_from_line(line, col):
        """Read a template cell as a list: NaN/missing -> [], comma-separated strings are split."""
        try:
            val = line[col] if not pd.isnull(line[col]) else []
        except:
            val = []
        if val:
            if hasattr(val, 'split'):
                if ', ' in val:
                    val = val.split(', ')
                elif ',' in val:
                    val = val.split(',')
            if not isinstance(val, list):
                val = [val]
        return val

    def get_kpi_line_filters(self, kpi_orig, name=''):
        """Collect {'param N' column value: split 'value N' cell} pairs, optionally prefixed (e.g. 'numerator ')."""
        kpi_line = kpi_orig.copy()
        if name:
            name = name.lower() + ' '
        filters = defaultdict(list)
        attribs = [x.lower() for x in kpi_line.index]
        kpi_line.index = attribs
        c = 1
        while 1:
            if '{}param {}'.format(name, c) in attribs and kpi_line['{}param {}'.format(name, c)]\
                    and not pd.isnull(kpi_line['{}param {}'.format(name, c)]):
                filters[kpi_line['{}param {}'.format(
                    name, c)]] += self.splitter(kpi_line['{}value {}'.format(name, c)])
            else:
                if c > 3:  # just in case someone inexplicably chose a nonlinear numbering format.
                    break
            c += 1
        return filters

    @staticmethod
    def splitter(text_str, delimiter=','):
        """Split a string on delimiter and strip each piece; pass non-strings through wrapped in a list."""
        ret = [text_str]
        if hasattr(text_str, 'split'):
            ret = [x.strip() for x in text_str.split(delimiter)]
        return ret

    @staticmethod
    def get_base_name(kpi, group):
        """Lower-case the KPI name and strip every token in `group` (used for cache keys)."""
        base = kpi.lower()
        for obj in group:
            base = base.replace(obj, '').strip()
        return base

    @staticmethod
    def safe_divide(num, den):
        """Return num/den as a percent, or 0 when the denominator is falsy."""
        res = 0
        if den:
            res = num*100.0 / den
        return res

    def sos_with_num_and_dem(self, kpi_line, relevant_scif, general_filters, facings_field):
        """Return (percent share, numerator sum, denominator sum) of facings_field for the line's filters."""
        num_filters = self.get_kpi_line_filters(kpi_line, name='numerator')
        den_filters = self.get_kpi_line_filters(kpi_line, name='denominator')
        num_filters.update(general_filters)
        den_filters.update(general_filters)
        num_scif = self.filter_df(relevant_scif, num_filters)
        den_scif = self.filter_df(relevant_scif, den_filters)
        den = den_scif[facings_field].sum()
        num = num_scif[facings_field].sum()
        if den:
            ratio = round((num / float(den))*100, 2)
        else:
            ratio = 0
        return ratio, num, den

    def dependency_reorder(self):
        """Topologically sort the KPIS sheet so dependent KPIs run after their dependencies.

        Logs an error and bails out when a circular dependency is detected
        (requeue counter exceeds 1.1x the sheet length).
        """
        kpis = self.template[Const.KPIS].copy()
        name_to_index = kpis.reset_index().set_index(Const.KPI_NAME)['index'].to_dict()
        dependent_index = list(kpis[kpis[Const.DEPENDENT].notnull()].index)
        kpis_index = list(set(kpis.index) - set(dependent_index))
        set_index = set(kpis_index)
        c = 0
        while dependent_index:
            i = dependent_index.pop(0)
            dependencies = self.read_cell_from_line(kpis.loc[i, :], Const.DEPENDENT)
            met = True
            for dependency in dependencies:
                if name_to_index[dependency] not in set_index:
                    met = False
            if met:
                kpis_index.append(i)
                set_index.add(i)
                c = 0
            else:
                dependent_index.append(i)
                c += 1
                if c > kpis.shape[0] * 1.1:
                    Log.error('Circular Dependency Found: KPIs Affected {}'.format(
                        [kpis.loc[i, Const.KPI_NAME] for i in dependent_index]))
                    break
        self.template[Const.KPIS] = kpis.reindex(index=pd.Index(kpis_index)).reset_index(drop=True)

    def get_kpi_function(self, kpi_type):
        """
        transfers every kpi to its own function
        :param kpi_type: value from "sheet" column in the main sheet
        :return: function
        """
        if kpi_type == Const.SHELF_PLACEMENT:
            return self.calculate_shelf_placement
        elif kpi_type == Const.SHELF_REGION:
            return self.calulate_shelf_region
        elif kpi_type == Const.ADJACENCY:
            return self.calculate_adjacency_list
        elif kpi_type == Const.ANCHOR:
            return self.calculate_anchor
        elif kpi_type == Const.BLOCKING:
            return self.calculate_block
        elif kpi_type == Const.BLOCKING_PERCENT:
            # BUGFIX: calculate_block_percent is commented out of this class, so the old
            # `return self.calculate_block_percent` raised AttributeError OUTSIDE the
            # caller's try block and crashed the session; warn and fall through instead.
            Log.warning("KPI type '{}' is disabled: calculate_block_percent is commented out".format(kpi_type))
            return None
        elif kpi_type == Const.BLOCK_ORIENTATION:
            return self.calculate_block_orientation
        elif kpi_type == Const.MULTI_BLOCK:
            return self.calculate_multi_block
        elif kpi_type == Const.MAX_BLOCK_ADJ:
            return self.calculate_max_block_adj
        elif kpi_type == Const.INTEGRATED:
            return self.calculate_integrated_core
        elif kpi_type == Const.BLOCKED_TOGETHER:
            return self.calculate_block_together
        elif kpi_type == Const.SERIAL:
            return self.calculate_serial_adj
        elif kpi_type == Const.SEQUENCE:
            return self.calculate_sequence
        elif kpi_type == Const.RELATIVE_POSTION:
            return self.calculate_sequence
        elif kpi_type == Const.SOS:
            return self.calculate_sos
        elif kpi_type == Const.SAME_AISLE:
            return self.calculate_same_aisle
        else:
            Log.warning(
                "The value '{}' in column sheet in the template is not recognized".format(kpi_type))
            return None

    def make_result_values_dict(self):
        """Return {result value: pk} loaded from static.kpi_result_value."""
        query = "SELECT * FROM static.kpi_result_value;"
        return pd.read_sql_query(query, self.ps_data_provider.rds_conn.db).set_index('value')['pk'].to_dict()

    def write_to_db(self, kpi_name, score=0, result=None, target=None, numerator_result=0,
                    denominator_result=None, numerator_id=999, denominator_id=999, failed=0):
        """
        writes result in the DB
        :param kpi_name: str
        :param score: float
        :param result: str
        :param target: numeric or None
        :param failed: accepted for caller compatibility; not persisted here
        """
        kpi_fk = self.common.get_kpi_fk_by_kpi_type(kpi_name)
        self.common.write_to_db_result(fk=kpi_fk, score=score, result=result, target=target,
                                       numerator_result=numerator_result,
                                       denominator_result=denominator_result,
                                       numerator_id=numerator_id, denominator_id=denominator_id)