class NESTLEUKToolBox(NESTLEUKConsts):
    """Session-level KPI calculation toolbox for the Nestle UK project.

    Calculates template-driven KPIs (block together, facing count,
    availability, facing SOS, shelf position), the AVA custom-SCIF KPIs
    (bottom shelf / adjacency / diamond placement), and writes results to
    the legacy 3-level KPI tables plus the pservice custom SCIF table.
    """

    # Result-table hierarchy levels: 1 = KPI set, 2 = KPI, 3 = atomic KPI.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.tools = NESTLEUKGENERALToolBox(self.data_provider, self.output,
                                            rds_conn=self.rds_conn)
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        # BUGFIX: merge_insert_queries() appends UPDATE statements here but
        # this attribute was never initialized, causing an AttributeError
        # the first time such a query appeared.
        self.update_queries = []
        self.store_type = self.store_info['store_type'].iloc[0]
        self.store_type = '' if self.store_type is None else self.store_type
        self.templates_class = NESTLEUKParseTemplates('Nestle_UK_v3.0')
        self.template_ava_class = NESTLEUKParseTemplates('Template')
        self.templates_data = self.templates_class.parse_template(
            sheet_name='KPIs')
        self.template_ava_data = self.template_ava_class.parse_template(
            sheet_name='Hierarchy')
        self.template_ava_visible = self.template_ava_class.parse_template(
            sheet_name='Visible')
        self.template_ava_bottom_shelf = self.template_ava_class.parse_template(
            sheet_name='Bottom shelf')
        self.template_ava_adjacent = self.template_ava_class.parse_template(
            sheet_name='Adjacent')
        self.template_ava_diamond = self.template_ava_class.parse_template(
            sheet_name='Diamond')
        self.scores = pd.DataFrame(columns=['ean_code', 'visible', 'ava'])
        self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
        self.custom_scif_queries = []

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one
        global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = NESTLEUKQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def calculate_nestle_score(self, set_name):
        """
        This function calculates the KPI results for a whole set: it walks
        the two-level KPI hierarchy from the 'KPIs' template sheet,
        dispatches each atomic KPI to its type-specific calculator, writes
        atomic/KPI/set scores to the DB, and weights child scores into the
        set score.
        """
        set_scores = {}
        main_children = self.templates_data[
            self.templates_data[self.templates_class.KPI_GROUP] == set_name]
        for c in xrange(len(main_children)):
            main_child = main_children.iloc[c]
            children = self.templates_data[
                self.templates_data[self.templates_class.KPI_GROUP] ==
                main_child[self.templates_class.KPI_NAME]]
            scores = []
            for i in xrange(len(children)):
                child = children.iloc[i]
                kpi_type = child[self.templates_class.KPI_TYPE]
                # Skip atomics not relevant for this store type (substring
                # containment check against the template's store-type cell).
                if self.store_type not in child[
                        self.templates_class.STORE_TYPE]:
                    continue
                # Skip atomics whose scene types do not appear in the session.
                if not set(child[self.templates_class.SCENE_TYPE].split(
                        self.templates_class.SEPARATOR)) & set(
                            self.scif['template_name'].unique().tolist()):
                    continue
                if kpi_type == self.BLOCK_TOGETHER:
                    score = self.calculate_block_together_sets(child)
                elif kpi_type == self.FACING_COUNT:
                    score = self.calculate_facing_count(child)
                elif kpi_type == self.AVAILABILITY:
                    score = self.calculate_availability(child)
                elif kpi_type == self.FACING_SOS:
                    score = self.calculate_facing_sos(child)
                elif kpi_type == self.SHELF_POSITION:
                    score = self.calculate_shelf_position(child)
                else:
                    Log.warning(
                        "KPI of type '{}' is not supported".format(kpi_type))
                    continue
                if score is not None:
                    child_score_weight = child[self.templates_class.WEIGHT]
                    atomic_fk = self.get_atomic_fk(child)
                    self.write_to_db_result(atomic_fk, score,
                                            level=self.LEVEL3)
                    if isinstance(score, tuple):
                        score = score[0]
                    weighted_score = score * float(child_score_weight)
                    scores.append(weighted_score)
            if not scores:
                # No calculated children -> the KPI still gets a zero score.
                scores = [0]
            score_type = main_child[self.templates_class.SCORE]
            score_weight = float(main_child[self.templates_class.WEIGHT])
            if score_type == self.templates_class.SUM_OF_SCORES:
                score = sum(scores)
            else:
                score = 0
            kpi_name = main_child[self.templates_class.KPI_NAME]
            kpi_fk = self.kpi_static_data[
                self.kpi_static_data['kpi_name'] ==
                kpi_name]['kpi_fk'].values[0]
            set_scores[kpi_fk] = (score_weight, score)
        for kpi_fk in set_scores.keys():
            self.write_to_db_result(kpi_fk, set_scores[kpi_fk][1],
                                    level=self.LEVEL2)
        # The set score is a weighted sum (not a weighted average).
        set_score = round(
            sum([score[0] * score[1] for score in set_scores.values()]), 2)
        set_fk = self.kpi_static_data[
            self.kpi_static_data['kpi_set_name'] ==
            set_name]['kpi_set_fk'].values[0]
        self.write_to_db_result(set_fk, set_score, level=self.LEVEL1)

    @kpi_runtime()
    def calculate_ava(self):
        """
        This function calculates the AVA (custom SCIF) KPIs: for every
        atomic KPI in the 'Hierarchy' sheet whose scene types exist in the
        session, it dispatches to the bottom-shelf / adjacent / diamond
        calculators, which write per-product results to the custom SCIF
        queries list (no score is returned here).
        """
        for set_name in self.template_ava_data['Set Name'].unique().tolist():
            kpk = self.template_ava_data[
                self.template_ava_data['Set Name'] ==
                set_name]['KPI Group'].unique().tolist()
            for main_kpi in kpk:
                atomics = self.template_ava_data[
                    self.template_ava_data['KPI Group'] == main_kpi]
                for i in xrange(len(atomics)):
                    atomic = atomics.iloc[i]
                    if not set(atomic[self.templates_class.SCENE_TYPE].split(
                            self.templates_class.SEPARATOR)) & set(
                                self.scif['template_name'].unique().tolist()):
                        continue
                    else:
                        templates = map(
                            lambda x: x.strip(),
                            atomic[self.templates_class.SCENE_TYPE].split(','))
                        scenes_to_check = self.scif[
                            self.scif['template_name'].isin(
                                templates)]['scene_fk'].unique().tolist()
                    kpi_type = atomic[self.templates_class.KPI_TYPE]
                    if kpi_type == self.BOTTOM_SHELF:
                        params = self.template_ava_bottom_shelf[
                            self.template_ava_bottom_shelf['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_bottom_shelf(params, scenes_to_check)
                    elif kpi_type == self.ADJACENT:
                        params = self.template_ava_adjacent[
                            self.template_ava_adjacent['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_adjacent(params, scenes_to_check)
                    elif kpi_type == self.DIAMOND:
                        params = self.template_ava_diamond[
                            self.template_ava_diamond['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_diamond(params, scenes_to_check)
                    else:
                        Log.warning("KPI of type '{}' is not supported".format(
                            kpi_type))
                        continue

    def get_custom_query(self, scene_fk, product_fk, in_assortment_OSA=0,
                         oos_osa=0, mha_in_assortment=0, mha_os=0,
                         length_mm_custom=0, mha_oos=0):
        """
        Builds an INSERT query for one (scene, product) row of the custom
        SCIF table and appends it to the pending custom-SCIF queries list.
        """
        attributes = pd.DataFrame(
            [(self.session_fk, scene_fk, product_fk, in_assortment_OSA,
              oos_osa, mha_in_assortment, mha_oos, length_mm_custom)],
            columns=[
                'session_fk', 'scene_fk', 'product_fk', 'in_assortment_OSA',
                'oos_osa', 'mha_in_assortment', 'mha_oos', 'length_mm_custom'
            ])
        query = insert(attributes.to_dict(), self.PSERVICE_CUSTOM_SCIF)
        self.custom_scif_queries.append(query)

    def calculate_bottom_shelf(self, kpi, scenes_to_check):
        """
        For every product in the KPI's EAN list and every relevant scene:
        marks the product in-assortment if available in the scene, and
        'MHA in assortment' if it sits within the central shelf span (the
        shelf shrunk to ``shelf_percent`` of its width) on the configured
        bottom shelves. Results are written to custom SCIF.
        """
        target = int(kpi[self.templates_class.TARGET])
        # NOTE(review): shelf numbers stay strings here while
        # match_product_in_scene likely holds ints - verify that
        # get_filter_condition coerces them; otherwise the filter never hits.
        shelf_number = map(lambda x: x.strip(),
                           kpi['shelf_number_from_bottom'].split(','))
        shelf_percent = int(kpi['shelf_percent'])
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        for scene in scenes_to_check:
            shelf_edges = self.build_shelf_edges(scene, shelf_percent)
            for product_fk in products_for_check:
                result = self.tools.calculate_availability(
                    product_fk=product_fk, scene_fk=scene)
                if result:
                    in_assortment_osa = 1
                    result = self.calculate_contain(scene, product_fk,
                                                    shelf_edges, shelf_number)
                    # calculate_contain returns a bool; True >= 1 holds.
                    mha_in_assortment = 1 if result >= target else 0
                else:
                    in_assortment_osa = mha_in_assortment = 0
                self.get_custom_query(scene_fk=scene,
                                      product_fk=product_fk,
                                      in_assortment_OSA=in_assortment_osa,
                                      mha_in_assortment=mha_in_assortment)

    def build_shelf_edges(self, scene_fk, shelf_percent):
        """
        Returns the {'left', 'right'} x-coordinates (mm) of the central
        region of the scene's shelf, after trimming the full shelf width
        down to ``shelf_percent`` percent (half trimmed from each side).
        """
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        left = matches.copy().sort_values('x_mm', ascending=True).iloc[0]
        left = int(left['x_mm']) - (int(left['width_mm']) / 2
                                    )  # TODO width_mm_net
        right = matches.copy().sort_values('x_mm', ascending=False).iloc[0]
        right = int(right['x_mm']) + (int(right['width_mm']) / 2
                                      )  # TODO width_mm_net
        shelf_len = right - left
        shelf_len_after_downsize = (shelf_len -
                                    (shelf_len * shelf_percent / 100)) / 2
        edges = {
            'left': left + shelf_len_after_downsize,
            'right': right - shelf_len_after_downsize
        }
        return edges

    def build_product_edges(self, matches):
        """
        Returns a list of {'left', 'right'} x-edge dicts (mm), one per
        facing row in ``matches``.
        """
        points = []
        for x, product_show in matches.iterrows():
            left = int(product_show['x_mm']) - (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            right = int(product_show['x_mm']) + (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            edges_point = {'left': left, 'right': right}
            points.append(edges_point)
        return points

    def calculate_contain(self, scene, product_fk, shelf_edges, shelf_number):
        """
        Returns True if any facing of ``product_fk`` on the given shelves of
        the scene has an edge strictly inside ``shelf_edges``.
        """
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{
                'scene_fk': scene,
                'shelf_number_from_bottom': shelf_number,
                'product_fk': product_fk
            })]
        points = self.build_product_edges(matches)
        for point in points:
            if (shelf_edges['left'] < point['left'] < shelf_edges['right']) or \
                    (shelf_edges['left'] < point['right'] < shelf_edges['right']):
                return True
        return False

    def calculate_diamond(self, kpi, scenes_to_check):
        """
        For every product in the KPI's EAN list and every multi-shelf scene:
        marks the product in-assortment if available in the scene, and
        'MHA in assortment' if any of its corners fall inside the scene's
        diamond polygon. Results are written to custom SCIF.
        """
        target = int(kpi[self.templates_class.TARGET])
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        for scene in scenes_to_check:
            if self.validate_scene(scene):
                polygon = self.build_diamond_polygon(scene)
                for product_fk in products_for_check:
                    # BUGFIX: availability used to be checked against the
                    # whole scene list instead of the current scene, which
                    # inflated per-scene in_assortment_OSA (bottom-shelf KPI
                    # already checked per scene).
                    result = self.tools.calculate_availability(
                        product_fk=product_fk, scene_fk=scene)
                    if result:
                        in_assortment_osa = 1
                        result = self.calculate_polygon(scene=scene,
                                                        product_fk=product_fk,
                                                        polygon=polygon)
                        mha_in_assortment = 1 if result >= target else 0
                    else:
                        in_assortment_osa = mha_in_assortment = 0
                    self.get_custom_query(
                        scene_fk=scene,
                        product_fk=product_fk,
                        in_assortment_OSA=in_assortment_osa,
                        mha_in_assortment=mha_in_assortment)

    def validate_scene(self, scene_fk):
        """Returns True when the scene has more than one shelf."""
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        if len(matches['shelf_number'].unique().tolist()) > 1:
            return True
        return False

    def build_diamond_polygon(self, scene_fk):
        """
        Builds the diamond-shaped Polygon for a scene: its vertices are the
        midpoints of the scene's top edge, right edge, bottom edge (top of
        the second shelf from the bottom when present) and left edge.
        """
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        shelf_number = min(matches['shelf_number'].unique().tolist())
        top = matches[(matches['shelf_number'] == shelf_number)
                      & (matches['stacking_layer'] == 1)].sort_values(
                          'y_mm', ascending=False).iloc[0]
        top = int(top['y_mm']) - (int(top['height_mm']) / 2
                                  )  # TODO height_mm_net
        try:
            bottom = matches[(matches['shelf_number_from_bottom'] == 2)
                             & (matches['stacking_layer'] == 1)].sort_values(
                                 'y_mm', ascending=False).iloc[0]
            bottom = int(bottom['y_mm']) - (int(bottom['height_mm']) / 2
                                            )  # TODO height_mm_net
        except Exception:
            # No second shelf from the bottom - fall back to the bottom
            # edge of the lowest shelf.
            bottom = matches[matches['shelf_number_from_bottom'] ==
                             1].sort_values('y_mm', ascending=False).iloc[0]
            bottom = int(bottom['y_mm']) + (int(bottom['height_mm']) / 2
                                            )  # TODO height_mm_net
        left = matches.copy().sort_values('x_mm', ascending=True).iloc[0]
        left = int(left['x_mm']) - (int(left['width_mm']) / 2
                                    )  # TODO width_mm_net
        right = matches.copy().sort_values('x_mm', ascending=False).iloc[0]
        right = int(right['x_mm']) + (int(right['width_mm']) / 2
                                      )  # TODO width_mm_net
        middle_x = (right + left) / 2
        middle_y = (top + bottom) / 2
        polygon = Polygon([(middle_x, top), (right, middle_y),
                           (middle_x, bottom), (left, middle_y)])
        return polygon

    def calculate_polygon(self, scene, product_fk, polygon):
        """
        Returns True if any corner point of any facing of ``product_fk`` in
        the scene lies inside ``polygon``.
        """
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene})]
        points = self.build_array_of_points(matches, product_fk)
        for point in points:
            if polygon.contains(point):
                return True
        return False

    def build_array_of_points(self, matches, product):
        """
        Returns the four corner Points of every facing of ``product`` in
        ``matches``.
        """
        points = []
        for x, product_show in matches[matches['product_fk'] ==
                                       product].iterrows():
            top = int(product_show['y_mm']) + (
                int(product_show['height_mm']) / 2)  # TODO height_mm_net
            bottom = int(product_show['y_mm']) - (
                int(product_show['height_mm']) / 2)  # TODO height_mm_net
            left = int(product_show['x_mm']) - (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            right = int(product_show['x_mm']) + (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            mask_point = Point(left, top), Point(right, top), Point(
                left, bottom), Point(right, bottom)
            points += mask_point
        return points

    def calculate_adjacent(self, kpi, scenes_to_check):
        """
        For every product in the KPI's EAN list and every relevant scene:
        marks the product in-assortment if available in the scene, and
        'MHA in assortment' if it is adjacent (proximate) to the anchor
        entity defined by the KPI. Results are written to custom SCIF.
        """
        adjacent_type = kpi['adjacent_type']
        adjacent_value = kpi['adjacent_value']
        anchor_filters = {adjacent_type: adjacent_value}
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        for scene in scenes_to_check:
            # BUGFIX: both the availability and the proximity checks are
            # scoped to the current scene; they previously used the full
            # scene list, contradicting the per-scene rows written below.
            general_filters = {'scene_fk': scene}
            for product_fk in products_for_check:
                result = self.tools.calculate_availability(
                    product_fk=product_fk, scene_fk=scene)
                if result:
                    in_assortment_osa = 1
                    result = not self.tools.calculate_non_proximity(
                        tested_filters={'product_fk': product_fk},
                        anchor_filters=anchor_filters,
                        allowed_diagonal=False,
                        **general_filters)
                    mha_in_assortment = 1 if result else 0
                else:
                    in_assortment_osa = mha_in_assortment = 0
                self.get_custom_query(scene_fk=scene,
                                      product_fk=product_fk,
                                      in_assortment_OSA=in_assortment_osa,
                                      mha_in_assortment=mha_in_assortment)

    @kpi_runtime()
    def calculate_block_together_sets(self, kpi):
        """
        Calculates one block-together atomic KPI: 100 if the KPI's brands
        form a block in the relevant scenes (optionally within a category),
        otherwise 0.
        """
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        brands_for_block_check = kpi[self.templates_class.BRAND].split(
            self.templates_class.SEPARATOR)
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        if not kpi[self.templates_class.CATEGORY]:
            result = self.tools.calculate_block_together(
                brand_name=brands_for_block_check, scene_fk=scenes_to_check)
        else:
            category = kpi[self.templates_class.CATEGORY]
            result = self.tools.calculate_block_together(
                brand_name=brands_for_block_check,
                scene_fk=scenes_to_check,
                category=category)
        score = 100 if result else 0
        return score

    @kpi_runtime()
    def calculate_facing_count(self, kpi):
        """
        Calculates one facing-count atomic KPI: 100 if the number of
        non-stacked facings of the KPI's SKUs in the relevant scenes meets
        the target, otherwise 0.
        """
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        products_for_check = kpi[self.templates_class.SKU]
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        result = self.tools.calculate_availability(
            product_ean_code=products_for_check,
            scene_fk=scenes_to_check,
            stacking_layer=1)
        if kpi[self.templates_class.TARGET]:
            target = float(kpi[self.templates_class.TARGET])
        else:
            # NOTE(review): with an empty target this compares a number to
            # an empty cell value, which in Python 2 is always False -
            # confirm whether the template guarantees a target here.
            target = kpi[self.templates_class.TARGET]
        score = 100 if result >= target else 0
        return score

    @kpi_runtime()
    def calculate_availability(self, kpi):
        """
        Calculates one availability atomic KPI from the 'Availability'
        sheet: collects the scene types relevant for this store type and
        KPI, then scores 100 only if every product list is available in
        those scene types (first stacking layer).
        """
        kpi_name = kpi[self.templates_class.KPI_NAME]
        templates_data = self.templates_class.parse_template(
            sheet_name='Availability',
            lower_headers_row_index=4,
            upper_headers_row_index=3,
            data_content_column_index=6,
            input_column_name_separator=', ')
        scene_types = []
        scores = []
        session_templates = self.scif['template_name'].unique().tolist()
        for scene_type in session_templates:
            # Template columns are keyed '<store type>;<scene type>'.
            availability_id = '{};{}'.format(self.store_type, scene_type)
            if availability_id in templates_data.columns:
                availability_data = templates_data[
                    (templates_data[self.templates_class.KPI_NAME] == kpi_name)
                    & (templates_data[availability_id] == 1)]
            else:
                continue
            if not availability_data.empty:
                scene_types.append(scene_type)
        # NOTE(review): this takes the EAN column of the WHOLE sheet, not of
        # the rows filtered for this KPI - confirm the sheet holds a single
        # KPI's products, otherwise other KPIs' products leak in.
        products_for_check = templates_data[
            self.templates_class.availability_consts.PRODUCT_EAN_CODES].tolist(
            )
        for products_list in products_for_check:
            try:
                products = products_list.split(', ')
            except Exception as e:
                # Non-string cell (e.g. a single numeric EAN) - use as-is.
                products = products_list
            result = self.tools.calculate_availability(
                product_ean_code=products,
                template_name=scene_types,
                stacking_layer=1)
            score = 100 if result > 0 else 0
            scores.append(score)
        if 0 in scores:
            final_score = 0
        else:
            final_score = 100
        return final_score

    @kpi_runtime()
    def calculate_shelf_position(self, kpi):
        """
        Calculates one shelf-position atomic KPI from the 'Shelf Position'
        sheet: scores 100 only if every product list is found on its
        configured shelves in the relevant scenes.
        """
        kpi_name = kpi[self.templates_class.KPI_NAME]
        templates_data = self.templates_class.parse_template(
            sheet_name='Shelf Position')
        scores = []
        shelf_position_data = templates_data[(
            templates_data[self.templates_class.KPI_NAME] == kpi_name)]
        products_for_check = shelf_position_data[
            self.templates_class.availability_consts.PRODUCT_EAN_CODES].tolist(
            )
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        for products_list in products_for_check:
            try:
                products = products_list.split(', ')
            except Exception as e:
                # Non-string cell (e.g. a single numeric EAN) - use as-is.
                products = products_list
            shelves = shelf_position_data.loc[
                shelf_position_data[
                    self.templates_class.availability_consts.PRODUCT_EAN_CODES]
                == products_list]['Shelf Position'].values[0].split(',')
            result = self.tools.calculate_shelf_level_assortment(
                shelves=[int(shelf) for shelf in shelves],
                product_ean_code=products,
                scene_fk=scenes_to_check)
            score = 100 if result > 0 else 0
            scores.append(score)
        if 0 in scores:
            final_score = 0
        else:
            final_score = 100
        return final_score

    @kpi_runtime()
    def calculate_facing_sos(self, kpi):
        """
        Calculates one facing share-of-shelf atomic KPI: 100 if the
        manufacturer's SOS in the relevant scenes (optionally within a
        category, first stacking layer) exceeds the target, otherwise 0.
        """
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        manufactruers_for_check = kpi[self.templates_class.MANUFACTURER]
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        if kpi[self.templates_class.CATEGORY] is None:
            sos_filters = {'manufacturer_name': manufactruers_for_check}
            result = self.tools.calculate_share_of_shelf(
                sos_filters=sos_filters,
                scene_fk=scenes_to_check,
                stacking_layer=1)
        else:
            sos_filters = {'manufacturer_name': manufactruers_for_check}
            category = kpi[self.templates_class.CATEGORY]
            result = self.tools.calculate_share_of_shelf(
                sos_filters=sos_filters,
                scene_fk=scenes_to_check,
                category=category,
                stacking_layer=1)
        # NOTE(review): the target cell is compared as-is - confirm the
        # template parser yields a number here, not a string.
        score = 100 if result > kpi[self.templates_class.TARGET] else 0
        return score

    def get_atomic_fk(self, params):
        """
        This function gets an Atomic KPI's FK out of the template data.
        Returns None when the (KPI, atomic) pair is missing from the static
        data.
        """
        atomic_name = params[self.templates_class.KPI_NAME]
        kpi_name = params[self.templates_class.KPI_GROUP]
        atomic_fk = self.kpi_static_data[
            (self.kpi_static_data['kpi_name'] == kpi_name)
            & (self.kpi_static_data['atomic_kpi_name'] == atomic_name
               )]['atomic_kpi_fk']
        if atomic_fk.empty:
            return None
        return atomic_fk.values[0]

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic
        KPI/KPI/KPI set), and appends the insert SQL query into the queries'
        list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for
        saving in KPI results tables, and returns it as a dict.
        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id,
                  self.visit_date.isoformat(), fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0]
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid, kpi_set_name,
                  self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'kpi_fk',
                    'atomic_kpi_fk'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    def commit_custom_scif(self):
        """
        Deletes this session's old custom SCIF rows and writes the pending
        custom SCIF queries, committing after the delete and after the
        inserts. Failed inserts are logged and skipped (best-effort).
        """
        if not self.rds_conn.is_connected:
            self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_query = NESTLEUKQueries.get_delete_session_custom_scif(
            self.session_fk)
        cur.execute(delete_query)
        self.rds_conn.db.commit()
        queries = self.merge_insert_queries(self.custom_scif_queries)
        for query in queries:
            try:
                cur.execute(query)
            except Exception:
                print('could not run query: {}'.format(query))
        self.rds_conn.db.commit()

    def merge_insert_queries(self, insert_queries):
        """
        Merges single-row INSERT queries that share the same prefix into
        multi-row INSERTs (batches of 10**4 rows). UPDATE queries are set
        aside into self.update_queries instead of being merged.
        """
        query_groups = {}
        for query in insert_queries:
            if 'update' in query:
                self.update_queries.append(query)
            else:
                static_data, inserted_data = query.split('VALUES ')
                if static_data not in query_groups:
                    query_groups[static_data] = []
                query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index +
                                            10**4])))
        return merged_queries

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the
        changes: custom SCIF first, then the 3-level KPI results (after
        deleting this session's previous results).
        """
        self.commit_custom_scif()
        cur = self.rds_conn.db.cursor()
        delete_queries = NESTLEUKQueries.get_delete_session_results_query(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        queries = self.merge_insert_queries(self.kpi_results_queries)
        for query in queries:
            cur.execute(query)
        self.rds_conn.db.commit()
class PENAFLORAR_SANDDIAGEOARToolBox: LEVEL1 = 1 LEVEL2 = 2 LEVEL3 = 3 ACTIVATION_STANDARD = 'Activation Standard' def __init__(self, data_provider, output): self.output = output self.data_provider = data_provider self.project_name = self.data_provider.project_name self.session_uid = self.data_provider.session_uid self.products = self.data_provider[Data.PRODUCTS] self.all_products = self.data_provider[Data.ALL_PRODUCTS] self.match_product_in_scene = self.data_provider[Data.MATCHES] self.visit_date = self.data_provider[Data.VISIT_DATE] self.session_info = self.data_provider[Data.SESSION_INFO] self.scene_info = self.data_provider[Data.SCENES_INFO] self.store_id = self.data_provider[Data.STORE_FK] self.scif = self.data_provider[Data.SCENE_ITEM_FACTS] self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng) self.store_info = self.data_provider[Data.STORE_INFO] self.store_type = self.store_info['additional_attribute_1'].values[0] self.kpi_static_data = self.get_kpi_static_data() self.set_templates_data = {} self.match_display_in_scene = self.get_match_display() self.kpi_results_queries = [] self.scores = {self.LEVEL1: {}, self.LEVEL2: {}, self.LEVEL3: {}} self.output = output self.common = Common(self.data_provider) self.commonV2 = CommonV2(self.data_provider) self.global_gen = DIAGEOGenerator(self.data_provider, self.output, self.common) self.tools = DIAGEOToolBox( self.data_provider, output, match_display_in_scene=self.match_display_in_scene ) # replace the old one self.diageo_generator = DIAGEOGenerator(self.data_provider, self.output, self.common) def get_kpi_static_data(self): """ This function extracts the static KPI data and saves it into one global data frame. The data is taken from static.kpi / static.atomic_kpi / static.kpi_set. 
""" query = DIAGEOQueries.get_all_kpi_data() kpi_static_data = pd.read_sql_query(query, self.rds_conn.db) return kpi_static_data def get_match_display(self): """ This function extracts the display matches data and saves it into one global data frame. The data is taken from probedata.match_display_in_scene. """ query = DIAGEOQueries.get_match_display(self.session_uid) match_display = pd.read_sql_query(query, self.rds_conn.db) return match_display def main_calculation(self, set_names): """ This function calculates the KPI results. """ log_runtime('Updating templates')(self.tools.update_templates)() # Global assortment kpis assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2( ) self.commonV2.save_json_to_new_tables(assortment_res_dict) for set_name in set_names: set_score = 0 if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data.keys( ): try: self.set_templates_data[ set_name] = self.tools.download_template(set_name) except: Log.warning("Couldn't find a template for set name: " + str(set_name)) continue # if set_name in ('MPA', 'New Products',): # set_score = self.calculate_assortment_sets(set_name) # Global Visible to Customer / Visible to Consumer if set_name in ('Visible to Customer', 'Visible to Consumer %'): # Global function sku_list = filter( None, self.scif[self.scif['product_type'] == 'SKU'].product_ean_code.tolist()) res_dict = self.diageo_generator.diageo_global_visible_percentage( sku_list) if res_dict: # Saving to new tables # parent_res = res_dict[-1] self.commonV2.save_json_to_new_tables(res_dict) # Saving to old tables # result = parent_res['result'] # self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result) # Saving to old tables filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'} set_score = self.tools.calculate_visible_percentage( visible_filters=filters) self.save_level2_and_level3(set_name, set_name, set_score) elif set_name in ('Relative 
Position'): # Global function res_dict = self.diageo_generator.diageo_global_relative_position_function( self.set_templates_data[set_name], location_type='template_display_name') if res_dict: # Saving to new tables self.commonV2.save_json_to_new_tables(res_dict) set_score = self.calculate_relative_position_sets(set_name) else: return if set_score == 0: pass elif set_score is False: continue set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] == set_name]['kpi_set_fk'].values[0] self.write_to_db_result(set_fk, set_score, self.LEVEL1) # commiting to new tables self.commonV2.commit_results_data() def calculate_relative_position_sets(self, set_name): """ This function calculates every relative-position-typed KPI from the relevant sets, and returns the set final score. """ scores = [] for params in self.set_templates_data[set_name]: if self.store_info.at[0, 'additional_attribute_2'] == params.get( 'additional_attribute_2', 'Empty'): tested_filters = { params.get(TESTED_TYPE): params.get(TESTED_VALUE) } anchor_filters = { params.get(ANCHOR_TYPE): params.get(ANCHOR_VALUE) } direction_data = { 'top': self._get_direction_for_relative_position( params.get(self.tools.TOP_DISTANCE)), 'bottom': self._get_direction_for_relative_position( params.get(self.tools.BOTTOM_DISTANCE)), 'left': self._get_direction_for_relative_position( params.get(self.tools.LEFT_DISTANCE)), 'right': self._get_direction_for_relative_position( params.get(self.tools.RIGHT_DISTANCE)) } general_filters = { 'template_display_name': params.get(self.tools.LOCATION) } result = self.tools.calculate_relative_position( tested_filters, anchor_filters, direction_data, **general_filters) score = 1 if result else 0 scores.append(score) self.save_level2_and_level3(set_name, params.get(self.tools.KPI_NAME), score) if not scores: return False set_score = (sum(scores) / float(len(scores))) * 100 return set_score def _get_direction_for_relative_position(self, value): """ This function converts direction data 
from the template (as string) to a number. """ if value == self.tools.UNLIMITED_DISTANCE: value = 1000 elif not value or not str(value).isdigit(): value = 0 else: value = int(value) return value def calculate_assortment_sets(self, set_name): """ This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score. """ scores = [] for params in self.set_templates_data[set_name]: target = str(params.get(self.store_type, '')) if target.isdigit() or target.capitalize() in ( self.tools.RELEVANT_FOR_STORE, self.tools.OR_OTHER_PRODUCTS): products = str( params.get(self.tools.PRODUCT_EAN_CODE, params.get(self.tools.PRODUCT_EAN_CODE2, ''))).replace(',', ' ').split() target = 1 if not target.isdigit() else int(target) kpi_name = params.get(self.tools.GROUP_NAME, params.get(self.tools.PRODUCT_NAME)) kpi_static_data = self.kpi_static_data[ (self.kpi_static_data['kpi_set_name'] == set_name) & (self.kpi_static_data['kpi_name'] == kpi_name)] if len(products) > 1: result = 0 for product in products: product_score = self.tools.calculate_assortment( product_ean_code=product) result += product_score atomic_fk = kpi_static_data[ kpi_static_data['description'] == product]['atomic_kpi_fk'].values[0] self.write_to_db_result(atomic_fk, product_score, level=self.LEVEL3) score = 1 if result >= target else 0 else: result = self.tools.calculate_assortment( product_ean_code=products) atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0] score = 1 if result >= target else 0 self.write_to_db_result(atomic_fk, score, level=self.LEVEL3) scores.append(score) kpi_fk = kpi_static_data['kpi_fk'].values[0] self.write_to_db_result(kpi_fk, score, level=self.LEVEL2) if not scores: return False set_score = (sum(scores) / float(len(scores))) * 100 return set_score def calculate_activation_standard(self): """ This function calculates the Activation Standard KPI, and saves the result to the DB (for all 3 levels). 
""" final_score = 0 for params in self.tools.download_template(self.ACTIVATION_STANDARD): set_name = params.get(self.tools.ACTIVATION_SET_NAME) kpi_name = params.get(self.tools.ACTIVATION_KPI_NAME) target = float(params.get(self.tools.ACTIVATION_TARGET)) target = target * 100 if target < 1 else target score_type = params.get(self.tools.ACTIVATION_SCORE) weight = float(params.get(self.tools.ACTIVATION_WEIGHT)) if kpi_name: kpi_fk = self.kpi_static_data[ (self.kpi_static_data['kpi_set_name'] == set_name) & (self.kpi_static_data['kpi_name'] == kpi_name )]['kpi_fk'].values[0] score = self.scores[self.LEVEL2].get(kpi_fk, 0) else: set_fk = self.kpi_static_data[ self.kpi_static_data['kpi_set_name'] == set_name]['kpi_set_fk'].values[0] score = self.scores[self.LEVEL1].get(set_fk, 0) if score >= target: score = 100 else: if score_type == 'PROPORTIONAL': score = (score / float(target)) * 100 else: score = 0 final_score += score * weight self.save_level2_and_level3(self.ACTIVATION_STANDARD, set_name, score) set_fk = self.kpi_static_data[ self.kpi_static_data['kpi_set_name'] == self.ACTIVATION_STANDARD]['kpi_set_fk'].values[0] self.write_to_db_result(set_fk, final_score, self.LEVEL1) def save_level2_and_level3(self, set_name, kpi_name, score): """ Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB. """ kpi_data = self.kpi_static_data[ (self.kpi_static_data['kpi_set_name'] == set_name) & (self.kpi_static_data['kpi_name'] == kpi_name)] try: kpi_fk = kpi_data['kpi_fk'].values[0] except: Log.warning("kpi name or set name don't exist") return atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0] self.write_to_db_result(kpi_fk, score, self.LEVEL2) self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3) def write_to_db_result(self, fk, score, level): """ This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set), and appends the insert SQL query into the queries' list, later to be written to the DB. 
""" attributes = self.create_attributes_dict(fk, score, level) if level == self.LEVEL1: table = KPS_RESULT elif level == self.LEVEL2: table = KPK_RESULT elif level == self.LEVEL3: table = KPI_RESULT else: return query = insert(attributes, table) self.kpi_results_queries.append(query) def create_attributes_dict(self, fk, score, level): """ This function creates a data frame with all attributes needed for saving in KPI results tables. """ if level == self.LEVEL1: kpi_set_name = self.kpi_static_data[ self.kpi_static_data['kpi_set_fk'] == fk]['kpi_set_name'].values[0] attributes = pd.DataFrame( [(kpi_set_name, self.session_uid, self.store_id, self.visit_date.isoformat(), format(score, '.2f'), fk)], columns=[ 'kps_name', 'session_uid', 'store_fk', 'visit_date', 'score_1', 'kpi_set_fk' ]) elif level == self.LEVEL2: kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] == fk]['kpi_name'].values[0] attributes = pd.DataFrame( [(self.session_uid, self.store_id, self.visit_date.isoformat(), fk, kpi_name, score)], columns=[ 'session_uid', 'store_fk', 'visit_date', 'kpi_fk', 'kpk_name', 'score' ]) elif level == self.LEVEL3: data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk'] == fk] atomic_kpi_name = data['atomic_kpi_name'].values[0] kpi_fk = data['kpi_fk'].values[0] kpi_set_name = self.kpi_static_data[ self.kpi_static_data['atomic_kpi_fk'] == fk]['kpi_set_name'].values[0] attributes = pd.DataFrame( [(atomic_kpi_name, self.session_uid, kpi_set_name, self.store_id, self.visit_date.isoformat(), datetime.utcnow().isoformat(), score, kpi_fk, fk)], columns=[ 'display_text', 'session_uid', 'kps_name', 'store_fk', 'visit_date', 'calculation_time', 'score', 'kpi_fk', 'atomic_kpi_fk' ]) else: attributes = pd.DataFrame() return attributes.to_dict() @log_runtime('Saving to DB') def commit_results_data(self): """ This function writes all KPI results to the DB, and commits the changes. 
""" insert_queries = self.merge_insert_queries(self.kpi_results_queries) self.rds_conn.disconnect_rds() self.rds_conn.connect_rds() cur = self.rds_conn.db.cursor() delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables( self.session_uid) for query in delete_queries: cur.execute(query) for query in insert_queries: cur.execute(query) self.rds_conn.db.commit() @staticmethod def merge_insert_queries(insert_queries): query_groups = {} for query in insert_queries: static_data, inserted_data = query.split('VALUES ') if static_data not in query_groups: query_groups[static_data] = [] query_groups[static_data].append(inserted_data) merged_queries = [] for group in query_groups: merged_queries.append('{0} VALUES {1}'.format( group, ',\n'.join(query_groups[group]))) return merged_queries
class BATRUNewTemplate:
    """
    Creates/updates static KPI definitions (sets, KPIs, atomic KPIs) in the
    DB for the BAT RU project, based on the Excel templates for the SK, SAS
    and P4 sets.

    NOTE(review): all INSERT/DELETE statements here are built by string
    formatting; acceptable only because the values come from trusted internal
    templates, not user input — parameterized queries would be safer.
    """

    def __init__(self, project_name, set_name):
        self.project = project_name
        self.log_suffix = '{}: '.format(self.project)
        self.queries = []
        self.kpi = set_name
        self.sets_added = {}
        self.kpis_added = {}
        self.kpi_counter = {'set': 0, 'kpi': 0, 'atomic': 0}
        self.data = pd.DataFrame()
        self.set_fk = self.get_set_fk(set_name)
        if set_name == BATRUConst.SK_SET_NAME:
            # SK is rebuilt from scratch: wipe its static rows first.
            self.delete_static_DB()
            self.aws_conn = PSProjectConnector(self.project, DbUsers.CalculationEng)
            self.kpi_static_data = self.get_kpi_data()
            self.get_kpis_from_template()
        elif set_name == BATRUConst.SAS_SET_NAME:
            self.aws_conn = PSProjectConnector(self.project, DbUsers.CalculationEng)
            self.kpi_static_data = self.get_kpi_data()
            self.get_kpis_from_template_sas()
        elif set_name == BATRUConst.P4_SET_NAME:
            self.delete_static_DB()
            self.kpi_static_data = self.get_kpi_data()
            self.p4_template = parse_template(BATRUConst.P4_PATH, BATRUConst.POSM_SHEET)
            for column in self.p4_template.columns:
                self.p4_template[column] = self.encode_column_in_df(self.p4_template, column)
            # Tracks atomics already queued in this run so they are not queued twice.
            self.alreadyAddedAtomics = pd.DataFrame(columns=[
                BATRUConst.SET_NAME, BATRUConst.KPI_NAME,
                BATRUConst.GROUP_NAME_P4, BATRUConst.ATOMIC_NAME
            ])

    def get_kpis_from_template_sas(self):
        """
        Build self.data for the SAS set: one row per (fixture x count-suffix
        0..10 x display name), plus the 'No competitors in SAS Zone' atomic.
        """
        list_of_dicts = []
        sections_template = parse_template(BATRUConst.P3_PATH, BATRUConst.SAS_ZONE_SHEET)
        fixtures = sections_template['Equipment'].unique()
        display_names = list(sections_template['display_name'].unique())
        display_names.append("No competitors in SAS Zone")
        for fixture in fixtures:
            for i in range(0, 11):
                # i == 0 is the bare fixture name; i > 0 are numbered duplicates.
                if i == 0:
                    level_2_name = fixture
                else:
                    level_2_name = BATRUConst.P3_COUNT_FIXTURE.format(fixture, i)
                for level_3_name in display_names:
                    kpi_dictionary = {
                        BATRUConst.SET_NAME: BATRUConst.SAS_SET_NAME,
                        BATRUConst.KPI_NAME: level_2_name,
                        BATRUConst.ATOMIC_NAME: level_3_name
                    }
                    list_of_dicts.append(kpi_dictionary)
        self.data = pd.DataFrame(list_of_dicts)

    @staticmethod
    def encode_column_in_df(df, column_name):
        """Return the given string column UTF-8 encoded."""
        return df[column_name].str.encode('utf-8')

    def get_kpis_from_template(self):
        """
        Build self.data for the SK set: one row per (fixture x count-suffix
        0..10 x section x convert_names entry).
        """
        list_of_dicts = []
        sections_template = parse_template(BATRUConst.P3_PATH, BATRUConst.SK_SHEET)
        fixtures = sections_template['fixture'].unique()
        sections = sections_template['section_name'].unique()
        for fixture in fixtures:
            for i in range(0, 11):
                if i == 0:
                    level_2_name = fixture
                else:
                    level_2_name = fixture + " - {}".format(i)
                for model_id in sections:
                    for name in BATRUConst.convert_names.keys():
                        if name == BATRUConst.MODEL_ID:
                            level_3_name = model_id
                            display_text = model_id
                            relativ_score = 1
                        else:
                            level_3_name = name
                            relativ_score = 0
                            display_text = self.encode_string(BATRUConst.convert_names[name])
                        kpi_dictionary = {
                            BATRUConst.SET_NAME: BATRUConst.SK_SET_NAME,
                            BATRUConst.KPI_NAME: level_2_name,
                            BATRUConst.ATOMIC_NAME: level_3_name,
                            BATRUConst.MODEL_ID: model_id,
                            BATRUConst.RELATIVE_SCORE: relativ_score,
                            BATRUConst.DISPLAY_TEXT: display_text
                        }
                        list_of_dicts.append(kpi_dictionary)
        self.data = pd.DataFrame(list_of_dicts)

    @staticmethod
    def encode_string(value):
        """
        Escape single quotes and UTF-8 encode the given string. Values that
        cannot be encoded (e.g. already encoded) are returned unchanged.

        Renamed parameter from `str` to avoid shadowing the builtin.
        """
        try:
            return value.replace("'", "\\'").encode('utf-8')
        except Exception:
            # Narrowed from a bare except; this is a deliberate best-effort path.
            Log.debug('The name {} is already coded'.format(value))
            return value

    @property
    def rds_conn(self):
        # Lazily create and cache the DB connection.
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(self.project, DbUsers.CalculationEng)
        return self._rds_conn

    def delete_static_DB(self):
        """Delete all atomic KPIs and KPIs belonging to this set from static."""
        cur = self.rds_conn.db.cursor()
        atomic_query = """
            delete from static.atomic_kpi
            where kpi_fk in (select pk from static.kpi where kpi_set_fk = {});
            """.format(self.set_fk)
        kpi_query = """
            delete from static.kpi where kpi_set_fk = {};
            """.format(self.set_fk)
        delete_queries = [atomic_query, kpi_query]
        for query in delete_queries:
            cur.execute(query)
            print(query)
        self.rds_conn.db.commit()

    def get_set_fk(self, set_name):
        """Return the static.kpi_set pk for the given set name."""
        self.rds_conn.connect_rds()
        query = """
            select pk from static.kpi_set where name = "{}";
            """.format(set_name)
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data.iloc[0][0]

    def get_kpi_data(self):
        """
        Fetch the full static KPI hierarchy (set / kpi / atomic) as one
        DataFrame, with all string columns UTF-8 encoded.
        """
        self.rds_conn.connect_rds()
        query = """
            select api.name as atomic_kpi_name, api.pk as atomic_kpi_fk,
                   api.description, kpi.display_text as kpi_name,
                   kpi.pk as kpi_fk, api.model_id as section,
                   kps.name as kpi_set_name, kps.pk as kpi_set_fk
            from static.kpi_set kps
            left join static.kpi kpi on kps.pk = kpi.kpi_set_fk
            left join static.atomic_kpi api on kpi.pk = api.kpi_fk;
            """
        kpi_data = pd.read_sql_query(query, self.rds_conn.db)
        str_columns = ['description', 'kpi_name', 'atomic_kpi_name',
                       'kpi_set_name', 'section']
        for column in str_columns:
            kpi_data[column] = self.encode_column_in_df(kpi_data, column)
        return kpi_data

    def handle_update(self):
        """Dispatch the static update for the set this instance was built for."""
        if self.kpi == BATRUConst.P4_SET_NAME:
            self.add_p4_to_static()
            self.commit_to_db()
        elif self.kpi == BATRUConst.SK_SET_NAME:
            self.add_kpis_to_static_p3()
            self.add_atomics_to_static_p3()
            Log.info('{} Sets, {} KPIs and {} Atomics have been added'.format(
                self.kpi_counter['set'], self.kpi_counter['kpi'], self.kpi_counter['atomic']))
        elif self.kpi == BATRUConst.SAS_SET_NAME:
            self.add_kpis_to_static_sas()
            self.add_atomics_to_static_sas()
            Log.info('{} Sets, {} KPIs and {} Atomics have been added'.format(
                self.kpi_counter['set'], self.kpi_counter['kpi'], self.kpi_counter['atomic']))

    def add_kpis_to_static_sas(self):
        """Insert missing level-2 KPIs for the SAS set, remembering new pks."""
        kpis = self.data.drop_duplicates(
            subset=[BATRUConst.SET_NAME, BATRUConst.KPI_NAME], keep='first')
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for i in xrange(len(kpis)):
            set_name = self.encode_string(kpis.iloc[i][BATRUConst.SET_NAME])
            kpi_name = self.encode_string(kpis.iloc[i][BATRUConst.KPI_NAME])
            if self.kpi_static_data[
                    (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                    (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name)].empty:
                set_fk = self.kpi_static_data[
                    self.kpi_static_data[BATRUConst.SET_NAME] ==
                    set_name][BATRUConst.SET_FK].values[0]
                level2_query = """
                    INSERT INTO static.kpi (kpi_set_fk, display_text)
                    VALUES ('{0}', '{1}');""".format(set_fk, kpi_name)
                print(level2_query)
                cur.execute(level2_query)
                # Remember the new pk so atomics can attach without a re-query.
                if set_name in self.kpis_added.keys():
                    self.kpis_added[set_name][kpi_name] = cur.lastrowid
                else:
                    self.kpis_added[set_name] = {kpi_name: cur.lastrowid}
                print(level2_query)
                self.kpi_counter['kpi'] += 1
        self.aws_conn.db.commit()

    def add_atomics_to_static_sas(self):
        """Insert missing level-3 (atomic) KPIs for the SAS set."""
        atomics = self.data
        queries = []
        for i in xrange(len(atomics)):
            atomic = atomics.iloc[i]
            set_name = self.encode_string(atomic[BATRUConst.SET_NAME])
            kpi_name = self.encode_string(atomic[BATRUConst.KPI_NAME])
            atomic_name = self.encode_string(atomic[BATRUConst.ATOMIC_NAME])
            names = [atomic_name]
            for index, name in enumerate(names):
                if self.kpi_static_data[
                        (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                        (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name) &
                        (self.kpi_static_data[BATRUConst.ATOMIC_NAME] == name)].empty:
                    # Prefer the pk captured during this run's KPI inserts.
                    if set_name in self.kpis_added.keys() and kpi_name in self.kpis_added[set_name].keys():
                        kpi_fk = self.kpis_added[set_name][kpi_name]
                    else:
                        kpi_fk = self.kpi_static_data[
                            (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                            (self.kpi_static_data[BATRUConst.KPI_NAME] ==
                             kpi_name)][BATRUConst.KPI_FK].values[0]
                    level3_query = """
                        INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                                       presentation_order, display)
                        VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format(
                        kpi_fk, name, name, name, 1, 'Y')
                    queries.append(level3_query)
                    self.kpi_counter['atomic'] += 1
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for query in queries:
            cur.execute(query)
            print(query)
        self.aws_conn.db.commit()

    def add_kpis_to_static_p3(self):
        """Insert missing level-2 KPIs for the SK (P3) set."""
        kpis = self.data.drop_duplicates(
            subset=[BATRUConst.SET_NAME, BATRUConst.KPI_NAME], keep='first')
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for i in xrange(len(kpis)):
            set_name = self.encode_string(kpis.iloc[i][BATRUConst.SET_NAME])
            kpi_name = self.encode_string(kpis.iloc[i][BATRUConst.KPI_NAME])
            if self.kpi_static_data[
                    (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                    (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name)].empty:
                if set_name in self.sets_added.keys():
                    set_fk = self.sets_added[set_name]
                else:
                    set_fk = self.set_fk
                level2_query = """
                    INSERT INTO static.kpi (kpi_set_fk, display_text)
                    VALUES ('{0}', '{1}');""".format(set_fk, kpi_name)
                cur.execute(level2_query)
                if set_name in self.kpis_added.keys():
                    self.kpis_added[set_name][kpi_name] = cur.lastrowid
                else:
                    self.kpis_added[set_name] = {kpi_name: cur.lastrowid}
                print(level2_query)
                self.kpi_counter['kpi'] += 1
        self.aws_conn.db.commit()

    def add_atomics_to_static_p3(self):
        """Insert missing level-3 (atomic) KPIs for the SK (P3) set."""
        atomics = self.data
        queries = []
        for i in xrange(len(atomics)):
            atomic = atomics.iloc[i]
            set_name = self.encode_string(atomic[BATRUConst.SET_NAME])
            kpi_name = self.encode_string(atomic[BATRUConst.KPI_NAME])
            atomic_name = self.encode_string(atomic[BATRUConst.ATOMIC_NAME])
            model_id = self.encode_string(atomic[BATRUConst.MODEL_ID])
            relative_score = atomic[BATRUConst.RELATIVE_SCORE]
            display_text = self.encode_string(atomic[BATRUConst.DISPLAY_TEXT])
            names = [atomic_name]
            for index, name in enumerate(names):
                if self.kpi_static_data[
                        (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                        (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name) &
                        (self.kpi_static_data[BATRUConst.ATOMIC_NAME] == name) &
                        (self.kpi_static_data['section'] == model_id)].empty:
                    if set_name in self.kpis_added.keys() and kpi_name in self.kpis_added[set_name].keys():
                        kpi_fk = self.kpis_added[set_name][kpi_name]
                    else:
                        kpi_fk = self.kpi_static_data[
                            (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                            (self.kpi_static_data[BATRUConst.KPI_NAME] ==
                             kpi_name)][BATRUConst.KPI_FK].values[0]
                    level3_query = """
                        INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                                       presentation_order, model_id, relative_score, display)
                        VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}');
                        """.format(kpi_fk, name, name, display_text, index + 1,
                                   model_id, relative_score, 'Y')
                    queries.append(level3_query)
                    self.kpi_counter['atomic'] += 1
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for query in queries:
            cur.execute(query)
            print(query)
        self.aws_conn.db.commit()

    def add_p4_to_static(self):
        """
        Queue all P4 (POSM) KPIs and their group/product atomics, with the
        per-equipment count suffixes, for later commit via commit_to_db().
        """
        template_for_static = self.p4_template[BATRUConst.COLUMNS_FOR_STATIC]
        # Saves to static all KPI (equipments) with a counter.
        atomic_queries = []
        kpi_queries = []
        kpi_names = template_for_static[BATRUConst.KPI_NAME_FIELD].unique().tolist()
        for kpi in kpi_names:
            kpi_with_count = self.add_kpi_count(kpi)
            for kpi_count in kpi_with_count:
                if self.kpi_static_data[
                        (self.kpi_static_data['kpi_set_name'] == BATRUConst.P4_SET_NAME) &
                        (self.kpi_static_data['kpi_name'] == kpi_count)].empty:
                    kpi_queries.append(kpi_count)
        self.save_kpi_level(self.set_fk, kpi_queries)
        # We need to re-run query for updated kpis.
        self.kpi_static_data = self.get_kpi_data()
        # This part is not combined with the loop above since it needs all
        # kpis (with count) to be saved first.
        for kpi_name in kpi_names:
            atomics_for_static = template_for_static[
                template_for_static[BATRUConst.KPI_NAME_FIELD] == kpi_name]
            for i in xrange(len(atomics_for_static)):
                row = atomics_for_static.iloc[i]
                group = self.encode_string(row[BATRUConst.GROUP_NAME_FIELD])
                product = self.encode_string(row[BATRUConst.PRODUCT_NAME_FIELD])
                kpi_with_count = self.add_kpi_count(kpi_name)
                # This will create group and product atomics
                for kpi in kpi_with_count:
                    is_exist = self.alreadyAddedAtomics[
                        (self.alreadyAddedAtomics[BATRUConst.SET_NAME] == BATRUConst.P4_SET_NAME) &
                        (self.alreadyAddedAtomics[BATRUConst.KPI_NAME] == kpi) &
                        (self.alreadyAddedAtomics[BATRUConst.GROUP_NAME_P4] == group) &
                        (self.alreadyAddedAtomics[BATRUConst.ATOMIC_NAME] == product)]
                    if is_exist.empty:
                        try:
                            kpi_fk = self.kpi_static_data[
                                (self.kpi_static_data[BATRUConst.SET_FK] == self.set_fk) &
                                (self.kpi_static_data[BATRUConst.KPI_NAME] ==
                                 kpi)][BATRUConst.KPI_FK].values[0]
                            dict_already_added = {
                                BATRUConst.SET_NAME: BATRUConst.P4_SET_NAME,
                                BATRUConst.KPI_NAME: kpi,
                                BATRUConst.GROUP_NAME_P4: group,
                                BATRUConst.ATOMIC_NAME: product
                            }
                            self.alreadyAddedAtomics = self.alreadyAddedAtomics.append(
                                dict_already_added, ignore_index=True)
                            product_query = (kpi_fk, product, product, product,
                                             group, BATRUConst.PRODUCT_RELATIVE_SCORE)
                            group_query = (kpi_fk, group, group, group, None,
                                           BATRUConst.GROUP_RELATIVE_SCORE)
                            atomic_queries.extend([group_query, product_query])
                        except IndexError:
                            print("kpi '{}' does not exist.".format(kpi))
        self.create_atomic_queries(set(atomic_queries))

    def create_atomic_queries(self, queries_to_commit):
        """Render queued atomic tuples as INSERT statements, skipping existing rows."""
        level3_query = """
            INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                           presentation_order, display, model_id, relative_score)
            VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', {});"""
        for query in queries_to_commit:
            if self.is_new(query):
                self.queries.append(
                    level3_query.format(
                        query[0], query[1], query[2], query[3], 1, 'Y',
                        '{}'.format(query[4]) if query[4] else 'NULL',
                        query[5]).replace("'NULL'", "NULL"))

    def is_new(self, data, level=3):
        """Return True when the given KPI/atomic tuple is not already in static."""
        if level == 3:
            existing = self.kpi_static_data[
                (self.kpi_static_data[BATRUConst.SET_FK] == self.set_fk) &
                (self.kpi_static_data[BATRUConst.KPI_FK] == data[0]) &
                (self.kpi_static_data[BATRUConst.ATOMIC_NAME] == data[1])]
        elif level == 2:
            existing = self.kpi_static_data[
                (self.kpi_static_data[BATRUConst.SET_FK] == self.set_fk) &
                (self.kpi_static_data[BATRUConst.KPI_NAME] == data[0])]
        else:
            Log.debug('not valid level for checking new KPIs')
            return False
        return existing.empty

    def save_kpi_level(self, set_fk, kpi_list):
        """Insert the new level-2 KPIs of kpi_list under the given set pk."""
        level2_query = """
            INSERT INTO static.kpi (kpi_set_fk, display_text)
            VALUES ('{}', '{}');"""
        new_kpis = []
        for kpi in kpi_list:
            if self.is_new([kpi], level=2):
                new_kpis.append(kpi)
        count_for_show = 0
        self.rds_conn.connect_rds()
        # Renamed from `all` to avoid shadowing the builtin.
        total = len(kpi_list)
        cur = self.rds_conn.db.cursor()
        for kpi in new_kpis:
            query = level2_query.format(set_fk, kpi.replace("'", "''"))
            print(query)
            count_for_show += 1
            cur.execute(query)
            if count_for_show % 10 == 0:
                print('done {} / {}'.format(count_for_show, total))
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    def add_kpi_count(self, kpi_name):
        """Return [name, name-2, ..., name-MAX_KPI_COUNT] count variants."""
        kpis = [kpi_name]
        i = 2
        while i <= BATRUConst.MAX_KPI_COUNT:
            kpi = BATRUConst.P4_COUNT_FIXTURE.format(kpi_name, i)
            kpis.append(kpi)
            i += 1
        return kpis

    def commit_to_db(self):
        """Execute all queued queries and commit, logging progress every 10."""
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        kpis_sum = len(self.queries)
        count_for_show = 0
        for query in self.queries:
            print(query)
            cur.execute(query)
            count_for_show += 1
            if count_for_show % 10 == 0:
                print('There are {} / {}'.format(count_for_show, kpis_sum))
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()
class CCBRToolBox:
    """
    KPI calculation toolbox for the CCBR project: availability/pricing
    ("simon") KPIs plus template-driven survey / count / group-count atomics.
    """

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.tools = CCBRGENERALToolBox(self.data_provider, self.output, rds_conn=self.rds_conn)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.kpi_results_queries = []
        self.survey = Survey(self.data_provider, self.output)
        self.kpi_results_new_tables_queries = []
        self.New_kpi_static_data = self.get_new_kpi_static_data()
        self.session_id = self.data_provider.session_id
        self.prices_per_session = PsDataProvider(
            self.data_provider, self.output).get_price_union(self.session_id)
        self.common_db = Common(self.data_provider)
        # Template sheets are loaded once and reused per atomic.
        self.count_sheet = pd.read_excel(PATH, Const.COUNT).fillna("")
        self.group_count_sheet = pd.read_excel(PATH, Const.GROUP_COUNT).fillna("")
        self.survey_sheet = pd.read_excel(PATH, Const.SURVEY).fillna("")

    def main_calculation(self):
        """
        Calculate all KPI results: one atomic per template row, then the
        availability/pricing KPIs, then commit everything.
        """
        kpis_sheet = pd.read_excel(PATH, Const.KPIS).fillna("")
        for index, row in kpis_sheet.iterrows():
            self.handle_atomic(row)
        self.handle_simon_kpis()
        self.commit_results_data()

    def handle_simon_kpis(self):
        """Activate the availability and pricing functions."""
        active_products = self.all_products.loc[self.all_products["is_active"] > 0]
        self.calculate_availability(active_products)
        self.calculate_pricing(self.all_products)

    def calculate_availability(self, active_products):
        """
        Calculate per-session availability (facings per product x scene type);
        used in the sovi and sovi vertical reports.

        :param active_products: a df containing only active products
        """
        active_products_sku_and_other = active_products[
            (active_products['product_type'] == 'SKU') |
            (active_products['product_type'] == 'Other')]
        active_products_pks = active_products_sku_and_other['product_fk'].unique().tolist()
        filters = {'product_fk': active_products_pks}
        filtered_df = self.scif[self.tools.get_filter_condition(self.scif, **filters)]
        facing_filtered = filtered_df.loc[filtered_df['facings'] > 0][
            ['template_fk', 'product_fk', 'facings']]
        facing_filtered_pks = facing_filtered['product_fk'].unique().tolist()
        for product in facing_filtered_pks:
            product_df = facing_filtered.loc[facing_filtered['product_fk'] == product]
            product_template_fks = product_df['template_fk'].unique().tolist()
            for template_fk in product_template_fks:
                sum_facing = product_df.loc[
                    product_df['template_fk'] == template_fk]['facings'].sum()
                self.write_to_db_result_new_tables(fk=Const.AVAILABILITY_PK,
                                                   numerator_id=product,
                                                   score='1',
                                                   denominator_id=template_fk,
                                                   numerator_result='1',
                                                   result=sum_facing)

    def calculate_pricing(self, all_products):
        """
        Insert the pricing of all active and inactive SKUs into the DB;
        used in the preco and preco vertical reports.

        :param all_products: df containing all products
        """
        only_sku_type_products = all_products.loc[all_products['product_type'] == 'SKU']
        all_products_fks_size = only_sku_type_products[['product_fk', 'size']].fillna("")
        product_fks_and_prices = self.prices_per_session
        merge_size_and_price = pd.merge(all_products_fks_size, product_fks_and_prices,
                                        how='left', on='product_fk')
        merge_size_and_price['value'] = merge_size_and_price['value'].fillna('0')
        for row in merge_size_and_price.itertuples():
            # itertuples positional access: 1=product_fk, 2=size, 3=value.
            product = row[1]
            size = row[2]
            price = row[3]
            if size == '':
                size = 0
            if price > 0:
                self.write_to_db_result_new_tables(fk=Const.PRICING_PK,
                                                   numerator_id=product,
                                                   numerator_result=size,
                                                   result=price)

    def handle_atomic(self, row):
        """
        Run the correct KPI for a specific row in the template.

        :param row: a row from the template
        """
        atomic_name = row[Const.ENGLISH_KPI_NAME].strip()
        kpi_type = row[Const.KPI_TYPE].strip()
        if kpi_type == Const.SURVEY:
            self.handle_survey_atomics(atomic_name)
        elif kpi_type == Const.COUNT:
            self.handle_count_atomics(atomic_name)
        elif kpi_type == Const.GROUP_COUNT:
            self.handle_group_count_atomics(atomic_name)

    def handle_survey_atomics(self, atomic_name):
        """
        Handle a survey-question atomic.

        :param atomic_name: the name of the kpi
        :return: early, if the survey filters aren't satisfied
        """
        row = self.survey_sheet.loc[self.survey_sheet[Const.ENGLISH_KPI_NAME] == atomic_name]
        if row.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return
        store_type_filter = self.store_info['store_type'].values[0].strip()
        store_type_template = row[Const.STORE_TYPE_TEMPLATE].values[0].strip()
        # if cell in template is not empty
        if store_type_template != "":
            store_types = [item.strip() for item in store_type_template.split(",")]
            if store_type_filter not in store_types:
                return
        # find the answer to the survey in session
        question_id = row[Const.SURVEY_QUESTION_ID].values[0]
        question_answer_template = row[Const.TARGET_ANSWER].values[0]
        survey_result = self.survey.get_survey_answer(('question_fk', question_id))
        if question_answer_template == Const.NUMERIC:
            if not survey_result:
                survey_result = 0
            if not isinstance(survey_result, (int, long, float)):
                Log.warning("question id " + str(question_id) + " in template is not a number")
                survey_result = 0
        else:
            answer = self.survey.check_survey_answer(
                ('question_fk', question_id), question_answer_template)
            survey_result = 1 if answer else -1
        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " + atomic_name)
            return
        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=survey_result,
                                           result=survey_result)

    def handle_count_atomics(self, atomic_name):
        """
        Handle count KPIs; used in the consolidada report.

        :param atomic_name: the name of the kpi to calculate
        """
        sum_of_count = 0
        target = 0
        count_result = 0
        # Renamed from `row` so the loop variable below no longer shadows it.
        rows = self.count_sheet.loc[self.count_sheet[Const.ENGLISH_KPI_NAME] == atomic_name]
        if rows.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return
        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " + atomic_name)
            return
        # NOTE(review): when several template rows match, only the last row's
        # results are written — preserved as-is; confirm this is intentional.
        for index, row in rows.iterrows():
            sum_of_count, target, count_result = self.handle_count_row(row)
        if not isinstance(sum_of_count, (int, float, long)):
            sum_of_count = count_result
        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=sum_of_count,
                                           denominator_result=target,
                                           result=count_result)

    def handle_group_count_atomics(self, atomic_name):
        """
        Handle group-count KPIs (differ from count by OR/AND conditions);
        used in the consolidada report.

        :param atomic_name: the name of the kpi to calculate
        """
        rows = self.group_count_sheet.loc[
            self.group_count_sheet[Const.GROUP_KPI_NAME] == atomic_name]
        group_weight = 0
        group_result = 0
        group_target = 0
        group_sum_of_count = 0
        sum_of_count_df = pd.DataFrame()
        target_operator = ""
        if rows.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return
        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " + atomic_name)
            return
        for index, row in rows.iterrows():
            target_operator = row[Const.TARGET_OPERATOR].strip()
            weight = row[Const.WEIGHT]
            sum_of_count, target, count_result = self.handle_count_row(row)
            if count_result >= 1:
                group_weight += weight
                if group_weight >= 100:
                    # use for getting numeric results instead of 1 and 0
                    if (target_operator == '+'):
                        sum_of_count_df = pd.concat([sum_of_count_df, sum_of_count])
                    else:
                        group_result = 1
                        break
            # conditional, if given -1000 kpi must fail
            elif count_result == -1000:
                group_result = 0
                break
        # use for getting numeric results instead of 1 and 0
        if (target_operator == '+'):
            if sum_of_count_df.empty:
                group_sum_of_count = 0
            else:
                group_sum_of_count = len(sum_of_count_df.groupby('scene_id'))
            group_result = group_sum_of_count
        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=group_sum_of_count,
                                           denominator_result=group_target,
                                           result=group_result)

    def handle_count_row(self, row):
        """
        Filter all params in a specific row and send it to the correct count
        calculation.

        :param row: a template row
        :return: (count_of_units, target, count_result)
        """
        count_type = row[Const.COUNT_TYPE].strip()
        target = row[Const.TARGET]
        target_operator = row[Const.TARGET_OPERATOR].strip()
        product_template = row[Const.PRODUCT]
        store_type_filter = self.store_info['store_type'].values[0]
        store_type_template = row[Const.STORE_TYPE_TEMPLATE]
        product_size = row[Const.PRODUCT_SIZE]
        product_size_operator = row[Const.PRODUCT_SIZE_OPERATOR].strip()
        product_measurement_unit = row[Const.MEASUREMENT_UNIT].strip()
        consider_few = row[Const.CONSIDER_FEW]
        multipack_template = row[Const.MULTIPACK].strip()
        multipack_df = None
        # filter store type
        if store_type_template != "":
            store_types = [item.strip() for item in store_type_template.split(",")]
            if store_type_filter not in store_types:
                return 0, 0, 0
        filtered_df = self.scif.copy()
        # filter product
        if product_template != "":
            products_to_check = [item.strip() for item in product_template.split(",")]
            filtered_df = filtered_df[filtered_df['product_name'].isin(products_to_check)]
            if filtered_df.empty:
                return 0, 0, 0
        # filter product size
        if product_size != "":
            if product_measurement_unit == 'l':
                product_size *= 1000
            ml_df = filtered_df[filtered_df['size_unit'] == 'ml']
            l_df = filtered_df[filtered_df['size_unit'] == 'l']
            if multipack_template != "":
                filtered_df[filtered_df['MPACK'] == 'Y']
                multipack_df = filtered_df[filtered_df['MPACK'] == 'Y']
            # Normalize liters to milliliters so sizes compare uniformly.
            temp_df = l_df.copy()
            temp_df['size'] = l_df['size'].apply((lambda x: x * 1000))
            filtered_df = pd.concat([temp_df, ml_df])
            if product_size_operator == '<':
                filtered_df = filtered_df[filtered_df['size'] < product_size]
            elif product_size_operator == '<=':
                filtered_df = filtered_df[filtered_df['size'] <= product_size]
            elif product_size_operator == '>':
                filtered_df = filtered_df[filtered_df['size'] > product_size]
            elif product_size_operator == '>=':
                filtered_df = filtered_df[filtered_df['size'] >= product_size]
            elif product_size_operator == '=':
                filtered_df = filtered_df[filtered_df['size'] == product_size]
            # multipack conditions is an or between product size and MPACK
            if multipack_template != "":
                filtered_df = pd.concat([filtered_df, multipack_df]).drop_duplicates()
        filters = self.get_filters_from_row(row)
        count_of_units = 0
        if count_type == Const.SCENE:
            count_of_units = self.count_of_scenes(filtered_df, filters, target_operator, target)
        elif count_type == Const.FACING:
            count_of_units = self.count_of_facings(filtered_df, filters, consider_few, target)
        elif count_type == Const.SCENE_SOS:
            count_of_units = self.count_of_sos(filtered_df, filters)
        else:
            Log.warning("Couldn't find a correct COUNT variable in template")
        if target_operator == '<=':
            count_result = 1 if (target <= count_of_units) else 0
        # use for getting numeric results instead of 1 and 0
        elif target_operator == '+':
            if isinstance(count_of_units, (int, float, long)):
                count_result = count_of_units
            else:
                count_result = len(count_of_units)
        else:
            count_result = 1 if (target >= count_of_units) else 0
        return count_of_units, target, count_result

    def get_filters_from_row(self, row):
        """
        Build the scif filter dict from a template row.

        :param row: row containing all filters
        :return: a dictionary of the filters
        """
        filters = dict(row)
        # no need to be accounted for, fields that aren't in scif
        for field in Const.DELETE_FIELDS:
            if field in filters:
                del filters[field]
        if Const.WEIGHT in filters.keys():
            del filters[Const.WEIGHT]
        if Const.GROUP_KPI_NAME in filters.keys():
            del filters[Const.GROUP_KPI_NAME]
        exclude_manufacturer = filters[Const.EXCLUDE_MANUFACTURER].strip()
        if exclude_manufacturer != "":
            filters[Const.MANUFACTURER] = (exclude_manufacturer, Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_MANUFACTURER]
        exclude_category = filters[Const.EXCLUDE_CATEGORY].strip()
        if exclude_category != "":
            filters[Const.CATEGORY] = (exclude_category, Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_CATEGORY]
        # filter all the empty cells; list() makes the mutation-while-iterating
        # safe on Python 3 as well (identical behavior on Python 2).
        for key in list(filters.keys()):
            if (filters[key] == ""):
                del filters[key]
            elif isinstance(filters[key], tuple):
                filters[key] = (filters[key][0].split(","), filters[key][1])
            else:
                filters[key] = filters[key].split(",")
                filters[key] = [item.strip() for item in filters[key]]
        return self.create_filters_according_to_scif(filters)

    def create_filters_according_to_scif(self, filters):
        """
        Adjust the template names to scif names.

        :param filters: only the scif filters in the template shape
        :return: the filters dictionary
        """
        # NOTE(review): MULTIPACK maps to 'MPAK' here but handle_count_row
        # filters on the 'MPACK' column — one of the two spellings looks like
        # a typo; confirm against the scif schema before changing.
        convert_from_scif = {
            Const.TEMPLATE_GROUP: 'template_group',
            Const.TEMPLATE_NAME: 'template_name',
            Const.BRAND: 'brand_name',
            Const.CATEGORY: 'category',
            Const.MANUFACTURER: 'manufacturer_name',
            Const.PRODUCT_TYPE: 'product_type',
            Const.MULTIPACK: 'MPAK'
        }
        for key in list(filters.keys()):
            filters[convert_from_scif[key]] = filters.pop(key)
        return filters

    def count_of_scenes(self, filtered_df, filters, target_operator, target):
        """
        Calculate the count of scene types.

        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :param target_operator: the operation to do, + for returning a
            dataframe (used in group count)
        :param target: the target
        :return: dataframe for group counts '+', number of scenes otherwise
        """
        scene_data = filtered_df[self.tools.get_filter_condition(filtered_df, **filters)]
        if target_operator == '+':
            # filter by scene_id and by template_name (scene type)
            scene_types_groupby = scene_data.groupby(
                ['template_name', 'scene_id'])['facings'].sum().reset_index()
            number_of_scenes = scene_types_groupby[
                scene_types_groupby['facings'] >= target]
        else:
            number_of_scenes = len(scene_data['scene_id'].unique())
        return number_of_scenes

    def count_of_sos(self, filtered_df, filters):
        """
        Calculate the share of shelf.

        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :return: the number of different scenes that satisfy the condition
            (hard coded 50%)
        """
        scene_data = filtered_df[self.tools.get_filter_condition(filtered_df, **filters)]
        scene_data = scene_data.rename(columns={"facings": "facings_nom"})
        # filter by scene_id and by template_name (scene type)
        scene_types_groupby = scene_data.groupby(
            ['template_name', 'scene_id'])['facings_nom'].sum()
        all_products_groupby = self.scif.groupby(
            ['template_name', 'scene_id'])['facings'].sum()
        merge_result = pd.concat((scene_types_groupby, all_products_groupby),
                                 axis=1, join='inner').reset_index()
        return len(merge_result[
            merge_result['facings_nom'] >= merge_result['facings'] * 0.5])
filtered_df, filters, consider_few, target): ''' calculate the count of facings :param filtered_df: the first filtered (no scif filters) dataframe :param filters: the scif filters :param consider_few: in case there is a need to consider more then one brand :param target: the target to pass :return: ''' facing_data = filtered_df[self.tools.get_filter_condition( filtered_df, **filters)] if consider_few != "": facing_data_groupby = facing_data.groupby(['brand_name' ])['facings'].sum() if len(facing_data_groupby[ facing_data_groupby >= target]) >= consider_few: number_of_facings = facing_data['facings'].sum() else: number_of_facings = 0 else: number_of_facings = facing_data['facings'].sum() return number_of_facings def get_new_kpi_static_data(self): """ This function extracts the static new KPI data (new tables) and saves it into one global data frame. The data is taken from static.kpi_level_2. """ query = CCBRQueries.get_new_kpi_data() kpi_static_data = pd.read_sql_query(query, self.rds_conn.db) return kpi_static_data def write_to_db_result_new_tables(self, fk, numerator_id, numerator_result, result, denominator_id=None, denominator_result=None, score=None): """ This function creates the result data frame of new rables KPI, and appends the insert SQL query into the queries' list, later to be written to the DB. """ table = KPI_NEW_TABLE attributes = self.create_attributes_dict_new_tables( fk, numerator_id, numerator_result, denominator_id, denominator_result, result, score) query = insert(attributes, table) self.kpi_results_new_tables_queries.append(query) def create_attributes_dict_new_tables(self, kpi_fk, numerator_id, numerator_result, denominator_id, denominator_result, result, score): """ This function creates a data frame with all attributes needed for saving in KPI results new tables. 
""" attributes = pd.DataFrame( [(kpi_fk, self.session_id, numerator_id, numerator_result, denominator_id, denominator_result, result, score)], columns=[ 'kpi_level_2_fk', 'session_fk', 'numerator_id', 'numerator_result', 'denominator_id', 'denominator_result', 'result', 'score' ]) return attributes.to_dict() @log_runtime('Saving to DB') def commit_results_data(self): """ This function writes all KPI results to the DB, and commits the changes. """ insert_queries = self.merge_insert_queries( self.kpi_results_new_tables_queries) self.rds_conn.disconnect_rds() self.rds_conn.connect_rds() cur = self.rds_conn.db.cursor() delete_query = CCBRQueries.get_delete_session_results_query( self.session_uid, self.session_id) cur.execute(delete_query) for query in insert_queries: cur.execute(query) self.rds_conn.db.commit() self.rds_conn.disconnect_rds() @staticmethod def merge_insert_queries(insert_queries): query_groups = {} for query in insert_queries: static_data, inserted_data = query.split('VALUES ') if static_data not in query_groups: query_groups[static_data] = [] query_groups[static_data].append(inserted_data) merged_queries = [] for group in query_groups: merged_queries.append('{0} VALUES {1}'.format( group, ',\n'.join(query_groups[group]))) return merged_queries
class DIAGEOBR_SANDToolBox:
    """Session-level KPI calculations for the DIAGEO BR sandbox project."""

    # DB result levels: set / KPI / atomic KPI
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.segment = self.get_business_unit_name()
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.tools = DIAGEOToolBox(
            self.data_provider, output,
            match_display_in_scene=self.match_display_in_scene)
        self.kpi_results_queries = []
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output, self.common)

    def get_business_unit_name(self):
        """
        Fetch this store's business unit name from the DB.

        :return: the business unit name, or '' when the store has none
        """
        query = DIAGEOQueries.get_business_unit_name(self.store_id)
        business_unit_name = pd.read_sql_query(query, self.rds_conn.db)
        if business_unit_name['business_unit_name'].empty:
            return ""
        else:
            return business_unit_name['business_unit_name'].values[0]

    def get_kpi_static_data(self):
        """
        Extract the static KPI data into one dataframe.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        Extract the display matches data into one dataframe.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        Calculate the KPI results for every requested set and write them
        to both the old and the new result tables.

        :param set_names: iterable of KPI set names to calculate
        """
        log_runtime('Updating templates')(self.tools.update_templates)()
        # Global assortment kpis
        assortment_res_dict = \
            self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)
        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and \
                    set_name not in self.set_templates_data.keys():
                try:
                    self.set_templates_data[set_name] = \
                        self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue
            if set_name == 'Visible to Customer':
                # Global function
                sku_list = filter(None, self.scif[
                    self.scif['product_type'] == 'SKU'
                ].product_ean_code.tolist())
                res_dict = \
                    self.diageo_generator.diageo_global_visible_percentage(
                        sku_list)
                if res_dict:
                    # Saving to new tables; the last entry is the parent result
                    parent_res = res_dict[-1]
                    for r in res_dict:
                        self.commonV2.write_to_db_result(**r)
                    # Saving to old tables
                    set_score = result = parent_res['result']
                    self.save_level2_and_level3(set_name=set_name,
                                                kpi_name=set_name,
                                                score=result)
            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_dict = self.diageo_generator.\
                    diageo_global_secondary_display_secondary_function()
                if res_dict:
                    # Saving to new tables
                    self.commonV2.write_to_db_result(
                        fk=res_dict['fk'], numerator_id=1,
                        denominator_id=self.store_id,
                        result=res_dict['result'])
                # Saving to old tables
                set_score = self.tools.calculate_assortment(
                    assortment_entity='scene_id',
                    location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)
            if set_score is False:
                # The set could not be calculated at all - abort the session.
                # NOTE: this must be checked BEFORE comparing to 0, because
                # False == 0 in Python; the previous
                # `if set_score == 0: pass elif set_score is False: return`
                # ordering made this branch unreachable and wrote False to
                # the DB instead of returning.
                return
            set_fk = self.kpi_static_data[
                self.kpi_static_data['kpi_set_name'] == set_name
            ]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)
        # committing to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, write the score for both KPI level 2
        and level 3 in the DB.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'] == set_name) &
            (self.kpi_static_data['kpi_name'] == kpi_name)]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        except IndexError:
            # values[0] on an empty match is the only expected failure here
            Log.warning("kpi name or set name don't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_posm_sets(self, set_name):
        """
        Calculate every POSM-typed KPI from the relevant set.

        :return: the set final score (percent), or False when no KPI in the
                 set was relevant for this store
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            if self.store_channel is None:
                break
            kpi_res = self.tools.calculate_posm(
                display_name=params.get(self.tools.DISPLAY_NAME))
            score = 1 if kpi_res > 0 else 0
            if params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                scores.append(score)
            # passing displays are saved even when not relevant for the store
            if score == 1 or params.get(self.store_type) == \
                    self.tools.RELEVANT_FOR_STORE:
                self.save_level2_and_level3(
                    set_name, params.get(self.tools.DISPLAY_NAME), score)
        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_assortment_sets(self, set_name):
        """
        Calculate every Assortment-typed KPI from the relevant set.

        :return: the set final score (percent), or False when no KPI in the
                 set was relevant for this store
        """
        scores = []
        segment = '{};{}'.format(self.store_type, self.segment)
        for params in self.set_templates_data[set_name]:
            if params.get(segment, '').capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                object_type = self.tools.ENTITY_TYPE_CONVERTER.get(
                    params.get(self.tools.ENTITY_TYPE), 'product_ean_code')
                objects = [str(params.get(
                    self.tools.PRODUCT_EAN_CODE,
                    params.get(self.tools.PRODUCT_EAN_CODE2, '')))]
                if params.get(self.store_type) == \
                        self.tools.OR_OTHER_PRODUCTS:
                    additional_objects = str(
                        params.get(self.tools.ADDITIONAL_SKUS)).split(',')
                    objects.extend(additional_objects)
                filters = {object_type: objects}
                result = self.tools.calculate_assortment(**filters)
                score = 1 if result > 0 else 0
                scores.append(score)
                self.save_level2_and_level3(
                    set_name, params.get(self.tools.PRODUCT_NAME), score)
        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def write_to_db_result(self, fk, score, level):
        """
        Build the result row of a KPI (atomic KPI / KPI / KPI set) and
        append its insert SQL query to the queries list, later to be
        written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        Create the attribute dict needed for saving a row in the KPI
        results tables for the given level.
        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] == fk
            ]['kpi_set_name'].values[0]
            score_type = '%' if kpi_set_name in \
                self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE else ''
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'),
                  score_type, fk)],
                columns=['kps_name', 'session_uid', 'store_fk', 'visit_date',
                         'score_1', 'score_2', 'kpi_set_fk'])
        elif level == self.LEVEL2:
            # quotes are escaped because the name is embedded in raw SQL
            kpi_name = self.kpi_static_data[
                self.kpi_static_data['kpi_fk'] == fk
            ]['kpi_name'].values[0].replace("'", "\\'")
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id,
                  self.visit_date.isoformat(), fk, kpi_name, score)],
                columns=['session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                         'kpk_name', 'score'])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace(
                "'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] == fk
            ]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid, kpi_set_name,
                  self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk,
                  None, None)],
                columns=['display_text', 'session_uid', 'kps_name',
                         'store_fk', 'visit_date', 'calculation_time',
                         'score', 'kpi_fk', 'atomic_kpi_fk', 'threshold',
                         'result'])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        Delete this session's previous old-tables results, write all queued
        KPI results to the DB and commit the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = \
            DIAGEOQueries.get_delete_session_results_query_old_tables(
                self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
class BATRUAssortment:
    # Uploads a store-assortment (top-SKU) template into the store
    # assortment table: activates new store/product pairs and closes
    # (end-dates) pairs that dropped out of the template.
    # NOTE(review): Python 2 code (`xrange`, `unicode`, `dict.keys()[0]`).

    def __init__(self):
        self.parsed_args = _parse_arguments()
        self.project = self.parsed_args.project
        # NOTE(review): `rds_connect`/`get_*` below are properties with
        # side effects (they open connections and run queries on access).
        self.rds_conn = self.rds_connect
        self.file_path = self.parsed_args.file
        self.start_date = self.parsed_args.date
        self.partial_update = self.parsed_args.update
        self.store_data = self.get_store_data
        self.all_products = self.get_product_data
        self.current_top_skus = self.get_current_top_skus
        # caches for store_number -> store_fk and ean -> product_fk lookups
        self.stores = {}
        self.products = {}
        self.all_queries = []
        if self.start_date is None:
            self.current_date = datetime.now().date()
        else:
            self.current_date = datetime.strptime(self.start_date,
                                                  '%Y-%m-%d').date()
        # removed assortments are closed the day before the activation date
        self.deactivate_date = self.current_date - timedelta(1)
        self.activate_date = self.current_date
        if self.partial_update in ('1', 'True', 'Yes', 'Y'):
            self.partial_update = True
        else:
            self.partial_update = False

    def upload_assortment(self):
        """
        This is the main function of the assortment.
        It does the validation and then uploads the assortment.

        Note that the upload runs even when validation fails; the invalid
        stores/products are only logged as warnings afterwards.
        :return:
        """
        Log.debug("Parsing and validating the assortment template")
        is_valid, invalid_inputs = self.p1_assortment_validator()
        Log.info("Assortment upload is started")
        self.upload_store_assortment_file()
        if not is_valid:
            Log.warning("Errors were found during the template validation")
            if invalid_inputs[INVALID_STORES]:
                Log.warning("The following stores don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_STORES]))
            if invalid_inputs[INVALID_PRODUCTS]:
                Log.warning("The following products don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_PRODUCTS]))
        Log.info("Assortment upload is finished")

    @property
    def rds_connect(self):
        """Open (or re-open) the project DB connection and return it."""
        self.rds_conn = PSProjectConnector(self.project,
                                           DbUsers.CalculationEng)
        try:
            # cheap probe query to verify the connection is actually alive
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self.rds_conn.db)
        except Exception as e:
            # stale connection - reconnect once
            self.rds_conn.disconnect_rds()
            self.rds_conn = PSProjectConnector(self.project,
                                               DbUsers.CalculationEng)
        return self.rds_conn

    @property
    def get_store_data(self):
        """Load store_fk / store_number (store_number_1) for all stores."""
        query = "select pk as store_fk, store_number_1 as store_number from static.stores"
        self.store_data = pd.read_sql_query(query, self.rds_conn.db)
        return self.store_data

    @property
    def get_product_data(self):
        """Load product_fk / product_ean_code for all non-deleted products."""
        query = "select pk as product_fk, product_ean_code from static.product " \
                "where delete_date is null"
        self.all_products = pd.read_sql_query(query, self.rds_conn.db)
        return self.all_products

    @property
    def get_current_top_skus(self):
        """Load the currently-open (end_date is null) assortment rows."""
        query = """select store_fk, product_fk from pservice.custom_osa where end_date is null"""
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def p1_assortment_validator(self):
        """
        This function validates the store assortment template.
        It compares the OUTLET_ID (= store_number_1) and the products
        ean_code to the stores and products from the DB.

        :return: (False, invalid inputs) in case of an error and
                 (True, empty inputs) in case of a valid template
        """
        raw_data = self.parse_assortment_template()
        legal_template = True
        invalid_inputs = {INVALID_STORES: [], INVALID_PRODUCTS: []}
        valid_stores = self.store_data.loc[
            self.store_data['store_number'].isin(raw_data[OUTLET_ID])]
        if len(valid_stores) != len(raw_data[OUTLET_ID].unique()):
            invalid_inputs[INVALID_STORES] = list(
                set(raw_data[OUTLET_ID].unique()) -
                set(valid_stores['store_number']))
            Log.debug("The following stores don't exist in the DB: {}".format(
                invalid_inputs[INVALID_STORES]))
            legal_template = False
        valid_product = self.all_products.loc[
            self.all_products[EAN_CODE].isin(raw_data[EAN_CODE])]
        if len(valid_product) != len(raw_data[EAN_CODE].unique()):
            invalid_inputs[INVALID_PRODUCTS] = list(
                set(raw_data[EAN_CODE].unique()) -
                set(valid_product[EAN_CODE]))
            Log.debug(
                "The following products don't exist in the DB: {}".format(
                    invalid_inputs[INVALID_PRODUCTS]))
            legal_template = False
        return legal_template, invalid_inputs

    def parse_assortment_template(self):
        """
        This function turns the csv into a DF.
        It tries to handle all of the possible format situations
        encountered so far (different delimiters and unicode):
        tab-separated first, then comma-separated, then utf-7 encoded.

        :return: DF that contains the store_number_1 (Outlet ID) and the
                 product_ean_code of the assortments
        """
        data = pd.read_csv(self.file_path, sep='\t')
        if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
            data = pd.read_csv(self.file_path)
            if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
                data = pd.read_csv(self.file_path, encoding='utf-7')
        data = data.drop_duplicates(subset=data.columns, keep='first')
        data = data.fillna('')
        return data

    def set_end_date_for_irrelevant_assortments(self, stores_list):
        """
        This function sets an end_date to all of the irrelevant stores in
        the assortment (stores that have open assortment rows but are not
        present in the template).

        :param stores_list: List of the stores from the assortment template
        """
        Log.debug("Closing assortment for stores out of template")
        irrelevant_stores = self.store_data.loc[
            ~self.store_data['store_number'].
            isin(stores_list)]['store_fk'].unique().tolist()
        current_assortment_stores = self.current_top_skus['store_fk'].unique(
        ).tolist()
        # only stores that actually have open assortment rows need closing
        stores_to_remove = list(
            set(irrelevant_stores).intersection(
                set(current_assortment_stores)))
        for store in stores_to_remove:
            # one UPDATE (and commit) per store
            query = [
                self.get_store_deactivation_query(store, self.deactivate_date)
            ]
            self.commit_results(query)
        Log.debug("Assortment is closed for ({}) stores".format(
            len(stores_to_remove)))

    def upload_store_assortment_file(self):
        # Parse the template, optionally close out-of-template stores, then
        # build {store_number: [ean, ...]} dicts and apply them one by one.
        raw_data = self.parse_assortment_template()
        data = []
        list_of_stores = raw_data[OUTLET_ID].unique().tolist()
        if not self.partial_update:
            # full update: stores missing from the template are closed
            self.set_end_date_for_irrelevant_assortments(list_of_stores)
        Log.debug("Preparing assortment data for update")
        store_counter = 0
        for store in list_of_stores:
            store_data = {}
            store_products = raw_data.loc[raw_data[OUTLET_ID] ==
                                          store][EAN_CODE].tolist()
            store_data[store] = store_products
            data.append(store_data)
            store_counter += 1
            if store_counter % 1000 == 0 or store_counter == len(
                    list_of_stores):
                Log.debug("Assortment is prepared for {}/{} stores".format(
                    store_counter, len(list_of_stores)))
        Log.debug("Updating assortment data in DB")
        store_counter = 0
        for store_data in data:
            self.update_db_from_json(store_data)
            if self.all_queries:
                # flush the accumulated queries per store
                queries = self.merge_insert_queries(self.all_queries)
                self.commit_results(queries)
                self.all_queries = []
            store_counter += 1
            if store_counter % 1000 == 0 or store_counter == len(data):
                Log.debug(
                    "Assortment is updated in DB for {}/{} stores".format(
                        store_counter, len(data)))

    @staticmethod
    def merge_insert_queries(queries):
        """
        This function aggregates all of the insert queries.
        Non-INSERT queries (no 'VALUES') pass through untouched; INSERTs
        are grouped by static prefix and merged into multi-row statements,
        chunked to at most 10**4 rows per statement.

        :param queries: all of the queries (update and insert) for the assortment
        :return: The merged insert queries
        """
        query_groups = {}
        other_queries = []
        for query in queries:
            if 'VALUES' not in query:
                other_queries.append(query)
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index +
                                            10**4])))
        return other_queries + merged_queries

    def update_db_from_json(self, data):
        # Diff one store's template products against its currently-open
        # assortment and queue the activation/deactivation queries.
        # `data` is a single-key dict: {store_number: [ean, ...]}.
        update_products = set()
        missing_products = set()
        store_number = data.keys()[0]
        if store_number is None:
            Log.debug("'{}' column or value is missing".format(STORE_NUMBER))
            return
        store_fk = self.get_store_fk(store_number)
        if store_fk is None:
            Log.debug(
                'Store Number {} does not exist in DB'.format(store_number))
            return
        for key in data[store_number]:
            # only numeric or string-like EAN cells are considered
            validation = False
            if isinstance(key, (float, int)):
                validation = True
            elif isinstance(key, (str, unicode)):
                validation = True
            if validation:
                # the EAN is the last comma-separated component of the cell
                product_ean_code = str(key).split(',')[-1]
                product_fk = self.get_product_fk(product_ean_code)
                if product_fk is None:
                    missing_products.add(product_ean_code)
                else:
                    update_products.add(product_fk)
        if missing_products:
            Log.debug(
                'The following EAN Codes for Store Number {} do not exist in DB: {}.'
                ''.format(store_number, list(missing_products)))
        queries = []
        current_products = self.current_top_skus[
            self.current_top_skus['store_fk'] ==
            store_fk]['product_fk'].tolist()
        products_to_deactivate = tuple(
            set(current_products).difference(update_products))
        products_to_activate = tuple(
            set(update_products).difference(current_products))
        if products_to_deactivate:
            if len(products_to_deactivate) == 1:
                # a 1-tuple would render as "(5,)" which is invalid SQL,
                # so the single-element IN-list is built by hand
                queries.append(
                    self.get_deactivation_query(
                        store_fk,
                        "(" + str(products_to_deactivate[0]) + ")",
                        self.deactivate_date))
            else:
                queries.append(
                    self.get_deactivation_query(store_fk,
                                                tuple(products_to_deactivate),
                                                self.deactivate_date))
        for product_fk in products_to_activate:
            queries.append(
                self.get_activation_query(store_fk, product_fk,
                                          self.activate_date))
        self.all_queries.extend(queries)
        Log.debug(
            'Store Number {} - Products to update {}: Deactivated {}, Activated {}'
            ''.format(store_number, len(update_products),
                      len(products_to_deactivate),
                      len(products_to_activate)))

    def get_store_fk(self, store_number):
        """
        This function returns the store's fk, using a local cache backed
        by the preloaded store_data frame.

        :param store_number: 'store_number_1' attribute of the store
        :return: store fk, or None when the store is unknown
        """
        store_number = str(store_number)
        if store_number in self.stores:
            store_fk = self.stores[store_number]
        else:
            store_fk = self.store_data[self.store_data['store_number'] ==
                                       store_number]
            if not store_fk.empty:
                store_fk = store_fk['store_fk'].values[0]
                self.stores[store_number] = store_fk
            else:
                store_fk = None
        return store_fk

    def get_product_fk(self, product_ean_code):
        # Same cached-lookup pattern as get_store_fk, keyed by EAN code;
        # returns None for unknown products.
        product_ean_code = str(product_ean_code).strip()
        if product_ean_code in self.products:
            product_fk = self.products[product_ean_code]
        else:
            product_fk = self.all_products[
                self.all_products['product_ean_code'] == product_ean_code]
            if not product_fk.empty:
                product_fk = product_fk['product_fk'].values[0]
                self.products[product_ean_code] = product_fk
            else:
                product_fk = None
        return product_fk

    @staticmethod
    def get_deactivation_query(store_fk, product_fks, date):
        # Close (end-date) the given products for one store.
        # `product_fks` is either a tuple or a pre-formatted "(id)" string.
        query = \
            """
            update {}
            set end_date = '{}', is_current = NULL
            where store_fk = {} and product_fk in {} and end_date is null;
            """\
            .format(STORE_ASSORTMENT_TABLE, date, store_fk, product_fks)
        return query

    @staticmethod
    def get_store_deactivation_query(store_fk, date):
        # Close (end-date) ALL open assortment rows for one store.
        query = \
            """
            update {}
            set end_date = '{}', is_current = NULL
            where store_fk = {} and end_date is null;
            """.format(STORE_ASSORTMENT_TABLE, date, store_fk)
        return query

    @staticmethod
    def get_activation_query(store_fk, product_fk, date):
        # Insert a new open assortment row for one store/product pair.
        attributes = pd.DataFrame(
            [(store_fk, product_fk, str(date), 1)],
            columns=['store_fk', 'product_fk', 'start_date', 'is_current'])
        query = insert(attributes.to_dict(), STORE_ASSORTMENT_TABLE)
        return query

    def commit_results(self, queries):
        """
        This function commits the results into the DB in batches.
        query_num is the number of queries that were executed in the
        current batch. After batch_size is reached, the function commits
        and re-connects the DB and cursor. Failed queries are committed
        past, collected and skipped.

        NOTE(review): `query_num > batch_size` means a batch actually
        holds batch_size + 1 queries before the commit - confirm intended.
        """
        self.rds_conn.connect_rds()
        cursor = self.rds_conn.db.cursor()
        batch_size = 1000
        query_num = 0
        failed_queries = []
        for query in queries:
            try:
                cursor.execute(query)
                # print query
            except Exception as e:
                Log.warning(
                    'Committing to DB failed to due to: {}. Query: {}'.format(
                        e, query))
                self.rds_conn.db.commit()
                # NOTE(review): failed_queries is collected but never
                # retried or reported to the caller
                failed_queries.append(query)
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                continue
            if query_num > batch_size:
                self.rds_conn.db.commit()
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                query_num = 0
            query_num += 1
        self.rds_conn.db.commit()
class DIAGEOUK_SANDToolBox:
    """
    Session-level KPI calculator for the DIAGEO UK sandbox project.

    Reads session data from the data provider, runs the DIAGEO global KPI
    functions (assortment v2/v3, equipment score, menu share) plus the
    template-driven set calculations (Relative Position, Local MPA,
    Secondary Displays, POSM, Visible to Customer), and writes results to
    both the legacy result tables (via ``write_to_db_result``) and the new
    tables (via ``commonV2``).
    """

    # Legacy result-table levels: 1 = KPI set, 2 = KPI, 3 = atomic KPI.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        # Channel is compared upper-cased against the template's channel column.
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        # Lazily-filled cache of downloaded set templates, keyed by set name.
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.output = output
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.global_gen = DIAGEOGenerator(self.data_provider, self.output,
                                          self.common)
        self.tools = DIAGEOToolBox(
            self.data_provider, output,
            match_display_in_scene=self.match_display_in_scene)  # replace the old one
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output, self.common,
                                                menu=True)

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.

        Runs the global (template-less) DIAGEO KPIs first, then each named
        set in ``set_names`` through its dedicated calculator, writing the
        set-level score to the legacy tables and committing the new tables
        at the end.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()

        # SOS Out Of The Box kpis
        self.activate_ootb_kpis()

        # Global assortment kpis
        assortment_res_dict = \
            self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        # Global assortment kpis - v3 for NEW MOBILE REPORTS use
        assortment_res_dict_v3 = \
            self.diageo_generator.diageo_global_assortment_function_v3()
        self.commonV2.save_json_to_new_tables(assortment_res_dict_v3)

        equipment_score_scenes = self.get_equipment_score_relevant_scenes()
        res_dict = self.diageo_generator.diageo_global_equipment_score(
            save_scene_level=False, scene_list=equipment_score_scenes)
        self.commonV2.save_json_to_new_tables(res_dict)

        # Global Menu kpis
        menus_res_dict = \
            self.diageo_generator.diageo_global_share_of_menu_cocktail_function(
                cocktail_product_level=True)
        self.commonV2.save_json_to_new_tables(menus_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE \
                    and set_name not in self.set_templates_data:
                try:
                    self.set_templates_data[set_name] = \
                        self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue

            # Global relative position
            # BUGFIX: the membership tests below used ('Relative Position') /
            # ('Local MPA') — plain strings, so `in` did a substring check.
            # Singleton tuples restore the intended exact-name match.
            if set_name in ('Relative Position',):
                # Global function
                res_dict = \
                    self.diageo_generator.diageo_global_relative_position_function(
                        self.set_templates_data[set_name],
                        location_type='template_group')
                self.commonV2.save_json_to_new_tables(res_dict)
                # Saving to old tables
                self.set_templates_data[set_name] = parse_template(
                    RELATIVE_PATH, lower_headers_row_index=2)
                set_score = self.calculate_relative_position_sets(set_name)
            # elif set_name in ('MPA', 'New Products', 'Local MPA'):
            elif set_name in ('Local MPA',):
                set_score = self.calculate_assortment_sets(set_name)
            # Global Secondary Displays
            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_json = \
                    self.diageo_generator.diageo_global_secondary_display_secondary_function()
                if res_json:
                    # Saving to new tables
                    self.commonV2.write_to_db_result(
                        fk=res_json['fk'], numerator_id=1,
                        denominator_id=self.store_id,
                        result=res_json['result'])
                # Saving to old tables
                set_score = self.tools.calculate_number_of_scenes(
                    location_type='Secondary')
                if not set_score:
                    set_score = self.tools.calculate_number_of_scenes(
                        location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)
            elif set_name == 'POSM':
                set_score = self.calculate_posm_sets(set_name)
            elif set_name in ('Visible to Customer', 'Visible to Consumer %'):
                # Global function
                sku_list = filter(
                    None,
                    self.scif[self.scif['product_type'] ==
                              'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(
                    sku_list)
                if res_dict:
                    # Saving to new tables
                    parent_res = res_dict[-1]
                    self.commonV2.save_json_to_new_tables(res_dict)
                # Saving to old tables
                filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                set_score = self.tools.calculate_visible_percentage(
                    visible_filters=filters)
                self.save_level2_and_level3(set_name, set_name, set_score)
            else:
                continue

            # BUGFIX: `False == 0` in Python, so the original order
            # (`== 0` checked before `is False`) made the `continue`
            # unreachable; a failed set (False) was written as a 0 score.
            if set_score is False:
                continue
            elif set_score == 0:
                pass
            set_fk = self.kpi_static_data[
                self.kpi_static_data['kpi_set_name'] ==
                set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # committing to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'].str.encode('utf-8') ==
             set_name.encode('utf-8')) &
            (self.kpi_static_data['kpi_name'].str.encode('utf-8') ==
             kpi_name.encode('utf-8'))]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        except IndexError:
            # No row matched the (set, kpi) pair in the static data.
            Log.warning("kpi name or set name don't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_relative_position_sets(self, set_name):
        """
        This function calculates every relative-position-typed KPI from the relevant sets,
        and returns the set final score (percentage of passed KPIs), or False
        if no template row matched the store's channel.
        """
        scores = []
        for i in xrange(len(self.set_templates_data[set_name])):
            params = self.set_templates_data[set_name].iloc[i]
            if self.store_channel == params.get(self.tools.CHANNEL, '').upper():
                scif_tested_param = 'brand_name' \
                    if params.get(self.tools.TESTED_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                scif_anchor_param = 'brand_name' \
                    if params.get(self.tools.ANCHOR_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                tested_filters = {
                    scif_tested_param: params.get(self.tools.TESTED_NEW)}
                anchor_filters = {
                    scif_anchor_param: params.get(self.tools.ANCHOR_NEW)}
                direction_data = {
                    'top': self._get_direction_for_relative_position(
                        params.get(self.tools.TOP_DISTANCE)),
                    'bottom': self._get_direction_for_relative_position(
                        params.get(self.tools.BOTTOM_DISTANCE)),
                    'left': self._get_direction_for_relative_position(
                        params.get(self.tools.LEFT_DISTANCE)),
                    'right': self._get_direction_for_relative_position(
                        params.get(self.tools.RIGHT_DISTANCE))
                }
                if params.get(self.tools.LOCATION_OLD, ''):
                    general_filters = {
                        'template_group': params.get(self.tools.LOCATION_OLD)}
                else:
                    general_filters = {}
                result = self.tools.calculate_relative_position(
                    tested_filters, anchor_filters, direction_data,
                    **general_filters)
                score = 1 if result else 0
                scores.append(score)
                self.save_level2_and_level3(
                    set_name, params.get(self.tools.KPI_NAME), score)
        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def _get_direction_for_relative_position(self, value):
        """
        This function converts direction data from the template (as string) to a number.
        'Unlimited' maps to 1000; blanks/non-digits map to 0.
        """
        if value == self.tools.UNLIMITED_DISTANCE:
            value = 1000
        elif not value or not str(value).isdigit():
            value = 0
        else:
            value = int(value)
        return value

    def calculate_posm_sets(self, set_name):
        """
        This function calculates every POSM-typed KPI from the relevant sets, and returns the set final score.
        Only displays marked relevant for this store type contribute to the score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            if self.store_channel is None:
                break
            kpi_res = self.tools.calculate_posm(
                display_name=params.get(self.tools.DISPLAY_NAME))
            score = 1 if kpi_res > 0 else 0
            if params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                scores.append(score)
            # Passed-but-irrelevant displays are still saved for visibility.
            if score == 1 or \
                    params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                self.save_level2_and_level3(
                    set_name, params.get(self.tools.DISPLAY_NAME), score)
        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
        A template row may list several EAN codes; the row passes when the
        summed per-product assortment results reach the row's target.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            target = str(params.get(self.store_type, ''))
            if target.isdigit() or target.capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                products = str(
                    params.get(self.tools.PRODUCT_EAN_CODE,
                               params.get(self.tools.PRODUCT_EAN_CODE2,
                                          ''))).replace(',', ' ').split()
                target = 1 if not target.isdigit() else int(target)
                kpi_name = params.get(self.tools.GROUP_NAME,
                                      params.get(self.tools.PRODUCT_NAME))
                kpi_static_data = self.kpi_static_data[
                    (self.kpi_static_data['kpi_set_name'] == set_name) &
                    (self.kpi_static_data['kpi_name'] == kpi_name)]
                if len(products) > 1:
                    result = 0
                    for product in products:
                        product_score = self.tools.calculate_assortment(
                            product_ean_code=product)
                        result += product_score
                        try:
                            product_name = self.all_products[
                                self.all_products['product_ean_code'] ==
                                product]['product_name'].values[0]
                        except IndexError:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product))
                            continue
                        try:
                            atomic_fk = kpi_static_data[
                                kpi_static_data['atomic_kpi_name'] ==
                                product_name]['atomic_kpi_fk'].values[0]
                        except IndexError:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product_name))
                            continue
                        self.write_to_db_result(atomic_fk, product_score,
                                                level=self.LEVEL3)
                    score = 1 if result >= target else 0
                else:
                    result = self.tools.calculate_assortment(
                        product_ean_code=products)
                    atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0]
                    score = 1 if result >= target else 0
                    self.write_to_db_result(atomic_fk, score,
                                            level=self.LEVEL3)
                scores.append(score)
                kpi_fk = kpi_static_data['kpi_fk'].values[0]
                self.write_to_db_result(kpi_fk, score, level=self.LEVEL2)
        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def write_to_db_result(self, fk, score, level):
        """
        This function the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.
        Returns the frame as a dict (``DataFrame.to_dict``); an empty dict
        for an unknown level.
        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            score_type = '%' \
                if kpi_set_name in self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE \
                else ''
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'),
                  score_type, fk)],
                columns=['kps_name', 'session_uid', 'store_fk', 'visit_date',
                         'score_1', 'score_2', 'kpi_set_fk'])
        elif level == self.LEVEL2:
            # Escape quotes so the generated INSERT statement stays valid.
            kpi_name = self.kpi_static_data[
                self.kpi_static_data['kpi_fk'] ==
                fk]['kpi_name'].values[0].replace("'", "\\'")
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id,
                  self.visit_date.isoformat(), fk, kpi_name, score)],
                columns=['session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                         'kpk_name', 'score'])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace(
                "'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid, kpi_set_name,
                  self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk, None,
                  None)],
                columns=['display_text', 'session_uid', 'kps_name',
                         'store_fk', 'visit_date', 'calculation_time',
                         'score', 'kpi_fk', 'atomic_kpi_fk', 'threshold',
                         'result'])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    def activate_ootb_kpis(self):
        """Run the out-of-the-box facings-SOS hierarchy (store -> category ->
        sub-category -> manufacturer -> brand) and save it via commonV2."""
        # FACINGS_SOS_MANUFACTURER_IN_WHOLE_STORE - level 1
        sos_store_fk = self.commonV2.get_kpi_fk_by_kpi_name('SOS OUT OF STORE')
        sos_store = ManufacturerFacingsSOSInWholeStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_store_fk).calculate()

        # FACINGS_SOS_CATEGORY_IN_WHOLE_STORE - level 2
        sos_cat_out_of_store_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS CATEGORY OUT OF STORE')
        sos_cat_out_of_store = self.calculate_sos_of_cat_of_out_of_store_new(
            sos_cat_out_of_store_fk)

        # FACINGS_SOS_SUB_CATEGORY_OUT_OF_CATEGORY - level 3
        sos_sub_cat_out_of_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS SUB CATEGORY OUT OF CATEGORY')
        sos_sub_cat_out_of_cat = SubCategoryFacingsSOSPerCategory(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_sub_cat_out_of_cat_fk).calculate()

        # FACINGS_SOS_MANUFACTURER_OUT_OF_SUB_CATEGORY - level 4
        sos_man_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS MANUFACTURER OUT OF SUB CATEGORY')
        sos_man_out_of_sub_cat = ManufacturerFacingsSOSPerSubCategoryInStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_man_out_of_sub_cat_fk).calculate()

        # FACINGS_SOS_BRAND_OUT_OF_SUB_CATEGORY_IN_WHOLE_STORE - level 5
        sos_brand_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS BRAND OUT OF MANUFACTURER')
        sos_brand_out_of_sub_cat = \
            self.calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(
                sos_brand_out_of_sub_cat_fk)

        # Savings results in Hierarchy
        self.save_hierarchy(sos_store, sos_cat_out_of_store,
                            sos_sub_cat_out_of_cat, sos_man_out_of_sub_cat,
                            sos_brand_out_of_sub_cat)

    def calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(self, kpi_fk):
        """Share of brand facings out of each (sub-category, manufacturer)
        pair, ignoring stacking; returns a list of result dicts for the new
        tables."""
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings > 0.
        # BUGFIX: the `> 0` comparison must be parenthesized; `&` binds
        # tighter than `>`, so the original expression bitwise-ANDed the
        # boolean mask with the facings values before comparing.
        filtered_scif = self.scif[
            ~self.scif['product_type'].isin(['Irrelevant', 'Empty']) &
            (self.scif['facings_ign_stack'] > 0)]
        # Filter by each Sub Category and Manufacturer
        sub_cat_fk_list = filtered_scif['sub_category_fk'].unique().tolist()
        for sub_cat in sub_cat_fk_list:
            filtered_scif_by_sub_cat = filtered_scif[
                filtered_scif['sub_category_fk'] == sub_cat]
            list_of_relevant_manufacturers = filtered_scif_by_sub_cat[
                'manufacturer_fk'].unique().tolist()
            for manu_fk in list_of_relevant_manufacturers:
                filtered_scif_by_sub_cat_and_manufacturer = \
                    filtered_scif_by_sub_cat[
                        filtered_scif_by_sub_cat['manufacturer_fk'] == manu_fk]
                denominator_result = \
                    filtered_scif_by_sub_cat_and_manufacturer[
                        'facings_ign_stack'].sum()
                # Calculate results per Brand
                list_of_relevant_brands = \
                    filtered_scif_by_sub_cat_and_manufacturer[
                        'brand_fk'].unique().tolist()
                for brand_fk in list_of_relevant_brands:
                    filtered_scif_by_brand = \
                        filtered_scif_by_sub_cat_and_manufacturer[
                            filtered_scif_by_sub_cat_and_manufacturer[
                                'brand_fk'] == brand_fk]
                    facings_brand_results = filtered_scif_by_brand[
                        'facings_ign_stack'].sum()
                    result_for_brand = \
                        facings_brand_results / denominator_result
                    # Preparing the results' dictionary
                    res_dict['kpi_definition_fk'] = kpi_fk
                    res_dict['numerator_id'] = brand_fk
                    res_dict['numerator_result'] = facings_brand_results
                    res_dict['denominator_id'] = int(sub_cat)
                    res_dict['denominator_result'] = denominator_result
                    res_dict['identifier_result'] = (int(brand_fk),
                                                     int(sub_cat),
                                                     int(manu_fk))
                    res_dict['identifier_parent'] = int(manu_fk), (
                        int(sub_cat))
                    res_dict['result'] = result_for_brand
                    res_dict['score'] = result_for_brand
                    res_list.append(res_dict.copy())
        return res_list

    def calculate_sos_of_cat_of_out_of_store_new(self, kpi_fk):
        """Share of each category's facings out of the whole store, ignoring
        stacking; returns a list of result dicts for the new tables."""
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings > 0.
        # BUGFIX: parenthesize the comparison (see sibling SOS method).
        filtered_scif = self.scif[
            ~self.scif['product_type'].isin(['Irrelevant', 'Empty']) &
            (self.scif['facings_ign_stack'] > 0)]
        denominator_result = filtered_scif['facings_ign_stack'].sum()
        categories_fk_list = filtered_scif['category_fk'].unique().tolist()
        # Calculate result per category (using facings_ign_stack!)
        for category_fk in categories_fk_list:
            filtered_scif_by_category = filtered_scif[
                filtered_scif['category_fk'] == category_fk]
            facings_category_result = filtered_scif_by_category[
                'facings_ign_stack'].sum()
            result_for_category = facings_category_result / denominator_result
            # Preparing the results' dictionary
            res_dict['kpi_definition_fk'] = kpi_fk
            res_dict['numerator_id'] = category_fk
            res_dict['numerator_result'] = facings_category_result
            res_dict['denominator_id'] = self.store_id
            res_dict['denominator_result'] = denominator_result
            res_dict['result'] = result_for_category
            res_dict['score'] = result_for_category
            res_list.append(res_dict.copy())
        return res_list

    def save_hierarchy(self, level_1, level_2, level_3, level_4, level_5):
        """Write the 5-level SOS hierarchy to the new tables, linking each
        level to its parent via identifier strings."""
        for i in level_1:
            # NOTE(review): `i.to_dict` is accessed without parentheses here
            # and in levels 3/4; this only works if `to_dict` is a property
            # on the SOS result type — confirm against its definition.
            res = i.to_dict
            kpi_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                should_enter=False)
        for res in level_2:
            kpi_identifier = "level_2_" + str(int(res['numerator_id']))
            parent_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)
        for i in level_3:
            res = i.to_dict
            kpi_identifier = str(int(res['numerator_id']))
            parent_identifier = "level_2_" + str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)
        for i in level_4:
            res = i.to_dict
            kpi_identifier = "level_4_" + str(
                (int(res['numerator_id']), int(res['denominator_id'])))
            parent_identifier = str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)
        for res in level_5:
            kpi_identifier = "level_5_" + str(res['identifier_result'])
            parent_identifier = "level_4_" + str(res['identifier_parent'])
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        Old-table session results are deleted first so a re-run replaces them.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = \
            DIAGEOQueries.get_delete_session_results_query_old_tables(
                self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    def get_equipment_score_relevant_scenes(self):
        """Return the fks of 'ON - DRAUGHT TAPS' scenes (empty list if the
        generator's scif is empty)."""
        scenes = []
        if not self.diageo_generator.scif.empty:
            scenes = self.diageo_generator.scif[
                self.diageo_generator.scif['template_name'] ==
                'ON - DRAUGHT TAPS']['scene_fk'].unique().tolist()
        return scenes
class SINGHATHToolBox:
    """
    Excel-template-driven KPI calculator for the Singha (TH) project.

    The KPI template workbook lists the active KPIs on a master sheet; each
    KPI row points to a data sheet (price difference, POS presence, or dump
    display presence) that is parsed and calculated against session data,
    with results written through ``self.common``.
    """

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.templates = self.data_provider[Data.TEMPLATES]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        # KPI template workbook shipped next to this package.
        self.templates_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..',
            TEMPLATE_PARENT_FOLDER, TEMPLATE_NAME)
        self.kpi_template = pd.ExcelFile(self.templates_path)

    def get_products_price_for_ean_codes(self, ean_codes, session_fk):
        """
        Fetch manually-collected prices for the given EAN codes in this
        session, joined with active product master data.

        :param ean_codes: tuple of EAN code strings (interpolated into SQL).
        :param session_fk: session pk to filter probedata rows by.
        :return: DataFrame with price, promotion flag and product attributes.
        """
        # https://jira.trax-cloud.com/browse/TOHA-2024 to have this in data provider
        self.rds_conn.connect_rds()
        query = """
            select value as price, is_promotion, product_fk, name, ean_code,
            category_fk, brand_fk, type as product_type, sub_category_fk
            from probedata.manual_collection_price mcp
            join static_new.product prod on mcp.product_fk=prod.pk
            where mcp.value is not null and prod.is_active =1
            and session_fk={session_fk}
            and ean_code in {ean_codes};
        """
        df = pd.read_sql_query(
            query.format(
                ean_codes=ean_codes,
                session_fk=session_fk,
            ), self.rds_conn.db)
        return df

    def main_calculation(self, *args, **kwargs):
        """
        This function calculates the KPI results.
        """
        self.filter_and_send_kpi_to_calc()
        self.common.commit_results_data()
        return 0

    def filter_and_send_kpi_to_calc(self):
        """
        Walk the master KPI sheet, skip rows that are deactivated or not
        permitted for this store type, and dispatch each remaining KPI to
        the calculator matching its data sheet.
        """
        kpi_sheet = self.kpi_template.parse(KPI_SHEET)
        for index, kpi_sheet_row in kpi_sheet.iterrows():
            # Rows flagged 0/n/no in the 'active' column are skipped.
            if not is_nan(kpi_sheet_row[KPI_ACTIVE_COL]):
                if str(kpi_sheet_row[KPI_ACTIVE_COL]).strip().lower() in [
                        '0.0', 'n', 'no'
                ]:
                    print("KPI :{} deactivated in sheet.".format(
                        kpi_sheet_row[KPI_NAME_COL]))
                    continue
            # Restrict by the sheet's comma-separated store-type list
            # unless it is blank or 'all'.
            if not is_nan(kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL]):
                if bool(kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].strip()) and \
                        kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].strip().lower() != 'all':
                    print "Check the store types in excel..."
                    permitted_store_types = [
                        x.strip()
                        for x in kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].split(',')
                        if x.strip()
                    ]
                    if self.store_info.store_type.values[
                            0] not in permitted_store_types:
                        print "Store type not permitted..."
                        continue
            # The KPI must exist (and not be soft-deleted) in static data.
            kpi = self.kpi_static_data[
                (self.kpi_static_data[DF_KPI_TYPE_COL] ==
                 kpi_sheet_row[KPI_NAME_COL])
                & (self.kpi_static_data['delete_time'].isnull())]
            if kpi.empty:
                print("KPI Name:{} not found in DB".format(
                    kpi_sheet_row[KPI_NAME_COL]))
                continue
            sheet_name = kpi_sheet_row[KPI_SHEET_NAME_COL]
            # Forward-fill so merged/blank template cells inherit the value
            # of the row above.
            sheet_data_frame = self.kpi_template.parse(sheet_name).fillna(
                method='ffill')
            if sheet_name == PRICE_SHEET:
                self.write_price_difference(kpi, sheet_data_frame)
            elif sheet_name == POS_PRESENCE_SHEET:
                self.write_pos_presence(kpi, sheet_data_frame)
            elif sheet_name == DUMP_DISPLAY_PRESENCE:
                self.write_dump_display_presence(kpi, sheet_data_frame)

    def write_price_difference(self, kpi, price_sheet_data_frame):
        """
        Compare own vs. competitor prices per template row and write a
        pass/fail result per pair.

        A row passes (result 1) when own price minus competitor price is
        within the row's allowed difference. Note: the old-table `result`
        column carries the own price and `score` the competitor price.
        """
        # The sheet's real headers are in its first data row: promote that
        # row to columns, then drop it.
        price_sheet_data_frame.columns = price_sheet_data_frame.iloc[0]
        price_sheet_data_frame = price_sheet_data_frame.reindex(
            price_sheet_data_frame.index.drop(0))
        # All EANs from both columns, flattened column-major, fetched in
        # one DB round-trip.
        all_ean_codes = tuple(
            map(
                str, price_sheet_data_frame[[
                    PRICE_SHEET_EAN_CODE_1_COL, PRICE_SHEET_EAN_CODE_2_COL
                ]].values.ravel('F')))
        prod_price_data = self.get_products_price_for_ean_codes(
            ean_codes=all_ean_codes,
            session_fk=self.session_info['pk'].iloc[0])
        for index, each_row in price_sheet_data_frame.iterrows():
            result = 1
            own_manufacturer_ean = each_row.get(PRICE_SHEET_EAN_CODE_1_COL,
                                                None)
            competitive_manufacturer_ean = each_row.get(
                PRICE_SHEET_EAN_CODE_2_COL, None)
            if not own_manufacturer_ean or not competitive_manufacturer_ean:
                continue
            own_manufacturer = prod_price_data.query(
                "ean_code=='{code}'".format(code=own_manufacturer_ean))
            competitive_manufacturer = prod_price_data.query(
                "ean_code=='{code}'".format(code=competitive_manufacturer_ean))
            if own_manufacturer.empty or competitive_manufacturer.empty:
                print "Own or Competitive Manufacturer not present in the session."
                continue
            own_manufacturer_price = own_manufacturer['price'].iloc[0]
            competitive_manufacturer_price = competitive_manufacturer[
                'price'].iloc[0]
            # if the `calculated price difference` <= `given price difference` then only it is a pass
            if float(own_manufacturer_price - competitive_manufacturer_price) > \
                    float(each_row.get(PRICE_SHEET_PRICE_DIFFERENCE_COL, 0)):
                result = 0
            self.common.write_to_db_result(
                fk=kpi['pk'].iloc[0],
                numerator_id=int(own_manufacturer['product_fk'].iloc[0]),
                numerator_result=result,
                denominator_id=int(
                    competitive_manufacturer['product_fk'].iloc[0]),
                denominator_result=result,
                context_id=self.store_id,
                result=own_manufacturer_price,
                score=competitive_manufacturer_price,
            )

    def write_pos_presence(self, kpi, pos_data_frame):
        """
        Write a presence result (1/0) for each POS EAN in the template:
        1 when the POS item was recognized in the session (scif), 0 when it
        only exists in the product master data; unknown EANs are skipped.
        """
        for each_ean in pos_data_frame[POS_PRESENCE_EAN_COL]:
            presence = 1
            product_df = self.scif.query(
                "product_ean_code=='{each_ean}' and product_type=='{type}'".
                format(each_ean=each_ean, type=DUMP_DISPLAY_POS_TYPE))
            if product_df.empty:
                # Not in the session — fall back to master data so we can
                # still report the product as absent.
                product_df = self.all_products.query(
                    "product_ean_code=='{each_ean}' and product_type=='{type}'"
                    .format(each_ean=each_ean, type=DUMP_DISPLAY_POS_TYPE))
                presence = 0
                if product_df.empty:
                    # This should not happen
                    # This means the POS ean code is not in the product master data
                    continue
                    # raise Exception("KPI {kpi_name}: The product with EAN {ean} and type {type}"
                    #                 " in template is not in DB.".format(
                    #     kpi_name=kpi[DF_KPI_TYPE_COL].iloc[0],
                    #     ean=each_ean,
                    #     type=DUMP_DISPLAY_POS_TYPE,
                    # ))
            self.common.write_to_db_result(
                fk=kpi['pk'].iloc[0],
                numerator_id=int(product_df['product_fk'].iloc[0]),
                denominator_id=self.store_id,
                context_id=self.store_id,
                result=presence,
                score=presence,
            )

    def write_dump_display_presence(self, kpi, dump_display_data_frame):
        """
        For each category in the template, decide whether a qualifying dump
        display exists in the session (presence 1/0) and write one result
        per category.

        A display qualifies in a single scene when every product type listed
        for the category meets its facings threshold (rows joined with
        'and' logic fail the scene as soon as one row misses).
        """
        dump_display_data_group = dump_display_data_frame.groupby(
            DUMP_DISPLAY_CATEGORY_COL)
        for category, dump_display_data in dump_display_data_group:
            presence = 0
            # iterate through rows for each category
            category_name = DUMP_CATEGORY_MAP[category]
            category_fk = self.all_products.query(
                "category=='{category}'".format(
                    category=category_name))['category_fk'].iloc[0]
            # get the applicable scene types -- start
            set_scene_types = set()
            scene_type_list = list(
                dump_display_data[DUMP_DISPLAY_SCENE_TYPE_COL].values.ravel(
                    'F'))
            for each_list in scene_type_list:
                set_scene_types.update(
                    tuple(str(each.strip()) for each in each_list.split(',')))
            # get the applicable scene types -- end
            # make template name case-insensitive search -- start
            self.templates["template_name"] = self.templates[
                "template_name"].str.lower()
            _scene_types = map(str.lower, list(set_scene_types))
            # make template name case-insensitive search -- start
            allowed_template_fks = self.templates.query(
                "template_name in {allowed_templates}".format(
                    allowed_templates=_scene_types)
            )['template_fk'].values.tolist()
            template_scif = self.scif.query(
                'template_fk in {}'.format(allowed_template_fks))
            if template_scif.empty:
                print "kpi: {kpi}: Template/Scene Types: {templates} are not present in session {sess}" \
                    .format(kpi=kpi[DF_KPI_TYPE_COL].iloc[0],
                            templates=_scene_types,
                            sess=self.session_uid)
                continue
            template_scif_by_scene_id = template_scif.groupby('scene_id')
            row_truths = []  # to check if all items in the category is true
            # Evaluate scene by scene; the first scene satisfying all
            # product types wins.
            for scene_id, scene_data in template_scif_by_scene_id:
                dump_display_product_group = dump_display_data.groupby(
                    DUMP_DISPLAY_PROD_TYPE_COL)
                one_condition_fail = False
                for prod_type, product_items in dump_display_product_group:
                    if one_condition_fail:
                        continue
                    logic = product_items[DUMP_DISPLAY_LOGIC_COL].iloc[
                        0].strip().lower()
                    for idx, each_prod_entry in product_items.iterrows():
                        _pos_codes = str(
                            each_prod_entry[DUMP_DISPLAY_EAN_CODE_COL])
                        all_pos_ean_codes = tuple(
                            map(str, [
                                x.strip() for x in _pos_codes.split(',') if x
                            ]))
                        facings_count = 0
                        prod_scif_with_ean = scene_data.query(
                            'product_ean_code in {all_skus} and category_fk=="{category_fk}"'
                            .format(all_skus=all_pos_ean_codes,
                                    category_fk=category_fk))
                        if not prod_scif_with_ean.empty:
                            # NOTE(review): only the first matching row's
                            # facings are counted, not the sum over all
                            # matched EANs — confirm this is intended.
                            facings_count = int(
                                prod_scif_with_ean['facings'].iloc[0])
                        if facings_count < int(
                                each_prod_entry[DUMP_DISPLAY_COUNT_COL]):
                            if logic == 'and':
                                # one prod type didn't satisfy; try next scene.
                                one_condition_fail = True
                                break
                        else:
                            row_truths.append(prod_type)
                if row_truths and all(
                        [ech in row_truths for ech in DUMP_DISPLAY_PROD_TYPE_LIST]):
                    # dump display is found, break out and save presence for this category
                    presence = 1
                    break
                else:
                    row_truths = []
            # save for each category
            self.common.write_to_db_result(
                fk=int(kpi['pk'].iloc[0]),  # only one category
                numerator_id=category_fk,
                denominator_id=self.store_id,
                context_id=self.store_id,
                result=presence,
                score=presence,
            )
class PURINAToolBox:
    """Session-level Purina calculations: linear share-of-shelf (in feet)
    broken down by manufacturer > brand > category > sub-category > price
    class, written to the legacy (old-tables) KPI schema."""

    # Legacy KPI hierarchy levels (set / kpi / atomic).
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.session_fk = self.session_info['pk'].values[0]
        self.kpi_results_queries = []
        self.kpi_static_queries = []
        # Only pet-food rows are relevant for Purina calculations.
        self.purina_scif = self.scif.loc[
            self.scif['category_fk'] == PET_FOOD_CATEGORY]

    def calculate_purina(self, *args, **kwargs):
        """
        This function calculates the KPI results.
        Skips sessions that are not Purina-relevant; otherwise registers any
        new atomic KPIs, refreshes the static data, and runs the calculation.
        """
        if not self.is_session_purina():
            return
        # Update all new static KPIs
        self.create_new_static_kpi()
        self.kpi_static_data = self.common.get_kpi_static_data(refresh=True)
        self.update_kpi_score()
        self.run_data_collecting()
        self.common.commit_results_data()

    def update_kpi_score(self):
        # Only to see results in join :(
        # Writes a constant score of 1 at level 2 for every Purina KPI.
        for kpi in PURINA_KPI:
            kpi_fk = self.get_kpi_fk_by_kpi_name(kpi, self.LEVEL2,
                                                 set_name=PURINA_SET)
            self.common.write_to_db_result(kpi_fk, self.LEVEL2, 1)

    def run_data_collecting(self):
        """
        This function run the man calculation of linear sos with sub category
        out of subsegment or price out of subsegment
        :param price_kpi:
        :return:
        """
        # Rows without a linear size cannot contribute to linear SOS.
        data = self.purina_scif.dropna(subset=[LINEAR_SIZE])
        if data.empty:
            Log.info("No relevant purina's products were found in session.")
            return
        # subseg_name_list = data[SCIF_SUBSEGMENT].unique()
        # for subseg in subseg_name_list:
        #     if not subseg:
        #         subseg = NO_SUBSEG
        #         by_subseg = data.loc[pd.isnull(data[SCIF_SUBSEGMENT])]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     else:
        #         by_subseg = data.loc[data[SCIF_SUBSEGMENT] == subseg]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=SUBSEGMENT_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=PRICE_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)

        # gets all category linear size
        category_ft = self.cm_to_ft(sum(data[LINEAR_SIZE]))
        fk = self.get_kpi_fk_by_kpi_name(PURINA_SET, self.LEVEL1)
        self.common.write_to_db_result(fk, self.LEVEL1, category_ft)

        man = data['manufacturer_name'].unique()
        for mf in man:
            by_mf = data.loc[data['manufacturer_name'] == mf]
            manufacturer_ft = self.cm_to_ft(sum(by_mf[LINEAR_SIZE]))
            relevant_kpi_fk = self.kpi_static_data.loc[
                (self.kpi_static_data['kpi_name'] == MANUFACTUR)
                & (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                'kpi_fk'].values[0]
            atomic_fk = self.get_kpi_fk_by_kpi_name(mf, self.LEVEL3,
                                                    father=MANUFACTUR,
                                                    set_name=PURINA_SET)
            if atomic_fk:
                self.common.write_to_db_result(fk=atomic_fk,
                                               atomic_kpi_fk=atomic_fk,
                                               level=self.LEVEL3,
                                               score=manufacturer_ft,
                                               score_2=manufacturer_ft,
                                               session_uid=self.session_uid,
                                               store_fk=self.store_id,
                                               display_text=mf.replace("'", "''"),
                                               visit_date=self.visit_date.isoformat(),
                                               calculation_time=datetime.utcnow().isoformat(),
                                               kps_name=PURINA_SET,
                                               kpi_fk=relevant_kpi_fk)
            else:
                print 'atomic cannot be saved for manufacturer {}'.format(mf)

            brands = by_mf['brand_name'].unique()
            for brand in brands:
                # NOTE(review): mask is built on `data` rather than `by_mf`;
                # pandas aligns on index so results match, but `by_mf` would
                # be clearer — confirm before changing.
                by_brand = by_mf.loc[data['brand_name'] == brand]
                brand_ft = self.cm_to_ft(sum(by_brand[LINEAR_SIZE]))
                kpi_fk = self.kpi_static_data.loc[
                    (self.kpi_static_data['kpi_name'] == BRAND)
                    & (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                    'kpi_fk'].values[0]
                atomic_fk = self.get_kpi_fk_by_kpi_name(brand, self.LEVEL3,
                                                        father=BRAND,
                                                        set_name=PURINA_SET)
                if atomic_fk:
                    self.common.write_to_db_result(fk=atomic_fk,
                                                   atomic_kpi_fk=atomic_fk,
                                                   level=self.LEVEL3,
                                                   score=brand_ft,
                                                   score_2=brand_ft,
                                                   style=mf.replace("'", "''"),
                                                   session_uid=self.session_uid,
                                                   store_fk=self.store_id,
                                                   display_text=brand.replace("'", "''"),
                                                   visit_date=self.visit_date.isoformat(),
                                                   calculation_time=datetime.utcnow().isoformat(),
                                                   kps_name=PURINA_SET,
                                                   kpi_fk=kpi_fk)
                else:
                    print 'atomic cannot be saved for brand {}'.format(brand)

                categories = by_brand[SCIF_CATEOGRY].unique()
                for cat in categories:
                    if not cat:
                        cat = OTHER
                        # NOTE(review): the null filter here uses SCIF_PRICE,
                        # not the category column — looks like a copy/paste
                        # slip; confirm intended behavior.
                        by_cat = by_brand.loc[pd.isnull(by_brand[SCIF_PRICE])]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))
                    else:
                        # NOTE(review): matches on SCIF_SUB_CATEOGRY inside
                        # the category loop (and on `data`, not `by_brand`) —
                        # verify against the intended hierarchy.
                        by_cat = by_brand.loc[data[SCIF_SUB_CATEOGRY] == cat]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))
                    kpi_fk = self.kpi_static_data.loc[
                        (self.kpi_static_data['kpi_name'] == CATEGORY)
                        & (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                        'kpi_fk'].values[0]
                    atomic_fk = self.get_kpi_fk_by_kpi_name(cat, self.LEVEL3,
                                                            father=CATEGORY,
                                                            set_name=PURINA_SET)
                    if atomic_fk:
                        self.common.write_to_db_result(fk=atomic_fk,
                                                       atomic_kpi_fk=atomic_fk,
                                                       level=self.LEVEL3,
                                                       score=cat_ft,
                                                       score_2=cat_ft,
                                                       style=mf.replace("'", "''"),
                                                       result=brand.replace("'", "''"),
                                                       session_uid=self.session_uid,
                                                       store_fk=self.store_id,
                                                       display_text=cat.replace("'", "''"),
                                                       visit_date=self.visit_date.isoformat(),
                                                       calculation_time=datetime.utcnow().isoformat(),
                                                       kps_name=PURINA_SET,
                                                       kpi_fk=kpi_fk)
                    else:
                        print 'atomic cannot be saved for category {}'.format(cat)

                    sub_cats = by_cat[SCIF_SUB_CATEOGRY].unique()
                    for sub_cat in sub_cats:
                        if not sub_cat:
                            sub_cat = OTHER
                            # NOTE(review): null filter on SCIF_PRICE rather
                            # than SCIF_SUB_CATEOGRY — confirm.
                            by_sub_cat = by_cat.loc[pd.isnull(by_cat[SCIF_PRICE])]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        else:
                            by_sub_cat = by_cat.loc[data[SCIF_SUB_CATEOGRY] == sub_cat]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        # write to db under sub category atomic kpi score with brand name in results
                        kpi_fk = self.kpi_static_data.loc[
                            (self.kpi_static_data['kpi_name'] == SUB_CATEGORY)
                            & (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                            'kpi_fk'].values[0]
                        atomic_fk = self.get_kpi_fk_by_kpi_name(sub_cat, self.LEVEL3,
                                                                father=SUB_CATEGORY,
                                                                set_name=PURINA_SET)
                        if atomic_fk:
                            self.common.write_to_db_result(fk=atomic_fk,
                                                           atomic_kpi_fk=atomic_fk,
                                                           level=self.LEVEL3,
                                                           score=sub_cat_ft,
                                                           score_2=sub_cat_ft,
                                                           style=mf.replace("'", "''"),
                                                           result=brand.replace("'", "''"),
                                                           result_2=cat.replace("'", "''"),
                                                           session_uid=self.session_uid,
                                                           store_fk=self.store_id,
                                                           display_text=sub_cat.replace("'", "''"),
                                                           visit_date=self.visit_date.isoformat(),
                                                           calculation_time=datetime.utcnow().isoformat(),
                                                           kps_name=PURINA_SET,
                                                           kpi_fk=kpi_fk)
                        else:
                            print 'atomic cannot be saved for sub category {}'.format(sub_cat)

                        prices = by_sub_cat[SCIF_PRICE].unique()
                        for price_class in prices:
                            if not price_class:
                                price_class = OTHER
                                by_prices = by_sub_cat.loc[pd.isnull(by_sub_cat[SCIF_PRICE])]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            else:
                                by_prices = by_sub_cat.loc[by_sub_cat[SCIF_PRICE] == price_class]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            kpi_fk = self.kpi_static_data.loc[
                                (self.kpi_static_data['kpi_name'] == PRICE_KPI)
                                & (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                'kpi_fk'].values[0]
                            atomic_fk = self.get_kpi_fk_by_kpi_name(price_class, self.LEVEL3,
                                                                    father=PRICE_KPI,
                                                                    set_name=PURINA_SET)
                            if atomic_fk:
                                self.common.write_to_db_result(fk=atomic_fk,
                                                               atomic_kpi_fk=atomic_fk,
                                                               level=self.LEVEL3,
                                                               score=price_ft,
                                                               score_2=price_ft,
                                                               style=mf.replace("'", "''"),
                                                               result=brand.replace("'", "''"),
                                                               result_2=cat.replace("'", "''"),
                                                               result_3=sub_cat.replace("'", "''"),
                                                               session_uid=self.session_uid,
                                                               store_fk=self.store_id,
                                                               display_text=price_class.replace("'", "''"),
                                                               visit_date=self.visit_date.isoformat(),
                                                               calculation_time=datetime.utcnow().isoformat(),
                                                               kps_name=PURINA_SET,
                                                               kpi_fk=kpi_fk
                                                               )
                            else:
                                print 'atomic cannot be saved for price class {}'.format(price_class)

    @staticmethod
    def cm_to_ft(cm):
        # Convert centimeters to feet (1 ft = 30.48 cm).
        return cm / 30.48

    def get_labels(self):
        """Fetch pk, labels and ean_code for every product from static_new."""
        query = """select pk, labels, ean_code from static_new.product """
        labels = pd.read_sql_query(query, self.rds_conn.db)
        return labels

    def get_kpi_fk_by_kpi_name(self, kpi_name, kpi_level, father=None,
                               logic_father=None, set_name=None):
        """Resolve the DB fk for a KPI name at the given hierarchy level.

        :param kpi_name: name to look up at the chosen level.
        :param kpi_level: one of LEVEL1/LEVEL2/LEVEL3.
        :param father: optional parent name (set/kpi) to disambiguate.
        :param logic_father: unused here — kept for interface compatibility.
        :param set_name: optional KPI set name filter.
        :return: the fk, or None when no match exists.
        """
        if kpi_level == self.LEVEL1:
            column_key = 'kpi_set_fk'
            column_value = 'kpi_set_name'
            father_value = 'kpi_set_name'
        elif kpi_level == self.LEVEL2:
            column_key = 'kpi_fk'
            column_value = 'kpi_name'
            father_value = 'kpi_set_name'
        elif kpi_level == self.LEVEL3:
            column_key = 'atomic_kpi_fk'
            column_value = 'atomic_kpi_name'
            father_value = 'kpi_name'
        else:
            raise ValueError('invalid level')
        try:
            relevant = self.kpi_static_data[
                self.kpi_static_data[column_value] == kpi_name]
            if father:
                relevant = relevant[relevant[father_value] == father]
            if set_name:
                relevant = relevant[relevant['kpi_set_name'] == set_name]
            return relevant[column_key].values[0]
        except IndexError:
            Log.info('Kpi name: {}, isn\'t equal to any kpi name in static table'.format(kpi_name))
            return None

    def create_new_static_kpi(self):
        # This functions takes all brands, sub categories, categories and manufacturers in session.
        # The function adds them to database in case they are new.
        brands = self.get_all_brands()
        sub_cats = self.get_all_sub_categories()
        manufacturer = self.get_all_manufacturers()
        cats = self.get_all_categories()
        prices = self.get_all_price_classes()

        new_brands = self.purina_scif.loc[
            ~self.purina_scif['brand_name'].isin(brands)]['brand_name'].unique()
        new_manufacturer = self.purina_scif.loc[
            ~self.purina_scif['manufacturer_name'].isin(manufacturer)][
            'manufacturer_name'].unique()
        new_sub_cat = self.purina_scif.loc[
            (~self.purina_scif[SCIF_SUB_CATEOGRY].isin(sub_cats)) &
            (~pd.isnull(self.purina_scif[SCIF_SUB_CATEOGRY]))][SCIF_SUB_CATEOGRY].unique()
        new_cat = self.purina_scif.loc[
            (~self.purina_scif[SCIF_CATEOGRY].isin(cats)) &
            (~pd.isnull(self.purina_scif[SCIF_CATEOGRY]))][SCIF_CATEOGRY].unique()
        new_prices = self.purina_scif.loc[
            (~self.purina_scif[SCIF_PRICE].isin(prices)) &
            (~pd.isnull(self.purina_scif[SCIF_PRICE]))][SCIF_PRICE].unique()

        self.save_static_atomics(BRAND, new_brands, PURINA_SET)
        self.save_static_atomics(MANUFACTUR, new_manufacturer, PURINA_SET)
        self.save_static_atomics(CATEGORY, new_cat, PURINA_SET)
        self.save_static_atomics(SUB_CATEGORY, new_sub_cat, PURINA_SET)
        self.save_static_atomics(PRICE_KPI, new_prices, PURINA_SET)
        self.commit_static_data()

    def get_all_brands(self):
        # Atomic KPI names already registered under the BRAND kpi.
        return self.kpi_static_data.loc[
            self.kpi_static_data['kpi_name'] == BRAND]['atomic_kpi_name']

    def get_all_sub_categories(self):
        return self.kpi_static_data.loc[
            self.kpi_static_data['kpi_name'] == SUB_CATEGORY]['atomic_kpi_name']

    def get_all_manufacturers(self):
        return self.kpi_static_data.loc[
            self.kpi_static_data['kpi_name'] == MANUFACTUR]['atomic_kpi_name']

    def get_all_categories(self):
        return self.kpi_static_data.loc[
            self.kpi_static_data['kpi_name'] == CATEGORY]['atomic_kpi_name']

    def get_all_price_classes(self):
        return self.kpi_static_data.loc[
            self.kpi_static_data['kpi_name'] == PRICE_KPI]['atomic_kpi_name']

    def save_static_atomics(self, kpi_name, atomics, set_name):
        """Queue INSERT statements creating new atomic KPIs under kpi_name."""
        kpi_fk = self.kpi_static_data.loc[
            (self.kpi_static_data['kpi_name'] == kpi_name) &
            (self.kpi_static_data['kpi_set_name'] == set_name)]['kpi_fk'].values[0]
        for current in atomics:
            # Escape single quotes for SQL string literals.
            current = current.replace("'", "''")
            query = """
                   INSERT INTO {0} (`kpi_fk`, `name`, `description`, `display_text`,`presentation_order`, `display`)
                   VALUES ('{1}', '{2}', '{3}', '{4}', '{5}', '{6}');""".format(
                STATIC_ATOMIC, kpi_fk, current, current, current, 1, 'Y')
            self.kpi_static_queries.append(query)

    def commit_static_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()  # ProjectConnector(self.project_name, DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        for query in self.kpi_static_queries:
            try:
                cur.execute(query)
            except Exception as e:
                # Best-effort insert: log and continue with remaining queries.
                Log.info('query {} could not be executed.'.format(query))
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    def is_session_purina(self):
        # This function checks is the session is of Purina project by its
        # category and that it is a successful visit.
        session_data = self.get_session_category_data()
        # category_fk 13 with resolution/exclude status 1 marks a relevant,
        # successful visit.
        session_data = session_data.loc[
            (session_data['category_fk'] == 13) &
            (session_data['resolution_fk'] == 1) &
            (session_data['exclude_status_fk'] == 1)]
        if not session_data.empty:
            return True
        return False

    def get_session_category_data(self):
        """Fetch category/resolution/exclude-status rows for this session."""
        local_con = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        query = """select category_fk, resolution_fk, exclude_status_fk
                   from probedata.session_category
                   where session_fk = {}""".format(self.session_fk)
        data = pd.read_sql_query(query, local_con.db)
        return data
class AddKPIs(object):
    """Loads new KPI definitions from an Excel template, validates them, and
    inserts them into static.kpi_level_2. Validation errors are written back
    to a highlighted copy of the template under /tmp."""

    def __init__(self, project_name, template_path=None,
                 remove_duplicates=False, add_kpi_pks=False):
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.get_kpi_static_data()
        self.template_path = self.get_template_path(template_path)
        self.template_data = pd.read_excel(self.template_path)
        self.remove_duplicates = remove_duplicates
        self.kpi_counter = 0
        self.insert_queries = []
        self.output_path = self.get_output_file_path()
        # Set of (row, column, color) tuples marking template cells in error.
        self.error_cells = set()
        self.add_kpi_pks = add_kpi_pks

    @staticmethod
    def get_template_path(template_path):
        # Default to the template bundled next to this module.
        return template_path if template_path is not None else os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'new_tables_template.xlsx')

    def get_output_file_path(self):
        # Mirror the template's file name into /tmp for the highlighted copy.
        path_to_list = self.template_path.split('/')
        file_name = path_to_list[len(path_to_list) - 1]
        output_path = os.path.join('/tmp', file_name)
        return output_path

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one
        global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = Queries.get_new_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def add_kpis_from_template(self):
        """Main entry point: validate and, if clean, insert the KPIs."""
        self.validate_template()
        if len(self.error_cells) == 0:
            self.insert_into_kpi_lvl_2()
        else:
            self.highlight_errors_in_template()
            print 'errors found in template. see highlighted in path: {}'.format(
                self.output_path)

    def validate_template(self):
        # Runs all template checks; each check adds entries to error_cells.
        self.check_similar_types()
        self.check_binary_fields()
        if not self.remove_duplicates:
            self.check_duplicate_in_template()

    def check_similar_types(self):
        """Flag template KPI types that already exist in the DB (salmon)."""
        kpi_types = set(self.template_data[Consts.KPI_TYPE].unique().tolist())
        existing_types = set(
            self.kpi_static_data[Consts.KPI_TYPE].unique().tolist())
        similar_types = kpi_types.intersection(existing_types)
        if similar_types:
            err_df = self.template_data[self.template_data[
                Consts.KPI_TYPE].isin(similar_types)]
            cells_list = [(i + 1, Consts.KPI_TYPE, Consts.SALMON)
                          for i in err_df.index.values]
            self.error_cells.update(cells_list)

    def check_binary_fields(self):
        """Flag non-binary values in the template's binary columns (lime)."""
        binary_fields_df = self.template_data[Consts.BINARY_FIELDS]
        # Excel cells may come back as ints, strings or NaN.
        allowed_values = [1, 0, '1', '0', '1.0', '0.0', np.nan]
        for col in binary_fields_df.columns.tolist():
            err_df = binary_fields_df[~binary_fields_df[col].
                                      isin(allowed_values)]
            if len(err_df) > 0:
                cells_list = [(i + 1, col, Consts.LIME)
                              for i in err_df.index.values]
                self.error_cells.update(cells_list)

    def check_duplicate_in_template(self):
        """Flag KPI types that appear more than once in the template (blue)."""
        template_data = self.template_data
        template_data['count'] = 1
        count_rows = template_data.groupby(
            Consts.KPI_TYPE, as_index=False).agg({'count': np.sum})
        count_rows = count_rows[count_rows['count'] != 1]
        if len(count_rows) > 0:
            duplicate_kpis = count_rows[Consts.KPI_TYPE].values.tolist()
            print 'duplicate kpis: ', str(duplicate_kpis)
            for kpi in duplicate_kpis:
                err_df = template_data[template_data[Consts.KPI_TYPE] == kpi]
                cells_list = [(i + 1, Consts.KPI_TYPE, Consts.BLUE)
                              for i in err_df.index.values]
                self.error_cells.update(cells_list)

    def highlight_errors_in_template(self):
        """Write a copy of the template with error cells color-highlighted."""
        writer = pd.ExcelWriter(self.output_path, engine='xlsxwriter')
        self.template_data.to_excel(writer, sheet_name='Sheet1', index=False)
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        # error_format = workbook.add_format({'fg_color': '#EEC93F'})
        for i, col, color in list(self.error_cells):
            # i is the 1-based worksheet row; i - 1 is the DataFrame index.
            value = self.template_data.loc[i - 1, col]
            col_num = self.template_data.columns.get_loc(col)
            error_format = workbook.add_format({'fg_color': color})
            worksheet.write(i, col_num, value, error_format)
        writer.save()

    def insert_into_kpi_lvl_2(self):
        """Build and commit INSERT queries for every template row."""
        if self.remove_duplicates:
            self.template_data = self.template_data.drop_duplicates(
                subset=['type'], keep='first')
        for i, row in self.template_data.iterrows():
            attributes = self.create_attributes_dict(row)
            query = insert(attributes, Consts.STATIC_KPI_LVL_2)
            self.insert_queries.append(query)
        merged_queries = self.merge_insert_queries()
        # print merged_queries
        self.commit_to_db(merged_queries)

    def create_attributes_dict(self, kpi_row):
        """Map one template row to the static.kpi_level_2 column dict.

        NaN relevance/percent flags default to 0; validity window and
        calculation stage are fixed constants.
        """
        attributes_dict = {
            'type': {
                0: kpi_row['type'].replace("'", "\\'").encode('utf-8')
            },
            'client_name': {
                0: kpi_row['client_name'].replace("'", "\\'").encode('utf-8')
            },
            'numerator_type_fk': {
                0: kpi_row['numerator_type_fk']
            },
            'denominator_type_fk': {
                0: kpi_row['denominator_type_fk']
            },
            'kpi_score_type_fk': {
                0: kpi_row['kpi_score_type_fk']
            },
            'kpi_result_type_fk': {
                0: kpi_row['kpi_result_type_fk']
            },
            'session_relevance': {
                0: kpi_row['session_relevance']
                if not np.isnan(kpi_row['session_relevance']) else 0
            },
            'scene_relevance': {
                0: kpi_row['scene_relevance']
                if not np.isnan(kpi_row['scene_relevance']) else 0
            },
            'planogram_relevance': {
                0: kpi_row['planogram_relevance']
                if not np.isnan(kpi_row['planogram_relevance']) else 0
            },
            'live_session_relevance': {
                0: kpi_row['live_session_relevance']
                if not np.isnan(kpi_row['live_session_relevance']) else 0
            },
            'live_scene_relevance': {
                0: kpi_row['live_scene_relevance']
                if not np.isnan(kpi_row['live_scene_relevance']) else 0
            },
            'is_percent': {
                0: kpi_row['is_percent']
                if not np.isnan(kpi_row['is_percent']) else 0
            },
            'kpi_target_type_fk': {
                0: kpi_row['kpi_target_type_fk']
            },
            'kpi_calculation_stage_fk': {
                0: 3
            },
            'valid_from': {
                0: '1990-01-01'
            },
            'valid_until': {
                0: '2050-01-01'
            },
            'initiated_by': {
                0: 'Custom'
            },
            'context_type_fk': {
                0: kpi_row['context_type_fk']
            }
        }
        if self.add_kpi_pks:
            attributes_dict.update({'pk': {0: kpi_row['pk']}})
        return attributes_dict

    def merge_insert_queries(self):
        """Group per-row INSERTs by prefix and merge VALUES in 10k batches."""
        query_groups = {}
        for query in self.insert_queries:
            if not query:
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    def commit_to_db(self, queries):
        """Execute and commit each merged query; report failures per query."""
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        for query in queries:
            try:
                cur.execute(query)
                self.rds_conn.db.commit()
                print 'kpis were added to the db'
            except Exception as e:
                print 'kpis were not inserted: {}'.format(repr(e))
class AddKPIsToAPI(object):
    """Registers KPIs in static.kpi_view_configuration with application='API'
    so they are exposed through the API. KPI pks come either from a template
    file, an explicit list, or all existing KPIs."""

    def __init__(self, project_name, file_path=None, kpi_list=None,
                 all_existing_kpis=False, kpis_to_exclude=None):
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.get_kpi_static_data()
        # kpi_level_2 fks already configured for the API (skip these).
        self.existing_configurations = self.get_kpi_view_config_api()
        self.template_path = file_path
        self.template_data = pd.read_excel(
            self.template_path) if self.template_path is not None else None
        self.all_existing_kpis = all_existing_kpis
        self.insert_queries = []
        self.kpi_list = list(
            set(kpi_list)) if kpi_list is not None else kpi_list
        self.kpis_to_exclude = list(
            set(kpis_to_exclude)) if kpis_to_exclude is not None else []

    def get_output_file_path(self):
        # Mirror the template's file name into /tmp.
        path_to_list = self.template_path.split('/')
        file_name = path_to_list[len(path_to_list) - 1]
        output_path = os.path.join('/tmp', file_name)
        return output_path

    def get_kpi_view_config_api(self):
        """Return the kpi_level_2 fks already configured for the API."""
        query = """
                select * from static.kpi_view_configuration
                where application='API'
                """
        kpi_config_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_config_data['kpi_level_2_fk'].values.tolist()

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one
        global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = Queries.get_new_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def configure_kpis_for_api(self):
        """Main entry point: resolve the target pk list and insert configs."""
        if self.all_existing_kpis:
            if self.kpi_list or self.template_path:
                print 'all_existing_kpis is set to True => kpi list or kpi file data will be ignored'
            kpi_pks = self.kpi_static_data['pk'].values.tolist()
        else:
            # Prefer the template file; fall back to the explicit list.
            kpi_pks = self.template_data['pk'].unique().tolist(
            ) if self.template_data is not None else self.kpi_list
        kpi_pks = kpi_pks if kpi_pks is not None else []
        # Drop pks already configured and explicitly excluded ones.
        kpi_pks = list(set(kpi_pks) - set(self.existing_configurations))
        kpi_pks = list(set(kpi_pks) - set(self.kpis_to_exclude))
        self.generate_insert_queries(kpi_pks)
        if self.insert_queries:
            merged_queries = self.merge_insert_queries()
            self.commit_to_db(merged_queries)
        if not self.insert_queries:
            print 'No kpis were added'

    def generate_insert_queries(self, kpi_pks):
        for pk in kpi_pks:
            attributes = self.create_attributes_dict(pk)
            query = insert(attributes, Consts.STATIC_KPI_VIEW_CONFIG)
            self.insert_queries.append(query)

    @staticmethod
    def create_attributes_dict(pk):
        # Column dict for one static.kpi_view_configuration row.
        attributes_dict = {
            'application': {
                0: 'API'
            },
            'kpi_level_2_fk': {
                0: pk
            },
            'kpi_level_1_fk': {
                0: 0
            },
            'page': {
                0: ""
            }
        }
        return attributes_dict

    def merge_insert_queries(self):
        """Group per-row INSERTs by prefix and merge VALUES in 10k batches."""
        query_groups = {}
        for query in self.insert_queries:
            if not query:
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    def commit_to_db(self, queries):
        """Execute and commit each merged query; report failures per query."""
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        for query in queries:
            try:
                cur.execute(query)
                self.rds_conn.db.commit()
                print 'kpis were added to the db'
            except Exception as e:
                print 'kpis were not inserted: {}'.format(repr(e))
class SOLARBRToolBox:
    """Share-of-shelf (SOVI) calculations driven by two Excel templates:
    a main KPI template and a per-store-type score-range template."""

    # Legacy KPI hierarchy levels.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3
    # SOS filter constants: exclude 'Empty' products from the population.
    EXCLUDE_EMPTY = False
    EXCLUDE_FILTER = 0
    EMPTY = 'Empty'

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        self.templates = {}
        self.session_id = self.data_provider.session_id
        self.score_templates = {}
        self.get_templates()
        self.get_score_template()
        # NOTE(review): despite the name this holds the first matching product
        # ROW (a Series), not a manufacturer fk — confirm intended.
        self.manufacturer_fk = self.all_products[
            self.all_products['manufacturer_name'] == 'Coca Cola'].iloc[0]
        self.sos = SOS(self.data_provider, self.output)
        self.total_score = 0
        self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
        self.toolbox = GENERALToolBox(self.data_provider)
        self.scenes_info = self.data_provider[Data.SCENES_INFO]
        self.kpi_results_new_tables_queries = []
        # self.store_type = self.data_provider.store_type

    def get_templates(self):
        """Load every main-template sheet into self.templates."""
        for sheet in Const.SHEETS_MAIN:
            self.templates[sheet] = pd.read_excel(
                MAIN_TEMPLATE_PATH,
                sheetname=sheet.decode("utf-8"),
                keep_default_na=False)

    def get_score_template(self):
        """Load every score-template sheet (per store type) into
        self.score_templates."""
        for sheet in Const.SHEETS_SCORE:
            self.score_templates[sheet] = pd.read_excel(
                SCORE_TEMPLATE_PATH,
                sheetname=sheet.decode("utf-8"),
                keep_default_na=False,
                encoding="utf-8")

    def main_calculation(self, *args, **kwargs):
        """Run every KPI row from the main template, then commit results."""
        main_template = self.templates[Const.KPIS]
        for i, main_line in main_template.iterrows():
            self.calculate_main_kpi(main_line)
        self.commit_results()

    def calculate_main_kpi(self, main_line):
        """Calculate a single template row if it applies to this session's
        store type and scene types; dispatches to the per-type function."""
        kpi_name = main_line[Const.KPI_NAME]
        kpi_type = main_line[Const.Type]
        scene_types = self.does_exist(main_line, Const.SCENE_TYPES)

        result = score = 0
        general_filters = {}

        scif_scene_types = self.scif['template_name'].unique().tolist()
        store_type = str(self.store_info["store_type"].iloc[0])
        store_types = self.does_exist_store(main_line, Const.STORE_TYPES)
        if store_type in store_types:
            if scene_types:
                # 'All' matches every scene type; otherwise require overlap
                # between the template's scene types and the session's.
                if (('All' in scene_types) or
                        bool(set(scif_scene_types) & set(scene_types))):
                    if not ('All' in scene_types):
                        general_filters['template_name'] = scene_types
                    if kpi_type == Const.SOVI:
                        relevant_template = self.templates[kpi_type]
                        relevant_template = relevant_template[
                            relevant_template[Const.KPI_NAME] == kpi_name]

                        if relevant_template["numerator param 1"].all() and \
                                relevant_template["denominator param"].all():
                            function = self.get_kpi_function(kpi_type)
                            for i, kpi_line in relevant_template.iterrows():
                                result, score = function(kpi_line, general_filters)
                    else:
                        pass
        else:
            pass

    @staticmethod
    def does_exist(kpi_line, column_name):
        """
        checks if kpi_line has values in this column, and if it does -
        returns a list of these values
        :param kpi_line: line from template
        :param column_name: str
        :return: list of values if there are, otherwise None
        """
        if column_name in kpi_line.keys() and kpi_line[column_name] != "":
            cell = kpi_line[column_name]
            if type(cell) in [int, float]:
                return [cell]
            elif type(cell) in [unicode, str]:
                return cell.split(", ")
        return None

    @staticmethod
    def does_exist_store(kpi_line, column_name):
        """
        checks if kpi_line has values in this column, and if it does -
        returns a list of these values
        :param kpi_line: line from template
        :param column_name: str
        :return: list of values if there are, otherwise None
        """
        # NOTE(review): same as does_exist but splits on "," (no space).
        if column_name in kpi_line.keys() and kpi_line[column_name] != "":
            cell = kpi_line[column_name]
            if type(cell) in [int, float]:
                return [cell]
            elif type(cell) in [unicode, str]:
                return cell.split(",")
        return None

    def calculate_sos(self, kpi_line, general_filters):
        """Calculate one SOS KPI line, score it against the store-type score
        ranges, write the result, and return (sos_value, score)."""
        kpi_name = kpi_line[Const.KPI_NAME]

        den_type = kpi_line[Const.DEN_TYPES_1]
        den_value = kpi_line[Const.DEN_VALUES_1].split(',')

        num_type = kpi_line[Const.NUM_TYPES_1]
        num_value = kpi_line[Const.NUM_VALUES_1].split(',')

        general_filters[den_type] = den_value

        sos_filters = {num_type: num_value}

        # Optional second numerator filter pair.
        if kpi_line[Const.NUM_TYPES_2]:
            num_type_2 = kpi_line[Const.NUM_TYPES_2]
            num_value_2 = kpi_line[Const.NUM_VALUES_2].split(',')
            sos_filters[num_type_2] = num_value_2

        sos_value = self.sos.calculate_share_of_shelf(sos_filters,
                                                      **general_filters)
        # sos_value *= 100
        sos_value = round(sos_value, 2)

        score = self.get_score_from_range(kpi_name, sos_value)

        manufacturer_products = self.all_products[
            self.all_products['manufacturer_name'] == num_value[0]].iloc[0]
        manufacturer_fk = manufacturer_products["manufacturer_fk"]

        all_products = self.all_products[
            self.all_products['category'] == den_value[0]].iloc[0]
        category_fk = all_products["category_fk"]

        numerator_res, denominator_res = self.get_numerator_and_denominator(
            sos_filters, **general_filters)

        self.common.write_to_db_result_new_tables(fk=1,
                                                  numerator_id=manufacturer_fk,
                                                  numerator_result=numerator_res,
                                                  denominator_id=category_fk,
                                                  denominator_result=denominator_res,
                                                  result=sos_value,
                                                  score=score,
                                                  score_after_actions=score)
        return sos_value, score

    def get_score_from_range(self, kpi_name, sos_value):
        """Look up the score for sos_value in this store type's Low/High
        score-range sheet."""
        store_type = str(self.store_info["store_type"].iloc[0])
        self.score_templates[store_type] = self.score_templates[
            store_type].replace(kpi_name, kpi_name.encode("utf-8"))
        score_range = self.score_templates[store_type].query(
            'Kpi == "' + str(kpi_name.encode("utf-8")) +
            '" & Low <= ' + str(sos_value) +
            ' & High >= ' + str(sos_value) + '')
        score = score_range['Score'].iloc[0]
        return score

    def get_kpi_function(self, kpi_type):
        """
        transfers every kpi to its own function
        :param kpi_type: value from "sheet" column in the main sheet
        :return: function
        """
        if kpi_type == Const.SOVI:
            return self.calculate_sos
        else:
            Log.warning(
                "The value '{}' in column sheet in the template is not recognized".format(kpi_type))
            return None

    @staticmethod
    def round_result(result):
        return round(result, 3)

    def get_numerator_and_denominator(self, sos_filters=None,
                                      include_empty=False, **general_filters):
        """Return (numerator, denominator) facings sums for the SOS filters
        over the filtered scene-item-facts population."""
        # Exclude 'Empty' products unless a product_type filter was supplied.
        if include_empty == self.EXCLUDE_EMPTY and \
                'product_type' not in sos_filters.keys() + general_filters.keys():
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.toolbox.get_filter_condition(self.scif,
                                                       **general_filters)
        subset_filter = self.toolbox.get_filter_condition(self.scif,
                                                          **sos_filters)
        try:
            pop = self.scif

            filtered_population = pop[pop_filter]
            if filtered_population.empty:
                return 0, 0
            else:
                subset_population = filtered_population[subset_filter]
                # ratio = TBox.calculate_ratio_sum_field_in_rows(filtered_population, subset_population, Fd.FACINGS)

                df = filtered_population
                subset_df = subset_population
                sum_field = Fd.FACINGS
                try:
                    Validation.is_empty_df(df)
                    Validation.is_empty_df(subset_df)
                    Validation.is_subset(df, subset_df)
                    Validation.df_columns_equality(df, subset_df)
                    Validation.validate_columns_exists(df, [sum_field])
                    Validation.validate_columns_exists(subset_df, [sum_field])
                    Validation.is_none(sum_field)
                except Exception, e:
                    # Validation failures are tolerated; sums proceed anyway.
                    msg = "Data verification failed: {}.".format(e)
                    # raise Exception(msg)

                default_value = 0

                numerator = TBox.calculate_frame_column_sum(subset_df,
                                                            sum_field,
                                                            default_value)
                denominator = TBox.calculate_frame_column_sum(df,
                                                              sum_field,
                                                              default_value)

                return numerator, denominator
        except Exception as e:
            Log.error(e.message)
            # NOTE(review): returning True here instead of a (num, den) tuple
            # will break callers that unpack the result — confirm intended.
            return True

    def commit_results(self):
        """Delete this session's previous results and write the new ones."""
        insert_queries = self.merge_insert_queries(
            self.kpi_results_new_tables_queries)
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_query = SOLARBRQueries.get_delete_session_results_query(
            self.session_uid, self.session_id)
        cur.execute(delete_query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    @staticmethod
    def merge_insert_queries(insert_queries):
        # Merge per-row INSERTs that share a prefix into one multi-VALUES query.
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(
                group, ',\n'.join(query_groups[group])))
        return merged_queries
class DBHandler:
    """
    Tnuva has an NCC report that compares the OOS SKU-level results of the current
    session against the previous one. We didn't want to calculate that during the
    report, and it doesn't exist yet in the API, so this util class handles
    fetching those previous results straight from the DB.
    """

    def __init__(self, project_name, session_uid):
        # Dedicated DB connector for this util, separate from the main
        # calculation engine's connection.
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.session_uid = session_uid

    def _get_previous_session_fk(self):
        """
        This method fetches the last completed session_fk for the current store.

        The query returns up to two rows ordered newest-first: the current
        session itself and the previous completed one, so anything other than
        exactly 2 rows means there is no usable previous visit.
        """
        last_session_fk_query = self._get_last_visit_fk_query()
        last_session_fk = self._execute_db_query(last_session_fk_query)
        if len(last_session_fk) != 2:
            Log.warning(
                Consts.LOG_EMPTY_PREVIOUS_SESSIONS.format(self.session_uid))
            last_session_fk = None
        else:
            # Row 0 is the current session; row 1 is the previous one.
            last_session_fk = last_session_fk.loc[1, BasicConsts.PK]
        return last_session_fk

    def _get_oos_results(self, session_fk):
        """
        This method gets a session_fk and fetches the relevant OOS results.
        """
        query = self._previous_oos_results_query(session_fk)
        result = self._execute_db_query(query)
        return result

    def get_last_session_oos_results(self):
        """
        This is the main method of this util and the only public one.
        It fetches the relevant OOS results for the last relevant visit if exists,
        or returns None when no previous completed session was found.
        """
        last_session_fk = self._get_previous_session_fk()
        if last_session_fk is None:
            return None
        oos_results = self._get_oos_results(last_session_fk)
        return oos_results

    def get_kpi_result_value(self):
        """
        This method extracts the kpi_result_types (pk/value pairs) from the DB.
        """
        result_type_query = self._get_kpi_result_value_query()
        result_types = self._execute_db_query(result_type_query)
        return result_types

    def _execute_db_query(self, query):
        """
        This method is responsible on the DB execution.
        It gets a query (string) and executes it, returning a DataFrame.
        On a DatabaseError it reconnects once and retries the same query.
        """
        try:
            result = pd.read_sql_query(query, self.rds_conn.db)
        except DatabaseError:
            # Stale/dropped connection: reconnect and retry once.
            self.rds_conn.connect_rds()
            result = pd.read_sql_query(query, self.rds_conn.db)
        return result

    def get_oos_reasons_for_session(self, session_uid):
        """Fetch the OOS exclude reasons recorded for the given session."""
        oos_reasons_query = self._get_oos_reasons_query(session_uid)
        oos_reasons = self._execute_db_query(oos_reasons_query)
        return oos_reasons

    # The following are the queries that we are using in order to get the previous
    # sessions relevant results.

    @staticmethod
    def _previous_oos_results_query(session_fk):
        """
        Build the query that fetches the OOS results of the given session,
        limited to the KPI types listed in Consts.PREV_RES_KPIS_FOR_NCC.
        :param session_fk: the session whose results are fetched.
        :return: the query string.
        """
        prev_results_query = """SELECT kpi_level_2_fk, numerator_id, result
        FROM report.kpi_level_2_results
        WHERE session_fk = {} AND kpi_level_2_fk IN
        (SELECT pk FROM static.kpi_level_2 WHERE type IN {})
        """.format(session_fk, Consts.PREV_RES_KPIS_FOR_NCC)
        return prev_results_query

    @staticmethod
    def _get_kpi_result_value_query():
        # Static lookup of every kpi_result_value pk/value pair.
        kpi_result_type = """SELECT pk, value FROM static.kpi_result_value;"""
        return kpi_result_type

    def _get_last_visit_fk_query(self):
        """
        Before fetching the results we need to get the previous session from the
        same store. This is a query that used by the Mobile team. This query
        returns the current session's fk and the previous one as well.

        A session qualifies only when it has no scene that is both non-deleted
        and not in status 6 (i.e. the visit is fully completed).
        """
        last_two_sessions_query = """SELECT s1.pk
        FROM probedata.session s1
        JOIN probedata.session s2 ON s2.store_fk = s1.store_fk
        AND s2.visit_date >= s1.visit_date AND s2.start_time >= s1.start_time
        AND ( SELECT count(1) from probedata.scene as sc
        where sc.session_uid = s2.session_uid and status <> 6
        AND sc.delete_time is null) = 0
        WHERE s2.session_uid = '{}' AND s2.delete_time is NULL AND s1.delete_time is NULL
        AND ( SELECT count(1) from probedata.scene as sc
        where sc.session_uid = s1.session_uid and status <> 6
        AND sc.delete_time is null) = 0
        ORDER BY s1.visit_date DESC , s1.start_time DESC limit 2;""".format(self.session_uid)
        return last_two_sessions_query

    @staticmethod
    def _get_oos_reasons_query(session_uid):
        # Joins the exclude records to their message and message-type lookups
        # so the caller gets the full reason text in one result set.
        query = """ SELECT * FROM probedata.oos_exclude oe
        JOIN static.oos_message om on om.pk=oe.oos_message_fk
        JOIN static.oos_message_type omt on omt.pk=om.type
        where oe.session_uid = '{}' and oe.delete_time is null; """.format(session_uid)
        return query