# Code example #1
class NESTLEUKToolBox(NESTLEUKConsts):
    """Session-level KPI calculation engine for the Nestle UK project.

    Reads the session data frames from the data provider, evaluates the
    KPIs described in the project's Excel templates ('Nestle_UK_v3.0' and
    'Template'), and accumulates INSERT queries in memory until
    ``commit_results_data`` writes them to the DB in one transaction.
    """

    # Levels of the static KPI hierarchy used by write_to_db_result:
    # set -> kpi -> atomic kpi.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.tools = NESTLEUKGENERALToolBox(self.data_provider,
                                            self.output,
                                            rds_conn=self.rds_conn)
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.store_type = self.store_info['store_type'].iloc[0]
        # Normalize NULL store types so substring checks against the
        # template's STORE_TYPE column do not fail on None.
        self.store_type = '' if self.store_type is None else self.store_type
        self.templates_class = NESTLEUKParseTemplates('Nestle_UK_v3.0')
        self.template_ava_class = NESTLEUKParseTemplates('Template')
        self.templates_data = self.templates_class.parse_template(
            sheet_name='KPIs')
        self.template_ava_data = self.template_ava_class.parse_template(
            sheet_name='Hierarchy')
        self.template_ava_visible = self.template_ava_class.parse_template(
            sheet_name='Visible')
        self.template_ava_bottom_shelf = self.template_ava_class.parse_template(
            sheet_name='Bottom shelf')
        self.template_ava_adjacent = self.template_ava_class.parse_template(
            sheet_name='Adjacent')
        self.template_ava_diamond = self.template_ava_class.parse_template(
            sheet_name='Diamond')
        self.scores = pd.DataFrame(columns=['ean_code', 'visible', 'ava'])
        self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
        self.custom_scif_queries = []
        # Bug fix: merge_insert_queries appends UPDATE statements to this
        # list, but it was never initialized, which raised AttributeError
        # the first time an 'update' query was merged.
        self.update_queries = []

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = NESTLEUKQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def calculate_nestle_score(self, set_name):
        """Calculate the weighted score of a template-defined KPI set.

        Walks the two-level template hierarchy under ``set_name``
        (KPI group -> atomic KPIs), dispatches each atomic KPI to its
        type-specific calculator, writes atomic/KPI/set results to the
        result queues, and returns nothing (results go to the DB queue).
        """
        set_scores = {}
        main_children = self.templates_data[self.templates_data[
            self.templates_class.KPI_GROUP] == set_name]
        for c in xrange(len(main_children)):
            main_child = main_children.iloc[c]
            children = self.templates_data[self.templates_data[
                self.templates_class.KPI_GROUP] == main_child[
                    self.templates_class.KPI_NAME]]
            scores = []
            for i in xrange(len(children)):
                child = children.iloc[i]
                kpi_type = child[self.templates_class.KPI_TYPE]
                # Skip KPIs that do not apply to this store type or whose
                # scene types did not occur in the session.
                if self.store_type not in child[
                        self.templates_class.STORE_TYPE]:
                    continue
                if not set(child[self.templates_class.SCENE_TYPE].split(
                        self.templates_class.SEPARATOR)) & set(
                            self.scif['template_name'].unique().tolist()):
                    continue
                if kpi_type == self.BLOCK_TOGETHER:
                    score = self.calculate_block_together_sets(child)
                elif kpi_type == self.FACING_COUNT:
                    score = self.calculate_facing_count(child)
                elif kpi_type == self.AVAILABILITY:
                    score = self.calculate_availability(child)
                elif kpi_type == self.FACING_SOS:
                    score = self.calculate_facing_sos(child)
                elif kpi_type == self.SHELF_POSITION:
                    score = self.calculate_shelf_position(child)
                else:
                    Log.warning(
                        "KPI of type '{}' is not supported".format(kpi_type))
                    continue
                if score is not None:
                    child_score_weight = child[self.templates_class.WEIGHT]
                    atomic_fk = self.get_atomic_fk(child)
                    self.write_to_db_result(atomic_fk,
                                            score,
                                            level=self.LEVEL3)
                    # Some calculators return (score, extra); only the
                    # first element participates in the weighted sum.
                    if isinstance(score, tuple):
                        score = score[0]
                    weighted_score = score * float(child_score_weight)
                    scores.append(weighted_score)

            # An empty group counts as zero rather than being skipped.
            if not scores:
                scores = [0]
            score_type = main_child[self.templates_class.SCORE]
            score_weight = float(main_child[self.templates_class.WEIGHT])
            if score_type == self.templates_class.SUM_OF_SCORES:
                score = sum(scores)
            else:
                score = 0
            kpi_name = main_child[self.templates_class.KPI_NAME]
            kpi_fk = self.kpi_static_data[self.kpi_static_data['kpi_name']
                                          == kpi_name]['kpi_fk'].values[0]
            set_scores[kpi_fk] = (score_weight, score)
        for kpi_fk, (_, kpi_score) in set_scores.items():
            self.write_to_db_result(kpi_fk, kpi_score, level=self.LEVEL2)
        set_score = round(
            sum([score[0] * score[1] for score in set_scores.values()]), 2)
        set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] ==
                                      set_name]['kpi_set_fk'].values[0]
        self.write_to_db_result(set_fk, set_score, level=self.LEVEL1)

    @kpi_runtime()
    def calculate_ava(self):
        """Calculate the availability ('AVA') hierarchy KPIs.

        For every atomic KPI in the 'Hierarchy' sheet whose scene types
        occurred in this session, dispatches to the bottom-shelf /
        adjacent / diamond calculators, which record per-product results
        through ``get_custom_query``.
        """
        for set_name in self.template_ava_data['Set Name'].unique().tolist():
            kpk = self.template_ava_data[
                self.template_ava_data['Set Name'] ==
                set_name]['KPI Group'].unique().tolist()
            for main_kpi in kpk:
                atomics = self.template_ava_data[
                    self.template_ava_data['KPI Group'] == main_kpi]
                for i in xrange(len(atomics)):
                    atomic = atomics.iloc[i]
                    if not set(atomic[self.templates_class.SCENE_TYPE].split(
                            self.templates_class.SEPARATOR)) & set(
                                self.scif['template_name'].unique().tolist()):
                        continue
                    templates = map(
                        lambda x: x.strip(),
                        atomic[self.templates_class.SCENE_TYPE].split(','))
                    scenes_to_check = self.scif[
                        self.scif['template_name'].isin(
                            templates)]['scene_fk'].unique().tolist()
                    kpi_type = atomic[self.templates_class.KPI_TYPE]
                    if kpi_type == self.BOTTOM_SHELF:
                        params = self.template_ava_bottom_shelf[
                            self.template_ava_bottom_shelf['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_bottom_shelf(params, scenes_to_check)
                    elif kpi_type == self.ADJACENT:
                        params = self.template_ava_adjacent[
                            self.template_ava_adjacent['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_adjacent(params, scenes_to_check)
                    elif kpi_type == self.DIAMOND:
                        params = self.template_ava_diamond[
                            self.template_ava_diamond['KPI Name'] ==
                            atomic['KPI Name']].iloc[0]
                        self.calculate_diamond(params, scenes_to_check)
                    else:
                        Log.warning("KPI of type '{}' is not supported".format(
                            kpi_type))
                        continue

    def get_custom_query(self,
                         scene_fk,
                         product_fk,
                         in_assortment_OSA=0,
                         oos_osa=0,
                         mha_in_assortment=0,
                         mha_oos=0,
                         length_mm_custom=0):
        """Queue one custom-SCIF row (per session/scene/product) for insert."""
        attributes = pd.DataFrame(
            [(self.session_fk, scene_fk, product_fk, in_assortment_OSA,
              oos_osa, mha_in_assortment, mha_oos, length_mm_custom)],
            columns=[
                'session_fk', 'scene_fk', 'product_fk', 'in_assortment_OSA',
                'oos_osa', 'mha_in_assortment', 'mha_oos', 'length_mm_custom'
            ])

        query = insert(attributes.to_dict(), self.PSERVICE_CUSTOM_SCIF)
        self.custom_scif_queries.append(query)

    def calculate_bottom_shelf(self, kpi, scenes_to_check):
        """Record, per scene and product, whether the product sits inside
        the central ``shelf_percent`` span of the given bottom shelves."""
        target = int(kpi[self.templates_class.TARGET])
        shelf_number = map(lambda x: x.strip(),
                           kpi['shelf_number_from_bottom'].split(','))
        shelf_percent = int(kpi['shelf_percent'])
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        for scene in scenes_to_check:
            shelf_edges = self.build_shelf_edges(scene, shelf_percent)
            for product_fk in products_for_check:
                result = self.tools.calculate_availability(
                    product_fk=product_fk, scene_fk=scene)
                if result:
                    in_assortment_osa = 1
                    result = self.calculate_contain(scene, product_fk,
                                                    shelf_edges, shelf_number)
                    mha_in_assortment = 1 if result >= target else 0
                else:
                    in_assortment_osa = mha_in_assortment = 0
                self.get_custom_query(scene_fk=scene,
                                      product_fk=product_fk,
                                      in_assortment_OSA=in_assortment_osa,
                                      mha_in_assortment=mha_in_assortment)

    def build_shelf_edges(self, scene_fk, shelf_percent):
        """Return the left/right x-coordinates (mm) of the central
        ``shelf_percent`` portion of the scene's shelf span."""
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        left = matches.copy().sort_values('x_mm', ascending=True).iloc[0]
        left = int(left['x_mm']) - (int(left['width_mm']) / 2
                                    )  # TODO width_mm_net
        right = matches.copy().sort_values('x_mm', ascending=False).iloc[0]
        right = int(right['x_mm']) + (int(right['width_mm']) / 2
                                      )  # TODO width_mm_net
        shelf_len = right - left
        # Half of the length trimmed from each side to keep the middle
        # shelf_percent% of the shelf.
        shelf_len_after_downsize = (shelf_len -
                                    (shelf_len * shelf_percent / 100)) / 2
        edges = {
            'left': left + shelf_len_after_downsize,
            'right': right - shelf_len_after_downsize
        }
        return edges

    def build_product_edges(self, matches):
        """Return [{'left': x0, 'right': x1}, ...] horizontal extents (mm)
        for every product facing in ``matches``."""
        points = []
        for _, product_show in matches.iterrows():
            left = int(product_show['x_mm']) - (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            right = int(product_show['x_mm']) + (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            points.append({'left': left, 'right': right})
        return points

    def calculate_contain(self, scene, product_fk, shelf_edges, shelf_number):
        """Return True if any facing of the product on the given shelves
        overlaps the [left, right] shelf window by either edge."""
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{
                'scene_fk': scene,
                'shelf_number_from_bottom': shelf_number,
                'product_fk': product_fk
            })]
        points = self.build_product_edges(matches)
        for point in points:
            if (shelf_edges['left'] < point['left'] < shelf_edges['right']) or \
                    (shelf_edges['left'] < point['right'] < shelf_edges['right']):
                return True
        return False

    def calculate_diamond(self, kpi, scenes_to_check):
        """Record, per scene and product, whether the product falls inside
        the scene's central 'diamond' polygon."""
        target = int(kpi[self.templates_class.TARGET])
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        for scene in scenes_to_check:
            if self.validate_scene(scene):
                polygon = self.build_diamond_polygon(scene)
                for product_fk in products_for_check:
                    # Bug fix: availability was checked against the whole
                    # scene list instead of the current scene (cf. the
                    # per-scene call in calculate_bottom_shelf).
                    result = self.tools.calculate_availability(
                        product_fk=product_fk, scene_fk=scene)
                    if result:
                        in_assortment_osa = 1
                        result = self.calculate_polygon(scene=scene,
                                                        product_fk=product_fk,
                                                        polygon=polygon)
                        mha_in_assortment = 1 if result >= target else 0
                    else:
                        in_assortment_osa = mha_in_assortment = 0
                    self.get_custom_query(scene_fk=scene,
                                          product_fk=product_fk,
                                          in_assortment_OSA=in_assortment_osa,
                                          mha_in_assortment=mha_in_assortment)

    def validate_scene(self, scene_fk):
        """Return True if the scene has more than one shelf (a diamond
        polygon is meaningless on a single shelf)."""
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        return len(matches['shelf_number'].unique().tolist()) > 1

    def build_diamond_polygon(self, scene_fk):
        """Build a diamond-shaped Polygon spanning the scene.

        Vertices are the midpoints of the scene's bounding box edges:
        top of the highest shelf, bottom of the second shelf from the
        bottom (falling back to the lowest shelf's top edge), and the
        outermost left/right product edges.
        """
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene_fk})]
        shelf_number = min(matches['shelf_number'].unique().tolist())
        top = matches[(matches['shelf_number'] == shelf_number)
                      & (matches['stacking_layer'] == 1)].sort_values(
                          'y_mm', ascending=False).iloc[0]
        top = int(top['y_mm']) - (int(top['height_mm']) / 2
                                  )  # TODO height_mm_net
        try:
            bottom = matches[(matches['shelf_number_from_bottom'] == 2)
                             & (matches['stacking_layer'] == 1)].sort_values(
                                 'y_mm', ascending=False).iloc[0]
            bottom = int(bottom['y_mm']) - (int(bottom['height_mm']) / 2
                                            )  # TODO height_mm_net
        except IndexError:
            # No second shelf from the bottom -- fall back to the lowest
            # shelf's top edge.
            bottom = matches[matches['shelf_number_from_bottom'] ==
                             1].sort_values('y_mm', ascending=False).iloc[0]
            bottom = int(bottom['y_mm']) + (int(bottom['height_mm']) / 2
                                            )  # TODO height_mm_net
        left = matches.copy().sort_values('x_mm', ascending=True).iloc[0]
        left = int(left['x_mm']) - (int(left['width_mm']) / 2
                                    )  # TODO width_mm_net
        right = matches.copy().sort_values('x_mm', ascending=False).iloc[0]
        right = int(right['x_mm']) + (int(right['width_mm']) / 2
                                      )  # TODO width_mm_net
        middle_x = (right + left) / 2
        middle_y = (top + bottom) / 2
        polygon = Polygon([(middle_x, top), (right, middle_y),
                           (middle_x, bottom), (left, middle_y)])
        return polygon

    def calculate_polygon(self, scene, product_fk, polygon):
        """Return True if any corner of any facing of the product lies
        strictly inside ``polygon``."""
        matches = self.match_product_in_scene[self.tools.get_filter_condition(
            self.match_product_in_scene, **{'scene_fk': scene})]
        points = self.build_array_of_points(matches, product_fk)
        for point in points:
            if polygon.contains(point):
                return True
        return False

    def build_array_of_points(self, matches, product):
        """Return the four corner Points of every facing of ``product``."""
        points = []
        for _, product_show in matches[matches['product_fk'] ==
                                       product].iterrows():
            top = int(product_show['y_mm']) + (
                int(product_show['height_mm']) / 2)  # TODO height_mm_net
            bottom = int(product_show['y_mm']) - (
                int(product_show['height_mm']) / 2)  # TODO height_mm_net
            left = int(product_show['x_mm']) - (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            right = int(product_show['x_mm']) + (
                int(product_show['width_mm']) / 2)  # TODO width_mm_net
            points += (Point(left, top), Point(right, top),
                       Point(left, bottom), Point(right, bottom))
        return points

    def calculate_adjacent(self, kpi, scenes_to_check):
        """Record, per scene and product, whether the product is adjacent
        to the anchor entity defined by the KPI's adjacent_type/value."""
        adjacent_type = kpi['adjacent_type']
        adjacent_value = kpi['adjacent_value']
        anchor_filters = {adjacent_type: adjacent_value}
        products_for_check = map(lambda x: x.strip(),
                                 kpi['product_ean_code'].split(','))
        products_for_check = self.all_products[
            self.all_products['product_ean_code'].isin(
                products_for_check)]['product_fk'].tolist()
        general_filters = {'scene_fk': scenes_to_check}
        for scene in scenes_to_check:
            for product_fk in products_for_check:
                # Bug fix: availability was checked against the whole
                # scene list instead of the current scene (cf. the
                # per-scene call in calculate_bottom_shelf).
                result = self.tools.calculate_availability(
                    product_fk=product_fk, scene_fk=scene)
                if result:
                    in_assortment_osa = 1
                    result = not self.tools.calculate_non_proximity(
                        tested_filters={'product_fk': product_fk},
                        anchor_filters=anchor_filters,
                        allowed_diagonal=False,
                        **general_filters)
                    mha_in_assortment = 1 if result else 0
                else:
                    in_assortment_osa = mha_in_assortment = 0
                self.get_custom_query(scene_fk=scene,
                                      product_fk=product_fk,
                                      in_assortment_OSA=in_assortment_osa,
                                      mha_in_assortment=mha_in_assortment)

    @kpi_runtime()
    def calculate_block_together_sets(self, kpi):
        """
        This function calculates every block-together-typed KPI from the relevant sets, and returns the set final score.
        """
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        brands_for_block_check = kpi[self.templates_class.BRAND].split(
            self.templates_class.SEPARATOR)
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        if not kpi[self.templates_class.CATEGORY]:
            result = self.tools.calculate_block_together(
                brand_name=brands_for_block_check, scene_fk=scenes_to_check)
        else:
            category = kpi[self.templates_class.CATEGORY]
            result = self.tools.calculate_block_together(
                brand_name=brands_for_block_check,
                scene_fk=scenes_to_check,
                category=category)
        score = 100 if result else 0

        return score

    @kpi_runtime()
    def calculate_facing_count(self, kpi):
        """Score 100 if the KPI's SKUs reach the facings target in the
        relevant scenes (bottom stacking layer only), else 0."""
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        products_for_check = kpi[self.templates_class.SKU]
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        result = self.tools.calculate_availability(
            product_ean_code=products_for_check,
            scene_fk=scenes_to_check,
            stacking_layer=1)
        # NOTE(review): when TARGET is empty the raw cell value is
        # compared against the numeric result -- presumably intended as
        # "no target"; confirm against the template contents.
        if kpi[self.templates_class.TARGET]:
            target = float(kpi[self.templates_class.TARGET])
        else:
            target = kpi[self.templates_class.TARGET]
        score = 100 if result >= target else 0

        return score

    @kpi_runtime()
    def calculate_availability(self, kpi):
        """Score 100 only if every product list in the 'Availability'
        sheet is available (stacking layer 1) in the collected scene
        types for this store type, else 0."""
        kpi_name = kpi[self.templates_class.KPI_NAME]
        templates_data = self.templates_class.parse_template(
            sheet_name='Availability',
            lower_headers_row_index=4,
            upper_headers_row_index=3,
            data_content_column_index=6,
            input_column_name_separator=', ')
        scene_types = []
        scores = []
        session_templates = self.scif['template_name'].unique().tolist()
        for scene_type in session_templates:
            # Columns in the sheet are keyed by '<store type>;<scene type>'.
            availability_id = '{};{}'.format(self.store_type, scene_type)
            if availability_id not in templates_data.columns:
                continue
            availability_data = templates_data[
                (templates_data[self.templates_class.KPI_NAME] == kpi_name)
                & (templates_data[availability_id] == 1)]
            if not availability_data.empty:
                scene_types.append(scene_type)
        # NOTE(review): the product lists below are taken from the whole
        # sheet, not filtered by kpi_name/scene type -- confirm this is
        # intentional.
        products_for_check = templates_data[
            self.templates_class.availability_consts.PRODUCT_EAN_CODES].tolist(
            )
        for products_list in products_for_check:
            try:
                products = products_list.split(', ')
            except AttributeError:
                # Cell already holds a non-string (e.g. a single value).
                products = products_list
            result = self.tools.calculate_availability(
                product_ean_code=products,
                template_name=scene_types,
                stacking_layer=1)
            scores.append(100 if result > 0 else 0)

        return 0 if 0 in scores else 100

    @kpi_runtime()
    def calculate_shelf_position(self, kpi):
        """Score 100 only if every product list of this KPI is found on
        its required shelves in the relevant scenes, else 0."""
        kpi_name = kpi[self.templates_class.KPI_NAME]
        templates_data = self.templates_class.parse_template(
            sheet_name='Shelf Position')
        scores = []
        shelf_position_data = templates_data[(
            templates_data[self.templates_class.KPI_NAME] == kpi_name)]
        products_for_check = shelf_position_data[
            self.templates_class.availability_consts.PRODUCT_EAN_CODES].tolist(
            )
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        for products_list in products_for_check:
            try:
                products = products_list.split(', ')
            except AttributeError:
                # Cell already holds a non-string (e.g. a single value).
                products = products_list
            shelves = shelf_position_data.loc[
                shelf_position_data[
                    self.templates_class.availability_consts.PRODUCT_EAN_CODES]
                == products_list]['Shelf Position'].values[0].split(',')
            result = self.tools.calculate_shelf_level_assortment(
                shelves=[int(shelf) for shelf in shelves],
                product_ean_code=products,
                scene_fk=scenes_to_check)
            scores.append(100 if result > 0 else 0)

        return 0 if 0 in scores else 100

    @kpi_runtime()
    def calculate_facing_sos(self, kpi):
        """Score 100 if the manufacturer's facings share of shelf exceeds
        the KPI target in the relevant scenes, else 0."""
        templates = kpi[self.templates_class.SCENE_TYPE].split(
            self.templates_class.SEPARATOR)
        manufacturers_for_check = kpi[self.templates_class.MANUFACTURER]
        scenes_to_check = self.scif[self.scif['template_name'].isin(
            templates)]['scene_fk'].unique().tolist()
        sos_filters = {'manufacturer_name': manufacturers_for_check}
        if kpi[self.templates_class.CATEGORY] is None:
            result = self.tools.calculate_share_of_shelf(
                sos_filters=sos_filters,
                scene_fk=scenes_to_check,
                stacking_layer=1)
        else:
            category = kpi[self.templates_class.CATEGORY]
            result = self.tools.calculate_share_of_shelf(
                sos_filters=sos_filters,
                scene_fk=scenes_to_check,
                category=category,
                stacking_layer=1)

        # NOTE(review): the target comes straight from the template cell;
        # confirm it is numeric and on the same scale as the SOS result.
        score = 100 if result > kpi[self.templates_class.TARGET] else 0

        return score

    def get_atomic_fk(self, params):
        """
        This function gets an Atomic KPI's FK out of the template data.
        """
        atomic_name = params[self.templates_class.KPI_NAME]
        kpi_name = params[self.templates_class.KPI_GROUP]
        atomic_fk = self.kpi_static_data[
            (self.kpi_static_data['kpi_name'] == kpi_name)
            & (self.kpi_static_data['atomic_kpi_name'] == atomic_name
               )]['atomic_kpi_fk']
        if atomic_fk.empty:
            return None
        return atomic_fk.values[0]

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.
        Returns the frame as a dict; an unknown level yields an empty dict.
        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0]
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid,
                  kpi_set_name, self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'kpi_fk',
                    'atomic_kpi_fk'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    def commit_custom_scif(self):
        """Replace this session's custom-SCIF rows with the queued ones.

        Deletes the session's existing rows, then executes the merged
        insert queries; failures on individual queries are logged and
        skipped (best-effort, as before).
        """
        if not self.rds_conn.is_connected:
            self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_query = NESTLEUKQueries.get_delete_session_custom_scif(
            self.session_fk)
        cur.execute(delete_query)
        self.rds_conn.db.commit()
        queries = self.merge_insert_queries(self.custom_scif_queries)
        for query in queries:
            try:
                cur.execute(query)
            except Exception:
                Log.warning('could not run query: {}'.format(query))
        self.rds_conn.db.commit()

    def merge_insert_queries(self, insert_queries):
        """Group single-row INSERTs by their static prefix and merge them
        into multi-row VALUES statements (at most 10**4 rows each).
        UPDATE statements are diverted to ``self.update_queries``."""
        query_groups = {}
        for query in insert_queries:
            if 'update' in query:
                self.update_queries.append(query)
            else:
                static_data, inserted_data = query.split('VALUES ')
                if static_data not in query_groups:
                    query_groups[static_data] = []
                query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.commit_custom_scif()
        cur = self.rds_conn.db.cursor()
        delete_queries = NESTLEUKQueries.get_delete_session_results_query(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        queries = self.merge_insert_queries(self.kpi_results_queries)
        for query in queries:
            cur.execute(query)
        self.rds_conn.db.commit()
# Code example #2
class PENAFLORAR_SANDDIAGEOARToolBox:
    """KPI calculation toolbox for the PENAFLORAR_SAND DIAGEO AR project.

    Calculates per-session set scores (visibility, relative position,
    assortment, activation standard) and queues results for both the legacy
    KPI tables (via ``write_to_db_result``) and the new tables (via
    ``commonV2``).
    """
    # Result-table hierarchy levels: 1 = KPI set, 2 = KPI, 3 = atomic KPI.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3
    ACTIVATION_STANDARD = 'Activation Standard'

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.kpi_static_data = self.get_kpi_static_data()
        # Template data per set name, lazily downloaded in main_calculation.
        self.set_templates_data = {}
        self.match_display_in_scene = self.get_match_display()
        self.kpi_results_queries = []
        self.scores = {self.LEVEL1: {}, self.LEVEL2: {}, self.LEVEL3: {}}

        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.global_gen = DIAGEOGenerator(self.data_provider, self.output,
                                          self.common)
        self.tools = DIAGEOToolBox(
            self.data_provider,
            output,
            match_display_in_scene=self.match_display_in_scene
        )  # replace the old one
        # NOTE(review): same construction as self.global_gen above —
        # presumably kept for backward compatibility; confirm before merging.
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output, self.common)

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results for every requested set.

        :param set_names: iterable of KPI set names to calculate.
        Results are written to the new tables via commonV2 and to the legacy
        tables via write_to_db_result / save_level2_and_level3.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()

        # Global assortment kpis
        assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2(
        )
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data.keys(
            ):
                try:
                    self.set_templates_data[
                        set_name] = self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue
            # if set_name in ('MPA', 'New Products',):
            #     set_score = self.calculate_assortment_sets(set_name)

            # Global Visible to Customer / Visible to Consumer
            if set_name in ('Visible to Customer', 'Visible to Consumer %'):
                # Global function
                sku_list = filter(
                    None, self.scif[self.scif['product_type'] ==
                                    'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(
                    sku_list)

                if res_dict:
                    # Saving to new tables
                    # parent_res = res_dict[-1]
                    self.commonV2.save_json_to_new_tables(res_dict)

                    # Saving to old tables
                    # result = parent_res['result']
                    # self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result)

                # Saving to old tables
                filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                set_score = self.tools.calculate_visible_percentage(
                    visible_filters=filters)
                self.save_level2_and_level3(set_name, set_name, set_score)

            # Fixed: was `in ('Relative Position')` — a substring test on the
            # string itself, not membership in a one-element tuple.
            elif set_name in ('Relative Position',):
                # Global function
                res_dict = self.diageo_generator.diageo_global_relative_position_function(
                    self.set_templates_data[set_name],
                    location_type='template_display_name')

                if res_dict:
                    # Saving to new tables
                    self.commonV2.save_json_to_new_tables(res_dict)

                set_score = self.calculate_relative_position_sets(set_name)
            else:
                # NOTE(review): `return` aborts the remaining sets AND skips
                # commonV2.commit_results_data() below — confirm this is
                # intentional rather than `continue`.
                return

            # A False score means the set had nothing to calculate; skip it.
            # Fixed: the original tested `set_score == 0` first, but
            # False == 0 in Python, so the `is False` branch was unreachable
            # and empty sets were written as score 0.
            if set_score is False:
                continue

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # commiting to new tables
        self.commonV2.commit_results_data()

    def calculate_relative_position_sets(self, set_name):
        """
        This function calculates every relative-position-typed KPI from the relevant sets, and returns the set final score.

        Returns False when no KPI in the set matched the store's
        additional_attribute_2, otherwise the percentage of passed KPIs.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            # Only rows matching the store's attribute are relevant;
            # template rows with no attribute default to 'Empty'.
            if self.store_info.at[0, 'additional_attribute_2'] == params.get(
                    'additional_attribute_2', 'Empty'):
                tested_filters = {
                    params.get(TESTED_TYPE): params.get(TESTED_VALUE)
                }
                anchor_filters = {
                    params.get(ANCHOR_TYPE): params.get(ANCHOR_VALUE)
                }
                # Maximum allowed distance per direction, as parsed from the
                # template ('unlimited' becomes 1000, non-numeric becomes 0).
                direction_data = {
                    'top':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.TOP_DISTANCE)),
                    'bottom':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.BOTTOM_DISTANCE)),
                    'left':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.LEFT_DISTANCE)),
                    'right':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.RIGHT_DISTANCE))
                }
                general_filters = {
                    'template_display_name': params.get(self.tools.LOCATION)
                }
                result = self.tools.calculate_relative_position(
                    tested_filters, anchor_filters, direction_data,
                    **general_filters)
                score = 1 if result else 0
                scores.append(score)

                self.save_level2_and_level3(set_name,
                                            params.get(self.tools.KPI_NAME),
                                            score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def _get_direction_for_relative_position(self, value):
        """
        This function converts direction data from the template (as string) to a number.

        'Unlimited' maps to 1000, empty/non-numeric values to 0, anything
        else to its integer value.
        """
        if value == self.tools.UNLIMITED_DISTANCE:
            value = 1000
        elif not value or not str(value).isdigit():
            value = 0
        else:
            value = int(value)
        return value

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.

        Returns False when no template row applies to this store type,
        otherwise the percentage of passed KPIs.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            target = str(params.get(self.store_type, ''))
            if target.isdigit() or target.capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                products = str(
                    params.get(self.tools.PRODUCT_EAN_CODE,
                               params.get(self.tools.PRODUCT_EAN_CODE2,
                                          ''))).replace(',', ' ').split()
                # Non-numeric targets ('Relevant for store' etc.) mean at
                # least one product must be found.
                target = 1 if not target.isdigit() else int(target)
                kpi_name = params.get(self.tools.GROUP_NAME,
                                      params.get(self.tools.PRODUCT_NAME))
                kpi_static_data = self.kpi_static_data[
                    (self.kpi_static_data['kpi_set_name'] == set_name)
                    & (self.kpi_static_data['kpi_name'] == kpi_name)]
                if len(products) > 1:
                    result = 0
                    for product in products:
                        product_score = self.tools.calculate_assortment(
                            product_ean_code=product)
                        result += product_score
                        atomic_fk = kpi_static_data[
                            kpi_static_data['description'] ==
                            product]['atomic_kpi_fk'].values[0]
                        self.write_to_db_result(atomic_fk,
                                                product_score,
                                                level=self.LEVEL3)
                    score = 1 if result >= target else 0
                else:
                    # NOTE(review): this passes the whole (0- or 1-element)
                    # list, unlike the loop above which passes a single EAN —
                    # confirm calculate_assortment accepts a list here.
                    result = self.tools.calculate_assortment(
                        product_ean_code=products)
                    atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0]
                    score = 1 if result >= target else 0
                    self.write_to_db_result(atomic_fk,
                                            score,
                                            level=self.LEVEL3)

                scores.append(score)
                kpi_fk = kpi_static_data['kpi_fk'].values[0]
                self.write_to_db_result(kpi_fk, score, level=self.LEVEL2)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_activation_standard(self):
        """
        This function calculates the Activation Standard KPI, and saves the result to the DB (for all 3 levels).

        The final score is a weighted sum over the template rows; each row's
        score is taken from a previously calculated KPI (level 2) or set
        (level 1) and compared against its target.
        """
        final_score = 0
        for params in self.tools.download_template(self.ACTIVATION_STANDARD):
            set_name = params.get(self.tools.ACTIVATION_SET_NAME)
            kpi_name = params.get(self.tools.ACTIVATION_KPI_NAME)
            target = float(params.get(self.tools.ACTIVATION_TARGET))
            # Targets given as fractions (< 1) are normalized to percentages.
            target = target * 100 if target < 1 else target
            score_type = params.get(self.tools.ACTIVATION_SCORE)
            weight = float(params.get(self.tools.ACTIVATION_WEIGHT))
            if kpi_name:
                kpi_fk = self.kpi_static_data[
                    (self.kpi_static_data['kpi_set_name'] == set_name)
                    & (self.kpi_static_data['kpi_name'] == kpi_name
                       )]['kpi_fk'].values[0]
                score = self.scores[self.LEVEL2].get(kpi_fk, 0)
            else:
                set_fk = self.kpi_static_data[
                    self.kpi_static_data['kpi_set_name'] ==
                    set_name]['kpi_set_fk'].values[0]
                score = self.scores[self.LEVEL1].get(set_fk, 0)
            if score >= target:
                score = 100
            else:
                if score_type == 'PROPORTIONAL':
                    score = (score / float(target)) * 100
                else:
                    score = 0
            final_score += score * weight
            self.save_level2_and_level3(self.ACTIVATION_STANDARD, set_name,
                                        score)
        set_fk = self.kpi_static_data[
            self.kpi_static_data['kpi_set_name'] ==
            self.ACTIVATION_STANDARD]['kpi_set_fk'].values[0]
        self.write_to_db_result(set_fk, final_score, self.LEVEL1)

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'] == set_name)
            & (self.kpi_static_data['kpi_name'] == kpi_name)]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        # Narrowed from a bare except: an empty selection raises IndexError.
        except IndexError:
            Log.warning("kpi name or set name don't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            # Unknown level: silently skip (nothing sensible to write).
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        :param fk: the level-appropriate foreign key (set / kpi / atomic).
        :param score: the score to persist.
        :param level: one of LEVEL1 / LEVEL2 / LEVEL3.
        :return: a dict representation of the single-row attributes frame.
        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0]
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid,
                  kpi_set_name, self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'kpi_fk',
                    'atomic_kpi_fk'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.

        The connection is recycled first because the calculation phase can
        outlive the DB connection timeout.
        """
        insert_queries = self.merge_insert_queries(self.kpi_results_queries)
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    @staticmethod
    def merge_insert_queries(insert_queries):
        """Group INSERT queries by their static prefix and merge each
        group's VALUES tuples into one bulk INSERT statement."""
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(
                group, ',\n'.join(query_groups[group])))
        return merged_queries
コード例 #3
0
class BATRUNewTemplate:
    def __init__(self, project_name, set_name):
        """Initialize the static-template updater for one project and KPI set.

        Depending on *set_name*, existing static data may be deleted and a
        template parsed, so constructing this object has DB side effects
        for the SK and P4 sets.
        """
        self.project = project_name
        self.log_suffix = '{}: '.format(self.project)
        self.queries = []
        self.kpi = set_name
        # Ids of sets/kpis inserted during this run, keyed by name.
        self.sets_added = {}
        self.kpis_added = {}
        # Counters reported after handle_update() finishes.
        self.kpi_counter = {'set': 0, 'kpi': 0, 'atomic': 0}
        self.data = pd.DataFrame()
        self.set_fk = self.get_set_fk(set_name)
        if set_name == BATRUConst.SK_SET_NAME:
            # SK: wipe this set's static KPIs first, then rebuild from template.
            self.delete_static_DB()
            self.aws_conn = PSProjectConnector(self.project,
                                               DbUsers.CalculationEng)
            self.kpi_static_data = self.get_kpi_data()
            self.get_kpis_from_template()
        elif set_name == BATRUConst.SAS_SET_NAME:
            # SAS: keep existing static data; only missing KPIs get added later.
            self.aws_conn = PSProjectConnector(self.project,
                                               DbUsers.CalculationEng)
            self.kpi_static_data = self.get_kpi_data()
            self.get_kpis_from_template_sas()
        elif set_name == BATRUConst.P4_SET_NAME:
            self.delete_static_DB()
            self.kpi_static_data = self.get_kpi_data()
            self.p4_template = parse_template(BATRUConst.P4_PATH,
                                              BATRUConst.POSM_SHEET)
            # Encode every template column to UTF-8 byte strings up front.
            for column in self.p4_template.columns:
                self.p4_template[column] = self.encode_column_in_df(
                    self.p4_template, column)
            self.alreadyAddedAtomics = pd.DataFrame(columns=[
                BATRUConst.SET_NAME, BATRUConst.KPI_NAME,
                BATRUConst.GROUP_NAME_P4, BATRUConst.ATOMIC_NAME
            ])

    def get_kpis_from_template_sas(self):
        """Build the SAS KPI hierarchy frame from the SAS zone template.

        One row is produced per (fixture counter, display name) pair; a
        synthetic "No competitors in SAS Zone" atomic is appended to the
        display names for every fixture. The result is stored in self.data.
        """
        template = parse_template(BATRUConst.P3_PATH,
                                  BATRUConst.SAS_ZONE_SHEET)
        display_names = list(template['display_name'].unique())
        display_names.append("No competitors in SAS Zone")
        rows = []
        for fixture in template['Equipment'].unique():
            for counter in range(0, 11):
                # Counter 0 is the bare fixture name; 1..10 get a suffix.
                if counter == 0:
                    kpi_name = fixture
                else:
                    kpi_name = BATRUConst.P3_COUNT_FIXTURE.format(
                        fixture, counter)
                for display_name in display_names:
                    rows.append({
                        BATRUConst.SET_NAME: BATRUConst.SAS_SET_NAME,
                        BATRUConst.KPI_NAME: kpi_name,
                        BATRUConst.ATOMIC_NAME: display_name
                    })
        self.data = pd.DataFrame(rows)

    @staticmethod
    def encode_column_in_df(df, column_name):
        return df[column_name].str.encode('utf-8')

    def get_kpis_from_template(self):
        """Build the SK KPI hierarchy frame from the SK template sheet.

        For every fixture counter (0..10) and section, one row is created
        per entry in BATRUConst.convert_names; the MODEL_ID entry uses the
        section itself as atomic name with a relative score of 1. The
        result is stored in self.data.
        """
        template = parse_template(BATRUConst.P3_PATH, BATRUConst.SK_SHEET)
        fixtures = template['fixture'].unique()
        sections = template['section_name'].unique()
        rows = []
        for fixture in fixtures:
            for counter in range(0, 11):
                # Counter 0 is the bare fixture name; 1..10 get a suffix.
                if counter == 0:
                    kpi_name = fixture
                else:
                    kpi_name = fixture + " - {}".format(counter)
                for model_id in sections:
                    for name in BATRUConst.convert_names.keys():
                        if name == BATRUConst.MODEL_ID:
                            atomic_name = model_id
                            display_text = model_id
                            relative_score = 1
                        else:
                            atomic_name = name
                            relative_score = 0
                            display_text = self.encode_string(
                                BATRUConst.convert_names[name])
                        rows.append({
                            BATRUConst.SET_NAME: BATRUConst.SK_SET_NAME,
                            BATRUConst.KPI_NAME: kpi_name,
                            BATRUConst.ATOMIC_NAME: atomic_name,
                            BATRUConst.MODEL_ID: model_id,
                            BATRUConst.RELATIVE_SCORE: relative_score,
                            BATRUConst.DISPLAY_TEXT: display_text
                        })
        self.data = pd.DataFrame(rows)

    @staticmethod
    def encode_string(str):
        try:
            return str.replace("'", "\\'").encode('utf-8')
        except:
            Log.debug('The name {} is already coded'.format(str))
            return str

    @property
    def rds_conn(self):
        """Lazily created, cached DB connection for this project."""
        try:
            return self._rds_conn
        except AttributeError:
            self._rds_conn = PSProjectConnector(self.project,
                                                DbUsers.CalculationEng)
            return self._rds_conn

    def delete_static_DB(self):
        cur = self.rds_conn.db.cursor()
        atomic_query = """
            delete from static.atomic_kpi
            where kpi_fk in (select pk from static.kpi where kpi_set_fk = {});
            """.format(self.set_fk)
        kpi_query = """
            delete from static.kpi where kpi_set_fk = {};
            """.format(self.set_fk)
        delete_queries = [atomic_query, kpi_query]
        for query in delete_queries:
            cur.execute(query)
            print query
        self.rds_conn.db.commit()

    def get_set_fk(self, set_name):
        """Fetch the primary key of the KPI set named *set_name*."""
        self.rds_conn.connect_rds()
        query = """
                select pk
                from static.kpi_set where name = "{}";
                """.format(set_name)
        result = pd.read_sql_query(query, self.rds_conn.db)
        return result.iloc[0, 0]

    def get_kpi_data(self):
        """Load the full static KPI hierarchy (set / kpi / atomic) into a
        DataFrame, with the text columns UTF-8-encoded for comparison with
        template values."""
        self.rds_conn.connect_rds()
        query = """
            select api.name as atomic_kpi_name, api.pk as atomic_kpi_fk, api.description,
                   kpi.display_text as kpi_name, kpi.pk as kpi_fk, api.model_id as section,
                   kps.name as kpi_set_name, kps.pk as kpi_set_fk
            from static.kpi_set kps
            left join static.kpi kpi on kps.pk = kpi.kpi_set_fk
            left join static.atomic_kpi api on kpi.pk = api.kpi_fk;
        """
        kpi_data = pd.read_sql_query(query, self.rds_conn.db)
        for column in ('description', 'kpi_name', 'atomic_kpi_name',
                       'kpi_set_name', 'section'):
            kpi_data[column] = self.encode_column_in_df(kpi_data, column)
        return kpi_data

    def handle_update(self):
        """Dispatch the static-data update for the configured KPI set.

        P4 commits via commit_to_db; SK and SAS insert level-2 KPIs and
        atomics, then log how many entities were added.
        """
        if self.kpi == BATRUConst.P4_SET_NAME:
            self.add_p4_to_static()
            self.commit_to_db()
            return
        if self.kpi == BATRUConst.SK_SET_NAME:
            self.add_kpis_to_static_p3()
            self.add_atomics_to_static_p3()
        elif self.kpi == BATRUConst.SAS_SET_NAME:
            self.add_kpis_to_static_sas()
            self.add_atomics_to_static_sas()
        else:
            return
        Log.info('{} Sets, {} KPIs and {} Atomics have been added'.format(
            self.kpi_counter['set'], self.kpi_counter['kpi'],
            self.kpi_counter['atomic']))

    def add_kpis_to_static_sas(self):
        kpis = self.data.drop_duplicates(
            subset=[BATRUConst.SET_NAME, BATRUConst.KPI_NAME], keep='first')
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for i in xrange(len(kpis)):
            set_name = self.encode_string(kpis.iloc[i][BATRUConst.SET_NAME])
            kpi_name = self.encode_string(kpis.iloc[i][BATRUConst.KPI_NAME])
            if self.kpi_static_data[
                (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name)].empty:
                set_fk = self.kpi_static_data[self.kpi_static_data[
                    BATRUConst.SET_NAME] == set_name][
                        BATRUConst.SET_FK].values[0]
                level2_query = """
                       INSERT INTO static.kpi (kpi_set_fk, display_text)
                       VALUES ('{0}', '{1}');""".format(set_fk, kpi_name)
                print level2_query
                cur.execute(level2_query)
                if set_name in self.kpis_added.keys():
                    self.kpis_added[set_name][kpi_name] = cur.lastrowid
                else:
                    self.kpis_added[set_name] = {kpi_name: cur.lastrowid}
                print level2_query
                self.kpi_counter['kpi'] += 1
        self.aws_conn.db.commit()

    def add_atomics_to_static_sas(self):
        """Insert missing SAS atomic KPIs into static.atomic_kpi.

        For every row of self.data, the atomic is inserted only if it is
        absent from the cached static data; the parent kpi_fk is resolved
        from self.kpis_added (KPIs inserted this run) or the static cache.
        """
        atomics = self.data
        queries = []
        for i in xrange(len(atomics)):
            atomic = atomics.iloc[i]
            set_name = self.encode_string(atomic[BATRUConst.SET_NAME])
            kpi_name = self.encode_string(atomic[BATRUConst.KPI_NAME])
            atomic_name = self.encode_string(atomic[BATRUConst.ATOMIC_NAME])
            # Single-element list kept so enumerate() yields presentation
            # order alongside the name, mirroring the P3 variant.
            names = [atomic_name]
            for index, name in enumerate(names):
                if self.kpi_static_data[
                    (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                    (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name) &
                    (self.kpi_static_data[BATRUConst.ATOMIC_NAME]
                     == name)].empty:
                    # Prefer the id of a KPI inserted earlier in this run.
                    if set_name in self.kpis_added.keys(
                    ) and kpi_name in self.kpis_added[set_name].keys():
                        kpi_fk = self.kpis_added[set_name][kpi_name]
                    else:
                        kpi_fk = self.kpi_static_data[(self.kpi_static_data[
                            BATRUConst.SET_NAME] == set_name) & (
                                self.kpi_static_data[BATRUConst.KPI_NAME] ==
                                kpi_name)][BATRUConst.KPI_FK].values[0]
                    level3_query = """
                               INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                                              presentation_order, display)
                               VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format(
                        kpi_fk, name, name, name, 1, 'Y')
                    queries.append(level3_query)
                    self.kpi_counter['atomic'] += 1
        # All inserts are collected first, then executed in one connection.
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for query in queries:
            cur.execute(query)
            print query
        self.aws_conn.db.commit()

    def add_kpis_to_static_p3(self):
        kpis = self.data.drop_duplicates(
            subset=[BATRUConst.SET_NAME, BATRUConst.KPI_NAME], keep='first')
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for i in xrange(len(kpis)):
            set_name = self.encode_string(kpis.iloc[i][BATRUConst.SET_NAME])
            kpi_name = self.encode_string(kpis.iloc[i][BATRUConst.KPI_NAME])
            if self.kpi_static_data[
                (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name)].empty:
                if set_name in self.sets_added.keys():
                    set_fk = self.sets_added[set_name]
                else:
                    set_fk = self.set_fk
                level2_query = """
                       INSERT INTO static.kpi (kpi_set_fk, display_text)
                       VALUES ('{0}', '{1}');""".format(set_fk, kpi_name)
                cur.execute(level2_query)
                if set_name in self.kpis_added.keys():
                    self.kpis_added[set_name][kpi_name] = cur.lastrowid
                else:
                    self.kpis_added[set_name] = {kpi_name: cur.lastrowid}
                print level2_query
                self.kpi_counter['kpi'] += 1
        self.aws_conn.db.commit()

    def add_atomics_to_static_p3(self):
        """Insert missing SK (P3) atomic KPIs into static.atomic_kpi.

        Like the SAS variant, but atomics additionally carry model_id,
        relative_score and a display text, and uniqueness is checked per
        (set, kpi, atomic, section).
        """
        atomics = self.data
        queries = []
        for i in xrange(len(atomics)):
            atomic = atomics.iloc[i]
            set_name = self.encode_string(atomic[BATRUConst.SET_NAME])
            kpi_name = self.encode_string(atomic[BATRUConst.KPI_NAME])
            atomic_name = self.encode_string(atomic[BATRUConst.ATOMIC_NAME])
            model_id = self.encode_string(atomic[BATRUConst.MODEL_ID])
            relative_score = atomic[BATRUConst.RELATIVE_SCORE]
            display_text = self.encode_string(atomic[BATRUConst.DISPLAY_TEXT])
            # Single-element list kept so enumerate() yields presentation
            # order (index + 1) alongside the name.
            names = [atomic_name]
            for index, name in enumerate(names):
                if self.kpi_static_data[
                    (self.kpi_static_data[BATRUConst.SET_NAME] == set_name) &
                    (self.kpi_static_data[BATRUConst.KPI_NAME] == kpi_name) &
                    (self.kpi_static_data[BATRUConst.ATOMIC_NAME] == name) &
                    (self.kpi_static_data['section'] == model_id)].empty:
                    # Prefer the id of a KPI inserted earlier in this run.
                    if set_name in self.kpis_added.keys(
                    ) and kpi_name in self.kpis_added[set_name].keys():
                        kpi_fk = self.kpis_added[set_name][kpi_name]
                    else:
                        kpi_fk = self.kpi_static_data[(self.kpi_static_data[
                            BATRUConst.SET_NAME] == set_name) & (
                                self.kpi_static_data[BATRUConst.KPI_NAME] ==
                                kpi_name)][BATRUConst.KPI_FK].values[0]
                    level3_query = """
                               INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                                              presentation_order, model_id, relative_score, display)
                               VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}');
                               """.format(kpi_fk, name, name, display_text,
                                          index + 1, model_id, relative_score,
                                          'Y')
                    queries.append(level3_query)
                    self.kpi_counter['atomic'] += 1
        # All inserts are collected first, then executed in one connection.
        self.aws_conn.connect_rds()
        cur = self.aws_conn.db.cursor()
        for query in queries:
            cur.execute(query)
            print query
        self.aws_conn.db.commit()

    def add_p4_to_static(self):
        """
        Sync the P4 template into the static tables: first save any missing
        level-2 KPIs (each KPI name expanded with its fixture counters), then
        queue atomic-KPI INSERTs for every group/product row of the template.
        """
        template_for_static = self.p4_template[BATRUConst.COLUMNS_FOR_STATIC]
        # Saves to static all KPI (equipments) with a counter.
        atomic_queries = []
        kpi_queries = []
        kpi_names = template_for_static[
            BATRUConst.KPI_NAME_FIELD].unique().tolist()
        for kpi in kpi_names:
            # every KPI is expanded into its per-fixture-count variants
            kpi_with_count = self.add_kpi_count(kpi)
            for kpi_count in kpi_with_count:
                # queue only KPIs missing from the cached static data
                if self.kpi_static_data[(self.kpi_static_data['kpi_set_name']
                                         == BATRUConst.P4_SET_NAME)
                                        & (self.kpi_static_data['kpi_name'] ==
                                           kpi_count)].empty:
                    kpi_queries.append(kpi_count)
        self.save_kpi_level(self.set_fk, kpi_queries)
        # We need to re-run query for updated kpis.
        self.kpi_static_data = self.get_kpi_data()
        # This part is not combined with the loop above since it needs all kpis (with count) to be saved first.
        for kpi_name in kpi_names:
            atomics_for_static = template_for_static[template_for_static[
                BATRUConst.KPI_NAME_FIELD] == kpi_name]
            for i in xrange(len(atomics_for_static)):
                row = atomics_for_static.iloc[i]
                group = self.encode_string(row[BATRUConst.GROUP_NAME_FIELD])
                product = self.encode_string(
                    row[BATRUConst.PRODUCT_NAME_FIELD])
                kpi_with_count = self.add_kpi_count(kpi_name)
                #  This will create group and product atomics
                for kpi in kpi_with_count:
                    # skip atomics already queued during this run
                    is_exist = self.alreadyAddedAtomics[
                        (self.alreadyAddedAtomics[BATRUConst.SET_NAME] ==
                         BATRUConst.P4_SET_NAME) &
                        (self.alreadyAddedAtomics[BATRUConst.KPI_NAME] == kpi)
                        & (self.alreadyAddedAtomics[BATRUConst.GROUP_NAME_P4]
                           == group) &
                        (self.alreadyAddedAtomics[BATRUConst.ATOMIC_NAME]
                         == product)]
                    if is_exist.empty:
                        try:

                            kpi_fk = self.kpi_static_data[
                                (self.kpi_static_data[BATRUConst.SET_FK] ==
                                 self.set_fk)
                                & (self.kpi_static_data[BATRUConst.KPI_NAME] ==
                                   kpi)][BATRUConst.KPI_FK].values[0]
                            # remember this atomic so later duplicates are skipped
                            dict_already_added = {
                                BATRUConst.SET_NAME: BATRUConst.P4_SET_NAME,
                                BATRUConst.KPI_NAME: kpi,
                                BATRUConst.GROUP_NAME_P4: group,
                                BATRUConst.ATOMIC_NAME: product
                            }
                            self.alreadyAddedAtomics = self.alreadyAddedAtomics.append(
                                dict_already_added, ignore_index=True)
                            # product atomic is parented by its group; the
                            # group atomic itself has no parent (None -> NULL)
                            product_query = (kpi_fk, product, product, product,
                                             group,
                                             BATRUConst.PRODUCT_RELATIVE_SCORE)
                            group_query = (kpi_fk, group, group, group, None,
                                           BATRUConst.GROUP_RELATIVE_SCORE)
                            atomic_queries.extend([group_query, product_query])
                        except IndexError as e:
                            # KPI missing from static data -- nothing to attach the atomics to
                            print "kpi '{}' does not exist.".format(kpi)
        self.create_atomic_queries(set(atomic_queries))

    def create_atomic_queries(self, queries_to_commit):
        """
        Build INSERT statements for static.atomic_kpi out of
        (kpi_fk, name, description, display_text, group, relative_score)
        tuples, appending only the ones not yet present in the static data.
        """
        insert_template = """
        INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                       presentation_order, display, model_id, relative_score)
        VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', {});"""
        for atomic in queries_to_commit:
            if not self.is_new(atomic):
                continue
            # an empty/None group becomes a SQL NULL (unquoted)
            model_id = '{}'.format(atomic[4]) if atomic[4] else 'NULL'
            statement = insert_template.format(
                atomic[0], atomic[1], atomic[2], atomic[3], 1, 'Y', model_id,
                atomic[5]).replace("'NULL'", "NULL")
            self.queries.append(statement)

    def is_new(self, data, level=3):
        """
        Check whether a KPI (level 2) or an atomic KPI (level 3) is absent
        from the cached static data for the current KPI set.
        :param data: level 3 -- (kpi_fk, atomic_name, ...); level 2 -- [kpi_name]
        :param level: 2 or 3; anything else is logged and treated as not new
        :return: True when no matching static row exists
        """
        in_current_set = self.kpi_static_data[BATRUConst.SET_FK] == self.set_fk
        if level == 3:
            matching = self.kpi_static_data[
                in_current_set
                & (self.kpi_static_data[BATRUConst.KPI_FK] == data[0])
                & (self.kpi_static_data[BATRUConst.ATOMIC_NAME] == data[1])]
        elif level == 2:
            matching = self.kpi_static_data[
                in_current_set
                & (self.kpi_static_data[BATRUConst.KPI_NAME] == data[0])]
        else:
            Log.debug('not valid level for checking new KPIs')
            return False
        return matching.empty

    def save_kpi_level(self, set_fk, kpi_list):
        level2_query = """
            INSERT INTO static.kpi (kpi_set_fk, display_text)
            VALUES ('{}', '{}');"""
        new_kpis = []
        for kpi in kpi_list:
            if self.is_new([kpi], level=2):
                new_kpis.append(kpi)
        count_for_show = 0
        self.rds_conn.connect_rds()
        all = len(kpi_list)
        cur = self.rds_conn.db.cursor()
        for kpi in new_kpis:
            query = level2_query.format(set_fk, kpi.replace("'", "''"))
            print query
            count_for_show += 1
            cur.execute(query)
            if count_for_show % 10 == 0:
                print 'done {} / {}'.format(count_for_show, all)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    def add_kpi_count(self, kpi_name):
        """
        Expand a KPI name into its per-fixture-count variants.
        :param kpi_name: the base KPI name
        :return: [kpi_name, fixture(kpi_name, 2), ..., fixture(kpi_name, MAX_KPI_COUNT)]
        """
        # idiomatic replacement for the original manual while-loop
        return [kpi_name] + [
            BATRUConst.P4_COUNT_FIXTURE.format(kpi_name, i)
            for i in xrange(2, BATRUConst.MAX_KPI_COUNT + 1)
        ]

    def commit_to_db(self):
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        kpis_sum = len(self.queries)
        count_for_show = 0
        for query in self.queries:
            # try:
            print query
            cur.execute(query)
            count_for_show += 1
            if count_for_show % 10 == 0:
                print 'There are {} / {}'.format(count_for_show, kpis_sum)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()
# コード例 #4 (code example #4)
# 0
class CCBRToolBox:
    """Session-level KPI calculations for the CCBR project."""

    def __init__(self, data_provider, output):
        """
        Cache the session data (products, scif, store info), open the DB
        connection, load the static KPI data and read the template sheets
        from the project Excel file.
        """
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        # one DB connection is shared for static reads and results writes
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.tools = CCBRGENERALToolBox(self.data_provider,
                                        self.output,
                                        rds_conn=self.rds_conn)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.kpi_results_queries = []
        self.survey = Survey(self.data_provider, self.output)
        self.kpi_results_new_tables_queries = []
        self.New_kpi_static_data = self.get_new_kpi_static_data()
        self.session_id = self.data_provider.session_id
        # prices observed in this session, used by calculate_pricing
        self.prices_per_session = PsDataProvider(
            self.data_provider, self.output).get_price_union(self.session_id)
        self.common_db = Common(self.data_provider)
        # template sheets; empty cells are normalised to "" for the filters
        self.count_sheet = pd.read_excel(PATH, Const.COUNT).fillna("")
        self.group_count_sheet = pd.read_excel(PATH,
                                               Const.GROUP_COUNT).fillna("")
        self.survey_sheet = pd.read_excel(PATH, Const.SURVEY).fillna("")

    def main_calculation(self):
        """
        This function calculates the KPI results.
        Runs one atomic KPI per template row, then the availability/pricing
        KPIs, and finally commits everything to the DB.
        """
        kpis_sheet = pd.read_excel(PATH, Const.KPIS).fillna("")
        for index, row in kpis_sheet.iterrows():
            self.handle_atomic(row)
        self.handle_simon_kpis()
        self.commit_results_data()

    def handle_simon_kpis(self):
        """
        Run the availability KPI on the active products and the pricing KPI
        on the full product list.
        """
        is_active = self.all_products["is_active"] > 0
        self.calculate_availability(self.all_products.loc[is_active])
        self.calculate_pricing(self.all_products)

    def calculate_availability(self, active_products):
        """
        calculates the availability for all products per session, used in sovi and sovi vertical reports
        :param active_products: a df containing only active products
        """
        # only real products count for availability (SKU or Other)
        active_products_sku_and_other = active_products[
            (active_products['product_type'] == 'SKU')
            | (active_products['product_type'] == 'Other')]
        active_products_pks = active_products_sku_and_other[
            'product_fk'].unique().tolist()
        filters = {'product_fk': active_products_pks}
        filtered_df = self.scif[self.tools.get_filter_condition(
            self.scif, **filters)]
        # keep only rows where the product was actually seen
        facing_filtered = filtered_df.loc[filtered_df['facings'] > 0][[
            'template_fk', 'product_fk', 'facings'
        ]]
        facing_filtered_pks = facing_filtered['product_fk'].unique().tolist()
        # one result row per (product, scene type) with the summed facings
        for product in facing_filtered_pks:
            product_df = facing_filtered.loc[facing_filtered['product_fk'] ==
                                             product]
            product_template_fks = product_df['template_fk'].unique().tolist()
            for template_fk in product_template_fks:
                sum_facing = product_df.loc[product_df['template_fk'] ==
                                            template_fk]['facings'].sum()
                self.write_to_db_result_new_tables(fk=Const.AVAILABILITY_PK,
                                                   numerator_id=product,
                                                   score='1',
                                                   denominator_id=template_fk,
                                                   numerator_result='1',
                                                   result=sum_facing)

    def calculate_pricing(self, all_products):
        """
        Write a pricing result for every SKU product with a positive price
        in this session (used in the preco and preco vertical reports).
        :param all_products: df containing all products
        """
        sku_products = all_products.loc[all_products['product_type'] == 'SKU']
        fks_and_sizes = sku_products[['product_fk', 'size']].fillna("")
        # attach the session prices; products without a price get '0'
        with_prices = pd.merge(fks_and_sizes,
                               self.prices_per_session,
                               how='left',
                               on='product_fk')
        with_prices['value'] = with_prices['value'].fillna('0')
        for _, product, size, price in with_prices.itertuples():
            if size == '':
                size = 0
            # only positive prices are reported
            if price > 0:
                self.write_to_db_result_new_tables(fk=Const.PRICING_PK,
                                                   numerator_id=product,
                                                   numerator_result=size,
                                                   result=price)

    def handle_atomic(self, row):
        """
        Dispatch a template row to the handler matching its KPI type.
        Unknown KPI types are silently ignored, as before.
        :param row: a row from the KPIs template sheet
        """
        atomic_name = row[Const.ENGLISH_KPI_NAME].strip()
        kpi_type = row[Const.KPI_TYPE].strip()
        handlers = {
            Const.SURVEY: self.handle_survey_atomics,
            Const.COUNT: self.handle_count_atomics,
            Const.GROUP_COUNT: self.handle_group_count_atomics,
        }
        handler = handlers.get(kpi_type)
        if handler is not None:
            handler(atomic_name)

    def handle_survey_atomics(self, atomic_name):
        """
        handle survey questions
        :param atomic_name: the name of the kpi
        :return: only if the survey filters aren't satisfied
        """
        row = self.survey_sheet.loc[self.survey_sheet[Const.ENGLISH_KPI_NAME]
                                    == atomic_name]
        if row.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return
        store_type_filter = self.store_info['store_type'].values[0].strip()
        store_type_template = row[Const.STORE_TYPE_TEMPLATE].values[0].strip()

        # if cell in template is not empty
        if store_type_template != "":
            store_types = store_type_template.split(",")
            store_types = [item.strip() for item in store_types]
            if store_type_filter not in store_types:
                return

        # find the answer to the survey in session
        question_id = row[Const.SURVEY_QUESTION_ID].values[0]
        question_answer_template = row[Const.TARGET_ANSWER].values[0]

        survey_result = self.survey.get_survey_answer(
            ('question_fk', question_id))
        if question_answer_template == Const.NUMERIC:
            # numeric questions: keep the answer as the result, 0 when
            # missing or not a number (note: `long` is Python 2 only)
            if not survey_result:
                survey_result = 0
            if not isinstance(survey_result, (int, long, float)):
                Log.warning("question id " + str(question_id) +
                            " in template is not a number")
                survey_result = 0

        else:
            # textual questions: 1 when the answer matches the target, -1 otherwise
            answer = self.survey.check_survey_answer(
                ('question_fk', question_id), question_answer_template)
            survey_result = 1 if answer else -1

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=survey_result,
                                           result=survey_result)

    def handle_count_atomics(self, atomic_name):
        """
        handle count kpis, used in consolidada report
        :param atomic_name: the name of the kpi to calculate
        """
        sum_of_count = 0
        target = 0
        count_result = 0
        row = self.count_sheet.loc[self.count_sheet[Const.ENGLISH_KPI_NAME] ==
                                   atomic_name]
        if row.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return
        # NOTE(review): the loop variable shadows `row` (the filtered df) and
        # each iteration overwrites the previous results, so only the last
        # matching sheet row counts -- presumably each KPI has exactly one
        # row in the COUNT sheet; confirm against the template.
        for index, row in row.iterrows():
            sum_of_count, target, count_result = self.handle_count_row(row)
        # '+' counts may return a dataframe; fall back to the numeric result
        if not isinstance(sum_of_count, (int, float, long)):
            sum_of_count = count_result

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=sum_of_count,
                                           denominator_result=target,
                                           result=count_result)

    def handle_group_count_atomics(self, atomic_name):
        """
        handle group count kpis (different from count in or and and conditions), used in consolidada report
        :param atomic_name: the name of the kpi to calculate
        """
        rows = self.group_count_sheet.loc[self.group_count_sheet[
            Const.GROUP_KPI_NAME] == atomic_name]
        group_weight = 0
        group_result = 0
        group_target = 0
        group_sum_of_count = 0
        sum_of_count_df = pd.DataFrame()
        target_operator = ""
        if rows.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return

        for index, row in rows.iterrows():
            target_operator = row[Const.TARGET_OPERATOR].strip()
            weight = row[Const.WEIGHT]
            sum_of_count, target, count_result = self.handle_count_row(row)
            if count_result >= 1:
                # accumulate the row weights until the group reaches 100%
                group_weight += weight
                if group_weight >= 100:
                    # use for getting numeric results instead of 1 and 0
                    if (target_operator == '+'):
                        sum_of_count_df = pd.concat(
                            [sum_of_count_df, sum_of_count])
                    else:
                        group_result = 1
                        break

            # conditional, if given -1000 kpi must fail
            elif count_result == -1000:
                group_result = 0
                break

        # use for getting numeric results instead of 1 and 0
        if (target_operator == '+'):
            if sum_of_count_df.empty:
                group_sum_of_count = 0
            else:
                # count distinct scenes across all accumulated rows
                group_sum_of_count = len(sum_of_count_df.groupby('scene_id'))
            group_result = group_sum_of_count

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=group_sum_of_count,
                                           denominator_result=group_target,
                                           result=group_result)

    def handle_count_row(self, row):
        """
        Apply all the filters of a single template row and dispatch to the
        matching count calculation (scene / facing / scene SOS).
        :param row: a template row holding the filters, the target and the operator
        :return: (count_of_units, target, count_result); count_of_units may be
                 a dataframe when the target operator is '+'
        """
        count_type = row[Const.COUNT_TYPE].strip()
        target = row[Const.TARGET]
        target_operator = row[Const.TARGET_OPERATOR].strip()
        product_template = row[Const.PRODUCT]
        store_type_filter = self.store_info['store_type'].values[0]
        store_type_template = row[Const.STORE_TYPE_TEMPLATE]
        product_size = row[Const.PRODUCT_SIZE]
        product_size_operator = row[Const.PRODUCT_SIZE_OPERATOR].strip()
        product_measurement_unit = row[Const.MEASUREMENT_UNIT].strip()
        consider_few = row[Const.CONSIDER_FEW]
        multipack_template = row[Const.MULTIPACK].strip()
        multipack_df = None

        # filter store type
        if store_type_template != "":
            store_types = store_type_template.split(",")
            store_types = [item.strip() for item in store_types]
            if store_type_filter not in store_types:
                return 0, 0, 0

        filtered_df = self.scif.copy()

        # filter product
        if product_template != "":
            products_to_check = product_template.split(",")
            products_to_check = [item.strip() for item in products_to_check]
            filtered_df = filtered_df[filtered_df['product_name'].isin(
                products_to_check)]
            if filtered_df.empty:
                return 0, 0, 0

        # filter product size
        if product_size != "":
            # normalise the template target to millilitres
            if product_measurement_unit == 'l':
                product_size *= 1000

            ml_df = filtered_df[filtered_df['size_unit'] == 'ml']
            l_df = filtered_df[filtered_df['size_unit'] == 'l']

            if multipack_template != "":
                multipack_df = filtered_df[filtered_df['MPACK'] == 'Y']
            # convert the litre rows to millilitres so all sizes compare in one unit
            temp_df = l_df.copy()
            temp_df['size'] = l_df['size'].apply((lambda x: x * 1000))
            filtered_df = pd.concat([temp_df, ml_df])

            if product_size_operator == '<':
                filtered_df = filtered_df[filtered_df['size'] < product_size]
            elif product_size_operator == '<=':
                filtered_df = filtered_df[filtered_df['size'] <= product_size]
            elif product_size_operator == '>':
                filtered_df = filtered_df[filtered_df['size'] > product_size]
            elif product_size_operator == '>=':
                filtered_df = filtered_df[filtered_df['size'] >= product_size]
            elif product_size_operator == '=':
                filtered_df = filtered_df[filtered_df['size'] == product_size]

            # multipack conditions is an or between product size and MPACK
            if multipack_template != "":
                filtered_df = pd.concat([filtered_df,
                                         multipack_df]).drop_duplicates()

        filters = self.get_filters_from_row(row)
        count_of_units = 0
        if count_type == Const.SCENE:
            count_of_units = self.count_of_scenes(filtered_df, filters,
                                                  target_operator, target)
        elif count_type == Const.FACING:
            count_of_units = self.count_of_facings(filtered_df, filters,
                                                   consider_few, target)
        elif count_type == Const.SCENE_SOS:
            count_of_units = self.count_of_sos(filtered_df, filters)
        else:
            Log.warning("Couldn't find a correct COUNT variable in template")

        if target_operator == '<=':
            count_result = 1 if (target <= count_of_units) else 0

        # use for getting numeric results instead of 1 and 0
        elif target_operator == '+':
            if isinstance(count_of_units, (int, float, long)):
                count_result = count_of_units
            else:
                count_result = len(count_of_units)
        else:
            count_result = 1 if (target >= count_of_units) else 0
        return count_of_units, target, count_result

    def get_filters_from_row(self, row):
        """
        Convert a template row into a scif filters dictionary: drop the
        columns that do not exist in scif, turn the exclude columns into
        exclude-filters, remove empty cells and split comma-separated values
        into stripped lists.
        :param row: row containing all filters
        :return: a dictionary of the filters keyed by scif column names
        """
        filters = dict(row)

        # no need to be accounted for, fields that aren't in scif
        for field in Const.DELETE_FIELDS:
            if field in filters:
                del filters[field]

        # template bookkeeping columns, not scif filters
        filters.pop(Const.WEIGHT, None)
        filters.pop(Const.GROUP_KPI_NAME, None)

        exclude_manufacturer = filters[Const.EXCLUDE_MANUFACTURER].strip()
        if exclude_manufacturer != "":
            filters[Const.MANUFACTURER] = (exclude_manufacturer,
                                           Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_MANUFACTURER]

        exclude_category = filters[Const.EXCLUDE_CATEGORY].strip()
        if exclude_category != "":
            filters[Const.CATEGORY] = (exclude_category, Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_CATEGORY]

        # filter all the empty cells; iterate a snapshot of the keys since
        # entries are deleted inside the loop (dict.keys() happens to copy in
        # Python 2, list() keeps this safe under Python 3 as well)
        for key in list(filters.keys()):
            if (filters[key] == ""):
                del filters[key]
            elif isinstance(filters[key], tuple):
                # exclude-filter: split the value part, keep the marker
                filters[key] = (filters[key][0].split(","), filters[key][1])
            else:
                filters[key] = filters[key].split(",")
                filters[key] = [item.strip() for item in filters[key]]

        return self.create_filters_according_to_scif(filters)

    def create_filters_according_to_scif(self, filters):
        """
        Rename the template filter keys to their scif column names.
        :param filters: filters dictionary keyed by template column names
        :return: the same dictionary keyed by scif column names
        """
        convert_from_scif = {
            Const.TEMPLATE_GROUP: 'template_group',
            Const.TEMPLATE_NAME: 'template_name',
            Const.BRAND: 'brand_name',
            Const.CATEGORY: 'category',
            Const.MANUFACTURER: 'manufacturer_name',
            Const.PRODUCT_TYPE: 'product_type',
            # NOTE(review): handle_count_row filters on an 'MPACK' column --
            # verify 'MPAK' here is not a typo
            Const.MULTIPACK: 'MPAK'
        }
        # iterate a snapshot of the keys: the dict is mutated inside the loop
        # (Python 2's keys() returns a list, list() keeps it Python 3-safe)
        for key in list(filters.keys()):
            filters[convert_from_scif[key]] = filters.pop(key)
        return filters

    def count_of_scenes(self, filtered_df, filters, target_operator, target):
        """
        Count the scenes matching the scif filters.
        :param filtered_df: dataframe already filtered by the non-scif conditions
        :param filters: the scif filters
        :param target_operator: '+' returns a per-scene dataframe (used in group count)
        :param target: minimal summed facings per scene when the operator is '+'
        :return: a dataframe for '+', otherwise the number of distinct scenes
        """
        condition = self.tools.get_filter_condition(filtered_df, **filters)
        scene_data = filtered_df[condition]
        if target_operator != '+':
            return len(scene_data['scene_id'].unique())

        # group by scene_id and template_name (scene type) and keep only the
        # scenes whose summed facings reach the target
        facings_per_scene = scene_data.groupby(
            ['template_name', 'scene_id'])['facings'].sum().reset_index()
        return facings_per_scene[facings_per_scene['facings'] >= target]

    def count_of_sos(self, filtered_df, filters):
        """
        calculating the share of shelf
        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :return: the number of different scenes that answered the condition (hard coded 50%)
        """
        scene_data = filtered_df[self.tools.get_filter_condition(
            filtered_df, **filters)]
        # rename so the filtered facings don't clash with the totals on merge
        scene_data = scene_data.rename(columns={"facings": "facings_nom"})

        # filter by scene_id and by template_name (scene type)
        scene_types_groupby = scene_data.groupby(['template_name', 'scene_id'
                                                  ])['facings_nom'].sum()
        all_products_groupby = self.scif.groupby(['template_name', 'scene_id'
                                                  ])['facings'].sum()
        # inner join keeps only scenes present in both aggregations
        merge_result = pd.concat((scene_types_groupby, all_products_groupby),
                                 axis=1,
                                 join='inner').reset_index()
        # a scene passes when the filtered facings are at least half the total
        return len(merge_result[
            merge_result['facings_nom'] >= merge_result['facings'] * 0.5])

    def count_of_facings(self, filtered_df, filters, consider_few, target):
        """
        Sum the facings matching the scif filters.
        :param filtered_df: dataframe already filtered by the non-scif conditions
        :param filters: the scif filters
        :param consider_few: minimal number of brands that must each reach the target
        :param target: the facings target per brand
        :return: the total facings, or 0 when the consider_few condition fails
        """
        facing_data = filtered_df[self.tools.get_filter_condition(
            filtered_df, **filters)]
        if consider_few == "":
            return facing_data['facings'].sum()

        # with consider_few: enough brands must individually reach the target
        per_brand = facing_data.groupby(['brand_name'])['facings'].sum()
        brands_reaching_target = per_brand[per_brand >= target]
        if len(brands_reaching_target) >= consider_few:
            return facing_data['facings'].sum()
        return 0

    def get_new_kpi_static_data(self):
        """
        Fetch the static new-tables KPI data (static.kpi_level_2) into a
        single dataframe.
        """
        return pd.read_sql_query(CCBRQueries.get_new_kpi_data(),
                                 self.rds_conn.db)

    def write_to_db_result_new_tables(self,
                                      fk,
                                      numerator_id,
                                      numerator_result,
                                      result,
                                      denominator_id=None,
                                      denominator_result=None,
                                      score=None):
        """
        Queue an INSERT query for one new-tables KPI result; the queued
        queries are written to the DB later by commit_results_data.
        """
        attrs = self.create_attributes_dict_new_tables(
            fk, numerator_id, numerator_result, denominator_id,
            denominator_result, result, score)
        self.kpi_results_new_tables_queries.append(
            insert(attrs, KPI_NEW_TABLE))

    def create_attributes_dict_new_tables(self, kpi_fk, numerator_id,
                                          numerator_result, denominator_id,
                                          denominator_result, result, score):
        """
        Build the single-row attributes dict for a new-tables KPI result
        (column name -> {row index -> value}, as produced by DataFrame.to_dict).
        """
        columns = [
            'kpi_level_2_fk', 'session_fk', 'numerator_id',
            'numerator_result', 'denominator_id', 'denominator_result',
            'result', 'score'
        ]
        values = (kpi_fk, self.session_id, numerator_id, numerator_result,
                  denominator_id, denominator_result, result, score)
        return pd.DataFrame([values], columns=columns).to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        # merge single-row INSERTs sharing a prefix into multi-row INSERTs
        insert_queries = self.merge_insert_queries(
            self.kpi_results_new_tables_queries)
        # reconnect to make sure the connection is fresh before writing
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        # previous results of this session are replaced, not appended
        delete_query = CCBRQueries.get_delete_session_results_query(
            self.session_uid, self.session_id)
        cur.execute(delete_query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    @staticmethod
    def merge_insert_queries(insert_queries):
        """
        Merge INSERT queries that share the same prefix (everything before
        'VALUES ') into one multi-row INSERT per prefix.
        :param insert_queries: list of single-row INSERT statements
        :return: list of merged INSERT statements
        """
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            # setdefault replaces the manual existence check per prefix
            query_groups.setdefault(static_data, []).append(inserted_data)
        return [
            '{0} VALUES {1}'.format(group, ',\n'.join(rows))
            for group, rows in query_groups.items()
        ]
# コード例 #5 (code example #5)
# 0
class DIAGEOBR_SANDToolBox:
    """KPI calculations toolbox for the DIAGEO BR (sandbox) project.

    Loads session/store/product data from the data provider, calculates the
    configured KPI sets and writes results both to the legacy KPI tables
    (via ``write_to_db_result`` / ``kpi_results_queries``) and to the new
    results tables (via ``commonV2``).
    """

    # Legacy KPI hierarchy levels: 1 = KPI set, 2 = KPI, 3 = atomic KPI.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        """Cache session, store and product data and build the helpers
        (DB connection, DIAGEO tools, common writers) used by the KPI
        calculations."""
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        # Channel comes from store_type; normalized to upper case when present.
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.segment = self.get_business_unit_name()
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        # Per-set template cache, filled lazily in main_calculation.
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.tools = DIAGEOToolBox(self.data_provider, output, match_display_in_scene=self.match_display_in_scene)
        # Accumulated INSERTs for the legacy tables, flushed by commit_results_data.
        self.kpi_results_queries = []
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.diageo_generator = DIAGEOGenerator(self.data_provider, self.output, self.common)



    def get_business_unit_name(self):
        """
        This function fetches the business unit name of the current store
        from the DB, returning an empty string when none is configured.
        """
        query = DIAGEOQueries.get_business_unit_name(self.store_id)
        business_unit_name = pd.read_sql_query(query, self.rds_conn.db)
        if business_unit_name['business_unit_name'].empty:
            return ""
        else:
            return business_unit_name['business_unit_name'].values[0]

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.

        For each requested set it downloads the set template if needed,
        dispatches to the relevant calculation, writes per-KPI results and
        finally writes the set-level score (LEVEL1) and commits the new-table
        results.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()

        # Global assortment kpis
        assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data.keys():
                try:
                    self.set_templates_data[set_name] = self.tools.download_template(set_name)
                # NOTE(review): bare except deliberately skips sets whose
                # template download fails (best-effort behavior).
                except:
                    Log.warning("Couldn't find a template for set name: " + str(set_name))
                    continue

            # if set_name in ('MPA', 'New Products'):
            #     set_score = self.calculate_assortment_sets(set_name)
            # elif set_name in ('POSM',):
            #     set_score = self.calculate_posm_sets(set_name)
            if set_name == 'Visible to Customer':

                # Global function
                sku_list = filter(None, self.scif[self.scif['product_type'] == 'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(sku_list)

                if res_dict:
                    # Saving to new tables
                    # The last entry is the parent (set-level) result.
                    parent_res = res_dict[-1]
                    for r in res_dict:
                        self.commonV2.write_to_db_result(**r)

                    # Saving to old tables
                    set_score = result = parent_res['result']
                    self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result)

                # filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                # set_score = self.tools.calculate_visible_percentage(visible_filters=filters)
                # self.save_level2_and_level3(set_name, set_name, set_score)

            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_dict = self.diageo_generator.diageo_global_secondary_display_secondary_function()

                # Saving to new tables
                if res_dict:
                    self.commonV2.write_to_db_result(fk=res_dict['fk'], numerator_id=1, denominator_id=self.store_id,
                                                                                            result=res_dict['result'])

                # Saving to old tables
                set_score = self.tools.calculate_assortment(assortment_entity='scene_id', location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)

            # A score of 0 is still written; False aborts the whole run
            # (e.g. a set calculation signalled "not relevant").
            if set_score == 0:
                pass
            elif set_score is False:
                return

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # commiting to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[(self.kpi_static_data['kpi_set_name'] == set_name) &
                                        (self.kpi_static_data['kpi_name'] == kpi_name)]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        # Missing set/KPI name in the static data: log and skip the write.
        except:
            Log.warning("kpi name or set name don't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_posm_sets(self, set_name):
        """
        This function calculates every POSM-typed KPI from the relevant sets, and returns the set final score.

        Returns False when nothing was relevant for this store (so the caller
        can distinguish "no data" from a real score of 0).
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            # Without a store channel no POSM KPI can be evaluated.
            if self.store_channel is None:
                break

            kpi_res = self.tools.calculate_posm(display_name=params.get(self.tools.DISPLAY_NAME))
            score = 1 if kpi_res > 0 else 0
            if params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                scores.append(score)

            # Passing KPIs are saved even when not marked relevant for the store.
            if score == 1 or params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                self.save_level2_and_level3(set_name, params.get(self.tools.DISPLAY_NAME), score)

        if not scores:
            return False
        # Percentage of passed KPIs out of the relevant ones.
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.

        Returns False when no template row was relevant for this store.
        """
        scores = []
        # Template relevance is keyed by "<store_type>;<business unit>".
        segment = '{};{}'.format(self.store_type, self.segment)
        for params in self.set_templates_data[set_name]:
            if params.get(segment, '').capitalize() in (self.tools.RELEVANT_FOR_STORE,
                                                                self.tools.OR_OTHER_PRODUCTS):

                object_type = self.tools.ENTITY_TYPE_CONVERTER.get(params.get(self.tools.ENTITY_TYPE),
                                                                   'product_ean_code')
                objects = [str(params.get(self.tools.PRODUCT_EAN_CODE, params.get(self.tools.PRODUCT_EAN_CODE2, '')))]
                # "Or other products" rows also accept the listed additional SKUs.
                if params.get(self.store_type) == self.tools.OR_OTHER_PRODUCTS:
                    additional_objects = str(params.get(self.tools.ADDITIONAL_SKUS)).split(',')
                    objects.extend(additional_objects)
                filters = {object_type: objects}
                result = self.tools.calculate_assortment(**filters)
                score = 1 if result > 0 else 0
                scores.append(score)

                self.save_level2_and_level3(set_name, params.get(self.tools.PRODUCT_NAME), score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def write_to_db_result(self, fk, score, level):
        """
        This function the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            # Unknown level: silently drop the result.
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving
        in the legacy KPI results tables, and returns it in
        ``DataFrame.to_dict()`` format. The column layout depends on the
        hierarchy level (set / KPI / atomic KPI).
        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == fk]['kpi_set_name'].values[0]
            # Percent-scored sets carry a '%' marker in score_2.
            score_type = '%' if kpi_set_name in self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE else ''
            attributes = pd.DataFrame([(kpi_set_name, self.session_uid, self.store_id, self.visit_date.isoformat(),
                                        format(score, '.2f'), score_type, fk)],
                                      columns=['kps_name', 'session_uid', 'store_fk', 'visit_date', 'score_1',
                                               'score_2', 'kpi_set_fk'])

        elif level == self.LEVEL2:
            # Escape quotes so the generated INSERT statement stays valid SQL.
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] == fk]['kpi_name'].values[0].replace("'", "\\'")
            attributes = pd.DataFrame([(self.session_uid, self.store_id, self.visit_date.isoformat(),
                                        fk, kpi_name, score)],
                                      columns=['session_uid', 'store_fk', 'visit_date', 'kpi_fk', 'kpk_name', 'score'])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk'] == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace("'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk'] == fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([(atomic_kpi_name, self.session_uid, kpi_set_name, self.store_id,
                                        self.visit_date.isoformat(), datetime.utcnow().isoformat(),
                                        score, kpi_fk, fk, None, None)],
                                      columns=['display_text', 'session_uid', 'kps_name', 'store_fk', 'visit_date',
                                               'calculation_time', 'score', 'kpi_fk', 'atomic_kpi_fk', 'threshold',
                                               'result'])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.

        Reconnects first (the connection may have gone stale during the
        calculation), deletes previous results for this session for
        idempotency, then executes the queued inserts and commits once.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
# コード例 #6 (Code example #6) — scrape-artifact separator, commented out so the file stays parseable
# 0
class BATRUAssortment:
    """Command-line uploader for BATRU store assortments.

    Parses an assortment template file (OUTLET_ID -> product EAN codes),
    validates stores/products against the DB, and updates
    pservice store-assortment rows: deactivating obsolete entries and
    activating new ones, committed in batches.

    NOTE(review): written for Python 2 (`xrange`, `unicode`,
    `dict.keys()[0]`) — porting to Python 3 would need changes.
    """

    def __init__(self):
        # CLI arguments drive everything: project, file path, start date,
        # and whether this is a partial update.
        self.parsed_args = _parse_arguments()
        self.project = self.parsed_args.project
        # NOTE: these "get_*" names are properties, so each assignment below
        # actually executes a DB query / connection attempt.
        self.rds_conn = self.rds_connect
        self.file_path = self.parsed_args.file
        self.start_date = self.parsed_args.date
        self.partial_update = self.parsed_args.update
        self.store_data = self.get_store_data
        self.all_products = self.get_product_data
        self.current_top_skus = self.get_current_top_skus
        # Lookup caches: store_number -> store_fk, ean_code -> product_fk.
        self.stores = {}
        self.products = {}
        self.all_queries = []

        if self.start_date is None:
            self.current_date = datetime.now().date()
        else:
            self.current_date = datetime.strptime(self.start_date,
                                                  '%Y-%m-%d').date()
        # Old rows are closed the day before the new assortment starts.
        self.deactivate_date = self.current_date - timedelta(1)
        self.activate_date = self.current_date

        # Normalize the CLI flag to a real boolean.
        if self.partial_update in ('1', 'True', 'Yes', 'Y'):
            self.partial_update = True
        else:
            self.partial_update = False

    def upload_assortment(self):
        """
        This is the main function of the assortment.
        It does the validation and then upload the assortment.
        Note that validation failures only produce warnings — the upload
        proceeds with whatever rows are valid.
        :return:
        """
        Log.debug("Parsing and validating the assortment template")
        is_valid, invalid_inputs = self.p1_assortment_validator()

        Log.info("Assortment upload is started")
        self.upload_store_assortment_file()
        if not is_valid:
            Log.warning("Errors were found during the template validation")
            if invalid_inputs[INVALID_STORES]:
                Log.warning("The following stores don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_STORES]))
            if invalid_inputs[INVALID_PRODUCTS]:
                Log.warning("The following products don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_PRODUCTS]))
        Log.info("Assortment upload is finished")

    @property
    def rds_connect(self):
        # Connects to the project DB; the probe query verifies the connection
        # is actually usable, reconnecting once on failure.
        self.rds_conn = PSProjectConnector(self.project,
                                           DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self.rds_conn.db)
        except Exception as e:
            self.rds_conn.disconnect_rds()
            self.rds_conn = PSProjectConnector(self.project,
                                               DbUsers.CalculationEng)
        return self.rds_conn

    @property
    def get_store_data(self):
        # store_fk <-> store_number mapping for the whole project.
        query = "select pk as store_fk, store_number_1 as store_number from static.stores"
        self.store_data = pd.read_sql_query(query, self.rds_conn.db)
        return self.store_data

    @property
    def get_product_data(self):
        # All non-deleted products with their EAN codes.
        query = "select pk as product_fk, product_ean_code from static.product " \
                "where delete_date is null"
        self.all_products = pd.read_sql_query(query, self.rds_conn.db)
        return self.all_products

    @property
    def get_current_top_skus(self):
        # Currently-active assortment rows (no end_date yet).
        query = """select store_fk, product_fk
                   from pservice.custom_osa
                   where end_date is null"""
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def p1_assortment_validator(self):
        """
        This function validates the store assortment template.
        It compares the OUTLET_ID (= store_number_1) and the products ean_code to the stores and products from the DB
        :return: False in case of an error and True in case of a valid template
        """
        raw_data = self.parse_assortment_template()
        legal_template = True
        invalid_inputs = {INVALID_STORES: [], INVALID_PRODUCTS: []}
        valid_stores = self.store_data.loc[
            self.store_data['store_number'].isin(raw_data[OUTLET_ID])]
        # Any template store not matched in the DB invalidates the template.
        if len(valid_stores) != len(raw_data[OUTLET_ID].unique()):
            invalid_inputs[INVALID_STORES] = list(
                set(raw_data[OUTLET_ID].unique()) -
                set(valid_stores['store_number']))
            Log.debug("The following stores don't exist in the DB: {}".format(
                invalid_inputs[INVALID_STORES]))
            legal_template = False

        valid_product = self.all_products.loc[self.all_products[EAN_CODE].isin(
            raw_data[EAN_CODE])]
        if len(valid_product) != len(raw_data[EAN_CODE].unique()):
            invalid_inputs[INVALID_PRODUCTS] = list(
                set(raw_data[EAN_CODE].unique()) -
                set(valid_product[EAN_CODE]))
            Log.debug(
                "The following products don't exist in the DB: {}".format(
                    invalid_inputs[INVALID_PRODUCTS]))
            legal_template = False
        return legal_template, invalid_inputs

    def parse_assortment_template(self):
        """
        This functions turns the csv into DF
        It tries to handle all of the possible format situation that I encountered yet (different delimiter and unicode)
        :return: DF that contains the store_number_1 (Outlet ID) and the product_ean_code of the assortments
        """
        # Try tab-separated first, then comma, then a utf-7 encoded comma file;
        # the presence of the expected columns decides whether a parse "took".
        data = pd.read_csv(self.file_path, sep='\t')
        if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
            data = pd.read_csv(self.file_path)
        if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
            data = pd.read_csv(self.file_path, encoding='utf-7')
        data = data.drop_duplicates(subset=data.columns, keep='first')
        data = data.fillna('')
        return data

    def set_end_date_for_irrelevant_assortments(self, stores_list):
        """
        This function sets an end_date to all of the irrelevant stores in the assortment.
        :param stores_list: List of the stores from the assortment template
        """
        Log.debug("Closing assortment for stores out of template")
        # Stores that have active assortment rows but are absent from the
        # template get all their rows closed.
        irrelevant_stores = self.store_data.loc[
            ~self.store_data['store_number'].
            isin(stores_list)]['store_fk'].unique().tolist()
        current_assortment_stores = self.current_top_skus['store_fk'].unique(
        ).tolist()
        stores_to_remove = list(
            set(irrelevant_stores).intersection(
                set(current_assortment_stores)))
        for store in stores_to_remove:
            query = [
                self.get_store_deactivation_query(store, self.deactivate_date)
            ]
            self.commit_results(query)
        Log.debug("Assortment is closed for ({}) stores".format(
            len(stores_to_remove)))

    def upload_store_assortment_file(self):
        # Parse the template into per-store product lists, then push each
        # store's delta (activations/deactivations) to the DB.
        raw_data = self.parse_assortment_template()
        data = []
        list_of_stores = raw_data[OUTLET_ID].unique().tolist()

        # Full update also closes assortments for stores not in the template.
        if not self.partial_update:
            self.set_end_date_for_irrelevant_assortments(list_of_stores)

        Log.debug("Preparing assortment data for update")
        store_counter = 0
        for store in list_of_stores:
            store_data = {}
            store_products = raw_data.loc[raw_data[OUTLET_ID] ==
                                          store][EAN_CODE].tolist()
            store_data[store] = store_products
            data.append(store_data)

            store_counter += 1
            # Progress log every 1000 stores (and at the end).
            if store_counter % 1000 == 0 or store_counter == len(
                    list_of_stores):
                Log.debug("Assortment is prepared for {}/{} stores".format(
                    store_counter, len(list_of_stores)))

        Log.debug("Updating assortment data in DB")
        store_counter = 0
        for store_data in data:

            self.update_db_from_json(store_data)

            # Flush the queued queries after every store.
            if self.all_queries:
                queries = self.merge_insert_queries(self.all_queries)
                self.commit_results(queries)
                self.all_queries = []

            store_counter += 1
            if store_counter % 1000 == 0 or store_counter == len(data):
                Log.debug(
                    "Assortment is updated in DB for {}/{} stores".format(
                        store_counter, len(data)))

    @staticmethod
    def merge_insert_queries(queries):
        """
        This function aggregates all of the insert queries
        :param queries: all of the queries (update and insert) for the assortment
        :return: The merged insert queries
        """
        query_groups = {}
        other_queries = []
        for query in queries:
            # Non-INSERT queries (e.g. UPDATEs) cannot be merged; keep as-is.
            if 'VALUES' not in query:
                other_queries.append(query)
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            # Cap each merged INSERT at 10**4 rows to keep statements bounded.
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return other_queries + merged_queries

    def update_db_from_json(self, data):
        """Queue activation/deactivation queries for one store's assortment.

        :param data: single-entry dict {store_number: [ean codes]}.
        """
        update_products = set()
        missing_products = set()

        # Python 2 idiom: dict.keys() is a list; the dict has exactly one key.
        store_number = data.keys()[0]
        if store_number is None:
            Log.debug("'{}' column or value is missing".format(STORE_NUMBER))
            return

        store_fk = self.get_store_fk(store_number)
        if store_fk is None:
            Log.debug(
                'Store Number {} does not exist in DB'.format(store_number))
            return

        for key in data[store_number]:
            validation = False
            if isinstance(key, (float, int)):
                validation = True
            elif isinstance(key, (str, unicode)):
                validation = True
            if validation:
                # EAN may arrive as "something,ean"; keep the last segment.
                product_ean_code = str(key).split(',')[-1]
                product_fk = self.get_product_fk(product_ean_code)
                if product_fk is None:
                    missing_products.add(product_ean_code)
                else:
                    update_products.add(product_fk)

        if missing_products:
            Log.debug(
                'The following EAN Codes for Store Number {} do not exist in DB: {}.'
                ''.format(store_number, list(missing_products)))
        queries = []
        current_products = self.current_top_skus[
            self.current_top_skus['store_fk'] ==
            store_fk]['product_fk'].tolist()

        # Delta between current DB assortment and the new template assortment.
        products_to_deactivate = tuple(
            set(current_products).difference(update_products))
        products_to_activate = tuple(
            set(update_products).difference(current_products))

        if products_to_deactivate:
            # A 1-tuple would render as "(5,)" in SQL, so format it manually.
            if len(products_to_deactivate) == 1:
                queries.append(
                    self.get_deactivation_query(
                        store_fk, "(" + str(products_to_deactivate[0]) + ")",
                        self.deactivate_date))
            else:
                queries.append(
                    self.get_deactivation_query(store_fk,
                                                tuple(products_to_deactivate),
                                                self.deactivate_date))

        for product_fk in products_to_activate:
            queries.append(
                self.get_activation_query(store_fk, product_fk,
                                          self.activate_date))

        self.all_queries.extend(queries)
        Log.debug(
            'Store Number {} - Products to update {}: Deactivated {}, Activated {}'
            ''.format(store_number, len(update_products),
                      len(products_to_deactivate), len(products_to_activate)))

    def get_store_fk(self, store_number):
        """
        This functions returns the store's fk
        :param store_number: 'store_number_1' attribute of the store
        :return: store fk, or None when the store number is unknown
        """
        store_number = str(store_number)
        # Memoized lookup against the stores cache.
        if store_number in self.stores:
            store_fk = self.stores[store_number]
        else:
            store_fk = self.store_data[self.store_data['store_number'] ==
                                       store_number]
            if not store_fk.empty:
                store_fk = store_fk['store_fk'].values[0]
                self.stores[store_number] = store_fk
            else:
                store_fk = None
        return store_fk

    def get_product_fk(self, product_ean_code):
        # Memoized EAN -> product_fk lookup; returns None for unknown EANs.
        product_ean_code = str(product_ean_code).strip()
        if product_ean_code in self.products:
            product_fk = self.products[product_ean_code]
        else:
            product_fk = self.all_products[
                self.all_products['product_ean_code'] == product_ean_code]
            if not product_fk.empty:
                product_fk = product_fk['product_fk'].values[0]
                self.products[product_ean_code] = product_fk
            else:
                product_fk = None
        return product_fk

    @staticmethod
    def get_deactivation_query(store_fk, product_fks, date):
        # Close the given products' active rows for one store.
        query = \
            """
            update {} set end_date = '{}', is_current = NULL 
            where store_fk = {} and product_fk in {} and end_date is null;
            """\
            .format(STORE_ASSORTMENT_TABLE, date, store_fk, product_fks)
        return query

    @staticmethod
    def get_store_deactivation_query(store_fk, date):
        # Close ALL active assortment rows for one store.
        query = \
            """
            update {} set end_date = '{}', is_current = NULL
            where store_fk = {} and end_date is null;
            """.format(STORE_ASSORTMENT_TABLE, date, store_fk)
        return query

    @staticmethod
    def get_activation_query(store_fk, product_fk, date):
        # Build an INSERT for a newly-activated store/product pair.
        attributes = pd.DataFrame(
            [(store_fk, product_fk, str(date), 1)],
            columns=['store_fk', 'product_fk', 'start_date', 'is_current'])
        query = insert(attributes.to_dict(), STORE_ASSORTMENT_TABLE)
        return query

    def commit_results(self, queries):
        """
        This function commits the results into the DB in batches.
        query_num is the number of queires that were executed in the current batch
        After batch_size is reached, the function re-connects the DB and cursor.
        Failed queries are logged and collected but not retried.
        """
        self.rds_conn.connect_rds()
        cursor = self.rds_conn.db.cursor()
        batch_size = 1000
        query_num = 0
        failed_queries = []
        for query in queries:
            try:
                cursor.execute(query)
                # print query
            except Exception as e:
                Log.warning(
                    'Committing to DB failed to due to: {}. Query: {}'.format(
                        e, query))
                # Commit what succeeded so far, then reconnect and continue.
                self.rds_conn.db.commit()
                failed_queries.append(query)
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                continue
            if query_num > batch_size:
                self.rds_conn.db.commit()
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                query_num = 0
            query_num += 1
        self.rds_conn.db.commit()
# コード例 #7 (Code example #7) — scrape-artifact separator, commented out so the file stays parseable
# 0
class DIAGEOUK_SANDToolBox:
    """Session-level KPI calculation toolbox for the DIAGEO UK sandbox project."""

    # Static-hierarchy levels: KPI set / KPI / atomic KPI.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        """
        Cache the session data (products, scenes, store info, scif) from
        the data provider, open the project DB connection, and build the
        DIAGEO calculation helpers used by main_calculation.
        """
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        # Channel is upper-cased for case-insensitive template matching.
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        # Per-session cache of downloaded set templates.
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.output = output
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.global_gen = DIAGEOGenerator(self.data_provider, self.output,
                                          self.common)
        self.tools = DIAGEOToolBox(
            self.data_provider,
            output,
            match_display_in_scene=self.match_display_in_scene
        )  # replace the old one
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output,
                                                self.common,
                                                menu=True)

    def get_kpi_static_data(self):
        """
        Load the static KPI hierarchy into a single data frame.

        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        static_data_query = DIAGEOQueries.get_all_kpi_data()
        return pd.read_sql_query(static_data_query, self.rds_conn.db)

    def get_match_display(self):
        """
        Load this session's display matches into a single data frame.

        The data is taken from probedata.match_display_in_scene.
        """
        display_query = DIAGEOQueries.get_match_display(self.session_uid)
        return pd.read_sql_query(display_query, self.rds_conn.db)

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.

        Global (new-tables) calculations run first; each KPI set in
        `set_names` is then dispatched to its matching old-tables
        calculation, downloading the set template on first use.  A set
        whose calculation returns False (no relevant data) is skipped
        and not written to the DB.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()
        # SOS Out Of The Box kpis
        self.activate_ootb_kpis()

        # Global assortment kpis
        assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        # Global assortment kpis - v3 for NEW MOBILE REPORTS use
        assortment_res_dict_v3 = self.diageo_generator.diageo_global_assortment_function_v3()
        self.commonV2.save_json_to_new_tables(assortment_res_dict_v3)

        # Global equipment score (draught-taps scenes only)
        equipment_score_scenes = self.get_equipment_score_relevant_scenes()
        res_dict = self.diageo_generator.diageo_global_equipment_score(
            save_scene_level=False, scene_list=equipment_score_scenes)
        self.commonV2.save_json_to_new_tables(res_dict)

        # Global Menu kpis
        menus_res_dict = self.diageo_generator.diageo_global_share_of_menu_cocktail_function(
            cocktail_product_level=True)
        self.commonV2.save_json_to_new_tables(menus_res_dict)

        for set_name in set_names:
            set_score = 0
            # Download the set's template once per session.
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data:
                try:
                    self.set_templates_data[set_name] = self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue

            # Global relative position
            # BUG FIX: ('Relative Position') is a parenthesized string, so
            # `in` performed a substring test; a 1-tuple is intended.
            if set_name in ('Relative Position',):
                # Global function
                res_dict = self.diageo_generator.diageo_global_relative_position_function(
                    self.set_templates_data[set_name],
                    location_type='template_group')
                self.commonV2.save_json_to_new_tables(res_dict)

                # Saving to old tables
                self.set_templates_data[set_name] = parse_template(
                    RELATIVE_PATH, lower_headers_row_index=2)
                set_score = self.calculate_relative_position_sets(set_name)
            # elif set_name in ('MPA', 'New Products', 'Local MPA'):
            elif set_name in ('Local MPA',):
                set_score = self.calculate_assortment_sets(set_name)

            # Global Secondary Displays
            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_json = self.diageo_generator.diageo_global_secondary_display_secondary_function()
                if res_json:
                    # Saving to new tables
                    self.commonV2.write_to_db_result(
                        fk=res_json['fk'],
                        numerator_id=1,
                        denominator_id=self.store_id,
                        result=res_json['result'])

                # Saving to old tables; fall back to the alternative
                # location name when no 'Secondary' scenes exist.
                set_score = self.tools.calculate_number_of_scenes(
                    location_type='Secondary')
                if not set_score:
                    set_score = self.tools.calculate_number_of_scenes(
                        location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)
            elif set_name == 'POSM':
                set_score = self.calculate_posm_sets(set_name)
            elif set_name in ('Visible to Customer', 'Visible to Consumer %'):
                # Global function
                sku_list = filter(
                    None, self.scif[self.scif['product_type'] ==
                                    'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(
                    sku_list)

                if res_dict:
                    # Saving to new tables
                    self.commonV2.save_json_to_new_tables(res_dict)

                # Saving to old tables
                filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                set_score = self.tools.calculate_visible_percentage(
                    visible_filters=filters)
                self.save_level2_and_level3(set_name, set_name, set_score)
            else:
                continue

            # BUG FIX: False == 0 in Python, so the original
            # `if set_score == 0: pass / elif set_score is False: continue`
            # never skipped failed sets; test identity first.
            if set_score is False:
                continue

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # commiting to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB.

        Look-ups compare utf-8 encoded names so unicode set/kpi names
        from the templates match the DB values.  If no row matches, a
        warning is logged and nothing is written.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'].str.encode('utf-8') ==
             set_name.encode('utf-8'))
            & (self.kpi_static_data['kpi_name'].str.encode('utf-8') ==
               kpi_name.encode('utf-8'))]
        try:
            # Both fks are taken inside the try (the original left the
            # atomic fk outside, so an empty frame could still raise),
            # and the bare except is narrowed to the lookup failure.
            kpi_fk = kpi_data['kpi_fk'].values[0]
            atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        except IndexError:
            Log.warning("kpi name or set name don't exist")
            return
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_relative_position_sets(self, set_name):
        """
        This function calculates every relative-position-typed KPI from the relevant sets, and returns the set final score.

        Only template rows whose channel matches the session's store
        channel are calculated; each row contributes a 1/0 score and the
        set score is the percentage of passing rows (False if none ran).
        """

        scores = []
        # The template here is a DataFrame, hence the positional iloc loop.
        for i in xrange(len(self.set_templates_data[set_name])):
            params = self.set_templates_data[set_name].iloc[i]
            if self.store_channel == params.get(self.tools.CHANNEL,
                                                '').upper():
                # Tested/anchor entities may be brands or specific EAN codes.
                scif_tested_param = 'brand_name' if params.get(self.tools.TESTED_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                scif_anchor_param = 'brand_name' if params.get(self.tools.ANCHOR_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                tested_filters = {
                    scif_tested_param: params.get(self.tools.TESTED_NEW)
                }
                anchor_filters = {
                    scif_anchor_param: params.get(self.tools.ANCHOR_NEW)
                }

                # Allowed distance per direction; see
                # _get_direction_for_relative_position for the encoding.
                direction_data = {
                    'top':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.TOP_DISTANCE)),
                    'bottom':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.BOTTOM_DISTANCE)),
                    'left':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.LEFT_DISTANCE)),
                    'right':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.RIGHT_DISTANCE))
                }
                # Optional location restriction from the template.
                if params.get(self.tools.LOCATION_OLD, ''):
                    general_filters = {
                        'template_group': params.get(self.tools.LOCATION_OLD)
                    }
                else:
                    general_filters = {}

                result = self.tools.calculate_relative_position(
                    tested_filters, anchor_filters, direction_data,
                    **general_filters)
                score = 1 if result else 0
                scores.append(score)

                self.save_level2_and_level3(set_name,
                                            params.get(self.tools.KPI_NAME),
                                            score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def _get_direction_for_relative_position(self, value):
        """
        Translate a distance cell from the template into a number.

        The 'unlimited' marker becomes 1000, digit strings are cast to
        int, and anything empty or non-numeric becomes 0.
        """
        if value == self.tools.UNLIMITED_DISTANCE:
            return 1000
        if value and str(value).isdigit():
            return int(value)
        return 0

    def calculate_posm_sets(self, set_name):
        """
        Calculate every POSM-typed KPI in the set and return the final
        set score (a percentage), or False when nothing was relevant.
        """
        relevant_scores = []
        for row in self.set_templates_data[set_name]:
            # Without a store channel there is nothing to calculate.
            if self.store_channel is None:
                break

            display = row.get(self.tools.DISPLAY_NAME)
            score = 1 if self.tools.calculate_posm(display_name=display) > 0 else 0
            store_relevant = row.get(self.store_type) == self.tools.RELEVANT_FOR_STORE
            # Only displays targeted at this store type count toward the score.
            if store_relevant:
                relevant_scores.append(score)

            # Passing displays are saved even when not targeted at this store.
            if score == 1 or store_relevant:
                self.save_level2_and_level3(set_name, display, score)

        if not relevant_scores:
            return False
        return (sum(relevant_scores) / float(len(relevant_scores))) * 100

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.

        Each template row lists one or more product EANs and a target
        (a count, or a relevance marker meaning target 1).  Multi-product
        rows score 1 when the summed assortment reaches the target;
        single-product rows score on the product alone.  Returns the
        percentage of passing rows, or False when no row was relevant.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            target = str(params.get(self.store_type, ''))
            # A digit target or a relevance marker makes the row applicable.
            if target.isdigit() or target.capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                # EANs may come comma- or whitespace-separated.
                products = str(
                    params.get(self.tools.PRODUCT_EAN_CODE,
                               params.get(self.tools.PRODUCT_EAN_CODE2,
                                          ''))).replace(',', ' ').split()
                target = 1 if not target.isdigit() else int(target)
                kpi_name = params.get(self.tools.GROUP_NAME,
                                      params.get(self.tools.PRODUCT_NAME))
                kpi_static_data = self.kpi_static_data[
                    (self.kpi_static_data['kpi_set_name'] == set_name)
                    & (self.kpi_static_data['kpi_name'] == kpi_name)]
                if len(products) > 1:
                    # Group row: sum per-product results against the target,
                    # writing each product's atomic result as it goes.
                    result = 0
                    for product in products:
                        product_score = self.tools.calculate_assortment(
                            product_ean_code=product)
                        result += product_score
                        try:
                            product_name = self.all_products[
                                self.all_products['product_ean_code'] ==
                                product]['product_name'].values[0]
                        except Exception as e:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product))
                            continue
                        try:
                            atomic_fk = \
                            kpi_static_data[kpi_static_data['atomic_kpi_name'] == product_name]['atomic_kpi_fk'].values[
                                0]
                        except Exception as e:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product_name))
                            continue
                        self.write_to_db_result(atomic_fk,
                                                product_score,
                                                level=self.LEVEL3)
                    score = 1 if result >= target else 0
                else:
                    # Single-product row: one atomic result, pass/fail score.
                    result = self.tools.calculate_assortment(
                        product_ean_code=products)
                    atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0]
                    score = 1 if result >= target else 0
                    self.write_to_db_result(atomic_fk,
                                            score,
                                            level=self.LEVEL3)

                scores.append(score)
                kpi_fk = kpi_static_data['kpi_fk'].values[0]
                self.write_to_db_result(kpi_fk, score, level=self.LEVEL2)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    # def calculate_assortment_sets(self, set_name): # the old version. I changed it to the function of KE for local MPA.
    #     """
    #     This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
    #     """
    #     scores = []
    #     for params in self.set_templates_data[set_name]:
    #         if params.get(self.store_type, '').capitalize() in (self.tools.RELEVANT_FOR_STORE,
    #                                                             self.tools.OR_OTHER_PRODUCTS):
    #             object_type = self.tools.ENTITY_TYPE_CONVERTER.get(params.get(self.tools.ENTITY_TYPE),
    #                                                                'product_ean_code')
    #             objects = [str(params.get(self.tools.PRODUCT_EAN_CODE, params.get(self.tools.PRODUCT_EAN_CODE2, '')))]
    #             if params.get(self.store_type) == self.tools.OR_OTHER_PRODUCTS:
    #                 additional_objects = str(params.get(self.tools.ADDITIONAL_SKUS)).split(',')
    #                 objects.extend(additional_objects)
    #             filters = {object_type: objects}
    #             result = self.tools.calculate_assortment(**filters)
    #             score = 1 if result > 0 else 0
    #             scores.append(score)
    #
    #             self.save_level2_and_level3(set_name, params.get(self.tools.PRODUCT_NAME), score)
    #
    #     if not scores:
    #         return False
    #     set_score = (sum(scores) / float(len(scores))) * 100
    #     return set_score

    def write_to_db_result(self, fk, score, level):
        """
        Build the insert query for one result row (set / kpi / atomic
        level) and queue it for the final DB commit.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        level_to_table = {
            self.LEVEL1: KPS_RESULT,
            self.LEVEL2: KPK_RESULT,
            self.LEVEL3: KPI_RESULT,
        }
        table = level_to_table.get(level)
        if table is None:
            # Unknown hierarchy level - nothing to write.
            return
        self.kpi_results_queries.append(insert(attributes, table))

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        Each hierarchy level targets a different results table, so the
        column layout differs per level; the frame is returned as a dict
        for the query builder.  An unknown level yields an empty dict.
        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            # Percent-scored sets carry '%' in the score_2 column.
            score_type = '%' if kpi_set_name in self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE else ''
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score,
                                                      '.2f'), score_type, fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'score_2', 'kpi_set_fk'
                ])

        elif level == self.LEVEL2:
            # Single quotes are escaped so the name survives the raw SQL insert.
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0].replace(
                                                "'", "\\'")
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            # Escaped for the raw SQL insert, as above.
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace(
                "'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([
                (atomic_kpi_name, self.session_uid,
                 kpi_set_name, self.store_id, self.visit_date.isoformat(),
                 datetime.utcnow().isoformat(), score, kpi_fk, fk, None, None)
            ],
                                      columns=[
                                          'display_text', 'session_uid',
                                          'kps_name', 'store_fk', 'visit_date',
                                          'calculation_time', 'score',
                                          'kpi_fk', 'atomic_kpi_fk',
                                          'threshold', 'result'
                                      ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    def activate_ootb_kpis(self):
        """
        Calculate the out-of-the-box facings-SOS hierarchy (store ->
        category -> sub category -> manufacturer -> brand) and save all
        five levels through save_hierarchy.
        """

        # FACINGS_SOS_MANUFACTURER_IN_WHOLE_STORE - level 1
        sos_store_fk = self.commonV2.get_kpi_fk_by_kpi_name('SOS OUT OF STORE')
        sos_store = ManufacturerFacingsSOSInWholeStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_store_fk).calculate()
        # FACINGS_SOS_CATEGORY_IN_WHOLE_STORE - level 2
        sos_cat_out_of_store_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS CATEGORY OUT OF STORE')
        sos_cat_out_of_store = self.calculate_sos_of_cat_of_out_of_store_new(
            sos_cat_out_of_store_fk)

        # FACINGS_SOS_SUB_CATEGORY_OUT_OF_CATEGORY - level 3
        sos_sub_cat_out_of_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS SUB CATEGORY OUT OF CATEGORY')
        sos_sub_cat_out_of_cat = SubCategoryFacingsSOSPerCategory(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_sub_cat_out_of_cat_fk).calculate()

        # FACINGS_SOS_MANUFACTURER_OUT_OF_SUB_CATEGORY - level 4
        sos_man_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS MANUFACTURER OUT OF SUB CATEGORY')
        sos_man_out_of_sub_cat = ManufacturerFacingsSOSPerSubCategoryInStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_man_out_of_sub_cat_fk).calculate()

        # FACINGS_SOS_BRAND_OUT_OF_SUB_CATEGORY_IN_WHOLE_STORE - level 5
        sos_brand_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS BRAND OUT OF MANUFACTURER')
        sos_brand_out_of_sub_cat = self.calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(
            sos_brand_out_of_sub_cat_fk)

        # Savings results in Hierarchy
        self.save_hierarchy(sos_store, sos_cat_out_of_store,
                            sos_sub_cat_out_of_cat, sos_man_out_of_sub_cat,
                            sos_brand_out_of_sub_cat)

    def calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(self, kpi_fk):
        """
        Facings share-of-shelf of every brand out of its manufacturer,
        calculated per sub category (stacking ignored).

        Returns a list of result dictionaries ready for the common
        results writer.
        """
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings > 0.
        # BUG FIX: `&` binds tighter than `>`, so the original expression
        # evaluated (mask & facings) > 0; the comparison is parenthesized.
        filtered_scif = self.scif[
            ~self.scif['product_type'].isin(['Irrelevant', 'Empty'])
            & (self.scif['facings_ign_stack'] > 0)]

        # Filter by each Sub Category and Manufacturer
        sub_cat_fk_list = filtered_scif['sub_category_fk'].unique().tolist()
        for sub_cat in sub_cat_fk_list:
            filtered_scif_by_sub_cat = filtered_scif[
                filtered_scif['sub_category_fk'] == sub_cat]
            list_of_relevant_manufacturers = filtered_scif_by_sub_cat[
                'manufacturer_fk'].unique().tolist()
            for manu_fk in list_of_relevant_manufacturers:
                filtered_scif_by_sub_cat_and_manufacturer = filtered_scif_by_sub_cat[
                    filtered_scif_by_sub_cat['manufacturer_fk'] == manu_fk]
                denominator_result = filtered_scif_by_sub_cat_and_manufacturer[
                    'facings_ign_stack'].sum()

                # Calculate results per Brand
                list_of_relevant_brands = filtered_scif_by_sub_cat_and_manufacturer[
                    'brand_fk'].unique().tolist()
                for brand_fk in list_of_relevant_brands:
                    filtered_scif_by_brand = filtered_scif_by_sub_cat_and_manufacturer[
                        filtered_scif_by_sub_cat_and_manufacturer['brand_fk']
                        == brand_fk]
                    facings_brand_results = filtered_scif_by_brand[
                        'facings_ign_stack'].sum()
                    # float() guards against integer truncation on Python 2.
                    result_for_brand = facings_brand_results / float(
                        denominator_result)

                    # Preparing the results' dictionary
                    res_dict['kpi_definition_fk'] = kpi_fk
                    res_dict['numerator_id'] = brand_fk
                    res_dict['numerator_result'] = facings_brand_results
                    res_dict['denominator_id'] = int(sub_cat)
                    res_dict['denominator_result'] = denominator_result
                    res_dict['identifier_result'] = (int(brand_fk),
                                                     int(sub_cat),
                                                     int(manu_fk))
                    res_dict['identifier_parent'] = int(manu_fk), (
                        int(sub_cat))
                    res_dict['result'] = result_for_brand
                    res_dict['score'] = result_for_brand
                    # copy() so the next iteration doesn't mutate saved rows.
                    res_list.append(res_dict.copy())
        return res_list

    def calculate_sos_of_cat_of_out_of_store_new(self, kpi_fk):
        """
        Facings share-of-shelf of every category out of the whole store
        (stacking ignored). Returns a list of result dictionaries.
        """
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings > 0.
        # BUG FIX: `&` binds tighter than `>`, so the original expression
        # evaluated (mask & facings) > 0; the comparison is parenthesized.
        filtered_scif = self.scif[
            ~self.scif['product_type'].isin(['Irrelevant', 'Empty'])
            & (self.scif['facings_ign_stack'] > 0)]
        denominator_result = filtered_scif['facings_ign_stack'].sum()
        categories_fk_list = filtered_scif['category_fk'].unique().tolist()

        # Calculate result per category (using facings_ign_stack!)
        for category_fk in categories_fk_list:
            filtered_scif_by_category = filtered_scif[
                filtered_scif['category_fk'] == category_fk]
            facings_category_result = filtered_scif_by_category[
                'facings_ign_stack'].sum()
            # float() guards against integer truncation on Python 2.
            result_for_category = facings_category_result / float(
                denominator_result)

            # Preparing the results' dictionary
            res_dict['kpi_definition_fk'] = kpi_fk
            res_dict['numerator_id'] = category_fk
            res_dict['numerator_result'] = facings_category_result
            res_dict['denominator_id'] = self.store_id
            res_dict['denominator_result'] = denominator_result
            res_dict['result'] = result_for_category
            res_dict['score'] = result_for_category
            # copy() so the next iteration doesn't mutate saved rows.
            res_list.append(res_dict.copy())
        return res_list

    def save_hierarchy(self, level_1, level_2, level_3, level_4, level_5):
        """
        Write the five-level OOTB SOS hierarchy to the new KPI tables,
        linking each level to its parent via identifier strings.

        Levels 1, 3 and 4 arrive as result objects whose `to_dict` is
        read as an attribute (presumably a property on the SOS result
        class - TODO confirm); levels 2 and 5 are already plain dicts.
        """
        # Level 1: store root; no parent, children attach to "level_1".
        for i in level_1:
            res = i.to_dict
            kpi_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                should_enter=False)

        # Level 2: categories, keyed by category fk, under the root.
        for res in level_2:
            kpi_identifier = "level_2_" + str(int(res['numerator_id']))
            parent_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        # Level 3: sub categories under their category.
        for i in level_3:
            res = i.to_dict
            kpi_identifier = str(int(res['numerator_id']))
            parent_identifier = "level_2_" + str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        # Level 4: manufacturers, keyed by (manufacturer, sub category).
        for i in level_4:
            res = i.to_dict
            kpi_identifier = "level_4_" + str(
                (int(res['numerator_id']), int(res['denominator_id'])))
            parent_identifier = str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        # Level 5: brands, using the identifiers prepared by the
        # brand-SOS calculation.
        for res in level_5:
            kpi_identifier = "level_5_" + str(res['identifier_result'])
            parent_identifier = "level_4_" + str(res['identifier_parent'])
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.

        Old results for this session are deleted first so a re-run of
        the calculation is idempotent.
        """
        # Fresh connection for the final write.
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    def get_equipment_score_relevant_scenes(self):
        """Return the scene fks of 'ON - DRAUGHT TAPS' scenes (empty list when there is no scif data)."""
        generator_scif = self.diageo_generator.scif
        if generator_scif.empty:
            return []
        relevant = generator_scif[generator_scif['template_name'] == 'ON - DRAUGHT TAPS']
        return relevant['scene_fk'].unique().tolist()
# Code example #8
class SINGHATHToolBox:
    """Excel-template-driven KPI calculations for a single session.

    The KPI template workbook lists the KPIs to run; each active KPI row
    names a data sheet that this class knows how to calculate:
    price difference, POS presence, or dump-display presence.
    Results are written through the project's Common writer.
    """

    def __init__(self, data_provider, output):
        # Cache the session-scoped data sets exposed by the data provider.
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.templates = self.data_provider[Data.TEMPLATES]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        # The KPI template workbook is shipped next to this module.
        self.templates_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..',
            TEMPLATE_PARENT_FOLDER, TEMPLATE_NAME)
        self.kpi_template = pd.ExcelFile(self.templates_path)

    def get_products_price_for_ean_codes(self, ean_codes, session_fk):
        """Fetch manually collected prices (and product info) for the given EANs.

        :param ean_codes: tuple of EAN code strings, substituted into the SQL
            IN clause — tuple formatting supplies the parentheses.
        :param session_fk: session pk whose collected prices are wanted.
        :return: DataFrame with price, promotion flag and product columns.
        """
        # https://jira.trax-cloud.com/browse/TOHA-2024 to have this in data provider
        self.rds_conn.connect_rds()
        query = """
                    select 
                    value as price, is_promotion,
                    product_fk, name, ean_code, category_fk, brand_fk, type as product_type,
                    sub_category_fk
                    from probedata.manual_collection_price mcp
                    join static_new.product prod on mcp.product_fk=prod.pk
                    where mcp.value is not null
                    and prod.is_active =1
                    and session_fk={session_fk}
                    and ean_code in {ean_codes};
                    """
        df = pd.read_sql_query(
            query.format(
                ean_codes=ean_codes,
                session_fk=session_fk,
            ), self.rds_conn.db)
        return df

    def main_calculation(self, *args, **kwargs):
        """
        This function calculates the KPI results.
        """
        self.filter_and_send_kpi_to_calc()
        self.common.commit_results_data()
        return 0

    def filter_and_send_kpi_to_calc(self):
        """Iterate the template's KPI sheet and dispatch each runnable KPI.

        A row is skipped when it is marked inactive, when the current store
        type is not in its permitted store types, or when the KPI name is
        not found in the static data. Otherwise the row's data sheet name
        selects which write_* method runs.
        """
        kpi_sheet = self.kpi_template.parse(KPI_SHEET)
        for index, kpi_sheet_row in kpi_sheet.iterrows():
            # Active column values like '0.0'/'n'/'no' deactivate the KPI.
            if not is_nan(kpi_sheet_row[KPI_ACTIVE_COL]):
                if str(kpi_sheet_row[KPI_ACTIVE_COL]).strip().lower() in [
                        '0.0', 'n', 'no'
                ]:
                    print("KPI :{} deactivated in sheet.".format(
                        kpi_sheet_row[KPI_NAME_COL]))
                    continue
            # Store-type filter: 'all' (or blank) means every store type.
            if not is_nan(kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL]):
                if bool(kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].strip()) and \
                        kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].strip().lower() != 'all':
                    print "Check the store types in excel..."
                    permitted_store_types = [
                        x.strip() for x in
                        kpi_sheet_row[KPI_SHEET_STORE_TYPES_COL].split(',')
                        if x.strip()
                    ]
                    if self.store_info.store_type.values[
                            0] not in permitted_store_types:
                        print "Store type not permitted..."
                        continue
            # The KPI must exist (and not be deleted) in the static data.
            kpi = self.kpi_static_data[
                (self.kpi_static_data[DF_KPI_TYPE_COL] ==
                 kpi_sheet_row[KPI_NAME_COL])
                & (self.kpi_static_data['delete_time'].isnull())]
            if kpi.empty:
                print("KPI Name:{} not found in DB".format(
                    kpi_sheet_row[KPI_NAME_COL]))
                continue
            sheet_name = kpi_sheet_row[KPI_SHEET_NAME_COL]
            sheet_data_frame = self.kpi_template.parse(sheet_name).fillna(
                method='ffill')
            # Dispatch by the data sheet the KPI row points at.
            if sheet_name == PRICE_SHEET:
                self.write_price_difference(kpi, sheet_data_frame)
            elif sheet_name == POS_PRESENCE_SHEET:
                self.write_pos_presence(kpi, sheet_data_frame)
            elif sheet_name == DUMP_DISPLAY_PRESENCE:
                self.write_dump_display_presence(kpi, sheet_data_frame)

    def write_price_difference(self, kpi, price_sheet_data_frame):
        """Compare own vs. competitor prices per template row and write results.

        A row passes (result 1) when own price minus competitor price does not
        exceed the allowed difference from the template; otherwise result 0.
        The collected prices themselves are stored in result/score.
        """
        # Promote the first row to be the header row, then drop it.
        price_sheet_data_frame.columns = price_sheet_data_frame.iloc[0]
        price_sheet_data_frame = price_sheet_data_frame.reindex(
            price_sheet_data_frame.index.drop(0))

        # Query prices for every EAN in both columns with one DB round trip.
        all_ean_codes = tuple(
            map(
                str, price_sheet_data_frame[[
                    PRICE_SHEET_EAN_CODE_1_COL, PRICE_SHEET_EAN_CODE_2_COL
                ]].values.ravel('F')))
        prod_price_data = self.get_products_price_for_ean_codes(
            ean_codes=all_ean_codes,
            session_fk=self.session_info['pk'].iloc[0])
        for index, each_row in price_sheet_data_frame.iterrows():
            result = 1
            own_manufacturer_ean = each_row.get(PRICE_SHEET_EAN_CODE_1_COL,
                                                None)
            competitive_manufacturer_ean = each_row.get(
                PRICE_SHEET_EAN_CODE_2_COL, None)
            if not own_manufacturer_ean or not competitive_manufacturer_ean:
                continue
            own_manufacturer = prod_price_data.query(
                "ean_code=='{code}'".format(code=own_manufacturer_ean))
            competitive_manufacturer = prod_price_data.query(
                "ean_code=='{code}'".format(code=competitive_manufacturer_ean))
            if own_manufacturer.empty or competitive_manufacturer.empty:
                print "Own or Competitive Manufacturer not present in the session."
                continue
            own_manufacturer_price = own_manufacturer['price'].iloc[0]
            competitive_manufacturer_price = competitive_manufacturer[
                'price'].iloc[0]
            # if the `calculated price difference` <= `given price difference` then only it is a pass
            if float(own_manufacturer_price - competitive_manufacturer_price) > \
                    float(each_row.get(PRICE_SHEET_PRICE_DIFFERENCE_COL, 0)):
                result = 0
            self.common.write_to_db_result(
                fk=kpi['pk'].iloc[0],
                numerator_id=int(own_manufacturer['product_fk'].iloc[0]),
                numerator_result=result,
                denominator_id=int(
                    competitive_manufacturer['product_fk'].iloc[0]),
                denominator_result=result,
                context_id=self.store_id,
                result=own_manufacturer_price,
                score=competitive_manufacturer_price,
            )

    def write_pos_presence(self, kpi, pos_data_frame):
        """Write a presence result (1/0) for every POS EAN in the template sheet.

        Looks for the EAN in the session's scene item facts first; when absent
        there, falls back to the product master only to resolve the product fk
        and records presence 0. EANs missing from the master are skipped.
        """
        for each_ean in pos_data_frame[POS_PRESENCE_EAN_COL]:
            presence = 1
            product_df = self.scif.query(
                "product_ean_code=='{each_ean}' and product_type=='{type}'".
                format(each_ean=each_ean, type=DUMP_DISPLAY_POS_TYPE))
            if product_df.empty:
                product_df = self.all_products.query(
                    "product_ean_code=='{each_ean}' and product_type=='{type}'"
                    .format(each_ean=each_ean, type=DUMP_DISPLAY_POS_TYPE))
                presence = 0
                if product_df.empty:
                    # This should not happen
                    # This means the POS ean code is not in the product master data
                    continue
                    # raise Exception("KPI {kpi_name}: The product with EAN {ean} and type {type}"
                    #                 " in template is not in DB.".format(
                    #                     kpi_name=kpi[DF_KPI_TYPE_COL].iloc[0],
                    #                     ean=each_ean,
                    #                     type=DUMP_DISPLAY_POS_TYPE,
                    #                 ))
            self.common.write_to_db_result(
                fk=kpi['pk'].iloc[0],
                numerator_id=int(product_df['product_fk'].iloc[0]),
                denominator_id=self.store_id,
                context_id=self.store_id,
                result=presence,
                score=presence,
            )

    def write_dump_display_presence(self, kpi, dump_display_data_frame):
        """Write, per category, whether a qualifying dump display exists (1/0).

        For each category in the template, scenes of the allowed scene types
        are scanned; within a scene, each product type's EAN group must reach
        its required facings count (rows combined with AND/OR logic from the
        template). A scene satisfying every product type in
        DUMP_DISPLAY_PROD_TYPE_LIST marks the category present.
        """
        dump_display_data_group = dump_display_data_frame.groupby(
            DUMP_DISPLAY_CATEGORY_COL)
        for category, dump_display_data in dump_display_data_group:
            presence = 0
            # iterate through rows for each category
            category_name = DUMP_CATEGORY_MAP[category]
            category_fk = self.all_products.query(
                "category=='{category}'".format(
                    category=category_name))['category_fk'].iloc[0]
            # get the applicable scene types -- start
            set_scene_types = set()
            scene_type_list = list(
                dump_display_data[DUMP_DISPLAY_SCENE_TYPE_COL].values.ravel(
                    'F'))
            for each_list in scene_type_list:
                set_scene_types.update(
                    tuple(str(each.strip()) for each in each_list.split(',')))
            # get the applicable scene types -- end
            # make template name case-insensitive search -- start
            self.templates["template_name"] = self.templates[
                "template_name"].str.lower()
            _scene_types = map(str.lower, list(set_scene_types))
            # make template name case-insensitive search -- start
            allowed_template_fks = self.templates.query(
                "template_name in {allowed_templates}".format(
                    allowed_templates=_scene_types)
            )['template_fk'].values.tolist()
            template_scif = self.scif.query(
                'template_fk in {}'.format(allowed_template_fks))
            if template_scif.empty:
                print "kpi: {kpi}: Template/Scene Types: {templates} are not present in session {sess}" \
                    .format(kpi=kpi[DF_KPI_TYPE_COL].iloc[0],
                            templates=_scene_types,
                            sess=self.session_uid)
                continue
            template_scif_by_scene_id = template_scif.groupby('scene_id')
            row_truths = []  # to check if all items in the category is true
            for scene_id, scene_data in template_scif_by_scene_id:
                dump_display_product_group = dump_display_data.groupby(
                    DUMP_DISPLAY_PROD_TYPE_COL)
                one_condition_fail = False
                for prod_type, product_items in dump_display_product_group:
                    if one_condition_fail:
                        continue
                    # AND: every row must meet its count; OR: any row suffices.
                    logic = product_items[DUMP_DISPLAY_LOGIC_COL].iloc[
                        0].strip().lower()
                    for idx, each_prod_entry in product_items.iterrows():
                        _pos_codes = str(
                            each_prod_entry[DUMP_DISPLAY_EAN_CODE_COL])
                        all_pos_ean_codes = tuple(
                            map(str, [
                                x.strip() for x in _pos_codes.split(',') if x
                            ]))
                        facings_count = 0
                        prod_scif_with_ean = scene_data.query(
                            'product_ean_code in {all_skus} and category_fk=="{category_fk}"'
                            .format(all_skus=all_pos_ean_codes,
                                    category_fk=category_fk))
                        if not prod_scif_with_ean.empty:
                            facings_count = int(
                                prod_scif_with_ean['facings'].iloc[0])
                        if facings_count < int(
                                each_prod_entry[DUMP_DISPLAY_COUNT_COL]):
                            if logic == 'and':
                                # one prod type didn't satisfy; try next scene.
                                one_condition_fail = True
                                break
                        else:
                            row_truths.append(prod_type)
                if row_truths and all(
                    [ech in row_truths
                     for ech in DUMP_DISPLAY_PROD_TYPE_LIST]):
                    # dump display is found, break out and save presence for this category
                    presence = 1
                    break
                else:
                    row_truths = []

            # save for each category
            self.common.write_to_db_result(
                fk=int(kpi['pk'].iloc[0]),
                # only one category
                numerator_id=category_fk,
                denominator_id=self.store_id,
                context_id=self.store_id,
                result=presence,
                score=presence,
            )
コード例 #9
0
class PURINAToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.session_fk = self.session_info['pk'].values[0]
        self.kpi_results_queries = []
        self.kpi_static_queries = []
        self.purina_scif = self.scif.loc[self.scif['category_fk'] == PET_FOOD_CATEGORY]

    def calculate_purina(self, *args, **kwargs):
        """
        This function calculates the KPI results.
        """
        if not self.is_session_purina():
            return
        # Update all new static KPIs
        self.create_new_static_kpi()

        self.kpi_static_data = self.common.get_kpi_static_data(refresh=True)

        self.update_kpi_score()
        self.run_data_collecting()
        self.common.commit_results_data()

    def update_kpi_score(self):
        # Only to see results in join :(

        for kpi in PURINA_KPI:
            kpi_fk = self.get_kpi_fk_by_kpi_name(kpi, self.LEVEL2, set_name=PURINA_SET)
            self.common.write_to_db_result(kpi_fk, self.LEVEL2, 1)


    def run_data_collecting(self):
        """
        This function run the man calculation of linear sos with sub category out of subsegment
        or price out of subsegment
        :param price_kpi:
        :return:
        """

        data = self.purina_scif.dropna(subset=[LINEAR_SIZE])

        if data.empty:
            Log.info("No relevant purina's products were found in session.")
            return

        # subseg_name_list = data[SCIF_SUBSEGMENT].unique()
        # for subseg in subseg_name_list:
        #     if not subseg:
        #         subseg = NO_SUBSEG
        #         by_subseg = data.loc[pd.isnull(data[SCIF_SUBSEGMENT])]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     else:
        #         by_subseg = data.loc[data[SCIF_SUBSEGMENT] == subseg]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=SUBSEGMENT_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=PRICE_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)

        # gets all category linear size

        category_ft = self.cm_to_ft(sum(data[LINEAR_SIZE]))
        fk = self.get_kpi_fk_by_kpi_name(PURINA_SET, self.LEVEL1)
        self.common.write_to_db_result(fk, self.LEVEL1, category_ft)

        man = data['manufacturer_name'].unique()
        for mf in man:
            by_mf = data.loc[data['manufacturer_name'] == mf]
            manufacturer_ft = self.cm_to_ft(sum(by_mf[LINEAR_SIZE]))
            relevant_kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == MANUFACTUR) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
            atomic_fk = self.get_kpi_fk_by_kpi_name(mf, self.LEVEL3, father=MANUFACTUR, set_name=PURINA_SET)
            if atomic_fk:
                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                               score=manufacturer_ft, score_2=manufacturer_ft,
                                               session_uid=self.session_uid, store_fk=self.store_id,
                                               display_text=mf.replace("'","''"),
                                               visit_date=self.visit_date.isoformat(),
                                               calculation_time=datetime.utcnow().isoformat(),
                                               kps_name=PURINA_SET,
                                               kpi_fk=relevant_kpi_fk)
            else:
                print 'atomic cannot be saved for manufacturer {}'.format(mf)

            brands = by_mf['brand_name'].unique()
            for brand in brands:
                by_brand = by_mf.loc[data['brand_name'] == brand]
                brand_ft = self.cm_to_ft(sum(by_brand[LINEAR_SIZE]))
                kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == BRAND) &
                                                  (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                atomic_fk = self.get_kpi_fk_by_kpi_name(brand, self.LEVEL3, father=BRAND, set_name=PURINA_SET)
                if atomic_fk:
                    self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                   score=brand_ft, score_2=brand_ft, style=mf.replace("'","''"),
                                                   session_uid=self.session_uid, store_fk=self.store_id,
                                                   display_text=brand.replace("'","''"),
                                                   visit_date=self.visit_date.isoformat(),
                                                   calculation_time=datetime.utcnow().isoformat(),
                                                   kps_name=PURINA_SET,
                                                   kpi_fk=kpi_fk)
                else:
                    print 'atomic cannot be saved for brand {}'.format(brand)

                categories = by_brand[SCIF_CATEOGRY].unique()
                for cat in categories:
                    if not cat:
                        cat = OTHER
                        by_cat = by_brand.loc[pd.isnull(by_brand[SCIF_PRICE])]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))
                    else:
                        by_cat = by_brand.loc[data[SCIF_SUB_CATEOGRY] == cat]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))

                    kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == CATEGORY) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                    atomic_fk = self.get_kpi_fk_by_kpi_name(cat, self.LEVEL3, father=CATEGORY, set_name=PURINA_SET)
                    if atomic_fk:
                        self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                       score=cat_ft,
                                                       score_2=cat_ft, style=mf.replace("'","''"),
                                                       result=brand.replace("'","''"),
                                                       session_uid=self.session_uid, store_fk=self.store_id,
                                                       display_text=cat.replace("'","''"),
                                                       visit_date=self.visit_date.isoformat(),
                                                       calculation_time=datetime.utcnow().isoformat(),
                                                       kps_name=PURINA_SET,
                                                       kpi_fk=kpi_fk)
                    else:
                        print 'atomic cannot be saved for category {}'.format(cat)

                    sub_cats = by_cat[SCIF_SUB_CATEOGRY].unique()
                    for sub_cat in sub_cats:
                        if not sub_cat:
                            sub_cat = OTHER
                            by_sub_cat = by_cat.loc[pd.isnull(by_cat[SCIF_PRICE])]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        else:
                            by_sub_cat = by_cat.loc[data[SCIF_SUB_CATEOGRY] == sub_cat]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        # write to db under sub category atomic kpi score with brand name in results

                        kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == SUB_CATEGORY) &
                                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                        atomic_fk = self.get_kpi_fk_by_kpi_name(sub_cat, self.LEVEL3, father=SUB_CATEGORY,
                                                                    set_name=PURINA_SET)
                        if atomic_fk:
                            self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                           score=sub_cat_ft,
                                                           score_2=sub_cat_ft, style=mf.replace("'","''"),
                                                           result=brand.replace("'","''"),
                                                           result_2=cat.replace("'","''"),
                                                           session_uid=self.session_uid, store_fk=self.store_id,
                                                           display_text=sub_cat.replace("'","''"),
                                                           visit_date=self.visit_date.isoformat(),
                                                           calculation_time=datetime.utcnow().isoformat(),
                                                           kps_name=PURINA_SET,
                                                           kpi_fk=kpi_fk)
                        else:
                            print 'atomic cannot be saved for sub category {}'.format(sub_cat)

                        prices = by_sub_cat[SCIF_PRICE].unique()
                        for price_class in prices:
                            if not price_class:
                                price_class = OTHER
                                by_prices = by_sub_cat.loc[pd.isnull(by_sub_cat[SCIF_PRICE])]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            else:
                                by_prices = by_sub_cat.loc[by_sub_cat[SCIF_PRICE] == price_class]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == PRICE_KPI) &
                                                              (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                            atomic_fk = self.get_kpi_fk_by_kpi_name(price_class, self.LEVEL3, father=PRICE_KPI,
                                                                    set_name=PURINA_SET)
                            if atomic_fk:
                                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                               score=price_ft,
                                                               score_2=price_ft, style=mf.replace("'","''"),
                                                               result=brand.replace("'","''"),
                                                               result_2=cat.replace("'","''"),
                                                               result_3=sub_cat.replace("'","''"),
                                                               session_uid=self.session_uid, store_fk=self.store_id,
                                                               display_text=price_class.replace("'", "''"),
                                                               visit_date=self.visit_date.isoformat(),
                                                               calculation_time=datetime.utcnow().isoformat(),
                                                               kps_name=PURINA_SET,
                                                               kpi_fk=kpi_fk )
                            else:
                                print 'atomic cannot be saved for price class {}'.format(price_class)


    @staticmethod
    def cm_to_ft(cm):
        return cm / 30.48

    def get_labels(self):
        query = """select pk, labels, ean_code
        from static_new.product
        """
        labels = pd.read_sql_query(query, self.rds_conn.db)
        return labels

    def get_kpi_fk_by_kpi_name(self, kpi_name, kpi_level, father=None, logic_father=None, set_name=None):
        if kpi_level == self.LEVEL1:
            column_key = 'kpi_set_fk'
            column_value = 'kpi_set_name'
            father_value = 'kpi_set_name'


        elif kpi_level == self.LEVEL2:
            column_key = 'kpi_fk'
            column_value = 'kpi_name'
            father_value = 'kpi_set_name'


        elif kpi_level == self.LEVEL3:
            column_key = 'atomic_kpi_fk'
            column_value = 'atomic_kpi_name'
            father_value = 'kpi_name'

        else:
            raise ValueError('invalid level')

        try:
            relevant = self.kpi_static_data[self.kpi_static_data[column_value] == kpi_name]
            if father:
                relevant = relevant[relevant[father_value] == father]
            if set_name:
                relevant = relevant[relevant['kpi_set_name'] == set_name]

            return relevant[column_key].values[0]

        except IndexError:
            Log.info('Kpi name: {}, isn\'t equal to any kpi name in static table'.format(kpi_name))
            return None

    def create_new_static_kpi(self):
        #  This functions takes all brands, sub categories, categories and manufacturers in session.
        #  The function adds them to database in case they are new.
        brands = self.get_all_brands()
        sub_cats = self.get_all_sub_categories()
        manufacturer = self.get_all_manufacturers()
        cats = self.get_all_categories()
        prices = self.get_all_price_classes()

        new_brands = self.purina_scif.loc[~self.purina_scif['brand_name'].isin(brands)]['brand_name'].unique()
        new_manufacturer = self.purina_scif.loc[~self.purina_scif['manufacturer_name'].isin(manufacturer)][
            'manufacturer_name'].unique()
        new_sub_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_SUB_CATEOGRY].isin(sub_cats)) &
                                        (~pd.isnull(self.purina_scif[SCIF_SUB_CATEOGRY]))][SCIF_SUB_CATEOGRY].unique()
        new_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_CATEOGRY].isin(cats)) &
                                       (~pd.isnull(self.purina_scif[SCIF_CATEOGRY]))][SCIF_CATEOGRY].unique()
        new_prices = self.purina_scif.loc[(~self.purina_scif[SCIF_PRICE].isin(prices)) &
                                          (~pd.isnull(self.purina_scif[SCIF_PRICE]))][SCIF_PRICE].unique()

        self.save_static_atomics(BRAND, new_brands, PURINA_SET)
        self.save_static_atomics(MANUFACTUR, new_manufacturer, PURINA_SET)
        self.save_static_atomics(CATEGORY, new_cat, PURINA_SET)
        self.save_static_atomics(SUB_CATEGORY, new_sub_cat, PURINA_SET)
        self.save_static_atomics(PRICE_KPI, new_prices, PURINA_SET)

        self.commit_static_data()

    def get_all_brands(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == BRAND]['atomic_kpi_name']

    def get_all_sub_categories(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == SUB_CATEGORY]['atomic_kpi_name']

    def get_all_manufacturers(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == MANUFACTUR]['atomic_kpi_name']

    def get_all_categories(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == CATEGORY]['atomic_kpi_name']

    def get_all_price_classes(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == PRICE_KPI]['atomic_kpi_name']

    def save_static_atomics(self, kpi_name, atomics, set_name):
        kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == kpi_name) &
                                          (self.kpi_static_data['kpi_set_name'] == set_name)]['kpi_fk'].values[0]
        for current in atomics:
            current = current.replace("'", "''")
            query = """
               INSERT INTO {0} (`kpi_fk`, `name`, `description`, `display_text`,`presentation_order`, `display`)
               VALUES ('{1}', '{2}', '{3}', '{4}', '{5}', '{6}');""".format(STATIC_ATOMIC,
                                                                            kpi_fk, current, current, current, 1, 'Y')

            self.kpi_static_queries.append(query)

    def commit_static_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        # ProjectConnector(self.project_name, DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        for query in self.kpi_static_queries:
            try:
                cur.execute(query)
            except Exception as e:
                Log.info('query {} could not be executed.'.format(query))
        self.rds_conn.db.commit()

        self.rds_conn.disconnect_rds()

    def is_session_purina(self):
        # This function checks is the session is of Purina project by its category and that it is a successful visit.
        session_data = self.get_session_category_data()
        session_data = session_data.loc[(session_data['category_fk'] == 13) &
                                        (session_data['resolution_fk'] == 1) &
                                        (session_data['exclude_status_fk'] == 1)]
        if not session_data.empty:
            return True
        return False

    def get_session_category_data(self):
        local_con = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        query = """select category_fk, resolution_fk, exclude_status_fk from probedata.session_category
                where session_fk = {}""".format(self.session_fk)
        data = pd.read_sql_query(query, local_con.db)
        return data
コード例 #10
0
class AddKPIs(object):
    """
    Loads KPI definitions from an Excel template, validates them, and inserts
    them into static.kpi_level_2 of the given project's DB.

    Validation failures are not fatal: the offending cells are highlighted in
    a copy of the template written to /tmp so the template owner can fix them.
    """

    def __init__(self,
                 project_name,
                 template_path=None,
                 remove_duplicates=False,
                 add_kpi_pks=False):
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.get_kpi_static_data()
        self.template_path = self.get_template_path(template_path)
        self.template_data = pd.read_excel(self.template_path)
        self.remove_duplicates = remove_duplicates
        self.kpi_counter = 0
        self.insert_queries = []
        self.output_path = self.get_output_file_path()
        # Set of (sheet_row, column_name, color) tuples marking invalid cells.
        self.error_cells = set()
        self.add_kpi_pks = add_kpi_pks

    @staticmethod
    def get_template_path(template_path):
        """Return the given template path, or the default template sitting
        next to this module when no explicit path was supplied."""
        return template_path if template_path is not None else os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'new_tables_template.xlsx')

    def get_output_file_path(self):
        """Return the /tmp path (same file name as the input template) used
        for the highlighted error copy."""
        # os.path.basename replaces the manual split('/') + indexing dance.
        return os.path.join('/tmp', os.path.basename(self.template_path))

    def get_kpi_static_data(self):
        """
        Extract the existing static KPI data into one data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = Queries.get_new_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def add_kpis_from_template(self):
        """Entry point: validate the template and, if it is clean, insert the
        KPIs; otherwise write a highlighted copy and report its location."""
        self.validate_template()
        if len(self.error_cells) == 0:
            self.insert_into_kpi_lvl_2()
        else:
            self.highlight_errors_in_template()
            print('errors found in template. see highlighted in path: {}'.format(
                self.output_path))

    def validate_template(self):
        """Run all template validations; each records bad cells in
        self.error_cells rather than raising."""
        self.check_similar_types()
        self.check_binary_fields()
        if not self.remove_duplicates:
            self.check_duplicate_in_template()

    def check_similar_types(self):
        """Mark (salmon) template rows whose KPI type already exists in the DB."""
        kpi_types = set(self.template_data[Consts.KPI_TYPE].unique().tolist())
        existing_types = set(
            self.kpi_static_data[Consts.KPI_TYPE].unique().tolist())
        similar_types = kpi_types.intersection(existing_types)
        if similar_types:
            err_df = self.template_data[self.template_data[
                Consts.KPI_TYPE].isin(similar_types)]
            # +1 converts the 0-based frame index to the written sheet's row
            # number (row 0 of the output sheet is the header).
            cells_list = [(i + 1, Consts.KPI_TYPE, Consts.SALMON)
                          for i in err_df.index.values]
            self.error_cells.update(cells_list)

    def check_binary_fields(self):
        """Mark (lime) cells in binary columns holding anything but 0/1/empty."""
        binary_fields_df = self.template_data[Consts.BINARY_FIELDS]
        allowed_values = [1, 0, '1', '0', '1.0', '0.0', np.nan]
        for col in binary_fields_df.columns.tolist():
            err_df = binary_fields_df[~binary_fields_df[col].
                                      isin(allowed_values)]
            if len(err_df) > 0:
                cells_list = [(i + 1, col, Consts.LIME)
                              for i in err_df.index.values]
                self.error_cells.update(cells_list)

    def check_duplicate_in_template(self):
        """Mark (blue) template rows whose KPI type appears more than once."""
        # Work on a copy: the original code added a 'count' column directly to
        # self.template_data, which then leaked into the highlighted output
        # file and the insert loop.
        template_data = self.template_data.copy()
        template_data['count'] = 1
        count_rows = template_data.groupby(
            Consts.KPI_TYPE, as_index=False).agg({'count': np.sum})
        count_rows = count_rows[count_rows['count'] != 1]
        if len(count_rows) > 0:
            duplicate_kpis = count_rows[Consts.KPI_TYPE].values.tolist()
            print('duplicate kpis: {}'.format(duplicate_kpis))
            for kpi in duplicate_kpis:
                err_df = template_data[template_data[Consts.KPI_TYPE] == kpi]
                cells_list = [(i + 1, Consts.KPI_TYPE, Consts.BLUE)
                              for i in err_df.index.values]
                self.error_cells.update(cells_list)

    def highlight_errors_in_template(self):
        """Write the template to self.output_path with every cell recorded in
        self.error_cells filled with its validation color."""
        writer = pd.ExcelWriter(self.output_path, engine='xlsxwriter')
        self.template_data.to_excel(writer, sheet_name='Sheet1', index=False)

        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        for i, col, color in list(self.error_cells):
            # i is the 1-based sheet row; i - 1 is the frame index.
            value = self.template_data.loc[i - 1, col]
            col_num = self.template_data.columns.get_loc(col)
            error_format = workbook.add_format({'fg_color': color})
            worksheet.write(i, col_num, value, error_format)
        writer.save()

    def insert_into_kpi_lvl_2(self):
        """Build one insert query per template row (optionally de-duplicated
        by KPI type) and commit them in merged batches."""
        if self.remove_duplicates:
            self.template_data = self.template_data.drop_duplicates(
                subset=['type'], keep='first')
        for _, row in self.template_data.iterrows():
            attributes = self.create_attributes_dict(row)
            query = insert(attributes, Consts.STATIC_KPI_LVL_2)
            self.insert_queries.append(query)
        merged_queries = self.merge_insert_queries()
        self.commit_to_db(merged_queries)

    def create_attributes_dict(self, kpi_row):
        """Map one template row to the static.kpi_level_2 column dict expected
        by the insert() query builder (values keyed by row position 0).

        Relevance / is_percent flags default to 0 when the template cell is
        empty (NaN); validity dates and calculation stage are fixed values.
        """
        attributes_dict = {
            'type': {
                0: kpi_row['type'].replace("'", "\\'").encode('utf-8')
            },
            'client_name': {
                0: kpi_row['client_name'].replace("'", "\\'").encode('utf-8')
            },
            'numerator_type_fk': {
                0: kpi_row['numerator_type_fk']
            },
            'denominator_type_fk': {
                0: kpi_row['denominator_type_fk']
            },
            'kpi_score_type_fk': {
                0: kpi_row['kpi_score_type_fk']
            },
            'kpi_result_type_fk': {
                0: kpi_row['kpi_result_type_fk']
            },
            'session_relevance': {
                0:
                kpi_row['session_relevance']
                if not np.isnan(kpi_row['session_relevance']) else 0
            },
            'scene_relevance': {
                0:
                kpi_row['scene_relevance']
                if not np.isnan(kpi_row['scene_relevance']) else 0
            },
            'planogram_relevance': {
                0:
                kpi_row['planogram_relevance']
                if not np.isnan(kpi_row['planogram_relevance']) else 0
            },
            'live_session_relevance': {
                0:
                kpi_row['live_session_relevance']
                if not np.isnan(kpi_row['live_session_relevance']) else 0
            },
            'live_scene_relevance': {
                0:
                kpi_row['live_scene_relevance']
                if not np.isnan(kpi_row['live_scene_relevance']) else 0
            },
            'is_percent': {
                0:
                kpi_row['is_percent']
                if not np.isnan(kpi_row['is_percent']) else 0
            },
            'kpi_target_type_fk': {
                0: kpi_row['kpi_target_type_fk']
            },
            'kpi_calculation_stage_fk': {
                0: 3
            },
            'valid_from': {
                0: '1990-01-01'
            },
            'valid_until': {
                0: '2050-01-01'
            },
            'initiated_by': {
                0: 'Custom'
            },
            'context_type_fk': {
                0: kpi_row['context_type_fk']
            }
        }
        if self.add_kpi_pks:
            attributes_dict.update({'pk': {0: kpi_row['pk']}})
        return attributes_dict

    def merge_insert_queries(self):
        """Group single-row INSERTs by their static prefix and merge each
        group into multi-VALUES statements of at most 10**4 rows each."""
        query_groups = {}
        for query in self.insert_queries:
            if not query:
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            # range() behaves identically here on both Python 2 and 3
            # (xrange is Python-2-only).
            for group_index in range(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    def commit_to_db(self, queries):
        """Execute and commit each merged query; failures are reported but do
        not abort the remaining queries."""
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        for query in queries:
            try:
                cur.execute(query)
                self.rds_conn.db.commit()
                print('kpis were added to the db')
            except Exception as e:
                print('kpis were not inserted: {}'.format(repr(e)))
コード例 #11
0
class AddKPIsToAPI(object):
    """
    Registers KPIs in static.kpi_view_configuration with application='API' so
    they become visible through the API.

    The KPI pks may come from an Excel file (``file_path``), an explicit
    ``kpi_list``, or every KPI currently in static.kpi_level_2
    (``all_existing_kpis=True``). Already-configured and explicitly excluded
    pks are always skipped.
    """

    def __init__(self,
                 project_name,
                 file_path=None,
                 kpi_list=None,
                 all_existing_kpis=False,
                 kpis_to_exclude=None):
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.get_kpi_static_data()
        # kpi_level_2 pks already configured for the API (skipped later).
        self.existing_configurations = self.get_kpi_view_config_api()
        self.template_path = file_path
        self.template_data = pd.read_excel(
            self.template_path) if self.template_path is not None else None
        self.all_existing_kpis = all_existing_kpis
        self.insert_queries = []
        self.kpi_list = list(
            set(kpi_list)) if kpi_list is not None else kpi_list
        self.kpis_to_exclude = list(
            set(kpis_to_exclude)) if kpis_to_exclude is not None else []

    def get_output_file_path(self):
        """Return the /tmp path matching the template's file name, or None
        when no template file was supplied.

        The original implementation crashed with AttributeError when
        ``file_path`` was left at its default of None.
        """
        if self.template_path is None:
            return None
        return os.path.join('/tmp', os.path.basename(self.template_path))

    def get_kpi_view_config_api(self):
        """Return the list of kpi_level_2 pks already configured for the API."""
        query = """ select * from static.kpi_view_configuration where application='API' """
        kpi_config_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_config_data['kpi_level_2_fk'].values.tolist()

    def get_kpi_static_data(self):
        """
        Extract the existing static KPI data into one data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = Queries.get_new_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def configure_kpis_for_api(self):
        """Entry point: resolve the requested kpi pks, drop those already
        configured or explicitly excluded, and insert the rest."""
        if self.all_existing_kpis:
            if self.kpi_list or self.template_path:
                print('all_existing_kpis is set to True => kpi list or kpi file data will be ignored')
            kpi_pks = self.kpi_static_data['pk'].values.tolist()
        else:
            # Template file wins over the explicit list; fall back to [].
            kpi_pks = self.template_data['pk'].unique().tolist(
            ) if self.template_data is not None else self.kpi_list
            kpi_pks = kpi_pks if kpi_pks is not None else []
        kpi_pks = list(set(kpi_pks) - set(self.existing_configurations))
        kpi_pks = list(set(kpi_pks) - set(self.kpis_to_exclude))
        self.generate_insert_queries(kpi_pks)
        if self.insert_queries:
            merged_queries = self.merge_insert_queries()
            self.commit_to_db(merged_queries)
        else:
            print('No kpis were added')

    def generate_insert_queries(self, kpi_pks):
        """Build one static.kpi_view_configuration insert per kpi pk."""
        for pk in kpi_pks:
            attributes = self.create_attributes_dict(pk)
            query = insert(attributes, Consts.STATIC_KPI_VIEW_CONFIG)
            self.insert_queries.append(query)

    @staticmethod
    def create_attributes_dict(pk):
        """Return the column dict (values keyed by row position 0) expected by
        the insert() query builder for one API view-configuration row."""
        attributes_dict = {
            'application': {
                0: 'API'
            },
            'kpi_level_2_fk': {
                0: pk
            },
            'kpi_level_1_fk': {
                0: 0
            },
            'page': {
                0: ""
            }
        }
        return attributes_dict

    def merge_insert_queries(self):
        """Group single-row INSERTs by their static prefix and merge each
        group into multi-VALUES statements of at most 10**4 rows each."""
        query_groups = {}
        for query in self.insert_queries:
            if not query:
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            # range() behaves identically here on both Python 2 and 3.
            for group_index in range(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    def commit_to_db(self, queries):
        """Execute and commit each merged query; failures are reported but do
        not abort the remaining queries."""
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        for query in queries:
            try:
                cur.execute(query)
                self.rds_conn.db.commit()
                print('kpis were added to the db')
            except Exception as e:
                print('kpis were not inserted: {}'.format(repr(e)))
コード例 #12
0
class SOLARBRToolBox:
    """
    KPI calculation toolbox for the SOLAR BR project.

    Reads a main KPI template and a per-store-type score template from Excel,
    calculates share-of-shelf (SOVI) KPIs filtered by store type / scene type,
    and writes the results to the new KPI result tables.

    NOTE: this class uses Python-2-only syntax (``except Exception, e``).
    """
    # Legacy KPI hierarchy levels (set / kpi / atomic).
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3
    # SOS population defaults: 'Empty' products are filtered out of the
    # population unless explicitly requested (see get_numerator_and_denominator).
    EXCLUDE_EMPTY = False
    EXCLUDE_FILTER = 0
    EMPTY = 'Empty'

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        # sheet name -> DataFrame, filled by get_templates / get_score_template.
        self.templates = {}
        self.session_id = self.data_provider.session_id
        self.score_templates = {}
        self.get_templates()
        self.get_score_template()
        # NOTE(review): iloc[0] returns the whole first matching product ROW,
        # not a manufacturer fk — looks like a latent bug; calculate_sos
        # recomputes its own manufacturer_fk, so this attribute may be unused.
        self.manufacturer_fk = self.all_products[
            self.all_products['manufacturer_name'] == 'Coca Cola'].iloc[0]
        self.sos = SOS(self.data_provider, self.output)
        self.total_score = 0
        self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
        self.toolbox = GENERALToolBox(self.data_provider)
        self.scenes_info = self.data_provider[Data.SCENES_INFO]
        self.kpi_results_new_tables_queries = []
        # self.store_type = self.data_provider.store_type



    def get_templates(self):
        """Load every sheet of the main KPI template into self.templates."""
        for sheet in Const.SHEETS_MAIN:
            self.templates[sheet] = pd.read_excel(MAIN_TEMPLATE_PATH, sheetname=sheet.decode("utf-8"), keep_default_na=False)


    def get_score_template(self):
        """Load every sheet of the score-range template into
        self.score_templates (one sheet per store type)."""
        for sheet in Const.SHEETS_SCORE:
            self.score_templates[sheet] = pd.read_excel(SCORE_TEMPLATE_PATH, sheetname=sheet.decode("utf-8"), keep_default_na=False, encoding = "utf-8")


    def main_calculation(self, *args, **kwargs):
        """Entry point: calculate every KPI row in the main template and
        commit the results to the DB."""
        main_template = self.templates[Const.KPIS]
        for i, main_line in main_template.iterrows():
            self.calculate_main_kpi(main_line)
        self.commit_results()




    def calculate_main_kpi(self, main_line):
        """Calculate one KPI row from the main template.

        The KPI runs only when the session's store type is listed in the
        row's store types AND at least one of the row's scene types exists in
        the session (or the row applies to 'All' scenes). Currently only
        SOVI (share-of-shelf) KPIs are dispatched; other types are skipped.
        """
        kpi_name = main_line[Const.KPI_NAME]
        kpi_type = main_line[Const.Type]
        scene_types = self.does_exist(main_line, Const.SCENE_TYPES)

        result = score = 0
        general_filters = {}


        scif_scene_types = self.scif['template_name'].unique().tolist()
        store_type = str(self.store_info["store_type"].iloc[0])
        # NOTE(review): does_exist_store may return None, which would raise
        # TypeError on the `in` test below — presumably the template always
        # fills the store-types column; confirm.
        store_types = self.does_exist_store(main_line, Const.STORE_TYPES)
        if store_type in store_types:

            if scene_types:
                if (('All' in scene_types) or bool(set(scif_scene_types) & set(scene_types))) :
                    if not ('All' in scene_types):
                        # Restrict the calculation to the listed scene types.
                        general_filters['template_name'] = scene_types
                    if kpi_type == Const.SOVI:
                        relevant_template = self.templates[kpi_type]
                        relevant_template = relevant_template[relevant_template[Const.KPI_NAME] == kpi_name]

                        if relevant_template["numerator param 1"].all() and relevant_template["denominator param"].all():
                            function = self.get_kpi_function(kpi_type)
                            for i, kpi_line in relevant_template.iterrows():
                                result, score = function(kpi_line, general_filters)
                    else:
                        pass

            else:
                pass


    @staticmethod
    def does_exist(kpi_line, column_name):
        """
        checks if kpi_line has values in this column, and if it does - returns a list of these values
        :param kpi_line: line from template
        :param column_name: str
        :return: list of values if there are, otherwise None
        """
        if column_name in kpi_line.keys() and kpi_line[column_name] != "":
            cell = kpi_line[column_name]
            if type(cell) in [int, float]:
                return [cell]
            elif type(cell) in [unicode, str]:
                # Note: splits on comma-space (", "), unlike does_exist_store.
                return cell.split(", ")
        return None

    @staticmethod
    def does_exist_store(kpi_line, column_name):
        """
        checks if kpi_line has values in this column, and if it does - returns a list of these values
        :param kpi_line: line from template
        :param column_name: str
        :return: list of values if there are, otherwise None
        """
        if column_name in kpi_line.keys() and kpi_line[column_name] != "":
            cell = kpi_line[column_name]
            if type(cell) in [int, float]:
                return [cell]
            elif type(cell) in [unicode, str]:
                # Note: splits on bare comma (","), unlike does_exist.
                return cell.split(",")
        return None




    def calculate_sos(self, kpi_line,  general_filters):
        """Calculate one share-of-shelf KPI line.

        Numerator/denominator filter columns come from the template; the
        resulting SOS value is mapped to a score via the per-store-type score
        ranges, and the result row is written to the new KPI tables.
        Returns (sos_value, score).
        """
        kpi_name = kpi_line[Const.KPI_NAME]
        den_type = kpi_line[Const.DEN_TYPES_1]
        den_value = kpi_line[Const.DEN_VALUES_1].split(',')

        num_type = kpi_line[Const.NUM_TYPES_1]
        num_value = kpi_line[Const.NUM_VALUES_1].split(',')

        # Denominator filter narrows the whole population.
        general_filters[den_type] = den_value

        sos_filters = {num_type : num_value}

        # Optional second numerator filter.
        if kpi_line[Const.NUM_TYPES_2]:
            num_type_2 = kpi_line[Const.NUM_TYPES_2]
            num_value_2 = kpi_line[Const.NUM_VALUES_2].split(',')
            sos_filters[num_type_2] = num_value_2

        sos_value = self.sos.calculate_share_of_shelf(sos_filters, **general_filters)
        # sos_value *= 100
        sos_value = round(sos_value, 2)

        score = self.get_score_from_range(kpi_name, sos_value)

        # Resolve fks for the result row. Assumes the first numerator value is
        # a manufacturer name and the first denominator value is a category.
        manufacturer_products = self.all_products[
            self.all_products['manufacturer_name'] == num_value[0]].iloc[0]

        manufacturer_fk = manufacturer_products["manufacturer_fk"]

        all_products = self.all_products[
            self.all_products['category'] == den_value[0]].iloc[0]

        category_fk = all_products["category_fk"]



        numerator_res, denominator_res = self.get_numerator_and_denominator(sos_filters, **general_filters)

        self.common.write_to_db_result_new_tables(fk = 1,
                                                  numerator_id=manufacturer_fk,
                                                  numerator_result= numerator_res,
                                                  denominator_id=category_fk,
                                                  denominator_result= denominator_res,
                                                  result=sos_value,
                                                  score= score,
                                                  score_after_actions= score)
        return sos_value, score

    def get_score_from_range(self, kpi_name, sos_value):
        """Look up the score for sos_value in the store type's score sheet
        (rows are [Kpi, Low, High, Score] ranges)."""
        store_type = str(self.store_info["store_type"].iloc[0])
        # Python-2 unicode handling: normalize the KPI name to utf-8 bytes in
        # the template so the query below matches.
        self.score_templates[store_type] = self.score_templates[store_type].replace(kpi_name, kpi_name.encode("utf-8"))
        score_range = self.score_templates[store_type].query('Kpi == "' + str(kpi_name.encode("utf-8")) +
                                                          '" & Low <= ' + str(sos_value) +
                                                          ' & High >= ' + str(sos_value)+'')
        score = score_range['Score'].iloc[0]
        return score


    def get_kpi_function(self, kpi_type):
        """
        Map a KPI type (the "sheet" column of the main template) to its
        calculation function.
        :param kpi_type: value from "sheet" column in the main sheet
        :return: function, or None when the type is not recognized
        """
        if kpi_type == Const.SOVI:
            return self.calculate_sos
        else:
            Log.warning("The value '{}' in column sheet in the template is not recognized".format(kpi_type))
            return None

    @staticmethod
    def round_result(result):
        # Round a KPI result to 3 decimal places.
        return round(result, 3)

    def get_numerator_and_denominator(self, sos_filters=None, include_empty=False, **general_filters):
        """Return (numerator, denominator) facing sums for an SOS calculation.

        The denominator is the scif population matching general_filters; the
        numerator is the subset also matching sos_filters. Unless 'Empty'
        products are explicitly included/filtered, they are excluded from the
        population.
        """
        if include_empty == self.EXCLUDE_EMPTY and 'product_type' not in sos_filters.keys() + general_filters.keys():
                general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.toolbox.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.toolbox.get_filter_condition(self.scif, **sos_filters)
        try:
            pop = self.scif

            filtered_population = pop[pop_filter]
            if filtered_population.empty:
                return 0,0
            else:
                subset_population = filtered_population[subset_filter]
                # ratio = TBox.calculate_ratio_sum_field_in_rows(filtered_population, subset_population, Fd.FACINGS)

                df = filtered_population
                subset_df = subset_population
                sum_field  = Fd.FACINGS
                # Validation failures are deliberately non-fatal: the message
                # is built but the exception is swallowed (raise is commented
                # out), so the sums below run regardless.
                try:
                    Validation.is_empty_df(df)
                    Validation.is_empty_df(subset_df)
                    Validation.is_subset(df, subset_df)
                    Validation.df_columns_equality(df, subset_df)
                    Validation.validate_columns_exists(df, [sum_field])
                    Validation.validate_columns_exists(subset_df, [sum_field])
                    Validation.is_none(sum_field)
                except Exception, e:
                    msg = "Data verification failed: {}.".format(e)
                    # raise Exception(msg)

                default_value = 0

                numerator = TBox.calculate_frame_column_sum(subset_df, sum_field, default_value)
                denominator = TBox.calculate_frame_column_sum(df, sum_field, default_value)

                return numerator, denominator

        except Exception as e:

             Log.error(e.message)


        # NOTE(review): reached only after an exception above; returning True
        # instead of a (numerator, denominator) pair looks unintended —
        # callers unpack two values. Confirm before relying on this path.
        return True

    def commit_results(self):
        """Delete this session's previous results and write the newly
        calculated ones in a single transaction."""
        insert_queries = self.merge_insert_queries(self.kpi_results_new_tables_queries)
        # Reconnect in case the connection dropped during a long calculation.
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_query = SOLARBRQueries.get_delete_session_results_query(self.session_uid, self.session_id)
        cur.execute(delete_query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    @staticmethod
    def merge_insert_queries(insert_queries):
        """Group single-row INSERTs by their static prefix and merge each
        group into one multi-VALUES statement."""
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(group, ',\n'.join(query_groups[group])))
        return merged_queries
コード例 #13
0
class DBHandler:
    """
    Tnuva has an NCC report that compares the OOS SKU-level results of the
    current session against the previous one for the same store. We didn't
    want to calculate this during the report and it doesn't exist yet in the
    API, so this util class handles fetching those previous results directly
    from the DB.
    """
    def __init__(self, project_name, session_uid):
        self.project_name = project_name
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.session_uid = session_uid

    def _get_previous_session_fk(self):
        """
        This method fetches the last completed session_fk for the current store.
        Returns None when there is no previous completed session (the query
        returns both the current session and the previous one, so exactly two
        rows are expected).
        """
        last_session_fk_query = self._get_last_visit_fk_query()
        last_session_fk = self._execute_db_query(last_session_fk_query)
        if len(last_session_fk) != 2:
            Log.warning(
                Consts.LOG_EMPTY_PREVIOUS_SESSIONS.format(self.session_uid))
            last_session_fk = None
        else:
            # Row 0 is the current session; row 1 is the previous one.
            last_session_fk = last_session_fk.loc[1, BasicConsts.PK]
        return last_session_fk

    def _get_oos_results(self, session_fk):
        """
        This method gets a session_fk and fetches the relevant OOS results.
        """
        query = self._previous_oos_results_query(session_fk)
        result = self._execute_db_query(query)
        return result

    def get_last_session_oos_results(self):
        """
        This is the main method of this util and the only public one.
        It fetches the relevant OOS results for the last relevant visit if exists.
        Returns None when there is no previous completed session.
        """
        last_session_fk = self._get_previous_session_fk()
        if last_session_fk is None:
            return None
        oos_results = self._get_oos_results(last_session_fk)
        return oos_results

    def get_kpi_result_value(self):
        """ This method extracts the kpi_result_types from the DB. """
        result_type_query = self._get_kpi_result_value_query()
        result_types = self._execute_db_query(result_type_query)
        return result_types

    def _execute_db_query(self, query):
        """ This method is responsible on the DB execution.
        It gets a query (string) and executes it, reconnecting once if the
        connection has gone stale. """
        try:
            result = pd.read_sql_query(query, self.rds_conn.db)
        except DatabaseError:
            # Connection probably dropped; reconnect and retry once.
            self.rds_conn.connect_rds()
            result = pd.read_sql_query(query, self.rds_conn.db)
        return result

    def get_oos_reasons_for_session(self, session_uid):
        """Fetch the OOS exclusion reasons recorded for the given session."""
        oos_reasons_query = self._get_oos_reasons_query(session_uid)
        oos_reasons = self._execute_db_query(oos_reasons_query)
        return oos_reasons

    # The following are the queries that we are using in order to get the previous
    # sessions relevant results.

    @staticmethod
    def _previous_oos_results_query(session_fk):
        """
        Build the query fetching the NCC-relevant OOS results for a session.
        :param session_fk: the session to fetch results for
        :return: SQL query string
        """
        prev_results_query = """SELECT 
                                kpi_level_2_fk, numerator_id, result
                            FROM
                                report.kpi_level_2_results
                            WHERE
                                session_fk = {}
                                    AND kpi_level_2_fk IN (SELECT 
                                        pk
                                    FROM
                                        static.kpi_level_2
                                    WHERE type IN {})
                                       """.format(session_fk,
                                                  Consts.PREV_RES_KPIS_FOR_NCC)
        return prev_results_query

    @staticmethod
    def _get_kpi_result_value_query():
        # Maps kpi result-value pks to their display values.
        kpi_result_type = """SELECT pk, value FROM static.kpi_result_value;"""
        return kpi_result_type

    def _get_last_visit_fk_query(self):
        """
        Before fetching the results we need to get the previous session from the same store.
        This is a query that used by the Mobile team.
        This query returns the current session's fk and the previous one as well.
        (Sessions with any non-completed, non-deleted scene are skipped.)
        """
        last_two_sessions_query = """SELECT
                   s1.pk
                FROM
                   probedata.session s1
                JOIN probedata.session s2 ON
                   s2.store_fk = s1.store_fk
                   AND s2.visit_date >= s1.visit_date
                   AND s2.start_time >= s1.start_time
                   AND (
                       SELECT count(1)
                   from
                       probedata.scene as sc
                   where
                       sc.session_uid = s2.session_uid
                       and status <> 6
                       AND sc.delete_time is null) = 0
                WHERE
                   s2.session_uid = '{}'
                   AND s2.delete_time is NULL
                   AND s1.delete_time is NULL
                   AND (
                       SELECT count(1)
                   from
                       probedata.scene as sc
                   where
                       sc.session_uid = s1.session_uid
                       and status <> 6
                       AND sc.delete_time is null) = 0
                ORDER BY
                   s1.visit_date DESC ,
                   s1.start_time DESC
                limit 2;""".format(self.session_uid)
        return last_two_sessions_query

    @staticmethod
    def _get_oos_reasons_query(session_uid):
        """Build the query fetching OOS exclusions (with their message and
        message type) for the given session."""
        query = """
                    SELECT * FROM probedata.oos_exclude oe
                    JOIN static.oos_message om on om.pk=oe.oos_message_fk
                    JOIN static.oos_message_type omt on omt.pk=om.type
                    where oe.session_uid = '{}' and oe.delete_time is null;
                """.format(session_uid)
        return query