Example #1
0
class Generator:
    """Session entry point: wires the HEINEKENTW tool box to the data provider."""

    def __init__(self, data_provider, output):
        self.data_provider = data_provider
        self.output = output
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.tool_box = HEINEKENTWToolBox(self.data_provider, self.output)
        self.common = Common(self.data_provider)

    @log_runtime('Total Calculations', log_start=True)
    def main_function(self):
        """
        This is the main KPI calculation function.
        It calculates the score for every KPI set and saves it to the DB.

        An empty scene-item-facts frame is only logged; the calculation is
        still attempted so the commit step always runs.
        """
        if self.tool_box.scif.empty:
            Log.warning('Scene item facts is empty for this session')
        self.tool_box.main_calculation()
        self.common.commit_results_data()
Example #2
0
class Generator:
    """Session entry point: wires the CCAAU tool box to the data provider."""

    def __init__(self, data_provider, output):
        self.data_provider = data_provider
        self.output = output
        self.project_name = data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.tool_box = CCAAUToolBox(self.data_provider, self.output)
        self.common = Common(data_provider)

    @log_runtime('Total Calculations', log_start=True)
    def main_function(self):
        """
        This is the main KPI calculation function.
        It calculates the score for every KPI set and saves it to the DB.

        An empty scene-item-facts frame is only logged; the calculation is
        still attempted so the commit step always runs.
        """
        if self.tool_box.scif.empty:
            Log.warning('Scene item facts is empty for this session')
        # The legacy per-kpi_set_fk loop (old static-data tables) was removed;
        # main_calculation now owns the whole session-level flow.
        self.tool_box.main_calculation()
        self.common.commit_results_data()
Example #3
0
class INBEVTRADMXToolBox:
    """Session-level KPI calculator for the INBEVTRADMX project.

    KPI definitions come from an Excel template shipped next to the code;
    results are written through both the legacy (``Common``) and the new
    (``Common2``) DB interfaces.
    NOTE: this module targets Python 2 — ``map(...)`` results are used as
    lists throughout (e.g. ``.remove(...)`` is called on them).
    """

    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.common2 = Common2(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.templates = self.data_provider.all_templates
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        self.templates_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..', 'Data')
        self.excel_file_path = os.path.join(self.templates_path,
                                            'inbevtradmx_template_11_v3.xlsx')
        self.availability = Availability(self.data_provider)
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        self.geo = GeoLocation.INBEVTRADMXGeo(self.rds_conn, self.session_uid,
                                              self.data_provider,
                                              self.kpi_static_data,
                                              self.common, self.common2)
        self.new_static_data = self.common2.kpi_static_data
        self.manufacturer_fk = 1
        self.match_displays_in_scene = self.data_provider.match_display_in_scene
        self.mpis = self.mpis_merger()
        # scif enriched with per-match shelf data; used by shelf-level SOS
        self.all_data = pd.merge(self.scif,
                                 self.match_product_in_scene[[
                                     'product_fk', 'shelf_number', 'scene_fk',
                                     'facing_sequence_number'
                                 ]],
                                 how="inner",
                                 left_on=['item_id', 'scene_id'],
                                 right_on=['product_fk',
                                           'scene_fk']).drop_duplicates()
        self.ignore_stacking = False
        self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
        self.INCLUDE_FILTER = 1
        self.EXCLUDE_FILTER = 0
        self.CONTAIN_FILTER = 2
        self.EXCLUDE_EMPTY = False
        self.INCLUDE_EMPTY = True

    # init functions:

    def parse_template(self):
        """
        convert excel file to data frame
        :return: data frame
        """
        # `sheetname` is the pre-0.21 pandas keyword; kept for the pinned
        # environment this project runs in
        template_df = pd.read_excel(self.excel_file_path,
                                    sheetname='template',
                                    encoding='utf-8')
        # empty attribute cells must compare equal to '' later on
        template_df['Store Additional Attribute 4'] = template_df[
            'Store Additional Attribute 4'].fillna('')
        template_df['Store Additional Attribute 13'] = template_df[
            'Store Additional Attribute 13'].fillna('')
        return template_df

    def check_store_type(self, row, relevant_columns):
        """
        this method checks if the session store type is valid
        :type relevant_columns: list
        :param row: current KPI row
        :return: boolean
        """
        # make sure that we need to check this field
        if 'store_type' in relevant_columns:
            # get the valid stores from the template
            stores_df = pd.read_excel(self.excel_file_path,
                                      sheetname='store types')
            # create a list of the valid stores
            stores = stores_df.values
            return row['store_type'] in stores
        else:
            return True

    # calculation:

    def main_calculation(self):
        """Run all session KPIs (geo, template sets, POSM) and commit."""
        # calculate geo
        geo_result = self.geo.calculate_geo_location()
        self.geo.write_geo_to_db(float(geo_result))

        # calculate from template
        self.calculate_kpi_set_from_template()
        self.calculate_posm_displays()
        self.common.commit_results_data()
        self.common2.commit_results_data()

    def calculate_kpi_set_from_template(self):
        """
        this method chooses the correct set to calculate
        there will always be only on set to calculate, depending on the field 'Store additional attribute 4' from
        template
        :return: None
        """
        # get the template
        parsed_template = self.parse_template()

        # get the session additional_attribute_4 & 13
        additional_attribute_4 = self.store_info.additional_attribute_4.values[
            0]
        additional_attribute_13 = self.store_info.additional_attribute_13.values[
            0]
        set_name = self.choose_correct_sets_to_calculate(
            additional_attribute_4, additional_attribute_13, parsed_template)

        # wrong value in additional attribute 4 - shouldn't calculate
        if set_name == '':
            Log.warning(
                'Wrong value in additional attribute 4 - shouldnt calculate')
            return -1
        # get only the part of the template that is related to this set
        set_template_df = parsed_template[parsed_template['KPI Level 1 Name']
                                          == set_name]
        # start calculating !
        self.calculate_set_score(set_template_df, set_name)

    @staticmethod
    def choose_correct_sets_to_calculate(additional_attribute_4,
                                         additional_attribute_13, template):
        """
        choose what is the appropriate set to calculate
        :param additional_attribute_4: session additional_attribute_4. if None, will ignore the kpi.
        :param additional_attribute_13: session additional_attribute_13. if None, will ignore this attribute
        :param template: parsed template
        :return: set name to calculate ('' when nothing should be calculated) -
                 assuming each additional attribute 4 matches only 1 set name.
        """
        template = template.dropna(subset=['Store Additional Attribute 4'],
                                   axis=0)
        # Makes sure attribute 4 exist
        if not additional_attribute_4:
            return ''

        if additional_attribute_13:
            sets = template[
                (template['Store Additional Attribute 4'].str.contains(
                    additional_attribute_4))
                & ((template['Store Additional Attribute 13'].str.contains(
                    additional_attribute_13))
                   | (template['Store Additional Attribute 13'] == ''))]
        else:
            sets = template[
                (template['Store Additional Attribute 4'].str.contains(
                    additional_attribute_4))
                & (template['Store Additional Attribute 13'] == '')]
        if sets.empty:
            return ''
        else:
            sets = sets['KPI Level 1 Name'].unique().tolist()

            # default to '' so the error branch below no longer raises a
            # NameError when none of the known attribute-4 values match
            set_name = ''
            if len(sets) == 1:
                set_name = sets[0]
            elif additional_attribute_4 == 'BC':
                set_name = sets[0]
            elif additional_attribute_4 == 'BA':
                set_name = sets[1]
            elif additional_attribute_4 == 'MODELORAMA':
                set_name = sets[2]
            else:
                Log.error('KPI should only run on one KPI set.')

            return set_name

    def calculate_set_score(self, set_df, set_name):
        """
        this method iterates kpi set and calculates it's score
        :param set_df: the set df to calculate score for
        :param set_name: the kpi set name
        :return: None
        """
        set_score = 0
        # get array of all kpi level 2 names
        kpi_names = set_df['KPI Level 2 Name'].unique()
        # iterate all kpi level 2
        for kpi_name in kpi_names:
            # calculate kpi level 2 score
            kpi_score = self.calculate_kpi_level_2_score(
                kpi_name, set_df, set_name)
            # accumulate set score
            set_score += kpi_score
        # round set_score if the set is 'Set Urban'
        if set_name == 'Set Urban':
            set_score = round(set_score)
        # finally, write level 1 kpi set score to DB
        self.write_kpi_set_score_to_db(set_name, set_score)

    def calculate_kpi_level_2_score(self, kpi_name, set_df, set_name):
        """
        this method gets kpi level 2 name, and iterates it's related atomic kpis
        :param set_name: kpi set name
        :param kpi_name: kpi level 2 name
        :param set_df: kpi set df
        :return: kpi level 2 score
        """
        kpi_df = set_df[set_df['KPI Level 2 Name'].str.encode('utf8') ==
                        kpi_name.encode('utf-8')]
        kpi_score = 0
        # iterate the all related atomic kpis
        for i, row in kpi_df.iterrows():
            # get atomic kpi name
            kpi_level_3_name = row['KPI Level 3 Name']
            # calculate atomic kpi score
            atomic_kpi_score = self.calculate_atomic_kpi_score(
                row, kpi_level_3_name, kpi_name, set_name)
            # accumulate kpi level 2 score
            kpi_score += atomic_kpi_score
        # if there is just one atomic and it shares the kpi's name, we don't
        # need to write two identical levels to the DB
        write_to_all_levels = False
        if len(kpi_df) > 1:
            write_to_all_levels = True
        elif kpi_df['KPI Level 3 Name'].iloc[0] != kpi_df[
                'KPI Level 2 Name'].iloc[0]:
            write_to_all_levels = True
        self.write_kpi_score_to_db(kpi_name, set_name, kpi_score,
                                   write_to_all_levels)
        return kpi_score

    def calculate_atomic_kpi_score(self, row, kpi_level_3_name, kpi_name,
                                   set_name):
        """
        this method calculates score for specific atomic kpi
        :param set_name: kpi set name
        :param kpi_name: kpi name
        :param kpi_level_3_name: the atomic kpi name
        :param row: atomic kpi details
        :return: atomic kpi score
        """
        atomic_kpi_score = 0
        # get column name to consider in calculation (lists under Python 2)
        relevant_columns = map(str.strip, str(row['column names']).split(','))
        optional_columns = map(str.strip,
                               str(row['optional column']).split(','))
        is_kpi_passed = 0
        if self.check_store_type(row, relevant_columns):
            # get weight of current atomic kpi
            curr_weight = row['weights']
            # figure out what type of calculation need to be done
            if row['KPI type'] == 'Product Availability':
                if kpi_level_3_name == 'URBAN':
                    score = self.calculate_weigthed_availability_score(
                        row, relevant_columns)
                    if score:
                        atomic_kpi_score = score
                elif kpi_level_3_name == 'Hay o no hay # frentes':
                    if self.calculate_lead_availability_score(
                            row, relevant_columns):
                        is_kpi_passed = 1
                elif kpi_level_3_name == 'Adherencia de materiales':
                    if self.calculate_or_availability(row, relevant_columns,
                                                      optional_columns):
                        is_kpi_passed = 1
                else:
                    if self.calculate_availability_score(
                            row, relevant_columns):
                        is_kpi_passed = 1
                        if kpi_level_3_name == '100% Marcas HE':
                            # unique_brand_count is set as a side effect of
                            # calculate_availability_score above
                            curr_weight = self.unique_brand_count

            elif row['KPI type'] == 'SOS':
                ratio = self.calculate_sos_score(row, relevant_columns)
                if (row['product_type'] == 'Empty') & (ratio <= 0.2):
                    if self.scif[self.scif['template_name'] ==
                                 row['template_name']].empty:
                        is_kpi_passed = 0
                    else:
                        is_kpi_passed = 1
                elif ratio == 1:
                    is_kpi_passed = 1
            elif row['KPI type'] == 'Survey':
                if self.calculate_survey_score(row):
                    is_kpi_passed = 1
            if is_kpi_passed == 1:
                atomic_kpi_score += curr_weight
            # write result to DB
            # the customer asked for this specific KPI will write 100 in DB if it passed even if the weight is 0
            if kpi_level_3_name == 'Sin Espacios Vacios' and curr_weight == 0 and is_kpi_passed == 1:
                self.write_atomic_to_db(kpi_level_3_name, 100, kpi_name,
                                        set_name, is_kpi_passed, curr_weight)
            else:
                self.write_atomic_to_db(kpi_level_3_name, atomic_kpi_score,
                                        kpi_name, set_name, is_kpi_passed,
                                        curr_weight)
        return atomic_kpi_score

    # sos

    def calculate_sos_score(self, row, relevant_columns):
        """
        this method calculates share of shelf score according to columns from the data frame
        :param relevant_columns: columns to check in the excel file
        :param row: data frame to calculate from
        :return: share of shelf score
        """
        if 'shelf_number' in relevant_columns:
            df = self.all_data[
                (self.all_data.template_name == row['template_name'])
                & (self.all_data.shelf_number == row['shelf_number']) &
                (self.all_data.facing_sequence_number > 0)]
            # NOTE(review): these assignments mutate a slice of all_data and
            # may raise pandas' SettingWithCopyWarning
            df.shelf_number = df.shelf_number.astype(float)
            df.shelf_number = df.shelf_number.astype(str)
            df = df[df['product_type'] != 'Empty']
        else:
            # get df only with the correct template name
            df = self.scif[self.scif.template_name == row['template_name']]
        if not pd.isna(row['exclude product_type']):
            for excl in [
                    e.strip() for e in row['exclude product_type'].split(',')
            ]:
                df = df[df['product_type'] != excl]

        # sum of all the facings in df
        facings = df.facings.values.sum()
        if facings == 0:
            return 0
        # create dictionary for calculating
        filters_dict = self.create_sos_filtered_dictionary(
            relevant_columns, row)
        # reduce the df only to relevant columns
        df = df[filters_dict.keys()]

        # check if it's invasion KPI for the special case
        inv = row['KPI Level 3 Name'][:3] == 'Inv'
        ratio = self.calculate_sos_ratio(df, filters_dict, inv)

        return ratio / facings

    @staticmethod
    def _drop_nan_filters(filters_dict):
        """
        drop every filter key whose allowed-values list contains the string
        'nan' (produced when the template cell was empty).
        The previous inline version only inspected the last key and raised a
        NameError when the dictionary was empty.
        :param filters_dict: filter dictionary, modified in place
        :return: None
        """
        for key in [k for k in filters_dict.keys()
                    if 'nan' in filters_dict[k]]:
            del filters_dict[key]

    def calculate_sos_ratio(self, df, filters_dict, inv):
        """
        accumulate the facings of the rows in df that match filters_dict
        :param df: data frame reduced to the filter columns
        :param filters_dict: column -> allowed values, cleaned in place
        :param inv: True for 'Inv...' (invasion) KPIs; currently Empty rows
                    are counted regardless of this flag
        :return: accumulated facings ratio numerator
        """
        ratio = 0

        if 'manufacturer_name' in df.columns.tolist():
            filter_out = df[(df['product_type'] == ('Empty'))
                            & (df['manufacturer_name'] == 'General')].index
            df.drop(filter_out, inplace=True)

        # drop template columns that were empty ('nan')
        self._drop_nan_filters(filters_dict)

        # iterate the data frame
        for i, df_row in df.iterrows():
            # initialize the boolean variable
            bol = True
            # special case that the customer asked: Empty rows always count
            if df_row.product_type == 'Empty':
                ratio = ratio + self.scif.facings.loc[i]
                continue
            else:
                # check that every filter column matches its allowed values
                for key in filters_dict.keys():
                    bol &= df_row[key] in filters_dict[key]
            # all the values matched for this data frame row
            if bol:
                if 'shelf_number' in filters_dict.keys():
                    ratio = ratio + self.all_data.facings.loc[i]
                else:
                    # accumulate ratio
                    ratio = ratio + self.scif.facings.loc[i]
        return ratio

    def create_sos_filtered_dictionary(self, relevant_columns, row):
        """
        this method filters out relevant columns from the template so we can calculate SOS easily
        :param relevant_columns: relevant columns from templates
        :param row: specific row from template
        :return: dictionary
        """
        # dictionary to send to the generic method
        filters_dict = {}
        self.handle_exclude_skus(filters_dict, relevant_columns, row)
        # fill the dictionary
        for column_value in relevant_columns:
            if column_value in [
                    'Store Additional Attribute 4',
                    'Store Additional Attribute 13'
            ] or column_value == 'store_type':
                continue
            filters_dict[column_value] = map(
                str.strip,
                str(row.loc[column_value]).split(','))
        return filters_dict

    # availability:

    def calculate_weigthed_availability_score(self, row, relevant_columns):
        """
        this method calculates weighted availability: the share of the row's
        scene types in which the filters were available, as a percentage.
        (Name typo kept for backwards compatibility.)
        :param row: data frame to calculate from
        :param relevant_columns: columns to check in the excel file
        :return: percentage of passing scene types (0-100)
        """
        passed = 0
        # Gets the scene types
        scene_types = row['template_name'].split(', ')
        scenes_num = len(scene_types)
        if 'scene_type' in relevant_columns:
            relevant_columns.remove('scene_type')
        # create filtered dictionary
        filters_dict = self.create_availability_filtered_dictionary(
            relevant_columns, row)
        for scene in scene_types:
            filters_dict.update({'template_name': scene})
            # call the generic method from KPIUtils_v2
            if self.availability.calculate_availability(**filters_dict):
                passed += 1
        return (passed / float(scenes_num)) * 100 if scenes_num else 0

    def calculate_lead_availability_score(self, row, relevant_columns):
        """
        this method passes if ANY of the row's brands reaches the required
        availability threshold
        :param row: data frame to calculate from
        :param relevant_columns: columns to check in the excel file
        :return: boolean
        """
        # Gets the brand names
        brand_names = row['brand_name'].split(', ')
        if 'scene_type' in relevant_columns:
            relevant_columns.remove('scene_type')

        # create filtered dictionary
        filters_dict = self.create_availability_filtered_dictionary(
            relevant_columns, row)
        for brand in brand_names:
            filters_dict.update({'brand_name': brand})
            # call the generic method from KPIUtils_v2
            availability_score = self.availability.calculate_availability(
                **filters_dict)
            if self.decide_availability_score(row, availability_score):
                return True
        return False

    def calculate_availability_score(self, row, relevant_columns):
        """
        this method calculates availability score according to columns from the data frame.
        Side effect: sets self.unique_brand_count for the '100% Marcas HE' KPI.
        :param row: data frame to calculate from
        :param relevant_columns: columns to check in the excel file
        :return: boolean
        """
        # create filtered dictionary
        filters_dict = self.create_availability_filtered_dictionary(
            relevant_columns, row)
        # drop template columns that were empty ('nan')
        self._drop_nan_filters(filters_dict)
        # call the generic method from KPIUtils_v2
        availability_score = self.availability.calculate_availability(
            **filters_dict)

        filtered_df = self.scif[self.get_filter_condition(
            self.scif, **filters_dict)]
        self.unique_brand_count = len(filtered_df['brand_name'].unique())

        # check if this score should pass or fail
        return self.decide_availability_score(row, availability_score)

    def create_availability_filtered_dictionary(self, relevant_columns, row):
        """
        this method creates a dictionary with keys according to the specific row in the template
        :param relevant_columns: columns to create keys by
        :param row: the specific row in the template
        :return: dictionary
        """
        # dictionary to send to the generic method
        filters_dict = {}
        self.handle_exclude_skus(filters_dict, relevant_columns, row)
        # fill the dictionary
        for column_value in relevant_columns:
            if column_value == 'Store Additional Attribute 4' or column_value == 'store_type' or \
                    column_value == 'Store Additional Attribute 13':
                continue
            filters_dict[column_value] = map(
                str.strip,
                str(row.loc[column_value]).split(','))
        return filters_dict

    def calculate_or_availability(self, row, relevant_columns,
                                  optional_columns):
        """
        this method passes if the availability check passes for ANY of the
        optional columns combined with the relevant columns
        :param row: data frame to calculate from
        :param relevant_columns: columns to check in the excel file
        :param optional_columns: columns tried one at a time ("or" semantics)
        :return: boolean
        """
        for optional_column in optional_columns:
            # create filtered dictionary from the base columns + one optional
            temp_relevant_columns = relevant_columns[:]
            temp_relevant_columns.append(optional_column)
            filters_dict = self.create_availability_filtered_dictionary(
                temp_relevant_columns, row)
            # drop template columns that were empty ('nan')
            self._drop_nan_filters(filters_dict)
            # call the generic method from KPIUtils_v2
            availability_score = self.availability.calculate_availability(
                **filters_dict)
            # check if this score should pass or fail
            if self.decide_availability_score(row, availability_score):
                return True

        return False

    def filter_product_names(self, exclude_skus):
        """
        this method filters list of SKUs from self.scif
        :param exclude_skus:  list of SKUs to exclude from template
        :return: filtered list
        """
        return filter(lambda sku: sku not in exclude_skus,
                      self.scif.product_name.values)

    @staticmethod
    def decide_availability_score(row, availability_score):
        """
        this method decides if the score should pass or fail according to the template
        :param row: specific row from template
        :param availability_score: score
        :return: Boolean
        """
        if availability_score == 0:
            return False
        if row['KPI Level 1 Name'] == 'Set Modeloramas' and row[
                'KPI Level 3 Name'] == 'Hay o no hay Pop?':
            if row['KPI Level 2 Name'] in ('Pop Exterior', 'Pop Interior'):
                # both interior and exterior POP require more than one facing
                return availability_score > 1
            # unmatched level-2 name previously fell through and returned
            # None; keep the result falsy but explicit
            return False
        elif row['KPI Level 1 Name'] == 'Set Self Execution' and row[
                'KPI Level 3 Name'] == 'Hay o no hay # frentes':
            return availability_score > 24
        else:
            return True

    # surveys:

    def calculate_survey_score(self, row):
        """
        this method calculates survey score according to columns from the data frame
        :param row: data frame to calculate from
        :return: boolean
        """
        question_code = str(int(row['Survey Question Code']))
        if not self.survey_response.empty and \
                not self.survey_response[self.survey_response.code == question_code].empty:
            answer = self.survey_response.selected_option_text[
                self.survey_response.code == question_code].values[0]
            if answer == 'Si':
                return True
            else:
                if row['KPI Level 2 Name'] == 'Primer Impacto' and answer == 'No tiene Enfirador':
                    return True
            return False
        else:
            return False

    # help functions:

    def calculate_posm_displays(self):
        """Write one zero-result row per display and per POSM product."""
        new_kpi_set_fk = self.common2.get_kpi_fk_by_kpi_name(DISPLAY_KPI)
        for display_fk in self.match_displays_in_scene['display_fk'].to_list():
            self.common2.write_to_db_result(fk=new_kpi_set_fk,
                                            result=0,
                                            numerator_id=self.manufacturer_fk,
                                            denominator_id=display_fk)

        new_kpi_set_fk = self.common2.get_kpi_fk_by_kpi_name(POSM_KPI)
        for product_fk in self.scif['product_fk'][self.scif['location_type'] ==
                                                  'POSM'].to_list():
            self.common2.write_to_db_result(fk=new_kpi_set_fk,
                                            result=0,
                                            numerator_id=self.manufacturer_fk,
                                            denominator_id=product_fk)

    def handle_exclude_skus(self, filters_dict, relevant_columns, row):
        """
        this method checks if there is value in 'exclude skus' column in the template.
        if exists, it filters out the relevant skus from the calculation
        :param filters_dict: filtered dictionary
        :param relevant_columns: columns to create keys by
        :param row: specific row to calculate
        :return: None
        """
        try:
            exclude_skus = row['exclude skus'].split(',')
        except AttributeError:
            # NaN / non-string cell: nothing to exclude
            exclude_skus = []
        if exclude_skus:
            # filter out some product names according to template
            product_names = self.filter_product_names(exclude_skus)
            filters_dict['product_name'] = product_names
        if 'exclude skus' in row.index and 'exclude skus' in relevant_columns:
            relevant_columns.remove('exclude skus')

    # db functions:

    def write_kpi_set_score_to_db(self, set_name, set_score):
        """
        this method writes set kpi score to static.kps_results DB
        :param set_name: set name
        :param set_score: set score
        :return: None
        """
        new_kpi_set_fk = self.common2.get_kpi_fk_by_kpi_name(set_name)
        self.common2.write_to_db_result(
            fk=new_kpi_set_fk,
            result=set_score,
            numerator_id=self.manufacturer_fk,
            denominator_id=self.store_id,
            identifier_result=self.common2.get_dictionary(name=set_name))

    def write_kpi_score_to_db(self, kpi_name, set_name, kpi_score,
                              write_to_all_levels):
        """
        this method writes kpi score to static.kpk_results DB
        :param kpi_name: name of level 2 kpi
        :param set_name: name of related set
        :param kpi_score: the score
        :param write_to_all_levels: skip the write entirely when False (single
               atomic with the same name as the kpi)
        :return: None
        """
        if write_to_all_levels:
            new_kpi_fk = self.common2.get_kpi_fk_by_kpi_name(kpi_name)
            self.common2.write_to_db_result(
                fk=new_kpi_fk,
                result=kpi_score,
                should_enter=True,
                identifier_parent=self.common2.get_dictionary(name=set_name),
                identifier_result=self.common2.get_dictionary(name=kpi_name))

    def write_atomic_to_db(self, atomic_name, atomic_score, kpi_name, set_name,
                           is_kpi_passed, curr_weight):
        """
        this method writes atomic kpi score to static.kpi_results DB
        :param curr_weight: current weight of atomic kpi
        :param is_kpi_passed: is this kpi passed
        :param atomic_name: atomic kpi name
        :param atomic_score: the score
        :param kpi_name: name of related kpi
        :param set_name: name of related set
        :return:
        """
        # when the atomic carries the kpi's own name, hang it directly under
        # the set so we don't create an artificial middle level
        identifier_parent = self.common2.get_dictionary(name=kpi_name)
        if atomic_name == kpi_name:
            identifier_parent = self.common2.get_dictionary(name=set_name)

        new_atomic_fk = self.common2.get_kpi_fk_by_kpi_name(atomic_name)

        self.common2.write_to_db_result(fk=new_atomic_fk,
                                        result=atomic_score,
                                        numerator_id=self.manufacturer_fk,
                                        denominator_id=self.store_id,
                                        weight=curr_weight,
                                        should_enter=True,
                                        score=is_kpi_passed,
                                        identifier_parent=identifier_parent)

    def mpis_merger(self):
        """
        join matches with product, scene and template data
        :return: merged data frame, or None when any input is missing/invalid
        """
        try:
            mpis = self.match_product_in_scene.merge(self.products, on='product_fk', suffixes=['', '_p']) \
            .merge(self.scene_info, on='scene_fk', suffixes=['', '_s']) \
            .merge(self.templates, on='template_fk', suffixes=['', '_t'])
            return mpis
        except Exception:
            # best effort: keep returning None, but no longer swallow
            # SystemExit/KeyboardInterrupt like the previous bare except
            return None

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filters.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an include/exclude flag.
                       INPUT EXAMPLE (1):   manufacturer_name = ('Diageo', DIAGEOAUGENERALToolBox.INCLUDE_FILTER)
                       INPUT EXAMPLE (2):   manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        if not filters:
            return df['pk'].apply(bool)
        if self.facings_field in df.keys():
            filter_condition = (df[self.facings_field] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                if not value:
                    continue
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                elif exclude_or_include == self.CONTAIN_FILTER:
                    condition = (df[field].str.contains(value[0], regex=False))
                    for v in value[1:]:
                        condition |= df[field].str.contains(v, regex=False)
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                # fields missing from the frame are silently ignored
                pass

        return filter_condition
Example #4
0
class GSKNZToolBox:
    # Legacy KPI hierarchy levels.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        """
        Caches the session data sets and builds the GSK generator that
        performs the actual assortment / share-of-shelf calculations.
        :param data_provider: session data provider
        :param output: output handle (kept for the runner's interface)
        """
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        # the set-up workbook drives which functional KPIs are calculated
        set_up_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..', 'Data',
            'gsk_set_up.xlsx')
        self.set_up_template = pd.read_excel(set_up_path,
                                             sheet_name='Functional KPIs',
                                             keep_default_na=False)
        self.gsk_generator = GSKGenerator(self.data_provider, self.output,
                                          self.common, self.set_up_template)

    def main_calculation(self, *args, **kwargs):
        """
        Runs every global GSK calculation in a fixed order, saves each
        result set to the new KPI tables, and commits once at the end.
        """
        generator = self.gsk_generator
        # each entry is an independent calculation returning a JSON-like
        # result set; they are executed in the original, fixed order
        calculations = (
            generator.availability_store_function,
            generator.availability_category_function,
            generator.availability_subcategory_function,
            generator.gsk_global_facings_sos_whole_store_function,
            generator.gsk_global_linear_sos_whole_store_function,
            generator.gsk_global_linear_sos_by_sub_category_function,
            generator.gsk_global_facings_by_sub_category_function,
            generator.gsk_global_facings_sos_by_category_function,
            generator.gsk_global_linear_sos_by_category_function,
        )
        for calculation in calculations:
            self.common.save_json_to_new_tables(calculation())
        self.common.commit_results_data()
        return
# Example #5
class INBEVTRADMXToolBox:
    # Legacy KPI hierarchy levels expected by Common.write_to_db_result.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        """
        Caches all session-level data sets and helpers needed by the
        template-driven KPI calculations below.
        :param data_provider: session data provider
        :param output: output handle (kept for the runner's interface)
        """
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []
        self.templates_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..', 'Data')
        self.excel_file_path = os.path.join(self.templates_path,
                                            'inbevtradmx_template.xlsx')
        self.availability = Availability(self.data_provider)
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        self.geo = GeoLocation.Geo(self.rds_conn, self.session_uid,
                                   self.data_provider, self.kpi_static_data,
                                   self.common)

    def parse_template(self):
        """
        Loads the 'template' sheet of the project's excel file.
        :return: data frame with one row per atomic KPI
        """
        # NOTE(review): 'sheetname' is the pre-0.21 pandas keyword; newer
        # pandas only accepts 'sheet_name'. Confirm the pinned pandas
        # version before changing (the GSK example above uses 'sheet_name').
        template_df = pd.read_excel(self.excel_file_path, sheetname='template')
        return template_df

    def filter_product_names(self, exclude_skus):
        """
        Filters the given SKUs out of the product names in self.scif.
        :param exclude_skus: list of SKU names to exclude from the template
        :return: list of the remaining product names
        """
        # list comprehension (rather than filter()) so the result is a real
        # list under both Python 2 and Python 3
        return [sku for sku in self.scif.product_name.values
                if sku not in exclude_skus]

    def calculate_availability_score(self, row, relevant_columns):
        """
        Calculates the availability pass/fail for one atomic KPI row.
        :param row: template row describing the atomic KPI
        :param relevant_columns: template columns to filter by
        :return: boolean
        """
        # create filtered dictionary
        filters_dict = self.create_availability_filtered_dictionary(
            relevant_columns, row)
        # call the generic method from KPIUtils_v2
        availability_score = self.availability.calculate_availability(
            **filters_dict)
        # check if this score should pass or fail
        return self.decide_availability_score(row, availability_score)

    def create_availability_filtered_dictionary(self, relevant_columns, row):
        """
        Builds the availability filter dictionary from a template row.
        :param relevant_columns: columns to create keys by
        :param row: the specific row in the template
        :return: dictionary of {column: [values]}
        """
        # dictionary to send to the generic method
        filters_dict = {}
        self.handle_exclude_skus(filters_dict, relevant_columns, row)
        # fill the dictionary; store-level columns are handled elsewhere
        # NOTE(review): this flow skips 'store Additional Attribute 4'
        # (lower-case 's') while the SOS flow skips the capitalised
        # 'Store Additional Attribute 4' - verify against the template
        # headers which spelling is actually used.
        for column_value in relevant_columns:
            if column_value in ('store Additional Attribute 4', 'store_type'):
                continue
            # a real list (not a map iterator) so it can be re-iterated
            filters_dict[column_value] = [
                v.strip() for v in str(row.loc[column_value]).split(',')]
        return filters_dict

    def handle_exclude_skus(self, filters_dict, relevant_columns, row):
        """
        Applies the 'exclude skus' template column, when present, by turning
        it into a product_name filter.
        :param filters_dict: filter dictionary, updated in place
        :param relevant_columns: template columns (mutated: 'exclude skus'
                                 is removed once handled)
        :param row: specific template row
        :return: None
        """
        try:
            exclude_skus = row['exclude skus'].split(',')
        except AttributeError:
            # empty template cell - pandas yields NaN, which has no split()
            exclude_skus = []
        if exclude_skus:
            # filter out some product names according to template
            product_names = self.filter_product_names(exclude_skus)
            filters_dict['product_name'] = product_names
            relevant_columns.remove('exclude skus')

    @staticmethod
    def decide_availability_score(row, availability_score):
        """
        Translates a raw availability score into pass/fail per the template.
        :param row: specific row from template
        :param availability_score: raw availability score
        :return: boolean
        """
        if availability_score == 0:
            return False
        if row['KPI Level 1 Name'] == 'Set Modeloramas' and \
                row['KPI Level 3 Name'] == 'Hay o no hay Pop?':
            # the Pop KPIs need a minimum facing count, not mere presence
            if row['KPI Level 2 Name'] == 'Pop Exterior':
                return availability_score > 1
            elif row['KPI Level 2 Name'] == 'Pop Interior':
                return availability_score > 2
            # fix: previously fell through and returned None for any other
            # level-2 name under this KPI; make the falsy failure explicit
            return False
        return True

    def calculate_sos_score(self, row, relevant_columns):
        """
        Calculates a share-of-shelf ratio for one atomic KPI row.
        :param relevant_columns: columns to check in the excel file
        :param row: template row to calculate from
        :return: matching facings / total facings (0 when scene is empty)
        """
        # get df only with the correct template name
        df = self.scif[self.scif.template_name == row['template_name']]
        # sum of all the facings in df
        facings = df.facings.values.sum()
        if facings == 0:
            # nothing in this scene type - avoid a division by zero below
            return 0
        # create dictionary for calculating
        filters_dict = self.create_sos_filtered_dictionary(
            relevant_columns, row)
        # reduce the df only to relevant columns; explicit list() keeps this
        # working under Python 3, where keys() is a view, not a list
        df = df[list(filters_dict.keys())]
        ratio = self.calculate_sos_ratio(df, filters_dict)
        return ratio / facings

    def calculate_sos_ratio(self, df, filters_dict):
        """
        Accumulates the facings of every df row whose values match all of
        the filters.
        :param df: data frame reduced to the filter columns
        :param filters_dict: {column: [allowed values]}
        :return: total facings of the matching rows
        """
        ratio = 0
        # iterate the data frame
        for i, df_row in df.iterrows():
            # the row matches only when every filter column agrees
            matches = all(df_row[key] in filters_dict[key]
                          for key in filters_dict)
            if matches:
                # df shares its index with self.scif, so the facings can be
                # looked up by label
                ratio = ratio + self.scif.facings.loc[i]
        return ratio

    def create_sos_filtered_dictionary(self, relevant_columns, row):
        """
        Builds the SOS filter dictionary from a template row.
        :param relevant_columns: relevant columns from the template
        :param row: specific row from template
        :return: dictionary of {column: [values]}
        """
        # dictionary to send to the generic method
        filters_dict = {}
        self.handle_exclude_skus(filters_dict, relevant_columns, row)
        # fill the dictionary; store-level columns are handled elsewhere
        for column_value in relevant_columns:
            if column_value in ('Store Additional Attribute 4', 'store_type'):
                continue
            # a real list (not a map iterator) so it can be re-iterated
            filters_dict[column_value] = [
                v.strip() for v in str(row.loc[column_value]).split(',')]
        return filters_dict

    def calculate_survey_score(self, row):
        """
        Calculates the survey pass/fail for one atomic KPI row.
        :param row: template row to calculate from
        :return: boolean
        """
        question_code = str(int(row['Survey Question Code']))
        if not self.survey_response.empty and \
                not self.survey_response[self.survey_response.code == question_code].empty:
            answer = self.survey_response.selected_option_text[
                self.survey_response.code == question_code].values[0]
            if answer == 'Si':
                return True
            # 'Primer Impacto' also passes when there is no cooler at all
            if row['KPI Level 2 Name'] == 'Primer Impacto' and answer == 'No tiene Enfirador':
                return True
            return False
        # no (matching) survey response in this session
        return False

    def check_store_type(self, row, relevant_columns):
        """
        Checks whether the session's store type is one the KPI applies to.
        :type relevant_columns: list
        :param row: current KPI row
        :return: True when no store-type restriction exists, or when the
                 store type appears in the template's 'store types' sheet
        """
        # make sure that we need to check this field ('in' instead of the
        # dunder __contains__ call)
        if 'store_type' in relevant_columns:
            # get the valid stores from the template
            stores_df = pd.read_excel(self.excel_file_path,
                                      sheetname='store types')
            # create a list of the valid stores
            stores = stores_df.values
            return row['store_type'] in stores
        return True

    def calculate_set_score(self, set_df, set_name):
        """
        Iterates a KPI set and calculates + persists its score.
        :param set_df: the set df to calculate score for
        :param set_name: the kpi set name
        :return: None
        """
        set_score = 0
        # get array of all kpi level 2 names
        kpi_names = set_df['KPI Level 2 Name'].unique()
        # iterate all kpi level 2
        for kpi_name in kpi_names:
            # calculate kpi level 2 score
            kpi_score = self.calculate_kpi_level_2_score(
                kpi_name, set_df, set_name)
            # write kpi level 2 score to DB
            self.write_kpi_score_to_db(kpi_name, set_name, kpi_score)
            # accumulate set score
            set_score += kpi_score
        # finally, write level 1 kpi set score to DB
        self.write_kpi_set_score_to_db(set_name, set_score)

    def calculate_kpi_level_2_score(self, kpi_name, set_df, set_name):
        """
        Iterates the atomic KPIs of one level-2 KPI and sums their scores.
        :param set_name: kpi set name
        :param kpi_name: kpi level 2 name
        :param set_df: kpi set df
        :return: kpi level 2 score
        """
        kpi_df = set_df[set_df['KPI Level 2 Name'] == kpi_name]
        kpi_score = 0
        # iterate all the related atomic kpis
        for i, row in kpi_df.iterrows():
            # get atomic kpi name
            kpi_level_3_name = row['KPI Level 3 Name']
            # calculate atomic kpi score
            atomic_kpi_score = self.calculate_atomic_kpi_score(
                row, kpi_level_3_name, kpi_name, set_name)
            # accumulate kpi level 2 score
            kpi_score += atomic_kpi_score
        return kpi_score

    def calculate_atomic_kpi_score(self, row, kpi_level_3_name, kpi_name,
                                   set_name):
        """
        Calculates the score for one atomic KPI and writes it to the DB.
        :param set_name: kpi set name
        :param kpi_name: kpi name
        :param kpi_level_3_name: the atomic kpi name
        :param row: atomic kpi details (template row)
        :return: atomic kpi score
        """
        atomic_kpi_score = 0
        # columns to consider in the calculation; a real list (not a map
        # iterator) because handle_exclude_skus removes items from it
        relevant_columns = [c.strip()
                            for c in str(row['column names']).split(',')]
        if self.check_store_type(row, relevant_columns):
            # get weight of current atomic kpi
            curr_weight = row['weights']
            # figure out what type of calculation needs to be done
            if row['KPI type'] == 'Product Availability':
                if self.calculate_availability_score(row, relevant_columns):
                    atomic_kpi_score += curr_weight
            elif row['KPI type'] == 'SOS':
                ratio = self.calculate_sos_score(row, relevant_columns)
                # 'Empty' KPIs pass when empties are scarce; every other
                # SOS KPI requires a full shelf (ratio == 1)
                if (row['product_type'] == 'Empty') & (ratio <= 0.2):
                    atomic_kpi_score += curr_weight
                elif ratio == 1:
                    atomic_kpi_score += curr_weight
            elif row['KPI type'] == 'Survey':
                if self.calculate_survey_score(row):
                    atomic_kpi_score += curr_weight
        # write result to DB
        self.write_atomic_to_db(kpi_level_3_name, atomic_kpi_score, kpi_name,
                                set_name)
        return atomic_kpi_score

    def write_kpi_set_score_to_db(self, set_name, set_score):
        """
        Writes a KPI set score to the static.kps_results DB table.
        :param set_name: set name
        :param set_score: set score
        :return: None
        """
        kpi_set_fk = self.kpi_static_data.kpi_set_fk[
            self.kpi_static_data.kpi_set_name == set_name].unique()[0]
        self.common.write_to_db_result(kpi_set_fk, self.LEVEL1, set_score)

    def write_kpi_score_to_db(self, kpi_name, set_name, kpi_score):
        """
        Writes a level-2 KPI score to the static.kpk_results DB table.
        :param kpi_name: name of level 2 kpi
        :param set_name: name of related set
        :param kpi_score: the score
        :return: None
        """
        kpi_fk = \
            self.kpi_static_data.kpi_fk[(self.kpi_static_data.kpi_name == kpi_name) &
                                        (self.kpi_static_data.kpi_set_name == set_name)].values[0]
        self.common.write_to_db_result(kpi_fk, self.LEVEL2, kpi_score)

    def write_atomic_to_db(self, atomic_name, atomic_score, kpi_name,
                           set_name):
        """
        Writes an atomic KPI score to the static.kpi_results DB table.
        :param atomic_name: atomic kpi name
        :param atomic_score: the score
        :param kpi_name: name of related kpi
        :param set_name: name of related set
        :return: None
        """
        atomic_kpi_fk = \
            self.kpi_static_data.atomic_kpi_fk[(self.kpi_static_data.atomic_kpi_name == atomic_name) &
                                               (self.kpi_static_data.kpi_name == kpi_name) &
                                               (self.kpi_static_data.kpi_set_name == set_name)].values[0]
        self.common.write_to_db_result(atomic_kpi_fk, self.LEVEL3,
                                       atomic_score)

    def calculate_kpi_set_from_template(self):
        """
        Chooses and calculates the single KPI set that applies to this
        session, selected by the store's additional_attribute_4.
        :return: -1 when the attribute is unrecognised, else None
        """
        # get the template
        parsed_template = self.parse_template()
        # get all the unique sets
        sets = parsed_template['KPI Level 1 Name'].unique()
        # get the session additional_attribute_4
        additional_attribute_4 = self.store_info.additional_attribute_4.values[
            0]
        set_name = self.choose_correct_set_to_calculate(
            additional_attribute_4, sets)
        # wrong value in additional attribute 4 - shouldn't calculate
        if set_name == '':
            Log.warning(
                'Wrong value in additional attribute 4 - shouldnt calculate')
            return -1
        # get only the part of the template that is related to this set
        set_template_df = parsed_template[parsed_template['KPI Level 1 Name']
                                          == set_name]
        # start calculating !
        self.calculate_set_score(set_template_df, set_name)

    @staticmethod
    def choose_correct_set_to_calculate(additional_attribute_4, sets):
        """
        Chooses the appropriate set to calculate.
        NOTE(review): this relies on the order in which the set names come
        out of unique(), i.e. on the template's row order - verify that the
        template always lists BC, BA, MODELORAMA sets in that order.
        :param additional_attribute_4: session additional_attribute_4
        :param sets: list of optional sets
        :return: set name to calculate, or '' when unrecognised
        """
        if additional_attribute_4 == 'BC':
            set_name = sets[0]
        elif additional_attribute_4 == 'BA':
            set_name = sets[1]
        elif additional_attribute_4 == 'MODELORAMA':
            set_name = sets[2]
        else:
            return ''
        return set_name

    def main_calculation(self):
        """
        Entry point: writes the geo-location result, calculates the
        template-driven KPI set and commits everything.
        """
        # calculate geo
        geo_result = self.geo.calculate_geo_location()
        self.geo.write_geo_to_db(float(geo_result))
        # calculate from template
        self.calculate_kpi_set_from_template()
        self.common.commit_results_data()
# Example #6
class AMERICASToolBox:
    # Legacy KPI hierarchy levels.
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3
    # Atomic KPI names as they appear in the static KPI data.
    FIRST_KPI = 'First KPI'
    SECOND_KPI = 'Second KPI'

    def __init__(self, data_provider, output):
        """
        Caches the session data sets used by the two KPI checks below.
        :param data_provider: session data provider
        :param output: output handle (kept for the runner's interface)
        """
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.kpi_results_queries = []

    def get_product_fk_from_product_name(self, product_name):
        """
        Looks up a product's fk by its exact name.
        :param product_name: product name as it appears in all_products
        :return: the product fk
        :raises IndexError: when the name is not in the catalogue
        """
        return self.all_products.product_fk[self.all_products.product_name ==
                                            product_name].values[0]

    def calculate_first_kpi(self):
        """
        Checks whether shelf number 2 holds exactly one distinct product,
        and that product is the Corona Light tall can.
        :return: True/False
        """
        product_name = 'CORONA LIGHT 355ML BOTE TALLCAN'
        mpis = self.match_product_in_scene
        second_shelf = mpis[mpis.shelf_number == 2]
        # fixed local name (was '..._second_self')
        unique_products_in_second_shelf = second_shelf.product_fk.unique()
        # an empty shelf and a mixed shelf both fail
        if len(unique_products_in_second_shelf) != 1:
            return False
        product_fk = self.get_product_fk_from_product_name(product_name)
        return product_fk in unique_products_in_second_shelf

    def calculate_second_kpi(self):
        """
        Sums the facings of the Modelo Especial single can across the whole
        session; passes when at least 5 facings are found.
        :return: True/False
        """
        product_name = 'MODELO ESPECIAL 355ML BOTE SINGLE'
        facing_counter = 0
        try:
            product_fk = self.get_product_fk_from_product_name(product_name)
            products_in_scene_df = self.match_product_in_scene[
                self.match_product_in_scene.product_fk == product_fk]
            for _, row in products_in_scene_df.iterrows():
                # fix: detect a missing face_count with pd.isnull instead of
                # comparing str(value) to 'nan' (also covers None); a match
                # with no recorded face_count still counts as one facing
                if pd.isnull(row.face_count):
                    facing_counter += 1
                else:
                    facing_counter += row.face_count
        except IndexError:
            # product not in this project's catalogue - treat as 0 facings
            pass
        return facing_counter >= 5

    def write_result_to_db(self, kpi_name, kpi_result):
        """
        Writes a pass/fail result (as 100/0) to all three legacy KPI levels.
        :param kpi_name: atomic KPI name, also used to resolve kpi/set fks
        :param kpi_result: truthy for pass, falsy for fail
        :return: None
        """
        score = 100 if kpi_result else 0
        atomic_kpi_fk = self.kpi_static_data.atomic_kpi_fk[
            self.kpi_static_data.atomic_kpi_name == kpi_name].values[0]
        self.common.write_to_db_result(atomic_kpi_fk, self.LEVEL3, score)
        kpi_fk = self.kpi_static_data.kpi_fk[self.kpi_static_data.kpi_name ==
                                             kpi_name].values[0]
        self.common.write_to_db_result(kpi_fk, self.LEVEL2, score)
        kpi_set_fk = self.kpi_static_data.kpi_set_fk[
            self.kpi_static_data.kpi_name == kpi_name].values[0]
        self.common.write_to_db_result(kpi_set_fk, self.LEVEL1, score)

    def main_calculation(self):
        """
        Calculates both KPIs, writes their results and commits them.
        """
        self.write_result_to_db(self.FIRST_KPI, self.calculate_first_kpi())
        self.write_result_to_db(self.SECOND_KPI, self.calculate_second_kpi())
        self.common.commit_results_data()
# Example #7
class PURINAToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        """
        Caches the session data sets and pre-filters scif down to the pet
        food category for the Purina calculations.
        :param data_provider: session data provider
        :param output: output handle (kept for the runner's interface)
        """
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        # NOTE(review): session_info was already assigned above - this
        # re-assignment looks redundant; confirm before removing.
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.session_fk = self.session_info['pk'].values[0]
        self.kpi_results_queries = []
        self.kpi_static_queries = []
        # only pet-food rows are relevant to the Purina KPIs
        self.purina_scif = self.scif.loc[self.scif['category_fk'] == PET_FOOD_CATEGORY]

    def calculate_purina(self, *args, **kwargs):
        """
        This function calculates the KPI results.

        Entry point for the Purina calculation: bails out for non-Purina
        sessions, refreshes the static KPI definitions, then collects and
        commits the results. The steps are order-dependent.
        """
        # irrelevant sessions are skipped entirely
        if not self.is_session_purina():
            return
        # Update all new static KPIs
        self.create_new_static_kpi()

        # re-read static data so KPIs created above are visible below
        self.kpi_static_data = self.common.get_kpi_static_data(refresh=True)

        self.update_kpi_score()
        self.run_data_collecting()
        self.common.commit_results_data()

    def update_kpi_score(self):
        """
        Writes a constant score of 1 for every level-2 Purina KPI, solely so
        the results show up in the reporting join.
        """
        for kpi_name in PURINA_KPI:
            fk = self.get_kpi_fk_by_kpi_name(kpi_name, self.LEVEL2,
                                             set_name=PURINA_SET)
            self.common.write_to_db_result(fk, self.LEVEL2, 1)


    def run_data_collecting(self):
        """
        This function run the man calculation of linear sos with sub category out of subsegment
        or price out of subsegment
        :param price_kpi:
        :return:
        """

        data = self.purina_scif.dropna(subset=[LINEAR_SIZE])

        if data.empty:
            Log.info("No relevant purina's products were found in session.")
            return

        # subseg_name_list = data[SCIF_SUBSEGMENT].unique()
        # for subseg in subseg_name_list:
        #     if not subseg:
        #         subseg = NO_SUBSEG
        #         by_subseg = data.loc[pd.isnull(data[SCIF_SUBSEGMENT])]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     else:
        #         by_subseg = data.loc[data[SCIF_SUBSEGMENT] == subseg]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=SUBSEGMENT_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=PRICE_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)

        # gets all category linear size

        category_ft = self.cm_to_ft(sum(data[LINEAR_SIZE]))
        fk = self.get_kpi_fk_by_kpi_name(PURINA_SET, self.LEVEL1)
        self.common.write_to_db_result(fk, self.LEVEL1, category_ft)

        man = data['manufacturer_name'].unique()
        for mf in man:
            by_mf = data.loc[data['manufacturer_name'] == mf]
            manufacturer_ft = self.cm_to_ft(sum(by_mf[LINEAR_SIZE]))
            relevant_kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == MANUFACTUR) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
            atomic_fk = self.get_kpi_fk_by_kpi_name(mf, self.LEVEL3, father=MANUFACTUR, set_name=PURINA_SET)
            if atomic_fk:
                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                               score=manufacturer_ft, score_2=manufacturer_ft,
                                               session_uid=self.session_uid, store_fk=self.store_id,
                                               display_text=mf.replace("'","''"),
                                               visit_date=self.visit_date.isoformat(),
                                               calculation_time=datetime.utcnow().isoformat(),
                                               kps_name=PURINA_SET,
                                               kpi_fk=relevant_kpi_fk)
            else:
                print 'atomic cannot be saved for manufacturer {}'.format(mf)

            brands = by_mf['brand_name'].unique()
            for brand in brands:
                by_brand = by_mf.loc[data['brand_name'] == brand]
                brand_ft = self.cm_to_ft(sum(by_brand[LINEAR_SIZE]))
                kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == BRAND) &
                                                  (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                atomic_fk = self.get_kpi_fk_by_kpi_name(brand, self.LEVEL3, father=BRAND, set_name=PURINA_SET)
                if atomic_fk:
                    self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                   score=brand_ft, score_2=brand_ft, style=mf.replace("'","''"),
                                                   session_uid=self.session_uid, store_fk=self.store_id,
                                                   display_text=brand.replace("'","''"),
                                                   visit_date=self.visit_date.isoformat(),
                                                   calculation_time=datetime.utcnow().isoformat(),
                                                   kps_name=PURINA_SET,
                                                   kpi_fk=kpi_fk)
                else:
                    print 'atomic cannot be saved for brand {}'.format(brand)

                categories = by_brand[SCIF_CATEOGRY].unique()
                for cat in categories:
                    if not cat:
                        cat = OTHER
                        by_cat = by_brand.loc[pd.isnull(by_brand[SCIF_PRICE])]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))
                    else:
                        by_cat = by_brand.loc[data[SCIF_SUB_CATEOGRY] == cat]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))

                    kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == CATEGORY) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                    atomic_fk = self.get_kpi_fk_by_kpi_name(cat, self.LEVEL3, father=CATEGORY, set_name=PURINA_SET)
                    if atomic_fk:
                        self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                       score=cat_ft,
                                                       score_2=cat_ft, style=mf.replace("'","''"),
                                                       result=brand.replace("'","''"),
                                                       session_uid=self.session_uid, store_fk=self.store_id,
                                                       display_text=cat.replace("'","''"),
                                                       visit_date=self.visit_date.isoformat(),
                                                       calculation_time=datetime.utcnow().isoformat(),
                                                       kps_name=PURINA_SET,
                                                       kpi_fk=kpi_fk)
                    else:
                        print 'atomic cannot be saved for category {}'.format(cat)

                    sub_cats = by_cat[SCIF_SUB_CATEOGRY].unique()
                    for sub_cat in sub_cats:
                        if not sub_cat:
                            sub_cat = OTHER
                            by_sub_cat = by_cat.loc[pd.isnull(by_cat[SCIF_PRICE])]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        else:
                            by_sub_cat = by_cat.loc[data[SCIF_SUB_CATEOGRY] == sub_cat]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        # write to db under sub category atomic kpi score with brand name in results

                        kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == SUB_CATEGORY) &
                                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                        atomic_fk = self.get_kpi_fk_by_kpi_name(sub_cat, self.LEVEL3, father=SUB_CATEGORY,
                                                                    set_name=PURINA_SET)
                        if atomic_fk:
                            self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                           score=sub_cat_ft,
                                                           score_2=sub_cat_ft, style=mf.replace("'","''"),
                                                           result=brand.replace("'","''"),
                                                           result_2=cat.replace("'","''"),
                                                           session_uid=self.session_uid, store_fk=self.store_id,
                                                           display_text=sub_cat.replace("'","''"),
                                                           visit_date=self.visit_date.isoformat(),
                                                           calculation_time=datetime.utcnow().isoformat(),
                                                           kps_name=PURINA_SET,
                                                           kpi_fk=kpi_fk)
                        else:
                            print 'atomic cannot be saved for sub category {}'.format(sub_cat)

                        prices = by_sub_cat[SCIF_PRICE].unique()
                        for price_class in prices:
                            if not price_class:
                                price_class = OTHER
                                by_prices = by_sub_cat.loc[pd.isnull(by_sub_cat[SCIF_PRICE])]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            else:
                                by_prices = by_sub_cat.loc[by_sub_cat[SCIF_PRICE] == price_class]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == PRICE_KPI) &
                                                              (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                            atomic_fk = self.get_kpi_fk_by_kpi_name(price_class, self.LEVEL3, father=PRICE_KPI,
                                                                    set_name=PURINA_SET)
                            if atomic_fk:
                                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                               score=price_ft,
                                                               score_2=price_ft, style=mf.replace("'","''"),
                                                               result=brand.replace("'","''"),
                                                               result_2=cat.replace("'","''"),
                                                               result_3=sub_cat.replace("'","''"),
                                                               session_uid=self.session_uid, store_fk=self.store_id,
                                                               display_text=price_class.replace("'", "''"),
                                                               visit_date=self.visit_date.isoformat(),
                                                               calculation_time=datetime.utcnow().isoformat(),
                                                               kps_name=PURINA_SET,
                                                               kpi_fk=kpi_fk )
                            else:
                                print 'atomic cannot be saved for price class {}'.format(price_class)


    @staticmethod
    def cm_to_ft(cm):
        """Convert a length in centimeters to feet (1 ft == 30.48 cm)."""
        cm_per_foot = 30.48
        return cm / cm_per_foot

    def get_labels(self):
        """Fetch pk, labels and ean_code for every product in static_new.product.

        :return: a pandas DataFrame with one row per product.
        """
        query = """select pk, labels, ean_code
        from static_new.product
        """
        return pd.read_sql_query(query, self.rds_conn.db)

    def get_kpi_fk_by_kpi_name(self, kpi_name, kpi_level, father=None, logic_father=None, set_name=None):
        """Look up a KPI's fk in the static KPI table by its name.

        :param kpi_name: the name to match (set / kpi / atomic name, depending on level).
        :param kpi_level: one of self.LEVEL1, self.LEVEL2, self.LEVEL3.
        :param father: optional parent name used to disambiguate duplicates.
        :param logic_father: unused; kept for backward-compatible signature.
        :param set_name: optional KPI set name filter.
        :return: the matching fk, or None when no row matches.
        :raises ValueError: when kpi_level is not one of the three known levels.
        """
        # Per-level (fk column, name column, parent-name column) mapping.
        level_columns = {
            self.LEVEL1: ('kpi_set_fk', 'kpi_set_name', 'kpi_set_name'),
            self.LEVEL2: ('kpi_fk', 'kpi_name', 'kpi_set_name'),
            self.LEVEL3: ('atomic_kpi_fk', 'atomic_kpi_name', 'kpi_name'),
        }
        if kpi_level not in level_columns:
            raise ValueError('invalid level')
        column_key, column_value, father_value = level_columns[kpi_level]

        try:
            matches = self.kpi_static_data[self.kpi_static_data[column_value] == kpi_name]
            if father:
                matches = matches[matches[father_value] == father]
            if set_name:
                matches = matches[matches['kpi_set_name'] == set_name]

            return matches[column_key].values[0]

        except IndexError:
            Log.info('Kpi name: {}, isn\'t equal to any kpi name in static table'.format(kpi_name))
            return None

    def create_new_static_kpi(self):
        """Add any brand / manufacturer / category / sub-category / price-class
        values seen in this session's Purina scif that are not yet present in
        the static KPI tables, as atomic KPIs under the Purina set.
        """
        brands = self.get_all_brands()
        sub_cats = self.get_all_sub_categories()
        manufacturer = self.get_all_manufacturers()
        cats = self.get_all_categories()
        prices = self.get_all_price_classes()

        # Null names are filtered out for every dimension.  The original code
        # only filtered nulls for category / sub-category / price: a NaN brand
        # or manufacturer name would reach save_static_atomics, whose
        # .replace() call fails on a float NaN.
        new_brands = self.purina_scif.loc[(~self.purina_scif['brand_name'].isin(brands)) &
                                          (~pd.isnull(self.purina_scif['brand_name']))]['brand_name'].unique()
        new_manufacturer = self.purina_scif.loc[(~self.purina_scif['manufacturer_name'].isin(manufacturer)) &
                                                (~pd.isnull(self.purina_scif['manufacturer_name']))][
            'manufacturer_name'].unique()
        new_sub_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_SUB_CATEOGRY].isin(sub_cats)) &
                                        (~pd.isnull(self.purina_scif[SCIF_SUB_CATEOGRY]))][SCIF_SUB_CATEOGRY].unique()
        new_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_CATEOGRY].isin(cats)) &
                                       (~pd.isnull(self.purina_scif[SCIF_CATEOGRY]))][SCIF_CATEOGRY].unique()
        new_prices = self.purina_scif.loc[(~self.purina_scif[SCIF_PRICE].isin(prices)) &
                                          (~pd.isnull(self.purina_scif[SCIF_PRICE]))][SCIF_PRICE].unique()

        self.save_static_atomics(BRAND, new_brands, PURINA_SET)
        self.save_static_atomics(MANUFACTUR, new_manufacturer, PURINA_SET)
        self.save_static_atomics(CATEGORY, new_cat, PURINA_SET)
        self.save_static_atomics(SUB_CATEGORY, new_sub_cat, PURINA_SET)
        self.save_static_atomics(PRICE_KPI, new_prices, PURINA_SET)

        self.commit_static_data()

    def get_all_brands(self):
        """Return the atomic KPI names already registered under the Brand KPI."""
        brand_rows = self.kpi_static_data['kpi_name'] == BRAND
        return self.kpi_static_data.loc[brand_rows, 'atomic_kpi_name']

    def get_all_sub_categories(self):
        """Return the atomic KPI names already registered under the Sub Category KPI."""
        sub_cat_rows = self.kpi_static_data['kpi_name'] == SUB_CATEGORY
        return self.kpi_static_data.loc[sub_cat_rows, 'atomic_kpi_name']

    def get_all_manufacturers(self):
        """Return the atomic KPI names already registered under the Manufacturer KPI."""
        manufacturer_rows = self.kpi_static_data['kpi_name'] == MANUFACTUR
        return self.kpi_static_data.loc[manufacturer_rows, 'atomic_kpi_name']

    def get_all_categories(self):
        """Return the atomic KPI names already registered under the Category KPI."""
        category_rows = self.kpi_static_data['kpi_name'] == CATEGORY
        return self.kpi_static_data.loc[category_rows, 'atomic_kpi_name']

    def get_all_price_classes(self):
        """Return the atomic KPI names already registered under the Price Class KPI."""
        price_rows = self.kpi_static_data['kpi_name'] == PRICE_KPI
        return self.kpi_static_data.loc[price_rows, 'atomic_kpi_name']

    def save_static_atomics(self, kpi_name, atomics, set_name):
        """Queue INSERT statements that register new atomic KPIs under the given
        level-2 KPI of the given set.  Queries are only appended here; they are
        executed and committed later by commit_static_data.

        :param kpi_name: name of the parent (level-2) KPI.
        :param atomics: iterable of new atomic KPI names to insert.
        :param set_name: KPI set the parent belongs to.
        """
        parent = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == kpi_name) &
                                          (self.kpi_static_data['kpi_set_name'] == set_name)]
        kpi_fk = parent['kpi_fk'].values[0]
        for atomic_name in atomics:
            # Escape single quotes so the value survives SQL string quoting.
            atomic_name = atomic_name.replace("'", "''")
            query = """
               INSERT INTO {0} (`kpi_fk`, `name`, `description`, `display_text`,`presentation_order`, `display`)
               VALUES ('{1}', '{2}', '{3}', '{4}', '{5}', '{6}');""".format(STATIC_ATOMIC, kpi_fk, atomic_name,
                                                                            atomic_name, atomic_name, 1, 'Y')

            self.kpi_static_queries.append(query)

    def commit_static_data(self):
        """
        This function writes all queued static-KPI queries to the DB and commits
        the changes.

        Execution is best-effort: a query that fails is logged (now including
        the error, which the original discarded) and skipped, so one bad insert
        does not abort the rest of the batch.  The cursor is closed and the
        connection released even when something raises mid-way.
        """
        # Reconnect to get a fresh connection before executing the batch.
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        try:
            for query in self.kpi_static_queries:
                try:
                    cur.execute(query)
                except Exception as e:
                    Log.info('query {} could not be executed. Error: {}'.format(query, e))
            self.rds_conn.db.commit()
        finally:
            # Always release DB resources, even if commit (or a log call) raises.
            cur.close()
            self.rds_conn.disconnect_rds()

    def is_session_purina(self):
        """Check whether the session belongs to the Purina project.

        A session qualifies when it has at least one session_category row with
        category_fk 13, resolution_fk 1 (successful visit) and
        exclude_status_fk 1 (not excluded).

        :return: True when such a row exists, False otherwise.
        """
        session_data = self.get_session_category_data()
        purina_mask = ((session_data['category_fk'] == 13) &
                       (session_data['resolution_fk'] == 1) &
                       (session_data['exclude_status_fk'] == 1))
        return not session_data.loc[purina_mask].empty

    def get_session_category_data(self):
        """Read the category / resolution / exclusion rows for this session
        from probedata.session_category.

        :return: a pandas DataFrame with columns category_fk, resolution_fk,
                 exclude_status_fk.
        """
        connector = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        query = """select category_fk, resolution_fk, exclude_status_fk from probedata.session_category
                where session_fk = {}""".format(self.session_fk)
        return pd.read_sql_query(query, connector.db)