Code example #1
 def _assert_kpi_results_filled(self):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     cursor = connector.db.cursor()
     cursor.execute('''
     SELECT * FROM report.kpi_results
     ''')
     kpi_results = cursor.fetchall()
     self.assertNotEqual(len(kpi_results), 0)
     connector.disconnect_rds()
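
A note on cleanup: if the assertion in example #1 fails, disconnect_rds() is never reached and the connection leaks. A minimal variant with guaranteed cleanup (a sketch; the only assumption is the PSProjectConnector API already used above):

 def _assert_kpi_results_filled(self):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     try:
         cursor = connector.db.cursor()
         cursor.execute('SELECT * FROM report.kpi_results')
         kpi_results = cursor.fetchall()
         # the assertion may raise, so it stays inside the try block
         self.assertNotEqual(len(kpi_results), 0)
     finally:
         # always release the RDS connection, even when the test fails
         connector.disconnect_rds()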
Code example #2
 def _get_static_kpi(self):
     connector = PSProjectConnector(self._project,self._dbUser)
     try:
         static = '''SELECT * FROM  static.kpi_level_2;'''
         return pd.read_sql_query(static, connector.db)
     except Exception as e:
         print e.message
     finally:
         connector.disconnect_rds()
Code example #3
 def _assert_custom_scif_table_filled(self):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     cursor = connector.db.cursor()
     #cursor = connector.db.cursor(MySQLdb.cursors.DictCursor)
     cursor.execute('''
        SELECT * FROM pservice.custom_scene_item_facts
        ''')
     kpi_results = cursor.fetchall()
     self.assertNotEqual(len(kpi_results), 0)
     connector.disconnect_rds()
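
The commented-out line in example #3 hints at MySQLdb.cursors.DictCursor. With it, each row comes back as a dict keyed by column name, which makes assertions on specific columns possible; a sketch (assuming MySQLdb is indeed the driver behind connector.db, as that comment suggests):

 import MySQLdb.cursors

 def _assert_custom_scif_table_filled(self):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     try:
         # dict rows instead of positional tuples
         cursor = connector.db.cursor(MySQLdb.cursors.DictCursor)
         cursor.execute('SELECT * FROM pservice.custom_scene_item_facts')
         rows = cursor.fetchall()
         self.assertNotEqual(len(rows), 0)
     finally:
         connector.disconnect_rds()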
Code example #4
 def __init__(self, session_uid, store_id, visit_date, project_name):
     rds_conn = PSProjectConnector(project_name, DbUsers.ReadOnly)
     self._kpi_static_data = get_all_kpi_static_data(rds_conn.db)
     rds_conn.disconnect_rds()
     self._project_name = project_name
     self._kpi_results_queries = []
     self._kpk_results_queries = []
     self._kps_results_queries = []
     self._session_uid = session_uid
     self._store_id = store_id
     self._visit_date = visit_date
Code example #5
 def _assert_scene_tables_kpi_results_filled(self, distinct_kpis_num=None):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     cursor = connector.db.cursor()
     cursor.execute('''
        SELECT * FROM report.scene_kpi_results
        ''')
     kpi_results = cursor.fetchall()
     if distinct_kpis_num:
         df = pd.DataFrame(kpi_results)
         self.assertEqual(len(df['kpi_level_2_fk'].unique()), distinct_kpis_num)
     else:
         self.assertNotEqual(len(kpi_results), 0)
     connector.disconnect_rds()
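
One caveat in the snippet above: pd.DataFrame(kpi_results) built from a plain tuple cursor gets integer column labels, so df['kpi_level_2_fk'] only resolves if the cursor returns dict rows. A small helper (hypothetical, not from the original code) that takes the labels from cursor.description instead:

 def _fetch_as_dataframe(cursor, query):
     # cursor.description lists the result columns after execute(),
     # so the DataFrame gets real labels instead of 0, 1, 2, ...
     cursor.execute(query)
     columns = [col[0] for col in cursor.description]
     return pd.DataFrame(cursor.fetchall(), columns=columns)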
Code example #6
    def _assert_kpi_results_filled(self):
        connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1,
                                       DbUsers.Docker)
        cursor = connector.db.cursor()
        cursor.execute('''
        SELECT * FROM report.kpi_level_2_results
        ''')
        kpi_results = cursor.fetchall()

        # silenced assertion: diageomx no longer uses these templates, so the test was failing
        # self.assertNotEquals(len(kpi_results), 0)

        connector.disconnect_rds()
Code example #7
 def get_session_kpi_results(self):
     query = '''
             select kpir.*, kh.session_kpi_results_fk, kh.session_kpi_results_parent_fk
             from report.kpi_level_2_results kpir
             left join probedata.session ses on kpir.session_fk = ses.pk
             left join report.kpi_hierarchy kh on kpir.pk = kh.session_kpi_results_parent_fk
             -- left join report.kpi_level_2_results kpir2 on kh.session_kpi_results_parent_fk = kpir2.pk
             where kh.session_kpi_results_fk  is not null
             and session_uid = '{}'
             '''.format(self.session_uid)
     con = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
     data = pd.read_sql(query, con.db)
     con.disconnect_rds()
     return data
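
Interpolating session_uid with .format() as above relies on the value being well-formed; a quote inside it would break the SQL. A parameterized variant, assuming a MySQLdb-style DBAPI driver whose paramstyle is '%s' (pandas passes params through to the driver):

 def get_session_kpi_results(self):
     query = '''
             select kpir.*, kh.session_kpi_results_fk, kh.session_kpi_results_parent_fk
             from report.kpi_level_2_results kpir
             left join probedata.session ses on kpir.session_fk = ses.pk
             left join report.kpi_hierarchy kh on kpir.pk = kh.session_kpi_results_parent_fk
             where kh.session_kpi_results_fk is not null
             and session_uid = %s
             '''
     con = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
     try:
         # the driver quotes the parameter; no string formatting needed
         return pd.read_sql(query, con.db, params=[self.session_uid])
     finally:
         con.disconnect_rds()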
Code example #8
 def _get_kpi_results(self):
     connector = PSProjectConnector(self._project, self._dbUser)
     try:
         results = '''    
                 SELECT 
                     *
                 FROM
                     report.kpi_level_2_results,
                     probedata.session
                 WHERE
                     probedata.session.pk = report.kpi_level_2_results.session_fk
                         AND probedata.session.visit_date BETWEEN '{}' AND '{}';'''.format(self.start_date, self.end_date)
         return pd.read_sql_query(results, connector.db)
     except Exception as e:
         print e.message
     finally:
         connector.disconnect_rds()
Code example #9
 def _assert_new_tables_kpi_results_filled(self, distinct_kpis_num=None, list_of_kpi_names=None):
     connector = PSProjectConnector(TestProjectsNames().TEST_PROJECT_1, DbUsers.Docker)
     cursor = connector.db.cursor()
     cursor.execute('''
        SELECT kl2.pk, kl2.client_name, kl2r.kpi_level_2_fk, kl2r.result 
        FROM report.kpi_level_2_results kl2r left join static.kpi_level_2 kl2 
        on kpi_level_2_fk = kl2.pk
        ''')
     kpi_results = cursor.fetchall()
     df = pd.DataFrame(kpi_results)
     if distinct_kpis_num:
         self.assertEqual(len(df['kpi_level_2_fk'].unique()), distinct_kpis_num)
     else:
         self.assertNotEqual(len(kpi_results), 0)
     if list_of_kpi_names:
         existing_results = df['client_name'].unique()
         result = all(elem in existing_results for elem in list_of_kpi_names)
         self.assertTrue(result)
     connector.disconnect_rds()
Code example #10
 def commit_results_data(self):
     """
     This function writes all KPI results to the DB, and commits the changes.
     """
     rds_conn = PSProjectConnector(self._project_name,
                                   DbUsers.CalculationEng)
     cur = rds_conn.db.cursor()
     delete_queries = MarsUsQueries.get_delete_session_results_query(
         self._session_uid)
     for query in delete_queries:
         cur.execute(query)
     queries = self.merge_insert_queries(self._kpi_results_queries)
     for query in queries:
         cur.execute(query)
     queries = self.merge_insert_queries(self._kpk_results_queries)
     for query in queries:
         cur.execute(query)
     for query in self._kps_results_queries:
         cur.execute(query)
     rds_conn.db.commit()
     rds_conn.disconnect_rds()
Code example #11
 def commit_results(self, queries):
     self.rds_conn.disconnect_rds()
     rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
     cur = rds_conn.db.cursor()
     for query in self.update_queries:
         print query
         try:
             cur.execute(query)
         except Exception as e:
             Log.debug('Inserting to DB failed due to: {}'.format(e))
             rds_conn.disconnect_rds()
             rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
             cur = rds_conn.db.cursor()
             continue
     rds_conn.db.commit()
     rds_conn.disconnect_rds()
     rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
     cur = rds_conn.db.cursor()
     for query in queries:
         print query
         try:
             cur.execute(query)
         except Exception as e:
             Log.debug('Inserting to DB failed due to: {}'.format(e))
             rds_conn.disconnect_rds()
             rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
             cur = rds_conn.db.cursor()
             continue
     rds_conn.db.commit()
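
Example #11 repeats the same reconnect-on-error loop twice, once for update_queries and once for the insert queries. One way to factor it (a sketch only; PROJECT, DbUsers, Log and the connector API are taken from the example itself):

 def execute_with_reconnect(queries):
     # run queries one by one; on a failure, log it, reconnect, and keep going
     rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
     cur = rds_conn.db.cursor()
     for query in queries:
         try:
             cur.execute(query)
         except Exception as e:
             Log.debug('Inserting to DB failed due to: {}'.format(e))
             rds_conn.disconnect_rds()
             rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
             cur = rds_conn.db.cursor()
     rds_conn.db.commit()
     rds_conn.disconnect_rds()

 # commit_results then reduces to two calls:
 #     execute_with_reconnect(self.update_queries)
 #     execute_with_reconnect(queries)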
Code example #12
class KCUS_KPIToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output, set_name=None):
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.data_provider = data_provider
        self.output = output
        self.products = self.data_provider[Data.ALL_PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.project_name = data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.templates = self.data_provider[Data.ALL_TEMPLATES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.scenes_info = self.data_provider[Data.SCENES_INFO]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.session_info = SessionInfo(data_provider)
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_data = self.data_provider[Data.STORE_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO],
                                    how='left',
                                    left_on='store_id',
                                    right_on='store_fk')
        # DataFrame.rename ignores mapping keys that are not existing columns,
        # so both rename maps can be applied directly
        self.scif = self.scif.rename(columns=SUB)
        self.scif = self.scif.rename(columns=TITLE)
        # self.generaltoolbox = KCUSGENERALToolBox(data_provider, output, geometric_kpi_flag=True)
        # self.scif = self.scif.replace(' ', '', regex=True)
        self.set_name = set_name
        self.kpi_fetcher = KCUSFetcher(self.project_name, self.scif,
                                       self.match_product_in_scene,
                                       self.set_name, self.products,
                                       self.session_uid)
        self.all_template_data = parse_template(TEMPLATE_PATH, "Simple KPI's")
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        self.sales_rep_fk = self.data_provider[
            Data.SESSION_INFO]['s_sales_rep_fk'].iloc[0]
        self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
        self.store_type = self.data_provider[
            Data.STORE_INFO]['store_type'].iloc[0]
        self.region = self.data_provider[
            Data.STORE_INFO]['region_name'].iloc[0]
        self.thresholds_and_results = {}
        self.result_df = []
        self.writing_to_db_time = datetime.timedelta(0)
        self.kpi_results_queries = []
        # merge_insert_queries appends UPDATE statements here; it was never
        # initialized in the original snippet
        self.update_queries = []
        self.potential_products = {}
        self.shelf_square_boundaries = {}
        self.average_shelf_values = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.ignore_stacking = False
        self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
        self.max_shelf_of_bay = []
        self.INCLUDE_FILTER = 1
        # EXCLUDE_FILTER / CONTAIN_FILTER are used by get_filter_condition but
        # were never set; 0 and 2 follow the usual include/exclude/contain
        # toolbox convention (assumed values, not taken from this snippet)
        self.EXCLUDE_FILTER = 0
        self.CONTAIN_FILTER = 2
        self.MM_TO_FEET_CONVERSION = MM_TO_FEET_CONVERSION
        self.EXCLUDE_EMPTY = True

    def main_calculation(self, *args, **kwargs):
        """
               This function calculates the KPI results.
               """

        kpi_set_fk = 1
        set_name = self.kpi_static_data.loc[
            self.kpi_static_data['kpi_set_fk'] ==
            kpi_set_fk]['kpi_set_name'].values[0]
        template_data = self.all_template_data.loc[
            self.all_template_data['KPI Level 1 Name'] == set_name]

        try:
            if set_name and not set(
                    template_data['Scene Types to Include'].values[0].encode(
                    ).split(', ')) & set(
                        self.scif['template_name'].unique().tolist()):
                Log.info('Category {} was not captured'.format(
                    template_data['category'].values[0]))
                return
        except Exception as e:
            Log.info(
                'KPI Set {} is not defined in the template'.format(set_name))
        # for kpi_name in kpi_list:
        for i, row in template_data.iterrows():
            try:
                kpi_name = row['KPI Level 2 Name']
                if kpi_name in KPI_LEVEL_2_cat_space:
                    scene_type = [
                        s for s in row['Scene_Type'].encode().split(', ')
                    ]
                    kpi_type = row['KPI_Type']

                    # the original "x == a or b" was always truthy; a
                    # membership test is what was intended
                    if row['Param1'] in ('Category', 'sub_category'):
                        category = row['Value1']

                        if kpi_type == 'category space':
                            self.calculate_category_space(
                                kpi_set_fk, kpi_name, scene_type, category)

            except Exception as e:
                Log.info('KPI {} calculation failed due to {}'.format(
                    kpi_name.encode('utf-8'), e))
                continue
        return

    def calculate_category_space(self, kpi_set_fk, kpi_name, scene_types,
                                 category):
        template = self.all_template_data.loc[
            (self.all_template_data['KPI Level 2 Name'] == kpi_name)
            & (self.all_template_data['Value1'] == category)]
        kpi_template = template.loc[template['KPI Level 2 Name'] == kpi_name]
        if kpi_template.empty:
            return None
        kpi_template = kpi_template.iloc[0]
        values_to_check = []
        secondary_values_to_check = []

        filters = {
            'template_name': scene_types,
            'category': kpi_template['Value1']
        }

        if kpi_template['Value1']:
            values_to_check = self.all_products.loc[
                self.all_products['category'] ==
                kpi_template['Value1']]['category'].unique().tolist()

        if kpi_template['Value2']:
            if kpi_template['Value2'] in [
                    'Feminine Needs', 'Feminine Hygiene'
            ]:
                sub_category_att = 'FEM NEEDS'
                secondary_values_to_check = self.all_products.loc[
                    self.all_products[sub_category_att] ==
                    kpi_template['Value2']][sub_category_att].unique().tolist(
                    )

            # elif kpi_template['Value2'] == 'Feminine Hygiene':
            #     sub_category_att = 'FEM HYGINE'
            #     secondary_values_to_check = self.all_products.loc[self.all_products['category'] == kpi_template['Value1']][
            #     sub_category_att].unique().tolist()

        for primary_filter in values_to_check:
            filters[kpi_template['Param1']] = primary_filter
            if secondary_values_to_check:
                for secondary_filter in secondary_values_to_check:
                    if secondary_filter is None:
                        continue

                    filters[sub_category_att] = secondary_filter

                    result = self.calculate_category_space_length(**filters)

                    score = result

                    self.write_to_db_result(kpi_set_fk,
                                            score,
                                            self.LEVEL3,
                                            kpi_name=kpi_name,
                                            score=score)
            else:

                result = self.calculate_category_space_length(**filters)

                score = result

                self.write_to_db_result(kpi_set_fk,
                                        score,
                                        self.LEVEL3,
                                        kpi_name=kpi_name,
                                        score=score)

    def calculate_category_space_length(self, threshold=0.5, **filters):
        """
        :param threshold: The ratio for a bay to be counted as part of a category.
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The total shelf width (in mm) the relevant facings occupy.
        """

        try:
            # Remove this workaround when tagging is updated for feminine hygiene.
            if any(item in filters['template_name']
                   for item in ESTIMATE_SPACE_BY_BAYS_TEMPLATE_NAMES):
                for k in filters.keys():
                    if k not in ['template_name', 'category']:

                        del filters[k]

            filtered_scif = self.scif[self.get_filter_condition(
                self.scif, **filters)]
            if self.EXCLUDE_EMPTY:
                filtered_scif = filtered_scif[
                    filtered_scif['product_type'] != 'Empty']

            space_length = 0
            bay_values = []
            max_linear_of_bays = 0
            product_fk_list = filtered_scif['product_fk'].unique().tolist()
            # space_length_DEBUG = 0
            for scene in filtered_scif['scene_fk'].unique().tolist():

                scene_matches = self.match_product_in_scene[
                    self.match_product_in_scene['scene_fk'] == scene]
                scene_filters = dict(filters)  # copy so the caller's filters dict is not mutated
                scene_filters['scene_fk'] = scene
                scene_filters['product_fk'] = product_fk_list

                for bay in scene_matches['bay_number'].unique().tolist():
                    bay_total_linear = scene_matches.loc[
                        (scene_matches['bay_number'] == bay)
                        & (scene_matches['stacking_layer'] == 1) &
                        (scene_matches['status']
                         == 1)]['width_mm_advance'].sum()
                    max_linear_of_bays += bay_total_linear
                    scene_filters['bay_number'] = bay
                    tested_group_linear = scene_matches[
                        self.get_filter_condition(scene_matches,
                                                  **scene_filters)]

                    tested_group_linear_value = tested_group_linear[
                        'width_mm_advance'].loc[
                            tested_group_linear['stacking_layer'] == 1].sum()

                    if tested_group_linear_value:
                        bay_ratio = tested_group_linear_value / float(
                            bay_total_linear)
                    else:
                        bay_ratio = 0

                    if bay_ratio >= threshold:
                        # bay_num_of_shelves = len(scene_matches.loc[(scene_matches['bay_number'] == bay) &
                        #                                            (scene_matches['stacking_layer'] == 1)][
                        #                              'shelf_number'].unique().tolist())
                        # if kpi_name not in self.average_shelf_values.keys():
                        #     self.average_shelf_values[kpi_name] = {'num_of_shelves': bay_num_of_shelves,
                        #                                            'num_of_bays': 1}
                        # else:
                        #     self.average_shelf_values[kpi_name]['num_of_shelves'] += bay_num_of_shelves
                        #     self.average_shelf_values[kpi_name]['num_of_bays'] += 1
                        # if bay_num_of_shelves:
                        #     bay_final_linear_value = tested_group_linear_value / float(bay_num_of_shelves)
                        # else:
                        #     bay_final_linear_value = 0

                        #  bay_final_linear_value * self.MM_TO_FEET_CONVERSION
                        #  space_length_DEBUG += bay_final_linear_value
                        bay_values.append(4)
                    else:

                        bay_values.append(0)
                if filtered_scif['template_name'].iloc[
                        0] in ESTIMATE_SPACE_BY_BAYS_TEMPLATE_NAMES:
                    # estimate by bay count for these templates; in the original
                    # this value was immediately overwritten by the sum below
                    max_bays = len(
                        scene_matches['bay_number'].unique().tolist())
                    space_length = max_bays * 4
                else:
                    space_length = sum(bay_values)

        except Exception as e:
            Log.info('Linear Feet calculation failed due to {}'.format(e))
            space_length = 0

        return space_length
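
    # Illustration (not part of the original toolbox): the bay rule above
    # reduces to "a bay contributes a fixed 4 feet when the category's share
    # of its linear space meets the threshold, else 0". With invented numbers:
    @staticmethod
    def _bay_space_sketch(bays, threshold=0.5):
        # bays: list of (category_linear_mm, bay_total_linear_mm) pairs
        total = 0
        for category_mm, bay_mm in bays:
            ratio = category_mm / float(bay_mm) if bay_mm else 0
            if ratio >= threshold:
                total += 4  # a qualifying bay counts as 4 feet
        return total
        # _bay_space_sketch([(600, 1000), (900, 1000), (300, 1000)]) -> 8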

    def get_category(self):
        pass

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filtered.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an include/exclude flag.
                       INPUT EXAMPLE (1):   manufacturer_name = ('Diageo', DIAGEOAUPNGROGENERALToolBox.INCLUDE_FILTER)
                       INPUT EXAMPLE (2):   manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        if not filters:
            return df['pk'].apply(bool)
        if self.facings_field in df.keys():
            filter_condition = (df[self.facings_field] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                if not value:
                    continue
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                elif exclude_or_include == self.CONTAIN_FILTER:
                    condition = (df[field].str.contains(value[0], regex=False))
                    for v in value[1:]:
                        condition |= df[field].str.contains(v, regex=False)
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))

        return filter_condition
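
    # Usage sketch for get_filter_condition (values invented): a plain value
    # is an include filter, a (value, flag) tuple selects include/exclude/contain.
    #     filters = {'category': 'Pet Food',
    #                'manufacturer_name': (['Diageo'], self.EXCLUDE_FILTER)}
    #     filtered_scif = self.scif[self.get_filter_condition(self.scif, **filters)]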

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = KCUSFetcher.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def kpi_name_builder(self, kpi_name, **filters):
        """
        This function builds kpi name according to naming convention
        """
        for filter_name in filters.keys():
            if filter_name == 'template_name':
                continue
            kpi_name = kpi_name.replace('{' + filter_name + '}',
                                        str(filters[filter_name]))
        # escape quotes once, after all placeholders are filled; the original
        # "\'" escape was a no-op since it is the same string as "'"
        kpi_name = kpi_name.replace("'", "\\'")
        return kpi_name
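
    # Usage sketch (invented values): placeholders in curly braces are filled
    # from the filters; 'template_name' is skipped on purpose.
    #     self.kpi_name_builder('{category} Space', category='Pet Food',
    #                           template_name='Main Shelf')  # -> 'Pet Food Space'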

    def write_to_db_result(self,
                           kpi_set_fk,
                           result,
                           level,
                           score=None,
                           threshold=None,
                           kpi_name=None,
                           kpi_fk=None):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(kpi_set_fk,
                                                 result=result,
                                                 level=level,
                                                 score=score,
                                                 threshold=threshold,
                                                 kpi_name=kpi_name,
                                                 kpi_fk=kpi_fk)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self,
                               kpi_set_fk,
                               result,
                               level,
                               score=None,
                               threshold=None,
                               kpi_name=None,
                               kpi_fk=None):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        """
        if level == self.LEVEL1:
            kpi_set_name = \
                self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == kpi_set_fk]['kpi_set_name'].values[0]

            attributes = pd.DataFrame([(
                kpi_set_name,
                self.session_uid,
                self.store_id,
                self.visit_date.isoformat(),
                result,
                kpi_set_fk,
            )],
                                      columns=[
                                          'kps_name', 'session_uid',
                                          'store_fk', 'visit_date', 'score_1',
                                          'kpi_set_fk'
                                      ])
        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[
                self.kpi_static_data['kpi_fk'] ==
                kpi_fk]['kpi_name'].values[0].replace("'", "\\'")
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  kpi_fk, kpi_name, result)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            kpi_set_name = \
                self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == kpi_set_fk]['kpi_set_name'].values[0]
            try:
                atomic_kpi_fk = \
                    self.kpi_static_data[self.kpi_static_data['atomic_kpi_name'] == kpi_name]['atomic_kpi_fk'].values[0]
                kpi_fk = self.kpi_static_data[
                    self.kpi_static_data['atomic_kpi_fk'] ==
                    atomic_kpi_fk]['kpi_fk'].values[0]
            except Exception as e:
                atomic_kpi_fk = None
                kpi_fk = None
            kpi_name = kpi_name.replace("'", "\\'")
            attributes = pd.DataFrame(
                [(kpi_name, self.session_uid, kpi_set_name, self.store_id,
                  self.visit_date.isoformat(),
                  datetime.datetime.utcnow().isoformat(), result, kpi_fk,
                  atomic_kpi_fk, threshold, score)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'result', 'kpi_fk',
                    'atomic_kpi_fk', 'threshold', 'score'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        delete_queries = KCUSFetcher.get_delete_session_results_query(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        # for query in self.kpi_results_queries:
        #     try:
        #         cur.execute(query)
        #     except Exception as e:
        #         Log.info('Query {} failed due to {}'.format(query, e))
        #         continue
        queries = self.merge_insert_queries(self.kpi_results_queries)
        for query in queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    def merge_insert_queries(self, insert_queries):
        # other_queries = []
        query_groups = {}
        for query in insert_queries:
            if 'update' in query:
                self.update_queries.append(query)
            else:
                static_data, inserted_data = query.split('VALUES ')
                if static_data not in query_groups:
                    query_groups[static_data] = []
                query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))

        return merged_queries
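
The merge step groups the INSERT statements by everything before 'VALUES ' and re-joins their value tuples in chunks of 10,000 rows, so thousands of single-row inserts become a handful of multi-row ones. A usage sketch with invented queries (toolbox stands for an instance of the class):

 queries = [
     "INSERT INTO report.kpi_results (kpi_fk, score) VALUES (1, 100)",
     "INSERT INTO report.kpi_results (kpi_fk, score) VALUES (2, 50)",
 ]
 merged = toolbox.merge_insert_queries(queries)
 # -> one multi-row statement: "INSERT INTO ... VALUES (1, 100),\n(2, 50)"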
Code example #13
class SpecialProgramsToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.tools = SpecialProgramsGENERALToolBox(self.data_provider,
                                                   self.output,
                                                   rds_conn=self.rds_conn)
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.pop_data = parse_template(TEMPLATE_PATH, 'POP')
        self.pathway_data = parse_template(TEMPLATE_PATH, 'Pathway')
        self.store_types = parse_template(TEMPLATE_PATH, 'store types')
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_type = self.store_info['store_type'].iloc[0]

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI Data and saves it into one global Data frame.
        The Data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = SpecialProgramsQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        scenes = self.scif['scene_fk'].unique().tolist()
        if scenes:
            for scene in scenes:
                scene_data = self.scif.loc[self.scif['scene_fk'] == scene]
                pop_result = self.calculate_pop(scene_data)
                self.calculate_Pathway(pop_result, scene_data)
                del scene_data
        return

    def calculate_pop(self, scene_data):
        store_list = self.store_types['store types'].tolist()
        for store_type in store_list:
            if self.store_type in store_type:
                pop_new_data = self.pop_data.loc[self.pop_data['store type'] ==
                                                 store_type]
                for index, row in pop_new_data.iterrows():
                    template_group = [
                        str(g) for g in row['Template group'].split(',')
                    ]
                    if scene_data['template_group'].values[
                            0] in template_group or template_group == ['']:
                        brands_list = row['brand_name'].split(',')
                        filters = {
                            'brand_name': brands_list,
                            'scene_fk':
                            scene_data['scene_fk'].unique().tolist()
                        }
                        result = self.tools.calculate_availability(**filters)
                        if result > 0:
                            self.write_to_db_result(name='{} POP'.format(
                                scene_data['scene_fk'].values[0]),
                                                    result=row['result'],
                                                    score=1,
                                                    level=self.LEVEL3)
                            return row['result']
                del pop_new_data
                break
        self.write_to_db_result(name='{} POP'.format(
            scene_data['scene_fk'].values[0]),
                                result='No POP',
                                score=0,
                                level=self.LEVEL3)
        return

    def calculate_Pathway(self, pop_result, scene_data):
        result = 0
        store_list = self.store_types['store types'].tolist()
        for store_type in store_list:
            if self.store_type in store_type:
                try:
                    if pop_result:
                        pathways = self.pathway_data['result'].unique().tolist(
                        )
                        for pathway in pathways:
                            path_data = self.pathway_data.loc[
                                self.pathway_data['result'] == pathway]
                            if ',' in path_data['store type'].values[0]:
                                store_type_list = [
                                    str(g) for g in path_data['store type'].
                                    values[0].split(',')
                                ]
                            else:
                                # a single store type (no comma): wrap it in a
                                # list instead of iterating over its characters
                                store_type_list = [
                                    str(path_data['store type'].values[0])
                                ]
                            if self.store_type in store_type_list:
                                if path_data['Template group'].values[0]:
                                    template_group = [
                                        str(g)
                                        for g in path_data['Template group'].
                                        values[0].split(',')
                                    ]
                                else:
                                    template_group = []
                                if template_group:
                                    if scene_data['template_group'].values[
                                            0] in template_group:
                                        result = self.check_path_way(
                                            path_data, scene_data)
                                        if result == 1:
                                            return
                                else:
                                    result = self.check_path_way(
                                        path_data, scene_data)
                                    if result == 1:
                                        return
                            del path_data
                        del pathways

                except Exception as e:
                    continue
        if not result:
            self.write_to_db_result(name='{} Pathway'.format(
                scene_data['scene_fk'].values[0]),
                                    result='No Pathway',
                                    score=0,
                                    level=self.LEVEL3)
        return False

    def check_path_way(self, path_data, scene_data):
        filters = {'scene_fk': scene_data['scene_fk'].values[0]}
        result = 0
        filters[path_data['param1'].values[0]] = [
            str(g) for g in path_data['value1'].values[0].split(",")
        ]
        if not path_data['param2'].empty:
            if path_data['param2'].values[0]:
                filters[path_data['param2'].values[0]] = [
                    str(g) for g in path_data['value2'].values[0].split(",")
                ]
        if path_data['Target'].values[0]:
            target = float(path_data['Target'].values[0])
        else:
            target = 1
        if path_data['calculation_type'].values[0] == 'availability':
            result = self.tools.calculate_availability(**filters)
            if result > 0:
                result = 1
            if result >= target:
                result = 1
                self.write_to_db_result(name='{} Pathway'.format(
                    scene_data['scene_fk'].values[0]),
                                        result=path_data['result'].values[0],
                                        score=1,
                                        level=self.LEVEL3)
                return result
        elif path_data['calculation_type'].values[
                0] == 'number of unique SKUs':
            result = self.tools.calculate_assortment(**filters)
            if result > 0:
                result = 1
            if result >= target:
                score = result
                self.write_to_db_result(name='{} Pathway'.format(
                    scene_data['scene_fk'].values[0]),
                                        result=path_data['result'].values[0],
                                        score=score,
                                        level=self.LEVEL3)
                return result

    def write_to_db_result(self, score, level, result=None, name=None):
        """
        This function creates the result Data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(score, level, result, name)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, score, level, result=None, name=None):
        """
        This function creates a Data frame with all attributes needed for saving in KPI results tables.

        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                32]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), 32)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL3:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                32]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(name, self.session_uid, kpi_set_name, self.store_id,
                  self.visit_date.isoformat(), datetime.utcnow().isoformat(),
                  score, 246, result)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'kpi_fk',
                    'result'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self, kpi_set_fk=None):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        atomic_pks = tuple()
        if kpi_set_fk is not None:
            query = SpecialProgramsQueries.get_atomic_pk_to_delete(
                self.session_uid, kpi_set_fk)
            kpi_atomic_data = pd.read_sql_query(query, self.rds_conn.db)
            atomic_pks = tuple(kpi_atomic_data['pk'].tolist())
        cur = self.rds_conn.db.cursor()
        if atomic_pks:
            delete_queries = SpecialProgramsQueries.get_delete_session_results_query(
                self.session_uid, kpi_set_fk, atomic_pks)
            for query in delete_queries:
                cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
Code example #14
class MarsUsDogMainMealWet(object):
    def __init__(self, data_provider, output):
        self._data_provider = data_provider
        self.project_name = self._data_provider.project_name
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.ReadOnly)
        self._output = output
        self._tools = MarsUsGENERALToolBox(self._data_provider,
                                           self._output,
                                           ignore_stacking=True)
        self._template = ParseMarsUsTemplates()
        self._writer = self._get_writer()
        self.store_id = self._data_provider[Data.STORE_FK]
        self._data_provider.channel = self.get_store_att15(self.store_id)
        self._data_provider.retailer = self.get_store_retailer(self.store_id)
        self._data_provider.probe_groups = self.get_probe_group(
            self._data_provider.session_uid)
        self.store_type = data_provider.store_type
        self.rds_conn.disconnect_rds()
        self._data_provider.trace_container = pd.DataFrame(columns=[
            'kpi_display_text', 'scene_id', 'products&brands',
            'allowed_products', 'kpi_pass'
        ])

    def get_store_att15(self, store_fk):
        query = MarsUsQueries.get_store_attribute(15, store_fk)
        att15 = pd.read_sql_query(query, self.rds_conn.db)
        return att15.values[0][0]

    def get_store_att8(self, store_fk):
        query = MarsUsQueries.get_store_attribute(8, store_fk)
        att8 = pd.read_sql_query(query, self.rds_conn.db)
        return att8.values[0][0]

    def get_store_retailer(self, store_fk):
        query = MarsUsQueries.get_store_retailer(store_fk)
        retailer = pd.read_sql_query(query, self.rds_conn.db)
        return retailer.values[0][0]

    def get_probe_group(self, session_uid):
        query = MarsUsQueries.get_probe_group(session_uid)
        probe_group = pd.read_sql_query(query, self.rds_conn.db)
        return probe_group

    @property
    def _get_store_channel(self):
        return self._data_provider.channel

    @property
    def _get_store_type(self):
        return self._data_provider.store_type

    @property
    def _get_retailer_name(self):
        return self._data_provider.retailer

    def _get_writer(self):
        return KpiResultsWriter(
            session_uid=self._data_provider.session_uid,
            project_name=self._data_provider.project_name,
            store_id=self._data_provider[Data.STORE_FK],
            visit_date=self._data_provider[Data.VISIT_DATE])

    def calculate_scores(self):
        """
        This function calculates the KPI results.
        """
        if not self.is_relevant_retailer_channel():
            Log.warning(
                'retailer: {} and channel: {} are not relevant'.format(
                    self._get_retailer_name, self._get_store_channel))
            self._writer.commit_results_data()
            return

        if self._is_pet_food_category_excluded():
            Log.warning(
                'pet food category does not exist or it was excluded by the decision unit'
            )
            self._writer.commit_results_data()
            return

        # template SPT
        if self.is_relevant_retailer_channel_spt():
            set_names = self._get_set_spt_names()
            for set_name in set_names:
                if self._is_session_irrelevant_for_set(set_name):
                    Log.info('Skipping set: {}'.format(set_name))
                    continue
                Log.info('Starting set: {}'.format(set_name))
                if not self.retailer_channel_has_sales_data(set_name):
                    Log.warning('no sales data for retailer: {},'
                                ' channel: {}, set: {}'.format(
                                    self._get_retailer_name,
                                    self._get_store_channel, set_name))

                template_data = self._template.parse_template(set_name, 0)
                hierarchy = Definition(
                    template_data, self._get_store_channel,
                    self._get_retailer_name,
                    self._get_store_type).get_atomic_hierarchy_and_filters(
                        set_name)
                preferred_range = template_data[
                    KPIConsts.PREFERRED_RANGE_SHEET]
                Results(
                    self._tools, self._data_provider, self._writer,
                    preferred_range[preferred_range['Set name'] ==
                                    set_name]).calculate(hierarchy)

        # template BDB
        if self.is_relevant_retailer_channel_bdb():
            set_names = self._get_set_names()
            for set_name in set_names:
                if self._is_session_irrelevant_for_set(set_name):
                    Log.info('Skipping set: {}'.format(set_name))
                    continue
                Log.info('Starting set: {}'.format(set_name))
                if not self.retailer_channel_has_sales_data(set_name):
                    Log.warning('no sales data for retailer: {},'
                                ' channel: {}, set: {}'.format(
                                    self._get_retailer_name,
                                    self._get_store_channel, set_name))

                template_data = self._template.parse_template(set_name, 1)
                hierarchy = Definition(
                    template_data, self._get_store_channel,
                    self._get_retailer_name,
                    self._get_store_type).get_atomic_hierarchy_and_filters(
                        set_name)
                preferred_range = template_data[
                    KPIConsts.PREFERRED_RANGE_SHEET]
                Results(
                    self._tools, self._data_provider, self._writer,
                    preferred_range[preferred_range['Set name'] ==
                                    set_name]).calculate(hierarchy)

        # self._data_provider.trace_container.to_csv('/home/Israel/Desktop/trace_block.csv')
        self._writer.commit_results_data()

    @staticmethod
    def _get_set_names():
        return [
            DOG_MAIN_MEAL_DRY_2018, DOG_MAIN_MEAL_WET_2018, CAT_TREATS_2018,
            CAT_MAIN_MEAL_DRY_2018, CAT_MAIN_MEAL_WET_2018, DOG_TREATS_2018
        ]

    @staticmethod
    def _get_set_spt_names():
        return [
            SPT_DOG_TREATS_Q1_2018, SPT_CAT_TREATS_Q1_2018,
            SPT_CAT_MAIN_MEAL_Q1_2018, SPT_DOG_MAIN_MEAL_Q1_2018
        ]

    def _is_pet_food_category_excluded(self):
        category_fk = 13
        decision_unit = self._data_provider._decision_unit_data_provider
        session_category = decision_unit._session_categories_status
        if session_category[session_category['category_fk'] ==
                            category_fk]['exclude_status_fk'].empty:
            return True
        category_excluded = \
            session_category[session_category['category_fk'] == category_fk]['exclude_status_fk'].fillna(1).iloc[0] != 1
        session_excluded = decision_unit._session_exclude_status[
            'exclude_status_fk'].fillna(1).iloc[0] != 1
        return category_excluded | session_excluded

    def _is_session_irrelevant_for_set(self, set_name):
        matches = self._tools.match_product_in_scene.copy()
        relevant_filters = SET_PRE_CALC_CHECKS[set_name]
        filtered_matches = matches[self._tools.get_filter_condition(
            matches, **relevant_filters)]
        return filtered_matches.empty

    @staticmethod
    def _get_set_category_fk(set_name):
        return SET_CATEGORIES[set_name]

    def check_template(self):
        set_names = self._get_set_names()
        suspicious = {
            'kpis with empty filters': [],
            'filters with no data': {},
            'kpi with no filters': []
        }
        for set_name in set_names:
            template_data = self._template.parse_template(set_name)
            hierarchy = Definition(
                template_data,
                self._get_store_channel).get_atomic_hierarchy_and_filters(
                    set_name)
            product = self._data_provider.all_products
            for kpi in hierarchy:
                if 'filters' in kpi:
                    fil = kpi['filters']
                    if len(fil) == 0:
                        suspicious['kpis with empty filters'].append(
                            'set {} atomic {} filters are empty'.format(
                                set_name, kpi['atomic']))
                    for key, value in fil.iteritems():
                        if key == 'order by':
                            continue
                        key = self.split_filter(key)
                        key = self.rename_filter(key)
                        if isinstance(value, tuple):
                            value = value[0]
                        Log.info('set {} atomic {}, checking {} in {}'.format(
                            set_name, kpi['atomic'], value, key))
                        pp = product[product[key].isin(value)]
                        if len(pp) == 0:
                            suspicious['filters with no data'].setdefault(
                                key, set()).update(value)
                else:
                    suspicious['kpi with no filters'].append(
                        'set {} kpi {} has no filters'.format(
                            set_name, kpi['atomic']))
        return suspicious

    @staticmethod
    def split_filter(key):
        key_list = key.split(';')
        return key_list[0]

    @staticmethod
    def rename_filter(filter_):
        return FILTER_NAMING_DICT.get(filter_, filter_)

    def retailer_channel_has_sales_data(self, set_name):
        sales = ParseMarsUsTemplates().get_mars_sales_data()
        sales = sales[(sales['retailer'] == self._data_provider.retailer)
                      & (sales['channel'] == self._data_provider.channel) &
                      (sales['set'] == set_name.upper())]
        return not sales.empty

    def retailer_channel_has_spt_sales_data(self, set_name):
        sales = ParseMarsUsTemplates().get_mars_spt_sales_data()
        sales = sales[(sales['retailer'] == self._data_provider.retailer)
                      & (sales['channel'] == self._data_provider.channel) &
                      (sales['set'] == set_name.upper())]
        return not sales.empty

    def is_relevant_retailer_channel(self):
        is_relevant_retailer = self._get_retailer_name in BDB_RETAILERS + SPT_RETAILERS
        is_relevant_channel = self._get_store_channel in BDB_CHANNELS + SPT_CHANNELS
        return is_relevant_retailer and is_relevant_channel

    def is_relevant_retailer_channel_bdb(self):
        is_relevant_retailer = self._get_retailer_name in BDB_RETAILERS
        is_relevant_channel = self._get_store_channel in BDB_CHANNELS
        return is_relevant_retailer and is_relevant_channel

    def is_relevant_retailer_channel_spt(self):
        is_relevant_retailer = self._get_retailer_name in SPT_RETAILERS
        is_relevant_channel = self._get_store_channel in SPT_CHANNELS
        return is_relevant_retailer and is_relevant_channel
Code example #15
class MONSTERCCUS_SANDToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.tools = MONSTERGENERALCCUS_SANDToolBox(self.data_provider, self.output, rds_conn=self.rds_conn)
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.ssd_score = 0   # Save the current score for KPI 1; KPI 2 fails in case KPI 1 succeeds.

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI Data and saves it into one global Data frame.
        The Data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = CCUS_SANDMONSTERQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        score = 0
        kpi1 = self.calculate_kpi1()
        kpi2 = self.calculate_kpi2()
        if kpi1 and kpi2:
            score = 1
            self.write_to_db_result(result=score,
                                    score=score, level=self.LEVEL1)
            return True
        else:
            self.write_to_db_result(result=score,
                                    score=score, level=self.LEVEL1)
            return False

    def calculate_availability(self, filters1, filters2, check_sos=False, calc_type=None):
        score = 0
        scif_scenes = self.scif.loc[self.scif['template_fk'].isin(TEMPLATE_COLD)]
        scenes = scif_scenes['scene_id'].unique().tolist()
        bays = self.match_product_in_scene['bay_number'].unique().tolist()
        if scenes:
            for scene in scenes:
                for bay in bays:
                    filters1['scene_id'] = scene
                    filters1['bay_number'] = bay
                    filters2['scene_id'] = scene
                    filters2['bay_number'] = bay
                    if check_sos:
                        numerator_result = self.tools.calculate_availability(front_facing='Y', **filters2)
                        denominator_filters = {'scene_id': scene, 'bay_number': bay}
                        denominator_result = self.tools.calculate_availability(front_facing='Y', **denominator_filters)
                        result = 0 if denominator_result == 0 else (numerator_result / float(denominator_result))
                        if calc_type == "calc1":
                            target = SOS_TARGET_KPI1
                        elif calc_type == "calc2":
                            target = SOS_TARGET_KPI2
                        else:
                            # no SOS target for this calculation type; 'target'
                            # would otherwise be unbound on the first iteration
                            continue
                        if result < target:
                            continue
                    result1 = self.tools.calculate_availability(**filters1)
                    result2 = self.tools.calculate_availability(**filters2)
                    # both availabilities must pass; '(a and b) >= 1' only tested the second value
                    score = 1 if (result1 >= 1 and result2 >= 1) else 0
                    if score > 0:
                        return score
        return score

    def calculate_kpi1(self):
        filters1 = {'product_ean_code': ['355', '356', '7084702650', '7084702652']}
        filters2 = {'att2': ATT2, 'att4': SSD, 'manufacturer_fk': 1}
        score = self.calculate_availability(filters1, filters2, check_sos=True, calc_type="calc1")
        if score > 0:
            self.write_to_db_result(name='Is Mutant in the same door as Coke SSD?', result=score,
                                    score=1, level=self.LEVEL3)
            self.ssd_score = 1
            return True
        else:
            self.write_to_db_result(name='Is Mutant in the same door as Coke SSD?', result=score,
                                    score=0, level=self.LEVEL3)
            self.ssd_score = 0
            return False

    def calculate_kpi2(self):
        result = 0
        filters1 = {'product_ean_code': ['355', '356', '7084702650', '7084702652']}
        filters2 = {'product_ean_code': MOUNTAIN_DEW}
        score = self.calculate_availability(filters1, filters2, check_sos=True, calc_type="calc2")
        if score > 0 and self.ssd_score == 0:
            self.write_to_db_result(name='Is Mutant in the same door as Mountain Dew?', result=score,
                                    score=1, level=self.LEVEL3)
            return True
        else:
            self.write_to_db_result(name='Is Mutant in the same door as Mountain Dew?', result=result,
                                    score=0, level=self.LEVEL3)
            return False

    def write_to_db_result(self, score, level, result=None, name=None):
        """
        This function creates the result Data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(score, level, result, name)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, score, level, result=None, name=None):
        """
        This function creates a data frame with all attributes needed for saving in the KPI results tables.
        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == 27]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([(kpi_set_name, self.session_uid, self.store_id, self.visit_date.isoformat(),
                                        format(score, '.2f'), 27)],
                                      columns=['kps_name', 'session_uid', 'store_fk', 'visit_date', 'score_1',
                                               'kpi_set_fk'])
        elif level == self.LEVEL3:
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == 27]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([(name, self.session_uid, kpi_set_name, self.store_id,
                                        self.visit_date.isoformat(), datetime.utcnow().isoformat(),
                                        score, 241, result)],
                                      columns=['display_text', 'session_uid', 'kps_name', 'store_fk', 'visit_date',
                                               'calculation_time', 'score', 'kpi_fk', 'result'])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self, kpi_set_fk=None):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        atomic_pks = tuple()
        if kpi_set_fk is not None:
            query = CCUS_SANDMONSTERQueries.get_atomic_pk_to_delete(self.session_uid, kpi_set_fk)
            kpi_atomic_data = pd.read_sql_query(query, self.rds_conn.db)
            atomic_pks = tuple(kpi_atomic_data['pk'].tolist())
        cur = self.rds_conn.db.cursor()
        if atomic_pks:
            delete_queries = CCUS_SANDMONSTERQueries.get_delete_session_results_query(self.session_uid, kpi_set_fk, atomic_pks)
            for query in delete_queries:
                cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
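The commit above deletes the session's previous results before inserting the
recalculated ones, so reruns never leave duplicate rows behind. A minimal
standalone sketch of the same delete-then-insert pattern (the helper name and
the rollback on failure are illustrative additions, not part of the original):

def commit_session_results(rds_conn, delete_queries, insert_queries):
    # Remove the session's stale rows first, then write the fresh ones,
    # committing only once every statement has succeeded.
    cursor = rds_conn.db.cursor()
    try:
        for query in delete_queries:
            cursor.execute(query)
        for query in insert_queries:
            cursor.execute(query)
        rds_conn.db.commit()
    except Exception:
        rds_conn.db.rollback()  # keep the previous results intact on error
        raise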
Code example #16
class CCRUProjectCalculations:
    def __init__(self, data_provider, output):
        self.data_provider = data_provider
        self.output = output
        self.project_name = self.data_provider.project_name
        self.rds_conn = self.rds_connection()

        self.tool_box = CCRUKPIToolBox(self.data_provider,
                                       self.output,
                                       update_kpi_set=True)
        self.session_uid = self.tool_box.session_uid
        self.visit_date = self.tool_box.visit_date
        self.store_id = self.tool_box.store_id
        self.test_store = self.tool_box.test_store
        self.pos_kpi_set_name = self.tool_box.pos_kpi_set_name

        self.session_info = SessionInfo(data_provider)
        self.json = CCRUJsonGenerator()

        self.results = {}
        self.kpi_source_json = None

    def main_function(self):

        # assumes the visit date and MIN_CALC_DATE are ISO-formatted,
        # so plain string comparison is chronological
        if str(self.visit_date) < self.tool_box.MIN_CALC_DATE:
            Log.warning('Warning. Session cannot be calculated. '
                        'Visit date {0} is earlier than the minimum '
                        'calculation date {1}. '
                        'Store ID {2}.'.format(self.visit_date,
                                               self.tool_box.MIN_CALC_DATE,
                                               self.store_id))
            return

        elif self.tool_box.visit_type in [self.tool_box.SEGMENTATION_VISIT]:
            Log.warning(
                'Warning. Session with Segmentation visit type has no KPI calculations.'
            )
            return

        elif self.tool_box.visit_type in [self.tool_box.PROMO_VISIT]:
            self.calculate_promo_compliance()
            return

        else:
            if self.pos_kpi_set_name not in self.tool_box.ALLOWED_POS_SETS:
                Log.warning(
                    'Warning. Session cannot be calculated. '
                    'POS KPI Set name in store attribute is invalid - {0}. '
                    'Store ID {1}.'.format(self.pos_kpi_set_name,
                                           self.store_id))
            else:
                self.calculate_red_score()

        if self.tool_box.visit_type in [self.tool_box.STANDARD_VISIT]:
            if not self.tool_box.cooler_assortment.empty:
                self.tool_box.set_kpi_set(CCRUConsts.COOLER_AUDIT_SCORE,
                                          CCRUConsts.COOLER_AUDIT_SCORE)
                self.calculate_cooler_audit()

        Log.debug('KPI calculation stage: {}'.format('Committing results old'))
        self.tool_box.commit_results_data_old()

        Log.debug('KPI calculation stage: {}'.format('Committing results new'))
        self.tool_box.commit_results_data_new()

        Log.debug('KPI calculation is completed')

    def calculate_red_score(self):
        kpi_source_json = self.json.create_kpi_source('KPI_Source.xlsx',
                                                      self.pos_kpi_set_name)
        kpi_source = {}
        for row in kpi_source_json:
            # Log.info('SOURCE: {}'.format(row.get(SOURCE)))
            kpi_source[row.pop(SOURCE)] = row
        if not kpi_source:
            Log.warning(
                'Warning. Session cannot be calculated. '
                'POS KPI Set name in store attribute is invalid - {0}. '
                'Store ID {1}.'.format(self.pos_kpi_set_name, self.store_id))
            return

        # if self.test_store == "Y":
        #     Log.warning('Warning. Session cannot be calculated: '
        #                 'Store is a test store. '
        #                 'Store ID {1}.'
        #                 .format(self.pos_kpi_set_name, self.store_id))
        #     return

        mr_targets = {}
        for kpi_set, params in kpi_source.items():
            if params.get(MR_TARGET) is not None:
                mr_targets.update({kpi_set: params[MR_TARGET]})
                mr_targets.update({params[SET]: params[MR_TARGET]})
        self.tool_box.mr_targets = mr_targets

        kpi_sets_types_to_calculate = [POS]  # SPIRITS are excluded
        for kpi_set_type in kpi_sets_types_to_calculate:
            if kpi_source[kpi_set_type][SET]:
                Log.debug('KPI calculation stage: {}'.format(
                    kpi_source[kpi_set_type][SET]))
                self.tool_box.set_kpi_set(kpi_source[kpi_set_type][SET],
                                          kpi_set_type)
                self.json.project_kpi_dict['kpi_data'] = []
                self.json.create_kpi_data_json(
                    'kpi_data',
                    kpi_source[kpi_set_type][FILE],
                    sheet_name=kpi_source[kpi_set_type][SHEET],
                    pos_kpi_set_name=self.pos_kpi_set_name)
                self.calculate_red_score_kpi_set(
                    self.json.project_kpi_dict.get('kpi_data')[0],
                    kpi_source[kpi_set_type][SET],
                    mr_targets.get(kpi_set_type))

        if kpi_source[GAPS][SET]:
            Log.debug('KPI calculation stage: {}'.format(
                kpi_source[GAPS][SET]))
            self.tool_box.set_kpi_set(kpi_source[GAPS][SET], GAPS)
            self.json.create_kpi_data_json('gaps',
                                           kpi_source[GAPS][FILE],
                                           sheet_name=kpi_source[GAPS][SHEET])
            self.tool_box.calculate_gaps_old(
                self.json.project_kpi_dict.get('gaps'))
            self.tool_box.calculate_gaps_new(
                self.json.project_kpi_dict.get('gaps'), kpi_source[GAPS][SET])

        if kpi_source[BENCHMARK][SET]:
            Log.debug('KPI calculation stage: {}'.format(
                kpi_source[BENCHMARK][SET]))
            self.tool_box.set_kpi_set(kpi_source[BENCHMARK][SET], BENCHMARK)
            self.json.create_kpi_data_json(
                'benchmark',
                kpi_source[BENCHMARK][FILE],
                sheet_name=kpi_source[BENCHMARK][SHEET])
            self.tool_box.calculate_benchmark(
                self.json.project_kpi_dict.get('benchmark'),
                kpi_source[BENCHMARK][SET])

        if kpi_source[CONTRACT][FILE]:
            Log.debug('Importing Contract Execution template')
            self.json.create_kpi_data_json(
                'contract',
                kpi_source[CONTRACT][FILE],
                sheet_name=kpi_source[CONTRACT][SHEET])

        if kpi_source[TOPSKU][SET]:
            Log.debug('KPI calculation stage: {}'.format(
                kpi_source[TOPSKU][SET]))
            include_to_contract = bool(
                self.json.project_kpi_dict.get('contract'))
            self.tool_box.set_kpi_set(kpi_source[TOPSKU][SET], TOPSKU)
            self.tool_box.calculate_top_sku(include_to_contract,
                                            kpi_source[TOPSKU][SET])

        if self.json.project_kpi_dict.get('contract'):
            if kpi_source[EQUIPMENT][SET]:
                equipment_target_data = self.tool_box.get_equipment_target_data()
                if equipment_target_data:
                    if kpi_source[TARGET][SET]:
                        Log.debug('KPI calculation stage: {}'.format(
                            kpi_source[TARGET][SET]))
                        self.tool_box.set_kpi_set(kpi_source[TARGET][SET],
                                                  TARGET)
                        self.json.project_kpi_dict['kpi_data'] = []
                        self.json.create_kpi_data_json(
                            'kpi_data',
                            kpi_source[TARGET][FILE],
                            sheet_name=kpi_source[TARGET][SHEET])
                        self.calculate_red_score_kpi_set(
                            self.json.project_kpi_dict.get('kpi_data')[0],
                            kpi_source[TARGET][SET])

                        Log.debug('KPI calculation stage: {}'.format(
                            kpi_source[EQUIPMENT][SET]))
                        self.tool_box.set_kpi_set(kpi_source[EQUIPMENT][SET],
                                                  EQUIPMENT)
                        self.tool_box.calculate_equipment_execution(
                            self.json.project_kpi_dict.get('contract'),
                            kpi_source[EQUIPMENT][SET],
                            kpi_source[KPI_CONVERSION][FILE],
                            equipment_target_data)

            if kpi_source[CONTRACT][SET]:
                Log.debug('KPI calculation stage: {}'.format(
                    kpi_source[CONTRACT][SET]))
                self.tool_box.set_kpi_set(kpi_source[CONTRACT][SET], CONTRACT)
                self.tool_box.calculate_contract_execution(
                    self.json.project_kpi_dict.get('contract'),
                    kpi_source[CONTRACT][SET])

        # Log.debug('KPI calculation stage: {}'.format('Committing results old'))
        # self.tool_box.commit_results_data_old()
        #
        # Log.debug('KPI calculation stage: {}'.format('Committing results new'))
        # self.tool_box.commit_results_data_new()

    def calculate_red_score_kpi_set(self,
                                    kpi_data,
                                    kpi_set_name,
                                    set_target=None):

        score = 0
        score += self.tool_box.check_availability(kpi_data)
        score += self.tool_box.check_facings_sos(kpi_data)
        score += self.tool_box.check_share_of_cch(kpi_data)
        score += self.tool_box.check_number_of_skus_per_door_range(kpi_data)
        score += self.tool_box.check_number_of_doors(kpi_data)
        score += self.tool_box.check_number_of_scenes(kpi_data)
        score += self.tool_box.check_number_of_scenes_no_tagging(kpi_data)
        score += self.tool_box.check_customer_cooler_doors(kpi_data)
        score += self.tool_box.check_atomic_passed(kpi_data)
        score += self.tool_box.check_atomic_passed_on_the_same_scene(kpi_data)
        score += self.tool_box.check_sum_atomics(kpi_data)
        score += self.tool_box.check_dummies(kpi_data)
        score += self.tool_box.check_weighted_average(kpi_data)
        score += self.tool_box.check_kpi_scores(kpi_data)

        score = round(float(score), 3)

        self.tool_box.create_kpi_groups(kpi_data.values()[0])

        self.tool_box.write_to_kpi_results_old(
            pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.3f'), None)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ]), 'level1')

        set_target = set_target if set_target is not None else 100

        self.tool_box.update_kpi_scores_and_results(
            {
                'KPI ID': 0,
                'KPI name Eng': kpi_set_name,
                'KPI name Rus': kpi_set_name,
                'Parent': 'root'
            }, {
                'target': set_target,
                'weight': None,
                'result': score,
                'score': score,
                'weighted_score': score,
                'level': 0
            })

        # INTEGRATION is excluded from calculation starting 2020
        #
        # if kpi_set_type == POS:
        #     Log.debug('KPI calculation stage: {}'.format(kpi_source[INTEGRATION][SET]))
        #     self.tool_box.prepare_hidden_set(kpi_data, kpi_source[INTEGRATION][SET])

    def calculate_promo_compliance(self):
        self.json.create_kpi_data_json('promo',
                                       'KPI_Promo_Tracking.xlsx',
                                       sheet_name='2019')
        kpi_data = self.json.project_kpi_dict.get('promo')

        Log.debug('KPI calculation stage: {}'.format('Promo Compliance'))
        self.tool_box.calculate_promo_compliance_store(kpi_data)

        Log.debug('KPI calculation stage: {}'.format('Committing results new'))
        self.tool_box.common.commit_results_data()

    def calculate_cooler_audit(self):
        self.json.create_kpi_data_json('cooler_audit',
                                       'Cooler_Quality.xlsx',
                                       sheet_name='ALL')
        kpi_data = self.json.project_kpi_dict.get('cooler_audit')[0]
        group_model_map = pd.read_excel(os.path.join(self.json.base_path,
                                                     'Cooler_Quality.xlsx'),
                                        sheet_name=GROUP_MODEL_MAP)
        self.tool_box.calculate_cooler_kpis(kpi_data, group_model_map)

    def rds_connection(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except Exception:
            # the probe failed - the cached connection went stale, reconnect
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        return self._rds_conn
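rds_connection above caches the connector, validates it with a cheap probe
query, and reconnects when the probe fails (the next example repeats the same
idea as an rds_conn property). A minimal sketch of that lazy, self-healing
pattern, assuming the PSProjectConnector interface used throughout these
examples (the ConnectionHolder name is illustrative):

import pandas as pd


class ConnectionHolder(object):
    """Caches a DB connector and replaces it whenever a probe query fails."""

    def __init__(self, project_name):
        self.project_name = project_name
        self._conn = None

    @property
    def conn(self):
        if self._conn is None:
            self._conn = PSProjectConnector(self.project_name,
                                            DbUsers.CalculationEng)
        try:
            # any failure here means the cached connection went stale
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._conn.db)
        except Exception:
            self._conn.disconnect_rds()
            self._conn = PSProjectConnector(self.project_name,
                                            DbUsers.CalculationEng)
        return self._conn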
Code example #17
class PNGAUToolBox(PNGAUConsts):
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.all_templates = self.data_provider[Data.ALL_TEMPLATES]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_type = self.data_provider[
            Data.STORE_INFO]['store_type'].values[0]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.brand_to_manufacturer = {
            x['brand_name']: x['manufacturer_name']
            for i, x in self.scif[['brand_name', 'manufacturer_name'
                                   ]].drop_duplicates().iterrows()
        }
        self.match_display_in_scene = self.get_match_display()
        self.store_retailer = self.get_retailer()
        self.tools = PNGAUGENERALToolBox(self.data_provider,
                                         self.output,
                                         rds_conn=self.rds_conn)
        if self.visit_date <= datetime(2017, 12, 31).date():
            self.TEMPLATE_PATH = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), '..', 'Data',
                'Template Dec.xlsx')
        else:
            self.TEMPLATE_PATH = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), '..', 'Data',
                'Template.xlsx')
        self.template_data = parse_template(self.TEMPLATE_PATH, 'KPIs')
        self.scoring_data = parse_template(self.TEMPLATE_PATH, 'Scores')
        (self.category_scene_types,
         self.template_groups) = self.get_category_scene_types()
        self._custom_templates = {}
        self.scenes_types_for_categories = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.kpi_results = {}
        self.atomic_results = {}
        self.categories = self.all_products['category_fk'].unique().tolist()

    @property
    def rds_conn(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except Exception:
            Log.info('Disconnecting and connecting to DB')
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        return self._rds_conn

    @property
    def empty_spaces(self):
        if not hasattr(self, '_empty_spaces'):
            matches = self.match_product_in_scene.merge(self.products,
                                                        on='product_fk',
                                                        how='left',
                                                        suffixes=['', '_1'])
            self._empty_spaces = PNGAUEmptySpaces(matches)
            self._empty_spaces.calculate_empty_spaces()
        return self._empty_spaces.empty_spaces

    def get_template(self, name):
        """
        This function extracts the template's data, given a sheet name.
        """
        if name not in self._custom_templates.keys():
            self._custom_templates[name] = parse_template(
                self.TEMPLATE_PATH, name)
        return self._custom_templates[name]

    def get_category_scene_types(self):
        """
        This function converts the category-scene template into a dictionary.
        """
        scene_types = parse_template(self.TEMPLATE_PATH, 'Category-Scene_Type')
        category_scene_types = {self.PRIMARY_SHELF: []}
        for category in scene_types[self.CATEGORY].unique():
            data = scene_types[scene_types[self.CATEGORY] == category]
            types = data[self.SCENE_TYPES].unique().tolist()
            category_scene_types[category] = types
            if category != self.DISPLAY:
                category_scene_types[self.PRIMARY_SHELF].extend(types)
        template_groups = {}
        for template_group in scene_types[self.TEMPLATE_GROUP].unique():
            if template_group:
                template_groups[template_group] = scene_types[scene_types[
                    self.TEMPLATE_GROUP] == template_group][
                        self.SCENE_TYPES].unique().tolist()
        return category_scene_types, template_groups
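    # Example of the resulting structures (hypothetical template rows): a row
    # with category 'Hair', scene type 'Hair Aisle' and template group 'Aisle'
    # yields category_scene_types['Hair'] == ['Hair Aisle'], also appends
    # 'Hair Aisle' under the PRIMARY_SHELF key (since 'Hair' != DISPLAY), and
    # gives template_groups['Aisle'] == ['Hair Aisle'].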

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = PNGAUQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = PNGAUQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        match_display = match_display.merge(
            self.scene_info[['scene_fk', 'template_fk']],
            on='scene_fk',
            how='left')
        match_display = match_display.merge(self.all_templates,
                                            on='template_fk',
                                            how='left',
                                            suffixes=['', '_y'])
        return match_display

    def get_retailer(self):
        query = PNGAUQueries.get_retailer(self.store_id)
        data = pd.read_sql_query(query, self.rds_conn.db)
        retailer = '' if data.empty else data['retailer'].values[0]
        if 'woolworth' in retailer.lower():
            retailer = 'Woolworths'
        return retailer

    @log_runtime('Main Calculation')
    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        self.calculation_per_product()
        self.calculation_per_brand()
        self.calculation_per_manufacturer()
        self.calculation_per_category()

        set_names = self.template_data[self.SET_NAME].tolist()
        scoring_sets = self.scoring_data[self.SET_NAME].tolist()
        for set_name in set(set_names):
            self.write_to_db_result(level=self.LEVEL1, kpi_set_name=set_name)
        for set_name in set(scoring_sets):
            set_weight = float(self.scoring_data[self.scoring_data[
                self.SET_NAME] == set_name][self.WEIGHT].values[0])
            set_weight = int(set_weight * 100)
            self.write_to_db_result(score=set_weight,
                                    level=self.LEVEL1,
                                    kpi_set_name=set_name)

    def get_target(self, kpi_name, category):
        """
        This function extracts a given KPI's target from the 'Targets' template.
        """
        target = None
        targets_data = self.get_template('Targets')
        targets_data = targets_data[(targets_data[self.KPI_NAME] == kpi_name) &
                                    (targets_data[self.CATEGORY] == category)]
        if not targets_data.empty:
            targets_data = targets_data.iloc[0]
            if self.store_retailer in targets_data.keys():
                target = targets_data[self.store_retailer]
                if target:
                    try:
                        if isinstance(target,
                                      (str, unicode)) and '%' in target:
                            target = float(target.replace('%', '')) / 100
                        target = int(float(target) * 10000)
                    except ValueError:
                        # non-numeric targets (e.g. survey answers) are left as-is
                        pass
        return target
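    # Worked example of the conversion above (hypothetical cell value): a
    # target of '75%' becomes 0.75 after the percent strip and is stored as
    # int(0.75 * 10000) == 7500, matching the x10000 scale the SOS scores in
    # this class use.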

    def calculate_distribution(self, category, params):
        """
        This function calculates 'Distribution' typed KPIs (and its sub-KPIs - the KPIs which inherit its results).
        """
        set_name = params[self.SET_NAME]
        scenes_filters = self.get_scenes_filters(params, category)
        if params[self.CUSTOM_SHEET]:
            custom_template = self.get_template(params[self.CUSTOM_SHEET])
            if self.store_type not in custom_template.columns:
                return
            distribution_products = custom_template[
                (custom_template[self.CATEGORY] == category)
                & (custom_template[self.store_type].apply(bool))]
            distribution_products = distribution_products[
                self.PRODUCT_EAN].unique().tolist()
        else:
            distribution_products = self.scif[
                (self.scif['category'] == category)
                & (self.scif['in_assort_sc'] == 1)]['product_ean_code'].unique(
                ).tolist()
        number_of_distributed_products = 0
        for product_ean_code in distribution_products:
            product_brand = self.all_products[
                self.all_products['product_ean_code'] == product_ean_code]
            if product_brand.empty:
                # Log.warning('Product EAN {} does not match any active product'.format(product_ean_code))
                continue
            product_brand = product_brand['brand_name'].values[0]
            kpi_name = params[self.KPI_NAME].format(category=category,
                                                    brand=product_brand,
                                                    ean=product_ean_code)
            result = int(
                self.tools.calculate_assortment(
                    product_ean_code=product_ean_code, **scenes_filters))
            self.save_result(set_name, kpi_name, result=result)
            number_of_distributed_products += result
        self.calculation_by_reference(category, set_name,
                                      number_of_distributed_products,
                                      len(distribution_products))

    def calculation_by_reference(self, category, reference_name, numerator,
                                 denominator):
        """
        Given a parent-KPI's results, this function calculates a reference KPI.
        """
        referenced_kpis = self.template_data[self.template_data[
            self.REFERENCE_KPI] == reference_name]
        for x, params in referenced_kpis.iterrows():
            score = 0
            aggregation_type = params[self.AGGREGATED_SCORE]
            if aggregation_type.startswith('Percentage'):
                if not aggregation_type.endswith('Passed'):
                    numerator = denominator - numerator
                result = 0 if not denominator else round(
                    (numerator / float(denominator)) * 100, 2)
                result = 0 if not denominator else round(
                    (numerator / float(denominator)) * 100, 2)
            elif aggregation_type.startswith('Count'):
                if aggregation_type.endswith('All'):
                    result = denominator
                elif aggregation_type.endswith('Passed'):
                    result = numerator
                else:
                    result = denominator - numerator
                score = 0 if not denominator else (result /
                                                   float(denominator)) * 10000
            else:
                continue
            kpi_name = params[self.KPI_NAME].format(category=category)
            self.save_result(params[self.SET_NAME],
                             kpi_name,
                             score=score,
                             result=result,
                             threshold=denominator)
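    # Example of the aggregations above (hypothetical counts): with 8 of 10
    # reference atomics passed, 'Percentage Passed' reports
    # round((8 / 10.0) * 100, 2) == 80.0 as the result, while 'Count Passed'
    # reports result = 8 with score = (8 / 10.0) * 10000 == 8000.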

    def get_scenes_filters(self, params, category):
        """
        This function extracts the scene-type data (==filter) of a given KPI.
        """
        filters = {}
        if params[self.SCENE_TYPES] and params[self.SCENE_TYPES] != 'ALL':
            scene_types = params[self.SCENE_TYPES].split(self.SEPARATOR)
            template_names = []
            for scene_type in scene_types:
                if scene_type == self.CATEGORY_PRIMARY_SHELF:
                    # if category in self.category_scene_types.keys():
                    #     template_names.extend(self.category_scene_types[category])
                    template_names.extend(
                        self.category_scene_types[self.PRIMARY_SHELF])
                elif scene_type == self.DISPLAY:
                    template_names.extend(
                        self.category_scene_types[self.DISPLAY])
                else:
                    template_names.append(scene_type)
            if template_names:
                filters['template_name'] = template_names
        return filters

    def calculation_per_product(self):
        """
        This function calculates all KPIs which are saved per product_ean_code.
        """
        kpis = self.template_data[self.template_data[self.SUB_CALCULATION] ==
                                  'Product']
        for x, entity in self.scif.drop_duplicates(
                subset=['product_ean_code']).iterrows():
            for y, params in kpis.iterrows():
                score = result = threshold = None
                set_name = params[self.SET_NAME]
                kpi_name = params[self.KPI_NAME].format(
                    category=entity['category'],
                    brand=entity['brand_name'],
                    ean=entity['product_ean_code'])
                kpi_type = params[self.KPI_TYPE]
                scene_filters = self.get_scenes_filters(
                    params, entity['category'])
                if kpi_type == self.FACING_COUNT:
                    result = int(
                        self.tools.calculate_availability(
                            product_ean_code=entity['product_ean_code'],
                            **scene_filters))
                else:
                    continue
                self.save_result(set_name,
                                 kpi_name,
                                 score=score,
                                 result=result,
                                 threshold=threshold)

    def calculation_per_brand(self):
        """
        This function calculates all KPIs which are saved per brand_name.
        """
        kpis = self.template_data[self.template_data[self.SUB_CALCULATION] ==
                                  'Brand']
        for x, entity in self.scif.drop_duplicates(
                subset=['brand_fk']).iterrows():
            for y, params in kpis.iterrows():
                score = result = threshold = None
                set_name = params[self.SET_NAME]
                kpi_name = None
                kpi_type = params[self.KPI_TYPE]
                scene_filters = self.get_scenes_filters(
                    params, entity['category'])
                if kpi_type == self.FACING_COUNT:
                    result = int(
                        self.tools.calculate_availability(
                            brand_fk=entity['brand_fk'], **scene_filters))
                    threshold = int(
                        self.tools.calculate_availability(
                            category_fk=entity['category_fk'],
                            **scene_filters))
                    if set_name == 'Size of Display Raw Data':
                        score = 0 if not threshold else (
                            result / float(threshold)) * 10000
                elif kpi_type == self.FACING_SOS:
                    score = self.tools.calculate_share_of_shelf(
                        {'brand_fk': entity['brand_fk']},
                        category_fk=entity['category_fk'],
                        **scene_filters)
                    score *= 10000
                elif kpi_type == self.LINEAR_SOS:
                    score, dummy = self.tools.calculate_linear_share_of_shelf(
                        {'brand_fk': entity['brand_fk']},
                        category_fk=entity['category_fk'],
                        **scene_filters)
                    score *= 10000
                elif kpi_type == self.SHELF_SPACE_LENGTH:
                    result = int(
                        self.tools.calculate_share_space_length(
                            brand_fk=entity['brand_fk'],
                            category_fk=entity['category_fk'],
                            **scene_filters))
                    threshold = int(
                        self.tools.calculate_share_space_length(
                            category_fk=entity['category_fk'],
                            **scene_filters))
                elif kpi_type == self.ASSORTMENT:
                    if params[self.MANUFACTURERS]:
                        manufacturers = params[self.MANUFACTURERS].split(
                            self.SEPARATOR)
                        if entity['manufacturer_name'] not in manufacturers:
                            continue
                    result = int(
                        self.tools.calculate_assortment(
                            brand_fk=entity['brand_fk']))
                elif kpi_type == self.COUNT_OF_SCENES:
                    brand_aggregation = 0
                    for group in self.template_groups.keys():
                        result = int(
                            self.tools.calculate_number_of_scenes(
                                category_fk=entity['category_fk'],
                                brand_fk=entity['brand_fk'],
                                template_name=self.template_groups[group]))
                        brand_aggregation += result
                        kpi_name = params[self.KPI_NAME].format(
                            category=entity['category'],
                            brand=entity['brand_name'],
                            display_type=group)
                        self.save_result(set_name, kpi_name, result=result)
                    reference_kpi = self.template_data[self.template_data[
                        self.REFERENCE_KPI] == params[self.SET_NAME]].iloc[0]
                    reference_name = reference_kpi[self.KPI_NAME].format(
                        category=entity['category'],
                        brand=entity['brand_name'])
                    self.save_result(reference_kpi[self.SET_NAME],
                                     reference_name,
                                     result=brand_aggregation)
                    continue
                else:
                    continue
                if kpi_name is None:
                    kpi_name = params[self.KPI_NAME].format(
                        category=entity['category'],
                        brand=entity['brand_name'])
                self.save_result(set_name,
                                 kpi_name,
                                 score=score,
                                 result=result,
                                 threshold=threshold)

    def calculation_per_manufacturer(self):
        """
        This function calculates all KPIs which are saved per manufacturer_name.
        """
        kpis = self.template_data[self.template_data[self.SUB_CALCULATION] ==
                                  'Manufacturer']
        for x, entity in self.scif.drop_duplicates(
                subset=['manufacturer_fk', 'category']).iterrows():
            for y, params in kpis.iterrows():
                score = result = threshold = None
                set_name = params[self.SET_NAME]
                kpi_name = params[self.KPI_NAME].format(
                    category=entity['category'],
                    manufacturer=entity['manufacturer_name'])
                kpi_type = params[self.KPI_TYPE]
                scene_filters = self.get_scenes_filters(
                    params, entity['category'])
                if kpi_type == self.FACING_COUNT:
                    result = int(
                        self.tools.calculate_availability(
                            manufacturer_fk=entity['manufacturer_fk'],
                            category_fk=entity['category_fk'],
                            **scene_filters))
                elif kpi_type == self.FACING_SOS:
                    score = self.tools.calculate_share_of_shelf(
                        {'manufacturer_fk': entity['manufacturer_fk']},
                        category_fk=entity['category_fk'],
                        **scene_filters)
                    score *= 10000
                elif kpi_type == self.LINEAR_SOS:
                    score, dummy = self.tools.calculate_linear_share_of_shelf(
                        {'manufacturer_fk': entity['manufacturer_fk']},
                        category_fk=entity['category_fk'],
                        **scene_filters)
                    score *= 10000
                elif kpi_type == self.SHELF_SPACE_LENGTH:
                    result = int(
                        self.tools.calculate_share_space_length(
                            manufacturer_fk=entity['manufacturer_fk'],
                            category_fk=entity['category_fk'],
                            **scene_filters))
                elif kpi_type == self.ASSORTMENT_SHARE:
                    category_assortment = self.tools.calculate_availability(
                        category_fk=entity['category_fk'])
                    manufacturer_assortment = self.tools.calculate_availability(
                        manufacturer_fk=entity['manufacturer_fk'],
                        category_fk=entity['category_fk'])
                    score = 0 if not category_assortment else manufacturer_assortment / float(
                        category_assortment)
                    score *= 10000
                else:
                    continue
                self.save_result(set_name,
                                 kpi_name,
                                 score=score,
                                 result=result,
                                 threshold=threshold)

    def calculation_per_category(self):
        """
        This function calculates all KPIs which are saved per category.
        """
        kpis = self.template_data[self.template_data[self.SUB_CALCULATION] ==
                                  'Category']
        for x, entity in self.scif.drop_duplicates(
                subset=['category_fk']).iterrows():
            for y, params in kpis.iterrows():
                score = result = threshold = None
                set_name = params[self.SET_NAME]
                kpi_name = None
                kpi_type = params[self.KPI_TYPE]
                scene_filters = self.get_scenes_filters(
                    params, entity['category'])
                if kpi_type == self.EMPTY_FACING_SOS:
                    result = self.tools.calculate_share_of_shelf(
                        {'product_type': 'Empty'},
                        include_empty=self.tools.INCLUDE_EMPTY,
                        category_fk=entity['category_fk'],
                        **scene_filters)
                    result *= 10000
                elif kpi_type == self.DISTRIBUTION:
                    self.calculate_distribution(entity['category'], params)
                elif kpi_type == self.COUNT_OF_POSM_BY_TYPE:
                    if scene_filters:
                        match_display = self.match_display_in_scene[
                            self.tools.get_filter_condition(
                                self.match_display_in_scene, **scene_filters)]
                    else:
                        match_display = self.match_display_in_scene.copy()
                    custom_sheet = self.get_template(params[self.CUSTOM_SHEET])
                    if self.store_retailer not in custom_sheet.columns:
                        continue
                    filtered_sheet = custom_sheet[
                        (custom_sheet[self.CATEGORY] == entity['category'])
                        & (custom_sheet[self.store_retailer].apply(bool))]
                    for display_type in filtered_sheet[self.BRAND].unique():
                        displays = filtered_sheet[filtered_sheet[self.BRAND] ==
                                                  display_type]
                        for display_name in displays[self.CLAIM_NAME].unique():
                            kpi_name = params[self.KPI_NAME].format(
                                category=entity['category'],
                                display_type=display_type,
                                display=display_name)
                            result = len(match_display[
                                match_display['display_name'] == display_name])
                            self.save_result(set_name, kpi_name, result=result)
                    continue
                elif kpi_type == self.COUNT_OF_POSM:
                    custom_sheet = self.get_template(params[self.CUSTOM_SHEET])
                    if self.store_retailer not in custom_sheet.columns:
                        continue
                    # filter the displays in this branch as well, so the
                    # result never depends on a match_display value leaked
                    # from the branch above
                    if scene_filters:
                        match_display = self.match_display_in_scene[
                            self.tools.get_filter_condition(
                                self.match_display_in_scene, **scene_filters)]
                    else:
                        match_display = self.match_display_in_scene.copy()
                    filtered_sheet = custom_sheet[
                        (custom_sheet[self.CATEGORY] == entity['category'])
                        & (custom_sheet[self.store_retailer].apply(bool))]
                    displays = filtered_sheet[
                        self.CLAIM_NAME].unique().tolist()
                    result = len(match_display[
                        match_display['display_name'].isin(displays)])
                elif kpi_type == self.ASSORTMENT_SHARE:
                    reference_results = self.convert_results_to_df(
                        params[self.REFERENCE_KPI])
                    reference_results = reference_results[
                        reference_results['category'] == entity['category']]
                    category_assortment = self.tools.calculate_assortment(
                        category_fk=entity['category_fk'])
                    for brand in reference_results['brand'].unique():
                        brand_assortment = reference_results[
                            reference_results['brand'] ==
                            brand]['result'].sum()
                        score = 0 if not category_assortment else int(
                            (brand_assortment / float(category_assortment)) *
                            10000)
                        kpi_name = params[self.KPI_NAME].format(
                            category=entity['category'], brand=brand)
                        self.save_result(set_name,
                                         kpi_name,
                                         score=score,
                                         result=brand_assortment,
                                         threshold=category_assortment)
                    continue
                elif kpi_type == self.SHARE_OF_DISPLAY:
                    reference_kpi = self.template_data[(~self.template_data[
                        self.SET_NAME].isin([params[self.SET_NAME]])) & (
                            self.template_data[self.REFERENCE_KPI] == params[
                                self.REFERENCE_KPI])].iloc[0]
                    reference_results = self.convert_results_to_df(
                        params[self.REFERENCE_KPI])
                    reference_results = reference_results[
                        reference_results['category'] == entity['category']]
                    manufacturers = params[self.MANUFACTURERS].split(
                        self.SEPARATOR)
                    total_share = reference_results['result'].sum()
                    manufacturer_share = 0
                    for brand in reference_results['brand'].unique():
                        brand_share = reference_results[
                            reference_results['brand'] ==
                            brand]['result'].sum()
                        # P&G Share
                        brand_manufacturer = self.brand_to_manufacturer.get(
                            brand, '')
                        if brand_manufacturer in manufacturers:
                            manufacturer_share += brand_share
                        # Share for brand
                        score = 0 if not total_share else int(
                            (brand_share / float(total_share)) * 10000)
                        reference_name = reference_kpi[self.KPI_NAME].format(
                            category=entity['category'], brand=brand)
                        self.save_result(reference_kpi[self.SET_NAME],
                                         reference_name,
                                         score=score)
                    score = 0 if not total_share else int(
                        (manufacturer_share / float(total_share)) * 10000)
                elif kpi_type == self.EMPTY_SPACES:
                    category_empty = self.empty_spaces.get(
                        entity['category'], {}).get('png', 0)
                    category_facings = int(
                        self.tools.calculate_availability(
                            category_fk=entity['category_fk'],
                            manufacturer_fk=entity['manufacturer_fk']))
                    category_facings_with_empty = category_facings + category_empty
                    if not category_facings_with_empty:
                        score = 0
                    else:
                        score = int(
                            (category_empty /
                             float(category_facings_with_empty)) * 10000)
                    result = category_empty
                    threshold = category_facings_with_empty
                else:
                    continue
                if result is not None or score is not None or threshold is not None:
                    if kpi_name is None:
                        kpi_name = params[self.KPI_NAME].format(
                            category=entity['category'])
                    self.save_result(set_name,
                                     kpi_name,
                                     score=score,
                                     result=result,
                                     threshold=threshold)

            self.scoring_calculation(category=entity['category'])

    def convert_results_to_df(self, set_name):
        """
        This function extracts the atomic KPI results of a given KPI, and turns it into a dataframe.
        """
        output = []
        if set_name in self.atomic_results.keys():
            for atomic in self.atomic_results[set_name].keys():
                results = {'result': self.atomic_results[set_name][atomic]}
                for data in atomic.split(self.UNICODE_DASH):
                    name, value = data.split(':')
                    results[name.lower()] = value
                output.append(results)
        return pd.DataFrame(output)
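    # Example of the decoding above (hypothetical names): an atomic keyed as
    # u'Category:Hair Care' + UNICODE_DASH + u'Brand:Shiny' with result 3
    # becomes the row {'result': 3, 'category': 'Hair Care', 'brand': 'Shiny'}
    # (the part before each ':' is lowercased to form the column name).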

    def save_result(self,
                    set_name,
                    kpi_name,
                    score=None,
                    result=None,
                    threshold=None):
        """
        Given a score and/or result and/or threshold, this function writes an atomic KPI to the DB.
        """
        if isinstance(score, float):
            score = int(round(score))
        if isinstance(result, float):
            result = int(round(result))
        if isinstance(threshold, float):
            threshold = int(round(threshold))
        self.write_to_db_result(level=self.LEVEL3,
                                score=score,
                                result=result,
                                threshold=threshold,
                                kpi_name=set_name,
                                atomic_kpi_name=kpi_name.replace("'", ""))

    def scoring_calculation(self, category):
        """
        This function calculates the results of the scoring KPIs, for a given category.
        """
        kpi_name = 'Category:{}'.format(category)
        for x, params in self.scoring_data.iterrows():
            set_name = params[self.SET_NAME]

            target = self.get_target(set_name, category)
            if target is None:
                continue

            kpi_type = params[self.KPI_TYPE]
            reference_kpi = params[self.REFERENCE_KPI]
            scenes_filters = self.get_scenes_filters(params, category)

            if reference_kpi:
                reference_results = self.convert_results_to_df(reference_kpi)
                if reference_results.empty:
                    continue
                reference_results = reference_results[
                    reference_results['category'] == category]
                if kpi_type == 'Percentage Passed':
                    total_calculated = len(reference_results)
                    total_passed = reference_results['result'].sum()
                    result = 0 if not total_calculated else int(
                        (total_passed / float(total_calculated)) * 10000)
                else:
                    continue
            else:
                if kpi_type == self.LINEAR_SOS:
                    result, dummy = self.tools.calculate_linear_share_of_shelf(
                        sos_filters={
                            'manufacturer_name': self.OWN_MANUFACTURER
                        },
                        category=category,
                        **scenes_filters)
                    result *= 10000
                elif kpi_type == self.FACING_SOS:
                    result = self.tools.calculate_share_of_shelf(
                        sos_filters={
                            'manufacturer_name': self.OWN_MANUFACTURER
                        },
                        category=category,
                        **scenes_filters)
                    result *= 10000
                elif kpi_type == self.POSM_ASSORTMENT:
                    result = self.calculate_posm_assortment_share(
                        category, params, scenes_filters)
                elif kpi_type == self.SURVEY_QUESTION:
                    survey_template = self.get_template(
                        params[self.CUSTOM_SHEET])
                    survey_template = survey_template[
                        (survey_template[self.KPI_NAME] == set_name)
                        & (survey_template[self.CATEGORY] == category)]
                    if survey_template.empty:
                        continue
                    survey_id = None
                    for y, survey in survey_template.iterrows():
                        if self.store_type in survey[self.STORE_TYPE].split(
                                self.SEPARATOR):
                            survey_id = int(float(survey[self.SURVEY_ID]))
                    if survey_id is None:
                        continue
                    result = self.tools.get_survey_answer(
                        ('question_fk', survey_id))
                else:
                    continue
            weight = int(float(params[self.WEIGHT]) * 100)
            if isinstance(target, (int, float)):
                score = weight if result >= target else 0
            else:
                score = weight if result == target else 0
            self.save_result(set_name,
                             kpi_name,
                             result=result,
                             score=score,
                             threshold=target)

    def calculate_posm_assortment_share(self, category, params, filters):
        """
        This function calculates the share of POSM assortment (number of unique POSMs out of all POSMs in the template).
        """
        template_data = self.get_template(params[self.CUSTOM_SHEET])
        template_data = template_data[template_data[self.CATEGORY] == category]
        if template_data.empty or self.store_retailer not in template_data.columns:
            return None
        posm_items = template_data[template_data[self.store_retailer].apply(
            bool)][self.CLAIM_NAME].tolist()
        match_display = self.match_display_in_scene[
            self.tools.get_filter_condition(self.match_display_in_scene,
                                            **filters)]
        assortment = len(match_display[match_display['display_name'].isin(
            posm_items)]['display_name'].unique())
        result = 0 if len(posm_items) == 0 else int(
            (assortment / float(len(posm_items))) * 10000)
        return result
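    # Worked example (hypothetical counts): a template listing 12 POSM items
    # for the retailer, of which 3 distinct ones appear in the filtered
    # displays, scores int((3 / 12.0) * 10000) == 2500.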

    def write_to_db_result(self,
                           level,
                           score=None,
                           result=None,
                           threshold=None,
                           **kwargs):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(level, score, result,
                                                 threshold, **kwargs)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self,
                               level,
                               score=None,
                               result=None,
                               threshold=None,
                               **kwargs):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        """
        if level == self.LEVEL1:
            set_name = kwargs['kpi_set_name']
            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == set_name]['kpi_set_fk'].values[0]
            attributes = pd.DataFrame(
                [(set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), score, set_fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_name = kwargs['kpi_name']
            kpi_fk = self.kpi_static_data[self.kpi_static_data['kpi_name'] ==
                                          kpi_name]['kpi_fk'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  kpi_fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
            self.kpi_results[kpi_name] = score
        elif level == self.LEVEL3:
            kpi_name = kwargs['kpi_name']
            kpi_fk = self.kpi_static_data[self.kpi_static_data['kpi_name'] ==
                                          kpi_name]['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_fk'] ==
                kpi_fk]['kpi_set_name'].values[0]
            atomic_kpi_name = kwargs['atomic_kpi_name']
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid,
                  kpi_set_name, self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, result, threshold,
                  kpi_fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'result',
                    'threshold', 'kpi_fk'
                ])
            if kpi_set_name not in self.atomic_results.keys():
                self.atomic_results[kpi_set_name] = {}
            self.atomic_results[kpi_set_name][
                atomic_kpi_name] = result if result is not None else score
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        insert_queries = self.merge_insert_queries(self.kpi_results_queries)
        cur = self.rds_conn.db.cursor()
        delete_queries = PNGAUQueries.get_delete_session_results_query(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    @staticmethod
    def merge_insert_queries(insert_queries):
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(
                group, ',\n'.join(query_groups[group])))
        return merged_queries
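
A minimal usage sketch of the query merging above; the query strings are invented, and ToolBox stands in for the enclosing class, whose real name is outside this excerpt:

queries = [
    "INSERT INTO report.kpi_results (kpi_fk, score) VALUES (1, 100)",
    "INSERT INTO report.kpi_results (kpi_fk, score) VALUES (2, 50)",
    "INSERT INTO report.kps_results (kps_name, score_1) VALUES ('A', 75)",
]
# The two kpi_results rows share everything before 'VALUES ', so they merge
# into a single multi-row INSERT; the kps_results query stays on its own.
merged = ToolBox.merge_insert_queries(queries)
assert len(merged) == 2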
Code example #18
0
class DIAGEOUK_SANDToolBox:

    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.output = output
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.global_gen = DIAGEOGenerator(self.data_provider, self.output,
                                          self.common)
        self.tools = DIAGEOToolBox(
            self.data_provider,
            output,
            match_display_in_scene=self.match_display_in_scene
        )  # replace the old one
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output,
                                                self.common,
                                                menu=True)

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()
        # SOS Out Of The Box kpis
        self.activate_ootb_kpis()

        # Global assortment kpis
        assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2(
        )
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        # Global assortment kpis - v3 for NEW MOBILE REPORTS use
        assortment_res_dict_v3 = self.diageo_generator.diageo_global_assortment_function_v3(
        )
        self.commonV2.save_json_to_new_tables(assortment_res_dict_v3)

        equipment_score_scenes = self.get_equipment_score_relevant_scenes()
        res_dict = self.diageo_generator.diageo_global_equipment_score(
            save_scene_level=False, scene_list=equipment_score_scenes)
        self.commonV2.save_json_to_new_tables(res_dict)

        # Global Menu kpis
        menus_res_dict = self.diageo_generator.diageo_global_share_of_menu_cocktail_function(
            cocktail_product_level=True)
        self.commonV2.save_json_to_new_tables(menus_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data.keys(
            ):
                try:
                    self.set_templates_data[
                        set_name] = self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue

            # Global relative position
            if set_name == 'Relative Position':
                # Global function
                res_dict = self.diageo_generator.diageo_global_relative_position_function(
                    self.set_templates_data[set_name],
                    location_type='template_group')
                self.commonV2.save_json_to_new_tables(res_dict)

                # Saving to old tables
                self.set_templates_data[set_name] = parse_template(
                    RELATIVE_PATH, lower_headers_row_index=2)
                set_score = self.calculate_relative_position_sets(set_name)
            # elif set_name in ('MPA', 'New Products', 'Local MPA'):
            elif set_name == 'Local MPA':
                set_score = self.calculate_assortment_sets(set_name)

            # Global Secondary Displays
            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_json = self.diageo_generator.diageo_global_secondary_display_secondary_function(
                )
                if res_json:
                    # Saving to new tables
                    self.commonV2.write_to_db_result(
                        fk=res_json['fk'],
                        numerator_id=1,
                        denominator_id=self.store_id,
                        result=res_json['result'])

                # Saving to old tables
                set_score = self.tools.calculate_number_of_scenes(
                    location_type='Secondary')
                if not set_score:
                    set_score = self.tools.calculate_number_of_scenes(
                        location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)
            elif set_name == 'POSM':
                set_score = self.calculate_posm_sets(set_name)
            elif set_name in ('Visible to Customer', 'Visible to Consumer %'):
                # Global function
                sku_list = filter(
                    None, self.scif[self.scif['product_type'] ==
                                    'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(
                    sku_list)

                if res_dict:
                    # Saving to new tables
                    parent_res = res_dict[-1]
                    self.commonV2.save_json_to_new_tables(res_dict)

                    # Saving to old tables
                    # result = parent_res['result']
                    # self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result)

                # Saving to old tables
                filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                set_score = self.tools.calculate_visible_percentage(
                    visible_filters=filters)
                self.save_level2_and_level3(set_name, set_name, set_score)
            else:
                continue

            if set_score == 0:
                pass
            elif set_score is False:
                continue

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # committing to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this functions writes the score for both KPI level 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'].str.encode('utf-8') ==
             set_name.encode('utf-8'))
            & (self.kpi_static_data['kpi_name'].str.encode('utf-8') ==
               kpi_name.encode('utf-8'))]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        except IndexError:
            Log.warning("KPI name or set name doesn't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_relative_position_sets(self, set_name):
        """
        This function calculates every relative-position-typed KPI from the relevant sets, and returns the set final score.
        """

        scores = []
        for i in xrange(len(self.set_templates_data[set_name])):
            params = self.set_templates_data[set_name].iloc[i]
            if self.store_channel == params.get(self.tools.CHANNEL,
                                                '').upper():
                scif_tested_param = 'brand_name' if params.get(self.tools.TESTED_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                scif_anchor_param = 'brand_name' if params.get(self.tools.ANCHOR_TYPE, '') == self.tools.BRAND \
                    else 'product_ean_code'
                tested_filters = {
                    scif_tested_param: params.get(self.tools.TESTED_NEW)
                }
                anchor_filters = {
                    scif_anchor_param: params.get(self.tools.ANCHOR_NEW)
                }

                direction_data = {
                    'top':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.TOP_DISTANCE)),
                    'bottom':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.BOTTOM_DISTANCE)),
                    'left':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.LEFT_DISTANCE)),
                    'right':
                    self._get_direction_for_relative_position(
                        params.get(self.tools.RIGHT_DISTANCE))
                }
                if params.get(self.tools.LOCATION_OLD, ''):
                    general_filters = {
                        'template_group': params.get(self.tools.LOCATION_OLD)
                    }
                else:
                    general_filters = {}

                result = self.tools.calculate_relative_position(
                    tested_filters, anchor_filters, direction_data,
                    **general_filters)
                score = 1 if result else 0
                scores.append(score)

                self.save_level2_and_level3(set_name,
                                            params.get(self.tools.KPI_NAME),
                                            score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def _get_direction_for_relative_position(self, value):
        """
        This function converts direction data from the template (as string) to a number.
        """
        if value == self.tools.UNLIMITED_DISTANCE:
            value = 1000
        elif not value or not str(value).isdigit():
            value = 0
        else:
            value = int(value)
        return value

    def calculate_posm_sets(self, set_name):
        """
        This function calculates every POSM-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            if self.store_channel is None:
                break

            kpi_res = self.tools.calculate_posm(
                display_name=params.get(self.tools.DISPLAY_NAME))
            score = 1 if kpi_res > 0 else 0
            if params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                scores.append(score)

            if score == 1 or params.get(
                    self.store_type) == self.tools.RELEVANT_FOR_STORE:
                self.save_level2_and_level3(
                    set_name, params.get(self.tools.DISPLAY_NAME), score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            target = str(params.get(self.store_type, ''))
            if target.isdigit() or target.capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                products = str(
                    params.get(self.tools.PRODUCT_EAN_CODE,
                               params.get(self.tools.PRODUCT_EAN_CODE2,
                                          ''))).replace(',', ' ').split()
                target = 1 if not target.isdigit() else int(target)
                kpi_name = params.get(self.tools.GROUP_NAME,
                                      params.get(self.tools.PRODUCT_NAME))
                kpi_static_data = self.kpi_static_data[
                    (self.kpi_static_data['kpi_set_name'] == set_name)
                    & (self.kpi_static_data['kpi_name'] == kpi_name)]
                if len(products) > 1:
                    result = 0
                    for product in products:
                        product_score = self.tools.calculate_assortment(
                            product_ean_code=product)
                        result += product_score
                        try:
                            product_name = self.all_products[
                                self.all_products['product_ean_code'] ==
                                product]['product_name'].values[0]
                        except Exception as e:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product))
                            continue
                        try:
                            atomic_fk = kpi_static_data[
                                kpi_static_data['atomic_kpi_name'] ==
                                product_name]['atomic_kpi_fk'].values[0]
                        except Exception as e:
                            Log.warning(
                                'Product {} is not defined in the DB'.format(
                                    product_name))
                            continue
                        self.write_to_db_result(atomic_fk,
                                                product_score,
                                                level=self.LEVEL3)
                    score = 1 if result >= target else 0
                else:
                    result = self.tools.calculate_assortment(
                        product_ean_code=products)
                    atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0]
                    score = 1 if result >= target else 0
                    self.write_to_db_result(atomic_fk,
                                            score,
                                            level=self.LEVEL3)

                scores.append(score)
                kpi_fk = kpi_static_data['kpi_fk'].values[0]
                self.write_to_db_result(kpi_fk, score, level=self.LEVEL2)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    # def calculate_assortment_sets(self, set_name): # the old version. I changed it to the function of KE for local MPA.
    #     """
    #     This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
    #     """
    #     scores = []
    #     for params in self.set_templates_data[set_name]:
    #         if params.get(self.store_type, '').capitalize() in (self.tools.RELEVANT_FOR_STORE,
    #                                                             self.tools.OR_OTHER_PRODUCTS):
    #             object_type = self.tools.ENTITY_TYPE_CONVERTER.get(params.get(self.tools.ENTITY_TYPE),
    #                                                                'product_ean_code')
    #             objects = [str(params.get(self.tools.PRODUCT_EAN_CODE, params.get(self.tools.PRODUCT_EAN_CODE2, '')))]
    #             if params.get(self.store_type) == self.tools.OR_OTHER_PRODUCTS:
    #                 additional_objects = str(params.get(self.tools.ADDITIONAL_SKUS)).split(',')
    #                 objects.extend(additional_objects)
    #             filters = {object_type: objects}
    #             result = self.tools.calculate_assortment(**filters)
    #             score = 1 if result > 0 else 0
    #             scores.append(score)
    #
    #             self.save_level2_and_level3(set_name, params.get(self.tools.PRODUCT_NAME), score)
    #
    #     if not scores:
    #         return False
    #     set_score = (sum(scores) / float(len(scores))) * 100
    #     return set_score

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            score_type = '%' if kpi_set_name in self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE else ''
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score,
                                                      '.2f'), score_type, fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'score_2', 'kpi_set_fk'
                ])

        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0].replace(
                                                "'", "\\'")
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace(
                "'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([
                (atomic_kpi_name, self.session_uid,
                 kpi_set_name, self.store_id, self.visit_date.isoformat(),
                 datetime.utcnow().isoformat(), score, kpi_fk, fk, None, None)
            ],
                                      columns=[
                                          'display_text', 'session_uid',
                                          'kps_name', 'store_fk', 'visit_date',
                                          'calculation_time', 'score',
                                          'kpi_fk', 'atomic_kpi_fk',
                                          'threshold', 'result'
                                      ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    def activate_ootb_kpis(self):

        # FACINGS_SOS_MANUFACTURER_IN_WHOLE_STORE - level 1
        sos_store_fk = self.commonV2.get_kpi_fk_by_kpi_name('SOS OUT OF STORE')
        sos_store = ManufacturerFacingsSOSInWholeStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_store_fk).calculate()
        # FACINGS_SOS_CATEGORY_IN_WHOLE_STORE - level 2
        sos_cat_out_of_store_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS CATEGORY OUT OF STORE')
        sos_cat_out_of_store = self.calculate_sos_of_cat_of_out_of_store_new(
            sos_cat_out_of_store_fk)

        # FACINGS_SOS_SUB_CATEGORY_OUT_OF_CATEGORY - level 3
        sos_sub_cat_out_of_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS SUB CATEGORY OUT OF CATEGORY')
        sos_sub_cat_out_of_cat = SubCategoryFacingsSOSPerCategory(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_sub_cat_out_of_cat_fk).calculate()

        # FACINGS_SOS_MANUFACTURER_OUT_OF_SUB_CATEGORY - level 4
        sos_man_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS MANUFACTURER OUT OF SUB CATEGORY')
        sos_man_out_of_sub_cat = ManufacturerFacingsSOSPerSubCategoryInStore(
            data_provider=self.data_provider,
            kpi_definition_fk=sos_man_out_of_sub_cat_fk).calculate()

        # FACINGS_SOS_BRAND_OUT_OF_SUB_CATEGORY_IN_WHOLE_STORE - level 5
        sos_brand_out_of_sub_cat_fk = self.commonV2.get_kpi_fk_by_kpi_name(
            'SOS BRAND OUT OF MANUFACTURER')
        sos_brand_out_of_sub_cat = self.calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(
            sos_brand_out_of_sub_cat_fk)

        # Saving results in hierarchy
        self.save_hierarchy(sos_store, sos_cat_out_of_store,
                            sos_sub_cat_out_of_cat, sos_man_out_of_sub_cat,
                            sos_brand_out_of_sub_cat)

    def calculate_sos_of_brand_out_of_manufacturer_in_sub_cat(self, kpi_fk):
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings > 0
        filtered_scif = self.scif[
            (~self.scif['product_type'].isin(['Irrelevant', 'Empty']))
            & (self.scif['facings_ign_stack'] > 0)]

        # Filter by each Sub Category and Manufacturer
        sub_cat_fk_list = filtered_scif['sub_category_fk'].unique().tolist()
        for sub_cat in sub_cat_fk_list:
            filtered_scif_by_sub_cat = filtered_scif[
                filtered_scif['sub_category_fk'] == sub_cat]
            list_of_relevant_manufacturers = filtered_scif_by_sub_cat[
                'manufacturer_fk'].unique().tolist()
            for manu_fk in list_of_relevant_manufacturers:
                filtered_scif_by_sub_cat_and_manufacturer = filtered_scif_by_sub_cat[
                    filtered_scif_by_sub_cat['manufacturer_fk'] == manu_fk]
                denominator_result = filtered_scif_by_sub_cat_and_manufacturer[
                    'facings_ign_stack'].sum()

                # Calculate results per Brand
                list_of_relevant_brands = filtered_scif_by_sub_cat_and_manufacturer[
                    'brand_fk'].unique().tolist()
                for brand_fk in list_of_relevant_brands:
                    filtered_scif_by_brand = filtered_scif_by_sub_cat_and_manufacturer[
                        filtered_scif_by_sub_cat_and_manufacturer['brand_fk']
                        == brand_fk]
                    facings_brand_results = filtered_scif_by_brand[
                        'facings_ign_stack'].sum()
                    result_for_brand = facings_brand_results / float(
                        denominator_result)

                    # Preparing the results' dictionary
                    res_dict['kpi_definition_fk'] = kpi_fk
                    res_dict['numerator_id'] = brand_fk
                    res_dict['numerator_result'] = facings_brand_results
                    res_dict['denominator_id'] = int(sub_cat)
                    res_dict['denominator_result'] = denominator_result
                    res_dict['identifier_result'] = (int(brand_fk),
                                                     int(sub_cat),
                                                     int(manu_fk))
                    res_dict['identifier_parent'] = int(manu_fk), (
                        int(sub_cat))
                    res_dict['result'] = result_for_brand
                    res_dict['score'] = result_for_brand
                    res_list.append(res_dict.copy())
        return res_list

    def calculate_sos_of_cat_of_out_of_store_new(self, kpi_fk):
        res_list = []
        res_dict = dict()
        # Get rid of Irrelevant and Empty types and keep only facings
        # (ignoring stacking) > 0
        filtered_scif = self.scif[
            (~self.scif['product_type'].isin(['Irrelevant', 'Empty']))
            & (self.scif['facings_ign_stack'] > 0)]
        denominator_result = filtered_scif['facings_ign_stack'].sum()
        categories_fk_list = filtered_scif['category_fk'].unique().tolist()

        # Calculate result per category (using facings_ign_stack!)
        for category_fk in categories_fk_list:
            filtered_scif_by_category = filtered_scif[
                filtered_scif['category_fk'] == category_fk]
            facings_category_result = filtered_scif_by_category[
                'facings_ign_stack'].sum()
            result_for_category = facings_category_result / float(
                denominator_result)

            # Preparing the results' dictionary
            res_dict['kpi_definition_fk'] = kpi_fk
            res_dict['numerator_id'] = category_fk
            res_dict['numerator_result'] = facings_category_result
            res_dict['denominator_id'] = self.store_id
            res_dict['denominator_result'] = denominator_result
            res_dict['result'] = result_for_category
            res_dict['score'] = result_for_category
            res_list.append(res_dict.copy())
        return res_list

    def save_hierarchy(self, level_1, level_2, level_3, level_4, level_5):
        for i in level_1:
            res = i.to_dict
            kpi_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                should_enter=False)

        for res in level_2:
            kpi_identifier = "level_2_" + str(int(res['numerator_id']))
            parent_identifier = "level_1"
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        for i in level_3:
            res = i.to_dict
            kpi_identifier = str(int(res['numerator_id']))
            parent_identifier = "level_2_" + str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        for i in level_4:
            res = i.to_dict
            kpi_identifier = "level_4_" + str(
                (int(res['numerator_id']), int(res['denominator_id'])))
            parent_identifier = str(int(res['denominator_id']))
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

        for res in level_5:
            kpi_identifier = "level_5_" + str(res['identifier_result'])
            parent_identifier = "level_4_" + str(res['identifier_parent'])
            self.commonV2.write_to_db_result(
                fk=res['kpi_definition_fk'],
                numerator_id=res['numerator_id'],
                denominator_id=res['denominator_id'],
                numerator_result=res['numerator_result'],
                denominator_result=res['denominator_result'],
                result=res['result'],
                score=res['result'],
                identifier_result=kpi_identifier,
                identifier_parent=parent_identifier,
                should_enter=True)

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    def get_equipment_score_relevant_scenes(self):
        scenes = []
        if not self.diageo_generator.scif.empty:
            scenes = self.diageo_generator.scif[self.diageo_generator.scif['template_name'] == \
                                                'ON - DRAUGHT TAPS']['scene_fk'].unique().tolist()
        return scenes
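
A small self-contained sketch of the category share-of-store arithmetic used in calculate_sos_of_cat_of_out_of_store_new above, on an invented scene-item-facts frame:

import pandas as pd

scif = pd.DataFrame({
    'product_type': ['SKU', 'SKU', 'Empty', 'SKU'],
    'category_fk': [10, 10, 10, 20],
    'facings_ign_stack': [4, 2, 3, 6],
})
# Each mask is parenthesized because '&' binds tighter than comparisons.
filtered = scif[(~scif['product_type'].isin(['Irrelevant', 'Empty']))
                & (scif['facings_ign_stack'] > 0)]
denominator = float(filtered['facings_ign_stack'].sum())  # 12.0
results = (filtered.groupby('category_fk')['facings_ign_stack'].sum()
           / denominator)
# results: category 10 -> 0.5, category 20 -> 0.5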
Code example #19
0
class CCRU_SANDContract:
    STORE_NUMBER = 'Store Number'
    START_DATE = 'Start Date'
    END_DATE = 'End Date'

    def __init__(self, rds_conn=None):
        self.cloud_path = CLOUD_BASE_PATH
        self.temp_path = os.path.join(TEMPLATES_TEMP_PATH, 'TempFile')
        self.stores = {}
        self.stores_processed = []
        self.invalid_stores = []
        self.stores_with_invalid_dates = []
        self.stores_with_invalid_targets = []

    def __del__(self):
        if os.path.exists(self.temp_path):
            os.remove(self.temp_path)

    @property
    def amz_conn(self):
        if not hasattr(self, '_amz_conn'):
            self._amz_conn = StorageFactory.get_connector(BUCKET)
        return self._amz_conn

    @property
    def rds_conn(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(PROJECT,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except Exception as e:
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(PROJECT,
                                                DbUsers.CalculationEng)
        return self._rds_conn

    @property
    def store_data(self):
        if not hasattr(self, '_store_data'):
            query = "select pk as store_fk, store_number_1 as store_number from static.stores"
            self._store_data = pd.read_sql_query(query, self.rds_conn.db)
        return self._store_data

    def get_store_fk(self, store_number):
        store_number = str(store_number)
        if store_number in self.stores:
            store_fk = self.stores[store_number]
        else:
            store_fk = self.store_data[self.store_data['store_number'] ==
                                       store_number]
            if not store_fk.empty:
                store_fk = store_fk['store_fk'].values[0]
                self.stores[store_number] = store_fk
            else:
                store_fk = None
        return store_fk

    def get_json_file_content(self, file_name):
        """
        This function receives a KPI set name and return its relevant template as a JSON.
        """
        cloud_path = os.path.join(CLOUD_BASE_PATH, file_name)
        with open(self.temp_path, 'wb') as f:
            try:
                self.amz_conn.download_file(cloud_path, f)
            except Exception:
                f.write('{}')
        with open(self.temp_path, 'rb') as f:
            data = json.load(f)
        os.remove(self.temp_path)
        return data

    def parse_and_upload_file(self):

        Log.info("Starting template parsing and validation")
        parsed_args = self.parse_arguments()
        file_path = parsed_args.file

        # KPI weights sit in the row above the KPI column headers: pair each
        # KPI column (from the 4th column on) with its weight from that row.
        kpi_weights = zip(
            list(
                pd.read_excel(file_path,
                              header=2,
                              sheet_name=TARGETS_SHEET_NAME).columns)[3:],
            list(
                pd.read_excel(
                    file_path,
                    sheet_name=TARGETS_SHEET_NAME).iloc[0].values)[3:])
        kpi_weights = {x[0]: x[1] for x in kpi_weights}

        raw_data = pd.read_excel(file_path,
                                 skiprows=2,
                                 sheet_name=TARGETS_SHEET_NAME,
                                 dtype={
                                     self.STORE_NUMBER: str
                                 }).fillna('')
        if self.STORE_NUMBER not in raw_data.columns:
            Log.error('File must contain a {} column header'.format(
                self.STORE_NUMBER))
            return
        if self.START_DATE not in raw_data.columns:
            Log.error('File must contain a {} column header'.format(
                self.START_DATE))
            return
        if self.END_DATE not in raw_data.columns:
            Log.error('File must contain a {} column header'.format(
                self.END_DATE))
            return
        raw_data[self.STORE_NUMBER] = raw_data[self.STORE_NUMBER].astype(str)
        raw_data[self.START_DATE] = raw_data[self.START_DATE].astype(str)
        raw_data[self.END_DATE] = raw_data[self.END_DATE].astype(str)

        Log.info("Starting Stores validation")
        target_data_new = {}
        count_stores_total = raw_data.shape[0]
        for x, row in raw_data.iterrows():

            store_number = row[self.STORE_NUMBER]
            store_id = self.get_store_fk(store_number)
            if store_id is None:

                self.invalid_stores.append(store_number)

            else:

                if store_id not in target_data_new.keys():
                    target_data_new[store_id] = []
                row = row.to_dict()
                row_to_append = {
                    self.STORE_NUMBER: row[self.STORE_NUMBER],
                    self.START_DATE: row[self.START_DATE],
                    self.END_DATE: row[self.END_DATE]
                }
                for key in row.keys():
                    if key in kpi_weights:
                        row_to_append[str(key)] = [row[key], kpi_weights[key]]
                target_data_new[store_id].append(row_to_append)

            count_stores_processed = x + 1
            if count_stores_processed % 1000 == 0 or count_stores_processed == count_stores_total:
                Log.info("Number of stores validated: {}/{}".format(
                    count_stores_processed, count_stores_total))

        if self.invalid_stores:
            Log.warning(
                "The following stores do not exist in the DB and will be ignored ({}): "
                "{}".format(len(self.invalid_stores), self.invalid_stores))

        Log.info("Starting data processing")
        count_stores_total = len(target_data_new.keys())
        for x, store_id in enumerate(target_data_new.keys()):

            data_new = target_data_new[store_id][0]
            start_date_new = dt.datetime.strptime(data_new[self.START_DATE],
                                                  '%Y-%m-%d').date()
            end_date_new = dt.datetime.strptime(data_new[self.END_DATE],
                                                '%Y-%m-%d').date()
            store_number = data_new[self.STORE_NUMBER]

            if not start_date_new <= end_date_new:

                self.stores_with_invalid_dates += [store_number]

            else:

                target_data = []
                target_data_cur = self.get_json_file_content(str(store_id))
                for data_cur in target_data_cur:
                    try:
                        start_date_cur = dt.datetime.strptime(
                            data_cur[self.START_DATE], '%Y-%m-%d').date()
                        end_date_cur = dt.datetime.strptime(
                            data_cur[self.END_DATE], '%Y-%m-%d').date()
                        store_number_cur = data_cur[self.STORE_NUMBER]
                    except (KeyError, ValueError):
                        self.stores_with_invalid_targets += [store_number]
                        continue
                    if store_number_cur == store_number \
                            and start_date_cur <= end_date_new \
                            and end_date_cur >= start_date_new:
                        details_new = data_new.copy()
                        del details_new[self.START_DATE]
                        del details_new[self.END_DATE]
                        details_cur = data_cur.copy()
                        del details_cur[self.START_DATE]
                        del details_cur[self.END_DATE]
                        if details_cur == details_new:
                            data_new[self.START_DATE] = str(
                                start_date_cur
                            ) if start_date_cur <= start_date_new else str(
                                start_date_new)
                        else:
                            end_date_cur = start_date_new - dt.timedelta(
                                days=1)
                            if start_date_cur <= end_date_cur:
                                data_cur[self.END_DATE] = str(end_date_cur)
                                target_data += [data_cur]
                    else:
                        target_data += [data_cur]
                target_data += [data_new]

                try:
                    with open(self.temp_path, 'wb') as f:
                        f.write(json.dumps(target_data))
                    self.amz_conn.save_file(self.cloud_path, str(store_id),
                                            self.temp_path)
                except Exception as e:
                    Log.error(
                        "Store Seq/ID/Number: {}/{}/{}. Error: {}".format(
                            x, store_id, store_number, e))
                    Log.error("target_data: {}".format(target_data))

            count_stores_processed = x + 1
            self.stores_processed += [store_number]
            if count_stores_processed % 1000 == 0 or count_stores_processed == count_stores_total:
                Log.info("Number of stores processed: {}/{}".format(
                    count_stores_processed, count_stores_total))
                # Log.debug("Stores processed: {}".format(self.stores_processed))
                self.stores_processed = []

        if os.path.exists(self.temp_path):
            os.remove(self.temp_path)

        if self.invalid_stores:
            Log.warning(
                "The following stores do not exist in the DB and were ignored ({}): "
                "{}".format(len(self.invalid_stores), self.invalid_stores))

        if self.stores_with_invalid_dates:
            Log.warning(
                "The following stores have invalid date period and were ignored ({}): "
                "{}".format(len(self.stores_with_invalid_dates),
                            self.stores_with_invalid_dates))

        if self.stores_with_invalid_targets:
            Log.warning(
                "The following stores have invalid target format and were ignored ({}): "
                "{}".format(len(self.stores_with_invalid_targets),
                            self.stores_with_invalid_targets))

        Log.info("Execution targets are uploaded successfully. " +
                 ("Incorrect template data were ignored (see above)"
                  if self.invalid_stores or self.stores_with_invalid_dates
                  or self.stores_with_invalid_targets else ""))

    @staticmethod
    def parse_arguments():
        """
        This function gets the arguments from the command line / configuration in case of a local run and manages them.
        To run it locally just copy: -e prod --file **your file path** to the configuration parameters
        :return:
        """
        parser = argparse.ArgumentParser(description='Execution Contract')
        parser.add_argument('--env',
                            '-e',
                            type=str,
                            help='The environment - dev/int/prod')
        parser.add_argument('--file',
                            type=str,
                            required=True,
                            help='The targets template')
        return parser.parse_args()
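
The overlapping-period rule inside parse_and_upload_file above, shown on invented dates:

import datetime as dt

start_new, end_new = dt.date(2020, 3, 1), dt.date(2020, 6, 30)
start_cur, end_cur = dt.date(2020, 1, 1), dt.date(2020, 4, 30)

# The periods overlap: start_cur <= end_new and end_cur >= start_new.
assert start_cur <= end_new and end_cur >= start_new

# Identical KPI details: the new record absorbs the current one and keeps
# the earlier start date.
merged_start = start_cur if start_cur <= start_new else start_new  # 2020-01-01

# Different KPI details: the current record is truncated to end the day
# before the new period begins, and both records are kept.
truncated_end = start_new - dt.timedelta(days=1)  # 2020-02-29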
Code example #20
0
class DISPLAYSToolBox(DISPLAYSConsts):

    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        # self.all_products = self.all_products.merge(self.get_additional_attributes(), on='product_fk', how='left')
        self.match_display_in_scene = self.get_match_display()
        self.tools = DISPLAYSGENERALToolBox(self.data_provider,
                                            self.output,
                                            rds_conn=self.rds_conn,
                                            scif=self.scif)
        self.template_data = parse_template(TEMPLATE_PATH)
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []

    # def get_additional_attributes(self):
    #     query = DISPLAYSQueries.get_attributes_data()
    #     attributes = pd.read_sql_query(query, self.rds_conn.db)
    #     return attributes

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DISPLAYSQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        match_display = match_display.merge(
            self.scif.drop_duplicates(subset=['scene_fk']),
            on='scene_fk',
            how='left')
        return match_display

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DISPLAYSQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        kpi_results = {
            self.kpi_static_data.iloc[k]['kpi_name']: [0, 0, 0, 0, 0, 0]
            for k in xrange(len(self.kpi_static_data))
            if self.kpi_static_data.iloc[k]['kpi_fk'] not in (195, 196, 197,
                                                              198, 199, 200,
                                                              201, 202)
        }
        relevant_displays = self.template_data[
            self.SCENE_RECOGNITION].unique().tolist()
        actual_displays = self.match_display_in_scene[
            self.match_display_in_scene['display_name'].isin(
                relevant_displays)]
        for scene in actual_displays['scene_fk'].unique():
            scene_displays = actual_displays[actual_displays['scene_fk'] ==
                                             scene]
            calculate_all = False
            if scene_displays.iloc[0]['template_name'] == 'Display (End Cap/Palette Drop/Rack)' \
                    and scene_displays['display_name'].unique().tolist() == ['Display']:
                display_type = scene_displays['display_name'].values[0]
                kpi_data = self.template_data[self.template_data[
                    self.SCENE_RECOGNITION] == display_type]
                bays = sorted(scene_displays['bay_number'].unique().tolist())
                bay_groups = [[]]
                for bay in bays:
                    if len(bay_groups[-1]) < 3:
                        bay_groups[-1].append(bay)
                    else:
                        bay_groups.append([bay])
                for i, group in enumerate(bay_groups):
                    display = scene_displays.iloc[0]
                    display['bay_number'] = group
                    for p in xrange(len(kpi_data)):
                        params = kpi_data.iloc[p]
                        score = self.calculate_facing_sos(params, display)
                        for x, s in enumerate(score):
                            kpi_results[params[self.KPI_NAME]][x * 3 + 1] += s
                        if len(bay_groups) == 1 or len(
                                bay_groups[-1]
                        ) == 3 or i + 2 < len(bay_groups):
                            for x, s in enumerate(score):
                                kpi_results[params[self.KPI_NAME]][x * 3 +
                                                                   2] += s
                        elif i + 2 == len(bay_groups):
                            display['bay_number'] = group + bay_groups[i + 1]
                            score = self.calculate_facing_sos(params, display)
                            for x, s in enumerate(score):
                                kpi_results[params[self.KPI_NAME]][x * 3 +
                                                                   2] += s
            else:
                calculate_all = True
            for d in xrange(len(scene_displays)):
                display = scene_displays.iloc[d]
                display_name = display['display_name']
                kpi_data = self.template_data[self.template_data[
                    self.SCENE_RECOGNITION] == display_name]
                for p in xrange(len(kpi_data)):
                    params = kpi_data.iloc[p]
                    score = self.calculate_facing_sos(params, display)
                    for x, s in enumerate(score):
                        if display_name == 'Display':
                            kpi_results[params[self.KPI_NAME]][x * 3 + 0] += s
                        else:
                            kpi_results[params[self.KPI_NAME]][x] += s
                    if calculate_all and display['display_name'] == 'Display':
                        for x, s in enumerate(score):
                            kpi_results[params[self.KPI_NAME]][x * 3 + 1] += s
                            kpi_results[params[self.KPI_NAME]][x * 3 + 2] += s
        for kpi_name in kpi_results.keys():
            self.write_to_db_result(kpi_name, 100, level=self.LEVEL2)
            self.write_to_db_result(kpi_name,
                                    kpi_results[kpi_name],
                                    level=self.LEVEL3)
        self.write_to_db_result('Manufacturer Displays',
                                100,
                                level=self.LEVEL1)

    def calculate_facing_sos(self, params, display):
        filters = self.get_filters(params)
        numerator = self.tools.calculate_availability(
            bay_number=display['bay_number'],
            product_type=('Empty', self.tools.EXCLUDE_FILTER),
            scene_fk=display['scene_fk'],
            **filters)
        denominator = self.tools.calculate_availability(
            bay_number=display['bay_number'],
            product_type=('Empty', self.tools.EXCLUDE_FILTER),
            scene_fk=display['scene_fk'])
        result = 0 if denominator == 0 else numerator / float(denominator)
        target = map(float, str(params[self.SOS_TARGET]).split(self.SEPARATOR))
        scores = []
        for t in target:
            scores.append(1 if result > t else 0)
        return scores

    def get_filters(self, params):
        products = set()
        if params[self.MANUFACTURERS]:
            products = products.union(
                self.get_product_list(manufacturer_name=params[
                    self.MANUFACTURERS].split(self.SEPARATOR)))
            if params[self.BRANDS]:
                products = products.union(
                    self.get_product_list(
                        att2=params[self.BRANDS].split(self.SEPARATOR)))
        else:
            products = products.union(
                self.all_products['product_ean_code'].unique().tolist())
        if params[self.SSD_OR_STILL]:
            products = products.intersection(
                self.get_product_list(att4=params[self.SSD_OR_STILL]))
        if params[self.MANUFACTURERS_TO_EXCLUDE]:
            products = products.difference(
                self.get_product_list(manufacturer_name=params[
                    self.MANUFACTURERS_TO_EXCLUDE].split(self.SEPARATOR)))
        if params[self.BRANDS_TO_EXCLUDE]:
            products = products.difference(
                self.get_product_list(
                    att2=params[self.BRANDS_TO_EXCLUDE].split(self.SEPARATOR)))
        products = products.difference(
            self.get_product_list(manufacturer_name='GENERAL'))
        # products = self.all_products[self.all_products['att2'].isin(params[self.ATT2].split(self.SEPARATOR))]['product_ean_code'].unique().tolist()
        return dict(product_ean_code=list(products))

    def get_product_list(self, **filters):
        product_list = self.all_products[self.tools.get_filter_condition(
            self.all_products, **filters)]
        return set(product_list['product_ean_code'].unique().tolist())

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, name, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        """
        if level == self.LEVEL1:
            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == name]['kpi_set_fk'].values[0]
            attributes = pd.DataFrame(
                [(name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), set_fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_fk = self.kpi_static_data[self.kpi_static_data['kpi_name'] ==
                                          name]['kpi_fk'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  kpi_fk, name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            score, score_2, score_3, result, result_2, result_3 = score
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_name']
                                        == name]
            atomic_fk = data['atomic_kpi_fk'].values[0]
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = data['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(name, self.session_uid, kpi_set_name, self.store_id,
                  self.visit_date.isoformat(), datetime.utcnow().isoformat(),
                  score, score_2, score_3, result, result_2, result_3, kpi_fk,
                  atomic_fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'score_2',
                    'score_3', 'result', 'result_2', 'result_3', 'kpi_fk',
                    'atomic_kpi_fk'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()
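    # Note: DataFrame.to_dict() is column-oriented, so a LEVEL2 row comes back
    # as {'session_uid': {0: ...}, 'store_fk': {0: ...}, ...}; the insert()
    # helper is presumably written against that shape.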

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        delete_queries = DISPLAYSQueries.get_delete_session_results_query(
            self.session_uid, self.kpi_static_data)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
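
# Illustrative only: a minimal sketch of the intended write flow, assuming the
# surrounding tool-box class (call it ToolBox here) was built with a valid data
# provider. write_to_db_result() only queues INSERT queries; commit_results_data()
# deletes the previous session results and writes the new ones in one transaction.
#
#     tool_box = ToolBox(data_provider, output)   # hypothetical construction
#     tool_box.write_to_db_result(set_fk, score, tool_box.LEVEL1)
#     tool_box.commit_results_data()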
コード例 #21
0
class CCRUCCHKPIFetcher:

    TCCC = ['TCCC', 'BF']

    def __init__(self, project_name):
        self.project_name = project_name
        self.rds_conn = self.rds_connection()
        self.kpi_set_name = None
        self.kpi_static_data = None

    def rds_connection(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except:
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        return self._rds_conn
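
    # The cheap "select pk ... limit 1" above is a liveness probe: if the cached
    # connection has gone stale (e.g. dropped by the server), the broad except
    # catches the failure and the connector is rebuilt before being returned.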

    @staticmethod
    def get_delete_session_results(session_uid, session_fk):
        queries = [
            "delete from report.kps_results where session_uid = '{}';".format(
                session_uid),
            "delete from report.kpk_results where session_uid = '{}';".format(
                session_uid),
            "delete from report.kpi_results where session_uid = '{}';".format(
                session_uid),
            "delete from pservice.custom_gaps where session_fk = '{}';".format(
                session_fk),
            "delete from pservice.custom_scene_item_facts where session_fk = '{}';"
            .format(session_fk)
        ]
        return queries
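
    # Note: these DELETE statements interpolate session_uid/session_fk straight
    # into the SQL text, as the rest of the codebase does. A hedged alternative
    # sketch using MySQLdb-style placeholders would be:
    #
    #     cur.execute("delete from report.kpi_results where session_uid = %s",
    #                 (session_uid,))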

    def get_static_kpi_data(self, kpi_set_name=None):
        kpi_set_name = kpi_set_name if kpi_set_name else self.kpi_set_name
        self.rds_conn = self.rds_connection()
        query = """
                select api.name as atomic_kpi_name, api.pk as atomic_kpi_fk,
                       kpi.display_text as kpi_name, kpi.pk as kpi_fk,
                       kps.name as kpi_set_name, kps.pk as kpi_set_fk
                from static.atomic_kpi api
                join static.kpi kpi on kpi.pk = api.kpi_fk
                join static.kpi_set kps on kps.pk = kpi.kpi_set_fk
                where kps.name = '{}'
                """.format(kpi_set_name)
        df = pd.read_sql_query(query, self.rds_conn.db)
        return df

    def get_kpi_set_fk(self):
        kpi_set_fk = self.kpi_static_data['kpi_set_fk']
        return kpi_set_fk.values[0]

    def get_kpi_fk(self, kpi_name):
        try:
            kpi_name = kpi_name.decode('utf-8')
        except UnicodeEncodeError:
            pass
        kpi_fk = self.kpi_static_data[self.kpi_static_data['kpi_name'] ==
                                      kpi_name.replace("\\'", "'")]['kpi_fk']
        if not kpi_fk.empty:
            return kpi_fk.values[0]
        else:
            return None

    def get_atomic_kpi_fk(self, atomic_kpi_name, kpi_fk=None):
        try:
            atomic_kpi_name = atomic_kpi_name.decode('utf-8')
        except UnicodeEncodeError:
            pass

        if kpi_fk:
            atomic_kpi_fk = self.kpi_static_data[
                (self.kpi_static_data['kpi_fk'] == kpi_fk)
                & (self.kpi_static_data['atomic_kpi_name'] ==
                   atomic_kpi_name.replace("\\'", "'"))]['atomic_kpi_fk']
        else:
            atomic_kpi_fk = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_name'] ==
                atomic_kpi_name.replace("\\'", "'")]['atomic_kpi_fk']

        if not atomic_kpi_fk.empty:
            return atomic_kpi_fk.values[0]
        else:
            return None

    def get_category_target_by_region(self, category, store_id):
        store_type_dict = {
            'PoS 2017 - MT - Hypermarket': 'Hypermarket',
            'PoS 2017 - MT - Supermarket': 'Supermarket',
            'PoS 2017 - MT - Superette': 'Superette'
        }
        store_region_fk = self.get_store_region(store_id)
        branch_fk = self.get_store_branch(store_id)
        jg = CCRUJsonGenerator()
        jg.create_kpi_data_json('cat_targets_by_region',
                                'MT Shelf facings_2017.xlsx')
        targets = jg.project_kpi_dict['cat_targets_by_region']
        final_target = 0
        for row in targets:
            if row.get('branch_fk') == branch_fk and row.get('region_fk') == store_region_fk \
                    and row.get('store_type') == store_type_dict.get(self.kpi_set_name):
                final_target = row.get(category)
        return final_target

    def get_store_number(self, store_id):
        query = """
                SELECT store_number_1
                FROM static.stores ss
                WHERE ss.pk = {};
                """.format(store_id)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()[0]
        return res[0]

    def get_store_region(self, store_id):
        query = """
                SELECT region_fk
                FROM static.stores ss
                WHERE ss.pk = {};
                """.format(store_id)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()[0]
        return res[0]

    def get_attr15_store(self, store_id):
        query = """
                SELECT additional_attribute_15
                FROM static.stores ss
                WHERE ss.pk = {};
                """.format(store_id)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        result = cur.fetchall()

        try:
            result = float(result[0][0].replace(',', '.'))
        except:
            result = 1.0

        return result

    def get_test_store(self, store_id):
        query = """
                SELECT test_store
                FROM static.stores ss
                WHERE ss.pk = {};
                """.format(store_id)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()[0]
        return res[0]

    def get_store_branch(self, store_id):
        query = """
                SELECT branch_fk
                FROM static.stores ss
                WHERE ss.pk = {};
                """.format(store_id)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()[0]
        return res[0]
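
    # Note: the five single-column store lookups above differ only in the
    # selected column; a hypothetical generic helper (not in the original
    # class) could replace most of them:
    #
    #     def _get_store_attribute(self, store_id, column):
    #         cur = self.rds_conn.db.cursor()
    #         cur.execute("SELECT {} FROM static.stores WHERE pk = {};".format(
    #             column, store_id))
    #         return cur.fetchall()[0][0]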

    def get_external_session_id(self, session_uid):
        query = """
                SELECT external_session_id
                FROM probedata.session ss
                WHERE ss.session_uid = '{}';
                """.format(session_uid)
        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()[0]
        return res[0]

    def get_store_info(self, store_id):
        query = """
                    SELECT s.pk as store_fk, s.additional_attribute_3, r.name as retailer, s.store_number_1,
                    s.business_unit_fk, s.additional_attribute_5, s.district_fk, s.test_store
                    FROM static.stores s
                    left join static.retailer r
                    on r.pk = s.retailer_fk where s.pk = '{}'
                """.format(store_id)
        store_info = pd.read_sql_query(query, self.rds_conn.db)
        return store_info

    def get_store_area_df(self, session_uid):
        query = """
                select sst.scene_fk, st.name, sc.session_uid from probedata.scene_store_task_area_group_items sst
                join static.store_task_area_group_items st on st.pk=sst.store_task_area_group_item_fk
                join probedata.scene sc on sc.pk=sst.scene_fk
                where sc.delete_time is null and sc.session_uid = '{}';
                """.format(session_uid)

        cur = self.rds_conn.db.cursor()
        cur.execute(query)
        res = cur.fetchall()
        df = pd.DataFrame(
            list(res), columns=['scene_fk', 'store_area_name', 'session_uid'])
        return df

    def get_kpi_result_values(self):
        self.rds_conn = self.rds_connection()
        query = """
                select 
                rt.pk as result_type_fk,
                rt.name as result_type, 
                rv.pk as result_value_fk, 
                rv.value as result_value
                from static.kpi_result_value rv
                join static.kpi_result_type rt on rt.pk=rv.kpi_result_type_fk;
                """
        df = pd.read_sql_query(query, self.rds_conn.db)
        return df

    def get_kpi_entity_types(self):
        self.rds_conn = self.rds_connection()
        query = """
                select * from static.kpi_entity_type;
                """
        df = pd.read_sql_query(query, self.rds_conn.db)
        return df

    def get_kpi_entity(self, entity, entity_type_fk, entity_table_name,
                       entity_uid_field):
        self.rds_conn = self.rds_connection()
        query = """
                select 
                '{0}' as entity,
                {1} as type,
                pk as fk,
                {2} as uid_field
                from {3};
                """.format(entity, entity_type_fk, entity_uid_field,
                           entity_table_name)
        df = pd.read_sql_query(query, self.rds_conn.db)
        return df

    def get_session_user(self, session_uid):
        query = """
                SELECT 
                    sr.login_name AS user_name, 
                    ur.role AS user_role, 
                    sr.position AS user_position
                FROM probedata.session ss
                LEFT JOIN static.sales_reps sr ON sr.pk=ss.s_sales_rep_fk
                LEFT JOIN static.mobile_user_roles ur ON ur.sales_rep_fk=sr.pk
                WHERE ss.session_uid='{}';
                """.format(session_uid)
        result = pd.read_sql_query(
            query, self.rds_conn.db).to_dict(orient='records')[0]
        return result

    def get_planned_visit_flag(self, session_uid):
        query = """
                SELECT pv.planned_flag
                FROM probedata.session ss
                LEFT JOIN pservice.planned_visits pv ON 1
                AND pv.store_fk=ss.store_fk 
                AND pv.sales_rep_fk=ss.s_sales_rep_fk 
                AND pv.visit_date=ss.visit_date
                WHERE ss.session_uid='{}';
                """.format(session_uid)
        result = pd.read_sql_query(query, self.rds_conn.db).values[0][0]
        return result

    def get_top_skus_for_store(self, store_fk, visit_date):
        query = """
                select
                anchor_product_fk,
                group_concat(product_fk) as product_fks,
                max(min_facings) as min_facings
                from (
                    select          
                    ifnull(ts.anchor_product_fk, ts.product_fk) as anchor_product_fk,
                    ts.product_fk as product_fk,
                    ifnull(ts.min_facings, 1) as min_facings
                    from {} ts
                    where ts.store_fk = {}
                    and ts.start_date <= '{}' 
                    and ifnull(ts.end_date, curdate()) >= '{}'
                ) t
                group by anchor_product_fk;
                """.format('pservice.custom_osa', store_fk, visit_date,
                           visit_date)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data.groupby(['anchor_product_fk']).agg({
            'product_fks': 'first',
            'min_facings': 'first'
        }).to_dict()
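
    # Note: the groupby(...).agg(...).to_dict() above yields a column-oriented
    # dict keyed by anchor_product_fk, e.g.
    # {'product_fks': {<anchor_fk>: '12,34'}, 'min_facings': {<anchor_fk>: 2}}.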

    def get_custom_entity(self, entity_type):
        self.rds_conn = self.rds_connection()
        query = \
            """
            SELECT en.pk, en.name
            FROM static.custom_entity en
            JOIN static.kpi_entity_type et ON et.pk=en.entity_type_fk
            WHERE et.name = '{}';
            """.format(entity_type)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def get_kpi_level_2_fk(self, kpi_level_2_type):
        query = \
            """
            SELECT pk FROM static.kpi_level_2
            WHERE type = '{}';
            """.format(kpi_level_2_type)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return None if data.empty else data.values[0][0]

    def get_kpi_operation_type_fk(self, kpi_operation_type):
        query = \
            """
            SELECT pk FROM static.kpi_operation_type
            WHERE operation_type = '{}';
            """.format(kpi_operation_type)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return None if data.empty else data.values[0][0]

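    # The query below picks, per scene and product, the lowest acceptable
    # non-promotional price. The nested IF ladder classifies each price against
    # static.product_price_range as:
    #     0   price in (soft_min, soft_max]
    #    -1   price in (hard_min, soft_min]
    #     1   price in (soft_max, hard_max]
    #    -2   price <= hard_min
    #     2   price >  hard_max
    # (NULL when no range is defined). Prices coded NULL, -1, 0 or 1 are kept;
    # hard outliers (-2 and 2) are discarded.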
    def get_scene_item_prices(self, scene_list):
        query = """
                SELECT
                scene_fk,
                product_fk,
                MIN(IF((is_promotion = 0 AND outlier <> (-2) AND outlier <> 2) 
                        OR (is_promotion = 0 AND (ISNULL(outlier) OR outlier = 0)), price, NULL)) AS price
                FROM (
                    SELECT 
                     mpip.product_fk AS product_fk
                    ,mpip.probe_fk AS probe_fk
                    ,pr.scene_fk AS scene_fk
                    ,mpippav.value AS price
                    ,mpippav.is_promotion AS is_promotion
                    ,IF(ISNULL(tipr.soft_min) OR ISNULL(mpippav.value),NULL
                        ,IF(mpippav.value > tipr.soft_min AND mpippav.value <= tipr.soft_max, 0 
                            ,IF(mpippav.value > tipr.hard_min AND mpippav.value <= tipr.soft_min, -1
                                ,IF(mpippav.value > tipr.soft_max AND mpippav.value <= tipr.hard_max, 1
                                    ,IF(mpippav.value <= tipr.hard_min, -2
                                        ,2
                                        )
                                    )
                                )
                            )
                        ) 					 AS outlier
                    FROM probedata.match_product_in_probe  mpip
                    JOIN probedata.match_product_in_probe_price_attribute_value mpippav ON mpip.pk = mpippav.match_product_in_probe_fk
                    JOIN probedata.probe pr ON pr.pk = mpip.probe_fk AND ISNULL(pr.delete_time)
                    LEFT JOIN (
                        SELECT 
                             table1.pk
                            ,table1.product_fk 						AS product_fk
                            ,table1.soft_min 					 	AS soft_min
                            ,table1.soft_max 					    AS soft_max
                            ,table1.hard_min						AS hard_min
                            ,table1.hard_max						AS hard_max
                            ,table1.avg_price						AS avg_price
                            ,table1.creation_time 				 	AS start_date
                            ,IFNULL(table2.creation_time, NOW()) 	AS end_date
                        FROM (
                        SELECT ppr.*, @rownum := @rownum+1 AS rownum
                        FROM static.product_price_range ppr
                        JOIN (SELECT @rownum := 0) r
                        ORDER BY ppr.product_fk,ppr.creation_time
                        ) table1
                        LEFT JOIN (
                        SELECT ppr.*, @rownum2 := @rownum2+1 AS rownum2
                        FROM static.product_price_range ppr
                        JOIN (SELECT @rownum2 := -1) r
                        ORDER BY ppr.product_fk,ppr.creation_time
                        ) table2 ON table1.product_fk = table2.product_fk AND table1.rownum = table2.rownum2
                    ) tipr ON tipr.product_fk = mpip.product_fk AND (mpip.creation_time BETWEEN tipr.start_date AND tipr.end_date)
                    WHERE 1
                    AND pr.scene_fk IN ({scene_list})
                ) prices
                GROUP BY scene_fk, product_fk
                HAVING price IS NOT NULL;
                """.format(
            scene_list=','.join([unicode(x) for x in scene_list]))
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def get_scene_survey_response(self, scenes_list):
        if scenes_list:
            if len(scenes_list) == 1:
                query = """
                           select sr.*, sq.question_text, sq.group_name 
                           from probedata.scene_survey_response sr
                           join static.survey_question sq on sr.question_fk=sq.pk
                           where sq.delete_time is null
                           and sq.group_name in ('Cooler Audit', 'Cooler Audit Test')
                           and sr.delete_time is null
                           and sr.scene_fk in ({});
                           """.format(scenes_list[0])
            else:
                query = """
                           select sr.*, sq.question_text, sq.group_name 
                           from probedata.scene_survey_response sr
                           join static.survey_question sq on sr.question_fk=sq.pk
                           where sq.delete_time is null
                           and sq.group_name in ('Cooler Audit', 'Cooler Audit Test')
                           and sr.delete_time is null
                           and sr.scene_fk in {};
                           """.format(tuple(scenes_list))
            data = pd.read_sql_query(query, self.rds_conn.db)
        else:
            data = pd.DataFrame(
                columns=['pk', 'text_value', 'question_text', 'group_name'])
        return data

    def get_all_coolers_from_assortment_list(self, cooler_list):
        cooler_list = list(map(str, cooler_list))
        if len(cooler_list) == 1:
            query = """
                       select c.pk as cooler_fk, c.cooler_id, c.cooler_model_fk, m.name as cooler_model_name 
                       from pservice.cooler c
                       left join pservice.cooler_model m
                       on c.cooler_model_fk = m.pk
                       where c.cooler_id = '{}';
                       """.format(cooler_list[0])
        else:
            query = """
                       select c.pk as cooler_fk, c.cooler_id, c.cooler_model_fk, m.name as cooler_model_name 
                       from pservice.cooler c
                       left join pservice.cooler_model m
                       on c.cooler_model_fk = m.pk
                       where c.cooler_id in {};
                       """.format(tuple(cooler_list))
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def get_kpi_external_targets(self, visit_date, store_fk):
        query = """SELECT ext.*, ot.operation_type from static.kpi_external_targets ext
                   LEFT JOIN static.kpi_operation_type ot on ext.kpi_operation_type_fk=ot.pk 
                   WHERE 
                   ((ext.start_date<='{}' and ext.end_date is null) or 
                   (ext.start_date<='{}' and ext.end_date>='{}'))
                   AND ot.operation_type='COOLER_AUDIT'
                   AND ext.key_fk={}
                """.format(visit_date, visit_date, visit_date, store_fk)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data
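
# Illustrative only: a minimal usage sketch, assuming a valid project name and
# an existing session. The fetcher (re)connects on construction, and the static
# data must be loaded before the fk lookups can be used:
#
#     fetcher = CCRUCCHKPIFetcher('some-project')   # hypothetical project name
#     fetcher.kpi_set_name = 'PoS 2017 - MT - Supermarket'
#     fetcher.kpi_static_data = fetcher.get_static_kpi_data()
#     kpi_fk = fetcher.get_kpi_fk('Some KPI display text')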
コード例 #22
0
class CCBRToolBox:
    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.tools = CCBRGENERALToolBox(self.data_provider,
                                        self.output,
                                        rds_conn=self.rds_conn)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.kpi_results_queries = []
        self.survey = Survey(self.data_provider, self.output)
        self.kpi_results_new_tables_queries = []
        self.New_kpi_static_data = self.get_new_kpi_static_data()
        self.session_id = self.data_provider.session_id
        self.prices_per_session = PsDataProvider(
            self.data_provider, self.output).get_price_union(self.session_id)
        self.common_db = Common(self.data_provider)
        self.count_sheet = pd.read_excel(PATH, Const.COUNT).fillna("")
        self.group_count_sheet = pd.read_excel(PATH,
                                               Const.GROUP_COUNT).fillna("")
        self.survey_sheet = pd.read_excel(PATH, Const.SURVEY).fillna("")

    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        kpis_sheet = pd.read_excel(PATH, Const.KPIS).fillna("")
        for index, row in kpis_sheet.iterrows():
            self.handle_atomic(row)
        self.handle_simon_kpis()
        self.commit_results_data()

    def handle_simon_kpis(self):
        """
        activate the availability and pricing functions
        """
        active_products = self.all_products.loc[
            self.all_products["is_active"] > 0]
        self.calculate_availability(active_products)
        self.calculate_pricing(self.all_products)

    def calculate_availability(self, active_products):
        """
        calculates the availability for all products per session, used is sovi and sovi vertical reports
        :param active_products: a df containing only active products
        """
        active_products_sku_and_other = active_products[
            (active_products['product_type'] == 'SKU')
            | (active_products['product_type'] == 'Other')]
        active_products_pks = active_products_sku_and_other[
            'product_fk'].unique().tolist()
        filters = {'product_fk': active_products_pks}
        filtered_df = self.scif[self.tools.get_filter_condition(
            self.scif, **filters)]
        facing_filtered = filtered_df.loc[filtered_df['facings'] > 0][[
            'template_fk', 'product_fk', 'facings'
        ]]
        facing_filtered_pks = facing_filtered['product_fk'].unique().tolist()
        for product in facing_filtered_pks:
            product_df = facing_filtered.loc[facing_filtered['product_fk'] ==
                                             product]
            product_template_fks = product_df['template_fk'].unique().tolist()
            for template_fk in product_template_fks:
                sum_facing = product_df.loc[product_df['template_fk'] ==
                                            template_fk]['facings'].sum()
                self.write_to_db_result_new_tables(fk=Const.AVAILABILITY_PK,
                                                   numerator_id=product,
                                                   score='1',
                                                   denominator_id=template_fk,
                                                   numerator_result='1',
                                                   result=sum_facing)

    def calculate_pricing(self, all_products):
        """
        inserting the db the pricing of all active and inactive skus.
        used in preco and preco vertical reports
        :param all_products: df containing all products
        """
        only_sku_type_products = all_products.loc[all_products['product_type']
                                                  == 'SKU']
        all_products_fks_size = only_sku_type_products[['product_fk',
                                                        'size']].fillna("")
        product_fks_and_prices = self.prices_per_session
        merge_size_and_price = pd.merge(all_products_fks_size,
                                        product_fks_and_prices,
                                        how='left',
                                        on='product_fk')
        merge_size_and_price['value'] = merge_size_and_price['value'].fillna(
            '0')
        for row in merge_size_and_price.itertuples():
            product = row[1]  # row['product_fk']
            size = row[2]  # row['size']
            price = row[3]  # row['value']
            if size == '':
                size = 0
            if price > 0:
                self.write_to_db_result_new_tables(fk=Const.PRICING_PK,
                                                   numerator_id=product,
                                                   numerator_result=size,
                                                   result=price)

    def handle_atomic(self, row):
        """
        run the correct kpi for a specific row in the template
        :param row: a row from the template
        """
        atomic_name = row[Const.ENGLISH_KPI_NAME].strip()
        kpi_type = row[Const.KPI_TYPE].strip()
        if kpi_type == Const.SURVEY:
            self.handle_survey_atomics(atomic_name)
        elif kpi_type == Const.COUNT:
            self.handle_count_atomics(atomic_name)
        elif kpi_type == Const.GROUP_COUNT:
            self.handle_group_count_atomics(atomic_name)

    def handle_survey_atomics(self, atomic_name):
        """
        handle survey questions
        :param atomic_name: the name of the kpi
        :return: only if the survey filters aren't satisfied
        """
        row = self.survey_sheet.loc[self.survey_sheet[Const.ENGLISH_KPI_NAME]
                                    == atomic_name]
        if row.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return
        store_type_filter = self.store_info['store_type'].values[0].strip()
        store_type_template = row[Const.STORE_TYPE_TEMPLATE].values[0].strip()

        # if cell in template is not empty
        if store_type_template != "":
            store_types = store_type_template.split(",")
            store_types = [item.strip() for item in store_types]
            if store_type_filter not in store_types:
                return

        # find the answer to the survey in session
        question_id = row[Const.SURVEY_QUESTION_ID].values[0]
        question_answer_template = row[Const.TARGET_ANSWER].values[0]

        survey_result = self.survey.get_survey_answer(
            ('question_fk', question_id))
        if question_answer_template == Const.NUMERIC:
            if not survey_result:
                survey_result = 0
            if not isinstance(survey_result, (int, long, float)):
                Log.warning("question id " + str(question_id) +
                            " in template is not a number")
                survey_result = 0

        else:
            answer = self.survey.check_survey_answer(
                ('question_fk', question_id), question_answer_template)
            survey_result = 1 if answer else -1

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=survey_result,
                                           result=survey_result)

    def handle_count_atomics(self, atomic_name):
        """
        handle count kpis, used in consolidada report
        :param atomic_name: the name of the kpi to calculate
        """
        sum_of_count = 0
        target = 0
        count_result = 0
        row = self.count_sheet.loc[self.count_sheet[Const.ENGLISH_KPI_NAME] ==
                                   atomic_name]
        if row.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return
        for index, row in row.iterrows():
            sum_of_count, target, count_result = self.handle_count_row(row)
        if not isinstance(sum_of_count, (int, float, long)):
            sum_of_count = count_result

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=sum_of_count,
                                           denominator_result=target,
                                           result=count_result)

    def handle_group_count_atomics(self, atomic_name):
        """
        handle group count kpis (different from count in or and and conditions), used in consolidada report
        :param atomic_name: the name of the kpi to calculate
        """
        rows = self.group_count_sheet.loc[self.group_count_sheet[
            Const.GROUP_KPI_NAME] == atomic_name]
        group_weight = 0
        group_result = 0
        group_target = 0
        group_sum_of_count = 0
        sum_of_count_df = pd.DataFrame()
        target_operator = ""
        if rows.empty:
            Log.warning("Dataframe is empty, wrong kpi name: " + atomic_name)
            return

        try:
            atomic_pk = self.common_db.get_kpi_fk_by_kpi_name_new_tables(
                atomic_name)
        except IndexError:
            Log.warning("There is no matching Kpi fk for kpi name: " +
                        atomic_name)
            return

        for index, row in rows.iterrows():
            target_operator = row[Const.TARGET_OPERATOR].strip()
            weight = row[Const.WEIGHT]
            sum_of_count, target, count_result = self.handle_count_row(row)
            if count_result >= 1:
                group_weight += weight
                if group_weight >= 100:
                    # use for getting numeric results instead of 1 and 0
                    if (target_operator == '+'):
                        sum_of_count_df = pd.concat(
                            [sum_of_count_df, sum_of_count])
                    else:
                        group_result = 1
                        break

            # conditional: a count result of -1000 forces the whole group kpi to fail
            elif count_result == -1000:
                group_result = 0
                break

        # use for getting numeric results instead of 1 and 0
        if (target_operator == '+'):
            if sum_of_count_df.empty:
                group_sum_of_count = 0
            else:
                group_sum_of_count = len(sum_of_count_df.groupby('scene_id'))
            group_result = group_sum_of_count

        self.write_to_db_result_new_tables(fk=atomic_pk,
                                           numerator_id=self.session_id,
                                           numerator_result=group_sum_of_count,
                                           denominator_result=group_target,
                                           result=group_result)

    def handle_count_row(self, row):
        """
        filters qall params in aspecific row and send it to the correct count calculation
        :param row:
        :return:
        """
        count_type = row[Const.COUNT_TYPE].strip()
        target = row[Const.TARGET]
        target_operator = row[Const.TARGET_OPERATOR].strip()
        product_template = row[Const.PRODUCT]
        store_type_filter = self.store_info['store_type'].values[0]
        store_type_template = row[Const.STORE_TYPE_TEMPLATE]
        product_size = row[Const.PRODUCT_SIZE]
        product_size_operator = row[Const.PRODUCT_SIZE_OPERATOR].strip()
        product_measurement_unit = row[Const.MEASUREMENT_UNIT].strip()
        consider_few = row[Const.CONSIDER_FEW]
        multipack_template = row[Const.MULTIPACK].strip()
        multipack_df = None

        # filter store type
        if store_type_template != "":
            store_types = store_type_template.split(",")
            store_types = [item.strip() for item in store_types]
            if store_type_filter not in store_types:
                return 0, 0, 0

        filtered_df = self.scif.copy()

        # filter product
        if product_template != "":
            products_to_check = product_template.split(",")
            products_to_check = [item.strip() for item in products_to_check]
            filtered_df = filtered_df[filtered_df['product_name'].isin(
                products_to_check)]
            if filtered_df.empty:
                return 0, 0, 0

        # filter product size
        if product_size != "":
            if product_measurement_unit == 'l':
                product_size *= 1000

            ml_df = filtered_df[filtered_df['size_unit'] == 'ml']
            l_df = filtered_df[filtered_df['size_unit'] == 'l']

            if multipack_template != "":
                multipack_df = filtered_df[filtered_df['MPACK'] == 'Y']
            temp_df = l_df.copy()
            temp_df['size'] = l_df['size'].apply((lambda x: x * 1000))
            filtered_df = pd.concat([temp_df, ml_df])

            if product_size_operator == '<':
                filtered_df = filtered_df[filtered_df['size'] < product_size]
            elif product_size_operator == '<=':
                filtered_df = filtered_df[filtered_df['size'] <= product_size]
            elif product_size_operator == '>':
                filtered_df = filtered_df[filtered_df['size'] > product_size]
            elif product_size_operator == '>=':
                filtered_df = filtered_df[filtered_df['size'] >= product_size]
            elif product_size_operator == '=':
                filtered_df = filtered_df[filtered_df['size'] == product_size]

            # the multipack condition is an OR between product size and MPACK
            if multipack_template != "":
                filtered_df = pd.concat([filtered_df,
                                         multipack_df]).drop_duplicates()

        filters = self.get_filters_from_row(row)
        count_of_units = 0
        if count_type == Const.SCENE:
            count_of_units = self.count_of_scenes(filtered_df, filters,
                                                  target_operator, target)
        elif count_type == Const.FACING:
            count_of_units = self.count_of_facings(filtered_df, filters,
                                                   consider_few, target)
        elif count_type == Const.SCENE_SOS:
            count_of_units = self.count_of_sos(filtered_df, filters)
        else:
            Log.warning("Couldn't find a correct COUNT variable in template")

        if target_operator == '<=':
            count_result = 1 if (target <= count_of_units) else 0

        # use for getting numeric results instead of 1 and 0
        elif target_operator == '+':
            if isinstance(count_of_units, (int, float, long)):
                count_result = count_of_units
            else:
                count_result = len(count_of_units)
        else:
            count_result = 1 if (target >= count_of_units) else 0
        return count_of_units, target, count_result
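
    # target_operator semantics: '<=' passes when the count reaches the target,
    # '+' returns the raw count (or the row count of a grouped dataframe), and
    # anything else passes when the count stays at or below the target.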

    def get_filters_from_row(self, row):
        """
        handle filters appering in scif
        :param row: row containing all filters
        :return: a dictionary of the filters
        """
        filters = dict(row)

        # drop fields that aren't in scif and need not be accounted for
        for field in Const.DELETE_FIELDS:
            if field in filters:
                del filters[field]

        if Const.WEIGHT in filters.keys():
            del filters[Const.WEIGHT]
        if Const.GROUP_KPI_NAME in filters.keys():
            del filters[Const.GROUP_KPI_NAME]

        exclude_manufacturer = filters[Const.EXCLUDE_MANUFACTURER].strip()
        if exclude_manufacturer != "":
            filters[Const.MANUFACTURER] = (exclude_manufacturer,
                                           Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_MANUFACTURER]

        exclude_category = filters[Const.EXCLUDE_CATEGORY].strip()
        if exclude_category != "":
            filters[Const.CATEGORY] = (exclude_category, Const.EXCLUDE_FILTER)
            del filters[Const.EXCLUDE_CATEGORY]

        # filter all the empty cells
        for key in filters.keys():
            if (filters[key] == ""):
                del filters[key]
            elif isinstance(filters[key], tuple):
                filters[key] = (filters[key][0].split(","), filters[key][1])
            else:
                filters[key] = filters[key].split(",")
                filters[key] = [item.strip() for item in filters[key]]

        return self.create_filters_according_to_scif(filters)

    def create_filters_according_to_scif(self, filters):
        """
        adjusting the template names to scif names
        :param filters: only the scif filters in the template shape
        :return: the filters dictionary
        """
        convert_from_scif = {
            Const.TEMPLATE_GROUP: 'template_group',
            Const.TEMPLATE_NAME: 'template_name',
            Const.BRAND: 'brand_name',
            Const.CATEGORY: 'category',
            Const.MANUFACTURER: 'manufacturer_name',
            Const.PRODUCT_TYPE: 'product_type',
            Const.MULTIPACK: 'MPACK'
        }
        for key in filters.keys():
            filters[convert_from_scif[key]] = filters.pop(key)
        return filters
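
    # Illustrative only (hypothetical template row): a raw filters dict such as
    #     {Const.BRAND: 'Coca-Cola, Fanta', Const.EXCLUDE_MANUFACTURER: 'Acme'}
    # comes out of the two methods above as
    #     {'brand_name': ['Coca-Cola', 'Fanta'],
    #      'manufacturer_name': (['Acme'], Const.EXCLUDE_FILTER)}
    # Deleting keys while iterating over filters.keys() is safe here only
    # because Python 2's keys() returns a list copy.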

    def count_of_scenes(self, filtered_df, filters, target_operator, target):
        """
        calculate the count of scene types
        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :param target_operator: the operation to do, + for returning a dataframe (used in group count)
        :param target: the target
        :return: dataframe for group counts +, number of scenes for all other functions
        """
        scene_data = filtered_df[self.tools.get_filter_condition(
            filtered_df, **filters)]
        if target_operator == '+':

            # filter by scene_id and by template_name (scene type)
            scene_types_groupby = scene_data.groupby(
                ['template_name', 'scene_id'])['facings'].sum().reset_index()
            number_of_scenes = scene_types_groupby[
                scene_types_groupby['facings'] >= target]
        else:
            number_of_scenes = len(scene_data['scene_id'].unique())
        return number_of_scenes

    def count_of_sos(self, filtered_df, filters):
        """
        calculating the share of shelf
        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :return: the number of different scenes answered the condition  (hard coded 50%)
        """
        scene_data = filtered_df[self.tools.get_filter_condition(
            filtered_df, **filters)]
        scene_data = scene_data.rename(columns={"facings": "facings_nom"})

        # filter by scene_id and by template_name (scene type)
        scene_types_groupby = scene_data.groupby(['template_name', 'scene_id'
                                                  ])['facings_nom'].sum()
        all_products_groupby = self.scif.groupby(['template_name', 'scene_id'
                                                  ])['facings'].sum()
        merge_result = pd.concat((scene_types_groupby, all_products_groupby),
                                 axis=1,
                                 join='inner').reset_index()
        return len(merge_result[
            merge_result['facings_nom'] >= merge_result['facings'] * 0.5])
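
    # Worked example: if a scene holds 20 facings in total and 12 of them match
    # the filters, then 12 >= 20 * 0.5 and that scene counts toward the result.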

    def count_of_facings(self, filtered_df, filters, consider_few, target):
        """
        calculates the count of facings
        :param filtered_df: the first filtered (no scif filters) dataframe
        :param filters: the scif filters
        :param consider_few: in case there is a need to consider more than one brand
        :param target: the target to pass
        :return: the total number of facings
        """
        facing_data = filtered_df[self.tools.get_filter_condition(
            filtered_df, **filters)]
        if consider_few != "":
            facing_data_groupby = facing_data.groupby(['brand_name'
                                                       ])['facings'].sum()
            if len(facing_data_groupby[
                    facing_data_groupby >= target]) >= consider_few:
                number_of_facings = facing_data['facings'].sum()
            else:
                number_of_facings = 0
        else:
            number_of_facings = facing_data['facings'].sum()
        return number_of_facings

    def get_new_kpi_static_data(self):
        """
        This function extracts the static new KPI data (new tables) and saves it into one global data frame.
        The data is taken from static.kpi_level_2.
        """
        query = CCBRQueries.get_new_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def write_to_db_result_new_tables(self,
                                      fk,
                                      numerator_id,
                                      numerator_result,
                                      result,
                                      denominator_id=None,
                                      denominator_result=None,
                                      score=None):
        """
        This function creates the result data frame of new rables KPI,
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        table = KPI_NEW_TABLE
        attributes = self.create_attributes_dict_new_tables(
            fk, numerator_id, numerator_result, denominator_id,
            denominator_result, result, score)
        query = insert(attributes, table)
        self.kpi_results_new_tables_queries.append(query)

    def create_attributes_dict_new_tables(self, kpi_fk, numerator_id,
                                          numerator_result, denominator_id,
                                          denominator_result, result, score):
        """
        This function creates a data frame with all attributes needed for saving in KPI results new tables.
        """
        attributes = pd.DataFrame(
            [(kpi_fk, self.session_id, numerator_id, numerator_result,
              denominator_id, denominator_result, result, score)],
            columns=[
                'kpi_level_2_fk', 'session_fk', 'numerator_id',
                'numerator_result', 'denominator_id', 'denominator_result',
                'result', 'score'
            ])
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        insert_queries = self.merge_insert_queries(
            self.kpi_results_new_tables_queries)
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_query = CCBRQueries.get_delete_session_results_query(
            self.session_uid, self.session_id)
        cur.execute(delete_query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
        self.rds_conn.disconnect_rds()

    @staticmethod
    def merge_insert_queries(insert_queries):
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(
                group, ',\n'.join(query_groups[group])))
        return merged_queries
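
# Illustrative only: merge_insert_queries() groups INSERT statements by their
# prefix (everything before 'VALUES ') and folds the value tuples into one
# multi-row INSERT. With hypothetical inputs:
#
#     queries = [
#         "INSERT INTO report.kpi_level_2_results (kpi_level_2_fk, result) VALUES (1, 10)",
#         "INSERT INTO report.kpi_level_2_results (kpi_level_2_fk, result) VALUES (2, 20)",
#     ]
#     CCBRToolBox.merge_insert_queries(queries)
#     # -> ["INSERT INTO report.kpi_level_2_results (kpi_level_2_fk, result) VALUES (1, 10),\n(2, 20)"]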
コード例 #23
0
class PNGJP_SAND2KpiQualitative_ToolBox(Consts):
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    EXCLUDE_EMPTY = False
    INCLUDE_EMPTY = True
    EXCLUDE_IRRELEVANT = False
    INCLUDE_IRRELEVANT = True

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.all_templates = self.data_provider[Data.ALL_TEMPLATES]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_type = self.data_provider[Data.STORE_INFO][
            StoreInfoConsts.STORE_TYPE].values[0]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        self.data_provider.probe_groups = self.get_probe_group(
            self.data_provider.session_uid)
        self.tools = PNGJP_SAND2GENERALToolBox(self.data_provider,
                                               self.output,
                                               rds_conn=self.rds_conn)
        self.template_name = 'TemplateQualitative.xlsx'
        self.TEMPLATE_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..', 'Data',
            self.template_name)
        self.template_data = parse_template(self.TEMPLATE_PATH, self.HIERARCHY)
        self.golden_zone_data = parse_template(self.TEMPLATE_PATH,
                                               self.GOLDEN_ZONE)
        self.golden_zone_data_criteria = parse_template(
            self.TEMPLATE_PATH, self.GOLDEN_ZONE_CRITERIA)
        self.block_data = parse_template(self.TEMPLATE_PATH, self.BLOCK)
        self.adjacency_data = parse_template(self.TEMPLATE_PATH,
                                             self.ADJACENCY)
        self.anchor_data = parse_template(self.TEMPLATE_PATH, self.ANCHOR)
        self.perfect_execution_data = parse_template(self.TEMPLATE_PATH,
                                                     self.PERFECT_EXECUTION)
        self.category_list_data = parse_template(self.TEMPLATE_PATH,
                                                 self.CATEGORY_LIST)
        self.product_groups_data = parse_template(self.TEMPLATE_PATH,
                                                  self.PRODUCT_GROUP)
        self._custom_templates = {}
        self.scenes_types_for_categories = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.kpi_results_queries = []
        self.kpi_results = {}
        self.atomic_results = {}
        self.categories = self.all_products[
            ProductsConsts.CATEGORY_FK].unique().tolist()
        self.display_types = [
            'Aisle', 'Casher', 'End-shelf', 'Entrance', 'Island', 'Side-End',
            'Side-net'
        ]
        self.custom_scif_queries = []
        self.session_fk = self.data_provider[Data.SESSION_INFO][
            BasicConsts.PK].iloc[0]
        self.block = Block(data_provider=self.data_provider,
                           rds_conn=self.rds_conn)
        self.adjacency = Adjancency(data_provider=self.data_provider,
                                    rds_conn=self.rds_conn)
        self.fix_utf_space_problem()
        self.kpi_scores = {}

    @property
    def rds_conn(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except:
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(self.project_name,
                                                DbUsers.CalculationEng)
        return self._rds_conn

    @property
    def _allowed_products(self):
        return {
            ProductsConsts.PRODUCT_TYPE:
            [ProductTypeConsts.OTHER, ProductTypeConsts.EMPTY]
        }

    def get_template(self, name):
        if name not in self._custom_templates.keys():
            self._custom_templates[name] = parse_template(
                self.TEMPLATE_PATH, name)
        return self._custom_templates[name]

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = PNGJP_SAND2Queries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = PNGJP_SAND2Queries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        match_display = match_display.merge(self.scene_info[[
            SceneInfoConsts.SCENE_FK, SceneInfoConsts.TEMPLATE_FK
        ]],
                                            on=SceneInfoConsts.SCENE_FK,
                                            how='left')
        match_display = match_display.merge(self.all_templates,
                                            on=TemplatesConsts.TEMPLATE_FK,
                                            how='left',
                                            suffixes=['', '_y'])
        return match_display

    def get_probe_group(self, session_uid):
        query = PNGJP_SAND2Queries.get_probe_group(session_uid)
        probe_group = pd.read_sql_query(query, self.rds_conn.db)
        return probe_group

    def fix_utf_space_problem(self):
        self.template_data['fixed KPI name'] = self.template_data[
            'KPI name'].str.replace(' ', '')
        self.golden_zone_data['fixed KPI name'] = self.golden_zone_data[
            'KPI name'].str.replace(' ', '')
        self.block_data['fixed KPI name'] = self.block_data[
            'KPI name'].str.replace(' ', '')
        self.adjacency_data['fixed KPI name'] = self.adjacency_data[
            'KPI name'].str.replace(' ', '')
        self.anchor_data['fixed KPI name'] = self.anchor_data[
            'KPI name'].str.replace(' ', '')
        self.perfect_execution_data[
            'fixed KPI name'] = self.perfect_execution_data[
                'KPI name'].str.replace(' ', '')
        self.kpi_static_data['fixed atomic_kpi_name'] = self.kpi_static_data[
            'atomic_kpi_name'].str.replace(' ', '')

    @log_runtime('Main Calculation')
    def main_calculation(self):
        """
        This function calculates the KPI results.
        """
        for category in self.template_data['Category Name'].unique().tolist():
            category = \
                self.all_products[
                    self.all_products[ProductsConsts.CATEGORY_LOCAL_NAME].str.encode(
                        HelperConsts.UTF8) == category.encode(HelperConsts.UTF8)][
                    ProductsConsts.CATEGORY_LOCAL_NAME].values[0]
            self.category_calculation(category)

        # for kpi_set in self.template_data[self.SET_NAME].unique().tolist():
        for kpi_set in [
                'Golden Zone', 'Block', 'Adjacency', 'Perfect Execution',
                'Anchor'
        ]:
            self.write_to_db_result(score=None,
                                    level=self.LEVEL1,
                                    kpi_set_name=kpi_set)
            kpi_set_fk = self.kpi_static_data.loc[
                self.kpi_static_data['kpi_set_name'] == kpi_set][
                    KpsResults.KPI_SET_FK].values[0]
            set_kpis = self.kpi_static_data.loc[
                self.kpi_static_data['kpi_set_name'] ==
                kpi_set]['kpi_name'].unique().tolist()
            for kpi in set_kpis:
                self.write_to_db_result(score=None,
                                        level=self.LEVEL2,
                                        kpi_set_fk=kpi_set_fk,
                                        kpi_name=kpi)

    def category_calculation(self, category):
        self.calculation_per_entity(category)
        self.category_aggregation_calculation(category)

    def scene_type_not_exists(self, sfs):
        # True if none of the given scene types appears in the session
        session_scene_types = self.scif["template_name"].unique().tolist()
        return not set(session_scene_types).intersection(sfs)

    def calculation_per_entity(self, category):
        template_data = self.template_data[
            self.template_data['Category Name'].str.encode(
                HelperConsts.UTF8) == category.encode(HelperConsts.UTF8)]
        filters = {ProductsConsts.CATEGORY_LOCAL_NAME: category}

        for kpi in template_data['fixed KPI name'].unique().tolist():
            entity_kpis = template_data.loc[
                template_data['fixed KPI name'].str.encode(
                    HelperConsts.UTF8) == kpi.encode(HelperConsts.UTF8)]
            entity_filters = filters

            for p in xrange(len(entity_kpis)):
                try:
                    score = threshold = result = None
                    params = entity_kpis.iloc[p]
                    set_name = params[self.SET_NAME]
                    kpi_type = params[self.KPI_TYPE]
                    scenes_filters = self.get_scenes_filters(params)
                    kpi_filters = dict(scenes_filters, **entity_filters)

                    if self.scene_type_not_exists(
                            scenes_filters['template_name']):
                        continue

                    if kpi_type == self.GOLDEN_ZONE:
                        kpi_params = self.golden_zone_data[
                            self.golden_zone_data['fixed KPI name'].str.encode(
                                HelperConsts.UTF8) == kpi.encode(
                                    HelperConsts.UTF8)]
                        score, result, threshold = self.calculate_golden_zone(
                            kpi, kpi_filters, kpi_params)

                    elif kpi_type == self.BLOCK:
                        kpi_params = self.block_data[
                            self.block_data['fixed KPI name'].str.encode(
                                HelperConsts.UTF8) == kpi.encode(
                                    HelperConsts.UTF8)]
                        score, result, threshold = self.calculate_block(
                            kpi, kpi_filters, kpi_params)

                    elif kpi_type == self.ANCHOR:
                        kpi_params = self.anchor_data[
                            self.anchor_data['fixed KPI name'].str.encode(
                                HelperConsts.UTF8) == kpi.encode(
                                    HelperConsts.UTF8)]
                        score, result, threshold = self.calculate_anchor(
                            kpi, kpi_filters, kpi_params)

                    elif kpi_type == self.ADJACENCY:
                        kpi_params = self.adjacency_data[
                            self.adjacency_data['fixed KPI name'].str.encode(
                                HelperConsts.UTF8) == kpi.encode(
                                    HelperConsts.UTF8)]
                        score, result, threshold = self.calculate_adjacency(
                            kpi, kpi_filters, kpi_params)

                    else:
                        Log.debug(
                            "KPI type '{}' is not supported".format(kpi_type))
                        continue

                    extra_data = self.get_extra_data_from_params(kpi_params)

                    self.kpi_scores.update({kpi: score})
                    self.write_result(score,
                                      result,
                                      threshold,
                                      kpi,
                                      category,
                                      set_name,
                                      template_data,
                                      extra_data=extra_data)
                except Exception as ex:
                    Log.warning("Exception: {}. No score/result for '{}'".format(
                        ex.message, kpi_type))

    def category_aggregation_calculation(self, category):
        template_data = self.template_data[
            (self.template_data['Category Name'].str.encode(HelperConsts.UTF8)
             == category.encode(HelperConsts.UTF8))
            & (self.template_data['Set Name'] == 'Perfect Execution')]
        for kpi in template_data['fixed KPI name'].unique().tolist():
            entity_kpis = template_data.loc[
                template_data['fixed KPI name'].str.encode(
                    HelperConsts.UTF8) == kpi.encode(HelperConsts.UTF8)]
            for p in xrange(len(entity_kpis)):
                score = threshold = result = None
                params = entity_kpis.iloc[p]
                set_name = params[self.SET_NAME]
                kpi_type = params[self.KPI_TYPE]

                st = [
                    x.strip()
                    for x in params['Scene Types to Include'].split(",")
                ]
                if self.scene_type_not_exists(st):
                    continue

                if kpi_type == self.PERFECT_EXECUTION:
                    score, result, threshold = self.calculate_perfect_execution(
                        kpi)

                    self.write_result(score, result, threshold, kpi, category,
                                      set_name, template_data)

    def _get_filtered_products(self):
        products = self.data_provider.products.copy()
        filtered_products_fk = set(
            products[ProductsConsts.PRODUCT_FK].tolist())
        return {ProductsConsts.PRODUCT_FK: list(filtered_products_fk)}

    def _get_ean_codes_by_product_group_id(self,
                                           column_name=Consts.PRODUCT_GROUP_ID,
                                           **params):
        group_id = params[column_name].values[0].split('.')[0]
        return self.product_groups_data[
            self.product_groups_data['Group Id'] == group_id][
                'Product EAN Code'].values[0].split(self.SEPARATOR)

    def _get_allowed_products(self, allowed):
        allowed_products = set()

        # allowed.setdefault(ProductsConsts.PRODUCT_TYPE, []).
        # extend(self._allowed_products[ProductsConsts.PRODUCT_TYPE])

        for key, value in allowed.items():
            products = self.data_provider.products.copy()
            allowed_bulk = set(products[self.tools.get_filter_condition(
                products, **{key: value})][ProductsConsts.PRODUCT_FK].tolist())
            allowed_products.update(allowed_bulk)

        return {ProductsConsts.PRODUCT_FK: list(allowed_products)}

    def check_bay(self, matches, probe_group, threshold, **filters):
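        # Returns the left-most and right-most bay numbers in the given probe
        # group that hold at least `threshold` matching facings, or an empty
        # dict if no bay qualifies.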
        relevant_bays = matches[(matches[ProductsConsts.PRODUCT_FK].isin(
            filters[ProductsConsts.PRODUCT_FK]))
                                & (matches['probe_group_id'] == probe_group)].copy()
        relevant_bays['freq'] = relevant_bays.groupby(
            MatchesConsts.BAY_NUMBER)[MatchesConsts.BAY_NUMBER].transform(
                'count')

        relevant_bays = relevant_bays[relevant_bays['freq'] >= threshold][
            MatchesConsts.BAY_NUMBER].unique().tolist()

        if relevant_bays:
            relevant_bays.sort()
            return {'left': relevant_bays[0], 'right': relevant_bays[-1]}
        return {}

    def get_scenes_filters(self, params):
        filters = {}
        if params[self.SCENE_TYPES_TO_INCLUDE]:
            template_names = params[self.SCENE_TYPES_TO_INCLUDE].split(
                self.SEPARATOR)
            if template_names:
                filters[TemplatesConsts.TEMPLATE_NAME] = template_names
        return filters

    def write_to_db_result(self,
                           score,
                           level,
                           threshold=None,
                           level3_score=None,
                           **kwargs):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query into the queries' list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(score, level, threshold,
                                                 level3_score, **kwargs)
        if level == self.LEVEL1:
            table = Consts.KPS_RESULT
        elif level == self.LEVEL2:
            table = Consts.KPK_RESULT
        elif level == self.LEVEL3:
            table = Consts.KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self,
                               score,
                               level,
                               threshold=None,
                               level3_score=None,
                               **kwargs):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.

        """
        if level == self.LEVEL1:
            set_name = kwargs['kpi_set_name']
            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name']
                                          == set_name][
                                              KpsResults.KPI_SET_FK].values[0]
            if score is not None:
                attributes = pd.DataFrame(
                    [(set_name, self.session_uid, self.store_id,
                      self.visit_date.isoformat(), format(score,
                                                          '.2f'), set_fk)],
                    columns=[
                        KpsResults.KPS_NAME, KpiResults.SESSION_UID,
                        KpiResults.STORE_FK, KpiResults.VISIT_DATE,
                        KpsResults.SCORE_1, KpsResults.KPI_SET_FK
                    ])
            else:
                attributes = pd.DataFrame(
                    [(set_name, self.session_uid, self.store_id,
                      self.visit_date.isoformat(), None, set_fk)],
                    columns=[
                        KpsResults.KPS_NAME, KpiResults.SESSION_UID,
                        KpiResults.STORE_FK, KpiResults.VISIT_DATE,
                        KpsResults.SCORE_1, KpsResults.KPI_SET_FK
                    ])
        elif level == self.LEVEL2:
            kpi_name = kwargs['kpi_name']
            kpi_set_fk = kwargs[KpsResults.KPI_SET_FK]
            kpi_fk = \
                self.kpi_static_data[(self.kpi_static_data['kpi_name'].str.encode(HelperConsts.UTF8) == kpi_name.encode(
                    HelperConsts.UTF8)) &
                                     (self.kpi_static_data[KpsResults.KPI_SET_FK] == kpi_set_fk)][
                    KpiResults.KPI_FK].values[0]

            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  kpi_fk, kpi_name, score)],
                columns=[
                    KpkResults.SESSION_UID, KpkResults.STORE_FK,
                    KpkResults.VISIT_DATE, KpiResults.KPI_FK,
                    KpkResults.KPK_NAME, KpiResults.SCORE
                ])
            self.kpi_results[kpi_name] = score
        elif level == self.LEVEL3:
            kpi_name = kwargs['kpi_name']
            kpi_fk = self.kpi_static_data[
                self.kpi_static_data['kpi_name'].str.encode(
                    HelperConsts.UTF8) == kpi_name.encode(HelperConsts.UTF8)][
                        KpiResults.KPI_FK].values[0]
            atomic_kpi_name = kwargs['atomic_kpi_name']
            atomic_kpi_fk = kwargs[KpiResults.ATOMIC_KPI_FK]
            kpi_set_name = kwargs['kpi_set_name']
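            # The four branches below build the same row; they differ only in
            # which of the optional SCORE/THRESHOLD columns are populated,
            # depending on whether `level3_score` and `threshold` were passed.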
            if level3_score is None and threshold is None:
                attributes = pd.DataFrame(
                    [(atomic_kpi_name, self.session_uid, kpi_set_name,
                      self.store_id, self.visit_date.isoformat(),
                      datetime.utcnow().isoformat(), score, kpi_fk,
                      atomic_kpi_fk)],
                    columns=[
                        KpiResults.DISPLAY_TEXT, KpsResults.SESSION_UID,
                        KpsResults.KPS_NAME, KpsResults.STORE_FK,
                        KpsResults.VISIT_DATE, KpiResults.CALCULATION_TIME,
                        KpiResults.RESULT, KpiResults.KPI_FK,
                        KpiResults.ATOMIC_KPI_FK
                    ])

            elif level3_score is not None and threshold is None:
                attributes = pd.DataFrame(
                    [(atomic_kpi_name, self.session_uid, kpi_set_name,
                      self.store_id, self.visit_date.isoformat(),
                      datetime.utcnow().isoformat(), score, kpi_fk,
                      level3_score, None, atomic_kpi_fk)],
                    columns=[
                        KpiResults.DISPLAY_TEXT, KpsResults.SESSION_UID,
                        KpsResults.KPS_NAME, KpsResults.STORE_FK,
                        KpsResults.VISIT_DATE, KpiResults.CALCULATION_TIME,
                        KpiResults.RESULT, KpiResults.KPI_FK, KpiResults.SCORE,
                        KpiResults.THRESHOLD, KpiResults.ATOMIC_KPI_FK
                    ])
            elif level3_score is None and threshold is not None:
                attributes = pd.DataFrame(
                    [(atomic_kpi_name, self.session_uid, kpi_set_name,
                      self.store_id, self.visit_date.isoformat(),
                      datetime.utcnow().isoformat(), score, kpi_fk, threshold,
                      None, atomic_kpi_fk)],
                    columns=[
                        KpiResults.DISPLAY_TEXT, KpsResults.SESSION_UID,
                        KpsResults.KPS_NAME, KpsResults.STORE_FK,
                        KpsResults.VISIT_DATE, KpiResults.CALCULATION_TIME,
                        KpiResults.RESULT, KpiResults.KPI_FK,
                        KpiResults.THRESHOLD, KpiResults.SCORE,
                        KpiResults.ATOMIC_KPI_FK
                    ])
            else:
                attributes = pd.DataFrame(
                    [(atomic_kpi_name, self.session_uid, kpi_set_name,
                      self.store_id, self.visit_date.isoformat(),
                      datetime.utcnow().isoformat(), score, kpi_fk, threshold,
                      level3_score, atomic_kpi_fk)],
                    columns=[
                        KpiResults.DISPLAY_TEXT, KpsResults.SESSION_UID,
                        KpsResults.KPS_NAME, KpsResults.STORE_FK,
                        KpsResults.VISIT_DATE, KpiResults.CALCULATION_TIME,
                        KpiResults.RESULT, KpiResults.KPI_FK,
                        KpiResults.THRESHOLD, KpiResults.SCORE,
                        KpiResults.ATOMIC_KPI_FK
                    ])
            if kpi_set_name not in self.atomic_results:
                self.atomic_results[kpi_set_name] = {}
            self.atomic_results[kpi_set_name][atomic_kpi_name] = score
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        cur = self.rds_conn.db.cursor()
        # delete_queries = PNGJPQueries.get_delete_session_results_query(self.session_uid)
        # for query in delete_queries:
        #     cur.execute(query)
        queries = self.merge_insert_queries(self.kpi_results_queries)
        for query in queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    def merge_insert_queries(self, insert_queries):
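        # Group INSERT statements by their static prefix (everything before
        # 'VALUES') and concatenate the value tuples, at most 10**4 rows per
        # merged statement, so many single-row inserts into the same table
        # collapse into a few multi-row inserts.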
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return merged_queries

    def get_extra_data_from_params(self, params):
        extra_df = {}
        if 'Brand' in params.columns and params['Brand'].values[0] != "":
            extra_df['brand'] = params['Brand'].values[0]
        else:
            extra_df['brand'] = 'XX'

        if 'Product Group Id' in params.columns and params[
                'Product Group Id'].values[0] != "":
            extra_df['group'] = params['Product Group Id'].values[0]
        elif 'Product Group Id;A' in params.columns and 'Product Group Id;B' in params.columns:
            extra_df['group'] = "A-" + params['Product Group Id;A'].values[0] + ";B-" + \
                                params['Product Group Id;B'].values[0]
        else:
            extra_df['group'] = 'XX'
        return extra_df

    @kpi_runtime(kpi_desc='calculate_golden_zone', project_name='pngjp')
    def calculate_golden_zone(self, kpi, kpi_filters, params):
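        # The result is the group's golden-zone linear facings as a share of
        # its total availability (total_group_skus), expressed as a
        # percentage; the score is pass/fail against the template threshold.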

        kpi_filter = kpi_filters.copy()
        assortment_entity = ProductsConsts.PRODUCT_EAN_CODE
        if params[self.BRANDS].values[0]:
            kpi_filter[ProductsConsts.BRAND_LOCAL_NAME] = params[
                self.BRANDS].values[0]
            total_group_skus = int(
                self.tools.calculate_availability(**kpi_filter))
        elif params[self.PRODUCT_GROUP_ID].values[0]:
            product_eans = self._get_ean_codes_by_product_group_id(**params)
            kpi_filter[assortment_entity] = product_eans
            total_group_skus = int(
                self.tools.calculate_availability(**kpi_filter))
        else:
            product_eans = params['Product EAN Code'].values[0].split(
                self.SEPARATOR)
            kpi_filter[assortment_entity] = product_eans
            total_group_skus = int(
                self.tools.calculate_availability(**kpi_filter))

        result = int(
            self.tools.calculate_linear_facings_on_golden_zone(
                self.golden_zone_data_criteria, **kpi_filter))
        score = 0
        threshold = float(params[self.GROUP_GOLDEN_ZONE_THRESHOLD].values[0])
        if total_group_skus:
            ratio = result / float(total_group_skus)
            score = 100 if ratio >= threshold else 0
            result = ratio * 100
        return score, result, threshold

    @kpi_runtime(kpi_desc='calculate_block', project_name='pngjp')
    def calculate_block(self, kpi, kpi_filters, params):
        allowed_products_filters = {}
        threshold = 0
        kpi_filter = kpi_filters.copy()
        block_threshold = params['Threshold'].values[0]
        if params[self.PRODUCT_GROUP_ID].values[0] is not None:
            product_eans = self._get_ean_codes_by_product_group_id(**params)
            kpi_filter[ProductsConsts.PRODUCT_EAN_CODE] = product_eans
        if (params[self.ALLOWED_PRODUCT_GROUP_ID].values[0] is not None) and (
                params[self.ALLOWED_PRODUCT_GROUP_ID].values[0] != ''):
            product_eans = self._get_ean_codes_by_product_group_id(
                column_name=self.ALLOWED_PRODUCT_GROUP_ID, **params)
            allowed_products_filters[
                ProductsConsts.PRODUCT_EAN_CODE] = product_eans
        else:
            allowed_products_filters = None
        if params[self.VERTICAL].values[0] == 'Y':
            block_result, num_of_shelves = self.tools.calculate_block_together(
                vertical=True,
                allowed_products_filters=allowed_products_filters,
                minimum_block_ratio=float(block_threshold),
                **kpi_filter)
            score = 100 if block_result and num_of_shelves >= 3 else 0
            result = 1 if block_result and num_of_shelves >= 3 else 0

        else:
            block_result = self.tools.calculate_block_together(
                minimum_block_ratio=float(block_threshold),
                allowed_products_filters=allowed_products_filters,
                **kpi_filter)
            score = 100 if block_result else 0
            result = 1 if block_result else 0
        return score, result, threshold

    @kpi_runtime(kpi_desc='calculate_anchor', project_name='pngjp')
    def calculate_anchor(self, kpi, kpi_filters, params):
        score = result = threshold = 0
        kpi_filter = kpi_filters.copy()
        minimum_products = int(params['Minimum Products'].values[0])
        params.pop('Minimum Products')
        block_threshold = params['Threshold'].values[0]
        params.pop('Threshold')

        product_eans = self._get_ean_codes_by_product_group_id(**params)
        kpi_filter[ProductsConsts.PRODUCT_EAN_CODE] = product_eans

        allowed = {
            ProductsConsts.PRODUCT_TYPE: [
                ProductTypeConsts.OTHER, ProductTypeConsts.EMPTY,
                ProductTypeConsts.IRRELEVANT
            ]
        }
        # allowed = params['allowed']
        allowed_products = self._get_allowed_products(allowed)
        filtered_products_all = self._get_filtered_products()
        filter_products_after_exclude = {
            ProductsConsts.PRODUCT_FK:
            list(
                set(filtered_products_all[ProductsConsts.PRODUCT_FK]) -
                set(allowed_products[ProductsConsts.PRODUCT_FK]))
        }

        filtered_products_sub_group = params.copy().to_dict()
        filtered_products_sub_group.update(kpi_filter)

        separate_filters, relevant_scenes = self.tools.separate_location_filters_from_product_filters(
            **filtered_products_sub_group)

        for scene in relevant_scenes:
            separate_filters.update({SceneInfoConsts.SCENE_FK: scene})
            kpi_filter.update({SceneInfoConsts.SCENE_FK: scene})
            block_result = self.tools.calculate_block_together(
                minimum_block_ratio=float(block_threshold), **kpi_filter)

            if block_result:
                matches = self.tools.match_product_in_scene
                relevant_probe_group = matches[matches[MatchesConsts.SCENE_FK]
                                               == scene]
                for probe_group in relevant_probe_group[
                        'probe_group_id'].unique().tolist():
                    relevant_bay = self.check_bay(
                        relevant_probe_group, probe_group, minimum_products,
                        **filter_products_after_exclude)
                    if not relevant_bay:
                        continue
                    for direction in ['left', 'right']:
                        separate_filters.update({
                            MatchesConsts.BAY_NUMBER:
                            relevant_bay[direction]
                        })
                        edge = self.tools.calculate_products_on_edge(
                            position=direction,
                            edge_population=filter_products_after_exclude,
                            min_number_of_shelves=2,
                            **separate_filters)
                        if edge[0] > 0:
                            score = 100
                            result = 1
                            break
        return score, result, threshold

    @kpi_runtime(kpi_desc='calculate_adjacency', project_name='pngjp')
    def calculate_adjacency(self, kpi, kpi_filters, params):
        score = result = threshold = 0
        kpi_filter = kpi_filters.copy()
        target = params['Threshold']
        target = float(target.values[0])
        # params.get on a DataFrame returns None for a missing column, so
        # guard against None before checking .empty.
        a_target = params.get('Threshold A')
        if a_target is not None and not a_target.empty:
            params.pop('Threshold A')
            a_target = float(a_target.values[0])
        b_target = params.get('Threshold B')
        if b_target is not None and not b_target.empty:
            params.pop('Threshold B')
            b_target = float(b_target.values[0])

        group_a = {
            ProductsConsts.PRODUCT_EAN_CODE:
            self._get_ean_codes_by_product_group_id('Product Group Id;A',
                                                    **params)
        }
        group_b = {
            ProductsConsts.PRODUCT_EAN_CODE:
            self._get_ean_codes_by_product_group_id('Product Group Id;B',
                                                    **params)
        }

        # allowed_filter = self._get_allowed_products({ProductsConsts.PRODUCT_TYPE:
        # ([self.EMPTY, self.IRRELEVANT], self.EXCLUDE_FILTER)})

        allowed_filter = self._get_allowed_products({
            ProductsConsts.PRODUCT_TYPE: [
                ProductTypeConsts.IRRELEVANT, ProductTypeConsts.EMPTY,
                ProductTypeConsts.OTHER
            ]
        })

        allowed_filter_without_other = self._get_allowed_products({
            ProductsConsts.PRODUCT_TYPE:
            [ProductTypeConsts.IRRELEVANT, ProductTypeConsts.EMPTY]
        })
        scene_filters = {
            TemplatesConsts.TEMPLATE_NAME:
            kpi_filter[TemplatesConsts.TEMPLATE_NAME]
        }

        filters, relevant_scenes = self.tools.separate_location_filters_from_product_filters(
            **scene_filters)

        for scene in relevant_scenes:
            adjacency = self.tools.calculate_adjacency(
                group_a, group_b, {SceneInfoConsts.SCENE_FK: scene},
                allowed_filter, allowed_filter_without_other, a_target,
                b_target, target)
            if adjacency:
                direction_col = params.get('Direction')
                direction = direction_col.values[0] if direction_col is not None else 'All'
                if direction == 'All':
                    score = result = adjacency
                else:
                    # a = self.data_provider.products[
                    # self.tools.get_filter_condition(self.data_provider.products, **group_a)]
                    # [ProductsConsts.PRODUCT_FK].tolist()
                    #
                    # b = self.data_provider.products[
                    # self.tools.get_filter_condition(self.data_provider.products, **group_b)]
                    # [ProductsConsts.PRODUCT_FK].tolist()

                    # a = self.scif[self.scif[ProductsConsts.PRODUCT_FK].isin(a)]
                    # [ProductsConsts.PRODUCT_NAME].drop_duplicates()
                    # b = self.scif[self.scif[ProductsConsts.PRODUCT_FK].isin(b)]
                    # [ProductsConsts.PRODUCT_NAME].drop_duplicates()

                    edges_a = self.tools.calculate_block_edges(
                        minimum_block_ratio=a_target,
                        **dict(group_a,
                               allowed_products_filters=allowed_filter,
                               **{MatchesConsts.SCENE_FK: scene}))
                    edges_b = self.tools.calculate_block_edges(
                        minimum_block_ratio=b_target,
                        **dict(group_b,
                               allowed_products_filters=allowed_filter,
                               **{MatchesConsts.SCENE_FK: scene}))

                    if edges_a and edges_b:
                        if direction == 'Vertical':
                            if sorted(set(edges_a['shelfs'])) == sorted(set(edges_b['shelfs'])) and \
                                    len(set(edges_a['shelfs'])) == 1:
                                score = result = 0
                            elif max(edges_a['shelfs']) <= min(
                                    edges_b['shelfs']):
                                score = 100
                                result = 1
                            # elif max(edges_b['shelfs']) <= min(edges_a['shelfs']):
                            #     score = 100
                            #     result = 1
                        elif direction == 'Horizontal':
                            if set(edges_a['shelfs']).intersection(
                                    edges_b['shelfs']):
                                extra_margin_a = (
                                    edges_a['visual']['right'] -
                                    edges_a['visual']['left']) / 10
                                extra_margin_b = (
                                    edges_b['visual']['right'] -
                                    edges_b['visual']['left']) / 10
                                edges_a_right = edges_a['visual'][
                                    'right'] - extra_margin_a
                                edges_b_left = edges_b['visual'][
                                    'left'] + extra_margin_b
                                edges_b_right = edges_b['visual'][
                                    'right'] - extra_margin_b
                                edges_a_left = edges_a['visual'][
                                    'left'] + extra_margin_a
                                if edges_a_right <= edges_b_left:
                                    score = 100
                                    result = 1
                                elif edges_b_right <= edges_a_left:
                                    score = 100
                                    result = 1
        return score, result, threshold

    def calculate_perfect_execution(self, kpi):
        score = result = threshold = 0
        tested_kpis = self.perfect_execution_data[
            self.perfect_execution_data['fixed KPI name'].str.encode(
                HelperConsts.UTF8) == kpi.encode(HelperConsts.UTF8)]
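        # Perfect Execution passes only when every tested KPI in the group
        # scored 100; the first failure short-circuits the loop.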
        for i, tested_kpi in tested_kpis.iterrows():
            try:
                param_score = int(
                    self.kpi_scores[tested_kpi['KPI test name'].replace(
                        ' ', '')])
            except Exception:
                # A missing or non-numeric score counts as a failure.
                param_score = 0
            if param_score == 100:
                score = 100
                result = 1
            else:
                score = result = 0
                break
        return score, result, threshold

    def write_result(self,
                     score,
                     result,
                     threshold,
                     kpi,
                     category,
                     set_name,
                     template_data,
                     extra_data=None):
        kpi_name = template_data.loc[
            template_data['fixed KPI name'].str.encode(HelperConsts.UTF8) ==
            kpi.encode(HelperConsts.UTF8)]['KPI name'].values[0]
        if extra_data is not None:
            brand = extra_data['brand']
            group = extra_data['group']
            kpi_name = self.KPI_FORMAT.format(
                category=category.encode(HelperConsts.UTF8),
                brand=brand.encode(HelperConsts.UTF8),
                group=str(group),
                question=kpi_name.encode(HelperConsts.UTF8))
        else:
            kpi_name = self.KPI_FORMAT.format(
                category=category.encode(HelperConsts.UTF8),
                brand='XX',
                group='XX',
                question=kpi_name.encode(HelperConsts.UTF8))
        while '  ' in kpi_name:
            kpi_name = kpi_name.replace('  ', ' ')
        atomic_kpi_fk = \
            self.kpi_static_data[
                self.kpi_static_data['fixed atomic_kpi_name'].str.encode(HelperConsts.UTF8) == kpi.encode(
                    HelperConsts.UTF8)][
                KpiResults.ATOMIC_KPI_FK].values[0]

        if result is not None or score is not None:
            if not kpi_name:
                kpi_name = self.KPI_FORMAT.format(category=category)
            if score is None and threshold is None:
                self.write_to_db_result(score=result,
                                        level=self.LEVEL3,
                                        kpi_set_name=set_name,
                                        kpi_name=category,
                                        atomic_kpi_name=kpi_name,
                                        atomic_kpi_fk=atomic_kpi_fk)

            elif score is not None and threshold is None:
                self.write_to_db_result(score=result,
                                        level=self.LEVEL3,
                                        level3_score=score,
                                        kpi_set_name=set_name,
                                        kpi_name=category,
                                        atomic_kpi_name=kpi_name,
                                        atomic_kpi_fk=atomic_kpi_fk)

            elif score is None and threshold is not None:
                self.write_to_db_result(score=result,
                                        level=self.LEVEL3,
                                        threshold=threshold,
                                        kpi_set_name=set_name,
                                        kpi_name=category,
                                        atomic_kpi_name=kpi_name,
                                        atomic_kpi_fk=atomic_kpi_fk)
            else:
                self.write_to_db_result(score=result,
                                        level=self.LEVEL3,
                                        level3_score=score,
                                        threshold=threshold,
                                        kpi_set_name=set_name,
                                        kpi_name=category,
                                        atomic_kpi_name=kpi_name,
                                        atomic_kpi_fk=atomic_kpi_fk)
コード例 #24
0
class BATRUAssortment:
    def __init__(self):
        self.parsed_args = _parse_arguments()
        self.project = self.parsed_args.project
        self.rds_conn = self.rds_connect
        self.file_path = self.parsed_args.file
        self.start_date = self.parsed_args.date
        self.partial_update = self.parsed_args.update
        self.store_data = self.get_store_data
        self.all_products = self.get_product_data
        self.current_top_skus = self.get_current_top_skus
        self.stores = {}
        self.products = {}
        self.all_queries = []

        if self.start_date is None:
            self.current_date = datetime.now().date()
        else:
            self.current_date = datetime.strptime(self.start_date,
                                                  '%Y-%m-%d').date()
        self.deactivate_date = self.current_date - timedelta(1)
        self.activate_date = self.current_date

        if self.partial_update in ('1', 'True', 'Yes', 'Y'):
            self.partial_update = True
        else:
            self.partial_update = False

    def upload_assortment(self):
        """
        This is the main function of the assortment.
        It validates the template and then uploads the assortment.
        :return:
        """
        Log.debug("Parsing and validating the assortment template")
        is_valid, invalid_inputs = self.p1_assortment_validator()

        Log.info("Assortment upload is started")
        self.upload_store_assortment_file()
        if not is_valid:
            Log.warning("Errors were found during the template validation")
            if invalid_inputs[INVALID_STORES]:
                Log.warning("The following stores don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_STORES]))
            if invalid_inputs[INVALID_PRODUCTS]:
                Log.warning("The following products don't exist in the DB: {}"
                            "".format(invalid_inputs[INVALID_PRODUCTS]))
        Log.info("Assortment upload is finished")

    @property
    def rds_connect(self):
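        # Probe the connection with a cheap query and reconnect once if it
        # has gone stale.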
        self.rds_conn = PSProjectConnector(self.project,
                                           DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self.rds_conn.db)
        except Exception as e:
            self.rds_conn.disconnect_rds()
            self.rds_conn = PSProjectConnector(self.project,
                                               DbUsers.CalculationEng)
        return self.rds_conn

    @property
    def get_store_data(self):
        query = "select pk as store_fk, store_number_1 as store_number from static.stores"
        self.store_data = pd.read_sql_query(query, self.rds_conn.db)
        return self.store_data

    @property
    def get_product_data(self):
        query = "select pk as product_fk, product_ean_code from static.product " \
                "where delete_date is null"
        self.all_products = pd.read_sql_query(query, self.rds_conn.db)
        return self.all_products

    @property
    def get_current_top_skus(self):
        query = """select store_fk, product_fk
                   from pservice.custom_osa
                   where end_date is null"""
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def p1_assortment_validator(self):
        """
        This function validates the store assortment template.
        It compares the OUTLET_ID (= store_number_1) and the products' ean_code to the stores and products from the DB
        :return: False in case of an error and True in case of a valid template
        """
        raw_data = self.parse_assortment_template()
        legal_template = True
        invalid_inputs = {INVALID_STORES: [], INVALID_PRODUCTS: []}
        valid_stores = self.store_data.loc[
            self.store_data['store_number'].isin(raw_data[OUTLET_ID])]
        if len(valid_stores) != len(raw_data[OUTLET_ID].unique()):
            invalid_inputs[INVALID_STORES] = list(
                set(raw_data[OUTLET_ID].unique()) -
                set(valid_stores['store_number']))
            Log.debug("The following stores don't exist in the DB: {}".format(
                invalid_inputs[INVALID_STORES]))
            legal_template = False

        valid_product = self.all_products.loc[self.all_products[EAN_CODE].isin(
            raw_data[EAN_CODE])]
        if len(valid_product) != len(raw_data[EAN_CODE].unique()):
            invalid_inputs[INVALID_PRODUCTS] = list(
                set(raw_data[EAN_CODE].unique()) -
                set(valid_product[EAN_CODE]))
            Log.debug(
                "The following products don't exist in the DB: {}".format(
                    invalid_inputs[INVALID_PRODUCTS]))
            legal_template = False
        return legal_template, invalid_inputs

    def parse_assortment_template(self):
        """
        This function turns the csv into a DataFrame.
        It tries to handle all of the format variations encountered so far (different delimiters and unicode).
        :return: DF that contains the store_number_1 (Outlet ID) and the product_ean_code of the assortments
        """
        data = pd.read_csv(self.file_path, sep='\t')
        if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
            data = pd.read_csv(self.file_path)
        if OUTLET_ID not in data.columns or EAN_CODE not in data.columns:
            data = pd.read_csv(self.file_path, encoding='utf-7')
        data = data.drop_duplicates(subset=data.columns, keep='first')
        data = data.fillna('')
        return data

    def set_end_date_for_irrelevant_assortments(self, stores_list):
        """
        This function sets an end_date to all of the irrelevant stores in the assortment.
        :param stores_list: List of the stores from the assortment template
        """
        Log.debug("Closing assortment for stores out of template")
        irrelevant_stores = self.store_data.loc[
            ~self.store_data['store_number'].
            isin(stores_list)]['store_fk'].unique().tolist()
        current_assortment_stores = self.current_top_skus['store_fk'].unique(
        ).tolist()
        stores_to_remove = list(
            set(irrelevant_stores).intersection(
                set(current_assortment_stores)))
        for store in stores_to_remove:
            query = [
                self.get_store_deactivation_query(store, self.deactivate_date)
            ]
            self.commit_results(query)
        Log.debug("Assortment is closed for ({}) stores".format(
            len(stores_to_remove)))

    def upload_store_assortment_file(self):
        raw_data = self.parse_assortment_template()
        data = []
        list_of_stores = raw_data[OUTLET_ID].unique().tolist()

        if not self.partial_update:
            self.set_end_date_for_irrelevant_assortments(list_of_stores)

        Log.debug("Preparing assortment data for update")
        store_counter = 0
        for store in list_of_stores:
            store_data = {}
            store_products = raw_data.loc[raw_data[OUTLET_ID] ==
                                          store][EAN_CODE].tolist()
            store_data[store] = store_products
            data.append(store_data)

            store_counter += 1
            if store_counter % 1000 == 0 or store_counter == len(
                    list_of_stores):
                Log.debug("Assortment is prepared for {}/{} stores".format(
                    store_counter, len(list_of_stores)))

        Log.debug("Updating assortment data in DB")
        store_counter = 0
        for store_data in data:

            self.update_db_from_json(store_data)

            if self.all_queries:
                queries = self.merge_insert_queries(self.all_queries)
                self.commit_results(queries)
                self.all_queries = []

            store_counter += 1
            if store_counter % 1000 == 0 or store_counter == len(data):
                Log.debug(
                    "Assortment is updated in DB for {}/{} stores".format(
                        store_counter, len(data)))

    @staticmethod
    def merge_insert_queries(queries):
        """
        This function aggregates all of the insert queries
        :param queries: all of the queries (update and insert) for the assortment
        :return: The merged insert queries
        """
        query_groups = {}
        other_queries = []
        for query in queries:
            if 'VALUES' not in query:
                other_queries.append(query)
                continue
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        return other_queries + merged_queries

    def update_db_from_json(self, data):
        update_products = set()
        missing_products = set()

        store_number = data.keys()[0]
        if store_number is None:
            Log.debug("'{}' column or value is missing".format(STORE_NUMBER))
            return

        store_fk = self.get_store_fk(store_number)
        if store_fk is None:
            Log.debug(
                'Store Number {} does not exist in DB'.format(store_number))
            return

        for key in data[store_number]:
            validation = False
            if isinstance(key, (float, int)):
                validation = True
            elif isinstance(key, (str, unicode)):
                validation = True
            if validation:
                product_ean_code = str(key).split(',')[-1]
                product_fk = self.get_product_fk(product_ean_code)
                if product_fk is None:
                    missing_products.add(product_ean_code)
                else:
                    update_products.add(product_fk)

        if missing_products:
            Log.debug(
                'The following EAN Codes for Store Number {} do not exist in DB: {}.'
                ''.format(store_number, list(missing_products)))
        queries = []
        current_products = self.current_top_skus[
            self.current_top_skus['store_fk'] ==
            store_fk]['product_fk'].tolist()

        products_to_deactivate = tuple(
            set(current_products).difference(update_products))
        products_to_activate = tuple(
            set(update_products).difference(current_products))
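        # Diff the template against the current assortment: products that
        # dropped out are deactivated, newly listed ones are activated.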

        if products_to_deactivate:
            if len(products_to_deactivate) == 1:
                queries.append(
                    self.get_deactivation_query(
                        store_fk, "(" + str(products_to_deactivate[0]) + ")",
                        self.deactivate_date))
            else:
                queries.append(
                    self.get_deactivation_query(store_fk,
                                                tuple(products_to_deactivate),
                                                self.deactivate_date))

        for product_fk in products_to_activate:
            queries.append(
                self.get_activation_query(store_fk, product_fk,
                                          self.activate_date))

        self.all_queries.extend(queries)
        Log.debug(
            'Store Number {} - Products to update {}: Deactivated {}, Activated {}'
            ''.format(store_number, len(update_products),
                      len(products_to_deactivate), len(products_to_activate)))

    def get_store_fk(self, store_number):
        """
        This functions returns the store's fk
        :param store_number: 'store_number_1' attribute of the store
        :return: store fk
        """
        store_number = str(store_number)
        if store_number in self.stores:
            store_fk = self.stores[store_number]
        else:
            store_fk = self.store_data[self.store_data['store_number'] ==
                                       store_number]
            if not store_fk.empty:
                store_fk = store_fk['store_fk'].values[0]
                self.stores[store_number] = store_fk
            else:
                store_fk = None
        return store_fk

    def get_product_fk(self, product_ean_code):
        product_ean_code = str(product_ean_code).strip()
        if product_ean_code in self.products:
            product_fk = self.products[product_ean_code]
        else:
            product_fk = self.all_products[
                self.all_products['product_ean_code'] == product_ean_code]
            if not product_fk.empty:
                product_fk = product_fk['product_fk'].values[0]
                self.products[product_ean_code] = product_fk
            else:
                product_fk = None
        return product_fk

    @staticmethod
    def get_deactivation_query(store_fk, product_fks, date):
        query = \
            """
            update {} set end_date = '{}', is_current = NULL 
            where store_fk = {} and product_fk in {} and end_date is null;
            """\
            .format(STORE_ASSORTMENT_TABLE, date, store_fk, product_fks)
        return query

    @staticmethod
    def get_store_deactivation_query(store_fk, date):
        query = \
            """
            update {} set end_date = '{}', is_current = NULL
            where store_fk = {} and end_date is null;
            """.format(STORE_ASSORTMENT_TABLE, date, store_fk)
        return query

    @staticmethod
    def get_activation_query(store_fk, product_fk, date):
        attributes = pd.DataFrame(
            [(store_fk, product_fk, str(date), 1)],
            columns=['store_fk', 'product_fk', 'start_date', 'is_current'])
        query = insert(attributes.to_dict(), STORE_ASSORTMENT_TABLE)
        return query

    def commit_results(self, queries):
        """
        This function commits the results into the DB in batches.
        query_num is the number of queries that were executed in the current batch.
        After batch_size is reached, the function re-connects the DB and cursor.
        """
        self.rds_conn.connect_rds()
        cursor = self.rds_conn.db.cursor()
        batch_size = 1000
        query_num = 0
        failed_queries = []
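        # On failure, the batch so far is committed, the failed query is
        # recorded (but not retried), and the connection is re-established
        # before continuing with the next query.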
        for query in queries:
            try:
                cursor.execute(query)
                # print query
            except Exception as e:
                Log.warning(
                    'Committing to DB failed due to: {}. Query: {}'.format(
                        e, query))
                self.rds_conn.db.commit()
                failed_queries.append(query)
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                continue
            if query_num > batch_size:
                self.rds_conn.db.commit()
                self.rds_conn.connect_rds()
                cursor = self.rds_conn.db.cursor()
                query_num = 0
            query_num += 1
        self.rds_conn.db.commit()
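A minimal usage sketch for the uploader above (hedged: argument parsing happens inside __init__ via _parse_arguments, so the project, file, date and update values come from the command line):

if __name__ == '__main__':
    # Sketch only: BATRUAssortment reads its own CLI arguments in __init__.
    assortment = BATRUAssortment()
    assortment.upload_assortment()
    assortment.rds_conn.disconnect_rds()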
コード例 #25
0
class CczaTemplateValidator(Main_Template):
    def __init__(self, project_name, file_url=DEFAULT_PATH):
        Main_Template.__init__(self)
        self.project = project_name
        self.template_path = file_url
        self.rds_conn = self.rds_connect
        self.store_data = self.get_store_data()
        self.store_types_db = self.store_data[Const.ATTR3].values.tolist()
        # self.all_products = self.get_product_data
        self.kpi_sheets = {}
        self.kpis_lvl2 = self.get_kpis_new_tables()
        self.kpis_old = self.get_kpis_old_tables()
        self.db_static_data = {}
        self.get_static_db_table_contents()
        self.type_function_map = self.map_validation_function_to_valid_type()

    def get_static_db_table_contents(self):
        # all_tables = map(lambda y: '{}.{}'.format(y[0], y[1]), map(lambda x: x.split('.'),
        #                                                            Parameters.TYPE_DB_MAP.values()))
        table_data = {}
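        # Each table's contents are fetched once and cached in table_data, so
        # entities and columns mapped to the same table share a single query.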

        for entity, table in Parameters.TYPE_DB_MAP.items():
            table_col = table.split('.')
            table_name = '{}.{}'.format(table_col[0], table_col[1])
            if table_data.get(table_name) is None:
                query = """ select * from {} """.format(table_name)
                table_contents = pd.read_sql_query(query, self.rds_conn.db)
                table_data.update({table_name: table_contents})
            else:
                table_contents = table_data.get(table_name)
            self.db_static_data[entity] = table_contents[table_col[-1]]

        for templ_column, table in Parameters.COLUMN_DB_MAP.items():
            table_col = table.split('.')
            table_name = '{}.{}'.format(table_col[0], table_col[1])
            if table_data.get(table_name) is None:
                query = """ select * from {} """.format(table_name)
                table_contents = pd.read_sql_query(query, self.rds_conn.db)
                table_data.update({table_name: table_contents})
            else:
                table_contents = table_data.get(table_name)
            self.db_static_data[templ_column] = table_contents[table_col[-1]]

    def get_kpis_new_tables(self):
        query = """select * from static.kpi_level_2 """
        kpis = pd.read_sql_query(query, self.rds_conn.db)
        return kpis

    def get_kpis_old_tables(self):
        query = """select api.name as atomic_kpi_name, api.pk as atomic_kpi_fk,
                   kpi.display_text as kpi_name, kpi.pk as kpi_fk,
                   kps.name as kpi_set_name, kps.pk as kpi_set_fk
                   from static.atomic_kpi api
                   left join static.kpi kpi on kpi.pk = api.kpi_fk
                   join static.kpi_set kps on kps.pk = kpi.kpi_set_fk 
                   where kps.name = "Red Score" """
        kpis = pd.read_sql_query(query, self.rds_conn.db)
        return kpis

    @property
    def rds_connect(self):
        self.rds_conn = PSProjectConnector(self.project, DbUsers.Ps)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self.rds_conn.db)
        except Exception as e:
            self.rds_conn.disconnect_rds()
            self.rds_conn = PSProjectConnector(self.project, DbUsers.Ps)
        return self.rds_conn

    def get_store_data(self):
        query = """select * from static.stores where is_active="Y" and delete_date is null and test_store != "Y" """
        store_data = pd.read_sql_query(query, self.rds_conn.db)
        return store_data

    # @property
    # def get_product_data(self):
    #     query = "select * from static.product"
    #     self.all_products = pd.read_sql_query(query, self.rds_conn.db)
    #     return self.store_data

    def validate_template_data(self):
        try:
            self.errorHandler.log_info('Checking tabs and columns')
            self.check_all_tabs_exist_and_have_relevant_columns()
        except Exception as e:
            self.errorHandler.log_error(
                'Unhandled error in format checking: {}'.format(e))

        try:
            self.errorHandler.log_info('Checking store_types')
            self.check_store_types_and_extra_columns()
        except Exception as e:
            self.errorHandler.log_error(
                'Unhandled error in store type validation: {}'.format(e))

        try:
            self.errorHandler.log_info('Checking sheets data')
            self.check_kpis_sheets()
        except Exception as e:
            self.errorHandler.log_error(
                'Unhandled error in sheet data validation: {}'.format(e))
        error_file_link = self.dump_logs_to_file_and_upload_to_bucket()
        print error_file_link
        return

    def dump_logs_to_file_and_upload_to_bucket(self):
        file_path = self.errorHandler.dump_to_file()
        error_file_link = Utilities.upload_file_to_s3(
            file_path, '{}.log'.format(self.execution_unique_id),
            os.path.join(Parameters.BUCKET_FOLDER,
                         Config.get_environment().upper(),
                         self.project.upper()), Parameters.LOG_FILE_BUCKET)
        return error_file_link

    def check_kpis_sheets(self):
        for sheet, template_df in self.kpi_sheets.items():
            self.errorHandler.log_info('Validating Sheet: {}'.format(sheet))
            self.errorHandler.log_info('Checking configurable validations')
            self.perform_configurable_validations(sheet, template_df)
            self.errorHandler.log_info('Checking additional validations')
            self.perform_additional_validations(sheet, template_df)

    def perform_configurable_validations(self, sheet, template_df):
        columns_to_validate = filter(
            lambda x: x in Parameters.SHEETS_COL_MAP[sheet],
            template_df.columns.values)
        for templ_column in columns_to_validate:
            self.errorHandler.log_info(
                'Validating sheet {}, column: {}'.format(sheet, templ_column))
            validation_params = Parameters.SHEETS_COL_VALID_TYPE[Parameters.ALL][templ_column] if \
                templ_column in Parameters.SHEETS_COL_VALID_TYPE[Parameters.ALL].keys() else \
                Parameters.SHEETS_COL_VALID_TYPE[sheet][templ_column]

            if validation_params.get('filter_out'):
                filtering_param = validation_params.get('filter_out')
                template_df = template_df[~(
                    template_df[templ_column].isin(filtering_param))]

            self.validate_empty(sheet, template_df, templ_column,
                                validation_params)
            self.validate_particular_values(sheet, template_df, templ_column,
                                            validation_params)

    def validate_particular_values(self, sheet, template_df, templ_column,
                                   validation_params):
        val_types = validation_params.get('type')
        val_sources = validation_params.get('source')
        if val_types is not None:
            for i in range(len(val_types)):
                val_type = val_types[i]
                val_source = val_sources[i]
                self.type_function_map[val_type](sheet, template_df,
                                                 templ_column, val_source)

    def check_value_based_on_type(self, sheet, template_df, templ_column,
                                  val_source):
        type_source_col = val_source['column']
        empty_type_records = template_df[
            (template_df[type_source_col] == '') |
            (template_df[type_source_col].isnull())]
        if len(empty_type_records) > 0:
            self.errorHandler.log_error(
                'Sheet: {}. Column: {}. Entity types are not completed for kpis: '
                '{}'.format(sheet, type_source_col,
                            empty_type_records[Const.ATOMIC_NAME].values))

        sheet_df = template_df[[type_source_col, templ_column]]
        sheet_df = sheet_df[(~(sheet_df[type_source_col].isnull()))
                            & (~(sheet_df[type_source_col] == ''))]
        sheet_df = sheet_df[(~(sheet_df[templ_column].isnull()))
                            & (~(sheet_df[templ_column] == ''))]

        # handle double entity
        double_entity_df = sheet_df[sheet_df[type_source_col].str.contains(
            ',')]
        if not double_entity_df.empty:
            missing_values_df = double_entity_df[~(
                double_entity_df[templ_column].str.contains(','))]
            if len(missing_values_df) > 0:
                self.errorHandler.log_error(
                    'Sheet {}. One of multiple values is missing '
                    'in column {}'.format(sheet, templ_column))
            double_entity_df = double_entity_df[
                double_entity_df[templ_column].str.contains(',')]
            duplicate_df = double_entity_df.copy()
            double_entity_df[type_source_col] = double_entity_df[
                type_source_col].apply(lambda x: x.split(',')[0].strip())
            double_entity_df[templ_column] = double_entity_df[
                templ_column].apply(lambda x: x.split(',')[0].strip())
            duplicate_df[type_source_col] = duplicate_df[
                type_source_col].apply(lambda x: x.split(',')[1].strip())
            duplicate_df[templ_column] = duplicate_df[templ_column].apply(
                lambda x: x.split(',')[1].strip())
            joined_template_df = pd.concat([double_entity_df, duplicate_df])
            self.check_value_based_on_type(sheet, joined_template_df,
                                           templ_column, val_source)

        # handle single entity
        sheet_df = sheet_df[~(sheet_df[type_source_col].str.contains(','))]
        sheet_df = sheet_df.sort_values(by=[type_source_col])
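        # Accumulate every template value per entity into one comma-joined
        # string via a cumulative concat, then keep only the last (complete)
        # row per entity.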
        sheet_df['accumulated_values'] = sheet_df.groupby(type_source_col)[
            templ_column].apply(lambda x: (x + ',').cumsum().str.strip())
        sheet_df = sheet_df.drop_duplicates(subset=[type_source_col],
                                            keep='last')
        sheet_df = sheet_df.reset_index(drop=True)

        for i, row in sheet_df.iterrows():
            validated_entity = row[type_source_col]
            template_values = row['accumulated_values'][0:-1].split(',')
            template_values = set(map(lambda z: z.strip(), template_values))

            db_col = val_source['db'][validated_entity].split('.')[-1]
            db_values = set(self.db_static_data[validated_entity].values)
            diff = template_values.difference(db_values)
            if len(diff) > 0:
                self.errorHandler.log_error(
                    'Sheet: {}, Column: {}, Entity: {} '
                    'does not match DB: {}'.format(sheet, templ_column,
                                                   validated_entity, diff))

    def check_value_based_on_property(self, sheet, template_df, templ_column,
                                      val_source):
        prop_name, col = val_source.split('.')
        # source_values = set(self.__getattribute__(prop_name)[col].values)
        source_values = set(getattr(self, prop_name)[col].values)
        template_values = filter(lambda x: x == x and x != '' and x is not None,
                                 template_df[templ_column].values)
        template_values = set(template_values)
        diff = template_values.difference(source_values)
        if len(diff) > 0:
            self.errorHandler.log_error('{} values do not match {} of {} '
                                        'in sheet {}: {}'.format(
                                            templ_column, col, prop_name,
                                            sheet, diff))

    def check_db_values(self, sheet, template_df, templ_column, val_source):
        db_col = val_source.split('.')[-1]
        db_col_values = filter(lambda x: x == x and x != '' and x is not None,
                               self.db_static_data[templ_column].values)
        db_col_values = set(db_col_values)
        template_values = filter(
            lambda x: x == x and x != '' and x is not None,
            template_df[templ_column].values)
        template_values = set(template_values)
        diff = template_values.difference(db_col_values)
        if len(diff) > 0:
            try:
                template_values1 = set(
                    map(lambda x: float(x), list(template_values)))
                db_col_values1 = set(
                    map(lambda x: float(x), list(db_col_values)))
                diff1 = template_values1.difference(db_col_values1)
                if len(diff1) > 0:
                    self.errorHandler.log_error(
                        'Sheet: {}. Column: {}. Values do not match values of'
                        ' column {} in DB table {}: '
                        '{}'.format(sheet, templ_column, db_col, val_source,
                                    diff1))
            except Exception:
                self.errorHandler.log_error(
                    'Sheet: {}. Column: {}. Values do not match values of'
                    ' column {} in DB table {}: '
                    '{}'.format(sheet, templ_column, db_col, val_source, diff))

    def validate_list(self, sheet, template_df, templ_column, val_source):
        template_values = filter(
            lambda x: x == x and x != '' and x is not None,
            template_df[templ_column].values)
        template_values_final = []
        for raw_value in template_values:
            values = map(lambda x: x.strip(), raw_value.split(','))
            for value in values:
                template_values_final.append(value)
        template_values = set(template_values_final)
        val_values = set(val_source)
        diff = template_values.difference(val_values)
        if len(diff) > 0:
            self.errorHandler.log_error(
                'Values in column {} in sheet {} do not match the allowed values {}: '
                '{}'.format(templ_column, sheet, val_values, diff))
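At its core, validate_list is a set difference between the comma-split template values and the allowed list; a tiny sketch with made-up values:

allowed = {'Binary', 'Proportional', 'Weighted'}
cell = 'Binary, Weighted, Averaged'
values = set(map(lambda x: x.strip(), cell.split(',')))
print values.difference(allowed)  # set(['Averaged']) -> would be logged as an error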

    def validate_empty(self, sheet, template_df, templ_column,
                       validation_params):
        if validation_params.get('disallow_empty'):
            empty_values = filter(
                lambda x: (x == '') or (x is None) or (x != x),
                template_df[templ_column].values)
            if len(empty_values) > 0:
                self.errorHandler.log_error(
                    'Column {} in sheet {} has empty values'.format(
                        templ_column, sheet))

    def perform_additional_validations(self, sheet, template_df):
        if sheet == Const.KPIS:
            self.validate_weights_in_kpis_sheet(sheet, template_df)
        if sheet != Const.KPIS:
            self.check_duplicate_atomics(sheet, template_df)
        if sheet in Parameters.TARGET_SHEET_WEIGHT_SHEET_MAP.keys():
            self.compare_weights_vs_targets_sheet(sheet, template_df)
        if sheet in [
                Const.LIST_OF_ENTITIES, Const.SOS_WEIGHTS,
                Const.PRICING_WEIGHTS, Const.SURVEY_QUESTIONS
        ]:
            self.check_weights_for_lvl2_kpis_add_to_100(sheet, template_df)

    def check_weights_for_lvl2_kpis_add_to_100(self, sheet, template_df):
        non_store_columns = list(
            filter(lambda x: x in Parameters.SHEETS_COL_MAP[sheet],
                   template_df.columns.values))
        non_store_columns.remove(Const.KPI_NAME)
        template_df = template_df.drop(non_store_columns, axis=1)
        groupby_dict = {}
        store_col = filter(lambda x: x != Const.KPI_NAME,
                           template_df.columns.values)
        template_df[store_col] = template_df[store_col].replace('', 0.0)
        template_df[store_col] = template_df[store_col].astype(float)
        for col in store_col:
            groupby_dict.update({col: np.sum})
        aggregate_df = template_df.groupby([Const.KPI_NAME],
                                           as_index=False).agg(groupby_dict)
        for i, row in aggregate_df.iterrows():
            row_values = map(lambda x: round(x, 6), row[store_col].values)
            all_100 = all(map(lambda x: x == 100 or x == 1, row_values))
            if not all_100:
                self.errorHandler.log_error(
                    'Sheet {}. KPI Name: {}. Not '
                    'all weights per store add up to 100'.format(
                        sheet, row[Const.KPI_NAME]))
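A toy version of the weight check above, assuming a single store-type column (real sheets carry one column per store type):

import numpy as np
import pandas as pd

df = pd.DataFrame({'KPI Name': ['A', 'A', 'B'],
                   'Store X': [60.0, 40.0, 90.0]})
sums = df.groupby('KPI Name', as_index=False).agg({'Store X': np.sum})
for _, row in sums.iterrows():
    total = round(row['Store X'], 6)
    if total not in (100, 1):  # weights may be given as percents or as fractions
        print 'KPI %s weights add up to %s, not 100' % (row['KPI Name'], total)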

    def compare_weights_vs_targets_sheet(self, target_sheet, targets_df):
        weight_sheet = Parameters.TARGET_SHEET_WEIGHT_SHEET_MAP[target_sheet]
        weights_df = self.kpi_sheets[weight_sheet]
        self.compare_kpi_lists(target_sheet, targets_df, weight_sheet,
                               weights_df)
        self.compare_weights_and_targets_store_sections(
            target_sheet, targets_df, weight_sheet, weights_df)

    def compare_weights_and_targets_store_sections(self, target_sheet,
                                                   targets_df, weight_sheet,
                                                   weights_df):
        if Const.KPI_NAME in targets_df.columns.values and Const.ATOMIC_NAME in targets_df.columns.values:
            targets_df = targets_df.sort_values(
                [Const.KPI_NAME, Const.ATOMIC_NAME])
            targets_df = targets_df.reset_index(drop=True)
            atomics_ordered = targets_df[Const.ATOMIC_NAME].values
            existing_store_types = filter(lambda x: x in self.store_types_db,
                                          targets_df.columns.values)
            target_stores_df = targets_df[existing_store_types]
            target_stores_df = target_stores_df.sort_index(axis=1)
            store_columns_t = target_stores_df.columns.values
            target_values = target_stores_df.values
            target_values = target_values.astype(np.bool)

            weights_df = weights_df.sort_values(
                [Const.KPI_NAME, Const.ATOMIC_NAME])
            weights_df = weights_df.reset_index(drop=True)
            existing_store_types = filter(lambda x: x in self.store_types_db,
                                          weights_df.columns.values)
            weight_stores_df = weights_df[existing_store_types]
            weight_stores_df = weight_stores_df.sort_index(axis=1)
            store_columns_w = weight_stores_df.columns.values
            weight_values = weight_stores_df.values
            weight_values = weight_values.astype(np.bool)

            if np.array_equal(store_columns_t, store_columns_w
                              ) and target_values.shape == weight_values.shape:
                compare = np.isclose(target_values, weight_values)
                compare_df = pd.DataFrame(compare, columns=store_columns_t)
                if not compare.all():
                    self.errorHandler.log_error(
                        'weights and targets are not aligned in sheets'
                        ' {} and {}'.format(target_sheet, weight_sheet))
                    for col in compare_df.columns.values:
                        store_values = compare_df[col].values
                        compare_col_res = store_values.all()
                        # compare_col_res = all(compare_df[col].values.tolist())
                        if not compare_col_res:
                            atomics = atomics_ordered[np.where(~store_values)]
                            self.errorHandler.log_error(
                                'Sheets: {} and {}. Fix weights or targets '
                                'for store {}: {}'.format(
                                    weight_sheet, target_sheet, col, atomics))
            else:
                self.errorHandler.log_error(
                    'Stores or kpi lists are not the same in '
                    'sheets {} and {}'.format(target_sheet, weight_sheet))

    def compare_kpi_lists(self, target_sheet, targets_df, weight_sheet,
                          weights_df):
        weights_df = weights_df[[Const.KPI_NAME, Const.ATOMIC_NAME]].copy()
        weights_df.rename(columns={Const.KPI_NAME: 'KPI_Name_Weights'},
                          inplace=True)
        targets_df = targets_df[[Const.KPI_NAME, Const.ATOMIC_NAME]].copy()
        targets_df.rename(columns={Const.KPI_NAME: 'KPI_Name_Targets'},
                          inplace=True)
        validation_df = pd.merge(targets_df,
                                 weights_df,
                                 on=[Const.ATOMIC_NAME],
                                 how='outer')
        missing_weights = validation_df[
            validation_df['KPI_Name_Targets'].isnull()]
        if len(missing_weights) > 0:
            atomics = missing_weights[Const.ATOMIC_NAME].values.tolist()
            self.errorHandler.log_error(
                'Sheet: {} has atomics that are missing '
                'from Sheet {}: {}'.format(target_sheet, weight_sheet,
                                           atomics))
        missing_targets = validation_df[
            validation_df['KPI_Name_Weights'].isnull()]
        if len(missing_targets) > 0:
            atomics = missing_targets[Const.ATOMIC_NAME].values.tolist()
            self.errorHandler.log_error(
                'Sheet: {} has atomics that are missing '
                'from Sheet {}: {}'.format(weight_sheet, target_sheet,
                                           atomics))
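The outer merge above flags atomics present in only one sheet; a minimal sketch with fabricated frames:

import pandas as pd

targets = pd.DataFrame({'Atomic': ['a1', 'a2'], 'KPI_Name_Targets': ['k', 'k']})
weights = pd.DataFrame({'Atomic': ['a1', 'a3'], 'KPI_Name_Weights': ['k', 'k']})
merged = pd.merge(targets, weights, on='Atomic', how='outer')
print merged[merged['KPI_Name_Targets'].isnull()]['Atomic'].tolist()  # ['a3']
print merged[merged['KPI_Name_Weights'].isnull()]['Atomic'].tolist()  # ['a2']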

    def check_duplicate_atomics(self, sheet, template_df):
        atomics_df = template_df[[Const.ATOMIC_NAME]].copy()
        atomics_df['count'] = 1
        atomics_df = atomics_df.groupby([Const.ATOMIC_NAME],
                                        as_index=False).agg({'count': np.sum})
        atomics_df = atomics_df[atomics_df['count'] > 1]
        for i, row in atomics_df.iterrows():
            self.errorHandler.log_error(
                'Sheet {} has duplicate atomic kpi rows for '
                'kpi {}'.format(sheet, row[Const.ATOMIC_NAME]))

    def validate_weights_in_kpis_sheet(self, sheet, template_df):
        # store_weights_df = template_df.drop(Parameters.SHEETS_COL_MAP[sheet], axis=1)
        existing_store_types = filter(lambda x: x in self.store_types_db,
                                      template_df.columns.values)
        store_weights_df = template_df[existing_store_types]
        store_weights = store_weights_df.values
        try:
            store_weights.astype(float)
        except (ValueError, TypeError):
            self.errorHandler.log_error(
                'Sheet: {}. Not all weights in KPIs sheet are filled or '
                'are numeric'.format(sheet))
        # the last row of the sheet holds the total weight per store type
        total_weights = store_weights[-1].astype(float)
        validate_100 = all(map(lambda x: x == 100, total_weights))
        if not validate_100:
            self.errorHandler.log_error(
                'Sheet: {}. Certain weights per store type are not equal to 100'
                .format(sheet))

        total_weights_summed = store_weights[0:-1].astype(float)
        total_weights_summed = total_weights_summed.sum(axis=0)
        validate_100 = all(map(lambda x: x == 100, total_weights_summed))
        if not validate_100:
            self.errorHandler.log_error(
                'Sheet: {}. Certain weights per store type do not add up to 100'
                .format(sheet))

    def check_all_tabs_exist_and_have_relevant_columns(self):
        for name in Const.sheet_names_and_rows:
            try:
                template_df = parse_template(
                    self.template_path,
                    sheet_name=name,
                    lower_headers_row_index=Const.sheet_names_and_rows[name])
                columns = template_df.columns.values
                columns = filter(lambda x: 'Unnamed' not in x, columns)
                template_df = template_df[columns]
                self.check_template_columns(name, columns)
                self.kpi_sheets[name] = template_df
            except Exception as e:  # look up the type of exception in case sheet name is missing
                self.errorHandler.log_error(
                    'Sheet {} is missing in the file'.format(name))

    def check_template_columns(self, sheet, columns):
        missing_columns = filter(lambda x: x not in columns,
                                 Parameters.SHEETS_COL_MAP[sheet])
        if len(missing_columns) > 0:
            self.errorHandler.log_error('The following columns are missing '
                                        'from sheet {}: {}'.format(
                                            sheet, missing_columns))

    def check_store_types_and_extra_columns(self):
        for sheet, template_df in self.kpi_sheets.items():
            store_types = set(template_df.columns.values).difference(
                set(Parameters.SHEETS_COL_MAP[sheet]))
            store_types_in_db = set(self.store_data[Const.ATTR3].values)
            template_vs_db = store_types.difference(store_types_in_db)
            if len(template_vs_db) > 0:
                self.errorHandler.log_error(
                    'Sheet: {}. Store types {} exist in template but do not '
                    'exist in DB / or extra columns exist. Only data related to stores types '
                    'existing in DB will be further validated'.format(
                        sheet, template_vs_db))
            db_vs_template = store_types_in_db.difference(store_types)
            if len(db_vs_template) > 0:
                self.errorHandler.log_error(
                    'Sheet: {}. Store types {} exist in DB but do not '
                    'exist in the template / or extra columns exist'.format(
                        sheet, db_vs_template))

    def map_validation_function_to_valid_type(self):
        type_func_map = {
            'db': self.check_db_values,
            'list': self.validate_list,
            'type_value': self.check_value_based_on_type,
            'prop': self.check_value_based_on_property
        }
        return type_func_map
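The returned map is the dispatch table used when iterating the validation types declared per column; a stripped-down sketch of the same pattern (the handler names here are illustrative):

def check_db(column, source):
    print 'db check on %s against %s' % (column, source)

def check_list(column, source):
    print 'list check on %s against %s' % (column, source)

handlers = {'db': check_db, 'list': check_list}
for val_type, val_source in [('db', 'static.kpi_level_2.type'),
                             ('list', ['Binary', 'Proportional'])]:
    handlers[val_type]('Score Logic', val_source)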
コード例 #26
0
class AutoAssortmentHandler:

    STORE_NUMBER = 'Store Number'
    PRODUCT_EAN_CODE = 'Product EAN'

    def __init__(self, rds_conn=None):
        if rds_conn is not None:
            self._rds_conn = rds_conn
        self.stores = {}
        self.products = {}
        self.all_queries = []
        self.update_queries = []

    @property
    def current_top_skus(self):
        if not hasattr(self, '_current_top_skus'):
            self._current_top_skus = self.get_current_top_skus()
        return self._current_top_skus

    @property
    def rds_conn(self):
        if not hasattr(self, '_rds_conn'):
            self._rds_conn = PSProjectConnector(PROJECT,
                                                DbUsers.CalculationEng)
        try:
            pd.read_sql_query('select pk from probedata.session limit 1',
                              self._rds_conn.db)
        except:
            self._rds_conn.disconnect_rds()
            self._rds_conn = PSProjectConnector(PROJECT,
                                                DbUsers.CalculationEng)
        return self._rds_conn
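The rds_conn property above combines lazy creation with a cheap liveness probe and reconnect-on-failure. A self-contained sketch of that pattern, with stand-ins for the connector and the probe query:

class LazyConnection(object):
    def _connect(self):
        # stand-in for PSProjectConnector(PROJECT, DbUsers.CalculationEng)
        return {'alive': True}

    def _probe(self, conn):
        # stand-in for the cheap 'select pk from probedata.session limit 1' probe
        if not conn.get('alive'):
            raise RuntimeError('connection lost')

    @property
    def conn(self):
        if not hasattr(self, '_conn'):
            self._conn = self._connect()
        try:
            self._probe(self._conn)
        except Exception:
            self._conn = self._connect()
        return self._conn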

    @property
    def store_data(self):
        if not hasattr(self, '_store_data'):
            query = "select pk as store_fk, store_number_1 as store_number from static.stores"
            self._store_data = pd.read_sql_query(query, self.rds_conn.db)
        return self._store_data

    @property
    def product_data(self):
        if not hasattr(self, '_product_data'):
            query = "select pk as product_fk, ean_code from static_new.product " \
                    "where delete_date is null".format(CORRELATION_FIELD)
            self._product_data = pd.read_sql_query(query, self.rds_conn.db)
        return self._product_data

    def get_store_fk(self, store_number):
        store_number = str(store_number)
        if store_number in self.stores:
            store_fk = self.stores[store_number]
        else:
            store_fk = self.store_data[self.store_data['store_number'] ==
                                       store_number]
            if not store_fk.empty:
                store_fk = store_fk['store_fk'].values[0]
                self.stores[store_number] = store_fk
            else:
                store_fk = None
        return store_fk

    def get_product_fk(self, product_ean_code):
        product_ean_code = str(product_ean_code).strip()
        if product_ean_code in self.products:
            product_fk = self.products[product_ean_code]
        else:
            if product_ean_code.isdigit():
                ean_to_check = int(product_ean_code)
            else:
                ean_to_check = product_ean_code
            product_fk = self.product_data[self.product_data['ean_code'] ==
                                           ean_to_check]
            if not product_fk.empty:
                product_fk = product_fk['product_fk'].values[0]
                self.products[product_ean_code] = product_fk
            else:
                product_fk = None
        return product_fk

    def get_product_ean_code(self, product_fk):
        product_key = str(product_fk).strip()
        if product_key in self.products:
            product_ean_code = self.products[product_key]
        else:
            if product_key.isdigit():
                fk_to_check = int(product_key)
            else:
                fk_to_check = product_key
            product_row = self.product_data[self.product_data['product_fk'] ==
                                            fk_to_check]
            if not product_row.empty:
                product_ean_code = product_row['ean_code'].values[0]
                self.products[product_key] = product_ean_code
            else:
                product_ean_code = None
        return product_ean_code

    def get_current_top_skus(self):
        query = """select store_fk, product_fk
                   from pservice.custom_osa
                   where end_date is null"""
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data

    def get_current_assortment_per_store(self, store_fk, visit_date):
        query = """select product_fk
                   from pservice.custom_osa
                   where end_date is null and store_fk = {} and start_date <= '{}'""".format(
            store_fk, visit_date)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data['product_fk'].unique().tolist()

    def update_db_from_json(self,
                            data,
                            immediate_change=False,
                            discard_missing_products=False):
        products = set()
        missing_products = set()
        store_number = data.pop(self.STORE_NUMBER, None)
        if store_number is None:
            Log.warning("'{}' is required in data".format(self.STORE_NUMBER))
            return
        # store_fk = self.get_store_fk(store_number)
        store_fk = store_number
        if store_fk is None:
            Log.warning(
                'Store {} does not exist. Exiting...'.format(store_number))
            return
        for key in data.keys():
            validation = False
            # if not data[key]:
            #     validation = False
            # elif isinstance(data[key], (float, int)) and data[key]:
            #     validation = True
            # elif isinstance(data[key], (str, unicode)) and data[key].isdigit() and int(data[key]):
            #     validation = True
            if data[key] and key != 'date':
                validation = True
            if validation:
                # product_ean_code = str(key).split(',')[-1]
                product_fk = str(key).split(',')[-1]
                # product_fk = self.get_product_fk(product_ean_code)
                # product_ean_code = self.get_product_ean_code(product_fk)
                if product_fk is None:
                    Log.warning(
                        'Product PK {} does not exist'.format(product_fk))
                    missing_products.add(product_fk)
                    continue
                try:
                    products.add(int(product_fk))
                except Exception as e:
                    continue
        if missing_products and not discard_missing_products:
            Log.warning('Some EANs do not exist: {}. Exiting...'.format(
                '; '.join(missing_products)))
            return

        if products:
            current_date = data['date']
            # both branches were identical; 'immediate_change' currently has no effect
            deactivate_date = activate_date = current_date

            queries = []
            current_skus = self.current_top_skus[
                self.current_top_skus['store_fk'] ==
                store_fk]['product_fk'].tolist()
            products_to_deactivate = []
            # products_to_deactivate = set(current_skus).difference(products)
            products_to_activate = set(products).difference(current_skus)
            # for product_fk in products_to_deactivate:
            #     if product_fk.isdigit():
            #         queries.append(self.get_deactivation_query(store_fk, product_fk, deactivate_date))
            for product_fk in products_to_activate:
                try:
                    if activate_date == self.stores_min_dates[product_fk,
                                                              store_fk]:
                        queries.append(
                            self.get_activation_query(store_fk, product_fk,
                                                      activate_date))
                except Exception as e:
                    queries.append(
                        self.get_activation_query(store_fk, product_fk,
                                                  activate_date))
            # self.commit_results(queries)
            self.all_queries.extend(queries)
            Log.debug(
                '{} - Out of {} products, {} products were deactivated and {} products were activated'
                .format(store_number,
                        len(products), len(products_to_deactivate),
                        len(products_to_activate)))
        else:
            Log.debug('{} - No products are configured as Top SKUs'.format(
                store_number))
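Assuming the payload shape implied by the parsing above (a store identifier, a date, and keys whose last comma-separated token is a product pk), an illustrative call would be:

# Hypothetical payload; the keys and values are made up for illustration.
data = {
    'Store Number': 12,    # used directly as store_fk here
    'date': '2019-01-07',  # activation date
    'Some KPI,345': 1,     # truthy -> product 345 gets an activation query
    'Other KPI,678': 0,    # falsy  -> skipped
}
# handler = AutoAssortmentHandler()
# handler.update_db_from_json(data, immediate_change=True)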

    def upload_top_sku_file(self,
                            file_path,
                            data_first_cell,
                            ean_row_index,
                            store_number_column_index,
                            update_correlations=False):

        raw_data = pd.read_csv(file_path)
        raw_data = raw_data.fillna('')
        data = []
        self.stores_min_dates = {}
        for index_data, store_raw_data in raw_data.iterrows():
            # store_data = {self.STORE_NUMBER: index_data[store_number_column_index]}
            store_data = {
                self.STORE_NUMBER: store_raw_data['store_fk'],
                'date': store_raw_data['date']
            }
            columns = list(store_raw_data.keys())
            columns.remove('store_fk')
            columns.remove('woy')
            columns.remove('date')
            try:
                columns.remove('stores_name')
            except Exception as e:
                pass

            for column in columns:
                store_data[column] = store_raw_data[column]
                if store_raw_data[column]:
                    if (column, store_raw_data['store_fk']
                        ) in self.stores_min_dates.keys():
                        if datetime.strptime(
                                self.stores_min_dates[
                                    column, store_raw_data['store_fk']],
                                "%Y-%m-%d") > datetime.strptime(
                                    store_raw_data['date'], "%Y-%m-%d"):
                            self.stores_min_dates[
                                column,
                                store_raw_data['store_fk']] = store_raw_data[
                                    'date']
                    else:
                        self.stores_min_dates[
                            column,
                            store_raw_data['store_fk']] = store_raw_data[
                                'date']
            data.append(store_data)

        if update_correlations:
            self.update_correlations(data[0].keys())
        for store_data in data:
            self.update_db_from_json(store_data, immediate_change=True)

        queries = self.merge_insert_queries(self.all_queries)
        self.commit_results(queries)
        return data

    def update_correlations(self, products_data):
        correlations = {}
        for products in products_data:
            products = str(products)
            if products.count(','):
                correlated_products = set()
                products = products.split(',')
                main_product = products.pop(-1).strip()
                for product in products:
                    product_fk = self.get_product_fk(product)
                    if product_fk is not None:
                        correlated_products.add(product_fk)
                if correlated_products:
                    correlations[main_product] = list(correlated_products)
        if correlations:
            queries = [self.get_delete_correlation_query()]
            for product_ean_code in correlations:
                queries.append(
                    self.get_correlation_query(product_ean_code,
                                               correlations[product_ean_code]))
            self.commit_results(queries)
            delattr(self, '_product_data')

    @staticmethod
    def get_deactivation_query(store_fk, product_fk, date):
        query = """update {} set end_date = '{}', is_current = NULL
                   where store_fk = {} and product_fk = {} and end_date is null""".format(
            TOP_SKU_TABLE, date, store_fk, product_fk)
        return query

    def get_activation_query(self, store_fk, product_fk, date):
        # if date in self.stores_min_dates[store_fk]:
        attributes = pd.DataFrame(
            [(store_fk, product_fk, str(date), None)],
            columns=['store_fk', 'product_fk', 'start_date', 'is_current'])
        # else:
        #     end_date = datetime.strptime(date, "%Y-%m-%d") + timedelta(7)
        #     attributes = pd.DataFrame([(store_fk, product_fk, str(date), None, str(end_date))],
        #                               columns=['store_fk', 'product_fk', 'start_date', 'is_current', 'end_date'])
        query = insert(attributes.to_dict(), TOP_SKU_TABLE)
        return query

    @staticmethod
    def get_delete_correlation_query():
        query = 'update static.product set {0} = null where {0} is not null'.format(
            CORRELATION_FIELD)
        return query

    @staticmethod
    def get_correlation_query(anchor_ean_code, correlated_products):
        if len(correlated_products) == 1:
            condition = 'pk = {}'.format(correlated_products[0])
        else:
            condition = 'pk in ({})'.format(tuple(correlated_products))
        query = "update static.product set {} = '{}' where {}".format(
            CORRELATION_FIELD, anchor_ean_code, condition)
        return query

    def commit_results(self, queries):
        self.rds_conn.disconnect_rds()
        rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
        cur = rds_conn.db.cursor()
        for query in self.update_queries:
            print query
            try:
                cur.execute(query)
            except Exception as e:
                Log.debug('Inserting to DB failed due to: {}'.format(e))
                rds_conn.disconnect_rds()
                rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
                cur = rds_conn.db.cursor()
                continue
        rds_conn.db.commit()
        rds_conn.disconnect_rds()
        rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
        cur = rds_conn.db.cursor()
        for query in queries:
            print query
            try:
                cur.execute(query)
            except Exception as e:
                Log.debug('Inserting to DB failed due to: {}'.format(e))
                rds_conn.disconnect_rds()
                rds_conn = PSProjectConnector(PROJECT, DbUsers.CalculationEng)
                cur = rds_conn.db.cursor()
                continue
        rds_conn.db.commit()

    def get_top_skus_for_store(self, store_fk, visit_date):
        query = """
                select ts.product_fk, p.product_ean_code
                from {} ts
                join static.product p on p.pk = ts.product_fk
                where ts.store_fk = {} and '{}' between ts.start_date and ifnull(ts.end_date, curdate())
                """.format(TOP_SKU_TABLE, store_fk, visit_date)
        data = pd.read_sql_query(query, self.rds_conn.db)
        return data.groupby('product_fk')['product_ean_code'].first().to_dict()

    def get_correlated_products(self, product_ean_code):
        return self.product_data[self.product_data['correlation'] ==
                                 product_ean_code]['product_fk'].tolist()

    @staticmethod
    def get_custom_scif_query(session_fk, scene_fk, product_fk, in_assortment,
                              distributed):
        in_assortment = 1 if in_assortment else 0
        out_of_stock = 1 if not distributed else 0
        attributes = pd.DataFrame(
            [(session_fk, scene_fk, product_fk, in_assortment, out_of_stock)],
            columns=[
                'session_fk', 'scene_fk', 'product_fk', 'in_assortment_osa',
                'oos_osa'
            ])
        query = insert(attributes.to_dict(), CUSTOM_SCIF_TABLE)
        return query

    def merge_insert_queries(self, insert_queries):
        # other_queries = []
        query_groups = {}
        for query in insert_queries:
            if 'update' in query:
                self.update_queries.append(query)
            else:
                static_data, inserted_data = query.split('VALUES ')
                if static_data not in query_groups:
                    query_groups[static_data] = []
                query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            for group_index in xrange(0, len(query_groups[group]), 10**4):
                merged_queries.append('{0} VALUES {1}'.format(
                    group, ',\n'.join(
                        query_groups[group][group_index:group_index + 10**4])))
        # merged_queries.extend(other_queries)
        return merged_queries
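merge_insert_queries groups INSERTs by their static prefix and concatenates their VALUES tuples (in chunks of 10**4 rows) to cut DB round-trips; a toy sketch of the grouping:

queries = ["INSERT INTO t (a, b) VALUES (1, 2)",
           "INSERT INTO t (a, b) VALUES (3, 4)"]
groups = {}
for q in queries:
    head, values = q.split('VALUES ')
    groups.setdefault(head, []).append(values)
for head, values in groups.items():
    print '{0}VALUES {1}'.format(head, ',\n'.join(values))
# -> a single INSERT carrying both value tuples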
コード例 #27
0
 def read_custom_query(self, query):
     local_con = PSProjectConnector(self.project_name,
                                    DbUsers.CalculationEng)
     df = pd.read_sql_query(query, local_con.db)
     local_con.disconnect_rds()
     return df
コード例 #28
0
class PURINAToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.common = Common(self.data_provider)
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.kpi_static_data = self.common.get_kpi_static_data()
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.session_fk = self.session_info['pk'].values[0]
        self.kpi_results_queries = []
        self.kpi_static_queries = []
        self.purina_scif = self.scif.loc[self.scif['category_fk'] == PET_FOOD_CATEGORY]

    def calculate_purina(self, *args, **kwargs):
        """
        This function calculates the KPI results.
        """
        if not self.is_session_purina():
            return
        # Update all new static KPIs
        self.create_new_static_kpi()

        self.kpi_static_data = self.common.get_kpi_static_data(refresh=True)

        self.update_kpi_score()
        self.run_data_collecting()
        self.common.commit_results_data()

    def update_kpi_score(self):
        # Only to see results in join :(

        for kpi in PURINA_KPI:
            kpi_fk = self.get_kpi_fk_by_kpi_name(kpi, self.LEVEL2, set_name=PURINA_SET)
            self.common.write_to_db_result(kpi_fk, self.LEVEL2, 1)


    def run_data_collecting(self):
        """
        This function run the man calculation of linear sos with sub category out of subsegment
        or price out of subsegment
        :param price_kpi:
        :return:
        """

        data = self.purina_scif.dropna(subset=[LINEAR_SIZE])

        if data.empty:
            Log.info("No relevant purina's products were found in session.")
            return

        # subseg_name_list = data[SCIF_SUBSEGMENT].unique()
        # for subseg in subseg_name_list:
        #     if not subseg:
        #         subseg = NO_SUBSEG
        #         by_subseg = data.loc[pd.isnull(data[SCIF_SUBSEGMENT])]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     else:
        #         by_subseg = data.loc[data[SCIF_SUBSEGMENT] == subseg]
        #         subseg_ft = self.cm_to_ft(sum(by_subseg[LINEAR_SIZE]))
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=SUBSEGMENT_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)
        #     atomic_fk = self.get_kpi_fk_by_kpi_name(subseg, self.LEVEL3, father=SUBSEGMENT_KPI, set_name=PRICE_SET)
        #     self.common.old_write_to_db_result(fk=atomic_fk, level=self.LEVEL3, score=subseg_ft)

        # gets all category linear size

        category_ft = self.cm_to_ft(sum(data[LINEAR_SIZE]))
        fk = self.get_kpi_fk_by_kpi_name(PURINA_SET, self.LEVEL1)
        self.common.write_to_db_result(fk, self.LEVEL1, category_ft)

        man = data['manufacturer_name'].unique()
        for mf in man:
            by_mf = data.loc[data['manufacturer_name'] == mf]
            manufacturer_ft = self.cm_to_ft(sum(by_mf[LINEAR_SIZE]))
            relevant_kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == MANUFACTUR) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
            atomic_fk = self.get_kpi_fk_by_kpi_name(mf, self.LEVEL3, father=MANUFACTUR, set_name=PURINA_SET)
            if atomic_fk:
                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                               score=manufacturer_ft, score_2=manufacturer_ft,
                                               session_uid=self.session_uid, store_fk=self.store_id,
                                               display_text=mf.replace("'","''"),
                                               visit_date=self.visit_date.isoformat(),
                                               calculation_time=datetime.utcnow().isoformat(),
                                               kps_name=PURINA_SET,
                                               kpi_fk=relevant_kpi_fk)
            else:
                print 'atomic cannot be saved for manufacturer {}'.format(mf)

            brands = by_mf['brand_name'].unique()
            for brand in brands:
                by_brand = by_mf.loc[by_mf['brand_name'] == brand]
                brand_ft = self.cm_to_ft(sum(by_brand[LINEAR_SIZE]))
                kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == BRAND) &
                                                  (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                atomic_fk = self.get_kpi_fk_by_kpi_name(brand, self.LEVEL3, father=BRAND, set_name=PURINA_SET)
                if atomic_fk:
                    self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                   score=brand_ft, score_2=brand_ft, style=mf.replace("'","''"),
                                                   session_uid=self.session_uid, store_fk=self.store_id,
                                                   display_text=brand.replace("'","''"),
                                                   visit_date=self.visit_date.isoformat(),
                                                   calculation_time=datetime.utcnow().isoformat(),
                                                   kps_name=PURINA_SET,
                                                   kpi_fk=kpi_fk)
                else:
                    print 'atomic cannot be saved for brand {}'.format(brand)

                categories = by_brand[SCIF_CATEOGRY].unique()
                for cat in categories:
                    if not cat:
                        cat = OTHER
                        by_cat = by_brand.loc[pd.isnull(by_brand[SCIF_CATEOGRY])]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))
                    else:
                        by_cat = by_brand.loc[by_brand[SCIF_CATEOGRY] == cat]
                        cat_ft = self.cm_to_ft(sum(by_cat[LINEAR_SIZE]))

                    kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == CATEGORY) &
                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)]['kpi_fk'].values[0]
                    atomic_fk = self.get_kpi_fk_by_kpi_name(cat, self.LEVEL3, father=CATEGORY, set_name=PURINA_SET)
                    if atomic_fk:
                        self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                       score=cat_ft,
                                                       score_2=cat_ft, style=mf.replace("'","''"),
                                                       result=brand.replace("'","''"),
                                                       session_uid=self.session_uid, store_fk=self.store_id,
                                                       display_text=cat.replace("'","''"),
                                                       visit_date=self.visit_date.isoformat(),
                                                       calculation_time=datetime.utcnow().isoformat(),
                                                       kps_name=PURINA_SET,
                                                       kpi_fk=kpi_fk)
                    else:
                        print 'atomic cannot be saved for category {}'.format(cat)

                    sub_cats = by_cat[SCIF_SUB_CATEOGRY].unique()
                    for sub_cat in sub_cats:
                        if not sub_cat:
                            sub_cat = OTHER
                            by_sub_cat = by_cat.loc[pd.isnull(by_cat[SCIF_SUB_CATEOGRY])]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        else:
                            by_sub_cat = by_cat.loc[by_cat[SCIF_SUB_CATEOGRY] == sub_cat]
                            sub_cat_ft = self.cm_to_ft(sum(by_sub_cat[LINEAR_SIZE]))
                        # write to db under sub category atomic kpi score with brand name in results

                        kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == SUB_CATEGORY) &
                                                          (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                        atomic_fk = self.get_kpi_fk_by_kpi_name(sub_cat, self.LEVEL3, father=SUB_CATEGORY,
                                                                    set_name=PURINA_SET)
                        if atomic_fk:
                            self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                           score=sub_cat_ft,
                                                           score_2=sub_cat_ft, style=mf.replace("'","''"),
                                                           result=brand.replace("'","''"),
                                                           result_2=cat.replace("'","''"),
                                                           session_uid=self.session_uid, store_fk=self.store_id,
                                                           display_text=sub_cat.replace("'","''"),
                                                           visit_date=self.visit_date.isoformat(),
                                                           calculation_time=datetime.utcnow().isoformat(),
                                                           kps_name=PURINA_SET,
                                                           kpi_fk=kpi_fk)
                        else:
                            print 'atomic cannot be saved for sub category {}'.format(sub_cat)

                        prices = by_sub_cat[SCIF_PRICE].unique()
                        for price_class in prices:
                            if not price_class:
                                price_class = OTHER
                                by_prices = by_sub_cat.loc[pd.isnull(by_sub_cat[SCIF_PRICE])]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            else:
                                by_prices = by_sub_cat.loc[by_sub_cat[SCIF_PRICE] == price_class]
                                price_ft = self.cm_to_ft(sum(by_prices[LINEAR_SIZE]))
                            kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == PRICE_KPI) &
                                                              (self.kpi_static_data['kpi_set_name'] == PURINA_SET)][
                                                                                                    'kpi_fk'].values[0]
                            atomic_fk = self.get_kpi_fk_by_kpi_name(price_class, self.LEVEL3, father=PRICE_KPI,
                                                                    set_name=PURINA_SET)
                            if atomic_fk:
                                self.common.write_to_db_result(fk=atomic_fk, atomic_kpi_fk=atomic_fk, level=self.LEVEL3,
                                                               score=price_ft,
                                                               score_2=price_ft, style=mf.replace("'","''"),
                                                               result=brand.replace("'","''"),
                                                               result_2=cat.replace("'","''"),
                                                               result_3=sub_cat.replace("'","''"),
                                                               session_uid=self.session_uid, store_fk=self.store_id,
                                                               display_text=price_class.replace("'", "''"),
                                                               visit_date=self.visit_date.isoformat(),
                                                               calculation_time=datetime.utcnow().isoformat(),
                                                               kps_name=PURINA_SET,
                                                               kpi_fk=kpi_fk )
                            else:
                                print 'atomic cannot be saved for price class {}'.format(price_class)


    @staticmethod
    def cm_to_ft(cm):
        return cm / 30.48

    def get_labels(self):
        query = """select pk, labels, ean_code
        from static_new.product
        """
        labels = pd.read_sql_query(query, self.rds_conn.db)
        return labels

    def get_kpi_fk_by_kpi_name(self, kpi_name, kpi_level, father=None, logic_father=None, set_name=None):
        if kpi_level == self.LEVEL1:
            column_key = 'kpi_set_fk'
            column_value = 'kpi_set_name'
            father_value = 'kpi_set_name'
        elif kpi_level == self.LEVEL2:
            column_key = 'kpi_fk'
            column_value = 'kpi_name'
            father_value = 'kpi_set_name'
        elif kpi_level == self.LEVEL3:
            column_key = 'atomic_kpi_fk'
            column_value = 'atomic_kpi_name'
            father_value = 'kpi_name'
        else:
            raise ValueError('invalid level')

        try:
            relevant = self.kpi_static_data[self.kpi_static_data[column_value] == kpi_name]
            if father:
                relevant = relevant[relevant[father_value] == father]
            if set_name:
                relevant = relevant[relevant['kpi_set_name'] == set_name]

            return relevant[column_key].values[0]

        except IndexError:
            Log.info('Kpi name: {}, isn\'t equal to any kpi name in static table'.format(kpi_name))
            return None
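A sketch of the lookup this method performs, using a fabricated static table; the column names mirror the ones referenced above:

import pandas as pd

static = pd.DataFrame({'atomic_kpi_name': ['Purina'],
                       'kpi_name': ['Manufacturer'],
                       'kpi_set_name': ['Purina Set'],
                       'atomic_kpi_fk': [42]})
relevant = static[(static['atomic_kpi_name'] == 'Purina') &
                  (static['kpi_name'] == 'Manufacturer') &
                  (static['kpi_set_name'] == 'Purina Set')]
print relevant['atomic_kpi_fk'].values[0]  # 42; an IndexError maps to None above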

    def create_new_static_kpi(self):
        #  This function takes all brands, sub categories, categories and manufacturers
        #  in the session and adds them to the database in case they are new.
        brands = self.get_all_brands()
        sub_cats = self.get_all_sub_categories()
        manufacturer = self.get_all_manufacturers()
        cats = self.get_all_categories()
        prices = self.get_all_price_classes()

        new_brands = self.purina_scif.loc[~self.purina_scif['brand_name'].isin(brands)]['brand_name'].unique()
        new_manufacturer = self.purina_scif.loc[~self.purina_scif['manufacturer_name'].isin(manufacturer)][
            'manufacturer_name'].unique()
        new_sub_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_SUB_CATEOGRY].isin(sub_cats)) &
                                        (~pd.isnull(self.purina_scif[SCIF_SUB_CATEOGRY]))][SCIF_SUB_CATEOGRY].unique()
        new_cat = self.purina_scif.loc[(~self.purina_scif[SCIF_CATEOGRY].isin(cats)) &
                                       (~pd.isnull(self.purina_scif[SCIF_CATEOGRY]))][SCIF_CATEOGRY].unique()
        new_prices = self.purina_scif.loc[(~self.purina_scif[SCIF_PRICE].isin(prices)) &
                                          (~pd.isnull(self.purina_scif[SCIF_PRICE]))][SCIF_PRICE].unique()

        self.save_static_atomics(BRAND, new_brands, PURINA_SET)
        self.save_static_atomics(MANUFACTUR, new_manufacturer, PURINA_SET)
        self.save_static_atomics(CATEGORY, new_cat, PURINA_SET)
        self.save_static_atomics(SUB_CATEGORY, new_sub_cat, PURINA_SET)
        self.save_static_atomics(PRICE_KPI, new_prices, PURINA_SET)

        self.commit_static_data()

    def get_all_brands(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == BRAND]['atomic_kpi_name']

    def get_all_sub_categories(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == SUB_CATEGORY]['atomic_kpi_name']

    def get_all_manufacturers(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == MANUFACTUR]['atomic_kpi_name']

    def get_all_categories(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == CATEGORY]['atomic_kpi_name']

    def get_all_price_classes(self):
        return self.kpi_static_data.loc[self.kpi_static_data['kpi_name'] == PRICE_KPI]['atomic_kpi_name']

    def save_static_atomics(self, kpi_name, atomics, set_name):
        kpi_fk = self.kpi_static_data.loc[(self.kpi_static_data['kpi_name'] == kpi_name) &
                                          (self.kpi_static_data['kpi_set_name'] == set_name)]['kpi_fk'].values[0]
        for current in atomics:
            current = current.replace("'", "''")
            query = """
               INSERT INTO {0} (`kpi_fk`, `name`, `description`, `display_text`,`presentation_order`, `display`)
               VALUES ('{1}', '{2}', '{3}', '{4}', '{5}', '{6}');""".format(STATIC_ATOMIC,
                                                                            kpi_fk, current, current, current, 1, 'Y')

            self.kpi_static_queries.append(query)

    def commit_static_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        # ProjectConnector(self.project_name, DbUsers.CalculationEng)
        cur = self.rds_conn.db.cursor()
        for query in self.kpi_static_queries:
            try:
                cur.execute(query)
            except Exception as e:
                Log.info('query {} could not be executed.'.format(query))
        self.rds_conn.db.commit()

        self.rds_conn.disconnect_rds()

    def is_session_purina(self):
        # This function checks whether the session belongs to the Purina project
        # (by its category) and that it is a successful visit.
        session_data = self.get_session_category_data()
        session_data = session_data.loc[(session_data['category_fk'] == 13) &
                                        (session_data['resolution_fk'] == 1) &
                                        (session_data['exclude_status_fk'] == 1)]
        if not session_data.empty:
            return True
        return False

    def get_session_category_data(self):
        local_con = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        query = """select category_fk, resolution_fk, exclude_status_fk from probedata.session_category
                where session_fk = {}""".format(self.session_fk)
        data = pd.read_sql_query(query, local_con.db)
        local_con.disconnect_rds()
        return data
コード例 #29
0
class PENAFLORAR_SANDDIAGEOARToolBox:
    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3
    ACTIVATION_STANDARD = 'Activation Standard'

    def __init__(self, data_provider, output):
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.rds_conn = PSProjectConnector(self.project_name,
                                           DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.kpi_static_data = self.get_kpi_static_data()
        self.set_templates_data = {}
        self.match_display_in_scene = self.get_match_display()
        self.kpi_results_queries = []
        self.scores = {self.LEVEL1: {}, self.LEVEL2: {}, self.LEVEL3: {}}

        self.output = output
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.global_gen = DIAGEOGenerator(self.data_provider, self.output,
                                          self.common)
        self.tools = DIAGEOToolBox(
            self.data_provider,
            output,
            match_display_in_scene=self.match_display_in_scene
        )  # replace the old one
        self.diageo_generator = DIAGEOGenerator(self.data_provider,
                                                self.output, self.common)

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()

        # Global assortment kpis
        assortment_res_dict = \
            self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE \
                    and set_name not in self.set_templates_data.keys():
                try:
                    self.set_templates_data[
                        set_name] = self.tools.download_template(set_name)
                except:
                    Log.warning("Couldn't find a template for set name: " +
                                str(set_name))
                    continue
            # if set_name in ('MPA', 'New Products',):
            #     set_score = self.calculate_assortment_sets(set_name)

            # Global Visible to Customer / Visible to Consumer
            if set_name in ('Visible to Customer', 'Visible to Consumer %'):
                # Global function
                sku_list = filter(
                    None, self.scif[self.scif['product_type'] ==
                                    'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(
                    sku_list)

                if res_dict:
                    # Saving to new tables
                    # parent_res = res_dict[-1]
                    self.commonV2.save_json_to_new_tables(res_dict)

                    # Saving to old tables
                    # result = parent_res['result']
                    # self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result)

                # Saving to old tables
                filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                set_score = self.tools.calculate_visible_percentage(
                    visible_filters=filters)
                self.save_level2_and_level3(set_name, set_name, set_score)

            elif set_name in ('Relative Position',):
                # Global function
                res_dict = self.diageo_generator.diageo_global_relative_position_function(
                    self.set_templates_data[set_name],
                    location_type='template_display_name')

                if res_dict:
                    # Saving to new tables
                    self.commonV2.save_json_to_new_tables(res_dict)

                set_score = self.calculate_relative_position_sets(set_name)
            else:
                # Unsupported set: skip it rather than returning, so the remaining
                # sets are still calculated and the new-table results get committed.
                continue

            if set_score == 0:
                pass
            elif set_score is False:
                continue

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # committing to new tables
        self.commonV2.commit_results_data()

    def calculate_relative_position_sets(self, set_name):
        """
        This function calculates every relative-position-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            if self.store_info.at[0, 'additional_attribute_2'] == params.get(
                    'additional_attribute_2', 'Empty'):
                tested_filters = {params.get(TESTED_TYPE): params.get(TESTED_VALUE)}
                anchor_filters = {params.get(ANCHOR_TYPE): params.get(ANCHOR_VALUE)}
                direction_data = {
                    'top': self._get_direction_for_relative_position(params.get(self.tools.TOP_DISTANCE)),
                    'bottom': self._get_direction_for_relative_position(params.get(self.tools.BOTTOM_DISTANCE)),
                    'left': self._get_direction_for_relative_position(params.get(self.tools.LEFT_DISTANCE)),
                    'right': self._get_direction_for_relative_position(params.get(self.tools.RIGHT_DISTANCE))
                }
                general_filters = {'template_display_name': params.get(self.tools.LOCATION)}
                result = self.tools.calculate_relative_position(tested_filters, anchor_filters, direction_data,
                                                                **general_filters)
                score = 1 if result else 0
                scores.append(score)

                self.save_level2_and_level3(set_name, params.get(self.tools.KPI_NAME), score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def _get_direction_for_relative_position(self, value):
        """
        This function converts direction data from the template (as string) to a number.
        """
        if value == self.tools.UNLIMITED_DISTANCE:
            value = 1000
        elif not value or not str(value).isdigit():
            value = 0
        else:
            value = int(value)
        return value
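
    # Worked examples of the conversion above (the exact UNLIMITED_DISTANCE
    # sentinel string is defined in DIAGEOToolBox; values here are illustrative):
    #   UNLIMITED_DISTANCE  -> 1000 (effectively no distance limit)
    #   '3'                 -> 3
    #   None, '' or 'abc'   -> 0 (no tolerance in that direction)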

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            target = str(params.get(self.store_type, ''))
            if target.isdigit() or target.capitalize() in (
                    self.tools.RELEVANT_FOR_STORE,
                    self.tools.OR_OTHER_PRODUCTS):
                products = str(params.get(self.tools.PRODUCT_EAN_CODE,
                                          params.get(self.tools.PRODUCT_EAN_CODE2, ''))).replace(',', ' ').split()
                target = 1 if not target.isdigit() else int(target)
                kpi_name = params.get(self.tools.GROUP_NAME, params.get(self.tools.PRODUCT_NAME))
                kpi_static_data = self.kpi_static_data[(self.kpi_static_data['kpi_set_name'] == set_name) &
                                                       (self.kpi_static_data['kpi_name'] == kpi_name)]
                if len(products) > 1:
                    result = 0
                    for product in products:
                        product_score = self.tools.calculate_assortment(product_ean_code=product)
                        result += product_score
                        atomic_fk = kpi_static_data[kpi_static_data['description'] == product]['atomic_kpi_fk'].values[0]
                        self.write_to_db_result(atomic_fk, product_score, level=self.LEVEL3)
                    score = 1 if result >= target else 0
                else:
                    result = self.tools.calculate_assortment(product_ean_code=products)
                    atomic_fk = kpi_static_data['atomic_kpi_fk'].values[0]
                    score = 1 if result >= target else 0
                    self.write_to_db_result(atomic_fk, score, level=self.LEVEL3)

                scores.append(score)
                kpi_fk = kpi_static_data['kpi_fk'].values[0]
                self.write_to_db_result(kpi_fk, score, level=self.LEVEL2)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score
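
    # For illustration: atomic scores [1, 0, 1] give (2 / 3.0) * 100 ~= 66.67.
    # The float() cast matters because this code targets Python 2, where
    # sum(scores) / len(scores) would otherwise truncate to an integer.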

    def calculate_activation_standard(self):
        """
        This function calculates the Activation Standard KPI, and saves the result to the DB (for all 3 levels).
        """
        final_score = 0
        for params in self.tools.download_template(self.ACTIVATION_STANDARD):
            set_name = params.get(self.tools.ACTIVATION_SET_NAME)
            kpi_name = params.get(self.tools.ACTIVATION_KPI_NAME)
            target = float(params.get(self.tools.ACTIVATION_TARGET))
            target = target * 100 if target < 1 else target
            score_type = params.get(self.tools.ACTIVATION_SCORE)
            weight = float(params.get(self.tools.ACTIVATION_WEIGHT))
            if kpi_name:
                kpi_fk = self.kpi_static_data[(self.kpi_static_data['kpi_set_name'] == set_name) &
                                              (self.kpi_static_data['kpi_name'] == kpi_name)]['kpi_fk'].values[0]
                score = self.scores[self.LEVEL2].get(kpi_fk, 0)
            else:
                set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] ==
                                              set_name]['kpi_set_fk'].values[0]
                score = self.scores[self.LEVEL1].get(set_fk, 0)
            if score >= target:
                score = 100
            else:
                if score_type == 'PROPORTIONAL':
                    score = (score / float(target)) * 100
                else:
                    score = 0
            final_score += score * weight
            self.save_level2_and_level3(self.ACTIVATION_STANDARD, set_name,
                                        score)
        set_fk = self.kpi_static_data[
            self.kpi_static_data['kpi_set_name'] ==
            self.ACTIVATION_STANDARD]['kpi_set_fk'].values[0]
        self.write_to_db_result(set_fk, final_score, self.LEVEL1)
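
    # Scoring rule above, illustrated: with target = 80, a measured score of 60
    # and score_type 'PROPORTIONAL', the contribution is (60 / 80.0) * 100 = 75.0
    # before weighting; any other score_type yields 0, and a score of 80 or more
    # always yields 100.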

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this function writes the score for both KPI levels 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[
            (self.kpi_static_data['kpi_set_name'] == set_name)
            & (self.kpi_static_data['kpi_name'] == kpi_name)]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        except IndexError:
            Log.warning("KPI name or set name doesn't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query to the queries list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.
        """
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['kpi_set_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(kpi_set_name, self.session_uid, self.store_id,
                  self.visit_date.isoformat(), format(score, '.2f'), fk)],
                columns=[
                    'kps_name', 'session_uid', 'store_fk', 'visit_date',
                    'score_1', 'kpi_set_fk'
                ])
        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] ==
                                            fk]['kpi_name'].values[0]
            attributes = pd.DataFrame(
                [(self.session_uid, self.store_id, self.visit_date.isoformat(),
                  fk, kpi_name, score)],
                columns=[
                    'session_uid', 'store_fk', 'visit_date', 'kpi_fk',
                    'kpk_name', 'score'
                ])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk']
                                        == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0]
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[
                self.kpi_static_data['atomic_kpi_fk'] ==
                fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame(
                [(atomic_kpi_name, self.session_uid,
                  kpi_set_name, self.store_id, self.visit_date.isoformat(),
                  datetime.utcnow().isoformat(), score, kpi_fk, fk)],
                columns=[
                    'display_text', 'session_uid', 'kps_name', 'store_fk',
                    'visit_date', 'calculation_time', 'score', 'kpi_fk',
                    'atomic_kpi_fk'
                ])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()
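
    # Note: DataFrame.to_dict() returns a column-oriented mapping, e.g. for
    # LEVEL2 roughly:
    #   {'session_uid': {0: '...'}, 'store_fk': {0: 12}, 'visit_date': {0: '...'},
    #    'kpi_fk': {0: 3}, 'kpk_name': {0: '...'}, 'score': {0: 80.0}}
    # The insert() helper is assumed to flatten this into a single-row INSERT.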

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        insert_queries = self.merge_insert_queries(self.kpi_results_queries)
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(
            self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in insert_queries:
            cur.execute(query)
        self.rds_conn.db.commit()

    @staticmethod
    def merge_insert_queries(insert_queries):
        query_groups = {}
        for query in insert_queries:
            static_data, inserted_data = query.split('VALUES ')
            if static_data not in query_groups:
                query_groups[static_data] = []
            query_groups[static_data].append(inserted_data)
        merged_queries = []
        for group in query_groups:
            merged_queries.append('{0} VALUES {1}'.format(
                group, ',\n'.join(query_groups[group])))
        return merged_queries
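
merge_insert_queries groups the single-row INSERT statements by everything before the literal 'VALUES ' token, so each target table receives one multi-row statement instead of many round trips. A minimal standalone illustration (the table and column names below are made up for the example, not the real schema):

queries = [
    "INSERT INTO report.a (kpi_fk, score) VALUES (1, 100)",
    "INSERT INTO report.a (kpi_fk, score) VALUES (2, 50)",
    "INSERT INTO report.b (kpi_fk, score) VALUES (7, 0)",
]
# merge_insert_queries(queries) returns two merged statements, roughly:
#   INSERT INTO report.a (kpi_fk, score) VALUES (1, 100),
#   (2, 50)
#   INSERT INTO report.b (kpi_fk, score) VALUES (7, 0)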
Code example #30
class DIAGEOBR_SANDToolBox:

    LEVEL1 = 1
    LEVEL2 = 2
    LEVEL3 = 3

    def __init__(self, data_provider, output):
        self.k_engine = BaseCalculationsScript(data_provider, output)
        self.output = output
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.products = self.data_provider[Data.PRODUCTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.visit_date = self.data_provider[Data.VISIT_DATE]
        self.session_info = self.data_provider[Data.SESSION_INFO]
        self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
        self.store_info = self.data_provider[Data.STORE_INFO]
        self.store_id = self.data_provider[Data.STORE_FK]
        self.store_channel = self.store_info['store_type'].values[0]
        if self.store_channel:
            self.store_channel = self.store_channel.upper()
        self.store_type = self.store_info['additional_attribute_1'].values[0]
        self.segment = self.get_business_unit_name()
        self.scene_info = self.data_provider[Data.SCENES_INFO]
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_display_in_scene = self.get_match_display()
        self.set_templates_data = {}
        self.kpi_static_data = self.get_kpi_static_data()
        self.tools = DIAGEOToolBox(self.data_provider, output, match_display_in_scene=self.match_display_in_scene)
        self.kpi_results_queries = []
        self.common = Common(self.data_provider)
        self.commonV2 = CommonV2(self.data_provider)
        self.diageo_generator = DIAGEOGenerator(self.data_provider, self.output, self.common)

    def get_business_unit_name(self):
        """
        This function extracts the business unit name of the current store.
        The data is fetched via DIAGEOQueries.get_business_unit_name.
        """
        query = DIAGEOQueries.get_business_unit_name(self.store_id)
        business_unit_name = pd.read_sql_query(query, self.rds_conn.db)
        if business_unit_name['business_unit_name'].empty:
            return ""
        else:
            return business_unit_name['business_unit_name'].values[0]

    def get_kpi_static_data(self):
        """
        This function extracts the static KPI data and saves it into one global data frame.
        The data is taken from static.kpi / static.atomic_kpi / static.kpi_set.
        """
        query = DIAGEOQueries.get_all_kpi_data()
        kpi_static_data = pd.read_sql_query(query, self.rds_conn.db)
        return kpi_static_data

    def get_match_display(self):
        """
        This function extracts the display matches data and saves it into one global data frame.
        The data is taken from probedata.match_display_in_scene.
        """
        query = DIAGEOQueries.get_match_display(self.session_uid)
        match_display = pd.read_sql_query(query, self.rds_conn.db)
        return match_display

    def main_calculation(self, set_names):
        """
        This function calculates the KPI results.
        """
        log_runtime('Updating templates')(self.tools.update_templates)()

        # Global assortment kpis
        assortment_res_dict = self.diageo_generator.diageo_global_assortment_function_v2()
        self.commonV2.save_json_to_new_tables(assortment_res_dict)

        for set_name in set_names:
            set_score = 0
            if set_name not in self.tools.KPI_SETS_WITHOUT_A_TEMPLATE and set_name not in self.set_templates_data.keys():
                try:
                    self.set_templates_data[set_name] = self.tools.download_template(set_name)
                except Exception:
                    Log.warning("Couldn't find a template for set name: " + str(set_name))
                    continue

            # if set_name in ('MPA', 'New Products'):
            #     set_score = self.calculate_assortment_sets(set_name)
            # elif set_name in ('POSM',):
            #     set_score = self.calculate_posm_sets(set_name)
            if set_name == 'Visible to Customer':

                # Global function
                sku_list = filter(None, self.scif[self.scif['product_type'] == 'SKU'].product_ean_code.tolist())
                res_dict = self.diageo_generator.diageo_global_visible_percentage(sku_list)

                if res_dict:
                    # Saving to new tables
                    parent_res = res_dict[-1]
                    for r in res_dict:
                        self.commonV2.write_to_db_result(**r)

                    # Saving to old tables
                    set_score = result = parent_res['result']
                    self.save_level2_and_level3(set_name=set_name, kpi_name=set_name, score=result)

                # filters = {self.tools.VISIBILITY_PRODUCTS_FIELD: 'Y'}
                # set_score = self.tools.calculate_visible_percentage(visible_filters=filters)
                # self.save_level2_and_level3(set_name, set_name, set_score)

            elif set_name in ('Secondary Displays', 'Secondary'):
                # Global function
                res_dict = self.diageo_generator.diageo_global_secondary_display_secondary_function()

                # Saving to new tables
                if res_dict:
                    self.commonV2.write_to_db_result(fk=res_dict['fk'], numerator_id=1,
                                                     denominator_id=self.store_id, result=res_dict['result'])

                # Saving to old tables
                set_score = self.tools.calculate_assortment(assortment_entity='scene_id', location_type='Secondary Shelf')
                self.save_level2_and_level3(set_name, set_name, set_score)

            if set_score == 0:
                pass
            elif set_score is False:
                # A missing score should not abort the remaining sets.
                continue

            set_fk = self.kpi_static_data[self.kpi_static_data['kpi_set_name'] == set_name]['kpi_set_fk'].values[0]
            self.write_to_db_result(set_fk, set_score, self.LEVEL1)

        # committing to new tables
        self.commonV2.commit_results_data()

    def save_level2_and_level3(self, set_name, kpi_name, score):
        """
        Given KPI data and a score, this function writes the score for both KPI levels 2 and 3 in the DB.
        """
        kpi_data = self.kpi_static_data[(self.kpi_static_data['kpi_set_name'] == set_name) &
                                        (self.kpi_static_data['kpi_name'] == kpi_name)]
        try:
            kpi_fk = kpi_data['kpi_fk'].values[0]
        except IndexError:
            Log.warning("KPI name or set name doesn't exist")
            return
        atomic_kpi_fk = kpi_data['atomic_kpi_fk'].values[0]
        self.write_to_db_result(kpi_fk, score, self.LEVEL2)
        self.write_to_db_result(atomic_kpi_fk, score, self.LEVEL3)

    def calculate_posm_sets(self, set_name):
        """
        This function calculates every POSM-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        for params in self.set_templates_data[set_name]:
            if self.store_channel is None:
                break

            kpi_res = self.tools.calculate_posm(display_name=params.get(self.tools.DISPLAY_NAME))
            score = 1 if kpi_res > 0 else 0
            if params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                scores.append(score)

            if score == 1 or params.get(self.store_type) == self.tools.RELEVANT_FOR_STORE:
                self.save_level2_and_level3(set_name, params.get(self.tools.DISPLAY_NAME), score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def calculate_assortment_sets(self, set_name):
        """
        This function calculates every Assortment-typed KPI from the relevant sets, and returns the set final score.
        """
        scores = []
        segment = '{};{}'.format(self.store_type, self.segment)
        for params in self.set_templates_data[set_name]:
            if params.get(segment, '').capitalize() in (self.tools.RELEVANT_FOR_STORE,
                                                        self.tools.OR_OTHER_PRODUCTS):

                object_type = self.tools.ENTITY_TYPE_CONVERTER.get(params.get(self.tools.ENTITY_TYPE),
                                                                   'product_ean_code')
                objects = [str(params.get(self.tools.PRODUCT_EAN_CODE, params.get(self.tools.PRODUCT_EAN_CODE2, '')))]
                if params.get(self.store_type) == self.tools.OR_OTHER_PRODUCTS:
                    additional_objects = str(params.get(self.tools.ADDITIONAL_SKUS)).split(',')
                    objects.extend(additional_objects)
                filters = {object_type: objects}
                result = self.tools.calculate_assortment(**filters)
                score = 1 if result > 0 else 0
                scores.append(score)

                self.save_level2_and_level3(set_name, params.get(self.tools.PRODUCT_NAME), score)

        if not scores:
            return False
        set_score = (sum(scores) / float(len(scores))) * 100
        return set_score

    def write_to_db_result(self, fk, score, level):
        """
        This function creates the result data frame of every KPI (atomic KPI/KPI/KPI set),
        and appends the insert SQL query to the queries list, later to be written to the DB.
        """
        attributes = self.create_attributes_dict(fk, score, level)
        if level == self.LEVEL1:
            table = KPS_RESULT
        elif level == self.LEVEL2:
            table = KPK_RESULT
        elif level == self.LEVEL3:
            table = KPI_RESULT
        else:
            return
        query = insert(attributes, table)
        self.kpi_results_queries.append(query)

    def create_attributes_dict(self, fk, score, level):
        """
        This function creates a data frame with all attributes needed for saving in KPI results tables.
        """
        score = round(score, 2)
        if level == self.LEVEL1:
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['kpi_set_fk'] == fk]['kpi_set_name'].values[0]
            score_type = '%' if kpi_set_name in self.tools.KPI_SETS_WITH_PERCENT_AS_SCORE else ''
            attributes = pd.DataFrame([(kpi_set_name, self.session_uid, self.store_id, self.visit_date.isoformat(),
                                        format(score, '.2f'), score_type, fk)],
                                      columns=['kps_name', 'session_uid', 'store_fk', 'visit_date', 'score_1',
                                               'score_2', 'kpi_set_fk'])

        elif level == self.LEVEL2:
            kpi_name = self.kpi_static_data[self.kpi_static_data['kpi_fk'] == fk]['kpi_name'].values[0].replace("'", "\\'")
            attributes = pd.DataFrame([(self.session_uid, self.store_id, self.visit_date.isoformat(),
                                        fk, kpi_name, score)],
                                      columns=['session_uid', 'store_fk', 'visit_date', 'kpi_fk', 'kpk_name', 'score'])
        elif level == self.LEVEL3:
            data = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk'] == fk]
            atomic_kpi_name = data['atomic_kpi_name'].values[0].replace("'", "\\'")
            kpi_fk = data['kpi_fk'].values[0]
            kpi_set_name = self.kpi_static_data[self.kpi_static_data['atomic_kpi_fk'] == fk]['kpi_set_name'].values[0]
            attributes = pd.DataFrame([(atomic_kpi_name, self.session_uid, kpi_set_name, self.store_id,
                                        self.visit_date.isoformat(), datetime.utcnow().isoformat(),
                                        score, kpi_fk, fk, None, None)],
                                      columns=['display_text', 'session_uid', 'kps_name', 'store_fk', 'visit_date',
                                               'calculation_time', 'score', 'kpi_fk', 'atomic_kpi_fk', 'threshold',
                                               'result'])
        else:
            attributes = pd.DataFrame()
        return attributes.to_dict()

    @log_runtime('Saving to DB')
    def commit_results_data(self):
        """
        This function writes all KPI results to the DB, and commits the changes.
        """
        self.rds_conn.disconnect_rds()
        self.rds_conn.connect_rds()
        cur = self.rds_conn.db.cursor()
        delete_queries = DIAGEOQueries.get_delete_session_results_query_old_tables(self.session_uid)
        for query in delete_queries:
            cur.execute(query)
        for query in self.kpi_results_queries:
            cur.execute(query)
        self.rds_conn.db.commit()
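
Both toolboxes commit with the same delete-then-insert pattern, which makes recalculating a session idempotent: stale rows for the session are removed before the fresh results are written, all inside one commit. A minimal sketch of that pattern, assuming any DB-API compatible connection (the function name is illustrative):

def commit_session_results(conn, delete_queries, insert_queries):
    cursor = conn.cursor()
    for query in delete_queries:   # drop the session's previous results
        cursor.execute(query)
    for query in insert_queries:   # write the freshly calculated results
        cursor.execute(query)
    conn.commit()                  # one transaction for the whole session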