Exemple #1
0
    def align_taxonomy(self, lower=1, verbose=1):
        """
        Apply the taxonomy mapping to every other DataFrame held in ``self.input_dict``.

        The ``'taxonomy'`` table provides the mapping through its ``from`` and ``to``
        columns. Rows missing either value are dropped from that table in place, the
        resulting dict is stored on ``self.taxonomy_dict``, and each remaining
        DataFrame entry is replaced by the result of ``pandas_replace``.

        :param lower: 1 to lower-case the taxonomy column names before they are read
        :param verbose: 1, if detailed logs 0 otherwise (forwarded to ``pandas_replace``)
        :return: None
        """
        taxonomy = self.input_dict['taxonomy']

        if lower == 1:
            self.logger.info('Convert columns to lower case')
            taxonomy.columns = taxonomy.columns.str.lower().tolist()

        self.logger.info('Create dictionary')
        # Drop incomplete mapping rows one column at a time, mutating the stored table.
        for mapping_col in ('from', 'to'):
            taxonomy.dropna(subset=[mapping_col], inplace=True)
        self.taxonomy_dict = dict(zip(taxonomy['from'], taxonomy['to']))

        for name, table in self.input_dict.items():
            # The taxonomy table itself and non-DataFrame entries are left untouched.
            if name == 'taxonomy' or not isinstance(table, pd.DataFrame):
                continue
            self.input_dict[name] = pandas_replace(table, self.taxonomy_dict, verbose=verbose)
Exemple #2
0
        # NOTE(review): append_group is initialised here but not used in the visible
        # lines; presumably it is consumed further down - confirm.
        append_group = 0
        # Sequential path: taken only when the flag equals 0.
        if flash_mulitprocessing == 0:
            logger.info('------ Loop through groups')
            # Process one group of the grouped frame at a time, with a tqdm progress bar.
            for group in tqdm(atl_group_test_g.groups.keys()):
                # Example key: group = ('Rockshore', 'ATL')
                atl_group_sub = atl_group_test_g.get_group(group)
                # Reshape wide -> long: every column listed in perc_cols becomes rows,
                # with the column name stored in 'stimuli_ratio' and its value in 'adstock'.
                atl_group_sub = pd.melt(atl_group_sub,
                                        id_vars=id_vars,
                                        value_name='adstock',
                                        var_name='stimuli_ratio',
                                        value_vars=perc_cols)

                # Get percentage: copy the 'stimuli_ratio' labels into 'p' and run
                # pandas_replace over that column with perc_dict.
                # NOTE(review): 'p' is multiplied below, so perc_dict presumably maps
                # each label to a numeric ratio - confirm against where perc_dict is built.
                atl_group_sub['p'] = atl_group_sub['stimuli_ratio'].copy()
                atl_group_sub = pandas_replace(atl_group_sub,
                                               perc_dict, ['p'],
                                               verbose=0)
                # print(f'Time taken: {str(datetime.datetime.now() - a)}')

                # Earlier merge-based alternative, kept for reference:
                # atl_group_test = atl_group_test.merge(perc_df, on='stimuli_ratio', how='left')
                # print(f'Time taken: {str(datetime.datetime.now() - a)}')

                # Scale spend and stimuli: overwrite each column with 'p' times its
                # '<column>_copy' counterpart.
                atl_group_sub[_stimuli_col] = atl_group_sub[
                    'p'] * atl_group_sub[_stimuli_col + '_copy'].copy()
                atl_group_sub[_spend_col] = atl_group_sub['p'] * atl_group_sub[
                    _spend_col + '_copy'].copy()
                # The '_copy' columns are no longer needed once the scaled values are written.
                del atl_group_sub[_stimuli_col +
                                  '_copy'], atl_group_sub[_spend_col + '_copy']
                # print(f'Time taken: {str(datetime.datetime.now() - a)}')
geo_master = all_files['geo_master']

# Build the 'From' -> 'To' lookup; rows missing either side are dropped in place.
taxonomy.dropna(subset=['From'], inplace=True)
taxonomy.dropna(subset=['To'], inplace=True)
taxonomy_dict = dict(zip(taxonomy['From'], taxonomy['To']))

# Convert raw to BCV combinations: split the comma-separated 'Vehicles' cell and
# stack the pieces so each (Country, Brand) pair gets one row per vehicle.
raw_cols = raw.columns.tolist()
raw['Vehicles'] = raw['Vehicles'].str.split(',')
raw = (
    raw.set_index(['Country', 'Brand'])['Vehicles']
    .apply(pd.Series)
    .stack()
)
# reset_index exposes the stack position as 'level_2'; it is discarded before the
# original column names are restored.
raw = pd.DataFrame(raw).reset_index().drop(columns='level_2')
raw.columns = raw_cols
raw_melt = raw.copy()
raw_renamed = pandas_replace(raw_melt, taxonomy_dict, verbose=1)

# Apply the same lookup to a copy of the available table.
available_c = available.copy()
available_renamed = pandas_replace(available_c, taxonomy_dict, verbose=1)

available_renamed.to_csv(config_path + "Inputs/AvailableCurves_renamed.csv")
raw_renamed.to_csv(config_path + "Inputs/RawBCV_CR_renamed.csv")

# Apply the same lookup to a copy of the atlbtl table and export it.
atlbtl_c = atlbtl.copy()
atlbtl_renamed = pandas_replace(atlbtl_c, taxonomy_dict, verbose=1)

atlbtl_renamed.to_csv(input_path + "SpendsFY20/ATL BTL spend FY20 renamed.csv")
    },
    logger=logger)
# Import all the files through the ETL object.
ETL.import_files(lowercase=1)

if align_taxonomy:
    logger.info('------ Rename according to taxonomy')
    ETL.align_taxonomy(verbose=0)

# Get calendar.
ETL.get_calendar(
    _week_col,
    _plan_period_col,
    _startdate_col,
    _enddate_col,
    _planperiodname_col,
)

# Rename India: the same 'India' -> 'IND' replacement is applied to each table in
# turn (media, then spend, then curves), always including the geo column.
for table_key in ('media', 'spend', 'curves'):
    ETL.input_dict[table_key] = pandas_replace(
        ETL.input_dict[table_key],
        {'India': 'IND'},
        additional_cols=[_geo_col],
        verbose=1,
    )

# Disabled override: ETL.input_dict['curves'][_rating_col] = 4
curves_4 = ETL.get_relevant_curves(
    _coeffA_col,
    _coeffB_col,
    _coeffC_col,
    form_col,
    _rating_col,
)
# Distinct geo / brand / instrument combinations present in the selected curves.
filter_key_cols = [_geo_col, _brand_col, _instrument_col]
filter_df = curves_4.loc[:, filter_key_cols].drop_duplicates()
    },
    logger=logger)

# Import all the files through the ETL object.
ETL.import_files(lowercase=1)

logger.info('------ Rename instruments according to taxonomy')
if instrument_taxonomy:
    # Old-name -> new-name lookup built from the instrument taxonomy table.
    inst_taxonomy_df = ETL.input_dict['instrument_taxonomy']
    ETL.inst_taxonomy_dict = dict(zip(inst_taxonomy_df['old'], inst_taxonomy_df['new']))

    # Two passes over bus_inputs, in this order: first the 'vehicle' column with
    # anywhere=0, then the bcv / selection / aggregation columns with anywhere=1.
    for rename_cols, anywhere_flag in (
        (['vehicle'], 0),
        (['bcv', 'selection', 'aggregation'], 1),
    ):
        ETL.input_dict['bus_inputs'] = pandas_replace(
            ETL.input_dict['bus_inputs'],
            ETL.inst_taxonomy_dict,
            additional_cols=rename_cols,
            anywhere=anywhere_flag,
            verbose=1,
        )
# Disabled export of the renamed inputs:
# ETL.input_dict['bus_inputs'].to_excel(config_path + "ME/Catalyst - Proxy Curves - Input template FINAL TAXONOMY.xlsx", index=False)

if align_taxonomy:
    logger.info('------ Rename according to taxonomy')
    ETL.align_taxonomy(verbose=0)
# get calendar
Exemple #6
0
logger.info('------ Get population ratio. To be used as proxy for differential cost.')
pop_ratio = ETL.get_pop_ratio(_geo_col, _pop_col, _ppp_col, parity=1)

logger.info('------ Exclude completed BCVs')
# BCVs whose execution status is exactly 'Complete'.
exec_status_df = ETL.input_dict['exec_status']
is_complete = exec_status_df['status'] == 'Complete'
completed_bcvs = exec_status_df.loc[is_complete, 'bcv'].tolist()
# Keep only the bus_inputs rows whose BCV is not in that list.
bus_inputs_df = ETL.input_dict['bus_inputs']
pending_bcv_index = ~bus_inputs_df['bcv'].isin(completed_bcvs)
ETL.input_dict['bus_inputs'] = bus_inputs_df.loc[pending_bcv_index, :]

logger.info('------ Rename instruments according to taxonomy')
if instrument_taxonomy:
    # Old-name -> new-name lookup built from the instrument taxonomy table.
    inst_taxonomy_df = ETL.input_dict['instrument_taxonomy']
    ETL.inst_taxonomy_dict = dict(zip(inst_taxonomy_df['old'], inst_taxonomy_df['new']))

    # The same rename on 'lt_values_df_all' is currently disabled; only the two
    # tables below are processed, in this order.
    for table_key in ('lt_values_df_eu', 'mediacost'):
        ETL.input_dict[table_key] = pandas_replace(
            ETL.input_dict[table_key],
            ETL.inst_taxonomy_dict,
            additional_cols=[_instrument_col],
            anywhere=0,
            verbose=1,
        )

curves_consolidated = ETL.input_dict['curves_consolidated'].copy()

# Get calendar, this time with convert_datetime=1.
ETL.get_calendar(
    _week_col,
    _plan_period_col,
    _startdate_col,
    _enddate_col,
    _planperiodname_col,
    convert_datetime=1,
)

# Create the calculation engine from a shallow copy of the input dict.
CalcEngine = CalculationEngine(ETL.input_dict.copy(), logger)

logger.info