def align_taxonomy(self, lower=1, verbose=1):
    """
    Align taxonomy across all tables loaded.

    Builds a from->to mapping out of the 'taxonomy' table in
    ``self.input_dict`` and applies it to every other DataFrame there
    via ``pandas_replace``.

    :param lower: 1 to lower-case the taxonomy table's column names
        first (so 'From'/'To' become 'from'/'to'), 0 to leave them as-is
    :param verbose: 1, if detailed logs 0 otherwise
    :return: None; mutates ``self.input_dict`` in place and stores the
        mapping on ``self.taxonomy_dict``
    """
    if lower == 1:
        self.logger.info('Convert columns to lower case')
        self.input_dict['taxonomy'].columns = \
            self.input_dict['taxonomy'].columns.str.lower().tolist()
    self.logger.info('Create dictionary')
    # A mapping row is unusable if either endpoint is missing; drop both
    # in a single pass instead of two separate dropna calls.
    self.input_dict['taxonomy'].dropna(subset=['from', 'to'], inplace=True)
    self.taxonomy_dict = dict(zip(self.input_dict['taxonomy']['from'],
                                  self.input_dict['taxonomy']['to']))
    # Apply the mapping to every DataFrame except the taxonomy table itself.
    for key, table in self.input_dict.items():
        if isinstance(table, pd.DataFrame) and key != 'taxonomy':
            self.input_dict[key] = pandas_replace(table, self.taxonomy_dict,
                                                  verbose=verbose)
# Sequential (non-multiprocessing) path: melt each ATL group's percentage
# columns into long form and scale spend/stimuli by the percentage factor.
# NOTE(review): 'flash_mulitprocessing' is a typo for 'multiprocessing' at
# the variable's definition site — left untouched here to avoid breaking it.
append_group = 0
if flash_mulitprocessing == 0:
    logger.info('------ Loop through groups')
    for group in tqdm(atl_group_test_g.groups.keys()):
        # group = ('Rockshore', 'ATL')
        atl_group_sub = atl_group_test_g.get_group(group)
        # Wide -> long: one row per (id_vars, stimuli_ratio) with the
        # adstock value taken from the percentage columns.
        atl_group_sub = pd.melt(atl_group_sub, id_vars=id_vars,
                                value_name='adstock',
                                var_name='stimuli_ratio',
                                value_vars=perc_cols)
        # logger.info('------ Get percentage')
        # Map the stimuli_ratio label to its numeric percentage via
        # perc_dict (e.g. a column name -> fraction lookup — confirm).
        atl_group_sub['p'] = atl_group_sub['stimuli_ratio'].copy()
        atl_group_sub = pandas_replace(atl_group_sub, perc_dict, ['p'], verbose=0)
        # print(f'Time taken: {str(datetime.datetime.now() - a)}')
        # atl_group_test = atl_group_test.merge(perc_df, on='stimuli_ratio', how='left')
        # print(f'Time taken: {str(datetime.datetime.now() - a)}')
        # logger.info('------ Scale spend and stimuli')
        # Scale the preserved '_copy' baselines by the percentage, then
        # drop the baselines — they are no longer needed.
        atl_group_sub[_stimuli_col] = atl_group_sub[
            'p'] * atl_group_sub[_stimuli_col + '_copy'].copy()
        atl_group_sub[_spend_col] = atl_group_sub['p'] * atl_group_sub[
            _spend_col + '_copy'].copy()
        del atl_group_sub[_stimuli_col + '_copy'], atl_group_sub[_spend_col + '_copy']
        # print(f'Time taken: {str(datetime.datetime.now() - a)}')
        # NOTE(review): loop body appears to continue beyond this chunk.
geo_master = all_files['geo_master'] # Get taxonomy dict taxonomy.dropna(subset=['From'], inplace=True) taxonomy.dropna(subset=['To'], inplace=True) taxonomy_dict = {s: a for s, a in zip(taxonomy['From'], taxonomy['To'])} # Convert raw to BCV combinations raw_cols = raw.columns.tolist() raw['Vehicles'] = raw['Vehicles'].str.split(',') raw = raw.set_index(['Country', 'Brand'])['Vehicles'].apply(pd.Series).stack() raw =pd.DataFrame(raw).reset_index() del raw['level_2'] raw.columns = raw_cols raw_melt = raw.copy() raw_renamed = pandas_replace(raw_melt, taxonomy_dict,verbose=1) # Get available available_c = available.copy() available_renamed = pandas_replace(available_c, taxonomy_dict,verbose=1) available_renamed.to_csv(config_path + "Inputs/AvailableCurves_renamed.csv") raw_renamed.to_csv(config_path + "Inputs/RawBCV_CR_renamed.csv") # Get atlbtl atlbtl_c = atlbtl.copy() atlbtl_renamed = pandas_replace(atlbtl_c, taxonomy_dict,verbose=1) atlbtl_renamed.to_csv(input_path + "SpendsFY20/ATL BTL spend FY20 renamed.csv")
# Closes a constructor/call opened before this chunk (presumably the ETL
# object's configuration dict — confirm against the preceding lines).
}, logger=logger)
# Import all the files
ETL.import_files(lowercase=1)
if align_taxonomy:
    logger.info('------ Rename according to taxonomy')
    ETL.align_taxonomy(verbose=0)
# get calendar
ETL.get_calendar(_week_col, _plan_period_col, _startdate_col, _enddate_col,
                 _planperiodname_col)
# Rename india: normalise the geo label 'India' to its code 'IND' in the
# geo column of each input table so joins across tables line up.
ETL.input_dict['media'] = pandas_replace(ETL.input_dict['media'],
                                         {'India': 'IND'},
                                         additional_cols=[_geo_col],
                                         verbose=1)
ETL.input_dict['spend'] = pandas_replace(ETL.input_dict['spend'],
                                         {'India': 'IND'},
                                         additional_cols=[_geo_col],
                                         verbose=1)
ETL.input_dict['curves'] = pandas_replace(ETL.input_dict['curves'],
                                          {'India': 'IND'},
                                          additional_cols=[_geo_col],
                                          verbose=1)
# ETL.input_dict['curves'][_rating_col] = 4
# Pull the relevant response curves, then the unique geo/brand/instrument
# combinations they cover (used downstream as a filter — confirm).
curves_4 = ETL.get_relevant_curves(_coeffA_col, _coeffB_col, _coeffC_col,
                                   form_col, _rating_col)
filter_df = curves_4.loc[:, [_geo_col, _brand_col, _instrument_col
                             ]].drop_duplicates()
# Closes a constructor/call opened before this chunk (presumably the ETL
# object's configuration dict — confirm against the preceding lines).
}, logger=logger)
# Import all the files
ETL.import_files(lowercase=1)
logger.info('------ Rename instruments according to taxonomy')
if instrument_taxonomy:
    # old -> new instrument name mapping from the instrument_taxonomy table.
    ETL.inst_taxonomy_dict = {
        s: a
        for s, a in zip(ETL.input_dict['instrument_taxonomy']['old'],
                        ETL.input_dict['instrument_taxonomy']['new'])
    }
    # Exact-match replacement in the 'vehicle' column (anywhere=0)...
    ETL.input_dict['bus_inputs'] = pandas_replace(ETL.input_dict['bus_inputs'],
                                                  ETL.inst_taxonomy_dict,
                                                  additional_cols=['vehicle'],
                                                  anywhere=0,
                                                  verbose=1)
    # ...and substring replacement (anywhere=1) in the free-text-like
    # columns, where the instrument name is embedded in a longer value.
    ETL.input_dict['bus_inputs'] = pandas_replace(
        ETL.input_dict['bus_inputs'],
        ETL.inst_taxonomy_dict,
        additional_cols=['bcv', 'selection', 'aggregation'],
        anywhere=1,
        verbose=1)
    # ETL.input_dict['bus_inputs'].to_excel(config_path + "ME/Catalyst - Proxy Curves - Input template FINAL TAXONOMY.xlsx", index=False)
if align_taxonomy:
    logger.info('------ Rename according to taxonomy')
    ETL.align_taxonomy(verbose=0)
# get calendar
# Script section: derive population ratios, drop already-completed BCVs,
# apply the instrument taxonomy, and construct the CalculationEngine.
logger.info('------ Get population ratio. To be used as proxy for differential cost.')
pop_ratio = ETL.get_pop_ratio(_geo_col, _pop_col, _ppp_col, parity=1)
logger.info('------ Exclude completed BCVs')
# Keep only bus_inputs rows whose BCV is not marked 'Complete' in exec_status.
completed_bcvs = ETL.input_dict['exec_status'].loc[
    ETL.input_dict['exec_status']['status'] == 'Complete', 'bcv'].tolist()
pending_bcv_index = ~ETL.input_dict['bus_inputs']['bcv'].isin(completed_bcvs)
ETL.input_dict['bus_inputs'] = ETL.input_dict['bus_inputs'].loc[pending_bcv_index, :]
logger.info('------ Rename instruments according to taxonomy')
if instrument_taxonomy:
    # old -> new instrument name mapping from the instrument_taxonomy table.
    ETL.inst_taxonomy_dict = {s: a
                              for s, a in zip(ETL.input_dict['instrument_taxonomy']['old'],
                                              ETL.input_dict['instrument_taxonomy']['new'])}
    # ETL.input_dict['lt_values_df_all'] = pandas_replace(ETL.input_dict['lt_values_df_all'], ETL.inst_taxonomy_dict
    #                                                     , additional_cols=[_instrument_col]
    #                                                     ,anywhere=0, verbose=1)
    ETL.input_dict['lt_values_df_eu'] = pandas_replace(ETL.input_dict['lt_values_df_eu'],
                                                       ETL.inst_taxonomy_dict,
                                                       additional_cols=[_instrument_col],
                                                       anywhere=0, verbose=1)
    ETL.input_dict['mediacost'] = pandas_replace(ETL.input_dict['mediacost'],
                                                 ETL.inst_taxonomy_dict,
                                                 additional_cols=[_instrument_col],
                                                 anywhere=0, verbose=1)
# NOTE(review): original indentation was lost; this line is assumed to sit
# outside the 'if instrument_taxonomy' block — confirm against the source.
curves_consolidated = ETL.input_dict['curves_consolidated'].copy()
# get calendar
ETL.get_calendar(_week_col, _plan_period_col, _startdate_col, _enddate_col,
                 _planperiodname_col, convert_datetime=1)
# Create
CalcEngine = CalculationEngine(ETL.input_dict.copy(), logger)
# NOTE(review): dangling attribute access below — the chunk appears
# truncated mid-statement here (no call parentheses / arguments).
logger.info