def remove_cols_and_make_calculations(self, df):
    """Run the post-import cleanup pipeline on df.

    Filters rows to the configured date window, derives ad cost,
    drops the configured columns, and applies the POST vendor rules.
    Returns the transformed DataFrame.
    """
    # Ordered pipeline: each step consumes and returns the frame.
    transforms = (
        lambda d: utl.date_removal(d, vmc.date, self.p[vmc.startdate],
                                   self.p[vmc.enddate]),
        ad_cost_calculation,
        lambda d: utl.col_removal(d, self.key, self.p[vmc.dropcol]),
        lambda d: utl.apply_rules(d, self.vm_rules, utl.POST, **self.p),
    )
    for transform in transforms:
        df = transform(df)
    return df
def add_names_to_df(self):
    """Map ad and campaign ids onto their names and drop the raw ids.

    Fix: now returns self.df, matching the return convention of the
    other API classes' add_names_to_df implementations (backward
    compatible — previous callers ignored the implicit None).
    """
    self.df = self.dict_to_cols(self.df, 'id', self.adids)  # type: pd.DataFrame
    cids = self.get_campaigns()
    self.df = self.dict_to_cols(
        self.df, 'campaign_id', cids)  # type: pd.DataFrame
    # Raw id columns are no longer needed once names are attached.
    self.df = utl.col_removal(self.df, 'API_VK', ['id', 'campaign_id'])
    return self.df
def check_plan_error(self, df):
    """Report placements in df that appear in the planned-data error file.

    Reads the error file configured on the plan vendor set; if it is
    empty, logs success and returns True.  Otherwise logs and records
    the undefined placement keys in the analysis dict (implicitly
    returning None — callers appear to treat the return as truthy-only).
    """
    plan_names = self.matrix.vendor_set(vm.plan_key)[vmc.fullplacename]
    er = self.matrix.vendor_set(vm.plan_key)[vmc.filenameerror]
    edf = utl.import_read_csv(er, utl.error_path)
    if edf.empty:
        # No error rows: every placement combination is defined.
        plan_error_msg = ('No Planned error - all {} '
                         'combinations are defined.'.format(plan_names))
        logging.info(plan_error_msg)
        self.add_to_analysis_dict(key_col=self.unknown_col,
                                  message=plan_error_msg)
        return True
    # Keep only rows whose full placement name shows up in the error
    # file, restricted to the plan-name columns plus the vendor key.
    df = df[df[dctc.PFPN].isin(
        edf[vmc.fullplacename].values)][plan_names +
                                        [vmc.vendorkey]].drop_duplicates()
    df = vm.full_placement_creation(df, None, dctc.FPN, plan_names)
    df = df[df[dctc.FPN].isin(edf[dctc.FPN].values)]
    df = utl.col_removal(df, None, [dctc.FPN])
    # Quote each value so the logged keys are unambiguous; NaNs stay
    # NaN through the concatenation and are dropped just below.
    for col in df.columns:
        df[col] = "'" + df[col] + "'"
    df = df.dropna()
    df_dict = '\n'.join(['{}{}'.format(k, v) for k, v in
                         df.to_dict(orient='index').items()])
    undefined_msg = 'Undefined placements have the following keys:'
    logging.info('{}\n{}'.format(undefined_msg, df_dict))
    self.add_to_analysis_dict(key_col=self.unknown_col,
                              message=undefined_msg,
                              data=df.to_dict())
def add_names_to_df(self):
    """Attach ad and ad-squad names to the Snapchat stats frame.

    Returns self.df unchanged (minus id/index columns) when there is
    no data or no breakdown is requested; otherwise resolves ids to
    names and drops the raw bookkeeping columns.
    """
    if self.df.empty:
        logging.warning('No data for date range, returning empty df.')
        return self.df
    if not self.breakdown:
        self.df = utl.col_removal(self.df, 'API_Snapchat', ['id', 'index'])
        return self.df
    # Resolve ids to human-readable names via the lookup dicts.
    ad_lookup = self.get_ads()
    self.df = self.dict_to_cols(self.df, 'id', ad_lookup)  # type: pd.DataFrame
    squad_lookup = self.get_adsquads()
    self.df = self.dict_to_cols(
        self.df, 'ad_squad_id', squad_lookup)  # type: pd.DataFrame
    rename_map = {'ad_squad_id': 'Ad Set Name', 'name': 'Ad Name'}
    self.df = self.df.rename(columns=rename_map)
    self.df = utl.col_removal(self.df, 'API_Snapchat',
                              [0, 'id', 'stats', 'index'])
    return self.df
def apply_cap(self, df, c):
    """Apply the metric cap described by config dict c to df.

    Loads the cap file, combines it with df, runs the net-cost-final
    calculation against the configured dimension, and replaces the cost
    column with the capped result.  Returns the capped DataFrame.
    """
    logging.info('Calculating metric cap from: '
                 '{}'.format(c[self.file_name]))
    pdf = self.get_cap_file(c)
    # Fix: DataFrame.append was removed in pandas 2.0; pd.concat with
    # sort=False is the drop-in equivalent of the old append call.
    df = pd.concat([df, pdf], sort=False)
    df = net_cost_final_calculation(df, p_col=c[self.proc_dim],
                                    p_cost=self.temp_metric)
    # Cap-file rows carry no placement name; drop them after the calc.
    df = df[~df[dctc.FPN].isnull()]
    df[vmc.cost] = df[NCF]
    df = utl.col_removal(df, 'Raw Data', [self.temp_metric])
    return df
def net_cost_final(df, p_col=dctc.PFPN, n_cost=vmc.cost):
    """Compute the final (capped) net cost column NCF on df.

    For each group in p_col, finds the first date where the cumulative
    net cost exceeds the planned/capped difference (DIF_PNC) and
    pro-rates that date's cost down; later dates get 0; earlier dates
    keep their original cost.
    """
    # Earliest row per group where the cumulative sum breaches the cap.
    tdf = (df[df[NC_CUM_SUM] > df[DIF_PNC]].groupby([p_col
                                                     ]).min().reset_index())
    if not tdf.empty:
        tdf = tdf[[vmc.date, p_col]]
        # Flag the breach date; rows that don't match the merge get NaN.
        tdf[NC_CUM_SUM_MIN_DATE] = True
        df = df.merge(tdf, on=[p_col, vmc.date], how='left')
        # NOTE: '== True' is deliberate — the merged flag column holds
        # NaN for non-breach rows, so plain truthiness would not work
        # element-wise here.
        df[NCF] = np.where(
            df[NC_CUM_SUM] > df[DIF_PNC],
            (np.where(df[NC_CUM_SUM_MIN_DATE] == True,
                      (df[n_cost] - (df[n_cost] / (df[NC_SUM_DATE])) *
                       (df[NC_CUM_SUM] - df[DIF_PNC])), 0)),
            # NOTE(review): the non-breach branch reads vmc.cost rather
            # than the n_cost parameter — confirm this asymmetry is
            # intended when n_cost != vmc.cost.
            df[vmc.cost])
    else:
        # No group ever breaches the cap: pass costs through unchanged.
        df[NC_CUM_SUM_MIN_DATE] = 0
        df[NCF] = df[n_cost]
    # Drop the intermediate cumulative-sum helper columns.
    df = utl.col_removal(df, 'Raw Data', DROP_COL)
    return df
def vm_parse(self):
    """Read the vendor matrix and normalize it into dict form.

    Populates self.vm_df (raw frame), self.vl (vendor key list) and
    self.vm (column -> {vendor_key: value} dict, with bar-split
    columns exploded into lists).
    """
    self.vm_df = pd.DataFrame(columns=vmc.datacol)
    self.vm_df = self.read()
    self.vm = self.vm_df.copy()
    self.plan_net_check()
    # Columns whose name begins with '|' are scratch columns; drop them.
    scratch_cols = [c for c in self.vm.columns.values.tolist()
                    if c[0] == '|']
    self.vm = utl.col_removal(self.vm, 'vm', scratch_cols)
    self.vm = utl.data_to_type(self.vm, [], vmc.datecol, vmc.barsplitcol)
    self.vl = self.vm[vmc.vendorkey].tolist()
    self.vm = self.vm.set_index(vmc.vendorkey).to_dict()
    # Explode '|'-delimited string values into lists per vendor key.
    for col in vmc.barsplitcol:
        self.vm[col] = {vendor: value.split('|')
                        for vendor, value in self.vm[col].items()}
def clean_df(self, df):
    """Reshape the raw Twitter API frame into a date x campaign table.

    Drops conversion columns, unpacks each per-campaign metric series
    into (date, campaign_id) rows, outer-merges them into one frame,
    converts spend from micro-units, and maps date indices back to
    real dates.
    """
    if df.empty:
        return df
    # Conversion columns are excluded from the cleaned output.
    for col in mobile_conversions + web_conversions:
        if col in df.columns:
            df = utl.col_removal(df, 'API_Twitter', [col], warn=False)
    df = df.drop([jsonmet, jsonseg], axis=1).set_index(colcid)
    # Seed frame holding the merge keys with the right dtypes.
    ndf = pd.DataFrame(columns=[coldate, colcid])
    ndf = utl.data_to_type(ndf, str_col=[colcid], int_col=[coldate])
    for col in df.columns:
        # Each cell holds a per-date series; expand it to columns,
        # then unstack into long (date-index, campaign_id) rows.
        tdf = df[col].apply(lambda x: self.clean_data(x)).apply(pd.Series)
        tdf = tdf.unstack().reset_index()
        tdf = tdf.rename(columns={0: col, 'level_0': coldate})
        tdf = utl.data_to_type(tdf, str_col=[colcid], int_col=[coldate])
        ndf = pd.merge(ndf, tdf, on=[coldate, colcid], how='outer')
    df = ndf
    # Spend is reported in micro-currency units — presumably
    # micro-dollars; convert to whole units.
    df[colspend] /= 1000000
    # self.dates maps the integer date index back to actual dates.
    df[coldate].replace(self.dates, inplace=True)
    return df
def get_upload_df(self, table):
    """Build the DataFrame to upload for table.

    When the table exports the event-name column but none of the other
    event columns, temporarily widens the slice with the remaining real
    columns (minus planning/model columns) so the slice succeeds, then
    strips them back out.  Foreign-key ids are attached when the result
    is non-empty.
    """
    cols = self.dbs.get_cols_for_export(table)
    cols_to_add = []
    other_event_cols = [exc.event_steam_name, exc.event_conv_name]
    excluded = ['plannednetcost', 'modelcoefa', 'modelcoefb', 'modelcoefc']
    has_other_events = any(x in cols for x in other_event_cols)
    if exc.event_name in cols and not has_other_events:
        cols_to_add = [x for x in self.dft.real_columns
                       if x not in cols and x not in excluded]
        cols += cols_to_add
    ul_df = self.dft.slice_for_upload(cols)
    if cols_to_add:
        # Remove the padding columns added above before uploading.
        ul_df = utl.col_removal(ul_df, key='None',
                                removal_cols=cols_to_add, warn=False)
    if not ul_df.empty:
        ul_df = self.add_ids_to_df(self.dbs.fk, ul_df)
    return ul_df
def vm_update(old_path=utl.config_path, old_file='OldVendorMatrix.csv'):
    """Migrate the vendor matrix file to the current column layout.

    Backs up the current matrix to old_path/old_file, re-reads it,
    converts legacy columns (FIRSTROWADJ, AUTO DICTIONARY placement)
    to their modern equivalents, re-runs rule-metric checks, and
    writes the result back over the live csv.
    """
    logging.info('Updating Vendor Matrix')
    # Keep a backup copy before rewriting the live file.
    shutil.copyfile(csv_full_file, os.path.join(old_path, old_file))
    ovm = utl.import_read_csv(filename=old_file, path=old_path)
    rules = [col for col in ovm.columns if 'RULE_' in col]
    rule_metrics = [col for col in ovm.columns if '_METRIC' in col]
    nvm = pd.DataFrame(columns=[vmc.vendorkey] + vmc.vmkeys)
    # Fix: DataFrame.append was removed in pandas 2.0; pd.concat with
    # sort=True is the drop-in equivalent of append(ovm, sort=True).
    vm = pd.concat([nvm, ovm], sort=True)
    if 'FIRSTROWADJ' in vm.columns:
        # Legacy boolean adjustment column becomes a +1 on first row.
        vm[vmc.firstrow] = np.where(vm['FIRSTROWADJ'],
                                    vm[vmc.firstrow] + 1, vm[vmc.firstrow])
    if vmc.autodicplace not in ovm.columns:
        vm[vmc.autodicplace] = vmc.fullplacename
    vm = utl.col_removal(vm, 'vm',
                         ['FIRSTROWADJ', 'LASTROWADJ', 'AUTO DICTIONARY'],
                         warn=False)
    vm = vm.reindex([vmc.vendorkey] + vmc.vmkeys + rules, axis=1)
    for col in rule_metrics:
        vm = vm_update_rule_check(vm, col)
    # Blank out true NaNs and stringified 'nan' artifacts from the read.
    vm = vm.fillna('')
    vm = vm.replace('nan', '')
    vm.to_csv(csv_full_file, index=False, encoding='utf-8')
def replace_with_parent(df, parent, id_col):
    """Replace id_col values via the parent lookup and expand the result.

    parent is a (mapping, column_name) pair: the mapping translates the
    current ids, and the expanded 'name'/'parent' fields become the
    named column and the new id column respectively.
    """
    df[id_col] = df[id_col].map(parent[0])
    # Each mapped value is a record; expand it into its own columns.
    expanded = df[id_col].apply(pd.Series)
    df = df.join(expanded)
    df = utl.col_removal(df, 'API_Twitter', [0, id_col], warn=False)
    return df.rename(columns={'name': parent[1], 'parent': id_col})