Example #1
 def remove_cols_and_make_calculations(self, df):
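     # Trim the data to the vendor's start/end dates, run the ad cost
     # calculation, drop the configured columns, then apply the POST-stage
     # rules.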
     df = utl.date_removal(df, vmc.date, self.p[vmc.startdate],
                           self.p[vmc.enddate])
     df = ad_cost_calculation(df)
     df = utl.col_removal(df, self.key, self.p[vmc.dropcol])
     df = utl.apply_rules(df, self.vm_rules, utl.POST, **self.p)
     return df
Example #2
 def add_names_to_df(self):
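     # Map ad and campaign ids to their names, then drop the raw id columns.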
     self.df = self.dict_to_cols(self.df, 'id',
                                 self.adids)  # type: pd.DataFrame
     cids = self.get_campaigns()
     self.df = self.dict_to_cols(self.df, 'campaign_id',
                                 cids)  # type: pd.DataFrame
     self.df = utl.col_removal(self.df, 'API_VK', ['id', 'campaign_id'])
Example #3
 def check_plan_error(self, df):
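     # Read the plan key's error file; if it is empty every full placement
     # name combination is defined, otherwise log and record the keys of the
     # undefined placements.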
     plan_names = self.matrix.vendor_set(vm.plan_key)[vmc.fullplacename]
     er = self.matrix.vendor_set(vm.plan_key)[vmc.filenameerror]
     edf = utl.import_read_csv(er, utl.error_path)
     if edf.empty:
         plan_error_msg = ('No Planned error - all {} '
                           'combinations are defined.'.format(plan_names))
         logging.info(plan_error_msg)
         self.add_to_analysis_dict(key_col=self.unknown_col,
                                   message=plan_error_msg)
         return True
     df = df[df[dctc.PFPN].isin(
         edf[vmc.fullplacename].values)][plan_names +
                                         [vmc.vendorkey]].drop_duplicates()
     df = vm.full_placement_creation(df, None, dctc.FPN, plan_names)
     df = df[df[dctc.FPN].isin(edf[dctc.FPN].values)]
     df = utl.col_removal(df, None, [dctc.FPN])
     for col in df.columns:
         df[col] = "'" + df[col] + "'"
     df = df.dropna()
     df_dict = '\n'.join([
         '{}{}'.format(k, v) for k, v in df.to_dict(orient='index').items()
     ])
     undefined_msg = 'Undefined placements have the following keys:'
     logging.info('{}\n{}'.format(undefined_msg, df_dict))
     self.add_to_analysis_dict(key_col=self.unknown_col,
                               message=undefined_msg,
                               data=df.to_dict())
Example #4
 def add_names_to_df(self):
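     # Return early on empty data or when no breakdown was requested;
     # otherwise map ad and ad squad ids to names and drop the raw columns.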
     if self.df.empty:
         logging.warning('No data for date range, returning empty df.')
         return self.df
     if not self.breakdown:
         self.df = utl.col_removal(self.df, 'API_Snapchat', ['id', 'index'])
         return self.df
     adids = self.get_ads()
     self.df = self.dict_to_cols(self.df, 'id', adids)  # type: pd.DataFrame
     asids = self.get_adsquads()
     self.df = self.dict_to_cols(self.df, 'ad_squad_id',
                                 asids)  # type: pd.DataFrame
     self.df = self.df.rename(columns={
         'ad_squad_id': 'Ad Set Name',
         'name': 'Ad Name'
     })
     self.df = utl.col_removal(self.df, 'API_Snapchat',
                               [0, 'id', 'stats', 'index'])
     return self.df
Example #5
 def apply_cap(self, df, c):
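     # Append the cap file, rerun the final net cost calculation against it,
     # overwrite cost with the result and drop the temporary metric column.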
     logging.info('Calculating metric cap from: '
                  '{}'.format(c[self.file_name]))
     pdf = self.get_cap_file(c)
     df = df.append(pdf)
     df = net_cost_final_calculation(df,
                                     p_col=c[self.proc_dim],
                                     p_cost=self.temp_metric)
     df = df[~df[dctc.FPN].isnull()]
     df[vmc.cost] = df[NCF]
     df = utl.col_removal(df, 'Raw Data', [self.temp_metric])
     return df
Example #6
def net_cost_final(df, p_col=dctc.PFPN, n_cost=vmc.cost):
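    # Once cumulative net cost (NC_CUM_SUM) exceeds the threshold (DIF_PNC),
    # prorate the cost on the first date over the threshold and zero it on
    # later dates; all other rows keep their original cost.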
    tdf = (df[df[NC_CUM_SUM] > df[DIF_PNC]]
           .groupby([p_col]).min().reset_index())
    if not tdf.empty:
        tdf = tdf[[vmc.date, p_col]]
        tdf[NC_CUM_SUM_MIN_DATE] = True
        df = df.merge(tdf, on=[p_col, vmc.date], how='left')
        df[NCF] = np.where(
            df[NC_CUM_SUM] > df[DIF_PNC],
            np.where(df[NC_CUM_SUM_MIN_DATE] == True,
                     df[n_cost] - (df[n_cost] / df[NC_SUM_DATE]) *
                     (df[NC_CUM_SUM] - df[DIF_PNC]),
                     0),
            df[vmc.cost])
    else:
        df[NC_CUM_SUM_MIN_DATE] = 0
        df[NCF] = df[n_cost]
    df = utl.col_removal(df, 'Raw Data', DROP_COL)
    return df
Example #7
 def vm_parse(self):
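     # Read the vendor matrix, drop '|'-prefixed columns, convert it to a
     # dict indexed by vendor key and split bar-delimited columns into lists.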
     self.vm_df = pd.DataFrame(columns=vmc.datacol)
     self.vm_df = self.read()
     self.vm = self.vm_df.copy()
     self.plan_net_check()
     drop = [
         item for item in self.vm.columns.values.tolist()
         if (item[0] == '|')
     ]
     self.vm = utl.col_removal(self.vm, 'vm', drop)
     self.vm = utl.data_to_type(self.vm, [], vmc.datecol, vmc.barsplitcol)
     self.vl = self.vm[vmc.vendorkey].tolist()
     self.vm = self.vm.set_index(vmc.vendorkey).to_dict()
     for col in vmc.barsplitcol:
         self.vm[col] = ({
             key: list(value.split('|'))
             for key, value in self.vm[col].items()
         })
Example #8
 def clean_df(self, df):
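     # Drop unneeded conversion columns, unpack each remaining metric into
     # one row per date and campaign id, scale spend down from micro-units
     # and map the date indices back to real dates.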
     if df.empty:
         return df
     for col in mobile_conversions + web_conversions:
         if col in df.columns:
             df = utl.col_removal(df, 'API_Twitter', [col], warn=False)
     df = df.drop([jsonmet, jsonseg], axis=1).set_index(colcid)
     ndf = pd.DataFrame(columns=[coldate, colcid])
     ndf = utl.data_to_type(ndf, str_col=[colcid], int_col=[coldate])
     for col in df.columns:
         tdf = df[col].apply(lambda x: self.clean_data(x)).apply(pd.Series)
         tdf = tdf.unstack().reset_index()
         tdf = tdf.rename(columns={0: col, 'level_0': coldate})
         tdf = utl.data_to_type(tdf, str_col=[colcid], int_col=[coldate])
         ndf = pd.merge(ndf, tdf, on=[coldate, colcid], how='outer')
     df = ndf
     df[colspend] /= 1000000
     df[coldate].replace(self.dates, inplace=True)
     return df
Example #9
 def get_upload_df(self, table):
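     # When only the event name column is requested, temporarily widen the
     # export to the remaining real columns, then strip those extras from the
     # upload frame before returning it.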
     cols = self.dbs.get_cols_for_export(table)
     cols_to_add = []
     other_event_cols = [exc.event_steam_name, exc.event_conv_name]
     if (exc.event_name in cols
             and not any(x in cols for x in other_event_cols)):
         cols_to_add = [
             x for x in self.dft.real_columns
             if x not in cols and x not in
             ['plannednetcost', 'modelcoefa', 'modelcoefb', 'modelcoefc']
         ]
         cols += cols_to_add
     ul_df = self.dft.slice_for_upload(cols)
     if cols_to_add:
         ul_df = utl.col_removal(ul_df,
                                 key='None',
                                 removal_cols=cols_to_add,
                                 warn=False)
     if not ul_df.empty:
         ul_df = self.add_ids_to_df(self.dbs.fk, ul_df)
     return ul_df
Example #10
def vm_update(old_path=utl.config_path, old_file='OldVendorMatrix.csv'):
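    # Back up the existing vendor matrix, rebuild it on the current column
    # set, migrate the legacy FIRSTROWADJ flag, drop retired columns and
    # write the updated matrix back to csv.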
    logging.info('Updating Vendor Matrix')
    shutil.copyfile(csv_full_file, os.path.join(old_path, old_file))
    ovm = utl.import_read_csv(filename=old_file, path=old_path)
    rules = [col for col in ovm.columns if 'RULE_' in col]
    rule_metrics = [col for col in ovm.columns if '_METRIC' in col]
    nvm = pd.DataFrame(columns=[vmc.vendorkey] + vmc.vmkeys)
    vm = nvm.append(ovm, sort=True)
    if 'FIRSTROWADJ' in vm.columns:
        vm[vmc.firstrow] = np.where(vm['FIRSTROWADJ'], vm[vmc.firstrow] + 1,
                                    vm[vmc.firstrow])
    if vmc.autodicplace not in ovm.columns:
        vm[vmc.autodicplace] = vmc.fullplacename
    vm = utl.col_removal(vm,
                         'vm',
                         ['FIRSTROWADJ', 'LASTROWADJ', 'AUTO DICTIONARY'],
                         warn=False)
    vm = vm.reindex([vmc.vendorkey] + vmc.vmkeys + rules, axis=1)
    for col in rule_metrics:
        vm = vm_update_rule_check(vm, col)
    vm = vm.fillna('')
    vm = vm.replace('nan', '')
    vm.to_csv(csv_full_file, index=False, encoding='utf-8')
Example #11
 def replace_with_parent(df, parent, id_col):
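     # Map each id to its parent record, expand that record into columns,
     # drop the intermediate columns and rename to the parent's column names.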
     df[id_col] = df[id_col].map(parent[0])
     df = df.join(df[id_col].apply(pd.Series))
     df = utl.col_removal(df, 'API_Twitter', [0, id_col], warn=False)
     df = df.rename(columns={'name': parent[1], 'parent': id_col})
     return df
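Every example above funnels its column cleanup through utl.col_removal. The helper itself is not reproduced on this page, so the sketch below only illustrates the call shape the examples imply (a dataframe, a key naming the caller, a list of columns to drop, and an optional warn flag as in Examples #8 and #9); it is an assumption, not the project's actual implementation.

import logging

import pandas as pd


def col_removal(df, key, removal_cols, warn=True):
    # Hypothetical sketch only: drop each listed column if present; `key`
    # just identifies the caller (e.g. 'API_Twitter') in the warning logged
    # for a column that is not in the frame.
    for col in removal_cols:
        if col not in df.columns:
            if warn:
                logging.warning('Could not remove %s from %s data.', col, key)
            continue
        df = df.drop(columns=[col])
    return df


if __name__ == '__main__':
    # 'API_Demo' and these column names are made up for the illustration.
    frame = pd.DataFrame({'id': [1], 'name': ['x'], 'stats': [0]})
    print(col_removal(frame, 'API_Demo', ['id', 'stats', 'missing'],
                      warn=False))

A lenient drop along these lines would match the warn=False callers above, which pass columns that may already be absent from the frame.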