Exemple #1
0
 def read():
     """Load the vendor matrix CSV, creating an empty template when absent.

     :return: DataFrame of the vendor matrix (may be the freshly created,
         empty template on first run).
     """
     full_path = os.path.join(csv_path, csv_file)
     if not os.path.isfile(full_path):
         logging.info('Creating Vendor Matrix.  Populate it and run again')
         template = pd.DataFrame(columns=[vmc.vendorkey] + vmc.vmkeys,
                                 index=None)
         # NOTE(review): writes to csv_full_file while the existence check
         # joins csv_path/csv_file -- presumably the same path; verify.
         template.to_csv(csv_full_file, index=False, encoding='utf-8')
     return utl.import_read_csv(csv_file, csv_path)
Exemple #2
0
 def read_raw_df(self, configfile):
     """Load the constant-dictionary config into ``self.df`` and validate it.

     Returns None early (leaving ``self.df`` untouched) when the config
     file cannot be read.
     """
     try:
         loaded = utl.import_read_csv(configfile, self.csvpath)
     except IOError:
         # A missing config is a normal condition -- just note it.
         logging.debug('No Constant Dictionary config')
         return None
     self.df = loaded
     self.check_for_dict_col(configfile)
Exemple #3
0
 def read(self):
     """Load ``self.filename`` from ``self.csvpath`` into ``self.df``,
     creating the file with the expected columns if it does not yet exist,
     and coerce the key column to str."""
     if not os.path.isfile(self.full_file_path):
         logging.info('Creating {}'.format(self.filename))
         empty = pd.DataFrame(columns=self.columns, index=None)
         empty.to_csv(self.full_file_path, index=False, encoding='utf-8')
     self.df = utl.import_read_csv(self.filename, self.csvpath)
     self.df = utl.data_to_type(self.df, str_col=[self.key])
Exemple #4
0
 def read(self, configfile):
     """Load the translational dictionary config into ``self.df`` and
     clean it.  Returns None early when the file cannot be read."""
     try:
         loaded = utl.import_read_csv(configfile, self.csvpath)
     except IOError:
         logging.debug('No Translational Dictionary config')
         return None
     self.df = loaded
     self.clean_df()
Exemple #5
0
 def get_column_names_from_raw_files(self):
     """Collect raw-file column names and missing active-metric columns per
     data source, log them, and store them in the analysis dict.

     For each data source, a handful of rows past the header offset are
     read to discover the actual column headers; any active-metric column
     not present in the file is recorded under 'missing'.
     """
     data_sources = self.matrix.get_all_data_sources()
     rows = []
     for source in data_sources:
         file_name = source.p[vmc.filename]
         first_row = source.p[vmc.firstrow]
         missing_cols = []
         cols = []
         if os.path.exists(file_name):
             # Only read enough rows to get past the header offset.
             tdf = utl.import_read_csv(file_name, nrows=first_row + 5)
             tdf = utl.first_last_adj(tdf, first_row, 0)
             cols = list(tdf.columns)
             active_metrics = source.get_active_metrics()
             for k, v in active_metrics.items():
                 for c in v:
                     if c not in cols:
                         missing_cols.append({k: c})
         rows.append({
             vmc.vendorkey: [source.key],
             self.raw_columns: [cols],
             'missing': [missing_cols]
         })
     # DataFrame.append was removed in pandas 2.x; build the frame once
     # with concat instead of appending inside the loop.
     df = pd.DataFrame()
     if rows:
         df = pd.concat([pd.DataFrame(r) for r in rows],
                        ignore_index=True, sort=False)
     update_msg = 'Columns and missing columns by key as follows:'
     logging.info('{}\n{}'.format(update_msg, df.to_string()))
     self.add_to_analysis_dict(key_col=self.raw_columns,
                               message=update_msg,
                               data=df.to_dict())
Exemple #6
0
 def check_plan_error(self, df):
     """Log and record placements that appear in the plan error file.

     Reads the planned-data error CSV; when it is empty, every
     full-placement combination is defined and the method returns True.
     Otherwise the offending placement keys are logged and added to the
     analysis dict.

     :param df: DataFrame of processed placement data to check.
     :return: True when there is no plan error, otherwise None.
     """
     plan_names = self.matrix.vendor_set(vm.plan_key)[vmc.fullplacename]
     er = self.matrix.vendor_set(vm.plan_key)[vmc.filenameerror]
     edf = utl.import_read_csv(er, utl.error_path)
     if edf.empty:
         plan_error_msg = ('No Planned error - all {} '
                           'combinations are defined.'.format(plan_names))
         logging.info(plan_error_msg)
         self.add_to_analysis_dict(key_col=self.unknown_col,
                                   message=plan_error_msg)
         return True
     # Keep only rows whose full placement appears in the error file.
     df = df[df[dctc.PFPN].isin(
         edf[vmc.fullplacename].values)][plan_names +
                                         [vmc.vendorkey]].drop_duplicates()
     df = vm.full_placement_creation(df, None, dctc.FPN, plan_names)
     df = df[df[dctc.FPN].isin(edf[dctc.FPN].values)]
     df = utl.col_removal(df, None, [dctc.FPN])
     # Quote each value so the keys read unambiguously in the log output.
     # (Quoting a NaN leaves it NaN, so the dropna below still works.)
     for col in df.columns:
         df[col] = "'" + df[col] + "'"
     df = df.dropna()
     df_dict = '\n'.join([
         '{}{}'.format(k, v) for k, v in df.to_dict(orient='index').items()
     ])
     undefined_msg = 'Undefined placements have the following keys:'
     logging.info('{}\n{}'.format(undefined_msg, df_dict))
     self.add_to_analysis_dict(key_col=self.unknown_col,
                               message=undefined_msg,
                               data=df.to_dict())
Exemple #7
0
 def read(self):
     """Load the dictionary CSV into ``self.data_dict``, creating it first
     when missing, then clean and de-duplicate the result."""
     if not os.path.isfile(self.dict_path_filename):
         self.create_new_dictionary()
     self.data_dict = utl.import_read_csv(self.filename, self.dict_path)
     # NOTE(review): guards against a falsy non-DataFrame return (e.g.
     # None) from import_read_csv -- presumably a failed read; verify
     # against that helper's contract.
     if not isinstance(self.data_dict, pd.DataFrame) and not self.data_dict:
         self.data_dict = self.create_new_dictionary()
     self.clean()
     self.data_dict = self.data_dict.drop_duplicates()
Exemple #8
0
 def __init__(self, config_file='config/cap_config.csv'):
     """Load the cap config CSV and expose it as a row-indexed dict."""
     # Column-name constants used when reading the config rows.
     self.file_name = 'file_name'
     self.file_dim = 'file_dim'
     self.file_metric = 'file_metric'
     self.proc_dim = 'processor_dim'
     self.proc_metric = 'processor_metric'
     self.temp_metric = None
     config_df = utl.import_read_csv(config_file)
     self.config = config_df.to_dict(orient='index')
Exemple #9
0
 def read(self, configfile):
     """Load the relational dictionary config and derive its lookups.

     Returns None early when the config is empty; otherwise populates
     ``key_list`` and the ``rc`` mapping, splitting the pipe-delimited
     dependency entries into lists.
     """
     self.df = utl.import_read_csv(configfile, self.csvpath)
     if self.df.empty:
         logging.debug('No Relational Dictionary config')
         return None
     self.key_list = self.df[dctc.RK].tolist()
     self.rc = self.df.set_index(dctc.RK).to_dict()
     self.rc[dctc.DEP] = {k: str(v).split('|')
                          for k, v in self.rc[dctc.DEP].items()}
Exemple #10
0
 def get_raw_df(self):
     """Read this data source's raw file and normalize it.

     Returns None or an empty DataFrame unchanged when the file yields no
     data; otherwise applies header and first/last-row adjustments, the
     configured transform, and full-placement-name creation.

     :return: normalized DataFrame, or None/empty when no data was read.
     """
     df = utl.import_read_csv(self.p[vmc.filename])
     if df is None or df.empty:
         return df
     df = utl.add_header(df, self.p[vmc.header], self.p[vmc.firstrow])
     df = utl.first_last_adj(df, self.p[vmc.firstrow], self.p[vmc.lastrow])
     df = df_transform(df, self.p[vmc.transform])
     df = full_placement_creation(df, self.key, dctc.FPN,
                                  self.p[vmc.fullplacename])
     return df
Exemple #11
0
 def read(self, configfile):
     """Load the constant-dictionary config and derive its column mappings.

     Returns None early when the config file cannot be read; otherwise
     validates and filters the frame, then populates ``dict_col_names``
     and ``dict_constants``.
     """
     try:
         self.df = utl.import_read_csv(configfile, self.csvpath)
     except IOError:
         logging.debug('No Constant Dictionary config')
         return None
     self.check_for_dict_col(configfile)
     self.filter_df()
     name_col = self.df[dctc.DICT_COL_NAME]
     self.dict_col_names = name_col.tolist()
     self.dict_constants = self.df.set_index(dctc.DICT_COL_NAME).to_dict()
Exemple #12
0
 def get_file_as_df(temp_path=None):
     """Read the downloaded Excel workbook in *temp_path*, round-trip it
     through CSV, and return the result as a DataFrame.

     The three expected sheets are concatenated, written to a CSV in the
     temp dir, re-read via utl.import_read_csv, and the temp dir removed.

     :param temp_path: directory containing exactly the downloaded file.
     :return: DataFrame of the combined sheet data.
     """
     file = os.listdir(temp_path)
     file_path = os.path.join(temp_path, file[0])
     sheet_names = ['Daily Spend', 'Daily Impressions', 'Top Sites']
     df = pd.concat(pd.read_excel(file_path, sheet_name=sheet_names,
                                  parse_dates=True), ignore_index=True)
     temp_file = os.path.join(temp_path, 'output.csv')
     # BUG FIX: previously wrote to the hard-coded 'tmp/output.csv' but
     # read back from temp_path/'output.csv'; write and read now use the
     # same path.  (Also removed a no-op bare pd.DataFrame() statement.)
     df.to_csv(temp_file, encoding='utf-8')
     time.sleep(5)
     df = utl.import_read_csv(temp_file)
     shutil.rmtree(temp_path)
     return df
Exemple #13
0
 def merge_df(self, api_df, filename, date_col, start_date, end_date,
              first_row, last_row, api_merge):
     """Merge freshly pulled API data with the existing raw file on disk.

     When no raw file exists, the API frame is returned unchanged.
     Otherwise the raw file keeps rows older than the API overlap window
     (end_date minus *api_merge* days), the API frame keeps the
     start/end-date range, and the two are stacked with dummy header and
     footer rows restored.

     :return: the merged DataFrame.
     """
     if not os.path.isfile(os.path.join(utl.raw_path, filename)):
         return api_df
     df = utl.import_read_csv(filename, utl.raw_path)
     df = self.merge_df_cleaning(df, first_row, last_row, date_col, pd.NaT,
                                 end_date - dt.timedelta(days=api_merge))
     api_df = self.merge_df_cleaning(api_df, first_row, last_row, date_col,
                                     start_date, end_date)
     # DataFrame.append was removed in pandas 2.x; concat is the
     # drop-in equivalent here.
     df = pd.concat([df, api_df], ignore_index=True).reset_index(drop=True)
     df = utl.add_dummy_header(df, first_row)
     df = utl.add_dummy_header(df, last_row, location='foot')
     return df
Exemple #14
0
def agency_fees_calculation(df):
    """Compute agency fees as rate * net cost and add them to *df*.

    Requires the agency-fee-rate column (dctc.AGF) in *df*; otherwise the
    frame is returned unmodified with a warning.  When a threshold config
    file exists, the rates are scaled by the share of total net cost above
    the configured threshold.

    :param df: DataFrame containing net cost (NCF) and agency fee rates.
    :return: the DataFrame with the agency fees column populated.
    """
    logging.info('Calculating Agency Fees')
    if dctc.AGF not in df.columns:
        logging.warning('Agency Fee Rates not in dict.  '
                        'Update dict and run again to calculate agency fees.')
        return df
    threshold = utl.import_read_csv(agency_fee_file, utl.config_path)
    df = utl.data_to_type(df, float_col=[NCF, dctc.AGF])
    if not df.empty and not threshold.empty:
        # Scale rates by the fraction of total net cost above the threshold.
        # NOTE(review): divides by df[NCF].sum() -- a zero total yields
        # inf/NaN fees; confirm upstream guarantees nonzero spend here.
        threshold = threshold[AGENCY_THRESH].fillna(0).astype(float).values[0]
        threshold = (df[NCF].sum() - threshold) / df[NCF].sum()
        df[dctc.AGF] = df[dctc.AGF] * threshold
    df[AGENCY_FEES] = df[dctc.AGF] * df[NCF]
    return df
Exemple #15
0
 def check_plan_error(self, df):
     """Log placements that appear in the plan error file.

     Returns True when the error file is empty (no plan error); otherwise
     the undefined placement keys are logged and None is returned.

     :param df: DataFrame of processed placement data to check.
     :return: True when there is no plan error, otherwise None.
     """
     plan_names = self.matrix.vendor_set(vm.plan_key)[vmc.fullplacename]
     er = self.matrix.vendor_set(vm.plan_key)[vmc.filenameerror]
     edf = utl.import_read_csv(er, utl.error_path)
     if edf.empty:
         logging.info('No Planned error.')
         return True
     # Split the combined full-placement string back into its components.
     edf[plan_names] = pd.DataFrame(
         edf[vmc.fullplacename].str.split('_').values.tolist(),
         columns=plan_names)
     for col in plan_names:
         df = df[df[col].isin(edf[col].values)]
     df = df[plan_names + [vmc.vendorkey]].drop_duplicates()
     # Quote values so the logged keys are unambiguous.
     for col in df.columns:
         df[col] = "'" + df[col] + "'"
     logging.info('Undefined placements have the following keys: \n'
                  '{}'.format(df))
Exemple #16
0
 def get_file_as_df(temp_path=None):
     """Poll *temp_path* for a downloaded CSV, read it, and clean up.

     Checks up to 100 times, sleeping 5s between attempts.  Returns the
     parsed DataFrame, or an empty DataFrame if no CSV ever appears; the
     temp directory is removed either way.

     :param temp_path: directory to watch for the downloaded CSV.
     :return: DataFrame of the downloaded file (empty on timeout).
     """
     df = pd.DataFrame()
     for attempt in range(100):
         logging.info('Checking for file.  Attempt {}.'.format(attempt + 1))
         # FIX: the comprehension variable previously shadowed the loop
         # index 'x'; also use endswith instead of slicing.
         csv_files = [f for f in os.listdir(temp_path) if f.endswith('.csv')]
         if csv_files:
             logging.info('File downloaded.')
             temp_file = os.path.join(temp_path, csv_files[-1])
             # Brief pause so the download has fully flushed to disk.
             time.sleep(5)
             df = utl.import_read_csv(temp_file)
             os.remove(temp_file)
             break
         time.sleep(5)
     shutil.rmtree(temp_path)
     return df
Exemple #17
0
 def get_raw_data(self):
     """Poll the report endpoint until metadata appears, then download it.

     Retries up to 100 times, sleeping 60s after each rate-limited
     response.  Exits the process when the report URL is missing.
     """
     full_url = self.create_url()
     for x in range(1, 101):
         self.r = self.client.get(full_url)
         if 'metadata' in self.r.json().keys():
             break
         else:
             logging.warning('Rate limit exceeded. Pausing. '
                             'Response: {}'.format(self.r.json()))
             time.sleep(60)
     # NOTE(review): if all 100 attempts are rate-limited this raises
     # KeyError on 'metadata' -- confirm that is the intended failure mode.
     report_url = (
         self.r.json()['metadata']['googleCloudStoragePathForLatestReport'])
     if report_url:
         self.df = utl.import_read_csv(report_url,
                                       file_check=False,
                                       error_bad=False)
     else:
         logging.warning('Report does not exist.  Create it.')
         sys.exit(0)
Exemple #18
0
 def get_raw_data(self):
     """Poll the API until report URLs appear, then download the report.

     Retries up to 100 times with a 60s pause between attempts.  Exits
     the process when no report URL is ultimately returned.
     """
     header = self.create_header()
     response = None
     for x in range(1, 101):
         self.r = self.make_request('get', header=header)
         response = self.r.json()
         # A truthy 'urls' entry means the report is ready.  (The old
         # `response.get('urls') and response['urls']` double-check was
         # redundant.)
         if response.get('urls'):
             break
         logging.warning('Waiting for Request. '
                         'Response: {}'.format(self.r.json()))
         time.sleep(60)
     # NOTE(review): raises KeyError/TypeError if 'urls' never appears
     # after 100 attempts -- confirm that is the intended failure mode.
     report_url = response['urls']
     if report_url:
         logging.info('Found report url, downloading.')
         self.df = utl.import_read_csv(report_url[0],
                                       file_check=False,
                                       error_bad=False)
     else:
         logging.warning('Report does not exist.  Create it.')
         sys.exit(0)
Exemple #19
0
def vm_update(old_path=utl.config_path, old_file='OldVendorMatrix.csv'):
    """Migrate an old vendor matrix CSV to the current column layout.

    The current matrix file is backed up to *old_path*/*old_file*,
    re-read, upgraded in place (legacy columns folded in or dropped, rule
    columns preserved), and written back to ``csv_full_file``.

    :param old_path: directory receiving the backup copy.
    :param old_file: filename for the backup copy.
    """
    logging.info('Updating Vendor Matrix')
    shutil.copyfile(csv_full_file, os.path.join(old_path, old_file))
    ovm = utl.import_read_csv(filename=old_file, path=old_path)
    rules = [col for col in ovm.columns if 'RULE_' in col]
    rule_metrics = [col for col in ovm.columns if '_METRIC' in col]
    nvm = pd.DataFrame(columns=[vmc.vendorkey] + vmc.vmkeys)
    # DataFrame.append was removed in pandas 2.x; concat is the
    # drop-in equivalent (sort=True matches the old append call).
    vm = pd.concat([nvm, ovm], sort=True)
    # Legacy boolean column: bump the first-row offset where it was set.
    if 'FIRSTROWADJ' in vm.columns:
        vm[vmc.firstrow] = np.where(vm['FIRSTROWADJ'], vm[vmc.firstrow] + 1,
                                    vm[vmc.firstrow])
    if vmc.autodicplace not in ovm.columns:
        vm[vmc.autodicplace] = vmc.fullplacename
    vm = utl.col_removal(vm,
                         'vm',
                         ['FIRSTROWADJ', 'LASTROWADJ', 'AUTO DICTIONARY'],
                         warn=False)
    vm = vm.reindex([vmc.vendorkey] + vmc.vmkeys + rules, axis=1)
    for col in rule_metrics:
        vm = vm_update_rule_check(vm, col)
    vm = vm.fillna('')
    vm = vm.replace('nan', '')
    vm.to_csv(csv_full_file, index=False, encoding='utf-8')
Exemple #20
0
 def load_df_from_file(self):
     """Read ``self.file`` into ``self.df``; parsing is delegated to
     utl.import_read_csv (presumably a CSV path -- verify caller)."""
     self.df = utl.import_read_csv(self.file)
Exemple #21
0
 def read(self):
     """Return the contents of ``self.file_name`` (under ``self.file_path``)
     as a DataFrame, delegating parsing to utl.import_read_csv."""
     return utl.import_read_csv(self.file_name, self.file_path)
Exemple #22
0
 def get_cap_file(self, c):
     """Read the cap file referenced by config row *c* and rename its
     columns using the mapping produced by ``self.col_dict``.

     :param c: config row (mapping) holding the file-name entry.
     :return: DataFrame with processor-facing column names.
     """
     raw = utl.import_read_csv(c[self.file_name])
     return raw.rename(columns=self.col_dict(c))