def __init__(self):
    """Point this instance at the config directory with empty state."""
    utl.dir_check(utl.config_path)
    self.csvpath = utl.config_path
    self.df = pd.DataFrame()
    # Populated later by the relational-parameter loading steps.
    self.rc = None
    self.relational_params = None
    self.key_list = []
def export_to_csv(self):
    """Download the created report by polling for a valid report link.

    Reloads the report page up to 20 times (with increasing sleep),
    switching client if configured, until the report element exposes a
    real http(s) href; then follows that link to trigger the download.

    Returns:
        None if the report element could not be found, True otherwise
        (including when all attempts were exhausted without a link).
    """
    logging.info('Downloading created report.')
    utl.dir_check(self.temp_path)
    for x in range(20):
        self.go_to_url(self.report_url, sleep=(x + 5))
        if self.client:
            self.change_client()
        elem = self.get_report_element()
        if not elem:
            return None
        try:
            link = elem.get_attribute('href')
        # Narrowed from a bare except: the element may be mid-refresh
        # (stale reference) while the page rebuilds.
        except Exception:
            logging.warning('Element being refreshed.')
            link = None
        if link and link == 'https://pinnacle.doubleverify.com/null':
            logging.warning('Got null url, refreshing page.')
            continue
        if link and link[:4] == 'http':
            # Reuse the already-fetched link rather than re-reading the
            # attribute, which could raise a stale-element error again.
            self.go_to_url(link)
            break
        else:
            logging.warning('Report not ready, current link {}'
                            ' attempt: {}'.format(link, x + 1))
    return True
def export_to_csv(self):
    """Click through the export flow to download the report as csv."""
    logging.info('Downloading created report.')
    utl.dir_check(self.temp_path)
    # Export dialog first, then the csv download button.
    for tag, label in (('div', 'Export report'), ('button', 'Download .csv')):
        xpath = '//{}[normalize-space(text())="{}"]'.format(tag, label)
        self.click_on_xpath(xpath)
def __init__(self, parent_dict):
    """Store the parent dictionary reference and reset derived state.

    Args:
        parent_dict: the dictionary object this instance derives from.
    """
    utl.dir_check(utl.config_path)
    self.parent_dict = parent_dict
    self.csvpath = utl.config_path
    self.df = pd.DataFrame()
    # Filled in later when the dictionary columns are parsed.
    self.dict_col_names = None
    self.dict_col_values = None
    self.dict_constants = None
def __init__(self, **kwargs):
    """Capture relational params and derive the file path and columns.

    Args:
        **kwargs: relational parameters keyed by the dctc constants
            (filename, key, dependents).
    """
    self.csvpath = os.path.join(csvpath, 'Relational/')
    utl.dir_check(self.csvpath)
    self.df = pd.DataFrame()
    self.params = kwargs
    self.filename = kwargs[dctc.FN]
    self.full_file_path = os.path.join(self.csvpath, self.filename)
    self.key = kwargs[dctc.KEY]
    self.dependents = kwargs[dctc.DEP]
    # Key column leads, dependent columns follow.
    self.columns = [self.key] + self.dependents
def output(self, api_df, filename, api_merge=None, first_row=None,
           last_row=None, date_col=None, start_date=None, end_date=None):
    """Write the api DataFrame to disk, merging with old data if asked.

    Args:
        api_df: the DataFrame pulled from the api.
        filename: output filename; a value containing '/' is treated as
            a full path, otherwise it is placed under the raw path.
        api_merge: when set (non-NaN), merge with the existing raw file.
        first_row/last_row/date_col/start_date/end_date: passed through
            to the merge routine.
    """
    utl.dir_check(utl.raw_path)
    if str(api_merge) != 'nan':
        api_df = self.merge_df(api_df, filename, date_col, start_date,
                               end_date, first_row, last_row, api_merge)
    full_file = (filename if '/' in filename
                 else os.path.join(utl.raw_path, filename))
    self.write_df(api_df, full_file)
def __init__(self, filename=None):
    """Load a dictionary file into a DataFrame.

    Args:
        filename: name of the dictionary file within the csv path.  A
            NaN/missing value is fatal and aborts the process.
    """
    utl.dir_check(csvpath)
    if str(filename) == 'nan':
        logging.error('No dictionary file provided. Aborting.')
        # Exit non-zero: this is an error path, not a successful run
        # (was sys.exit(0), which signals success to the shell).
        sys.exit(1)
    self.filename = filename
    self.dict_path = csvpath
    self.data_dict = pd.DataFrame(columns=dctc.COLS, index=None)
    if filename:
        self.dict_path_filename = os.path.join(self.dict_path,
                                               self.filename)
        self.read()
def __init__(self, df, dic, pn, filename, merge_col=dctc.FPN):
    """Set up an error report over the given data and dictionary.

    Args:
        df: raw DataFrame being checked.
        dic: dictionary object to validate against.
        pn: placement-name column reference.
        filename: error report filename; NaN/missing is fatal.
        merge_col: column used to merge df with the dictionary.
    """
    utl.dir_check(csvpath)
    if str(filename) == 'nan':
        logging.error('No error report file provided. Aborting.')
        # Exit non-zero: this is an error path, not a successful run
        # (was sys.exit(0), which signals success to the shell).
        sys.exit(1)
    self.df = df
    self.dic = dic
    self.pn = pn
    self.filename = filename
    self.merge_col = merge_col
    # Computed by reset().
    self.merge_df = None
    self.data_err = None
    self.dictionary = None
    self.reset()
def __init__(self):
    """Initialize the Vendor Matrix and run the parse/import pipeline."""
    log.info('Initializing Vendor Matrix')
    utl.dir_check(csv_path)
    self.vm = None
    self.vm_df = pd.DataFrame()
    self.vl = []
    # One key-list attribute per supported API data source
    # (self.api_fb_key, self.api_aw_key, ...).
    api_sources = ['fb', 'aw', 'tw', 'ttd', 'ga', 'nb', 'af', 'sc', 'aj',
                   'dc', 'rs', 'db', 'vk', 'rc', 'szk', 'red', 'dv',
                   'adk', 'inn', 'tik', 'amz', 'cri', 'pm', 'sam', 'gs',
                   'qt']
    for source in api_sources:
        setattr(self, 'api_{}_key'.format(source), [])
    self.ftp_sz_key = []
    self.db_dna_key = []
    self.s3_dna_key = []
    self.vm_rules_dict = {}
    self.ven_param = None
    self.plan_omit_list = None
    self.process_omit_list = None
    self.tdf = None
    self.df = None
    self.vm_parse()
    self.vm_import_keys()
    self.vm_rules()
    self.make_omit_lists()
    self.sort_vendor_list()
def backup_files(self):
    """Back up the combined df, data directories, and logfile.

    Creates a dated folder under the backup path, writes self.df as a
    gzipped csv, tars the config/dict/raw directories, and copies the
    logfile into the dated folder.
    """
    bu = os.path.join(utl.backup_path, dt.date.today().strftime('%Y%m%d'))
    logging.info('Backing up all files to {}'.format(bu))
    for path in [utl.backup_path, bu]:
        utl.dir_check(path)
    file_dicts = {'raw.gzip': self.df}
    for file_name, df in file_dicts.items():
        file_name = os.path.join(bu, file_name)
        df.to_csv(file_name, compression='gzip')
    for file_path in [utl.config_path, utl.dict_path, utl.raw_path]:
        file_name = '{}.tar.gz'.format(file_path.replace('/', ''))
        file_name = os.path.join(bu, file_name)
        # Context manager ensures the archive is flushed and closed even
        # if tar.add raises (was an unprotected open/close pair).
        with tarfile.open(file_name, "w:gz") as tar:
            tar.add(file_path, arcname=file_path.replace('/', ''))
    for file_name in ['logfile.log']:
        new_file_name = os.path.join(bu, file_name)
        shutil.copy(file_name, new_file_name)
    logging.info('Successfully backed up files to {}'.format(bu))
def __init__(self):
    """Point this instance at the Translational csv dir (creating it)."""
    translational_path = os.path.join(csvpath, 'Translational/')
    utl.dir_check(translational_path)
    self.csvpath = translational_path
    self.df = pd.DataFrame()
def export_to_csv(self, base_xpath=None):
    """Click the export button under base_xpath to download the report.

    Args:
        base_xpath: xpath prefix locating the report container.  It is
            effectively required: the original default of None produced
            an unhelpful TypeError on string concatenation.

    Raises:
        ValueError: if base_xpath is not provided.
    """
    if base_xpath is None:
        raise ValueError('base_xpath is required to locate the export '
                         'button.')
    logging.info('Downloading created report.')
    utl.dir_check(self.temp_path)
    export_xpath = base_xpath + 'div[1]/div[1]/div/div[3]/button'
    self.click_on_xpath(export_xpath)