def run(logger, manifest, config):
    resource_name = manifest['name']
    sql_dir = config['source_dir']
    db_dir = config['working_dir']
    state_file = config['state_file']

    new_files = state.new_files(resource_name, state_file, sql_dir, '.sql')

    log.info(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "sql_dir": sql_dir,
        "db_dir": db_dir,
        "state_file": state_file,
        "new_files_count": len(new_files),
        "message": "started processing sql files",
    })

    state.update(
        db.insert(logger, resource_name, sql_dir, db_dir, new_files),
        state_file)

    log.info(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "sql_dir": sql_dir,
        "db_dir": db_dir,
        "state_file": state_file,
        "new_files_count": len(new_files),
        "message": "finished processing sql files",
    })
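# Illustration only: the db.insert helper is defined elsewhere and not shown
# in this section. The sketch below is one way a helper with this call shape
# could work, assuming a sqlite3 database file in db_dir and .sql files that
# contain plain SQL statements, and assuming it returns the list of files it
# processed so state.update can record them. The sqlite3 choice, file layout,
# and names here are assumptions, not the project's actual implementation.
import os
import sqlite3


def insert_sketch(resource_name, sql_dir, db_dir, new_files):
    processed = []
    conn = sqlite3.connect(os.path.join(db_dir, '%s.db' % resource_name))
    try:
        for fname in new_files:
            with open(os.path.join(sql_dir, fname)) as f:
                # execute the statements contained in the .sql file
                conn.executescript(f.read())
            conn.commit()
            processed.append(fname)
    finally:
        conn.close()
    return processed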
def run(logger, manifest, config):
    start_date = datetime.date(*manifest['start_date'])
    resource_name = manifest['name']
    resource_url = manifest['url']
    delay = manifest['download_delay_secs']
    download_dir = config['working_dir']
    state_file = config['state_file']

    # sleep for N seconds between downloads to meet CAISO's expected-use requirements
    dates = xtime.range_pairs(xtime.day_range_to_today(start_date))
    urls = list(web.generate_urls(logger, dates, resource_url))

    log.debug(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "url": resource_url,
        "delay": delay,
        "download_dir": download_dir,
        "state_file": state_file,
        "start_date": str(start_date),
        "urls_count": len(urls),
    })

    state.update(
        web.download(logger, resource_name, delay, urls, state_file, download_dir),
        state_file)
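# Illustration only: a plausible shape for the manifest and config dicts these
# run() entry points read from, inferred from the keys used above. The values
# and paths below are hypothetical examples, not taken from the project.
example_manifest = {
    'name': 'daily_renewables_watch',    # hypothetical resource name
    'url': 'http://example.com/report',  # hypothetical source URL
    'start_date': (2018, 1, 1),          # unpacked via datetime.date(*...)
    'download_delay_secs': 5,            # pause between requests
}
example_config = {
    'source_dir': './source',            # where a step reads its input files
    'working_dir': './work',             # where a step writes its output
    'state_file': './state.txt',         # records files already processed
}
# run(logger, example_manifest, example_config)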
def run(manifest, config, logging_level=logging.INFO):
    log.configure_logging(logging_level)

    resource_name = manifest['name']
    xml_dir = config['working_dir']
    zip_dir = config['source_dir']
    state_file = config['state_file']

    state.update(
        unzip(resource_name, new_zip_files(), zip_dir, xml_dir),
        state_file)
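# Illustration only: unzip and new_zip_files are defined elsewhere and not
# shown here. The sketch below shows one way an unzip helper with this call
# shape could be written with the standard library, assuming it extracts each
# archive from zip_dir into xml_dir and returns the archives it handled so
# state.update can record them. The name and behavior are assumptions.
import os
import zipfile


def unzip_sketch(resource_name, zip_files, zip_dir, xml_dir):
    extracted = []
    for zf in zip_files:
        with zipfile.ZipFile(os.path.join(zip_dir, zf)) as archive:
            archive.extractall(xml_dir)  # unpack the .xml members
        extracted.append(zf)
    return extracted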
def run(logger, manifest, config):
    resource_name = manifest['name']
    resource_url = manifest['url']
    xml_dir = config['source_dir']
    sql_dir = config['working_dir']
    state_file = config['state_file']

    new_files = state.new_files(resource_name, state_file, xml_dir, '.xml')

    log.debug(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "url": resource_url,
        "xml_dir": xml_dir,
        "sql_dir": sql_dir,
        "state_file": state_file,
        "new_files_count": len(new_files),
    })

    state.update(
        xmlparser.parse(logger, resource_name, new_files, xml_dir, sql_dir),
        state_file)
def run(logger, manifest, config):
    resource_name = manifest['name']
    resource_url = manifest['url']
    txt_dir = config['source_dir']
    sql_dir = config['working_dir']
    state_file = config['state_file']

    new_files = state.new_files(
        resource_name, state_file, txt_dir, 'DailyRenewablesWatch.txt')

    log.debug(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "url": resource_url,
        "txt_dir": txt_dir,
        "sql_dir": sql_dir,
        "state_file": state_file,
        "new_files_count": len(new_files),
    })

    state.update(
        parse_text_files(logger, resource_name, new_files, txt_dir, sql_dir),
        state_file)
def run(logger, manifest, config):
    start_date = datetime.date(*manifest['start_date'])
    resource_name = manifest['name']
    resource_url = manifest['url']
    delay = manifest['download_delay_secs']
    download_dir = config['working_dir']
    txt_dir = config['source_dir']
    state_file = config['state_file']

    # sleep for N seconds between downloads to meet CAISO's expected-use requirements
    dates = xtime.range_pairs(xtime.day_range_to_today(start_date))
    urls = list(web.generate_urls(logger, dates, resource_url))

    log.debug(logger, {
        "name": __name__,
        "method": "run",
        "resource": resource_name,
        "url": resource_url,
        "delay": delay,
        "download_dir": download_dir,
        "state_file": state_file,
        "start_date": str(start_date),
        "urls_count": len(urls),
    })

    # download .txt files
    downloaded_txt_urls = web.download(
        logger, resource_name, delay, urls, state_file, download_dir,
        ending='.txt')

    # copy .txt files to the txt dir, then compress the original .txt files to .zip files
    if not os.path.exists(txt_dir):
        log.debug(logger, {
            "name": __name__,
            "method": "run",
            "src": "10_down.py",
            "message": "created target txt dir: %s" % txt_dir,
        })
        os.makedirs(txt_dir)

    # process downloaded .txt files
    data_files = glob.glob(
        os.path.join(download_dir, "*DailyRenewablesWatch.txt"))
    for tf in data_files:
        # glob returns paths that already include download_dir, so work from the basename
        fname = os.path.basename(tf)
        fqtf = os.path.join(download_dir, fname)
        fqtf2 = os.path.join(txt_dir, fname)
        fqtfzip = os.path.join(download_dir, '%s.zip' % fname)
        try:
            # remove write protections for the downloaded .txt file
            os.chmod(
                fqtf,
                S_IWRITE | S_IWGRP | S_IWOTH | S_IREAD | S_IRGRP | S_IROTH)

            # if the txt file is here, it needs to be copied to the txt dir
            if not os.path.exists(fqtf2):
                shutil.copyfile(fqtf, fqtf2)

            # compress the original .txt file, storing it under its basename
            with zipfile.ZipFile(fqtfzip, 'w') as myzip:
                myzip.write(fqtf, arcname=fname)

            # set .zip file to be read only
            os.chmod(fqtfzip, S_IREAD | S_IRGRP | S_IROTH)

            # remove the original .txt file now that it has been copied and zipped
            if os.path.exists(fqtf2) and os.path.exists(fqtfzip):
                os.remove(fqtf)

            log.debug(logger, {
                "name": __name__,
                "method": "run",
                "src": "10_down.py",
                "message": "zipped file: %s" % fname,
            })
        except Exception as e:
            log.error(logger, {
                "name": __name__,
                "method": "run",
                "src": "10_down.py",
                "file": tf,
                "error": "failed to process file",
                "exception": str(e),
            })

    # TODO: something is clobbering perms on the state file, so clobber it back
    os.chmod(
        os.path.join(download_dir, 'state.txt'),
        S_IWRITE | S_IWGRP | S_IWOTH | S_IREAD | S_IRGRP | S_IROTH)

    # final step
    state.update(downloaded_txt_urls, state_file)
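# Usage note (assumption about intent): ZipFile.write(filename, arcname=...)
# stores the member under arcname rather than the full on-disk path, which is
# why the loop above archives the original .txt under its basename instead of
# the path inside download_dir. Minimal standalone example with hypothetical
# file names:
import zipfile

with zipfile.ZipFile('downloads/report.txt.zip', 'w') as example_zip:
    example_zip.write('downloads/report.txt', arcname='report.txt')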