def run(logger, manifest, config):
    """Insert new .sql files for this resource into the working db, then
    record them as processed in the state file.

    logger   -- logger handle passed through to log/db helpers
    manifest -- resource manifest; reads 'name'
    config   -- pipeline config; reads 'source_dir', 'working_dir', 'state_file'
    """
    resource_name = manifest['name']
    sql_dir = config['source_dir']
    db_dir = config['working_dir']
    state_file = config['state_file']

    # Only files not yet recorded in the state file are processed.
    new_files = state.new_files(resource_name, state_file, sql_dir, '.sql')

    def _log_info(message):
        # Shared structured payload for the start/finish log events.
        log.info(
            logger, {
                "name": __name__,
                "method": "run",
                "resource": resource_name,
                "sql_dir": sql_dir,
                "db_dir": db_dir,
                "state_file": state_file,
                "new_files_count": len(new_files),
                "message": message,
            })

    _log_info("started processing sql files")
    state.update(db.insert(logger, resource_name, sql_dir, db_dir, new_files),
                 state_file)
    _log_info("finished processing sql files")
# Example #2
def run(logger, manifest, config):
    """Download date-ranged resource files and record the fetched URLs in
    the state file.

    logger   -- logger handle passed through to log/web helpers
    manifest -- resource manifest; reads 'start_date', 'name', 'url',
                'download_delay_secs'
    config   -- pipeline config; reads 'working_dir', 'state_file'
    """
    start_date = datetime.date(*manifest['start_date'])
    resource_name = manifest['name']
    resource_url = manifest['url']
    delay = manifest['download_delay_secs']
    download_dir = config['working_dir']
    state_file = config['state_file']

    # One URL per (start, end) date pair covering start_date through today.
    date_pairs = xtime.range_pairs(xtime.day_range_to_today(start_date))
    urls = list(web.generate_urls(logger, date_pairs, resource_url))

    log.debug(
        logger, {
            "name": __name__,
            "method": "run",
            "resource": resource_name,
            "url": resource_url,
            "delay": delay,
            "download_dir": download_dir,
            "state_file": state_file,
            "start_date": str(start_date),
            "urls_count": len(urls),
        })

    # sleep for N seconds in between downloads to meet caiso expected use requirements
    downloaded = web.download(logger, resource_name, delay, urls, state_file,
                              download_dir)
    state.update(downloaded, state_file)
def run(manifest, config, logging_level=logging.INFO):
    """Unzip newly arrived archives into the XML working dir and record
    them in the state file.

    manifest      -- resource manifest; reads 'name'
    config        -- pipeline config; reads 'working_dir', 'source_dir',
                     'state_file'
    logging_level -- level handed to log.configure_logging (default INFO)
    """
    log.configure_logging(logging_level)
    resource_name = manifest['name']
    zip_dir = config['source_dir']
    xml_dir = config['working_dir']
    state_file = config['state_file']

    extracted = unzip(resource_name, new_zip_files(), zip_dir, xml_dir)
    state.update(extracted, state_file)
def run(logger, manifest, config):
    """Parse new .xml files for this resource into SQL and record them as
    processed in the state file.

    logger   -- logger handle passed through to log/xmlparser helpers
    manifest -- resource manifest; reads 'name', 'url'
    config   -- pipeline config; reads 'source_dir', 'working_dir',
                'state_file'
    """
    resource_name = manifest['name']
    resource_url = manifest['url']
    xml_dir = config['source_dir']
    sql_dir = config['working_dir']
    state_file = config['state_file']

    # Only .xml files not yet recorded in the state file are parsed.
    new_files = state.new_files(resource_name, state_file, xml_dir, '.xml')

    log.debug(logger, {
        "name"      : __name__,
        "method"    : "run",
        "resource"  : resource_name,
        "url"       : resource_url,
        "xml_dir"   : xml_dir,
        "sql_dir"   : sql_dir,
        "state_file": state_file,
        "new_files_count" : len(new_files),
        })

    parsed = xmlparser.parse(logger, resource_name, new_files, xml_dir,
                             sql_dir)
    state.update(parsed, state_file)
def run(logger, manifest, config):
    """Parse new DailyRenewablesWatch text files into SQL and record them
    as processed in the state file.

    logger   -- logger handle passed through to log/parser helpers
    manifest -- resource manifest; reads 'name', 'url'
    config   -- pipeline config; reads 'source_dir', 'working_dir',
                'state_file'
    """
    resource_name = manifest['name']
    resource_url = manifest['url']
    txt_dir = config['source_dir']
    sql_dir = config['working_dir']
    state_file = config['state_file']

    # Only files matching the DailyRenewablesWatch naming that are not yet
    # in the state file get parsed.
    new_files = state.new_files(resource_name, state_file, txt_dir,
                                'DailyRenewablesWatch.txt')

    log.debug(
        logger, {
            "name": __name__,
            "method": "run",
            "resource": resource_name,
            "url": resource_url,
            "txt_dir": txt_dir,
            "sql_dir": sql_dir,
            "state_file": state_file,
            "new_files_count": len(new_files),
        })

    parsed = parse_text_files(logger, resource_name, new_files, txt_dir,
                              sql_dir)
    state.update(parsed, state_file)
# Example #6
def run(logger, manifest, config):
    """Download DailyRenewablesWatch .txt files, copy each into the source
    txt dir, archive the original as a read-only .zip in the download dir,
    and record the downloaded URLs in the state file.

    logger   -- logger handle passed through to log/web helpers
    manifest -- resource manifest; reads 'start_date', 'name', 'url',
                'download_delay_secs'
    config   -- pipeline config; reads 'working_dir' (download dir),
                'source_dir' (txt dir), 'state_file'
    """
    start_date = datetime.date(*manifest['start_date'])
    resource_name = manifest['name']
    resource_url = manifest['url']
    delay = manifest['download_delay_secs']
    download_dir = config['working_dir']
    txt_dir = config['source_dir']
    state_file = config['state_file']

    # sleep for N seconds in between downloads to meet caiso expected use requirements
    dates = xtime.range_pairs(xtime.day_range_to_today(start_date))
    urls = list(web.generate_urls(logger, dates, resource_url))
    log.debug(
        logger, {
            "name": __name__,
            "method": "run",
            "resource": resource_name,
            "url": resource_url,
            "delay": delay,
            "download_dir": download_dir,
            "state_file": state_file,
            "start_date": str(start_date),
            "urls_count": len(urls),
        })

    # download .txt files
    downloaded_txt_urls = web.download(logger,
                                       resource_name,
                                       delay,
                                       urls,
                                       state_file,
                                       download_dir,
                                       ending='.txt')

    # copy .txt files to ./text dir and then
    # compress original .txt files to .zip files
    if not os.path.exists(txt_dir):
        log.debug(
            logger, {
                "name": __name__,
                "method": "run",
                "src": "10_down.py",
                "message": "created target txt dir: %s" % txt_dir,
            })
        os.makedirs(txt_dir)

    # process downloaded .txt files
    data_files = glob.glob(
        os.path.join(download_dir, "*DailyRenewablesWatch.txt"))
    for tf in data_files:
        try:
            # glob already returns paths that include download_dir, so build
            # sibling paths from the basename. (Previously download_dir was
            # re-joined onto tf, yielding doubled paths like work/work/x.txt.)
            base = os.path.basename(tf)
            fqtf = os.path.join(download_dir, base)
            fqtf2 = os.path.join(txt_dir, base)
            fqtfzip = os.path.join(download_dir, '%s.zip' % base)

            # remove write protections for .txt files
            os.chmod(
                fqtf,
                S_IWRITE | S_IWGRP | S_IWOTH | S_IREAD | S_IRGRP | S_IROTH)

            # if the txt file is here, it needs to be copied to the ./txt dir
            if not os.path.exists(fqtf2):
                shutil.copyfile(fqtf, fqtf2)

            # BUG FIX: archive the .txt file itself — the old code wrote the
            # (empty) zip into itself via myzip.write(fqtfzip), so the text
            # data was never archived. Store under the basename so the zip
            # does not embed the directory path.
            with zipfile.ZipFile(fqtfzip, 'w') as myzip:
                myzip.write(fqtf, arcname=base)

            # set .zip file to be read only
            os.chmod(fqtfzip, S_IREAD | S_IRGRP | S_IROTH)

            # remove the zip/.txt file as it's been copied to txt/.txt
            if os.path.exists(fqtf2) and os.path.exists(fqtfzip):
                os.remove(fqtf)
            log.debug(
                logger, {
                    "name": __name__,
                    "method": "run",
                    "src": "10_down.py",
                    "message": "zipped file: %s" % tf,
                })
        except Exception as e:
            # best-effort per file: log and continue with the next one
            log.error(
                logger, {
                    "name": __name__,
                    "method": "run",
                    "src": "10_down.py",
                    "file": tf,
                    "error": "failed to process file",
                    "exception": str(e),
                })

    # TODO: something is clobbering perms on the state file, so clobber it back
    os.chmod(os.path.join(download_dir, 'state.txt'),
             S_IWRITE | S_IWGRP | S_IWOTH | S_IREAD | S_IRGRP | S_IROTH)
    # final step
    state.update(downloaded_txt_urls, state_file)