예제 #1
0
def _stream_to_file(url, target_path, **request_kwargs):
    """Stream the body of ``url`` into ``target_path`` in 512-byte chunks.

    Extra keyword arguments are forwarded to ``requests.get``. The response
    and the output file are always closed, even if the transfer fails.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status, so an error page is
        never saved as if it were voter data.
    """
    response = requests.get(url, stream=True, **request_kwargs)
    try:
        response.raise_for_status()
        with open(target_path, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                # Skip empty chunks; there is nothing to write for them.
                if chunk:
                    handle.write(chunk)
    finally:
        # Close the streamed response even when the transfer is cut short.
        response.close()


def _zip_files(archive_name, paths):
    """Write every path in ``paths`` into a new zip called ``archive_name``."""
    with zipfile.ZipFile(archive_name, 'w') as archive:
        for path in paths:
            archive.write(path)


def _upload_archive(description, archive_name, config_file, date, s3_bucket):
    """Wrap the archive in a ``FileItem`` and hand it to ``Loader.s3_dump``."""
    file_item = FileItem(
        description,
        filename=archive_name,
        s3_bucket=s3_bucket)
    loader = Loader(config_file=config_file, force_date=date,
                    s3_bucket=s3_bucket)
    loader.s3_dump(file_item, file_class=RAW_FILE_PREFIX)


def state_download(state, s3_bucket):
    """Download a state's raw data chunks, zip them, and upload the zip.

    The chunk URLs are read from the state's config under
    ``'data_chunk_links'``. Each chunk is saved under ``/tmp/``, all chunks
    are bundled into ``<date>.zip`` (a relative path), and the bundle is
    passed to ``Loader.s3_dump`` with ``file_class=RAW_FILE_PREFIX``.

    Only ``"north_carolina"`` and ``"ohio"`` are handled; for any other
    value nothing is downloaded and the function returns ``None``.

    Parameters
    ----------
    state : str
        State identifier; also used to locate the config file and to name
        the temporary chunk files.
    s3_bucket
        Passed through unchanged to ``FileItem`` and ``Loader``.

    Raises
    ------
    requests.exceptions.HTTPError
        If any chunk URL responds with an HTTP error status.
    """
    config_file = Config.config_file_from_state(state=state)
    configs = Config(file_name=config_file)

    if state == "north_carolina":
        today = nc_date_grab()
        list_files = configs['data_chunk_links']
        zipped_files = []
        for i, url in enumerate(list_files):
            target_path = "/tmp/" + state + str(i) + ".zip"
            zipped_files.append(target_path)
            _stream_to_file(url, target_path)
        archive_name = today + ".zip"
        _zip_files(archive_name, zipped_files)
        _upload_archive("NC file auto download", archive_name,
                        config_file, today, s3_bucket)

    elif state == "ohio":
        # Keep only the YYYY-MM-DD prefix of the ISO timestamp.
        today = str(ohio_get_last_updated().isoformat())[0:10]
        list_files = configs['data_chunk_links']
        file_names = configs['data_file_names']
        zipped_files = []
        for i, url in enumerate(list_files):
            logging.info("downloading {} file".format(url))
            target_path = "/tmp/" + state + "_" + file_names[i] + ".txt.gz"
            zipped_files.append(target_path)
            # NOTE(review): TLS certificate verification is disabled for the
            # Ohio downloads; kept as-is to preserve behavior, but confirm
            # whether this is still required.
            _stream_to_file(url, target_path, verify=False)
            logging.info("downloaded {} file".format(url))
        archive_name = today + ".zip"
        logging.info("Zipping files")
        _zip_files(archive_name, zipped_files)
        logging.info("Uploading")
        _upload_archive("OH file auto download", archive_name,
                        config_file, today, s3_bucket)
예제 #2
0
def convert_voter_file(state=None,
                       local_file=None,
                       file_date=None,
                       write_file=False):
    """Run the preprocessor over a local voter file.

    Parameters
    ----------
    state : string, optional
        State identifier used to look up the config file.
    local_file : string, optional
        Passed to ``Preprocessor`` as ``force_file``.
    file_date : string, optional
        Date in "YYYY-MM-DD" format. Although it defaults to None, it is
        parsed with ``strptime``, so a real date string must be supplied.
    write_file : bool, optional
        When true, the result is written with ``local_dump`` and nothing is
        returned.

    Returns
    -------
    tuple or None
        If ``write_file`` is falsey, ``(output_dataframe(...), meta)`` from
        the preprocessor; otherwise None.
    """
    config_path = Config.config_file_from_state(state)
    # Round-trip through strptime to validate and normalize the date string.
    snapshot_day = datetime.datetime.strptime(file_date, '%Y-%m-%d').date()
    normalized_date = str(snapshot_day)

    with Preprocessor(None,
                      config_path,
                      force_file=local_file,
                      force_date=normalized_date) as proc:
        processed = proc.execute()
        if write_file:
            proc.local_dump(processed)
            return None
        return proc.output_dataframe(processed), proc.meta
예제 #3
0
파일: main.py 프로젝트: Voteshield/reggie
def convert_voter_file(state=None,
                       local_file=None,
                       file_date=None,
                       write_file=False):
    """Process a voter file; this is the main Reggie entry point.

    A voter file is often several files, so the input will likely be a
    compressed archive such as a .zip file.

    Parameters
    ----------
    state : string, optional
        Lower-case state name with underscores in place of spaces, by
        default None
    local_file : string, optional
        Path to the file to process, by default None
    file_date : string, optional
        Snapshot date formatted as "YYYY-MM-DD". The default is None, but
        the value is parsed with ``strptime``, so a date string is needed.
    write_file : bool, optional
        If true, write the result out as an automatically named CSV file in
        the local directory instead of returning it, by default False

    Returns
    -------
    tuple
        Only when `write_file` is falsey; the tuple holds:
            - The processed voter file as a CSV string
            - The meta data object
            - The preprocessor object
        When `write_file` is truthy, None is returned.
    """
    config_path = Config.config_file_from_state(state)
    # Round-trip through strptime to validate and normalize the date string.
    snapshot_day = datetime.datetime.strptime(file_date, "%Y-%m-%d").date()

    router_options = {
        "raw_s3_file": None,
        "config_file": config_path,
        "force_file": local_file,
        "force_date": str(snapshot_day),
    }
    preprocessor = state_router(state, **router_options)
    preprocessor.execute()

    if write_file:
        preprocessor.local_dump(preprocessor.processed_file)
        return None

    csv_output = preprocessor.output_dataframe(preprocessor.processed_file)
    return csv_output, preprocessor.meta, preprocessor