def state_download(state, s3_bucket):
    """Download a state's raw voter-file chunks, bundle them into a single
    zip archive, and upload that archive to S3 via ``Loader``.

    Parameters
    ----------
    state : string
        State identifier; only ``"north_carolina"`` and ``"ohio"`` are
        handled — any other value is a silent no-op (matching the original
        ``if``/``elif`` with no ``else``).
    s3_bucket : string
        Destination S3 bucket for the bundled raw file.
    """
    config_file = Config.config_file_from_state(state=state)
    configs = Config(file_name=config_file)

    def _download(url, target_path, verify=True):
        # Stream the response to disk in small chunks so large voter files
        # never need to fit in memory.
        response = requests.get(url, stream=True, verify=verify)
        # Fail loudly on an HTTP error instead of zipping up an error page.
        response.raise_for_status()
        with open(target_path, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:
                    handle.write(chunk)

    def _bundle(zipped_files, today):
        # Bundle the downloaded chunks; the archive lands in the current
        # working directory (not /tmp), preserving the original behavior
        # that Loader callers expect.
        file_to_zip = today + ".zip"
        with zipfile.ZipFile(file_to_zip, 'w') as myzip:
            for f in zipped_files:
                myzip.write(f)
        return file_to_zip

    def _upload(file_to_zip, today, description):
        # Wrap the archive in a FileItem and push it to S3 under the
        # raw-file prefix, stamped with the snapshot date.
        file_item = FileItem(
            description,
            filename=file_to_zip,
            s3_bucket=s3_bucket)
        loader = Loader(config_file=config_file, force_date=today,
                        s3_bucket=s3_bucket)
        loader.s3_dump(file_item, file_class=RAW_FILE_PREFIX)

    if state == "north_carolina":
        today = nc_date_grab()
        zipped_files = []
        for i, url in enumerate(configs['data_chunk_links']):
            target_path = "/tmp/" + state + str(i) + ".zip"
            zipped_files.append(target_path)
            _download(url, target_path)
        file_to_zip = _bundle(zipped_files, today)
        _upload(file_to_zip, today, "NC file auto download")
    elif state == "ohio":
        # Snapshot date comes from the Ohio SoS "last updated" timestamp,
        # truncated to YYYY-MM-DD.
        today = str(ohio_get_last_updated().isoformat())[0:10]
        file_names = configs['data_file_names']
        zipped_files = []
        for i, url in enumerate(configs['data_chunk_links']):
            logging.info("downloading {} file".format(url))
            target_path = "/tmp/" + state + "_" + file_names[i] + ".txt.gz"
            zipped_files.append(target_path)
            # HACK: verify=False disables TLS certificate verification for
            # the Ohio endpoint — a security risk; revisit if the host's
            # certificate chain becomes valid.
            _download(url, target_path, verify=False)
            logging.info("downloaded {} file".format(url))
        logging.info("Zipping files")
        file_to_zip = _bundle(zipped_files, today)
        logging.info("Uploading")
        _upload(file_to_zip, today, "OH file auto download")
def convert_voter_file(state=None, local_file=None, file_date=None, write_file=False):
    """Process a local voter-file snapshot through the state's Preprocessor.

    NOTE(review): a later definition of ``convert_voter_file`` in this same
    module shadows this one — confirm which version callers actually get.

    Parameters
    ----------
    state : string, optional
        Lower-case underscore state identifier, by default None
    local_file : string, optional
        Path of the raw file to process, by default None
    file_date : string, optional
        Snapshot date as "YYYY-MM-DD"; must be supplied — ``strptime``
        raises on None, by default None
    write_file : bool, optional
        If truthy, dump the processed file locally instead of returning
        data, by default False

    Returns
    -------
    tuple or None
        ``(output_dataframe, meta)`` when ``write_file`` is falsey,
        otherwise None after a local dump.
    """
    config_file = Config.config_file_from_state(state)
    # Validate/normalize the snapshot date before handing it to the
    # preprocessor.
    parsed_date = datetime.datetime.strptime(file_date, '%Y-%m-%d').date()
    file_date = str(parsed_date)
    with Preprocessor(None, config_file,
                      force_file=local_file,
                      force_date=file_date) as preprocessor:
        processed = preprocessor.execute()
        if write_file:
            preprocessor.local_dump(processed)
            return None
        return (preprocessor.output_dataframe(processed), preprocessor.meta)
def convert_voter_file(state=None, local_file=None, file_date=None, write_file=False):
    """Main Reggie entry point: process one voter-file snapshot.

    The input is usually a compressed bundle (e.g. a ``.zip``) since a
    state snapshot often spans several files.

    Parameters
    ----------
    state : string, optional
        Lower-case state name with underscores for spaces, by default None
    local_file : string, optional
        Path of the file to process, by default None
    file_date : string, optional
        Snapshot date as "YYYY-MM-DD"; must be supplied — ``strptime``
        raises on None, by default None
    write_file : bool, optional
        If truthy, write the processed file to the local directory instead
        of returning data, by default False

    Returns
    -------
    tuple or None
        When ``write_file`` is falsey, a 3-tuple of:

        - the processed voter file via ``output_dataframe`` (presumably a
          DataFrame/CSV representation — confirm against the preprocessor)
        - the preprocessor's meta data object
        - the preprocessor itself

        Otherwise None after a local dump.
    """
    config_file = Config.config_file_from_state(state)
    # Normalize the snapshot date string (also validates the format).
    file_date = str(datetime.datetime.strptime(file_date, "%Y-%m-%d").date())
    # Route to the state-specific preprocessor implementation.
    preprocessor = state_router(
        state,
        raw_s3_file=None,
        config_file=config_file,
        force_file=local_file,
        force_date=file_date,
    )
    preprocessor.execute()
    if write_file:
        preprocessor.local_dump(preprocessor.processed_file)
        return None
    processed = preprocessor.processed_file
    return (
        preprocessor.output_dataframe(processed),
        preprocessor.meta,
        preprocessor,
    )