def download_race_data(overwrite=True):
    """
    Downloads ACS race data of interest

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_race.csv
    """
    # download census tabular data
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("RACE:")
    for year in YEARS:
        # setup output path
        race_out = make_path(census, f"ACS_{year}_race.csv")
        print(f"...Fetching race data ({race_out})")
        try:
            race = helper.download_race_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_RACE_TABLE,
                columns=dl_conf.ACS_RACE_COLUMNS,
            )
            check_overwrite_path(output=race_out, overwrite=overwrite)
            race.to_csv(race_out, index=False)
        except Exception as e:
            print(f"...ERROR DOWNLOADING RACE DATA ({year}): {e}")
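
# A minimal sketch of the config consumed above, assuming dl_conf follows the
# usual Census API pattern of a table id plus a variable -> label mapping; the
# values below are hypothetical illustrations, not the project's actual config.
#
#   ACS_RACE_TABLE = "B03002"  # hypothetical ACS table id
#   ACS_RACE_COLUMNS = {
#       "B03002_003E": "Total_Non_Hisp_White",  # hypothetical variable: label
#       "B03002_004E": "Total_Non_Hisp_Black",
#       "B03002_012E": "Total_Hispanic",
#   }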
def download_osm_data(overwrite=True):
    """
    Download OSM data - networks and buildings
        - downloads networks as nodes.shp and edges.shp
        - downloads all buildings, subset to polygon/multipolygon features
        - both functions will create the output folder if it is not there

    Inputs:
        - RAW//Miami-Dade_County_Boundary.geojson (used as AOI to define area of needed data)
        - RAW//OPEN_STREET_MAP

    Outputs: (generally suffix will take the form q{1-4}_{year} where q indicates the quarter of the year)
        - RAW//OPEN_STREET_MAP//bike_{suffix} [network]
        - RAW//OPEN_STREET_MAP//buildings_{suffix} [building footprints]
        - RAW//OPEN_STREET_MAP//drive_{suffix} [network]
        - RAW//OPEN_STREET_MAP//walk_{suffix} [network]
    """
    print("Fetching OSM NETWORK data...")
    area_of_interest = make_path(RAW, "Miami-Dade_County_Boundary.geojson")
    osm_data_dir = make_path(RAW, "OPEN_STREET_MAP")
    data_crs = EPSG_FLSPF
    open_street_map.download_osm_networks(
        output_dir=osm_data_dir,
        polygon=area_of_interest,
        data_crs=data_crs,
        suffix="q1_2021",
        overwrite=overwrite,
    )
    print("\nFetching OSM BUILDING data...")
    open_street_map.download_osm_buildings(
        output_dir=osm_data_dir,
        polygon=area_of_interest,
        data_crs=data_crs,
        suffix="q1_2021",
        overwrite=overwrite,
    )
def download_census_geo(overwrite=True):
    """
    Download census geographies
        - downloads and unzips the census block and block group shapefiles

    Inputs:
        - RAW//temp_downloads (folder path)
        - RAW//CENSUS (extract path)
        - CENSUS_GEO_TYPES (list of geographies)

    Outputs:
        - RAW//CENSUS//BG (block group geographies)
        - RAW//CENSUS//TABBLOCK (block geographies)
    """
    print("\nFetching CENSUS Geographies...")
    # download and extract census geographies
    dl_dir = make_path(RAW, "temp_downloads")
    ext_dir = make_path(RAW, "CENSUS")
    for path in [dl_dir, ext_dir]:
        check_overwrite_path(output=path, overwrite=overwrite)
        validate_directory(path)
    for geo_type in dl_conf.CENSUS_GEO_TYPES:
        census_geo.get_one_geo_type(
            geo_type=geo_type,
            download_dir=dl_dir,
            extract_dir=ext_dir,
            state=dl_conf.CENSUS_STATE,
            year=str(SNAPSHOT_YEAR),
        )
    # clean up the temporary download folder
    rmtree(dl_dir)
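
# Illustrative only: based on the BG/TABBLOCK outputs documented above, the
# geography config is assumed to look something like the sketch below; the
# actual values live in config.download_config.
#
#   CENSUS_GEO_TYPES = ["bg", "tabblock"]  # block groups and blocks
#   CENSUS_STATE = "12"                    # Florida FIPS code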
def download_commute_data(overwrite=True):
    """
    Downloads ACS commute data of interest

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_commute.csv
    """
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("COMMUTE:")
    for year in YEARS:
        commute_out = make_path(census, f"ACS_{year}_commute.csv")
        print(f"...Fetching commute data ({commute_out})")
        try:
            commute = helper.download_commute_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_MODE_TABLE,
                columns=dl_conf.ACS_MODE_COLUMNS,
            )
            check_overwrite_path(output=commute_out, overwrite=overwrite)
            commute.to_csv(commute_out, index=False)
        except Exception as e:
            print(f"...ERROR DOWNLOADING COMMUTE DATA ({year}): {e}")
def download_lodes_data(overwrite=True):
    """
    Download LODES data for job counts
        - downloads lodes files by year and optionally aggregates to a
          coarser geographic area

    Inputs:
        - RAW//LODES (root lodes folder)

    Outputs:
        - RAW//LODES//fl_wac_S000_JT00_{year}_blk.csv.gz
        - RAW//LODES//fl_wac_S000_JT00_{year}_bgrp.csv.gz
        - RAW//LODES//fl_xwalk.csv.gz
    """
    lodes_path = validate_directory(make_path(RAW, "LODES"))
    print("LODES:")
    for year in YEARS:
        census.download_aggregate_lodes(
            output_dir=lodes_path,
            file_type="wac",
            state="fl",
            segment="S000",
            part="",
            job_type="JT00",
            year=year,
            agg_geog=["bgrp"],
            overwrite=overwrite,
        )
def download_urls(overwrite=True):
    """
    Downloads raw data that are easily accessible via web request at a url endpoint

    Inputs:
        - DOWNLOAD_URL_DICT (dictionary of output_name: url found in config.download_config)

    Outputs: (11 files)
        - RAW//{output_name} --> ['Imperviousness', 'MD_Urban_Growth_Boundary',
            'Miami-Dade_County_Boundary', 'Municipal_Parks', 'County_Parks',
            'Federal_State_Parks', 'Park_Facilities', 'Bike_Lanes', 'Paved_Path',
            'Paved_Shoulder', 'Wide_Curb_Lane']
    """
    for file, url in dl_conf.DOWNLOAD_URL_DICT.items():
        _, ext = os.path.splitext(url)
        if ext == ".zip":
            out_file = make_path(RAW, f"{file}.zip")
        elif ext == ".geojson":
            out_file = make_path(RAW, f"{file}.geojson")
        else:
            # skip unsupported extensions rather than reusing a stale out_file
            print(f"Downloader doesn't handle the '{ext}' extension; skipping {file}")
            continue
        print(f"Downloading {out_file}")
        check_overwrite_path(output=out_file, overwrite=overwrite)
        helper.download_file_from_url(url=url, save_path=out_file)
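
# A sketch of the expected DOWNLOAD_URL_DICT shape, assuming output_name: url
# pairs as documented above; the URLs here are hypothetical placeholders.
#
#   DOWNLOAD_URL_DICT = {
#       "Miami-Dade_County_Boundary": "https://example.com/county_boundary.geojson",
#       "Imperviousness": "https://example.com/imperviousness.zip",
#   }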
def setup_download_folder(dl_folder="RAW"):
    """
    Creates a download folder if it doesn't already exist and populates it
    with the necessary subfolders for the remaining download work

    Args:
        dl_folder (str): path to the download ROOT folder

    Returns:
        None
    """
    download_folder = validate_directory(dl_folder)
    for folder in dl_conf.RAW_FOLDERS:
        folder = make_path(download_folder, folder)
        if not os.path.exists(folder):
            os.mkdir(folder)
def download_file_from_url(url, save_path):
    """
    Downloads a file resource directly from a url endpoint to a file or folder

    Args:
        url (str): path to resource
        save_path (str): path to output file; if a directory is given, the
            filename is read from the response header

    Returns:
        None
    """
    if os.path.isdir(save_path):
        filename = get_filename_from_header(url)
        save_path = make_path(save_path, filename)
    print(f"...downloading {save_path} from {url}")
    try:
        request.urlretrieve(url, save_path)
    except Exception:
        # fall back to a streamed read if urlretrieve fails
        with request.urlopen(url) as download:
            with open(save_path, "wb") as out_file:
                out_file.write(download.read())
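
# Usage sketch: save_path may be a full file path or an existing directory; in
# the directory case the filename is resolved from the response header
# (some_url is a placeholder, not a real endpoint).
#
#   download_file_from_url(url=some_url, save_path=make_path(RAW, "data.zip"))
#   download_file_from_url(url=some_url, save_path=RAW)  # name from header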
def download_aggregate_lodes(
    output_dir,
    file_type,
    state,
    segment,
    part,
    job_type,
    year,
    agg_geog=None,
    overwrite=False,
):
    """
    Helper function to fetch LODES data and aggregate to another census
    geography if one is provided

    Args:
        output_dir (str): path to location downloaded files should end up
        file_type (str): one of three LODES groupings ['od', 'rac', 'wac']
            - OD: Origin-Destination data, totals are associated with both a
                home Census Block and a work Census Block
            - RAC: Residence Area Characteristic data, jobs are totaled by
                home Census Block
            - WAC: Workplace Area Characteristic data, jobs are totaled by
                work Census Block
        state (str): The two-character postal abbreviation for the state
        segment (str): Segment of the workforce, can have the values of
            ["S000", "SA01", "SA02", "SA03", "SE01", "SE02", "SE03",
            "SI01", "SI02", "SI03", ""]
        part (str): Part of the state file, can have a value of either "main"
            or "aux". These are complementary parts of the state file: the main
            part includes jobs with both workplace and residence in the state,
            and the aux part includes jobs with the workplace in the state and
            the residence outside of the state.
        job_type (str): LODES job type ("JT00" for All Jobs, "JT01" for Primary
            Jobs, "JT02" for All Private Jobs, "JT03" for Private Primary Jobs,
            "JT04" for All Federal Jobs, or "JT05" for Federal Primary Jobs)
        year (int): year of LODES data to download
        agg_geog (str or list): census geographies to aggregate LODES data to
        overwrite (bool): if set to True, delete the existing copy of the LODES data

    Returns:
        None: writes csv tables of aggregated LODES data in `output_dir`
    """
    st = state.lower()
    try:
        out_dir = validate_directory(directory=output_dir)
        if validate_lodes_download(
            file_type, state, segment, part, job_type, year, agg_geog
        ):
            if file_type == "od":
                # kept for now as it will still download but not aggregate OD
                lodes_fname = f"{st}_{file_type}_{part}_{job_type}_{str(year)}.csv.gz"
            else:
                lodes_fname = f"{st}_{file_type}_{segment}_{job_type}_{str(year)}.csv.gz"
            lodes_download_url = f"{LODES_URL}/{st}/{file_type}/{lodes_fname}"
            lodes_out = make_path(out_dir, lodes_fname)
            lodes_out = lodes_out.replace(".csv.gz", "_blk.csv.gz")
            print(f"...downloading {lodes_fname} to {lodes_out}")
            check_overwrite_path(output=lodes_out, overwrite=overwrite)
            download_file_from_url(url=lodes_download_url, save_path=lodes_out)
        else:
            lodes_out = ""
        if agg_geog and lodes_out != "":
            if validate_aggregate_geo_inputs(values=agg_geog, valid=LODES_AGG_GEOS):
                if isinstance(agg_geog, string_types):
                    agg_geog = [agg_geog]
                for geog in agg_geog:
                    cross_fname = f"{state}_xwalk.csv.gz"
                    cross_out = make_path(out_dir, cross_fname)
                    agged_out = lodes_out.replace("_blk.csv.gz", f"_{geog}.csv.gz")
                    crosswalk_url = f"{LODES_URL}/{state}/{state}_xwalk.csv.gz"
                    if not os.path.exists(cross_out):
                        print(f"...downloading {cross_fname} to {cross_out}")
                        download_file_from_url(url=crosswalk_url, save_path=cross_out)
                    print(f"...aggregating block level data to {geog}")
                    agged = aggregate_lodes_data(
                        geo_crosswalk_path=cross_out,
                        lodes_path=lodes_out,
                        file_type=file_type,
                        agg_geo=geog,
                    )
                    check_overwrite_path(output=agged_out, overwrite=overwrite)
                    agged.to_csv(agged_out, compression="gzip", index=False)
        else:
            print("No aggregation requested or there is no LODES data for this request")
    except Exception as e:
        print(f"...ERROR DOWNLOADING/AGGREGATING LODES DATA: {e}")
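
# Usage sketch, mirroring the call made in download_lodes_data above: fetch
# the Florida WAC file for all jobs (year value here is illustrative) and
# aggregate it to block groups.
#
#   download_aggregate_lodes(
#       output_dir=make_path(RAW, "LODES"),
#       file_type="wac",
#       state="fl",
#       segment="S000",
#       part="",
#       job_type="JT00",
#       year=2019,  # e.g.
#       agg_geog=["bgrp"],
#       overwrite=True,
#   )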
def download_osm_buildings(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    keep_fields=["osmid", "building", "name", "geometry"],
    suffix="",
    overwrite=False,
):
    """
    Uses an Overpass query to fetch the OSM building polygons within a
    specified bounding box or the bounding box of a provided shapefile.

    Args:
        output_dir (str): Path to output directory.
        polygon (str): path to a shapefile or geojson object readable by geopandas
        bbox (dict): default=None; A dictionary with keys 'south', 'west', 'north',
            and 'east' of EPSG:4326-style coordinates, defining a bounding box for
            the area from which to fetch OSM features. Only required when `polygon`
            is not provided. See module notes for performance and suggestions on usage.
        data_crs (int): integer value representing an EPSG code
        keep_fields (list): list of fields to keep in output dataset
        suffix (str): string value to be added to the end of the output folder
        overwrite (bool): if set to True, delete the existing copy of buildings

    Returns:
        buildings_gdf (gpd.GeoDataFrame): A gdf of OSM building features. By
            default, the CRS of the gdf will be EPSG:4326 unless a transformation
            is specified using `data_crs` or a shapefile with a different CRS is
            provided as `polygon`.

    Notes:
        OSM building polygon features are automatically saved in the `output_dir`
        as `OSM_Buildings_{YYYYMMDD}.shp`, where `YYYYMMDD` is the date on which
        the Overpass query was pushed. This is done for record-keeping purposes.
    """
    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )
    # - Output location
    output_dir = validate_directory(make_path(output_dir, f"buildings_{suffix}"))

    # Data read in and setup -------------------------------------------------
    print("...Pulling building data from Overpass API...")
    buildings_gdf = ox.geometries_from_bbox(
        north=bounding_box["north"],
        south=bounding_box["south"],
        east=bounding_box["east"],
        west=bounding_box["west"],
        tags={"building": True},
    )

    # drop non-polygon features and subset fields
    print("...Dropping non-polygon features and unneeded fields")
    buildings_gdf = buildings_gdf[
        buildings_gdf.geom_type.isin(["MultiPolygon", "Polygon"])
    ]
    drop_cols = [col for col in buildings_gdf.columns if col not in keep_fields]
    buildings_gdf.drop(labels=drop_cols, axis=1, inplace=True)
    # reset_index returns a new frame; assign it so the flattened index sticks
    buildings_gdf = buildings_gdf.reset_index()

    # Saving -----------------------------------------------------------------
    print("...Saving...")
    dt = datetime.now().strftime("%Y%m%d")
    file_name = "OSM_Buildings_{}.shp".format(dt)
    save_path = make_path(output_dir, file_name)
    check_overwrite_path(output=save_path, overwrite=overwrite)
    buildings_gdf.to_file(save_path)
    print("-- saved to: " + save_path)
    return buildings_gdf
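
# Usage sketch with the bbox form of the call; the coordinates below are
# illustrative EPSG:4326 values only, roughly covering Miami-Dade County.
#
#   gdf = download_osm_buildings(
#       output_dir=make_path(RAW, "OPEN_STREET_MAP"),
#       bbox={"south": 25.1, "west": -80.9, "north": 26.0, "east": -80.1},
#       data_crs=EPSG_FLSPF,
#       suffix="q1_2021",
#       overwrite=True,
#   )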
def run(args):
    # reconstructed dispatcher: the original body was truncated in the source,
    # leaving only the final download_lodes_data call; each step is gated by
    # the matching argparse flag defined in main() below
    overwrite = args.overwrite
    if args.setup:
        setup_download_folder(dl_folder=RAW)
    if args.urls:
        download_urls(overwrite=overwrite)
    if args.osm:
        download_osm_data(overwrite=overwrite)
    if args.census_geo:
        download_census_geo(overwrite=overwrite)
    if args.commutes:
        download_commute_data(overwrite=overwrite)
    if args.race:
        download_race_data(overwrite=overwrite)
    if args.lodes:
        download_lodes_data(overwrite=overwrite)


def main():
    # todo: add more utility to this, making the download script executable
    import argparse

    parser = argparse.ArgumentParser(prog="downloader", description="Download RAW data...")
    # note: flags with action="store_false" default to True, so passing the
    # flag SKIPS that step; "--urls" is the inverse (off unless passed)
    parser.add_argument("-x", "--overwrite", dest="overwrite", action="store_false")
    parser.add_argument("-s", "--setup", dest="setup", action="store_false")
    parser.add_argument("-u", "--urls", dest="urls", action="store_true")
    parser.add_argument("-o", "--osm", dest="osm", action="store_false")
    parser.add_argument("-g", "--census_geo", dest="census_geo", action="store_false")
    parser.add_argument("-c", "--commutes", dest="commutes", action="store_false")
    parser.add_argument("-r", "--race", dest="race", action="store_false")
    parser.add_argument("-l", "--lodes", dest="lodes", action="store_false")
    args = parser.parse_args()
    run(args)


if __name__ == "__main__":
    DEBUG = True
    if DEBUG:
        ROOT = r"C:\PMT_TEST_FOLDER"
        RAW = validate_directory(make_path(ROOT, "RAW"))
        YEARS = YEARS
    t.start()
    main()
    t.stop()
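
# Invocation sketch (assumes this module is saved as downloader.py): because
# most flags use action="store_false", running with no flags executes every
# step except the url downloads, and passing a flag skips that step.
#
#   python downloader.py            # run all steps except download_urls
#   python downloader.py -u         # also run download_urls
#   python downloader.py -l -r      # skip the LODES and race downloads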