Example #1
0
def download_race_data(overwrite=True):
    """
    Downloads ACS race data of interest, one CSV per analysis year.

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_race.csv

    Args:
        overwrite (bool): if True, delete any existing copy of the output
            CSV before writing

    Returns:
        None
    """
    # ensure the census output folder exists before downloading tabular data
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("RACE:")
    for year in YEARS:
        # setup output path for this year's table
        race_out = make_path(census, f"ACS_{year}_race.csv")
        print(f"...Fetching race data ({race_out})")
        try:
            race = helper.download_race_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_RACE_TABLE,
                columns=dl_conf.ACS_RACE_COLUMNS,
            )
            check_overwrite_path(output=race_out, overwrite=overwrite)
            race.to_csv(race_out, index=False)
        except Exception as err:
            # FIX: was a bare `except:` (also trapped KeyboardInterrupt/
            # SystemExit) that hid the failure reason; keep best-effort
            # per-year behavior but report what actually went wrong
            print(f"..ERROR DOWNLOADING RACE DATA ({year}): {err!r}")
Example #2
0
def download_osm_data(overwrite=True):
    """
    Download OSM data - networks and buildings
        - downloads networks as nodes.shp and edges.shp
        - downloads all buildings, subset to poly/multipoly features
        - both helper functions create the output folder if it is missing

    Inputs:
        - RAW//Miami-Dade_County_Boundary.geojson (used as AOI to define area of needed data)
        - RAW//OPEN_STREET_MAP

    Outputs: (generally suffix will take the form q{1-4}_{year} where q indicates the quarter of the year)
        - RAW//OPEN_STREET_MAP//bike_{suffix} [network]
        - RAW//OPEN_STREET_MAP//buildings_{suffix} [building footprints]
        - RAW//OPEN_STREET_MAP//drive_{suffix} [network]
        - RAW//OPEN_STREET_MAP//walk_{suffix} [network]
    """
    print("Fetching OSM NETWORK data...")
    aoi = make_path(RAW, "Miami-Dade_County_Boundary.geojson")
    osm_dir = make_path(RAW, "OPEN_STREET_MAP")
    # both OSM helpers take the same keyword arguments
    shared_kwargs = dict(
        output_dir=osm_dir,
        polygon=aoi,
        data_crs=EPSG_FLSPF,
        suffix="q1_2021",
        overwrite=overwrite,
    )
    open_street_map.download_osm_networks(**shared_kwargs)
    print("\nFetching OSM BUILDING data...")
    open_street_map.download_osm_buildings(**shared_kwargs)
Example #3
0
def download_census_geo(overwrite=True):
    """
    Download census geographies
        - downloads and unzips the census block and blockgroup shapefiles
        - the temporary download folder is removed once extraction completes

    Inputs:
        - RAW//temp_downloads (folder path)
        - RAW//CENSUS (extract path)
        - CENSUS_GEO_TYPES (list of geographies)

    Outputs:
        - RAW//CENSUS//BG (block group geographies)
        - RAW//CENSUS//TABBLOCK (block geographies)
    """
    print("\nFetching CENSUS Geographies...")
    # stage the working folders, honoring the overwrite flag
    download_dir = make_path(RAW, "temp_downloads")
    extract_dir = make_path(RAW, "CENSUS")
    for folder in (download_dir, extract_dir):
        check_overwrite_path(output=folder, overwrite=overwrite)
        validate_directory(folder)
    # fetch and extract each configured geography type
    for geography in dl_conf.CENSUS_GEO_TYPES:
        census_geo.get_one_geo_type(
            geo_type=geography,
            download_dir=download_dir,
            extract_dir=extract_dir,
            state=dl_conf.CENSUS_STATE,
            year=str(SNAPSHOT_YEAR),
        )
    # temp downloads are no longer needed once everything is extracted
    rmtree(download_dir)
Example #4
0
def download_commute_data(overwrite=True):
    """
    Downloads ACS commute (journey-to-work mode) data of interest, one CSV
    per analysis year.

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_commute.csv

    Args:
        overwrite (bool): if True, delete any existing copy of the output
            CSV before writing

    Returns:
        None
    """
    # ensure the census output folder exists before downloading tabular data
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("COMMUTE:")
    for year in YEARS:
        commute_out = make_path(census, f"ACS_{year}_commute.csv")
        print(f"...Fetching commute data ({commute_out})")
        try:
            commute = helper.download_commute_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_MODE_TABLE,
                columns=dl_conf.ACS_MODE_COLUMNS,
            )
            check_overwrite_path(output=commute_out, overwrite=overwrite)
            commute.to_csv(commute_out, index=False)
        except Exception as err:
            # FIX: was a bare `except:` (also trapped KeyboardInterrupt/
            # SystemExit) that hid the failure reason; keep best-effort
            # per-year behavior but report what actually went wrong
            print(f"..ERROR DOWNLOADING COMMUTE DATA ({year}): {err!r}")
Example #5
0
def download_lodes_data(overwrite=True):
    """
    Download LODES data for job counts
        - downloads workplace area characteristic (wac) files by year and
          aggregates block-level counts to block groups

    Inputs:
        - RAW//LODES (root lodes folder)

    Outputs:
        - RAW//LODES//fl_wac_S000_JT00_{year}_blk.csv.gz
        - RAW//LODES//fl_wac_S000_JT00_{year}_bgrp.csv.gz
        - RAW//LODES//fl_xwalk.csv.gz
    """
    out_dir = validate_directory(make_path(RAW, "LODES"))
    print("LODES:")
    # request parameters are fixed; only the year varies per download
    fixed_kwargs = dict(
        output_dir=out_dir,
        file_type="wac",
        state="fl",
        segment="S000",
        part="",
        job_type="JT00",
        agg_geog=["bgrp"],
        overwrite=overwrite,
    )
    for yr in YEARS:
        census.download_aggregate_lodes(year=yr, **fixed_kwargs)
Example #6
0
def download_urls(overwrite=True):
    """
    Downloads raw data that are easily accessible via web `request' at a url endpoint

    Inputs:
        - DOWNLOAD_URL_DICT (dictionary of output_name: url found in config.download_config)

    Outputs: (11 files)
        - RAW//{output_name} --> ['Imperviousness', 'MD_Urban_Growth_Boundary', 'Miami-Dade_County_Boundary',
        'Municipal_Parks', 'County_Parks', 'Federal_State_Parks', 'Park_Facilities',
        'Bike_Lanes', 'Paved_Path',  'Paved_Shoulder', 'Wide_Curb_Lane']

    Args:
        overwrite (bool): if True, delete any existing copy of each output
            file before downloading

    Returns:
        None
    """
    for file, url in dl_conf.DOWNLOAD_URL_DICT.items():
        _, ext = os.path.splitext(url)
        if ext == ".zip":
            out_file = make_path(RAW, f"{file}.zip")
        elif ext == ".geojson":
            out_file = make_path(RAW, f"{file}.geojson")
        else:
            print("downloader doesnt handle that extension")
            # BUG FIX: previously fell through and downloaded to the stale
            # `out_file` from the prior iteration (or raised NameError when
            # the first url was unsupported); skip unsupported extensions
            continue
        print(f"Downloading {out_file}")
        check_overwrite_path(output=out_file, overwrite=overwrite)
        helper.download_file_from_url(url=url, save_path=out_file)
Example #7
0
def setup_download_folder(dl_folder="RAW"):
    """
    Creates a download folder if it doesn't already exist and populates with
    necessary subfolders for remaining download work

    Args:
        dl_folder (str): path to the download ROOT folder

    Returns:
        None
    """
    download_folder = validate_directory(dl_folder)
    for folder in dl_conf.RAW_FOLDERS:
        # exist_ok replaces the check-then-create pattern: idempotent and
        # free of the exists/mkdir race window
        os.makedirs(make_path(download_folder, folder), exist_ok=True)
Example #8
0
def download_file_from_url(url, save_path):
    """
    Downloads a file resource directly from a url endpoint to a local path.

    Args:
        url (str): path to resource
        save_path (str): path to output file; if this is an existing
            directory, the filename is derived from the response headers

    Returns:
        None
    """
    if os.path.isdir(save_path):
        filename = get_filename_from_header(url)
        save_path = make_path(save_path, filename)

    print(f"...downloading {save_path} from {url}")
    try:
        request.urlretrieve(url, save_path)
    except Exception:
        # FIX: narrowed from a bare `except:` (which also trapped
        # KeyboardInterrupt/SystemExit). Fall back to a manual fetch when
        # urlretrieve fails — the original intent, now scoped to real errors.
        with request.urlopen(url) as download:
            with open(save_path, "wb") as out_file:
                out_file.write(download.read())
Example #9
0
def download_aggregate_lodes(
    output_dir,
    file_type,
    state,
    segment,
    part,
    job_type,
    year,
    agg_geog=None,
    overwrite=False,
):
    """
    Helper function to fetch lodes data and aggregate to another census geography if one is provided

    Args:
        output_dir (str): path to location downloaded files should end up
        file_type (str): one of three LODES groupings ['od', 'rac', 'wac']
            - OD: Origin-Destination data, totals are associated with both a home Census Block and a work Census Block
            - RAC: Residence Area Characteristic data, jobs are totaled by home Census Block
            - WAC: Workplace Area Characteristic data, jobs are totaled by work Census Block
        state (str): The two-character postal abbreviation for the state
        segment (str): Segment of the workforce, can have the values of
            [“S000”, “SA01”, “SA02”, “SA03”,  “SE01”, “SE02”, “SE03”, “SI01”, “SI02”, “SI03”, ""]
        part (str): Part of the state file, can have a value of either “main” or “aux”. Complimentary parts of
            the state file, the main part includes jobs with both workplace and residence in the state
            and the aux part includes jobs with the workplace in the state and the residence outside of the state.
        job_type (str): LODES job types (“JT00” for All Jobs, “JT01” for Primary Jobs, “JT02” for
            All Private Jobs, “JT03” for Private Primary Jobs, “JT04” for All Federal Jobs, or “JT05”
            for Federal Primary Jobs).
        year (int): year of LODES data to download
        agg_geog (str): census geographies to aggregate lodes data to
        overwrite (bool): if set to True, delete the existing copy of the LODES data

    Returns:
        None: writes csv tables of aggregated lodes data in `output_dir`
    """
    st = state.lower()
    try:
        out_dir = validate_directory(directory=output_dir)
        if validate_lodes_download(file_type, state, segment, part, job_type,
                                   year, agg_geog):
            if file_type == "od":
                # kept for now as it will still download but not aggregate OD
                lodes_fname = f"{st}_{file_type}_{part}_{job_type}_{str(year)}.csv.gz"
            else:
                lodes_fname = (
                    f"{st}_{file_type}_{segment}_{job_type}_{str(year)}.csv.gz"
                )
            lodes_download_url = f"{LODES_URL}/{st}/{file_type}/{lodes_fname}"
            lodes_out = make_path(out_dir, lodes_fname)
            # tag block-level files with "_blk" so aggregated outputs can
            # derive their names from it
            lodes_out = lodes_out.replace(".csv.gz", "_blk.csv.gz")
            print(f"...downloading {lodes_fname} to {lodes_out}")
            check_overwrite_path(output=lodes_out, overwrite=overwrite)
            download_file_from_url(url=lodes_download_url, save_path=lodes_out)
        else:
            # empty sentinel: skips the aggregation step below
            lodes_out = ""

        if agg_geog and lodes_out != "":
            if validate_aggregate_geo_inputs(values=agg_geog,
                                             valid=LODES_AGG_GEOS):
                if isinstance(agg_geog, string_types):
                    agg_geog = [agg_geog]
                for geog in agg_geog:
                    cross_fname = f"{state}_xwalk.csv.gz"
                    cross_out = make_path(out_dir, cross_fname)
                    agged_out = lodes_out.replace("_blk.csv.gz",
                                                  f"_{geog}.csv.gz")
                    crosswalk_url = f"{LODES_URL}/{state}/{state}_xwalk.csv.gz"
                    # the crosswalk is shared across years; only fetch once
                    if not os.path.exists(cross_out):
                        print(f"...downloading {cross_fname} to {cross_out}")
                        download_file_from_url(url=crosswalk_url,
                                               save_path=cross_out)
                    print(f"...aggregating block group level data to {geog}")

                    agged = aggregate_lodes_data(
                        geo_crosswalk_path=cross_out,
                        lodes_path=lodes_out,
                        file_type=file_type,
                        agg_geo=geog,
                    )
                    check_overwrite_path(output=agged_out, overwrite=overwrite)
                    agged.to_csv(agged_out, compression="gzip", index=False)

        else:
            print(
                "No aggregation requested or there is no LODES data for this request"
            )
    except Exception as err:
        # FIX: was a bare `except:` that printed only "something failed",
        # hiding the cause (and trapping KeyboardInterrupt/SystemExit).
        # Surface the error so bad parameters vs. network failures can be
        # distinguished.
        print(f"something failed: {err!r}")
Example #10
0
def download_osm_buildings(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    keep_fields=("osmid", "building", "name", "geometry"),
    suffix="",
    overwrite=False
):
    """
    Uses an Overpass query to fetch the OSM building polygons within a
    specified bounding box or the bounding box of a provided shapefile.

    Args:
        output_dir (str): Path to output directory.
        polygon (str): path to a shapefile or geojson object readable by geopandas
        bbox (dict): default=None; A dictionary with keys 'south', 'west', 'north', and 'east' of
            EPSG:4326-style coordinates, defining a bounding box for the area from which to fetch
            OSM features. Only required when `polygon` is not provided. See module
            notes for performance and suggestions on usage.
        data_crs (int): integer value representing an EPSG code
        keep_fields (tuple): fields to keep in the output dataset
            (FIX: was a mutable list default — a classic shared-state pitfall)
        suffix (str): string value to be added to the end of the output folder
        overwrite (bool): if set to True, delete the existing copy of buildings

    Returns:
        buildings_gdf (gpd.GeoDataFrame): A gdf of OSM building features. By default, the CRS of
            the gdf will be EPSG:4326 unless a transformation is specified via `data_crs` or
            a shapefile with a different CRS is provided as `polygon`.
            -- TODO confirm: CRS handling happens inside `validate_inputs`, not visible here.

    Notes:
        OSM building polygon features are automatically saved in the `output_dir` as
        `OSM_Buildings_{YYYYMMDD}.shp` where `YYYYMMDD` is the date at which
        the Overpass query was pushed. This is done for record keeping purposes.
        (The previous docstring claimed a YYYYMMDDHHMMSS stamp; the code uses "%Y%m%d".)
    """

    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )

    # - Output location
    output_dir = validate_directory(make_path(output_dir, f"buildings_{suffix}"))

    # Data read in and setup -------------------------------------------------
    print("...Pulling building data from Overpass API...")
    buildings_gdf = ox.geometries_from_bbox(
        north=bounding_box["north"],
        south=bounding_box["south"],
        east=bounding_box["east"],
        west=bounding_box["west"],
        tags={"building": True},
    )
    # drop non-polygon features and subset fields
    print("...Dropping non-polygon features and unneeded fields")
    buildings_gdf = buildings_gdf[
        buildings_gdf.geom_type.isin(["MultiPolygon", "Polygon"])
    ]
    # BUG FIX: reset_index is not in-place, so its result was previously
    # discarded and the "osmid" index level never became a column despite
    # being listed in keep_fields. Reset first, THEN subset fields so any
    # unwanted former index levels (e.g. element_type) are dropped too.
    buildings_gdf = buildings_gdf.reset_index()
    drop_cols = [col for col in buildings_gdf.columns if col not in keep_fields]
    buildings_gdf.drop(labels=drop_cols, axis=1, inplace=True)

    # Saving -----------------------------------------------------------------
    print("...Saving...")
    dt = datetime.now().strftime("%Y%m%d")
    file_name = "OSM_Buildings_{}.shp".format(dt)
    save_path = make_path(output_dir, file_name)
    check_overwrite_path(output=save_path, overwrite=overwrite)
    buildings_gdf.to_file(save_path)
    print("-- saved to: " + save_path)

    return buildings_gdf
Example #11
0
        download_lodes_data(overwrite=overwrite)


def main():
    """
    Parse command-line options and dispatch the download run.

    Builds an argparse parser whose boolean flags gate each download step,
    then hands the parsed namespace to `run` (defined elsewhere in this file).
    """
    # todo: add more utility to this, making the download script executable
    import argparse
    parser = argparse.ArgumentParser(prog="downloader",
                                     description="Download RAW data...")
    # NOTE(review): every flag except --urls uses action="store_false", so
    # those steps default to True and passing the flag *disables* them, while
    # -u/--urls defaults to False and passing it *enables* that step. Confirm
    # this asymmetry is intentional and not a copy-paste slip.
    parser.add_argument("-x", "--overwrite",    dest="overwrite",   action="store_false")
    parser.add_argument("-s", "--setup",        dest="setup",       action="store_false")
    parser.add_argument("-u", "--urls",         dest="urls",        action="store_true")
    parser.add_argument("-o", "--osm",          dest="osm",         action="store_false")
    parser.add_argument("-g", "--census_geo",   dest="census_geo",  action="store_false")
    parser.add_argument("-c", "--commutes",     dest="commutes",    action="store_false")
    parser.add_argument("-r", "--race",         dest="race",        action="store_false")
    parser.add_argument("-l", "--lodes",        dest="lodes",       action="store_false")
    args = parser.parse_args()
    run(args)


if __name__ == "__main__":
    # DEBUG redirects RAW to a local scratch folder so a test run does not
    # touch the production data tree
    DEBUG = True
    if DEBUG:
        ROOT = r"C:\PMT_TEST_FOLDER"
        RAW = validate_directory(make_path(ROOT, "RAW"))
        # FIX: removed the no-op self-assignment `YEARS = YEARS`

    # `t` is presumably a timer started/stopped around the run — defined
    # elsewhere in this file; TODO confirm
    t.start()
    main()
    t.stop()