Beispiel #1
0
def download_race_data(overwrite=True):
    """
    Downloads ACS race data of interest, one CSV per year in YEARS.

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_race.csv

    Args:
        overwrite (bool): if True, delete any existing copy of the output CSV
            before writing.
    """
    # download census tabular data
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("RACE:")
    for year in YEARS:
        # setup output path for this year
        race_out = make_path(census, f"ACS_{year}_race.csv")
        print(f"...Fetching race data ({race_out})")
        try:
            # state="12"/county="086" is Florida / Miami-Dade (FIPS codes)
            race = helper.download_race_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_RACE_TABLE,
                columns=dl_conf.ACS_RACE_COLUMNS,
            )
            check_overwrite_path(output=race_out, overwrite=overwrite)
            race.to_csv(race_out, index=False)
        except Exception as e:
            # best-effort per-year download: report the failure and its cause,
            # then continue with the remaining years (bare except would have
            # also swallowed KeyboardInterrupt/SystemExit)
            print(f"..ERROR DOWNLOADING RACE DATA ({year}): {e}")
Beispiel #2
0
def download_census_geo(overwrite=True):
    """
    Download census geographies
        - downloads and unzips the census block and blockgroup shapefiles
        - downloads and writes out to table the ACS race and commute data
        - downloads LODES data to table

    Inputs:
        - RAW//temp_downloads (folder path)
        - RAW//CENSUS (extract path)
        - CENSUS_GEO_TYPES (list of geographies)

    Outputs:
        - RAW//CENSUS//BG (block group geographies)
        - RAW//CENSUS//TABBLOCK (block geographies)
    """
    print("\nFetching CENSUS Geographies...")
    # prepare a scratch download folder and the extraction target
    download_dir = make_path(RAW, "temp_downloads")
    extract_dir = make_path(RAW, "CENSUS")
    for folder in (download_dir, extract_dir):
        check_overwrite_path(output=folder, overwrite=overwrite)
        validate_directory(folder)
    # fetch and extract each configured geography type
    for geo in dl_conf.CENSUS_GEO_TYPES:
        census_geo.get_one_geo_type(
            geo_type=geo,
            download_dir=download_dir,
            extract_dir=extract_dir,
            state=dl_conf.CENSUS_STATE,
            year=str(SNAPSHOT_YEAR),
        )
    # the raw zip downloads are no longer needed once extracted
    rmtree(download_dir)
Beispiel #3
0
def download_commute_data(overwrite=True):
    """
    Downloads ACS commute (mode of travel) data of interest, one CSV per
    year in YEARS.

    Inputs:
        - RAW//CENSUS (root census folder)

    Outputs:
        - RAW//CENSUS//ACS_{year}_commute.csv

    Args:
        overwrite (bool): if True, delete any existing copy of the output CSV
            before writing.
    """
    census = validate_directory(make_path(RAW, "CENSUS"))
    print("COMMUTE:")
    for year in YEARS:
        commute_out = make_path(census, f"ACS_{year}_commute.csv")
        print(f"...Fetching commute data ({commute_out})")
        try:
            # state="12"/county="086" is Florida / Miami-Dade (FIPS codes)
            commute = helper.download_commute_vars(
                year,
                acs_dataset="acs5",
                state="12",
                county="086",
                table=dl_conf.ACS_MODE_TABLE,
                columns=dl_conf.ACS_MODE_COLUMNS,
            )
            check_overwrite_path(output=commute_out, overwrite=overwrite)
            commute.to_csv(commute_out, index=False)
        except Exception as e:
            # best-effort per-year download: report the failure and its cause,
            # then continue with the remaining years (bare except would have
            # also swallowed KeyboardInterrupt/SystemExit)
            print(f"..ERROR DOWNLOADING COMMUTE DATA ({year}): {e}")
Beispiel #4
0
def download_urls(overwrite=True):
    """
    Downloads raw data that are easily accessible via web `request' at a url endpoint

    Inputs:
        - DOWNLOAD_URL_DICT (dictionary of output_name: url found in config.download_config)

    Outputs: (11 files)
        - RAW//{output_name} --> ['Imperviousness', 'MD_Urban_Growth_Boundary', 'Miami-Dade_County_Boundary',
        'Municipal_Parks', 'County_Parks', 'Federal_State_Parks', 'Park_Facilities',
        'Bike_Lanes', 'Paved_Path',  'Paved_Shoulder', 'Wide_Curb_Lane']

    Args:
        overwrite (bool): if True, delete any existing copy of each output file
            before downloading.
    """
    for file, url in dl_conf.DOWNLOAD_URL_DICT.items():
        _, ext = os.path.splitext(url)
        if ext == ".zip":
            out_file = make_path(RAW, f"{file}.zip")
        elif ext == ".geojson":
            out_file = make_path(RAW, f"{file}.geojson")
        else:
            # BUG FIX: previously fell through with `out_file` unbound (first
            # iteration -> UnboundLocalError) or stale from the prior loop pass
            # (silently clobbering the wrong file); skip unsupported extensions
            print(f"downloader doesnt handle that extension ({ext}) -- skipping {file}")
            continue
        print(f"Downloading {out_file}")
        check_overwrite_path(output=out_file, overwrite=overwrite)
        helper.download_file_from_url(url=url, save_path=out_file)
Beispiel #5
0
def download_aggregate_lodes(
    output_dir,
    file_type,
    state,
    segment,
    part,
    job_type,
    year,
    agg_geog=None,
    overwrite=False,
):
    """
    Helper function to fetch lodes data and aggregate to another census geography if one is provided

    Args:
        output_dir (str): path to location downloaded files should end up
        file_type (str): one of three LODES groupings ['od', 'rac', 'wac']
            - OD: Origin-Destination data, totals are associated with both a home Census Block and a work Census Block
            - RAC: Residence Area Characteristic data, jobs are totaled by home Census Block
            - WAC: Workplace Area Characteristic data, jobs are totaled by work Census Block
        state (str): The two-character postal abbreviation for the state
        segment (str): Segment of the workforce, can have the values of
            [“S000”, “SA01”, “SA02”, “SA03”,  “SE01”, “SE02”, “SE03”, “SI01”, “SI02”, “SI03”, ""]
        part (str): Part of the state file, can have a value of either “main” or “aux”. Complimentary parts of
            the state file, the main part includes jobs with both workplace and residence in the state
            and the aux part includes jobs with the workplace in the state and the residence outside of the state.
        job_type (str): LODES job types (“JT00” for All Jobs, “JT01” for Primary Jobs, “JT02” for
            All Private Jobs, “JT03” for Private Primary Jobs, “JT04” for All Federal Jobs, or “JT05”
            for Federal Primary Jobs).
        year (int): year of LODES data to download
        agg_geog (str): census geographies to aggregate lodes data to
        overwrite (bool): if set to True, delete the existing copy of the LODES data

    Returns:
        None: writes csv tables of aggregated lodes data in `output_dir`
    """
    st = state.lower()
    try:
        out_dir = validate_directory(directory=output_dir)
        if validate_lodes_download(file_type, state, segment, part, job_type,
                                   year, agg_geog):
            # LODES file naming differs by grouping: OD files are keyed by
            # `part`, RAC/WAC files by `segment`
            if file_type == "od":
                # kept for now as it will still download but not aggregate OD
                lodes_fname = f"{st}_{file_type}_{part}_{job_type}_{str(year)}.csv.gz"
            else:
                lodes_fname = (
                    f"{st}_{file_type}_{segment}_{job_type}_{str(year)}.csv.gz"
                )
            lodes_download_url = f"{LODES_URL}/{st}/{file_type}/{lodes_fname}"
            lodes_out = make_path(out_dir, lodes_fname)
            # tag block-level downloads with "_blk" so aggregated outputs can
            # be derived from the same base name below
            lodes_out = lodes_out.replace(".csv.gz", "_blk.csv.gz")
            print(f"...downloading {lodes_fname} to {lodes_out}")
            check_overwrite_path(output=lodes_out, overwrite=overwrite)
            download_file_from_url(url=lodes_download_url, save_path=lodes_out)
        else:
            # sentinel: no valid LODES data for this request
            lodes_out = ""

        if agg_geog and lodes_out != "":
            if validate_aggregate_geo_inputs(values=agg_geog,
                                             valid=LODES_AGG_GEOS):
                if isinstance(agg_geog, string_types):
                    agg_geog = [agg_geog]
                for geog in agg_geog:
                    # block -> geography crosswalk is shared across years, so
                    # only download it if it's not already present
                    cross_fname = f"{state}_xwalk.csv.gz"
                    cross_out = make_path(out_dir, cross_fname)
                    agged_out = lodes_out.replace("_blk.csv.gz",
                                                  f"_{geog}.csv.gz")
                    crosswalk_url = f"{LODES_URL}/{state}/{state}_xwalk.csv.gz"
                    if not os.path.exists(cross_out):
                        print(f"...downloading {cross_fname} to {cross_out}")
                        download_file_from_url(url=crosswalk_url,
                                               save_path=cross_out)
                    print(f"...aggregating block group level data to {geog}")

                    agged = aggregate_lodes_data(
                        geo_crosswalk_path=cross_out,
                        lodes_path=lodes_out,
                        file_type=file_type,
                        agg_geo=geog,
                    )
                    check_overwrite_path(output=agged_out, overwrite=overwrite)
                    agged.to_csv(agged_out, compression="gzip", index=False)

        else:
            print(
                "No aggregation requested or there is no LODES data for this request"
            )
    except Exception as e:
        # BUG FIX: was a bare `except:` printing only "something failed",
        # which discarded all diagnostics (and caught SystemExit/KeyboardInterrupt)
        print(f"something failed: {e!r}")
Beispiel #6
0
def download_osm_buildings(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    keep_fields=None,
    suffix="",
    overwrite=False
):
    """
    Uses an Overpass query to fetch the OSM building polygons within a
    specified bounding box or the bounding box of a provided shapefile.

    Args:
        output_dir (str): Path to output directory.
        polygon (str): path to a shapefile or geojson object readable by geopandas
        bbox (dict): default=None; A dictionary with keys 'south', 'west', 'north', and 'east' of
            EPSG:4326-style coordinates, defining a bounding box for the area from which to fetch
            OSM features. Only required when `polygon` is not provided. See module
            notes for performance and suggestions on usage.
        data_crs (int): integer value representing an EPSG code
        keep_fields (list): default=None; list of fields to keep in the output dataset.
            If None, defaults to ["osmid", "building", "name", "geometry"].
        suffix (str): string value to be added to the end of the output folder
        overwrite (bool): if set to True, delete the existing copy of buildings

    Returns:
        buildings_gdf (gpd.GeoDataFrame): A gdf of OSM building features. By default, the CRS of
            the gdf will be EPSG:4326 unless a transformation is specified or a shapefile
            with a different CRS is provided as `polygon`.

    Notes:
        OSM building polygon features will automatically be saved in the `output_dir`'s
        `OSM_Buildings_{YYYYMMDD}.shp` where `YYYYMMDD` is the date at which
        the Overpass query was pushed. This is done for record keeping purposes.
    """
    # avoid the shared-mutable-default-argument pitfall
    if keep_fields is None:
        keep_fields = ["osmid", "building", "name", "geometry"]

    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )

    # - Output location
    output_dir = validate_directory(make_path(output_dir, f"buildings_{suffix}"))

    # Data read in and setup -------------------------------------------------
    print("...Pulling building data from Overpass API...")
    buildings_gdf = ox.geometries_from_bbox(
        north=bounding_box["north"],
        south=bounding_box["south"],
        east=bounding_box["east"],
        west=bounding_box["west"],
        tags={"building": True},
    )
    # drop non-polygon features and subset fields
    print("...Dropping non-polygon features and unneeded fields")
    buildings_gdf = buildings_gdf[
        buildings_gdf.geom_type.isin(["MultiPolygon", "Polygon"])
    ]
    drop_cols = [col for col in buildings_gdf.columns if col not in keep_fields]
    buildings_gdf.drop(labels=drop_cols, axis=1, inplace=True)
    # BUG FIX: reset_index() returns a new frame; the result was previously
    # discarded, leaving the (element_type, osmid) index un-promoted to columns
    buildings_gdf = buildings_gdf.reset_index()

    # Saving -----------------------------------------------------------------
    print("...Saving...")
    dt = datetime.now().strftime("%Y%m%d")
    file_name = "OSM_Buildings_{}.shp".format(dt)
    save_path = make_path(output_dir, file_name)
    check_overwrite_path(output=save_path, overwrite=overwrite)
    buildings_gdf.to_file(save_path)
    print("-- saved to: " + save_path)

    return buildings_gdf
Beispiel #7
0
def download_osm_networks(
    output_dir,
    polygon=None,
    bbox=None,
    data_crs=None,
    net_types=None,
    pickle_save=False,
    suffix="",
    overwrite=False
):
    """
    Download an OpenStreetMap network within the area defined by a polygon
    feature class or a bounding box.

    Args:
        output_dir (str): Path, Path to output directory. Each modal network (specified by `net_types`)
                is saved to this directory within an eponymous folder as a shape file.
                If `pickle_save` is True, pickled graph objects are also stored in this directory in the
                appropriate subfolders.
        polygon (str): Path, default=None; Path to study area polygon(s) shapefile. If provided, the polygon
                features define the area from which to fetch OSM features and `bbox` is ignored.
                See module notes for performance and suggestions on usage.
        bbox (dict): default=None; A dictionary with keys 'south', 'west', 'north', and 'east' of
                EPSG:4326-style coordinates, defining a bounding box for the area from which to
                fetch OSM features. Only required when `polygon` is not provided.
                See module notes for performance and suggestions on usage.
        data_crs (int): integer value representing an EPSG code
        net_types (list): [String,...], default=None (treated as ["drive", "walk", "bike"]);
                A list containing any or all of "drive", "walk", or "bike", specifying
                the desired OSM network features to be downloaded.
        pickle_save (bool): default=False; If True, the downloaded OSM networks are saved as
                python `networkx` objects using the `pickle` module. See module notes for usage.
        suffix (str): default=""; Downloaded datasets may optionally be stored in folders with
                a suffix appended, differentiating networks by date, for example.
        overwrite (bool): if set to True, delete the existing copy of the network(s)

    Returns:
        G (dict): A dictionary of networkx graph objects. Keys are mode names based on
                `net_types`; values are graph objects.
    """
    # avoid the shared-mutable-default-argument pitfall
    if net_types is None:
        net_types = ["drive", "walk", "bike"]

    # Validation of inputs
    # TODO: separate polygon and bbox validation
    bounding_box = validate_inputs(
        study_area_poly=polygon, bbox=bbox, data_crs=data_crs
    )

    # - ensure Network types are valid and formatted correctly
    net_types = validate_network_types(network_types=net_types)

    output_dir = validate_directory(output_dir)

    # Fetch network features
    mode_nets = {}
    for net_type in net_types:
        print("")
        net_folder = f"{net_type}_{suffix}"
        print(f"OSMnx '{net_type.upper()}' network extraction")
        print("-- extracting a composed network by bounding box...")
        g = ox.graph_from_bbox(
            north=bounding_box["north"],
            south=bounding_box["south"],
            east=bounding_box["east"],
            west=bounding_box["west"],
            network_type=net_type,
            retain_all=True,
        )
        if net_type in ["walk", "bike"]:
            # drop small disconnected fragments from non-drive networks
            g = dl_help.trim_components(graph=g)

        # Pickle if requested
        # NOTE(review): this writes into `net_folder` before the shapefile save
        # below creates it — presumably the folder already exists or
        # open() fails; confirm ordering against callers
        if pickle_save:
            print("-- saving the composed network as pickle")
            out_f = os.path.join(output_dir, net_folder, "osmnx_composed_net.p")
            with open(out_f, "wb") as pickle_file:
                pickle.dump(g, pickle_file)
            print("---- saved to: {}".format(out_f))

        # 2. Saving as shapefile
        print("-- saving network shapefile...")
        out_f = os.path.join(output_dir, net_folder)
        check_overwrite_path(output=out_f, overwrite=overwrite)
        ox.save_graph_shapefile(G=g, filepath=out_f)
        # need to change this directory
        print("---- saved to: " + out_f)

        # 3. Add the final graph to the dictionary of networks
        mode_nets[net_type] = g
    return mode_nets