def delete_dataset_files(doi):
    """
    Delete all files from draft dataset at the given DOI.
    """

    host = 'dataverse.harvard.edu'
    url_statement = f'https://{host}/dvn/api/data-deposit/v1.1/swordv2/statement/study/{doi}'
    auth = (api_key, None)
    response = requests.get(url_statement, auth=auth)
    assert response.status_code == 200

    response_dict = xmltodict.parse(response.text)
    if 'entry' not in response_dict['feed']:
        print(ox.ts(), f'No files to delete in {doi}')

    else:
        files = response_dict['feed']['entry']
        if isinstance(files, OrderedDict):
            files = [files]
        print(ox.ts(), f'There are {len(files)} files to delete in {doi}')
        st = time.time()

        i = 0
        for file in files:
            file_name = file['id'].split('/')[-1]
            file_id = file['id'].split('/')[-2]
            url_delete = f'https://{host}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{file_id}'
            auth = (api_key, None)
            response = requests.delete(url_delete, auth=auth)
            print(ox.ts(), f'Deleted {file_name} {response}')
            assert response.status_code == 204
            i += 1

        et = int(time.time() - st)
        print(ox.ts(), f'Deleted {i} files in {et} seconds')
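
# A minimal sketch of the module-level setup this function assumes; the
# api_key value and the DOI below are hypothetical placeholders, not values
# from the original script.
import time
from collections import OrderedDict

import requests
import xmltodict
import osmnx as ox

api_key = 'YOUR-DATAVERSE-API-TOKEN'  # placeholder token
delete_dataset_files('doi:10.70122/FK2/EXAMPLE')  # hypothetical dataset DOI
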
Example #2
def test_logging():
    # test OSMnx's logger
    ox.log("test a fake debug", level=lg.DEBUG)
    ox.log("test a fake info", level=lg.INFO)
    ox.log("test a fake warning", level=lg.WARNING)
    ox.log("test a fake error", level=lg.ERROR)

    ox.citation()
    ox.ts(style="date")
    ox.ts(style="time")
Example #3
def download_file(url, session, filepath):
    try:
        with session.get(url, stream=True, timeout=timeout) as response:
            assert response.ok
            with open(filepath, 'wb') as file:
                shutil.copyfileobj(response.raw, file)
                print(ox.ts(), 'status', response.status_code, 'saved',
                      filepath)
    except Exception as e:
        print(ox.ts(), e)
def load_prep(filename):

    nodes = get_nodes(filename)
    print(ox.ts(), f'load {len(nodes)} total nodes')

    # remove any duplicate nodes that appeared in multiple graphs
    nodes = nodes.sort_index()
    nodes = nodes.loc[~nodes.index.duplicated(keep='first')]
    assert nodes.index.is_unique
    print(ox.ts(), 'keep {} unique nodes'.format(len(nodes)))

    return nodes
Example #5
def plot_data(gdf_osm,
              gdf_official,
              study_area,
              filepath,
              figsize=(10, 10),
              bgcolor="#333333",
              projected=True):
    """
    Plot the OSM vs official streets and save to disk.

    Parameters
    ----------
    gdf_osm : geopandas.GeoDataFrame
        the osm streets
    gdf_official : geopandas.GeoDataFrame
        the official streets
    study_area : shapely.Polygon or shapely.MultiPolygon
        the study area boundary
    filepath : str
        path to save figure as file
    figsize : tuple
        size of plotting figure
    bgcolor : str
        background color of plot
    projected : bool
        True if gdfs are projected rather than lat-lng

    Returns
    -------
    fig, ax : tuple
    """

    fig, ax = plt.subplots(figsize=figsize, facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # turn study_area polygon into gdf with correct CRS
    gdf_boundary = gpd.GeoDataFrame(geometry=[study_area], crs=gdf_osm.crs)

    # plot study area, then official streets, then osm streets as layers
    _ = gdf_boundary.plot(ax=ax, facecolor="k", label="Study Area")
    _ = gdf_official.plot(ax=ax, color="r", lw=1, label="Official Data")
    _ = gdf_osm.plot(ax=ax, color="y", lw=1, label="OSM Data")

    ax.axis("off")
    if projected:
        # only make x/y equal-aspect if data are projected
        ax.set_aspect("equal")

    # create legend
    ax.legend()

    # save to disk
    fig.savefig(filepath,
                dpi=300,
                bbox_inches="tight",
                facecolor=fig.get_facecolor())
    print(ox.ts(), f'figure saved to disk at "{filepath}"')

    plt.close()
    return fig, ax
def subcluster(nodes, labels, n=None, size=batch_size):

    if n is None:
        n = math.ceil(len(nodes.loc[labels]) / size)

    print(ox.ts(), 'clustering', len(nodes.loc[labels]), 'nodes into', n,
          'clusters')
    X = nodes.loc[labels, ['x', 'y']].values
    kmeans = KMeans(n_clusters=n,
                    init='k-means++',
                    algorithm='full',
                    n_init=10,
                    max_iter=300,
                    random_state=0)

    kmeans = kmeans.fit(X)
    cluster_labels = pd.Series(kmeans.predict(X)).astype(str).values

    if 'cluster' in nodes.columns:
        # make this a subcluster
        separators = np.array(['-'] * len(cluster_labels))
        nodes.loc[labels, 'cluster'] = nodes.loc[labels, 'cluster'].str.cat(
            others=[separators, cluster_labels])
    else:
        # create a new cluster column
        nodes['cluster'] = cluster_labels
def zip_dir(input_path, output_folder, output_file):

    output_path = os.path.join(output_folder, output_file)
    if not os.path.exists(output_path):
        print(ox.ts(), input_path, output_path)

        # create a zip file to contain all the files from the input path
        zf = zipfile.ZipFile(file=output_path,
                             mode='w',
                             compression=zipfile.ZIP_DEFLATED,
                             compresslevel=9)

        for root, folders, files in os.walk(input_path):
            for file in sorted(files):

                input_file = os.path.join(root, file)
                if '/nelist/' in input_file:
                    # preserve the relative folder structure below country level in zip file
                    arcname = os.path.join(os.path.split(root)[-1], file)
                else:
                    # no subfolders for gpkg or graphml, just files in root
                    arcname = file
                zf.write(filename=input_file, arcname=arcname)

        zf.close()
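
# Hypothetical example call; the paths are placeholder assumptions, not values
# from the original configuration.
zip_dir(input_path='graphml/albania-ALB',
        output_folder='staging/graphml',
        output_file='albania-ALB.zip')
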
Example #8
def login(url):
    session = requests.session()
    session.auth = (username, password)
    redirect = session.get(url, timeout=timeout)
    response = session.get(redirect.url, timeout=timeout)
    assert response.ok
    print(ox.ts(), 'status', response.status_code, 'logged in')
    return session
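
# A minimal sketch combining login with the download_file helper defined
# earlier; username, password, timeout, and both URLs are placeholder
# assumptions rather than values from the original scripts.
username = 'someuser'
password = 'somepassword'
timeout = 60  # seconds

session = login('https://example.com/protected/')  # hypothetical login URL
download_file('https://example.com/protected/data.zip', session, 'data.zip')
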
def cluster_nodes(nodes):

    # FIRST PASS
    # divide the initial set of all country nodes into more manageably sized clusters
    # it's cpu/mem intensive to divide lots of points into lots of clusters
    # so this pass just divides lots of points into a few clusters
    if len(nodes) > max_cluster_input_size:
        subcluster(nodes, nodes.index, size=max_cluster_input_size * 2)
    else:
        nodes['cluster'] = '0'

    # SECOND PASS
    # recursively subcluster the clusters to make the clusters small enough to
    # be able to cluster into lots of sub-sub-clusters of size batch_size
    while (nodes['cluster'].value_counts() > max_cluster_input_size).any():
        for cluster, group in nodes.groupby('cluster'):
            if len(group) > max_cluster_input_size:
                subcluster(nodes, group.index, size=max_cluster_input_size / 2)

    # THIRD PASS
    # now that the clusters are of digestible size, subcluster them down to
    # approximately the size of batch_size. kmeans produces uneven cluster sizes
    # so many will be bigger/smaller than batch_size... handle this in 4th pass
    for cluster, group in nodes.groupby('cluster'):
        if len(group) > batch_size:
            subcluster(nodes, group.index)

    # status check
    n_clusters = len(nodes['cluster'].unique())
    n_too_big = (nodes.groupby('cluster').size() > batch_size).sum()
    print(ox.ts(), 'we now have', n_clusters, 'clusters but', n_too_big,
          'are too big and must be subdivided')

    # FOURTH PASS
    # if clustering produced clusters bigger than batch_size, bisect them
    while (nodes['cluster'].value_counts() > batch_size).any():
        for cluster, group in nodes.groupby('cluster'):
            if len(group) > batch_size:
                bissect_cluster(nodes, group)

    print(ox.ts(), 'all done, we now have', len(nodes['cluster'].unique()),
          'clusters')
    return nodes
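
# The fourth pass above calls a bissect_cluster helper that is not included in
# this snippet. A hedged sketch of what such a helper might look like, assuming
# it splits an oversized cluster in half at the median of its wider axis and
# suffixes the cluster labels so each half becomes its own cluster:
def bissect_cluster(nodes, group):
    # pick whichever axis has the larger spread, then cut at its median
    spread_x = group['x'].max() - group['x'].min()
    spread_y = group['y'].max() - group['y'].min()
    axis = 'x' if spread_x >= spread_y else 'y'
    lower_half = group[axis] <= group[axis].median()

    # suffix the existing label so the two halves get distinct identifiers
    nodes.loc[group[lower_half].index, 'cluster'] = group.loc[lower_half, 'cluster'] + '-a'
    nodes.loc[group[~lower_half].index, 'cluster'] = group.loc[~lower_half, 'cluster'] + '-b'
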
def check_and_save(nodes, filename):

    # add country code to cluster identifier
    country_code = filename.split('-')[1].split('.')[0]
    nodes['cluster'] = country_code + nodes['cluster']

    cluster_sizes = nodes.groupby('cluster').size()
    print(ox.ts(), 'largest cluster contains', cluster_sizes.max(),
          'nodes and median is', int(cluster_sizes.median()))
    assert cluster_sizes.max() <= batch_size

    ideal_clusters = math.ceil(len(nodes) / batch_size)
    real_clusters = len(nodes['cluster'].unique())
    print(ox.ts(), 'ideally we\'d have', ideal_clusters,
          'clusters but we have', real_clusters)

    output_filepath = os.path.join(clusters_folder, filename)
    nodes.to_csv(output_filepath, index=True, encoding='utf-8')
    print(ox.ts(), 'saved node clusters to disk at', output_filepath)
Example #11
def save_results(indicators, output_path):

    output_folder = output_path[:output_path.rfind('/')]
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    df = pd.DataFrame(indicators).T.reset_index(drop=True)
    df.to_csv(output_path, index=False, encoding='utf-8')
    print(ox.ts(),
          f'saved {len(indicators)} results to disk at "{output_path}"')
    return df
Example #12
def load_data(osm_graphml_path, osm_buffer_gpkg_path,
              official_streets_gpkg_path):
    """
    Load the street network edges and study boundary.

    Parameters
    ----------
    osm_graphml_path : str
        path to the OSM graphml file
    osm_buffer_gpkg_path : str
        path to the buffered study area geopackage
    official_streets_gpkg_path : str
        path to the official streets shapefile

    Returns
    -------
    gdf_osm_streets_clipped, gdf_official_streets_clipped, study_area : tuple
        the osm streets (clipped to the study area), the official streets
        (clipped to the study area), and the study area polygon
    """

    # load the study area boundary as a shapely (multi)polygon
    gdf_study_area = gpd.read_file(osm_buffer_gpkg_path,
                                   layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]
    print(ox.ts(), "loaded study area boundary")

    # load the official streets shapefile
    gdf_official_streets = gpd.read_file(official_streets_gpkg_path)
    print(ox.ts(), "loaded official streets shapefile")

    # load the graph, make it undirected, then get edges GeoDataFrame
    gdf_osm_streets = ox.graph_to_gdfs(ox.get_undirected(
        ox.load_graphml(osm_graphml_path)),
                                       nodes=False)
    print(ox.ts(), "loaded osm edges and made undirected streets")

    # Project the data to a common crs
    crs = gdf_study_area.crs
    if gdf_osm_streets.crs != crs:
        gdf_osm_streets = gdf_osm_streets.to_crs(crs)
        print(ox.ts(), "projected osm streets")
    if gdf_official_streets.crs != crs:
        gdf_official_streets = gdf_official_streets.to_crs(crs)
        print(ox.ts(), "projected official streets")

    # spatially clip the streets to the study area boundary
    import warnings

    warnings.filterwarnings("ignore", "GeoSeries.notna",
                            UserWarning)  # temp warning suppression
    gdf_osm_streets_clipped = gpd.clip(gdf_osm_streets, study_area)
    gdf_official_streets_clipped = gpd.clip(gdf_official_streets, study_area)
    print(ox.ts(), "clipped osm/official streets to study area boundary")

    # double-check everything has same CRS, then return
    assert gdf_osm_streets_clipped.crs == gdf_official_streets_clipped.crs == gdf_study_area.crs
    return gdf_osm_streets_clipped, gdf_official_streets_clipped, study_area
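
# A minimal sketch chaining load_data with the plot_data helper defined
# earlier; all file paths below are hypothetical placeholders.
gdf_osm, gdf_official, study_area = load_data(
    'data/city.graphml',           # hypothetical path
    'data/city_buffer.gpkg',       # hypothetical path
    'data/official_streets.gpkg',  # hypothetical path
)
fig, ax = plot_data(gdf_osm, gdf_official, study_area,
                    filepath='figures/city_streets.png')
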
Example #13
def add_elevations(country_folder, graph_filename):

    # load graph
    graph_filepath = os.path.join(graphml_folder, country_folder, graph_filename)
    G = ox.load_graphml(filepath=graph_filepath)
    print(ox.ts(), 'load', len(G), 'nodes and', len(G.edges), 'edges from', graph_filepath)

    # get the elevation data for this graph's nodes
    graph_elevs = df_elevs.loc[set(G.nodes)].sort_index()

    # set nodes' elevation attributes
    nx.set_node_attributes(G, name='elevation', values=graph_elevs['elevation'].astype(int))
    nx.set_node_attributes(G, name='elevation_aster', values=graph_elevs['elev_aster'].dropna().astype(int))
    nx.set_node_attributes(G, name='elevation_srtm', values=graph_elevs['elev_srtm'].dropna().astype(int))

    # confirm that no graph node is missing elevation
    assert set(G.nodes) == set(nx.get_node_attributes(G, 'elevation'))

    # then calculate edge grades
    G = ox.add_edge_grades(G, add_absolute=True)

    # resave graphml now that it has elevations/grades
    ox.save_graphml(G, filepath=graph_filepath)
    print(ox.ts(), 'save', graph_filepath)

    # save node/edge lists
    uc_name = graph_filename.replace('.graphml', '')
    nelist_output_folder = os.path.join(nelist_folder, country_folder, uc_name)
    save_node_edge_lists(G, nelist_output_folder)
    print(ox.ts(), 'save', nelist_output_folder)

    # save as geopackage
    gpkg_filename = uc_name + '.gpkg'
    gpkg_filepath = os.path.join(gpkg_folder, country_folder, gpkg_filename)
    ox.save_graph_geopackage(G, filepath=gpkg_filepath)
    print(ox.ts(), 'save', gpkg_filepath)
def plot_hex_bins(
        gdf_boundary,
        hex_grid_clipped,
        gdf_official_destinations_clipped,
        gdf_osm_destinations_clipped,
        filepath,
        figsize=(10, 10),
        bgcolor="#333333",
        projected=True,
):

    fig, ax = plt.subplots(figsize=figsize, facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # plot study area, then official destinations, then osm destinations as layers
    _ = gdf_boundary.plot(ax=ax, facecolor="k", label="Study Area")
    _ = hex_grid_clipped.plot(ax=ax,
                              facecolor="k",
                              edgecolor="w",
                              lw=2,
                              label="Hex Bins")
    _ = gdf_official_destinations_clipped.plot(ax=ax,
                                               color="r",
                                               lw=1,
                                               label="Official Data")
    _ = gdf_osm_destinations_clipped.plot(ax=ax,
                                          color="y",
                                          lw=1,
                                          label="OSM Data")

    ax.axis("off")
    if projected:
        # only make x/y equal-aspect if data are projected
        ax.set_aspect("equal")

    # create legend
    ax.legend()

    # save to disk
    fig.savefig(filepath,
                dpi=300,
                bbox_inches="tight",
                facecolor=fig.get_facecolor())
    print(ox.ts(), f'figure saved to disk at "{filepath}"')

    plt.close()
    return fig, ax
def get_preexisting_files(manifests):

    already_uploaded = {}
    published_files = {}

    for manifest in manifests:
        doi = manifest['doi']
        # what files have already been uploaded to the draft?
        already_uploaded[doi] = get_uploaded_draft_filenames(doi)
        # what files exist in the published version of the dataset?
        published_files[doi] = get_published_files(doi)
        print(
            ox.ts(),
            f"Pre-existing files in {doi}: {len(published_files[doi])} published, {len(already_uploaded[doi])} draft."
        )

    return already_uploaded, published_files
def get_graph(row):

    global count_failed
    global count_success
    global count_already
    global count_small
    global failed_list

    try:
        # graph name = country + country iso + uc + uc id
        graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                          row['UC_NM_MN'], row['ID_HDC_G0'])
        graphml_folder = '{}/{}-{}'.format(output_graphml_path,
                                           row['CTR_MN_NM'], row['CTR_MN_ISO'])
        graphml_file = '{}-{}.graphml'.format(row['UC_NM_MN'],
                                              row['ID_HDC_G0'])

        filepath = os.path.join(graphml_folder, graphml_file)
        if not os.path.exists(filepath):

            # get graph
            print(ox.ts(), graph_name)
            G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                      network_type=network_type,
                                      retain_all=retain_all,
                                      simplify=simplify,
                                      truncate_by_edge=truncate_by_edge)

            # don't save graphs if they have fewer than 3 nodes
            if len(G) > 2:
                ox.save_graphml(G, filepath=filepath)
                count_success = count_success + 1
            else:
                count_small = count_small + 1
        else:
            count_already = count_already + 1

    except Exception as e:
        count_failed = count_failed + 1
        failed_list.append(graph_name)
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)
def load_data(osm_buffer_gpkg_path, official_dests_filepath,
              destinations_column, destinations_values):

    # load the study area boundary as a shapely (multi)polygon
    gdf_study_area = gpd.read_file(osm_buffer_gpkg_path,
                                   layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]
    print(ox.ts(), "loaded study area boundary")

    # load the official destinations shapefile
    # retain only rows with desired values in the destinations column
    gdf_official_destinations = gpd.read_file(official_dests_filepath)
    mask = gdf_official_destinations[destinations_column].isin(
        destinations_values)
    gdf_official_destinations = gdf_official_destinations[mask]
    print(ox.ts(), "loaded and filtered official destinations shapefile")

    # load the osm destinations shapefile
    gdf_osm = gpd.read_file(osm_buffer_gpkg_path, layer="destinations")
    gdf_osm_destinations = gdf_osm[gdf_osm["dest_name"] == "fresh_food_market"]
    print(ox.ts(), "loaded osm destinations shapefile")

    # project the data to a common crs
    crs = gdf_study_area.crs
    if gdf_official_destinations.crs != crs:
        gdf_official_destinations = gdf_official_destinations.to_crs(crs)
        print(ox.ts(), "projected official destinations")
    if gdf_osm_destinations.crs != crs:
        gdf_osm_destinations = gdf_osm_destinations.to_crs(crs)
        print(ox.ts(), "projected osm destinations")

    # spatially clip the destinations to the study area boundary
    import warnings

    warnings.filterwarnings("ignore", "GeoSeries.notna",
                            UserWarning)  # temp warning suppression
    gdf_osm_destinations_clipped = gpd.clip(gdf_osm_destinations, study_area)
    gdf_official_destinations_clipped = gpd.clip(gdf_official_destinations,
                                                 study_area)
    print(ox.ts(), "clipped osm/official destinations to study area boundary")

    # double-check everything has same CRS, then return
    assert gdf_study_area.crs == gdf_osm_destinations_clipped.crs == gdf_official_destinations_clipped.crs
    return study_area, gdf_osm_destinations_clipped, gdf_official_destinations_clipped
Example #18
ox.config(use_cache=True,
          log_file=True,
          log_console=False,
          logs_folder=config['osmnx_log_path'],
          cache_folder=config['osmnx_cache_path'])

pause_duration = 0
urls_path = config['elevation_google_urls_path']
nodestoget_path = config['elevation_google_nodestoget_path']
elevations_path = config['elevation_google_elevations_path']

# set countries=None to get all
countries = None

# In[ ]:

# load nodes in order from file
# we will use this order to match the url batch requests in order back to their nodes
nodes = pd.read_csv(nodestoget_path, index_col='osmid')
print(ox.ts(), 'loaded', len(nodes), 'node osmids to get elevations for')

# In[ ]:

urls = pd.read_csv(urls_path, index_col='cluster')
print(ox.ts(), 'there are', len(urls), 'urls to get')
if len(urls) == 0:
    exit()

# make country and key cols
urls['country'] = urls.index.str.slice(0, 3)
urls['key'] = urls['url'].str.split('key=', expand=True)[1]

# In[ ]:

# if countries list exists, then only retain nodes/urls in those countries
    osm_true_mean = sum(osm_true) / weight_count
    # osm_true_median = statistics.median(osm_true)
    official_mean = sum(official_percentages) / len(
        hex_grid_clipped["geometry"])
    # official_median = statistics.median(official_percentages)
    official_true_mean = sum(official_true) / weight_count
    # official_true_median = statistics.median(official_true)

    return weight_percentage, osm_mean, official_mean, osm_true_mean, official_true_mean


# RUN THE SCRIPT
indicators = {}
for city in cities:

    print(ox.ts(), f"begin processing {city}")
    indicators[city] = {}

    # load this city's configs
    with open(f"../configuration/{city}.json") as f:
        config = json.load(f)

    # load destination gdfs from osm graph and official shapefile
    study_area, gdf_osm_destinations_clipped, gdf_official_destinations_clipped = load_data(
        config["osm_buffer_gpkg_path"],
        config["official_dests_filepath"],
        config["destinations_column"],
        config["destinations_values"],
    )

    # create plot of hexbins for the city
network_type = 'drive'
retain_all = True
simplify = True
truncate_by_edge = True

# In[ ]:

uc_gpkg_path = config['uc_gpkg_path']  #prepped urban centers dataset

# ## Load the prepped urban centers data

# In[ ]:

# load the prepped dataset
ucs = gpd.read_file(uc_gpkg_path).sort_values('B15', ascending=False)
print(ox.ts(), 'loaded urban centers dataset with shape', ucs.shape)

# In[ ]:

# only retain urban centers marked as a "true positive" in quality control
ucs = ucs[ucs['QA2_1V'] == 1]
print(ox.ts(), 'retained "true positive" urban centers dataset with shape',
      ucs.shape)

# In[ ]:

# only retain urban centers with at least 1 sq km of built-up area
ucs = ucs[ucs['B15'] >= 1]
print(ox.ts(),
      'retained >=1 km2 built-up area urban centers dataset with shape',
      ucs.shape)
Example #21
import json

import osmnx as ox
import pandas as pd

print('osmnx version', ox.__version__)

# In[ ]:

# load configs
with open('../config.json') as f:
    config = json.load(f)

# In[ ]:

aster = pd.read_csv(
    config['elevation_aster_elevations_path']).set_index('osmid').sort_index()
print(ox.ts(), 'loaded ASTER elevation data for', len(aster), 'nodes')

# In[ ]:

srtm = pd.read_csv(
    config['elevation_srtm_elevations_path']).set_index('osmid').sort_index()
print(ox.ts(), 'loaded SRTM elevation data for', len(srtm), 'nodes')

# In[ ]:

google = pd.read_csv(config['elevation_google_elevations_path']).set_index(
    'osmid').sort_index()
print(ox.ts(), 'loaded Google elevation data for', len(google), 'nodes')

# In[ ]:
def upload_new_file(folder,
                    filename,
                    doi,
                    file_desc,
                    file_tags,
                    attempt_count=1):

    file_path = os.path.join(folder, filename)
    response = None

    # set up the api endpoint, open the file, and make the payload
    endpoint = f'api/v1/datasets/:persistentId/add?persistentId={doi}&key={api_key}'
    url = f'{host}/{endpoint}'
    file, md5 = get_file_to_upload(file_path)
    payload = get_payload_to_upload(file_desc, file_tags, filename)

    try:
        # upload the file to the server
        print(ox.ts(), f'Uploading "{file_path}" to {doi}')

        if debug_mode:
            pass
        else:
            start_time = time.time()
            session = requests.Session()
            response = session.post(url,
                                    data=payload,
                                    files=file,
                                    timeout=upload_timeout)
            session.close()
            et = time.time() - start_time
            sc = response.status_code

            # check if the server response is ok, if not, throw exception
            response_json = response.json()
            if 'status' in response_json and not response_json[
                    'status'] == 'OK':
                raise Exception(response_json['message'])

            # get the checksum calculated by the server
            md5_server = response_json['data']['files'][0]['dataFile']['md5']
            if md5 != md5_server:
                raise Exception(
                    f'Checksums do not match: {md5} and {md5_server}')

            print(ox.ts(),
                  f'Response {sc} in {et:,.1f} seconds and checksums match')
            time.sleep(pause_normal)

    except Exception as e:

        # if any exception is thrown, log it, and retry the upload if we haven't exceeded max number of tries
        print(ox.ts(), e)
        time.sleep(pause_error)

        if attempt_count < attempts_max:
            attempt_count += 1
            print(ox.ts(),
                  f'Re-trying (attempt {attempt_count} of {attempts_max})')
            response = upload_new_file(folder, filename, doi, file_desc,
                                       file_tags, attempt_count)
        else:
            print(ox.ts(), 'No more attempts for this file, we give up')

    return response
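
# get_file_to_upload and get_payload_to_upload are referenced above but not
# included in this snippet. Hedged sketches of what they might look like,
# assuming the Dataverse native API's add-file form fields ('file' for the
# multipart upload and 'jsonData' for the metadata); the filename argument is
# accepted for parity but unused in this sketch.
import hashlib
import json
import os


def get_file_to_upload(file_path):
    # return the multipart 'files' dict for requests plus the local md5 checksum
    with open(file_path, 'rb') as f:
        contents = f.read()
    md5 = hashlib.md5(contents).hexdigest()
    return {'file': (os.path.basename(file_path), contents)}, md5


def get_payload_to_upload(file_desc, file_tags, filename):
    # file metadata, json-encoded into the 'jsonData' form field
    metadata = {'description': file_desc, 'categories': file_tags}
    return {'jsonData': json.dumps(metadata)}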


# # Run Process

# In[ ]:

for filename in sorted(os.listdir(nodes_folder)):

    print(ox.ts(), 'loading nodes from', filename)
    nodes = load_prep(filename)
    nodes = cluster_nodes(nodes)
    check_and_save(nodes, filename)

print(ox.ts(), 'process finished')

# In[ ]:
Example #24

def get_nodes(file):
    # load clustered nodes from file
    nodes = pd.read_csv(f'{nodes_folder}/{file}', index_col='osmid')

    # create latlng column rounded to 5 decimals (i.e., ~1-meter precision)
    nodes['latlng'] = nodes.apply(
        lambda row: '{:.5f},{:.5f}'.format(row['y'], row['x']), axis=1)

    return nodes.drop(columns=['x', 'y'])


# In[ ]:

print(ox.ts(), 'loading nodes from graph files:', end=' ')

# load clustered nodes
nodes = pd.DataFrame()
for file in sorted(os.listdir(nodes_folder)):
    print(file.split('-')[0], end=' ', flush=True)
    nodes = nodes.append(other=get_nodes(file),
                         ignore_index=False,
                         verify_integrity=False)

print('')
print(ox.ts(), f'load {len(nodes)} total nodes')

# In[ ]:

# remove any duplicate nodes that appeared in multiple graphs
Example #25
def calculate_graph_indicators(graphml_folder, country_folder, filename):

    # get filepath and country/city identifiers
    filepath = os.path.join(graphml_folder, country_folder, filename)
    country, country_iso = country_folder.split('-')
    core_city, uc_id = filename.replace('.graphml', '').split('-')
    uc_id = int(uc_id)

    start_time = time.time()
    print(ox.ts(), 'processing', filepath)
    G = ox.load_graphml(filepath=filepath)

    # clustering and pagerank: needs directed representation
    cc_avg_dir, cc_avg_undir, cc_wt_avg_dir, cc_wt_avg_undir, pagerank_max = get_clustering(
        G)

    # get an undirected representation of this network for everything else
    Gu = ox.get_undirected(G)
    G.clear()
    G = None

    # street lengths
    lengths = pd.Series(nx.get_edge_attributes(Gu, 'length'))
    length_total = lengths.sum()
    length_median = lengths.median()
    length_mean = lengths.mean()

    # nodes, edges, node degree, self loops
    n = len(Gu.nodes)
    m = len(Gu.edges)
    k_avg = 2 * m / n
    self_loop_proportion = sum(u == v for u, v, k in Gu.edges) / m

    # proportion of 4-way intersections, 3-ways, and dead-ends
    streets_per_node = nx.get_node_attributes(Gu, 'street_count')
    prop_4way = list(streets_per_node.values()).count(4) / n
    prop_3way = list(streets_per_node.values()).count(3) / n
    prop_deadend = list(streets_per_node.values()).count(1) / n

    # average circuity and straightness
    circuity = calculate_circuity(Gu, length_total)
    straightness = 1 / circuity

    # elevation and grade
    grade_mean, grade_median, elev_mean, elev_median, elev_std, elev_range, elev_iqr = elevation_grades(
        Gu)

    # bearing/orientation entropy/order
    orientation_entropy = calculate_orientation_entropy(Gu)
    orientation_order = calculate_orientation_order(orientation_entropy)

    # total and clean intersection counts
    intersect_count, intersect_count_clean, intersect_count_clean_topo = intersection_counts(
        ox.project_graph(Gu), streets_per_node)

    # assemble the results
    rslt = {
        'country': country,
        'country_iso': country_iso,
        'core_city': core_city,
        'uc_id': uc_id,
        'cc_avg_dir': cc_avg_dir,
        'cc_avg_undir': cc_avg_undir,
        'cc_wt_avg_dir': cc_wt_avg_dir,
        'cc_wt_avg_undir': cc_wt_avg_undir,
        'circuity': circuity,
        'elev_iqr': elev_iqr,
        'elev_mean': elev_mean,
        'elev_median': elev_median,
        'elev_range': elev_range,
        'elev_std': elev_std,
        'grade_mean': grade_mean,
        'grade_median': grade_median,
        'intersect_count': intersect_count,
        'intersect_count_clean': intersect_count_clean,
        'intersect_count_clean_topo': intersect_count_clean_topo,
        'k_avg': k_avg,
        'length_mean': length_mean,
        'length_median': length_median,
        'length_total': length_total,
        'street_segment_count': m,
        'node_count': n,
        'orientation_entropy': orientation_entropy,
        'orientation_order': orientation_order,
        'pagerank_max': pagerank_max,
        'prop_4way': prop_4way,
        'prop_3way': prop_3way,
        'prop_deadend': prop_deadend,
        'self_loop_proportion': self_loop_proportion,
        'straightness': straightness
    }

    elapsed = time.time() - start_time
    ox.log(f'finished {filepath} in {elapsed:.0f} seconds')
    return rslt
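
# The calculate_circuity helper referenced above is not included in this
# snippet. A hedged sketch, assuming circuity is total street length divided by
# the total great-circle distance between each edge's endpoints, and assuming
# an osmnx version that exposes distance.great_circle_vec:
import numpy as np
import osmnx as ox


def calculate_circuity(Gu, length_total):
    # straight-line distances between the endpoint nodes of every edge
    coords = np.array([(Gu.nodes[u]['y'], Gu.nodes[u]['x'],
                        Gu.nodes[v]['y'], Gu.nodes[v]['x'])
                       for u, v, _ in Gu.edges])
    gc_distances = ox.distance.great_circle_vec(lat1=coords[:, 0],
                                                lng1=coords[:, 1],
                                                lat2=coords[:, 2],
                                                lng2=coords[:, 3])
    return length_total / gc_distances.sum()
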
manifest = [{
    'input': config['models_gpkg_path'],
    'output': config['staging_gpkg_path']
}, {
    'input': config['models_graphml_path'],
    'output': config['staging_graphml_path']
}, {
    'input': config['models_nelist_path'],
    'output': config['staging_nelist_path']
}]

if config['cpus'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus']
print(ox.ts(), 'using', cpus, 'CPUs')

# In[ ]:


Example #27
print('osmnx version', ox.__version__)
print('networkx version', nx.__version__)

# In[ ]:

# load configs
with open('../config.json') as f:
    config = json.load(f)

ox.config(log_file=True, logs_folder=config['osmnx_log_path'])

if config['cpus_stats'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus_stats']
print(ox.ts(), 'using', cpus, 'CPUs')

graphml_folder = config['models_graphml_path']  #where to load graphml files
indicators_street_path = config[
    'indicators_street_path']  #where to save output street network indicators
save_every_n = 100  #save results every n cities

clean_int_tol = 10  #meters for intersection cleaning tolerance

entropy_bins = 36
min_entropy_bins = 4  #perfect grid
perfect_grid = [1] * min_entropy_bins + [0] * (entropy_bins - min_entropy_bins)
perfect_grid_entropy = stats.entropy(perfect_grid)
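
# calculate_orientation_entropy and calculate_orientation_order are referenced
# in Example #25 but not shown here. A hedged sketch of the order calculation,
# assuming it normalizes the observed bearing entropy between the perfect-grid
# minimum computed above and the maximum entropy of a uniform distribution
# across all bins:
import numpy as np


def calculate_orientation_order(orientation_entropy):
    max_entropy = np.log(entropy_bins)
    return 1 - ((orientation_entropy - perfect_grid_entropy) /
                (max_entropy - perfect_grid_entropy)) ** 2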

# In[ ]:
with open('../config.json') as f:
    config = json.load(f)

# In[ ]:

ox.config(use_cache=True,
          log_file=True,
          log_console=False,
          logs_folder=config['osmnx_log_path'],
          cache_folder=config['osmnx_cache_path'])

if config['cpus'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus']
print(ox.ts(), 'using', cpus, 'CPUs')

# In[ ]:

network_type = 'drive'
retain_all = True
simplify = True
truncate_by_edge = True

# In[ ]:

uc_gpkg_path = config['uc_gpkg_path']  #prepped urban centers dataset
output_graphml_path = config[
    'models_graphml_path']  #where to save graphml files

# ## Load the prepped urban centers data


# ## Run the script

# In[ ]:

st = time.time()
print(ox.ts(), 'Started process')
already_uploaded, published_files = get_preexisting_files(manifests)

# In[ ]:

if delete_existing_files:
    # delete all the existing (carried-over) files in the draft datasets
    for manifest in manifests:
        delete_dataset_files(manifest['doi'])
    already_uploaded, published_files = get_preexisting_files(manifests)

# In[ ]:

for manifest in manifests:

    # upload each file in folder
with open('../config.json') as f:
    config = json.load(f)

uc_gpkg_path = config['uc_gpkg_path']  #prepped urban centers dataset
indicators_street_path = config[
    'indicators_street_path']  #street network indicators to load
indicators_path = config[
    'indicators_path']  #merged indicators to save for repo upload
indicators_all_path = config[
    'indicators_all_path']  #all merged indicators to save for analysis

# In[ ]:

# load the UCs dataset
ucs = gpd.read_file(uc_gpkg_path).sort_index()
print(ox.ts(), 'loaded urban centers dataset with shape', ucs.shape)

# In[ ]:

# load the previously calculated street network indicators dataset
ind = pd.read_csv(indicators_street_path)
print(ox.ts(), 'loaded indicators dataset with shape', ind.shape)

# In[ ]:

# rename UC fields to something intelligible
mapper = {
    'UC_NM_LST': 'uc_names',
    'GRGN_L1': 'world_region',
    'GRGN_L2': 'world_subregion',
    'P15': 'resident_pop',