def delete_dataset_files(doi):
    """
    Delete all files from draft dataset at the given DOI.
    """
    host = 'dataverse.harvard.edu'
    url_statement = f'https://{host}/dvn/api/data-deposit/v1.1/swordv2/statement/study/{doi}'
    auth = (api_key, None)
    response = requests.get(url_statement, auth=auth)
    assert response.status_code == 200
    response_dict = xmltodict.parse(response.text)

    if 'entry' not in response_dict['feed']:
        print(ox.ts(), f'No files to delete in {doi}')
    else:
        files = response_dict['feed']['entry']
        if isinstance(files, OrderedDict):
            # xmltodict parses a single entry as a dict rather than a list
            files = [files]
        print(ox.ts(), f'There are {len(files)} files to delete in {doi}')

        st = time.time()
        i = 0
        for file in files:
            file_name = file['id'].split('/')[-1]
            file_id = file['id'].split('/')[-2]
            url_delete = f'https://{host}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{file_id}'
            auth = (api_key, None)
            response = requests.delete(url_delete, auth=auth)
            print(ox.ts(), f'Deleted {file_name} {response}')
            assert response.status_code == 204
            i += 1

        et = int(time.time() - st)
        print(ox.ts(), f'Deleted {i} files in {et} seconds')
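# For reference, the SWORD statement above returns an Atom feed whose file
# entries carry id URLs ending in ".../{file_id}/{file_name}", which is why
# the code splits each id on '/' to recover the id and name (illustrative
# id shown below, not a real one):
# https://dataverse.harvard.edu/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123456/data.zip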
def test_logging():
    # test OSMnx's logger
    ox.log("test a fake debug", level=lg.DEBUG)
    ox.log("test a fake info", level=lg.INFO)
    ox.log("test a fake warning", level=lg.WARNING)
    ox.log("test a fake error", level=lg.ERROR)

    ox.citation()
    ox.ts(style="date")
    ox.ts(style="time")
def download_file(url, session, filepath):
    try:
        with session.get(url, stream=True, timeout=timeout) as response:
            assert response.ok
            with open(filepath, 'wb') as file:
                shutil.copyfileobj(response.raw, file)
        print(ox.ts(), 'status', response.status_code, 'saved', filepath)
    except Exception as e:
        print(ox.ts(), e)
def load_prep(filename):
    nodes = get_nodes(filename)
    print(ox.ts(), f'load {len(nodes)} total nodes')

    # remove any duplicate nodes that appeared in multiple graphs
    nodes = nodes.sort_index()
    nodes = nodes.loc[~nodes.index.duplicated(keep='first')]
    assert nodes.index.is_unique
    print(ox.ts(), f'keep {len(nodes)} unique nodes')
    return nodes
def plot_data(gdf_osm, gdf_official, study_area, filepath,
              figsize=(10, 10), bgcolor="#333333", projected=True):
    """
    Plot the OSM vs official streets and save to disk.

    Parameters
    ----------
    gdf_osm : geopandas.GeoDataFrame
        the osm streets
    gdf_official : geopandas.GeoDataFrame
        the official streets
    study_area : shapely.Polygon or shapely.MultiPolygon
        the study area boundary
    filepath : str
        path to save figure as file
    figsize : tuple
        size of plotting figure
    bgcolor : str
        background color of plot
    projected : bool
        True if gdfs are projected rather than lat-lng

    Returns
    -------
    fig, ax : tuple
    """
    fig, ax = plt.subplots(figsize=figsize, facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # turn study_area polygon into gdf with correct CRS
    gdf_boundary = gpd.GeoDataFrame(geometry=[study_area], crs=gdf_osm.crs)

    # plot study area, then official streets, then osm streets as layers
    _ = gdf_boundary.plot(ax=ax, facecolor="k", label="Study Area")
    _ = gdf_official.plot(ax=ax, color="r", lw=1, label="Official Data")
    _ = gdf_osm.plot(ax=ax, color="y", lw=1, label="OSM Data")
    ax.axis("off")
    if projected:
        # only make x/y equal-aspect if data are projected
        ax.set_aspect("equal")

    # create legend
    ax.legend()

    # save to disk
    fig.savefig(filepath, dpi=300, bbox_inches="tight", facecolor=fig.get_facecolor())
    print(ox.ts(), f'figure saved to disk at "{filepath}"')

    plt.close()
    return fig, ax
def subcluster(nodes, labels, n=None, size=batch_size):
    if n is None:
        n = math.ceil(len(nodes.loc[labels]) / size)
    print(ox.ts(), 'clustering', len(nodes.loc[labels]), 'nodes into', n, 'clusters')

    X = nodes.loc[labels, ['x', 'y']].values
    kmeans = KMeans(n_clusters=n,
                    init='k-means++',
                    algorithm='full',
                    n_init=10,
                    max_iter=300,
                    random_state=0)
    kmeans = kmeans.fit(X)
    cluster_labels = pd.Series(kmeans.predict(X)).astype(str).values

    if 'cluster' in nodes.columns:
        # make this a subcluster
        separators = np.array(['-'] * len(cluster_labels))
        nodes.loc[labels, 'cluster'] = nodes.loc[labels, 'cluster'].str.cat(
            others=[separators, cluster_labels])
    else:
        # create a new cluster column
        nodes['cluster'] = cluster_labels
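# Minimal self-contained demo of the k-means subclustering idea above (an
# illustration only, separate from the pipeline): divide 1,000 random points
# into 4 clusters and inspect the (typically uneven) cluster sizes.
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

demo = pd.DataFrame(np.random.default_rng(0).random((1000, 2)), columns=['x', 'y'])
km = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=0).fit(demo[['x', 'y']].values)
demo['cluster'] = km.labels_.astype(str)
print(demo['cluster'].value_counts())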
def zip_dir(input_path, output_folder, output_file):
    output_path = os.path.join(output_folder, output_file)
    if not os.path.exists(output_path):
        print(ox.ts(), input_path, output_path)
        # create a zip file to contain all the files from the input path
        with zipfile.ZipFile(file=output_path,
                             mode='w',
                             compression=zipfile.ZIP_DEFLATED,
                             compresslevel=9) as zf:
            for root, folders, files in os.walk(input_path):
                for file in sorted(files):
                    input_file = os.path.join(root, file)
                    if '/nelist/' in input_file:
                        # preserve the relative folder structure below country level in zip file
                        arcname = os.path.join(os.path.split(root)[-1], file)
                    else:
                        # no subfolders for gpkg or graphml, just files in root
                        arcname = file
                    zf.write(filename=input_file, arcname=arcname)
def login(url):
    session = requests.session()
    session.auth = (username, password)
    redirect = session.get(url, timeout=timeout)
    response = session.get(redirect.url, timeout=timeout)
    assert response.ok
    print(ox.ts(), 'status', response.status_code, 'logged in')
    return session
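# How login and the download_file helper above compose, as a commented
# sketch: the URL is a placeholder, and `username`, `password`, and `timeout`
# are assumed module-level globals.
# session = login('https://example.com/protected/files/')
# download_file('https://example.com/protected/files/data.zip', session, 'data.zip')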
def cluster_nodes(nodes):
    # FIRST PASS
    # divide the initial set of all country nodes into more manageably sized
    # clusters: it's cpu/mem intensive to divide lots of points into lots of
    # clusters, so this pass just divides lots of points into a few clusters
    if len(nodes) > max_cluster_input_size:
        subcluster(nodes, nodes.index, size=max_cluster_input_size * 2)
    else:
        nodes['cluster'] = '0'

    # SECOND PASS
    # recursively subcluster the clusters to make them small enough to be
    # clustered into lots of sub-sub-clusters of approximately batch_size
    while (nodes['cluster'].value_counts() > max_cluster_input_size).any():
        for cluster, group in nodes.groupby('cluster'):
            if len(group) > max_cluster_input_size:
                subcluster(nodes, group.index, size=max_cluster_input_size / 2)

    # THIRD PASS
    # now that the clusters are of digestible size, subcluster them down to
    # approximately the size of batch_size. kmeans produces uneven cluster
    # sizes, so many will be bigger/smaller than batch_size... handle this
    # in the fourth pass
    for cluster, group in nodes.groupby('cluster'):
        if len(group) > batch_size:
            subcluster(nodes, group.index)

    # status check
    n_clusters = len(nodes['cluster'].unique())
    n_too_big = (nodes.groupby('cluster').size() > batch_size).sum()
    print(ox.ts(), 'we now have', n_clusters, 'clusters but', n_too_big,
          'are too big and must be subdivided')

    # FOURTH PASS
    # if clustering produced clusters bigger than batch_size, bisect them
    while (nodes['cluster'].value_counts() > batch_size).any():
        for cluster, group in nodes.groupby('cluster'):
            if len(group) > batch_size:
                bissect_cluster(nodes, group)

    print(ox.ts(), 'all done, we now have', len(nodes['cluster'].unique()), 'clusters')
    return nodes
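# `bissect_cluster` is defined elsewhere in this pipeline. A plausible sketch
# of the idea (an assumption, not the original implementation): split an
# oversized cluster in two at the median of its longer coordinate axis, then
# suffix the halves' cluster labels so they get distinct identifiers.
def bissect_cluster_sketch(nodes, group):
    # pick the axis with the larger spread, then split at its median
    dim = 'x' if (group['x'].max() - group['x'].min()) > (group['y'].max() - group['y'].min()) else 'y'
    left = group[group[dim] <= group[dim].median()].index
    right = group.index.difference(left)
    nodes.loc[left, 'cluster'] = nodes.loc[left, 'cluster'] + '-0'
    nodes.loc[right, 'cluster'] = nodes.loc[right, 'cluster'] + '-1'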
def check_and_save(nodes, filename):
    # add country code to cluster identifier
    country_code = filename.split('-')[1].split('.')[0]
    nodes['cluster'] = country_code + nodes['cluster']

    cluster_sizes = nodes.groupby('cluster').size()
    print(ox.ts(), 'largest cluster contains', cluster_sizes.max(),
          'nodes and median is', int(cluster_sizes.median()))
    assert cluster_sizes.max() <= batch_size

    ideal_clusters = math.ceil(len(nodes) / batch_size)
    real_clusters = len(nodes['cluster'].unique())
    print(ox.ts(), "ideally we'd have", ideal_clusters, 'clusters but we have', real_clusters)

    output_filepath = os.path.join(clusters_folder, filename)
    nodes.to_csv(output_filepath, index=True, encoding='utf-8')
    print(ox.ts(), 'saved node clusters to disk at', output_filepath)
def save_results(indicators, output_path):
    output_folder = os.path.dirname(output_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    df = pd.DataFrame(indicators).T.reset_index(drop=True)
    df.to_csv(output_path, index=False, encoding='utf-8')
    print(ox.ts(), f'saved {len(indicators)} results to disk at "{output_path}"')
    return df
def load_data(osm_graphml_path, osm_buffer_gpkg_path, official_streets_gpkg_path):
    """
    Load the street network edges and study boundary.

    Parameters
    ----------
    osm_graphml_path : str
        path to the OSM graphml file
    osm_buffer_gpkg_path : str
        path to the buffered study area geopackage
    official_streets_gpkg_path : str
        path to the official streets shapefile

    Returns
    -------
    gdf_osm_streets_clipped, gdf_official_streets_clipped, study_area : tuple
        the osm streets (clipped to the study area), the official streets
        (clipped to the study area), and the study area polygon
    """
    # load the study area boundary as a shapely (multi)polygon
    gdf_study_area = gpd.read_file(osm_buffer_gpkg_path, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]
    print(ox.ts(), "loaded study area boundary")

    # load the official streets shapefile
    gdf_official_streets = gpd.read_file(official_streets_gpkg_path)
    print(ox.ts(), "loaded official streets shapefile")

    # load the graph, make it undirected, then get edges GeoDataFrame
    gdf_osm_streets = ox.graph_to_gdfs(ox.get_undirected(ox.load_graphml(osm_graphml_path)), nodes=False)
    print(ox.ts(), "loaded osm edges and made undirected streets")

    # project the data to a common crs
    crs = gdf_study_area.crs
    if gdf_osm_streets.crs != crs:
        gdf_osm_streets = gdf_osm_streets.to_crs(crs)
        print(ox.ts(), "projected osm streets")
    if gdf_official_streets.crs != crs:
        gdf_official_streets = gdf_official_streets.to_crs(crs)
        print(ox.ts(), "projected official streets")

    # spatially clip the streets to the study area boundary
    import warnings
    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression
    gdf_osm_streets_clipped = gpd.clip(gdf_osm_streets, study_area)
    gdf_official_streets_clipped = gpd.clip(gdf_official_streets, study_area)
    print(ox.ts(), "clipped osm/official streets to study area boundary")

    # double-check everything has same CRS, then return
    assert gdf_osm_streets_clipped.crs == gdf_official_streets_clipped.crs == gdf_study_area.crs
    return gdf_osm_streets_clipped, gdf_official_streets_clipped, study_area
def add_elevations(country_folder, graph_filename):
    # load graph
    graph_filepath = os.path.join(graphml_folder, country_folder, graph_filename)
    G = ox.load_graphml(filepath=graph_filepath)
    print(ox.ts(), 'load', len(G), 'nodes and', len(G.edges), 'edges from', graph_filepath)

    # get the elevation data for this graph's nodes (index with a list:
    # newer pandas disallows passing a set to .loc)
    graph_elevs = df_elevs.loc[list(G.nodes)].sort_index()

    # set nodes' elevation attributes
    nx.set_node_attributes(G, name='elevation', values=graph_elevs['elevation'].astype(int))
    nx.set_node_attributes(G, name='elevation_aster', values=graph_elevs['elev_aster'].dropna().astype(int))
    nx.set_node_attributes(G, name='elevation_srtm', values=graph_elevs['elev_srtm'].dropna().astype(int))

    # confirm that no graph node is missing elevation
    assert set(G.nodes) == set(nx.get_node_attributes(G, 'elevation'))

    # then calculate edge grades
    G = ox.add_edge_grades(G, add_absolute=True)

    # resave graphml now that it has elevations/grades
    ox.save_graphml(G, filepath=graph_filepath)
    print(ox.ts(), 'save', graph_filepath)

    # save node/edge lists
    uc_name = graph_filename.replace('.graphml', '')
    nelist_output_folder = os.path.join(nelist_folder, country_folder, uc_name)
    save_node_edge_lists(G, nelist_output_folder)
    print(ox.ts(), 'save', nelist_output_folder)

    # save as geopackage
    gpkg_filename = uc_name + '.gpkg'
    gpkg_filepath = os.path.join(gpkg_folder, country_folder, gpkg_filename)
    ox.save_graph_geopackage(G, filepath=gpkg_filepath)
    print(ox.ts(), 'save', gpkg_filepath)
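# `save_node_edge_lists` is defined elsewhere; a minimal sketch of what it
# presumably does (an assumption, assuming `os` and `ox` are imported as in
# the surrounding script): write the graph's node and edge attribute tables
# to CSV files in the given output folder.
def save_node_edge_lists_sketch(G, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
    gdf_nodes.drop(columns=['geometry']).to_csv(os.path.join(output_folder, 'node_list.csv'))
    gdf_edges.drop(columns=['geometry']).to_csv(os.path.join(output_folder, 'edge_list.csv'))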
def plot_hex_bins(gdf_boundary, hex_grid_clipped, gdf_official_destinations_clipped,
                  gdf_osm_destinations_clipped, filepath, figsize=(10, 10),
                  bgcolor="#333333", projected=True):
    fig, ax = plt.subplots(figsize=figsize, facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # plot study area, then hex bins, then official destinations, then osm
    # destinations as layers
    _ = gdf_boundary.plot(ax=ax, facecolor="k", label="Study Area")
    _ = hex_grid_clipped.plot(ax=ax, facecolor="k", edgecolor="w", lw=2, label="Hex Bins")
    _ = gdf_official_destinations_clipped.plot(ax=ax, color="r", lw=1, label="Official Data")
    _ = gdf_osm_destinations_clipped.plot(ax=ax, color="y", lw=1, label="OSM Data")
    ax.axis("off")
    if projected:
        # only make x/y equal-aspect if data are projected
        ax.set_aspect("equal")

    # create legend
    ax.legend()

    # save to disk
    fig.savefig(filepath, dpi=300, bbox_inches="tight", facecolor=fig.get_facecolor())
    print(ox.ts(), f'figure saved to disk at "{filepath}"')

    plt.close()
    return fig, ax
def get_preexisting_files(manifests):
    already_uploaded = {}
    published_files = {}
    for manifest in manifests:
        doi = manifest['doi']

        # what files have already been uploaded to the draft?
        already_uploaded[doi] = get_uploaded_draft_filenames(doi)

        # what files exist in the published version of the dataset?
        published_files[doi] = get_published_files(doi)

        print(ox.ts(),
              f"Pre-existing files in {doi}: {len(published_files[doi])} published, "
              f"{len(already_uploaded[doi])} draft.")

    return already_uploaded, published_files
def get_graph(row):
    global count_failed
    global count_success
    global count_already
    global count_small
    global failed_list

    # graph name = country + country iso + uc + uc id
    # (build it before the try block so the except block can reference it)
    graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                      row['UC_NM_MN'], row['ID_HDC_G0'])
    try:
        graphml_folder = '{}/{}-{}'.format(output_graphml_path, row['CTR_MN_NM'], row['CTR_MN_ISO'])
        graphml_file = '{}-{}.graphml'.format(row['UC_NM_MN'], row['ID_HDC_G0'])
        filepath = os.path.join(graphml_folder, graphml_file)

        if not os.path.exists(filepath):
            # get graph
            print(ox.ts(), graph_name)
            G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                      network_type=network_type,
                                      retain_all=retain_all,
                                      simplify=simplify,
                                      truncate_by_edge=truncate_by_edge)

            # don't save graphs if they have fewer than 3 nodes
            if len(G) > 2:
                ox.save_graphml(G, filepath=filepath)
                count_success += 1
            else:
                count_small += 1
        else:
            count_already += 1

    except Exception as e:
        count_failed += 1
        failed_list.append(graph_name)
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)
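# How get_graph is presumably driven (a commented sketch, assuming the
# counters and failed_list are initialized at module level and `ucs` is the
# urban centers GeoDataFrame loaded elsewhere in this script):
# count_failed = count_success = count_already = count_small = 0
# failed_list = []
# for _, row in ucs.iterrows():
#     get_graph(row)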
def load_data(osm_buffer_gpkg_path, official_dests_filepath, destinations_column, destinations_values):
    # load the study area boundary as a shapely (multi)polygon
    gdf_study_area = gpd.read_file(osm_buffer_gpkg_path, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]
    print(ox.ts(), "loaded study area boundary")

    # load the official destinations shapefile and retain only rows with
    # desired values in the destinations column
    gdf_official_destinations = gpd.read_file(official_dests_filepath)
    mask = gdf_official_destinations[destinations_column].isin(destinations_values)
    gdf_official_destinations = gdf_official_destinations[mask]
    print(ox.ts(), "loaded and filtered official destinations shapefile")

    # load the osm destinations shapefile
    gdf_osm = gpd.read_file(osm_buffer_gpkg_path, layer="destinations")
    gdf_osm_destinations = gdf_osm[gdf_osm["dest_name"] == "fresh_food_market"]
    print(ox.ts(), "loaded osm destinations shapefile")

    # project the data to a common crs
    crs = gdf_study_area.crs
    if gdf_official_destinations.crs != crs:
        gdf_official_destinations = gdf_official_destinations.to_crs(crs)
        print(ox.ts(), "projected official destinations")
    if gdf_osm_destinations.crs != crs:
        gdf_osm_destinations = gdf_osm_destinations.to_crs(crs)
        print(ox.ts(), "projected osm destinations")

    # spatially clip the destinations to the study area boundary
    import warnings
    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression
    gdf_osm_destinations_clipped = gpd.clip(gdf_osm_destinations, study_area)
    gdf_official_destinations_clipped = gpd.clip(gdf_official_destinations, study_area)
    print(ox.ts(), "clipped osm/official destinations to study area boundary")

    # double-check everything has same CRS, then return
    assert gdf_study_area.crs == gdf_osm_destinations_clipped.crs == gdf_official_destinations_clipped.crs
    return study_area, gdf_osm_destinations_clipped, gdf_official_destinations_clipped
          cache_folder=config['osmnx_cache_path'])

pause_duration = 0
urls_path = config['elevation_google_urls_path']
nodestoget_path = config['elevation_google_nodestoget_path']
elevations_path = config['elevation_google_elevations_path']

# set countries=None to get all
countries = None

# In[ ]:

# load nodes in order from file: we will use this order to match the url
# batch requests in order back to their nodes
nodes = pd.read_csv(nodestoget_path, index_col='osmid')
print(ox.ts(), 'loaded', len(nodes), 'node osmids to get elevations for')

# In[ ]:

urls = pd.read_csv(urls_path, index_col='cluster')
print(ox.ts(), 'there are', len(urls), 'urls to get')
if len(urls) == 0:
    exit()

# make country and key cols
urls['country'] = urls.index.str.slice(0, 3)
urls['key'] = urls['url'].str.split('key=', expand=True)[1]

# In[ ]:

# if countries list exists, then only retain nodes/urls in those countries
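# For reference, each row of `urls` above holds one batched Google Elevation
# API request; a sketch of how such a URL is composed (illustrative
# coordinates and a placeholder key, not real data). Locations are
# pipe-separated "lat,lng" pairs.
example_latlngs = ['37.87893,-122.27234', '37.87624,-122.26852']
example_url = ('https://maps.googleapis.com/maps/api/elevation/json?locations='
               + '|'.join(example_latlngs) + '&key=YOUR_KEY')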
    osm_true_mean = sum(osm_true) / weight_count
    # osm_true_median = statistics.median(osm_true)
    official_mean = sum(official_percentages) / len(hex_grid_clipped["geometry"])
    # official_median = statistics.median(official_percentages)
    official_true_mean = sum(official_true) / weight_count
    # official_true_median = statistics.median(official_true)

    return weight_percentage, osm_mean, official_mean, osm_true_mean, official_true_mean


# RUN THE SCRIPT
indicators = {}
for city in cities:
    print(ox.ts(), f"begin processing {city}")
    indicators[city] = {}

    # load this city's configs
    with open(f"../configuration/{city}.json") as f:
        config = json.load(f)

    # load destination gdfs from osm graph and official shapefile
    study_area, gdf_osm_destinations_clipped, gdf_official_destinations_clipped = load_data(
        config["osm_buffer_gpkg_path"],
        config["official_dests_filepath"],
        config["destinations_column"],
        config["destinations_values"],
    )

    # create plot of hexbins for the city
network_type = 'drive'
retain_all = True
simplify = True
truncate_by_edge = True

# In[ ]:

uc_gpkg_path = config['uc_gpkg_path']  # prepped urban centers dataset

# ## Load the prepped urban centers data

# In[ ]:

# load the prepped dataset
ucs = gpd.read_file(uc_gpkg_path).sort_values('B15', ascending=False)
print(ox.ts(), 'loaded urban centers dataset with shape', ucs.shape)

# In[ ]:

# only retain urban centers marked as a "true positive" in quality control
ucs = ucs[ucs['QA2_1V'] == 1]
print(ox.ts(), 'retained "true positive" urban centers dataset with shape', ucs.shape)

# In[ ]:

# only retain urban centers with at least 1 sq km of built-up area
ucs = ucs[ucs['B15'] >= 1]
print(ox.ts(), 'retained >=1 km2 built-up area urban centers dataset with shape', ucs.shape)
import json

import osmnx as ox
import pandas as pd

print('osmnx version', ox.__version__)

# In[ ]:

# load configs
with open('../config.json') as f:
    config = json.load(f)

# In[ ]:

aster = pd.read_csv(config['elevation_aster_elevations_path']).set_index('osmid').sort_index()
print(ox.ts(), 'loaded ASTER elevation data for', len(aster), 'nodes')

# In[ ]:

srtm = pd.read_csv(config['elevation_srtm_elevations_path']).set_index('osmid').sort_index()
print(ox.ts(), 'loaded SRTM elevation data for', len(srtm), 'nodes')

# In[ ]:

google = pd.read_csv(config['elevation_google_elevations_path']).set_index('osmid').sort_index()
print(ox.ts(), 'loaded Google elevation data for', len(google), 'nodes')

# In[ ]:
def upload_new_file(folder, filename, doi, file_desc, file_tags, attempt_count=1):
    file_path = os.path.join(folder, filename)
    response = None

    # set up the api endpoint, open the file, and make the payload
    endpoint = f'api/v1/datasets/:persistentId/add?persistentId={doi}&key={api_key}'
    url = f'{host}/{endpoint}'
    file, md5 = get_file_to_upload(file_path)
    payload = get_payload_to_upload(file_desc, file_tags, filename)

    try:
        # upload the file to the server (skipped entirely in debug mode)
        print(ox.ts(), f'Uploading "{file_path}" to {doi}')
        if not debug_mode:
            start_time = time.time()
            session = requests.Session()
            response = session.post(url, data=payload, files=file, timeout=upload_timeout)
            session.close()
            et = time.time() - start_time
            sc = response.status_code

            # check if the server response is ok, if not, throw exception
            response_json = response.json()
            if 'status' in response_json and not response_json['status'] == 'OK':
                raise Exception(response_json['message'])

            # verify the checksum calculated by the server matches the local one
            md5_server = response_json['data']['files'][0]['dataFile']['md5']
            if md5 != md5_server:
                raise Exception(f'Checksums do not match: {md5} and {md5_server}')

            print(ox.ts(), f'Response {sc} in {et:,.1f} seconds and checksums match')
            time.sleep(pause_normal)

    except Exception as e:
        # if any exception is thrown, log it, then retry the upload if we
        # haven't exceeded the max number of attempts
        print(ox.ts(), e)
        time.sleep(pause_error)
        if attempt_count < attempts_max:
            attempt_count += 1
            print(ox.ts(), f'Re-trying (attempt {attempt_count} of {attempts_max})')
            response = upload_new_file(folder, filename, doi, file_desc, file_tags, attempt_count)
        else:
            print(ox.ts(), 'No more attempts for this file, we give up')

    return response
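# The helpers `get_file_to_upload` and `get_payload_to_upload` are defined
# elsewhere; plausible sketches of them (assumptions, not the originals).
# The Dataverse native API expects the file as multipart form data and the
# metadata as a JSON string in a `jsonData` form field.
import hashlib
import json

def get_file_to_upload_sketch(file_path):
    # return a requests-style files dict plus the file's local md5 checksum
    with open(file_path, 'rb') as f:
        data = f.read()
    return {'file': (os.path.basename(file_path), data)}, hashlib.md5(data).hexdigest()

def get_payload_to_upload_sketch(file_desc, file_tags, filename):
    # metadata payload for the add-file endpoint
    return {'jsonData': json.dumps({'description': file_desc, 'categories': file_tags})}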
# # Run Process

# In[ ]:

for filename in sorted(os.listdir(nodes_folder)):
    print(ox.ts(), 'loading nodes from', filename)
    nodes = load_prep(filename)
    nodes = cluster_nodes(nodes)
    check_and_save(nodes, filename)

print(ox.ts(), 'process finished')

# In[ ]:
def get_nodes(file):
    # load clustered nodes from file
    nodes = pd.read_csv(f'{nodes_folder}/{file}', index_col='osmid')

    # create latlng column rounded to 5 decimals (ie, ~1-meter precision)
    nodes['latlng'] = nodes.apply(lambda row: '{:.5f},{:.5f}'.format(row['y'], row['x']), axis=1)
    return nodes.drop(columns=['x', 'y'])


# In[ ]:

print(ox.ts(), 'loading nodes from graph files:', end=' ')

# load clustered nodes from each per-country file into one dataframe
# (pd.concat instead of the removed DataFrame.append)
frames = []
for file in sorted(os.listdir(nodes_folder)):
    print(file.split('-')[0], end=' ', flush=True)
    frames.append(get_nodes(file))
nodes = pd.concat(frames, ignore_index=False, verify_integrity=False)

print('')
print(ox.ts(), f'load {len(nodes)} total nodes')

# In[ ]:

# remove any duplicate nodes that appeared in multiple graphs
def calculate_graph_indicators(graphml_folder, country_folder, filename):
    # get filepath and country/city identifiers
    filepath = os.path.join(graphml_folder, country_folder, filename)
    country, country_iso = country_folder.split('-')
    core_city, uc_id = filename.replace('.graphml', '').split('-')
    uc_id = int(uc_id)

    start_time = time.time()
    print(ox.ts(), 'processing', filepath)
    G = ox.load_graphml(filepath=filepath)

    # clustering and pagerank: needs directed representation
    cc_avg_dir, cc_avg_undir, cc_wt_avg_dir, cc_wt_avg_undir, pagerank_max = get_clustering(G)

    # get an undirected representation of this network for everything else
    Gu = ox.get_undirected(G)
    G.clear()
    G = None

    # street lengths
    lengths = pd.Series(nx.get_edge_attributes(Gu, 'length'))
    length_total = lengths.sum()
    length_median = lengths.median()
    length_mean = lengths.mean()

    # nodes, edges, node degree, self loops
    # (iterating Gu.edges() yields (u, v) 2-tuples)
    n = len(Gu.nodes)
    m = len(Gu.edges)
    k_avg = 2 * m / n
    self_loop_proportion = sum(u == v for u, v in Gu.edges()) / m

    # proportion of 4-way intersections, 3-ways, and dead-ends
    streets_per_node = nx.get_node_attributes(Gu, 'street_count')
    prop_4way = list(streets_per_node.values()).count(4) / n
    prop_3way = list(streets_per_node.values()).count(3) / n
    prop_deadend = list(streets_per_node.values()).count(1) / n

    # average circuity and straightness
    circuity = calculate_circuity(Gu, length_total)
    straightness = 1 / circuity

    # elevation and grade
    grade_mean, grade_median, elev_mean, elev_median, elev_std, elev_range, elev_iqr = elevation_grades(Gu)

    # bearing/orientation entropy/order
    orientation_entropy = calculate_orientation_entropy(Gu)
    orientation_order = calculate_orientation_order(orientation_entropy)

    # total and clean intersection counts
    intersect_count, intersect_count_clean, intersect_count_clean_topo = intersection_counts(
        ox.project_graph(Gu), streets_per_node)

    # assemble the results
    rslt = {'country': country,
            'country_iso': country_iso,
            'core_city': core_city,
            'uc_id': uc_id,
            'cc_avg_dir': cc_avg_dir,
            'cc_avg_undir': cc_avg_undir,
            'cc_wt_avg_dir': cc_wt_avg_dir,
            'cc_wt_avg_undir': cc_wt_avg_undir,
            'circuity': circuity,
            'elev_iqr': elev_iqr,
            'elev_mean': elev_mean,
            'elev_median': elev_median,
            'elev_range': elev_range,
            'elev_std': elev_std,
            'grade_mean': grade_mean,
            'grade_median': grade_median,
            'intersect_count': intersect_count,
            'intersect_count_clean': intersect_count_clean,
            'intersect_count_clean_topo': intersect_count_clean_topo,
            'k_avg': k_avg,
            'length_mean': length_mean,
            'length_median': length_median,
            'length_total': length_total,
            'street_segment_count': m,
            'node_count': n,
            'orientation_entropy': orientation_entropy,
            'orientation_order': orientation_order,
            'pagerank_max': pagerank_max,
            'prop_4way': prop_4way,
            'prop_3way': prop_3way,
            'prop_deadend': prop_deadend,
            'self_loop_proportion': self_loop_proportion,
            'straightness': straightness}

    elapsed = time.time() - start_time
    ox.log(f'finished {filepath} in {elapsed:.0f} seconds')
    return rslt
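# `calculate_circuity` is defined elsewhere; a sketch of the standard
# definition (an assumption about the original implementation): circuity is
# the sum of street lengths divided by the sum of great-circle distances
# between each edge's endpoint nodes, for an unprojected (lat-lng) graph.
import numpy as np

def calculate_circuity_sketch(Gu, length_total):
    coords = np.radians([(Gu.nodes[u]['y'], Gu.nodes[u]['x'],
                          Gu.nodes[v]['y'], Gu.nodes[v]['x'])
                         for u, v in Gu.edges()])
    y1, x1, y2, x2 = coords.T
    # vectorized haversine formula with earth mean radius in meters
    gc_dists = 2 * 6371009 * np.arcsin(np.sqrt(
        np.sin((y2 - y1) / 2) ** 2
        + np.cos(y1) * np.cos(y2) * np.sin((x2 - x1) / 2) ** 2))
    return length_total / gc_dists.sum()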
manifest = [{
    'input': config['models_gpkg_path'],
    'output': config['staging_gpkg_path']
}, {
    'input': config['models_graphml_path'],
    'output': config['staging_graphml_path']
}, {
    'input': config['models_nelist_path'],
    'output': config['staging_nelist_path']
}]

if config['cpus'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus']
print(ox.ts(), 'using', cpus, 'CPUs')

# In[ ]:
print('osmnx version', ox.__version__)
print('networkx version', nx.__version__)

# In[ ]:

# load configs
with open('../config.json') as f:
    config = json.load(f)

ox.config(log_file=True, logs_folder=config['osmnx_log_path'])

if config['cpus_stats'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus_stats']
print(ox.ts(), 'using', cpus, 'CPUs')

graphml_folder = config['models_graphml_path']  # where to load graphml files
indicators_street_path = config['indicators_street_path']  # where to save output street network indicators

save_every_n = 100  # save results every n cities
clean_int_tol = 10  # meters for intersection cleaning tolerance

entropy_bins = 36
min_entropy_bins = 4  # perfect grid
perfect_grid = [1] * min_entropy_bins + [0] * (entropy_bins - min_entropy_bins)
perfect_grid_entropy = stats.entropy(perfect_grid)

# In[ ]:
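# For reference, these constants feed the orientation-order indicator; a
# sketch of the likely calculation (an assumption, following Boeing's street
# network orientation-entropy work): normalize the observed entropy between
# a perfect grid's entropy and the maximum entropy of uniformly distributed
# bearings across the 36 bins.
import numpy as np

def calculate_orientation_order_sketch(orientation_entropy):
    max_entropy = np.log(entropy_bins)
    return 1 - ((orientation_entropy - perfect_grid_entropy)
                / (max_entropy - perfect_grid_entropy)) ** 2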
with open('../config.json') as f:
    config = json.load(f)

# In[ ]:

ox.config(use_cache=True,
          log_file=True,
          log_console=False,
          logs_folder=config['osmnx_log_path'],
          cache_folder=config['osmnx_cache_path'])

if config['cpus'] == 0:
    cpus = mp.cpu_count()
else:
    cpus = config['cpus']
print(ox.ts(), 'using', cpus, 'CPUs')

# In[ ]:

network_type = 'drive'
retain_all = True
simplify = True
truncate_by_edge = True

# In[ ]:

uc_gpkg_path = config['uc_gpkg_path']  # prepped urban centers dataset
output_graphml_path = config['models_graphml_path']  # where to save graphml files

# ## Load the prepped urban centers data
# ## Run the script

# In[ ]:

st = time.time()
print(ox.ts(), 'Started process')
already_uploaded, published_files = get_preexisting_files(manifests)

# In[ ]:

if delete_existing_files:
    # delete all the existing (carried-over) files in the draft datasets
    for manifest in manifests:
        delete_dataset_files(manifest['doi'])
    already_uploaded, published_files = get_preexisting_files(manifests)

# In[ ]:

for manifest in manifests:
    # upload each file in folder
with open('../config.json') as f:
    config = json.load(f)

uc_gpkg_path = config['uc_gpkg_path']  # prepped urban centers dataset
indicators_street_path = config['indicators_street_path']  # street network indicators to load
indicators_path = config['indicators_path']  # merged indicators to save for repo upload
indicators_all_path = config['indicators_all_path']  # all merged indicators to save for analysis

# In[ ]:

# load the UCs dataset
ucs = gpd.read_file(uc_gpkg_path).sort_index()
print(ox.ts(), 'loaded urban centers dataset with shape', ucs.shape)

# In[ ]:

# load the previously calculated street network indicators dataset
ind = pd.read_csv(indicators_street_path)
print(ox.ts(), 'loaded indicators dataset with shape', ind.shape)

# In[ ]:

# rename UC fields to something intelligible
mapper = {
    'UC_NM_LST': 'uc_names',
    'GRGN_L1': 'world_region',
    'GRGN_L2': 'world_subregion',
    'P15': 'resident_pop',