def get_closest_nodes():
    """Return the stop_I of the network node closest to each target location."""
    closest_stops = []
    nodes = pandas.read_csv(HELSINKI_NODES_FNAME)
    for swimming_hall in target_locations:
        swimming_hall_lat = swimming_hall['latitude']
        swimming_hall_lon = swimming_hall['longitude']
        # Linear scan over all nodes for the smallest great-circle distance.
        min_distance = float('inf')
        min_node = None
        for node in nodes.itertuples():
            distance = wgs84_distance(swimming_hall_lat, swimming_hall_lon,
                                      node.lat, node.lon)
            if distance < min_distance:
                min_distance = distance
                min_node = node
        closest_stops.append(min_node.stop_I)
    return closest_stops
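# A minimal usage sketch. get_closest_nodes() relies on two module-level names
# that are assumed here: HELSINKI_NODES_FNAME, a CSV with at least 'stop_I',
# 'lat' and 'lon' columns, and target_locations, an iterable of dicts with
# 'latitude'/'longitude' keys (pandas and wgs84_distance are assumed to be
# imported at module level). The file name and coordinates are illustrative only.
HELSINKI_NODES_FNAME = "helsinki_nodes.csv"        # hypothetical path
target_locations = [
    {'latitude': 60.1699, 'longitude': 24.9384},   # example point in Helsinki
]
closest_stop_Is = get_closest_nodes()
print(closest_stop_Is)                             # one stop_I per target location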
def calc_transfers(conn, threshold_meters=1000):
    geohash_precision = _get_geo_hash_precision(threshold_meters / 1000.)
    geo_index = GeoGridIndex(precision=geohash_precision)
    g = GTFS(conn)
    stops = g.get_table("stops")
    stop_geopoints = []
    cursor = conn.cursor()
    for stop in stops.itertuples():
        stop_geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(stop_geopoint)
        stop_geopoints.append(stop_geopoint)
    for stop_geopoint in stop_geopoints:
        nearby_stop_geopoints = geo_index.get_nearest_points_dirty(
            stop_geopoint, threshold_meters / 1000.0, "km")
        from_stop_I = int(stop_geopoint.ref)
        from_lat = stop_geopoint.latitude
        from_lon = stop_geopoint.longitude
        to_stop_Is = []
        distances = []
        for nearby_stop_geopoint in nearby_stop_geopoints:
            to_stop_I = int(nearby_stop_geopoint.ref)
            if to_stop_I == from_stop_I:
                continue
            to_lat = nearby_stop_geopoint.latitude
            to_lon = nearby_stop_geopoint.longitude
            distance = math.ceil(wgs84_distance(from_lat, from_lon, to_lat, to_lon))
            if distance <= threshold_meters:
                to_stop_Is.append(to_stop_I)
                distances.append(distance)
        n_pairs = len(to_stop_Is)
        from_stop_Is = [from_stop_I] * n_pairs
        cursor.executemany(
            'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);',
            zip(from_stop_Is, to_stop_Is, distances,
                [None] * n_pairs, [None] * n_pairs, [None] * n_pairs))
    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);')
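# A minimal usage sketch, assuming an existing gtfspy-imported SQLite database;
# "helsinki.sqlite" is a hypothetical file name. calc_transfers() fills the
# stop_distances table with every stop pair closer than threshold_meters.
import sqlite3

conn = sqlite3.connect("helsinki.sqlite")
calc_transfers(conn, threshold_meters=1000)
conn.commit()   # persist the inserted stop-to-stop distances
conn.close()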
def get_stats(gtfs):
    """
    Get basic statistics of the GTFS data.

    Parameters
    ----------
    gtfs: GTFS

    Returns
    -------
    stats: dict
        A dictionary of various statistics.
        Keys should be strings; values should be storable in a database
        (int, date, str, ...), but not lists.
    """
    stats = {}
    # Basic table counts
    for table in ['agencies', 'routes', 'stops', 'stop_times', 'trips', 'calendar',
                  'shapes', 'calendar_dates', 'days', 'stop_distances', 'frequencies',
                  'feed_info', 'transfers']:
        stats["n_" + table] = gtfs.get_row_count(table)

    # Agency names
    agencies = gtfs.get_table("agencies")
    stats["agencies"] = "_".join(agencies['name'].values)

    # Stop lat/lon range
    stops = gtfs.get_table("stops")
    lats = stops['lat'].values
    lons = stops['lon'].values
    percentiles = [0, 10, 50, 90, 100]

    try:
        lat_percentiles = numpy.percentile(lats, percentiles)
    except IndexError:
        lat_percentiles = [None] * 5
    lat_min, lat_10, lat_median, lat_90, lat_max = lat_percentiles
    stats["lat_min"] = lat_min
    stats["lat_10"] = lat_10
    stats["lat_median"] = lat_median
    stats["lat_90"] = lat_90
    stats["lat_max"] = lat_max

    try:
        lon_percentiles = numpy.percentile(lons, percentiles)
    except IndexError:
        lon_percentiles = [None] * 5
    lon_min, lon_10, lon_median, lon_90, lon_max = lon_percentiles
    stats["lon_min"] = lon_min
    stats["lon_10"] = lon_10
    stats["lon_median"] = lon_median
    stats["lon_90"] = lon_90
    stats["lon_max"] = lon_max

    if len(lats) > 0:
        # height: north-south extent at the median longitude
        stats["height_km"] = wgs84_distance(lat_min, lon_median, lat_max, lon_median) / 1000.
        # width: east-west extent at the median latitude
        stats["width_km"] = wgs84_distance(lat_median, lon_min, lat_median, lon_max) / 1000.
    else:
        stats["height_km"] = None
        stats["width_km"] = None

    first_day_start_ut, last_day_start_ut = gtfs.get_day_start_ut_span()
    stats["start_time_ut"] = first_day_start_ut
    if last_day_start_ut is None:
        stats["end_time_ut"] = None
    else:
        # 28 (instead of 24) comes from the GTFS standard
        stats["end_time_ut"] = last_day_start_ut + 28 * 3600

    stats["start_date"] = gtfs.get_min_date()
    stats["end_date"] = gtfs.get_max_date()

    # Maximum activity day
    max_activity_date = gtfs.execute_custom_query(
        'SELECT count(*), date '
        'FROM days '
        'GROUP BY date '
        'ORDER BY count(*) DESC, date '
        'LIMIT 1;').fetchone()
    if max_activity_date:
        stats["max_activity_date"] = max_activity_date[1]
        max_activity_hour = gtfs.get_cursor().execute(
            'SELECT count(*), arr_time_hour FROM day_stop_times '
            'WHERE date=? GROUP BY arr_time_hour '
            'ORDER BY count(*) DESC;', (stats["max_activity_date"],)).fetchone()
        if max_activity_hour:
            stats["max_activity_hour"] = max_activity_hour[1]
        else:
            stats["max_activity_hour"] = None

    # Fleet size estimate: considering each line separately
    if max_activity_date and max_activity_hour:
        fleet_size_estimates = _fleet_size_estimate(gtfs,
                                                    stats['max_activity_hour'],
                                                    stats['max_activity_date'])
        stats.update(fleet_size_estimates)

    # Compute simple distributions of various columns that have a finite range of values.
    # Commented-out lines refer to columns that are not yet imported.
    stats['routes__type__dist'] = _distribution(gtfs, 'routes', 'type')
    # stats['stop_times__pickup_type__dist'] = _distribution(gtfs, 'stop_times', 'pickup_type')
    # stats['stop_times__drop_off_type__dist'] = _distribution(gtfs, 'stop_times', 'drop_off_type')
    # stats['stop_times__timepoint__dist'] = _distribution(gtfs, 'stop_times', 'timepoint')
    stats['calendar_dates__exception_type__dist'] = _distribution(gtfs, 'calendar_dates', 'exception_type')
    stats['frequencies__exact_times__dist'] = _distribution(gtfs, 'frequencies', 'exact_times')
    stats['transfers__transfer_type__dist'] = _distribution(gtfs, 'transfers', 'transfer_type')
    stats['agencies__lang__dist'] = _distribution(gtfs, 'agencies', 'lang')
    stats['stops__location_type__dist'] = _distribution(gtfs, 'stops', 'location_type')
    # stats['stops__wheelchair_boarding__dist'] = _distribution(gtfs, 'stops', 'wheelchair_boarding')
    # stats['trips__wheelchair_accessible__dist'] = _distribution(gtfs, 'trips', 'wheelchair_accessible')
    # stats['trips__bikes_allowed__dist'] = _distribution(gtfs, 'trips', 'bikes_allowed')

    stats = _feed_calendar_span(gtfs, stats)
    return stats
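# A minimal usage sketch, assuming a gtfspy-imported database file
# ("helsinki.sqlite" is a hypothetical name). All values in the returned dict
# are scalars, so they can be printed or written to a key-value table as-is.
from gtfspy.gtfs import GTFS

g = GTFS("helsinki.sqlite")
stats = get_stats(g)
print(stats["n_stops"], stats["start_date"], stats["end_date"])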
def _add_scale_bar(ax, lat, lon_min, lon_max, width_pixels):
    # Ground distance spanned by the axes' horizontal extent, in meters.
    distance_m = util.wgs84_distance(lat, lon_min, lat, lon_max)
    # ScaleBar expects the physical size of one pixel, i.e. meters per pixel.
    scalebar = ScaleBar(distance_m / width_pixels)
    ax.add_artist(scalebar)
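# A minimal usage sketch with illustrative values. It assumes a matplotlib Axes
# whose horizontal extent spans lon_min..lon_max at roughly constant latitude
# and is rendered about width_pixels wide; matplotlib-scalebar must be installed.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 6), dpi=100)    # roughly 800 pixels wide
_add_scale_bar(ax, lat=60.17, lon_min=24.80, lon_max=25.10, width_pixels=800)
plt.show()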
def stop_to_stop_network_for_route_type(gtfs,
                                        route_type,
                                        link_attributes=None,
                                        start_time_ut=None,
                                        end_time_ut=None):
    """
    Get a stop-to-stop network describing a single mode of travel.

    Parameters
    ----------
    gtfs : gtfspy.GTFS
    route_type : int
        See gtfspy.route_types.TRANSIT_ROUTE_TYPES for the list of possible types.
    link_attributes: list[str], optional
        defaulting to use the following link attributes:
            "n_vehicles" : Number of vehicles passed
            "duration_min" : minimum travel time between stops
            "duration_max" : maximum travel time between stops
            "duration_median" : median travel time between stops
            "duration_avg" : average travel time between stops
            "d" : distance along straight line (wgs84_distance)
            "distance_shape" : minimum distance along shape
            "capacity_estimate" : approximate capacity passed through the stop
            "route_I_counts" : dict from route_I to counts
    start_time_ut: int
        start time of the time span (in unix time)
    end_time_ut: int
        end time of the time span (in unix time)

    Returns
    -------
    net: networkx.DiGraph
        A directed graph of stop-to-stop links.
    """
    if link_attributes is None:
        link_attributes = DEFAULT_STOP_TO_STOP_LINK_ATTRIBUTES
    assert route_type in route_types.TRANSIT_ROUTE_TYPES

    stops_dataframe = gtfs.get_stops_for_route_type(route_type)
    net = networkx.DiGraph()
    _add_stops_to_net(net, stops_dataframe)

    events_df = gtfs.get_transit_events(start_time_ut=start_time_ut,
                                        end_time_ut=end_time_ut,
                                        route_type=route_type)
    if len(net.nodes()) < 2:
        assert events_df.shape[0] == 0

    # group events by links, and loop over them (i.e. each link):
    link_event_groups = events_df.groupby(['from_stop_I', 'to_stop_I'], sort=False)
    for key, link_events in link_event_groups:
        from_stop_I, to_stop_I = key
        assert isinstance(link_events, pd.DataFrame)
        # Available columns: 'dep_time_ut', 'arr_time_ut', 'shape_id', 'route_type',
        # 'trip_I', 'duration', 'from_seq', 'to_seq'
        if link_attributes is None:
            net.add_edge(from_stop_I, to_stop_I)
        else:
            link_data = {}
            # Travel-time statistics over all events on this link:
            if "duration_min" in link_attributes:
                link_data['duration_min'] = float(link_events['duration'].min())
            if "duration_max" in link_attributes:
                link_data['duration_max'] = float(link_events['duration'].max())
            if "duration_median" in link_attributes:
                link_data['duration_median'] = float(link_events['duration'].median())
            if "duration_avg" in link_attributes:
                link_data['duration_avg'] = float(link_events['duration'].mean())
            # statistics on numbers of vehicles:
            if "n_vehicles" in link_attributes:
                link_data['n_vehicles'] = int(link_events.shape[0])
            if "capacity_estimate" in link_attributes:
                link_data['capacity_estimate'] = \
                    route_types.ROUTE_TYPE_TO_APPROXIMATE_CAPACITY[route_type] \
                    * int(link_events.shape[0])
            if "d" in link_attributes:
                from_lat = graph_node_attrs(net, from_stop_I)['lat']
                from_lon = graph_node_attrs(net, from_stop_I)['lon']
                to_lat = graph_node_attrs(net, to_stop_I)['lat']
                to_lon = graph_node_attrs(net, to_stop_I)['lon']
                distance = wgs84_distance(from_lat, from_lon, to_lat, to_lon)
                link_data['d'] = int(distance)
            if "distance_shape" in link_attributes:
                assert "shape_id" in link_events.columns.values
                # Use the first event that has a shape_id, if any.
                found = None
                for i, shape_id in enumerate(link_events["shape_id"].values):
                    if shape_id is not None:
                        found = i
                        break
                if found is None:
                    link_data["distance_shape"] = None
                else:
                    link_event = link_events.iloc[found]
                    distance = gtfs.get_shape_distance_between_stops(
                        link_event["trip_I"],
                        int(link_event["from_seq"]),
                        int(link_event["to_seq"]))
                    link_data['distance_shape'] = distance
            if "route_I_counts" in link_attributes:
                link_data["route_I_counts"] = link_events.groupby("route_I").size().to_dict()
            net.add_edge(from_stop_I, to_stop_I, **link_data)
    return net
def test_get_buffered_area_of_stops(self):
    # stop1 is far from stop2, so there's no overlap;
    # stop1 and stop3 are close and could overlap.
    # The computed area has an accuracy between 95% and 99% of the true value.
    stop1_coords = 61.129094, 24.027896
    stop2_coords = 61.747408, 23.924279
    stop3_coords = 61.129621, 24.027363
    # lat, lon
    lats_1, lons_1 = list(zip(stop1_coords))
    lats_1_2, lons_1_2 = list(zip(stop1_coords, stop2_coords))
    lats_1_3, lons_1_3 = list(zip(stop1_coords, stop3_coords))

    # One-point buffer
    buffer_onepoint = 100  # 100 meters of radius
    true_area = 10000 * np.pi  # area = pi * radius**2
    area_1 = compute_buffered_area_of_stops(lats_1, lons_1, buffer_onepoint)
    confidence = true_area * 0.95
    self.assertTrue(confidence < area_1 < true_area)

    # Two-point buffer, non-overlapping.
    # Note: the points are "far away" to avoid overlap, but since they are points in the same city,
    # a really big buffer could cause overlap and the test would fail.
    buffer_nonoverlap = 100  # 100 meters of radius
    two_points_nonoverlap_true_area = 2 * buffer_nonoverlap ** 2 * np.pi  # area = 2 * pi * radius**2
    area_1_2 = compute_buffered_area_of_stops(lats_1_2, lons_1_2, buffer_nonoverlap)
    confidence_2 = two_points_nonoverlap_true_area * 0.95
    self.assertTrue(confidence_2 < area_1_2 and area_1_2 < two_points_nonoverlap_true_area)

    # Two-point buffer with overlap:
    # points so close that they overlap with a radius of 100 meters.
    buffer_overlap = 100  # 100 meters of radius
    area_1_3 = compute_buffered_area_of_stops(lats_1_3, lons_1_3, buffer_overlap)
    self.assertLess(area_1, area_1_3)
    self.assertLess(area_1_3, two_points_nonoverlap_true_area)

    # 'Half-overlap'
    from gtfspy.util import wgs84_distance
    lat1, lat3 = lats_1_3
    lon1, lon3 = lons_1_3
    distance = wgs84_distance(lat1, lon1, lat3, lon3)

    # Just a little overlap
    buffer = distance / 2. + 1
    area_1_3b = compute_buffered_area_of_stops(lats_1_3, lons_1_3, buffer, resolution=100)
    one_point_true_area = np.pi * buffer ** 2
    self.assertLess(one_point_true_area * 1.5, area_1_3b)
    self.assertLess(area_1_3b, 2 * one_point_true_area)

    # No overlap
    buffer = distance / 2. - 1
    area_1_3b = compute_buffered_area_of_stops(lats_1_3, lons_1_3, buffer, resolution=100)
    two_points_nonoverlap_true_area = 2 * buffer ** 2 * np.pi
    self.assertGreater(area_1_3b, two_points_nonoverlap_true_area * 0.95)
    self.assertLess(area_1_3b, two_points_nonoverlap_true_area)
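# A rough illustration (not the library's implementation) of the idea the test
# exercises: the buffered area of a set of stops is the area of the *union* of
# circles of the given radius around them, so overlapping circles contribute
# less than the sum of their individual areas. The sketch projects lat/lon to
# meters with a flat-earth approximation around the first stop.
import numpy as np
from shapely.geometry import Point
from shapely.ops import unary_union

def approx_buffered_area(lats, lons, radius_m):
    lat0, lon0 = lats[0], lons[0]
    m_per_deg_lat = 111320.0                                   # rough value
    m_per_deg_lon = 111320.0 * np.cos(np.radians(lat0))
    circles = [Point((lon - lon0) * m_per_deg_lon,
                     (lat - lat0) * m_per_deg_lat).buffer(radius_m)
               for lat, lon in zip(lats, lons)]
    return unary_union(circles).area                           # overlaps counted once

# Two nearby stops with 100 m buffers: the union area is less than 2 * pi * 100**2.
print(approx_buffered_area([61.129094, 61.129621], [24.027896, 24.027363], 100))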