def build_stops(pfeed, shapes=None):
    """
    Build the DataFrame representing ``stops.txt`` for the given ProtoFeed.

    When ``pfeed.stops`` is not ``None``, a copy of it is returned and
    ``shapes`` is ignored.
    Otherwise ``shapes`` (the output of :func:`build_shapes`) is required:
    one stop is created at the first point of each shape and one at its
    last point, and stops whose coordinates duplicate an earlier stop are
    dropped.
    Consequently a shape that is a loop contributes a single stop.

    Raise a ``ValueError`` if ``pfeed.stops`` is ``None`` and no shapes
    are given.
    """
    # Stops supplied by the ProtoFeed take precedence
    if pfeed.stops is not None:
        return pfeed.stops.copy()

    if shapes is None:
        raise ValueError("Must input shapes built by build_shapes()")

    records = []
    shape_geoms = gk.geometrize_shapes_0(shapes)[["shape_id", "geometry"]]
    for shape_id, line in shape_geoms.itertuples(index=False):
        ids = build_stop_ids(shape_id)
        names = build_stop_names(shape_id)
        # Normalized positions 0 and 1 are the start and end of the line
        for end in (0, 1):
            stop_id, stop_name = ids[end], names[end]
            lon, lat = line.interpolate(end, normalized=True).coords[0]
            records.append([stop_id, stop_name, lon, lat])

    frame = pd.DataFrame(
        records, columns=["stop_id", "stop_name", "stop_lon", "stop_lat"]
    )
    return frame.drop_duplicates(subset=["stop_lon", "stop_lat"])
def test_ungeometrize_shapes_0():
    """
    Round-trip the Cairns shapes table through ``geometrize_shapes_0``
    and ``ungeometrize_shapes_0`` and compare it with the original.
    """
    original = cairns.shapes.copy()
    round_tripped = ungeometrize_shapes_0(geometrize_shapes_0(original))

    # Every original column except ``shape_dist_traveled`` should come back
    assert set(round_tripped.columns) == set(original.columns) - {
        "shape_dist_traveled"
    }

    # Shape IDs and point coordinates should survive the round trip
    compare_cols = ["shape_id", "shape_pt_lon", "shape_pt_lat"]
    assert_frame_equal(round_tripped[compare_cols], original[compare_cols])
def test_geometrize_shapes_0():
    """
    Check the type, dimensions, and columns of the GeoDataFrame that
    ``geometrize_shapes_0`` builds from the Cairns shapes table.
    """
    raw = cairns.shapes.copy()
    result = geometrize_shapes_0(raw, use_utm=True)

    # Result type
    assert isinstance(result, gp.GeoDataFrame)

    # One row per distinct shape ID; two fewer columns than the input
    n_rows, n_cols = result.shape
    assert n_rows == raw["shape_id"].nunique()
    assert n_cols == raw.shape[1] - 2

    # Point-level columns are replaced by a single ``geometry`` column
    point_cols = {
        "shape_pt_lon",
        "shape_pt_lat",
        "shape_pt_sequence",
        "shape_dist_traveled",
    }
    assert set(result.columns) == (set(raw.columns) | {"geometry"}) - point_cols
def test_geometrize_shapes():
    """
    Check that the feed-level ``geometrize_shapes`` agrees with
    ``geometrize_shapes_0`` applied to the feed's shapes table, and that
    it raises a ``ValueError`` for a feed without shapes.
    """
    from_feed = geometrize_shapes(cairns, use_utm=True)
    from_table = geometrize_shapes_0(cairns.shapes, use_utm=True)
    assert from_feed.equals(from_table)

    # A shapeless feed cannot be geometrized
    with pytest.raises(ValueError):
        geometrize_shapes(cairns_shapeless)
def build_stop_times(pfeed, routes, shapes, stops, trips, buffer=cs.BUFFER):
    """
    Given a ProtoFeed and its corresponding routes (DataFrame),
    shapes (DataFrame), stops (DataFrame), trips (DataFrame),
    return DataFrame representing ``stop_times.txt``.
    Includes the optional ``shape_dist_traveled`` column.
    Don't make stop times for trips with no nearby stops
    or for trips whose frequency is zero.

    The ``buffer`` is passed through to :func:`get_nearby_stops` when
    collecting the stops near each trip's shape.

    Trip IDs are expected to split on ``cs.SEP`` into exactly six fields;
    the third is the service window ID, the fourth the base time string,
    and the sixth the index of the trip within its service window.
    """
    # Get the table of trips and add frequency and service window details
    routes = routes.filter(["route_id", "route_short_name"]).merge(
        pfeed.frequencies.drop(["shape_id"], axis=1)
    )
    trips = trips.assign(
        service_window_id=lambda x: x.trip_id.map(lambda y: y.split(cs.SEP)[2])
    ).merge(routes)

    # Get the geometries of ``shapes`` and not ``pfeed.shapes``
    geometry_by_shape = dict(
        gk.geometrize_shapes_0(shapes, use_utm=True)
        .filter(["shape_id", "geometry"])
        .values
    )

    # Save on distance computations by memoizing per shape and stop
    dist_by_stop_by_shape = {shape: {} for shape in geometry_by_shape}

    def compute_stops_dists_times(shape_stops, linestring, shape, start_time, end_time):
        """
        Given a GeoDataFrame of stops on one side of a given Shapely
        LineString with given shape ID, compute distances and departure
        times of a trip traversing the LineString from start to end
        at the given start and end times (in seconds past midnight)
        and stopping at the stops encountered along the way.
        Do not assume that the stops are ordered by trip encounter.
        Return three sequences of the same length: the stop IDs in the
        order that the trip encounters them, the shape distances traveled
        at those stops, and the times at which the stops are encountered,
        respectively.
        """
        dist_by_stop = dist_by_stop_by_shape[shape]
        dists_and_stops = []
        for stop, point in zip(shape_stops["stop_id"].values, shape_stops.geometry):
            if stop not in dist_by_stop:
                dist_by_stop[stop] = (
                    gk.get_segment_length(linestring, point) / 1000
                )  # km
            dists_and_stops.append((dist_by_stop[stop], stop))

        # Order the stops by distance along the shape
        dists, stop_ids = zip(*sorted(dists_and_stops))

        D = linestring.length / 1000
        if not all(dist < D + 100 for dist in dists):
            # Distances are unreasonable, so assume equal distances
            # between stops :-(
            n = len(stop_ids)
            # A lone stop has no gaps to spread over; guard against
            # dividing by zero and place it at distance 0
            delta = D / (n - 1) if n > 1 else 0
            dists = [i * delta for i in range(n)]

        # Compute times by linear interpolation between the start time
        # at the first stop and the end time at the last stop
        times = np.interp(dists, [dists[0], dists[-1]], [start_time, end_time])
        return stop_ids, dists, times

    # Iterate through trips and set stop times based on stop ID
    # and service window frequency.
    # Remember that every trip has a valid shape ID.
    # Gather stops geographically from ``stops``.
    rows = []
    geo_stops = gk.geometrize_stops_0(stops, use_utm=True)
    # Look on the side of the traffic side of street for this timezone
    side = cs.TRAFFIC_BY_TIMEZONE[pfeed.meta.agency_timezone.iat[0]]
    for __, row in trips.iterrows():
        frequency = row["frequency"]
        if not frequency:
            # No stop times for this trip/frequency combo;
            # checked first to skip the spatial query below
            continue

        shape = row["shape_id"]
        geom = geometry_by_shape[shape]
        nearby_stops = get_nearby_stops(geo_stops, geom, side, buffer=buffer)
        # Don't make stop times for trips without nearby stops
        if nearby_stops.empty:
            continue

        length = geom.length / 1000  # km
        speed = row["speed"]  # km/h
        duration = int((length / speed) * 3600)  # seconds
        headway = 3600 / frequency  # seconds

        trip = row["trip_id"]
        # Unpacking all six fields also checks the trip ID's format
        __, __, __, base_timestr, __, trip_index = trip.split(cs.SEP)
        base_time = gk.timestr_to_seconds(base_timestr)
        start_time = base_time + headway * int(trip_index)
        end_time = start_time + duration

        stop_ids, dists, times = compute_stops_dists_times(
            nearby_stops, geom, shape, start_time, end_time
        )
        # Arrival and departure times coincide at every stop
        rows.extend(
            [trip, stop, j, time, time, dist]
            for j, (stop, time, dist) in enumerate(zip(stop_ids, times, dists))
        )

    g = pd.DataFrame(
        rows,
        columns=[
            "trip_id",
            "stop_id",
            "stop_sequence",
            "arrival_time",
            "departure_time",
            "shape_dist_traveled",
        ],
    )

    # Convert seconds back to time strings.
    # Column-wise ``Series.map`` avoids the deprecated ``DataFrame.applymap``.
    for col in ["arrival_time", "departure_time"]:
        g[col] = g[col].map(lambda x: gk.timestr_to_seconds(x, inverse=True))

    return g