Ejemplo n.º 1
0
def build_stops(pfeed, shapes=None):
    """
    Build the ``stops.txt`` table for the given ProtoFeed and return
    it as a DataFrame.

    When ``pfeed.stops`` is set, a copy of it is returned unchanged.
    Otherwise ``shapes`` (the output of :func:`build_shapes`) is
    required, and two stops are generated per shape: one at its first
    point and one at its last point.
    Stops sharing the same coordinates are then collapsed into one,
    so a loop shape ends up with a single stop.

    Raise a ``ValueError`` if stops must be generated but no shapes
    were given.
    """
    # Explicit stops win over generated ones
    if pfeed.stops is not None:
        return pfeed.stops.copy()

    if shapes is None:
        raise ValueError("Must input shapes built by build_shapes()")

    records = []
    geo_shapes = gk.geometrize_shapes_0(shapes)
    for shape_id, line in zip(geo_shapes["shape_id"], geo_shapes["geometry"]):
        ids = build_stop_ids(shape_id)
        names = build_stop_names(shape_id)
        # Normalized position 0 is the start of the line and 1 its end
        for end in (0, 1):
            sid, sname = ids[end], names[end]
            lon, lat = line.interpolate(end, normalized=True).coords[0]
            records.append([sid, sname, lon, lat])

    table = pd.DataFrame(
        records, columns=["stop_id", "stop_name", "stop_lon", "stop_lat"])
    return table.drop_duplicates(subset=["stop_lon", "stop_lat"])
Ejemplo n.º 2
0
def test_ungeometrize_shapes_0():
    """Round-trip the Cairns shapes through geometrize/ungeometrize."""
    original = cairns.shapes.copy()
    round_tripped = ungeometrize_shapes_0(geometrize_shapes_0(original))

    # Every original column except the distance column should come back
    wanted_cols = set(original.columns) - {"shape_dist_traveled"}
    assert set(round_tripped.columns) == wanted_cols

    # The shape IDs and point coordinates should be unchanged
    key_cols = ["shape_id", "shape_pt_lon", "shape_pt_lat"]
    assert_frame_equal(round_tripped[key_cols], original[key_cols])
Ejemplo n.º 3
0
def test_geometrize_shapes_0():
    """Check the type, dimensions, and columns of geometrized shapes."""
    source = cairns.shapes.copy()
    result = geometrize_shapes_0(source, use_utm=True)

    # Result type
    assert isinstance(result, gp.GeoDataFrame)

    # One row per distinct shape, and two columns fewer than the input
    n_rows, n_cols = result.shape
    assert n_rows == source["shape_id"].nunique()
    assert n_cols == source.shape[1] - 2

    # Point-level columns are replaced by a single geometry column
    point_cols = {
        "shape_pt_lon",
        "shape_pt_lat",
        "shape_pt_sequence",
        "shape_dist_traveled",
    }
    wanted_cols = (set(source.columns) | {"geometry"}) - point_cols
    assert set(result.columns) == wanted_cols
Ejemplo n.º 4
0
def test_geometrize_shapes():
    """The feed-level wrapper should agree with the table-level function."""
    from_feed = geometrize_shapes(cairns, use_utm=True)
    from_table = geometrize_shapes_0(cairns.shapes, use_utm=True)
    assert from_feed.equals(from_table)

    # The shapeless fixture must be rejected
    with pytest.raises(ValueError):
        geometrize_shapes(cairns_shapeless)
Ejemplo n.º 5
0
def build_stop_times(pfeed, routes, shapes, stops, trips, buffer=cs.BUFFER):
    """
    Given a ProtoFeed and its corresponding routes (DataFrame),
    shapes (DataFrame), stops (DataFrame), trips (DataFrame),
    return a DataFrame representing ``stop_times.txt``.
    Includes the optional ``shape_dist_traveled`` column.
    Don't make stop times for trips with no nearby stops
    or with a zero frequency.

    The stops of a trip are the ones returned by
    :func:`get_nearby_stops` for the trip's shape, the traffic side
    looked up from the feed's agency timezone, and the given ``buffer``.
    Trip IDs are expected to split on ``cs.SEP`` into six fields,
    of which the third is the service window ID, the fourth is the
    base start time string, and the sixth is an integer multiplier of
    the headway that offsets the trip from the base start time.
    Arrival and departure times at a stop are set equal.
    """
    # Get the table of trips and add frequency and service window details
    routes = routes.filter(["route_id", "route_short_name"]).merge(
        pfeed.frequencies.drop(["shape_id"], axis=1))
    trips = trips.assign(service_window_id=lambda x: x.trip_id.map(
        lambda y: y.split(cs.SEP)[2])).merge(routes)

    # Get the geometries of ``shapes`` and not ``pfeed.shapes``
    geometry_by_shape = dict(
        gk.geometrize_shapes_0(shapes, use_utm=True).filter(
            ["shape_id", "geometry"]).values)

    # Save on distance computations by memoizing
    dist_by_stop_by_shape = {shape: {} for shape in geometry_by_shape}

    def compute_stops_dists_times(geo_stops, linestring, shape, start_time,
                                  end_time):
        """
        Given a GeoDataFrame of stops on one side of a given Shapely
        LineString with given shape ID, compute distances and departure
        times of a trip traversing the LineString from start to end
        at the given start and end times (in seconds past midnight)
        and stopping at the stops encountered along the way.
        Do not assume that the stops are ordered by trip encounter.
        Return three sequences of the same length: the stop IDs in the
        order that the trip encounters them, the shape distances
        traveled (in kilometers) at the stops, and the times the stops
        are encountered, respectively.
        Assume ``geo_stops`` is nonempty.
        """
        dist_by_stop = dist_by_stop_by_shape[shape]
        dists_and_stops = []
        for i, stop in enumerate(geo_stops["stop_id"].values):
            if stop not in dist_by_stop:
                dist_by_stop[stop] = gk.get_segment_length(
                    linestring, geo_stops.geometry.iat[i]) / 1000  # km
            dists_and_stops.append((dist_by_stop[stop], stop))

        # Order the stops by distance along the shape
        dists, stop_ids = zip(*sorted(dists_and_stops))
        D = linestring.length / 1000  # km
        # NOTE(review): the tolerance 100 is added to a length in
        # kilometers; confirm that 100 km (and not 100 m) is intended
        if not all(dist < D + 100 for dist in dists):
            # Assume equal distances between stops :-(
            n = len(stop_ids)
            # A lone stop has no spacing to compute, so it lands at
            # distance 0 instead of triggering a division by zero
            delta = D / (n - 1) if n > 1 else 0.0
            dists = [i * delta for i in range(n)]

        # Compute times using distances, start and end stop times,
        # and linear interpolation between the first and last stops
        times = np.interp(dists, [dists[0], dists[-1]],
                          [start_time, end_time])
        return stop_ids, dists, times

    # Iterate through trips and set stop times based on stop ID
    # and service window frequency.
    # Remember that every trip has a valid shape ID.
    # Gather stops geographically from ``stops``.
    rows = []
    geo_stops = gk.geometrize_stops_0(stops, use_utm=True)
    # Look on the traffic side of the street for this timezone
    side = cs.TRAFFIC_BY_TIMEZONE[pfeed.meta.agency_timezone.iat[0]]
    for __, row in trips.iterrows():
        frequency = row["frequency"]
        if not frequency:
            # No stop times for this trip/frequency combo.
            # Checked first to skip the geometric work below.
            continue
        shape = row["shape_id"]
        geom = geometry_by_shape[shape]
        nearby_stops = get_nearby_stops(geo_stops, geom, side, buffer=buffer)
        # Don't make stop times for trips without nearby stops
        if nearby_stops.empty:
            continue
        length = geom.length / 1000  # km
        speed = row["speed"]  # km/h
        duration = int((length / speed) * 3600)  # seconds
        headway = 3600 / frequency  # seconds
        trip = row["trip_id"]
        # Only the base start time and the trip index are needed here
        __, __, __, base_timestr, __, i = trip.split(cs.SEP)
        base_time = gk.timestr_to_seconds(base_timestr)
        start_time = base_time + headway * int(i)
        end_time = start_time + duration
        stop_ids, dists, times = compute_stops_dists_times(
            nearby_stops, geom, shape, start_time, end_time)
        rows.extend(
            [trip, stop, j, t, t, dist]
            for j, (stop, t, dist) in enumerate(zip(stop_ids, times, dists)))

    g = pd.DataFrame(
        rows,
        columns=[
            "trip_id",
            "stop_id",
            "stop_sequence",
            "arrival_time",
            "departure_time",
            "shape_dist_traveled",
        ],
    )

    # Convert seconds back to time strings.
    # Map column by column, because ``DataFrame.applymap`` is deprecated
    # in recent pandas versions.
    for col in ["arrival_time", "departure_time"]:
        g[col] = g[col].map(
            lambda x: gk.timestr_to_seconds(x, inverse=True))
    return g