Example #1
import geopandas

def livnehIDsAndAreas(df: geopandas.GeoDataFrame, crs: str = '4326') -> geopandas.GeoDataFrame:
    """Deduplicate clipped Livneh points, grid them, and compute cell areas."""
    # Avoid mutating the caller's frame: deduplicate IDs and sort for a
    # deterministic output order
    df = df.drop_duplicates(['id']).sort_values(['id'], axis=0)
    # Note: .area is only meaningful in a projected CRS; with the default
    # EPSG:4326 the values are square degrees despite the 'area_m2' name
    df = df.to_crs(epsg=crs)
    df['area_m2'] = df['geometry'].area
    df = df.filter(items=['coordinates', 'lat', 'lon', 'id', 'area_m2'])
    # __points2grids is a module-private helper that expands points to grid cells
    df = __points2grids(df, crs=crs)
    df = df.to_crs(epsg=crs)
    df['total_area_m2'] = df['geometry'].area
    return df
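# A minimal usage sketch, not part of the original source: it assumes a
# GeoDataFrame of Livneh point features with 'id', 'lat', and 'lon' columns,
# and the file path is hypothetical. EPSG:5070 (CONUS Albers) is a projected,
# equal-area CRS, so areas come out in square meters.
import geopandas

points = geopandas.read_file("livneh_points.shp")  # hypothetical path
grid = livnehIDsAndAreas(points, crs='5070')
print(grid.head())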
Example #2
from typing import List

import geopandas as gp
import numpy as np
import pandas as pd
import shapely.geometry as sg

from . import helpers as hp  # assumed package-local helpers (make_ids, timestr_to_seconds, ...)


def compute_screen_line_counts(
    feed: "Feed", screen_lines: gp.GeoDataFrame, dates: List[str]
) -> pd.DataFrame:
    """
    Find all the Feed trips active on the given YYYYMMDD dates whose shapes
    intersect the given GeoDataFrame of screen lines, that is, of straight WGS84
    LineStrings.
    Compute the intersection times and directions for each trip.

    Return a DataFrame with the columns

    - ``'date'``
    - ``'trip_id'``
    - ``'route_id'``
    - ``'route_short_name'``
    - ``'shape_id'``: shape ID of the trip
    - ``'screen_line_id'``: ID of the screen line as specified in ``screen_lines`` or as
      assigned after the fact.
    - ``'crossing_distance'``: distance along the trip shape of the screen line
      intersection
    - ``'crossing_time'``: time that the trip's vehicle crosses
      the screen line; one trip could cross multiple times
    - ``'crossing_direction'``: 1 or -1; 1 indicates trip travel from the
      left side to the right side of the screen line;
      -1 indicates trip travel in the opposite direction

    Notes:

    - Assume the Feed's stop times DataFrame has an accurate ``shape_dist_traveled`` column.
    - Assume that trips travel in the same direction as their shapes, an assumption
      that is part of the GTFS.
    - Assume that the screen line is straight and simple.
    - Probably does not give correct results for trips with self-intersecting shapes.
    - The algorithm works as follows:

        1. Find the trip shapes that intersect the screen lines.
        2. For each such shape and screen line, compute the intersection points, the distance
           of the point along the shape, and the orientation of the screen line relative to
           the shape.
        3. For each given date, restrict to trips active on the
           date and interpolate a stop time for the intersection point using
           the ``shape_dist_traveled`` column.
        4. Use that interpolated time as the crossing time of the trip vehicle.

    """
    dates = feed.subset_dates(dates)
    if not dates:
        return pd.DataFrame()

    # Get shapes as GeoDataFrame
    shapes_g = feed.geometrize_shapes(use_utm=True)

    # Convert screen lines to UTM
    crs = shapes_g.crs
    screen_lines = screen_lines.to_crs(crs)

    # Create screen line IDs if necessary
    n = screen_lines.shape[0]
    if "screen_line_id" not in screen_lines.columns:
        screen_lines["screen_line_id"] = hp.make_ids(n, "sl")

    # Make a vector in the direction of each screen line to calculate crossing orientation.
    # Does not work in case of a bent screen line.
    p1 = screen_lines.geometry.map(lambda x: np.array(x.coords[0]))
    p2 = screen_lines.geometry.map(lambda x: np.array(x.coords[-1]))
    screen_lines["screen_line_vector"] = p2 - p1

    # Get intersection points of shapes and screen lines
    g0 = (
        # Only keep shapes that intersect screen lines to reduce computations
        gp.sjoin(shapes_g, screen_lines.filter(["screen_line_id", "geometry"])).merge(
            screen_lines, on="screen_line_id"
        )
        # Compute intersection points
        .assign(
            int_point=lambda x: gp.GeoSeries(x.geometry_x, crs=crs).intersection(
                gp.GeoSeries(x.geometry_y, crs=crs)
            )
        )
    )

    # Unpack multipoint intersections to yield a new GeoDataFrame
    records = []
    for row in g0.itertuples(index=False):
        if isinstance(row.int_point, sg.Point):
            intersections = [row.int_point]
        else:
            # MultiPoint: iterate via .geoms (required by Shapely 2.x)
            intersections = row.int_point.geoms
        for int_point in intersections:
            record = {
                "shape_id": row.shape_id,
                "screen_line_id": row.screen_line_id,
                "geometry": row.geometry_x,
                "int_point": int_point,
                "screen_line_vector": row.screen_line_vector,
            }
            records.append(record)

    g = gp.GeoDataFrame.from_records(records)
    g.crs = crs

    # Get distance (in meters) of each intersection point along shape
    g["crossing_dist"] = g.apply(lambda x: x.geometry.project(x.int_point), axis=1)

    # Build a tiny vector along each shape
    p2 = g.apply(lambda x: x.geometry.interpolate(x.crossing_dist + 1), axis=1).map(
        lambda x: np.array(x.coords[0])
    )
    p1 = g.int_point.map(lambda x: np.array(x.coords[0]))
    g["shape_vector"] = p2 - p1

    # Compute crossing direction by taking the vector cross product of
    # the shape vector and the screen line vector
    det = g.apply(
        lambda x: np.linalg.det(np.array([x.shape_vector, x.screen_line_vector])),
        axis=1,
    )
    g["crossing_direction"] = det.map(lambda x: 1 if x >= 0 else -1)

    # Convert to feed distance units
    converter = hp.get_convert_dist("m", feed.dist_units)
    g["crossing_dist"] = g["crossing_dist"].map(converter)

    # Summarize work so far into a lookup table
    h = (
        g.filter(["shape_id", "screen_line_id", "crossing_direction", "crossing_dist"])
        .set_index("shape_id")
        .sort_values(
            ["shape_id", "crossing_dist"]
        )  # Need this sorting for interpolation to work
    )

    # Get stop times of trips whose shapes lie in h
    st = (
        feed.trips.loc[lambda x: x.shape_id.isin(h.index)]
        # Merge in route short names and stop times
        .merge(feed.routes[["route_id", "route_short_name"]]).merge(feed.stop_times)
        # Keep only non-NaN departure times
        .loc[lambda x: x.departure_time.notna()]
        # Convert to seconds past midnight
        .assign(departure_time=lambda x: x.departure_time.map(hp.timestr_to_seconds))
    )

    # Compute crossing times by date
    records = []
    ta = feed.compute_trip_activity(dates)
    for date in dates:
        # Subset to trips active on date and merge with g
        ids = ta.loc[lambda x: x[date] == 1, "trip_id"]
        f = st.loc[lambda x: x.trip_id.isin(ids)].sort_values(
            ["trip_id", "shape_dist_traveled"]
        )  # Need this sorting for interpolation to work

        # Get crossing time for each trip
        for tid, group in f.groupby("trip_id"):
            sid = group["shape_id"].iat[0]
            rid = group["route_id"].iat[0]
            rsn = group["route_short_name"].iat[0]
            dists = group["shape_dist_traveled"].values
            times = group["departure_time"].values
            crossing_dists = h.loc[[sid], "crossing_dist"].values
            crossing_times = np.interp(crossing_dists, dists, times)
            for i, row in enumerate(h.loc[[sid]].itertuples(index=False)):
                record = {
                    "date": date,
                    "trip_id": tid,
                    "route_id": rid,
                    "route_short_name": rsn,
                    "shape_id": sid,
                    "screen_line_id": row.screen_line_id,
                    "crossing_direction": row.crossing_direction,
                    "crossing_distance": row.crossing_dist,
                    "crossing_time": crossing_times[i],
                }
                records.append(record)

    result = pd.DataFrame.from_records(records).assign(
        crossing_time=lambda x: x.crossing_time.map(
            lambda t: hp.timestr_to_seconds(t, inverse=True)
        )
    )
    return result
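# A minimal usage sketch of the function above, not part of the original
# source. It assumes `feed` is a Feed object loaded elsewhere; the screen line
# below is hypothetical, given as WGS84 lon-lat coordinates.
import geopandas as gp
import shapely.geometry as sg

screen_lines = gp.GeoDataFrame(
    {"screen_line_id": ["sl0"]},
    geometry=[sg.LineString([(-122.42, 37.77), (-122.40, 37.78)])],
    crs="EPSG:4326",
)
counts = compute_screen_line_counts(feed, screen_lines, dates=["20240101"])
print(counts[["trip_id", "screen_line_id", "crossing_time", "crossing_direction"]])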
Example #3
import pandas as pd
import geopandas as gpd

def clean_acs(fd: gpd.GeoDataFrame,
        returns=False,
        groups=True,
        home=True,
        reduced=True,
        error=True) -> gpd.GeoDataFrame:
    r"""
    Clean up and organize ACS flow data.

    American Community Survey (ACS) data has information on many modes of
    transportation and their error margins. This function provides various
    options to simplify this data and to reduce and combine various modes of
    transportation.

    Parameters
    ----------
    returns : bool, defaults to False
        Add duplicate data with switched origin and destination codes
    groups : bool, defaults to True
        Create an active transportation group (`walk` and `bike`), transit group
        (`bus`, `streetcar`, `subway`, `railroad`, and `ferry`), and a carpool group
        (`car_2p`, `car_3p`, `car_4p`, `car_5p`, and `car_7p`).
    home : bool, defaults to True
        People working from home do not travel. Subtract `home` from `all`.
    reduced : bool, defaults to True
        Only keep the `all`, `home`, `walk`, `bike`, and `sov` columns, plus
        the group columns if `groups` is True.
    error : bool, defaults to True
        Keep the error data.

    Returns
    -------
    geopandas.GeoDataFrame
        Cleaned up GeoDataFrame with origin-destination data broken down by mode
    
    See Also
    --------
    ~stplanpy.acs.read_acs
    
    Examples
    --------
    An example data file, "`od_data.csv`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs

        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()
    """        
    if (returns):
# Add return data for commute trips per day
        df = fd.copy()
        df = df[[
            "dest_taz", "orig_taz",
            "all", "all_error",
            "sov", "sov_error",
            "car_2p", "car_2p_error",
            "car_3p", "car_3p_error",
            "car_4p", "car_4p_error",
            "car_5p", "car_5p_error",
            "car_7p", "car_7p_error",
            "bus", "bus_error",
            "streetcar", "streetcar_error",
            "subway", "subway_error",
            "railroad", "railroad_error",
            "ferry", "ferry_error",
            "bike", "bike_error",
            "walk", "walk_error",
            "taxi", "taxi_error",
            "motorcycle", "motorcycle_error",
            "other", "other_error",
            "home", "home_error",
            "auto", "auto_error"]]
        df.rename(columns={
            "dest_taz": "orig_taz",
            "orig_taz": "dest_taz"}, inplace=True)
        fd = pd.concat([fd, df], ignore_index=True)

    if groups:
        # Define mode groups; their errors combine in quadrature (root sum of squares)
        fd["active"] = (
            + fd["walk"]
            + fd["bike"])
        fd["active_error"] = (
            + fd["walk_error"]**2
            + fd["bike_error"]**2)**(1/2)
        fd["transit"] = (
            + fd["bus"]
            + fd["streetcar"]
            + fd["subway"]
            + fd["railroad"]
            + fd["ferry"])
        fd["transit_error"] = (
            + fd["bus_error"]**2
            + fd["streetcar_error"]**2
            + fd["subway_error"]**2
            + fd["railroad_error"]**2
            + fd["ferry_error"]**2)**(1/2)
        fd["carpool"] = (
            + fd["car_2p"]
            + fd["car_3p"]
            + fd["car_4p"]
            + fd["car_5p"]
            + fd["car_7p"])
        fd["carpool_error"] = (
            + fd["car_2p_error"]**2
            + fd["car_3p_error"]**2
            + fd["car_4p_error"]**2
            + fd["car_5p_error"]**2
            + fd["car_7p_error"]**2)**(1/2)

    if home:
        # People working from home do not travel
        fd["all"] = fd["all"] - fd["home"]

    if reduced and groups:
        # Columns to keep
        fd = fd[[
            "orig_taz",
            "dest_taz",
            "all",
            "all_error",
            "home",
            "home_error",
            "walk",
            "walk_error",
            "bike",
            "bike_error",
            "sov",
            "sov_error",
            "active",
            "active_error",
            "transit",
            "transit_error",
            "carpool",
            "carpool_error",
            fd.geometry.name]]
    elif reduced and not groups:
        # Columns to keep
        fd = fd[[
            "orig_taz",
            "dest_taz",
            "all",
            "all_error",
            "home",
            "home_error",
            "walk",
            "walk_error",
            "bike",
            "bike_error",
            "sov",
            "sov_error",
            fd.geometry.name]]

    if not error:
        fd = fd[fd.columns.drop(list(fd.filter(regex="_error")))]

    # Fix index
    fd = fd.reset_index(drop=True)

    return fd
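# A minimal sketch, not from the original source, of the error propagation
# clean_acs applies to its group columns: margins of error for summed,
# independent ACS estimates combine in quadrature (root sum of squares).
import numpy as np

walk_moe, bike_moe = 10.0, 7.0  # hypothetical margins of error
active_moe = np.sqrt(walk_moe**2 + bike_moe**2)  # same formula as fd["active_error"]
print(active_moe)  # ~12.21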