Example #1
    def testProfileExists(self):
        with patch('rubbish_geo_common.db_ops.APPDIR', new=get_app_dir()):
            set_db(profile='dev',
                   connstr='postgresql://*****:*****@localhost:5432/baz',
                   conntype='gcp',
                   conname='bar')
            db_sessionmaker(profile='dev')
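
set_db persists the named connection profile that db_sessionmaker(profile='dev') later
resolves. As a purely hypothetical sketch (this excerpt does not show the real on-disk
format), the config written under APPDIR might pair the profile name with the set_db
fields, e.g.:

[dev]
connstr = postgresql://*****:*****@localhost:5432/baz
conntype = gcp
conname = bar
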
Example #2
    def testProfileDoesntExist(self):
        with patch('rubbish_geo_common.db_ops.APPDIR', new=get_app_dir()):
            with pytest.raises(ValueError):
                db_sessionmaker(profile='dev')


# TODO: set_db tests, fix db_sessionmaker, etcetera.
# Very far-reaching and annoying refactor. :'(
Example #3
def show_sectors(profile, wait=5, force_download=False):
    """Pretty-prints a list of sectors in the database."""
    with OptionalCloudSQLProxyProcess(profile,
                                      wait=wait,
                                      force_download=force_download):
        session = db_sessionmaker(profile)()
        try:
            sectors = session.query(Sector).all()
            if len(sectors) == 0:
                print("No sectors in the database. :(")
                return

            console = Console()
            table = Table(show_header=True, header_style="bold magenta")
            table.add_column("ID", justify="left")
            table.add_column("Name", justify="left")
            table.add_column("Bounding Box", justify="left")
            for sector in sectors:
                bounds = _poly_wkb_to_bounds_str(sector.geometry)
                table.add_row(str(sector.id), sector.name, bounds)
            console.print(table)
        finally:
            engine = session.bind
            session.close()
            engine.dispose()
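
The session/engine teardown in the finally block above is repeated across these admin
functions (compare Examples #4 and #7, and the refactoring TODO in Example #2). A minimal
sketch of a context-manager helper that could factor it out; _session_scope is
hypothetical, not part of the source:

from contextlib import contextmanager

@contextmanager
def _session_scope(profile):
    # Yield a session, then guarantee the close/dispose teardown used above.
    session = db_sessionmaker(profile)()
    try:
        yield session
    finally:
        engine = session.bind
        session.close()
        engine.dispose()
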
Example #4
def insert_sector(sector_name,
                  filepath,
                  profile,
                  wait=5,
                  force_download=False):
    with OptionalCloudSQLProxyProcess(profile,
                                      wait=wait,
                                      force_download=force_download):
        session = db_sessionmaker(profile)()

        if session.query(Sector).filter_by(name=sector_name).count() != 0:
            raise ValueError(
                f"The database already contains a sector with the name {sector_name!r}. "
                f"If you are redefining the same sector, please run `delete_sector({sector_name!r})` "
                f"first. Otherwise, please choose a different name for this sector."
            )

        sector_shape = _validate_sector_geom(filepath)
        sector = Sector(name=sector_name,
                        geometry=f'SRID=4326;{str(sector_shape)}')
        session.add(sector)
        try:
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            engine = session.bind
            session.close()
            engine.dispose()
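
A hypothetical invocation (the sector name and file path are illustrative only;
_validate_sector_geom is expected to read and validate the geometry at the given path):

insert_sector('polk-gulch', './sectors/polk_gulch.geojson', profile='dev')
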
Example #5
def show_zones(profile, wait=5, force_download=False):
    """Pretty-prints a list of zones in the database."""
    with OptionalCloudSQLProxyProcess(profile,
                                      wait=wait,
                                      force_download=force_download):
        session = db_sessionmaker(profile)()
        zones = (session.query(Zone).all())
        if len(zones) == 0:
            print("No zones in the database. :(")
        else:
            console = Console()
            table = Table(show_header=True, header_style="bold magenta")
            table.add_column("ID", justify="left")
            table.add_column("Name", justify="left")
            table.add_column("OSMNX Name", justify="left")
            table.add_column("N(Generations)", justify="right")
            table.add_column("N(Centerlines)", justify="right")
            table.add_column("Bounding Box", justify="left")
            for zone in zones:
                zone_gen_ids = [gen.id for gen in zone.zone_generations]
                n_centerlines = (session.query(Centerline).filter(
                    Centerline.first_zone_generation.in_(
                        zone_gen_ids)).count())
                bounds = _poly_wkb_to_bounds_str(zone.bounding_box)
                table.add_row(str(zone.id), zone.name, zone.osmnx_name,
                              str(len(zone.zone_generations)),
                              str(n_centerlines), str(bounds))
            console.print(table)
Example #6
def run_get(run_id, profile):
    """
    Returns blockface statistics and run-specific data for a specific run by id.

    Parameters
    ----------
    run_id : str
        The run id. Note: this is stored as ``firebase_id`` in the ``Pickups`` table.
    profile: str
        The database to connect to (e.g. "dev") as configured in ~/.rubbish/config.

    Returns
    -------
    ``list`` of ``dict``
        Query result.
    """
    session = db_sessionmaker(profile)()
    # Runs are not a native object in the analytics database. Instead, pickups are stored
    # with firebase_run_id and centerline_id columns set. We use this to get the
    # (centerline, curb) combinations this run touched. We then find all blockface statistics
    # for the given centerlines. Then we filter out statistics with unmatched curbs: e.g. if
    # a run went only up the left side of Polk, we'll match both left and right sides, then
    # filter out the right side.
    pickups = session.query(Pickup).filter(
        Pickup.firebase_run_id == run_id).all()
    if len(pickups) == 0:
        raise ValueError(
            f"No pickups matching a run with ID {run_id} in the database.")

    curb_map = defaultdict(list)
    centerline_ids = set()
    for pickup in pickups:
        centerline_ids.add(pickup.centerline_id)
        curb_map[pickup.centerline_id].append(pickup.curb)

    statistics = (session.query(BlockfaceStatistic).filter(
        BlockfaceStatistic.centerline_id.in_(centerline_ids)).all())
    statistics_filtered = []
    for statistic in statistics:
        if statistic.curb in curb_map[statistic.centerline_id]:
            statistics_filtered.append(statistic)

    response_map = dict()
    for statistic in statistics_filtered:
        if statistic.centerline_id not in response_map:
            centerline_dict = centerline_obj_to_dict(statistic.centerline)
            response_map[statistic.centerline_id] = {
                'centerline': centerline_dict,
                'statistics': {
                    'left': None,
                    'middle': None,
                    'right': None
                }
            }
        statistic_dict = blockface_statistic_obj_to_dict(statistic)
        response_map[statistic.centerline_id]['statistics'][
            statistic.curb] = statistic_dict
    return [response_map[centerline_id] for centerline_id in response_map]
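
For illustration, a hypothetical call and the shape of the value run_get returns
(the run ID is invented):

result = run_get('hypothetical-run-id', profile='dev')
# result is a list with one entry per matched centerline the run touched:
# [{'centerline': {...},
#   'statistics': {'left': {...} or None, 'middle': {...} or None, 'right': {...} or None}},
#  ...]
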
Example #7
def delete_sector(sector_name, profile, wait=5, force_download=False):
    """Deletes a sector in the database."""
    with OptionalCloudSQLProxyProcess(profile,
                                      wait=wait,
                                      force_download=force_download):
        session = db_sessionmaker(profile)()

        sector = session.query(Sector).filter_by(
            name=sector_name).one_or_none()
        if sector is None:
            raise ValueError(
                f"Cannot delete sector {sector_name!r}: no such sector in the database."
            )

        session.delete(sector)
        try:
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            engine = session.bind
            session.close()
            engine.dispose()
Example #8
def coord_get(coord, profile, include_na=False):
    """
    Returns blockface statistics for the centerline closest to the given coordinate.

    Parameters
    ----------
    coord: (x, y) coordinate tuple
        Origin point for the snapped selection.
    profile: str
        The database to connect to (e.g. "dev") as configured in ~/.rubbish/config.
    include_na : bool, optional
        Whether or not to include blockfaces for which blockface statistics do not exist yet.
        Defaults to ``False``.

        Blockfaces with no statistics have not met the minimum threshold for assignment of
        statistics yet (at time of writing, this means that no runs touching at least 50% of
        the blockface have been saved to the database yet).

        When ``include_na=True``, the blockfaces returned will be that of the nearest centerline.

        When ``include_na=False``, the blockfaces returned will be that of the nearest centerline
        having at least one blockface statistic.
    
    Returns
    -------
    ``dict``
        Query result.    
    """
    session = db_sessionmaker(profile)()
    coord = shapely.geometry.Point(*coord)

    def get_stats_objs(session, centerline_id):
        return (session.query(BlockfaceStatistic).filter(
            BlockfaceStatistic.centerline_id == centerline_id).all())

    centerline = None
    if include_na:
        centerline = nearest_centerline_to_point(coord, session)
        stats_objs = get_stats_objs(session, centerline.id)
    else:
        stats_objs = []
        rank = 0
        while len(stats_objs) == 0:
            centerline = nearest_centerline_to_point(coord, session, rank=rank)
            stats_objs = get_stats_objs(session, centerline.id)
            rank += 1
            if rank >= 10:
                raise ValueError(
                    "Could not find non-null blockface statistics nearby.")

    stats_dicts = blockface_statistic_objs_to_dicts(stats_objs)
    statistics = {stat_dict['curb']: stat_dict for stat_dict in stats_dicts}
    if 'left' not in statistics:
        statistics['left'] = None
    if 'right' not in statistics:
        statistics['right'] = None
    if 'middle' not in statistics:
        statistics['middle'] = None
    return {
        "centerline": centerline_obj_to_dict(centerline),
        "statistics": statistics
    }
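
A hypothetical call (coordinates invented; shapely.geometry.Point(*coord) treats the
tuple as (x, y), i.e. (longitude, latitude) for SRID-4326 data):

result = coord_get((-122.42, 37.79), profile='dev', include_na=True)
# result == {'centerline': {...},
#            'statistics': {'left': ..., 'middle': ..., 'right': ...}}
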
Example #9
def sector_get(sector_name, profile, include_na=False, offset=0):
    """
    Returns all blockface statistics for blockfaces contained in a sector. Only blockfaces located
    completely within the sector count. Blockfaces touching sector edges are ok, blockfaces
    containing some points outside of the sector are not.

    Parameters
    ----------
    sector_name: str
        Unique sector name.
    profile: str
        The database to connect to (e.g. "dev") as configured in ~/.rubbish/config.
    include_na : bool, optional
        Whether or not to include blockfaces for which blockface statistics do not yet exist.
        Defaults to ``False``.

        Blockfaces with no statistics have not met the minimum threshold for assignment of
        statistics yet (at time of writing, this means that no runs touching at least 50% of
        the blockface have been saved to the database yet).

        The additional blockfaces returned when ``include_na=True`` is set will only have
        their geometry field set. All other fields will be `None`.
    offset : int, optional
        The results offset to use. Defaults to `0`, e.g. no offset.

        To prevent inappropriately large requests from overloading the database, this API is
        limited to returning 1000 items at a time. Use this parameter to fetch more results
        for a query exceeding this limit.

    Returns
    -------
    ``list`` of ``dict``
        Query result.
    """
    session = db_sessionmaker(profile)()
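    # NOTE: the documented 1000-item limit and ``offset`` parameter are not applied to
    # the queries in this excerpt. Note also that ST_Intersects below matches centerlines
    # that merely cross the sector boundary; the docstring's "completely within" rule
    # would correspond to ST_CoveredBy.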
    sector = (session.query(Sector).filter(
        Sector.name == sector_name).one_or_none())
    if sector is None:
        raise ValueError(f"No {sector_name!r} sector in the database.")
    centerlines = (session.query(Centerline).filter(
        Centerline.geometry.ST_Intersects(sector.geometry)).all())
    centerline_ids = set(centerline.id for centerline in centerlines)
    statistics = (session.query(BlockfaceStatistic).filter(
        BlockfaceStatistic.centerline_id.in_(centerline_ids)).all())

    response_map = dict()
    for statistic in statistics:
        if statistic.centerline_id not in response_map:
            centerline_dict = centerline_obj_to_dict(statistic.centerline)
            response_map[statistic.centerline_id] = {
                'centerline': centerline_dict,
                'statistics': {
                    'left': None,
                    'middle': None,
                    'right': None
                }
            }
        statistic_dict = blockface_statistic_obj_to_dict(statistic)
        response_map[statistic.centerline_id]['statistics'][
            statistic.curb] = statistic_dict
    if include_na:
        for centerline in centerlines:
            if centerline.id not in response_map:
                response_map[centerline.id] = {
                    'centerline': centerline_obj_to_dict(centerline),
                    'statistics': {
                        'left': None,
                        'middle': None,
                        'right': None
                    }
                }
    return [response_map[centerline_id] for centerline_id in response_map]
Example #10
def radial_get(coord, distance, profile, include_na=False, offset=0):
    """
    Returns all blockface statistics for blockfaces containing at least one point at most
    ``distance`` away from ``coord``.

    Parameters
    ----------
    coord : (x, y) coordinate tuple
        Centerpoint for the scan.
    distance : int
        Distance (in meters) from centerpoint to scan for.
    profile: str
        The database to connect to (e.g. "dev") as configured in ~/.rubbish/config.
    include_na : bool, optional
        Whether or not to include blockfaces for which blockface statistics do not yet exist.
        Defaults to ``False``.

        Blockfaces with no statistics have not met the minimum threshold for assignment of
        statistics yet (at time of writing, this means that no runs touching at least 50% of
        the blockface have been saved to the database yet).

        The additional blockfaces returned when ``include_na=True`` is set will only have
        their geometry field set. All other fields will be `None`.
    offset : int, optional
        The results offset to use. Defaults to `0`, e.g. no offset.

        To prevent inappropriately large requests from overloading the database, this API is
        limited to returning 1000 items at a time. Use this parameter to fetch more results
        for a query exceeding this limit.

    Returns
    -------
    ``list`` of ``dict``
        Query result.
    """
    session = db_sessionmaker(profile)()
    coord = f'SRID=4326;POINT({coord[0]} {coord[1]})'
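    # NOTE: the documented 1000-item limit and ``offset`` parameter are likewise not
    # applied in this excerpt. Also, PostGIS ST_Distance between plain SRID-4326
    # geometries returns degrees rather than meters, so a threshold in meters generally
    # requires a geography cast or ST_DWithin.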
    centerlines = (session.query(Centerline).filter(
        Centerline.geometry.ST_Distance(coord) < distance).all())
    centerline_ids = set(centerline.id for centerline in centerlines)
    statistics = (session.query(BlockfaceStatistic).filter(
        BlockfaceStatistic.centerline_id.in_(centerline_ids)).all())
    response_map = dict()
    for statistic in statistics:
        if statistic.centerline_id not in response_map:
            centerline_dict = centerline_obj_to_dict(statistic.centerline)
            response_map[statistic.centerline_id] = {
                'centerline': centerline_dict,
                'statistics': {
                    'left': None,
                    'middle': None,
                    'right': None
                }
            }
        statistic_dict = blockface_statistic_obj_to_dict(statistic)
        response_map[statistic.centerline_id]['statistics'][
            statistic.curb] = statistic_dict
    if include_na:
        for centerline in centerlines:
            if centerline.id not in response_map:
                response_map[centerline.id] = {
                    'centerline': centerline_obj_to_dict(centerline),
                    'statistics': {
                        'left': None,
                        'middle': None,
                        'right': None
                    }
                }
    if len(response_map) == 0:
        return []
    return [response_map[centerline_id] for centerline_id in response_map]
Example #11
def write_pickups(pickups, profile, check_distance=True, logger=None):
    """
    Writes pickups to the database. This method hosts the primary logic for the overall service's
    POST path.

    Parameters
    ----------
    pickups: list
        A `list` of pickups. Each entry in the list is expected to be a `dict` in the following
        format:

        ```
        {"firebase_id": <str>,
        "firebase_run_id": <str>,
        "type": <str, from RUBBISH_TYPES>,
        "timestamp": <int; UTC UNIX timestamp>,
        "curb": <{left, right, middle, None}>,
        "geometry": <str; POINT in WKT format>}
        ```

        The list is to contain *all* pickups associated with an individual run.
    profile: str
        The name of the database write_pickups will write to. This named database must either be
        present on disk (written to `$HOME/.rubbish/config`, either manually or using the `set-db`
        admin CLI command), or its connection information must be set using the 
        `RUBBISH_POSTGIS_CONNSTR` environment variable.
    check_distance: bool, default `True`
        If set to `False`, the points will be matched to the nearest centerline in the database,
        regardless of distance. If `True`, points that are too far from any centerlines in the
        database (according to a heuristic threshold) will be discarded. This value should always
        be set to `True` in `prod`, but may be set to `False` for local testing purposes.
    logger: LogHandler object or None
        If set, this method will write logs using this log handler. See the definition of
        `LogHandler` in `python/functions/main.py`. If not set, logging is omitted.
    """
    # TODO: add debug-level logging. The logger is already being passed down by the function.
    # if logger is not None:
    #     logger.log_struct({"level": "debug", "message": "Got to write_pickups."})

    if len(pickups) == 0:
        return

    # validate input and perform type conversions
    for pickup in pickups:
        if not isinstance(pickup, dict):
            raise ValueError(
                f"Pickups must be of type dict, but found pickup of type {type(pickup)} instead."
            )
        for attr in ["firebase_id", "type", "timestamp", "curb", "geometry"]:
            if attr not in pickup:
                raise ValueError(
                    f"Found pickup missing required attribute {attr}.")
        try:
            geom = shapely.wkt.loads(pickup["geometry"])
        except shapely.errors.WKTReadingError:
            raise shapely.errors.WKTReadingError(
                f"Pickups include invalid geometry {pickup['geometry']!r}.")
        if not isinstance(geom, Point):
            raise ValueError(f"Found geometry of invalid type {type(geom)}.")
        pickup["geometry"] = geom
        for int_attr in ["timestamp"]:
            try:
                pickup[int_attr] = int(float(pickup[int_attr]))
            except (ValueError, TypeError):
                raise ValueError(
                    f"Found pickup with {int_attr} of non-castable type.")
        # the five minutes of padding are just in case there is clock skew
        if pickup["timestamp"] > (datetime.utcnow() +
                                  timedelta(minutes=5)).timestamp():
            raise ValueError(
                f"Found pickup with greater than expected UTC timestamp {pickup['timestamp']}. "
                f"Current server UTC UNIX time is {datetime.utcnow()}. Are you sure your "
                f"timestamp is actually a UTC UNIX timestamp?")
        curb = pickup["curb"]
        if curb not in [None, "left", "right", "middle"]:
            raise ValueError(
                f"Found pickup with invalid curb value {curb} "
                f"(must be one of 'left', 'right', 'middle', None).")
        if pickup["type"] not in RUBBISH_TYPES:
            raise ValueError(
                f"Found pickup with type {pickup['type']!r} not in valid types {RUBBISH_TYPES!r}."
            )

    session = db_sessionmaker(profile)()

    # Snap points to centerlines.
    #
    # Recall that pickup locations are imprecise due to GPS error. Because of this, the
    # simplest nearest-point matching algorithm is not enough: that strategy will assign points
    # to streets that were not actually included in the run, purely because of inaccurate GPS
    # triangulation.
    #
    # We use an iterative greedy algorithm instead. Points are initially matched to the nearest
    # centerline, but the result is thrown out if the centerline is not at least 50% covered
    # (in this context "coverage" means "distance between the first and last point assigned to
    # the centerline").
    #
    # Points failing this constraint are rematched to their second nearest centerline instead.
    # Points failing this constraint again are rematched to their third choice, and so on,
    # until the constraint is everywhere satisfied.
    #
    # This is a relatively simple heuristic algorithm that has some notable edge cases
    # (small centerlines, centerlines with just a single pickup) but should hopefully be robust
    # enough, given an accurate enough GPS.
    #
    # Curbs are ignored. Pickups with no curb set are matched to a curb in a separate routine.
    # This helps keep things simple.
    needs_work = True
    points_needing_work = pickups
    rank = 0
    centerlines = dict()
    while needs_work:
        for point in points_needing_work:
            point_geom = point["geometry"]
            centerline = nearest_centerline_to_point(
                point_geom, session, rank=rank, check_distance=check_distance)
            centerline_geom = to_shape(centerline.geometry)
            lr = centerline_geom.project(point_geom,
                                         normalized=True)  # linear reference
            c_id = centerline.id
            if c_id not in centerlines:
                centerlines[c_id] = (centerline, (lr, lr), [point])
            else:
                lr_min, lr_max = centerlines[c_id][1]
                points = centerlines[c_id][2] + [point]
                lr_min = min(lr_min, lr)
                lr_max = max(lr_max, lr)
                centerlines[c_id] = (centerline, (lr_min, lr_max), points)

        points_needing_work = []
        needs_work = False
        for c_id in list(centerlines):
            lr_min, lr_max = centerlines[c_id][1]
            if lr_max - lr_min < 0.5:
                points_needing_work += centerlines[c_id][2]
                del centerlines[c_id]
                needs_work = True

        rank += 1

        # If no centerline achieves 50 percent coverage in the first pass, no centerline will pass
        # this threshold ever. To simplify the logic, we do not even bother inserting these points
        # into the database at all, we just raise a ValueError. "Every run must have coverage of
        # at least one street" is a meaningful business rule.
        if len(centerlines) == 0:
            raise ValueError(
                "This run was not inserted into the database because it violates the constraint "
                "that runs must cover at least one centerline.")

    # `centerlines` is a map with `centerline_id` keys and
    # (centerline_obj, (min_lr, max_lr), [...pickups]) values.

    # This code block handles inference of side-of-street for point distributions with
    # incomplete curb data.
    #
    # If every point assigned to a centerline has a curb set, we assume the user is following
    # procedure and faithfully indicating what side of the street the pickup occurred on, and
    # we do not modify any of the values.
    #
    # If any point fails this condition, we assume the user forgot or neglected to set this
    # flag for at least some of the pickups. In this case we use a statistical test.
    #
    # Determine whether the distribution is unimodal (pickups on one side of the street) or
    # bimodal (pickups on both sides). We expect a normal distribution on the centerline
    # (ignoring street width displacement!) with 2σ=~±8 meters (estimated GPS inaccuracy from
    # https://bit.ly/3elXK0V). If the normality hypothesis is rejected (p <= 0.05), we assume
    # both sides were run and assign points to each side.
    #
    # The actual normality test statistic used is the Shapiro-Wilk Test. For more information:
    # https://machinelearningmastery.com/a-gentle-introduction-to-normality-tests-in-python/.
    #
    # This worked relatively well for Polk Street, but Polk Street is very wide and we had a
    # *lot* of data to work with. With small data volumes, narrow streets, and relatively
    # heterogeneous side-of-street rubbish distributions, this gets very hand-wavey. For this
    # reason it's *super important* to encourage the user to set side-of-street themselves.
    centerlines_needing_curb_inference = set()
    for c_id in centerlines:
        for pickup in centerlines[c_id][2]:
            if pickup['curb'] is None:
                centerlines_needing_curb_inference.add(c_id)
                break
    for c_id in centerlines_needing_curb_inference:
        dists = []
        sides = []
        centerline_geom = to_shape(centerlines[c_id][0].geometry)
        pickups = centerlines[c_id][2]

        # Shapiro requires n>=3 points, so if there are only 1 or 2, just set them to the
        # first curb value that appears; there's just not much we can do in this case. ¯\_(ツ)_/¯
        if len(pickups) < 3:
            curb = None
            for pickup in pickups:
                if pickup['curb'] is not None:
                    curb = pickup['curb']
                    break
            if curb is None:
                # Assumption (not shown in the source): no pickup on this centerline has
                # a curb set, so fall back to the geometric side of the first pickup.
                side = point_side_of_centerline(pickups[0]['geometry'],
                                                centerline_geom)
                curb = 'left' if side == 0 else 'right'
            for pickup in pickups:
                pickup['curb'] = curb
            continue

        for pickup in pickups:
            pickup_geom = pickup['geometry']
            dists.append(pickup_geom.distance(centerline_geom))
            sides.append(point_side_of_centerline(pickup_geom,
                                                  centerline_geom))
        _, p = shapiro(dists)
        if p > 0.05:
            # Gaussian unimodal case. Evidence that points are on one side of the street.
            # Pick the majority class.
            c = Counter(sides)
            curb = 'left' if c[0] > c[1] else 'right'
            for pickup in pickups:
                pickup['curb'] = curb
        else:
            # Non-Gaussian (bimodal) case. Evidence that points are on both sides of the street.
            # Use the user-set value if it's present, otherwise pick the closest match.
            for i, pickup in enumerate(pickups):
                if pickup['curb'] is None:
                    pickup['curb'] = 'left' if sides[i] == 0 else 'right'

    # From this point on, assume all curbs are set.

    # Construct a key-value map with blockface identifier keys and lists of pickup objects
    # as values. We will pass over this map in the next step to construct blockface statistics.
    blockface_pickups = dict()
    blockface_lrs = dict()
    for c_id in centerlines:
        centerline_obj = centerlines[c_id][0]
        centerline_geom = to_shape(centerline_obj.geometry)
        pickups = centerlines[c_id][2]

        for pickup in pickups:
            pickup_geom = pickup['geometry']

            linear_reference = centerline_geom.project(pickup_geom,
                                                       normalized=True)
            snapped_pickup_geom = centerline_geom.interpolate(linear_reference,
                                                              normalized=True)

            pickup_obj = Pickup(
                geometry=f'SRID=4326;{str(pickup_geom)}',
                snapped_geometry=f'SRID=4326;{str(snapped_pickup_geom)}',
                centerline_id=centerline_obj.id,
                firebase_id=pickup['firebase_id'],
                firebase_run_id=pickup['firebase_run_id'],
                type=pickup['type'],
                timestamp=datetime.utcfromtimestamp(pickup['timestamp']),
                linear_reference=linear_reference,
                curb=pickup['curb'])
            session.add(pickup_obj)

            blockface_id_tup = (centerline_obj, pickup_obj.curb)
            if blockface_id_tup not in blockface_pickups:
                blockface_pickups[blockface_id_tup] = [pickup_obj]
            else:
                blockface_pickups[blockface_id_tup] += [pickup_obj]
            if blockface_id_tup not in blockface_lrs:
                blockface_lrs[blockface_id_tup] =\
                    (pickup_obj.linear_reference, pickup_obj.linear_reference)
            else:
                min_lr, max_lr = blockface_lrs[blockface_id_tup]
                if linear_reference < min_lr:
                    min_lr = linear_reference
                elif linear_reference > max_lr:
                    max_lr = linear_reference
                blockface_lrs[blockface_id_tup] = (min_lr, max_lr)

    # Insert blockface statistics into the database (or update existing ones).
    for blockface_id_tup in blockface_pickups:
        centerline, curb = blockface_id_tup
        pickups = blockface_pickups[blockface_id_tup]
        min_lr, max_lr = blockface_lrs[blockface_id_tup]
        coverage = max_lr - min_lr

        inferred_n_pickups = len(pickups) / coverage
        inferred_pickup_density = inferred_n_pickups / centerline.length_in_meters

        prior_information = (session.query(BlockfaceStatistic).filter(
            BlockfaceStatistic.centerline_id == centerline.id,
            BlockfaceStatistic.curb == curb).one_or_none())

        kwargs = {'centerline_id': centerline.id, 'curb': curb}
        if prior_information is None:
            blockface_statistic = BlockfaceStatistic(
                num_runs=1,
                rubbish_per_meter=inferred_pickup_density,
                **kwargs)
            session.add(blockface_statistic)
        else:
            updated_rubbish_per_meter = (
                (prior_information.rubbish_per_meter *
                 prior_information.num_runs + inferred_pickup_density) /
                (prior_information.num_runs + 1))
            # Update the existing statistic in place so the session tracks the change;
            # a new BlockfaceStatistic object would not be persisted unless added.
            prior_information.num_runs += 1
            prior_information.rubbish_per_meter = updated_rubbish_per_meter

    try:
        session.commit()
    except:
        session.rollback()
        raise
    finally:
        session.close()
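
For illustration, a minimal hypothetical payload matching the documented format (IDs,
timestamp, and coordinates are invented; 'tobacco' is assumed to be one of RUBBISH_TYPES):

pickups = [{
    "firebase_id": "hypothetical-pickup-id",
    "firebase_run_id": "hypothetical-run-id",
    "type": "tobacco",                    # assumed member of RUBBISH_TYPES
    "timestamp": 1609459200,              # UTC UNIX timestamp
    "curb": "left",
    "geometry": "POINT (-122.42 37.79)",  # WKT point
},
    # ...plus the rest of the run's pickups; a run must cover at least 50% of
    # some centerline or write_pickups raises ValueError.
]
write_pickups(pickups, profile='dev', check_distance=False)
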
Example #12
def update_zone(osmnx_name,
                name,
                profile,
                centerlines=None,
                wait=5,
                force_download=False):
    """
    Updates a zone, plopping the new centerlines into the database.

    The `osmnx_name` and `name` arguments map to database fields.
    
    The optional `centerlines` argument is used to avoid a network request in testing.
    """
    with OptionalCloudSQLProxyProcess(profile,
                                      wait=wait,
                                      force_download=force_download):
        session = db_sessionmaker(profile)()

        # insert zone
        # NOTE: flush writes DB ops to the database's transactional buffer without actually
        # performing a commit (and closing the transaction). This is important because it
        # allows us to reserve a primary key ID from the corresponding auto-increment
        # sequence, which we need when we use it as a foreign key. See SO#620610.
        if name is None:
            name = osmnx_name
        zone_query = (session.query(Zone).filter(
            Zone.osmnx_name == osmnx_name).one_or_none())
        zone_already_exists = zone_query is not None
        if zone_already_exists:
            zone = zone_query
        else:
            # We need to satisfy the non-null bounding box constraint to flush, but we don't have
            # the centerlines yet so we can't calculate it yet. For now, just use temp bounds.
            temp_bbox = f'SRID=4326;{str(Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]))}'
            zone = Zone(osmnx_name=osmnx_name,
                        name=name,
                        bounding_box=temp_bbox)
            session.add(zone)
            session.flush()

        # modify old and insert new zone generation
        if zone_already_exists:
            zone_generation_query = (session.query(ZoneGeneration).filter(
                ZoneGeneration.zone_id == zone.id).order_by(
                    ZoneGeneration.generation).all())
            next_zone_generation = zone_generation_query[-1].generation + 1
        else:
            next_zone_generation = 0
        zone_generation = ZoneGeneration(zone_id=zone.id,
                                         generation=next_zone_generation,
                                         final_timestamp=None)
        session.add(zone_generation)
        session.flush()

        # insert centerlines
        if centerlines is None:
            G = ox.graph_from_place(osmnx_name,
                                    simplify=True,
                                    network_type="drive")
            _, edges = ox.graph_to_gdfs(G)

            # Centerline name entries may be NaN, a str name, or a list[str] of names. AFAIK there
            # isn't any interesting information in the ordering of names, so we'll use first-wins
            # rules for list[str]. For NaN names, we'll insert an "Unknown" string.
            #
            # Centerline osmid values cannot be NaN, but can map to a list. It's unclear why this
            # is the case.
            names = []
            for edge in G.edges:
                u, v, _ = edge
                names.append(_get_name_for_centerline_edge(G, u, v))
            edges = edges.assign(
                name=names,
                osmid=edges.osmid.map(lambda v: v
                                      if isinstance(v, int) else v[0]))
            centerlines = gpd.GeoDataFrame(
                {
                    "first_zone_generation": zone_generation.id,
                    "last_zone_generation": None,
                    "zone_id": zone.id,
                    "osmid": edges.osmid,
                    "name": edges.name
                },
                index=range(len(edges)),
                geometry=edges.geometry)
            centerlines.crs = "epsg:4326"
            centerlines["length_in_meters"] = centerlines.geometry.map(
                _calculate_linestring_length)

        else:
            centerlines["length_in_meters"] = centerlines.geometry.map(
                _calculate_linestring_length)

        minx, miny, maxx, maxy = centerlines.total_bounds
        poly = Polygon([[minx, miny], [minx, maxy], [maxx, maxy], [maxx, miny],
                        [minx, miny]])
        bbox = f'SRID=4326;{str(poly)}'
        zone.bounding_box = bbox

        # Cap the previous centerline generations (see previous comment).
        previously_current_centerlines = (session.query(Centerline).filter_by(
            zone_id=zone.id, last_zone_generation=None).all())
        for previously_current_centerline in previously_current_centerlines:
            previously_current_centerline.last_zone_generation = next_zone_generation - 1

        # Set the current zone generation's final timestamp.
        current_zone_generation = (session.query(ZoneGeneration).filter_by(
            zone_id=zone.id).order_by(sa.desc(ZoneGeneration.id)).first())
        if current_zone_generation:
            current_zone_generation.final_timestamp = datetime.now()

        session.add(zone)
        session.add(zone_generation)

        engine = session.bind
        try:
            session.commit()
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                centerlines.to_postgis("centerlines",
                                       engine,
                                       if_exists="append")
        except:
            session.rollback()
            raise
        finally:
            session.close()
            engine.dispose()
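
A hypothetical invocation (the place name is illustrative; with name=None the zone name
falls back to the osmnx_name, and centerlines are fetched via ox.graph_from_place):

update_zone('San Francisco, California, USA', None, profile='dev')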