Esempio n. 1
0
    def query_cell(self, event, context):
        """Return the distinct city names stored for the H3 cell in ``event``.

        The ancestors of ``event['h3_address']`` from ``min_res`` up to the
        smaller of the cell's own resolution and ``max_res`` are joined with
        ``#`` and used as a ``begins_with`` prefix on ``CellLocationIndex``,
        inside the partition identified by the cell's base cell.
        ``min_res`` and ``max_res`` are names defined outside this method.

        :param event: mapping holding the queried cell under ``'h3_address'``.
        :param context: not used by this method.
        :return: list of city names without duplicates, in first-seen order.
        """
        address = event['h3_address']
        partition_key = str(h3.h3_get_base_cell(address))
        # Cap the prefix depth at max_res even when the queried cell is finer.
        finest_res = min(h3.h3_get_resolution(address), max_res)

        ancestors = [
            h3.h3_to_parent(address, res)
            for res in range(min_res, finest_res + 1)
        ]
        resp = self.query_db_table(
            "ParentCell = :parentcell AND begins_with(CellLocationIndex, :index)",
            {
                ":parentcell": {"S": partition_key},
                ":index": {"S": "#".join(ancestors)},
            },
        )

        # Dict keys keep insertion order, which gives an ordered de-duplication.
        seen = {}
        for item in resp['Items']:
            seen.setdefault(item['CityName']['S'], None)
        return list(seen)
def occupied_neighbors(hex,
                       density_tgt,
                       density_max,
                       N,
                       hex_density,
                       method='siblings'):
    """
    Compute the density clip value for a hex from how many surrounding hexs
    already reach the target density.

    :param hex: hex to query
    :param density_tgt: target density for hexs at this resolution
    :param density_max: maximum density at this resolution
    :param N: number of surrounding hexs at or above ``density_tgt`` that is
        tolerated before the clip is raised; the multiplier applied to
        ``density_tgt`` is ``max(1, count - N + 1)``
    :param hex_density: dictionary of densities at each hex; every value must
        expose a ``'clipped'`` entry. Hexs missing from it are ignored.
    :param method: either ``'siblings'`` (children of the parent of ``hex`` at
        the resolution of ``hex``) or ``'neighbors'`` (``h3.hex_range(hex, 1)``)
    :return: ``density_tgt`` scaled by the multiplier above, never more than
        ``density_max``
    :raises ValueError: if ``method`` is not one of the two supported values
    """
    # Validate first: an unknown method used to surface later as an
    # UnboundLocalError on the candidate list.
    if method not in ('siblings', 'neighbors'):
        raise ValueError(
            "method must be 'siblings' or 'neighbors', got {!r}".format(method))

    res = h3.h3_get_resolution(hex)
    if method == 'siblings':
        candidates = h3.h3_to_children(h3.h3_to_parent(hex, res - 1), res)
    else:
        candidates = h3.hex_range(hex, 1)

    neighbors_above_tgt = sum(
        1 for n in candidates
        if n in hex_density and hex_density[n]['clipped'] >= density_tgt)

    return min(density_max,
               density_tgt * max(1, neighbors_above_tgt - N + 1))
Esempio n. 3
0
def resolution_downsampling(gdf, hex_col, coarse_resolution, agg):
    '''
    Regroup hexagons under their parent at a coarser resolution and aggregate
    their metrics (e.g. go from hexagon resolution 9 to 6).

    Parameters
    ----------

    gdf: GeoDataFrame
         GeoDataFrame holding one hexagon per row. It is copied, not modified.

    hex_col: str
             Name of the column with the hex id.

    coarse_resolution: int
                       Resolution of the parent hexagons to group by.

    agg:
         Aggregation specification forwarded unchanged to ``groupby(...).agg``.

    Returns
    -------

    gdfc: GeoDataFrame
          One row per parent hexagon, in a column named
          ``hex_<coarse_resolution>``, with the aggregated metrics, a geometry
          built by ``geo_boundary_to_polygon`` and the CRS of ``gdf``.

    '''

    parent_col = 'hex_{}'.format(coarse_resolution)

    def _to_coarse(hex_id):
        return h3.h3_to_parent(hex_id, coarse_resolution)

    working = gdf.copy()
    working[parent_col] = working[hex_col].apply(_to_coarse)

    aggregated = working.groupby([parent_col]).agg(agg).reset_index()
    polygons = aggregated[parent_col].apply(geo_boundary_to_polygon)

    return gpd.GeoDataFrame(aggregated, geometry=polygons, crs=gdf.crs)
Esempio n. 4
0
def add_h3_ids_to_points(df: pd.DataFrame, h3_max: int,
                         h3_min: int) -> pd.DataFrame:
    """Add Uber H3 ids to the point geometries in a Spatially Enabled DataFrame.

    The id columns are added to ``df`` in place, and the same object is
    returned.

    :param df: Spatially Enabled DataFrame with point geometries to be aggregated.
    :param h3_max: Integer maximum H3 grid level defining the smallest geographic hex area - must be larger than the minimum.
    :param h3_min: Integer minimum H3 grid level defining the largest geographic hex area - must be smaller than the maximum.
    :return: Pandas DataFrame with Uber H3 ids added for all the resolutions between the maximum and minimum.
    :raises ValueError: if ``h3_max`` is not strictly greater than ``h3_min``.
    """
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if not h3_max > h3_min:
        raise ValueError(
            "h3_max ({}) must be greater than h3_min ({})".format(
                h3_max, h3_min))

    # H3 levels ordered from highest to lowest resolution
    h3_lvl_lst = sorted(_get_h3_range_lst(h3_min, h3_max), reverse=True)

    # calculate the highest resolution H3 id for each location
    first_level = h3_lvl_lst[0]
    first_col = _h3_col(first_level)
    df[first_col] = df.SHAPE.swifter.apply(lambda geom: h3.geo_to_h3(
        geom.centroid[1], geom.centroid[0], first_level))

    # derive every lower resolution id from the highest resolution one
    for h3_lvl in h3_lvl_lst[1:]:
        df[_h3_col(h3_lvl)] = df[first_col].swifter.apply(
            lambda first_val: h3.h3_to_parent(first_val, h3_lvl))

    return df
Esempio n. 5
0
 def load_cities(self, event, context):
     """Fill the database table with one item per (city, H3 cell) pair.

     Reads the features of ``usa_cities.geojson``, fills every named city
     geometry with cells at ``max_res`` and writes one item per cell through
     a batch writer. The range key of an item is the ``#``-joined chain of
     the cell's ancestors from ``min_res`` to ``max_res`` followed by the
     city id, which is the feature's position in the file.

     ``event`` and ``context`` are not used.
     """
     with open('usa_cities.geojson', 'r') as geoj:
         features = json.load(geoj)['features']
         table = dynamodb_resource.Table(db_table.name)
         print("Loading database table")
         resolutions = range(min_res, max_res + 1)
         with table.batch_writer() as writer:
             for idx, feature in enumerate(features):
                 city_id = str(idx)
                 city_name = feature['properties']['NAME']
                 # Features without a name are skipped but still counted
                 # by the progress message below.
                 if city_name:
                     cells = h3.polyfill(feature['geometry'],
                                         max_res,
                                         geo_json_conformant=True)
                     for cell in cells:
                         ancestors = [
                             h3.h3_to_parent(cell, res)
                             for res in resolutions
                         ]
                         writer.put_item(Item={
                             'ParentCell': str(h3.h3_get_base_cell(cell)),
                             'CellLocationIndex':
                             "#".join(ancestors) + "#" + city_id,
                             'CityName': city_name,
                             'CityID': city_id,
                         })
                 if idx % 1000 == 0:
                     print("Processed {} cities".format(idx))
Esempio n. 6
0
def cell_h3_downsampling(df, cell_id_col, metric_col, coarse_resolution, metric_type):
    """Aggregates a given attribute of h3 cells to a given coarser resolution level

    Numeric attributes are averaged over the cells sharing a parent.
    Categorical attributes are reduced to the most frequent category among
    those cells; ties go to the category that sorts first.

    Parameters:
    df (pandas dataframe): dataframe with h3 ids and attributes for aggregation
    cell_id_col (string): name of h3 id column
    metric_col (string): name of a column for aggregation
    coarse_resolution (integer): Coarser h3 resolution for aggregation
    metric_type (string): attribute type, 'numeric' or 'categorical'
    Returns:
    Pandas dataframe with two columns, named cell_id_col (holding the coarse
    cell ids) and metric_col (holding the aggregated value)
    Raises:
    ValueError: if metric_type is not 'numeric' or 'categorical'
   """
    # Reject unknown types up front; this used to fail later with an
    # UnboundLocalError on the result frame.
    if metric_type not in ('numeric', 'categorical'):
        raise ValueError(
            "metric_type must be 'numeric' or 'categorical', got {!r}".format(
                metric_type))

    df_coarse = df.copy()
    coarse_id_col = 'cell_id_{}'.format(coarse_resolution)
    df_coarse[coarse_id_col] = df_coarse[cell_id_col].apply(
        lambda x: h3.h3_to_parent(x, coarse_resolution))

    if metric_type == 'numeric':
        dfc = df_coarse.groupby(coarse_id_col)[[metric_col]].mean().reset_index()
    else:
        # Occurrences of each category inside each coarse cell.
        counts = (df_coarse.groupby([coarse_id_col, metric_col])
                  .size()
                  .reset_index(name='count'))
        # Highest count first within each coarse cell, so that first() picks
        # the mode. Sorting by the category before the count always returned
        # the alphabetically smallest category instead.
        dfc = (counts.sort_values(by=[coarse_id_col, 'count', metric_col],
                                  ascending=[True, False, True])
               .groupby(coarse_id_col, as_index=False, sort=False)
               .first()
               .drop(columns='count'))
    dfc.columns = [cell_id_col, metric_col]
    return dfc
def downscale_h3(time_win_df,
                 agg_brothers,
                 downscale_size=2,
                 h3_index_col="h3_index"):
    """Merge groups of sibling H3 cells into their parent cell.

    Starting at the lowest resolution present in the data and moving
    ``downscale_size`` resolutions down (coarser), every cell at the current
    resolution whose parent has at least 5 of its children present in the
    frame is replaced, together with those siblings, by a single row for the
    parent.

    :param time_win_df: DataFrame of cells. ``reset_index()`` is called on it
        and ``h3_index_col`` must be a column afterwards
        (presumably the frame is indexed by the H3 index - TODO confirm).
    :param agg_brothers: callable receiving the DataFrame of the cells to
        merge. It returns the merged row (it must support item assignment and
        be assignable to a row through ``.loc``), or ``False`` to veto the
        merge.
    :param downscale_size: number of consecutive resolutions to process.
    :param h3_index_col: name of the column holding the H3 index.
    :return: the processed frame indexed by ``h3_index_col``, with an extra
        ``h3_res`` column.
    """
    # TODO: 2nd for loop should be a while with selected res at top
    # Only with this number of brothers the hexagons will be scaled (max = 7)
    n_min_brothers_to_scale = 5

    time_win_h3 = time_win_df.reset_index()
    time_win_h3["h3_res"] = time_win_h3[h3_index_col].apply(
        h3.h3_get_resolution)
    # Resolutions to process: the minimum one found, then one coarser per step.
    downscale_resulutions = range(time_win_h3["h3_res"].min(),
                                  time_win_h3["h3_res"].min() - downscale_size,
                                  -1)

    # child_h3_res_depth counts the passes already done, i.e. how many finer
    # resolutions below the current one may still hold unmerged cells.
    for child_h3_res_depth, downscale_res in enumerate(downscale_resulutions):
        print("Auto downscale h3 resolution:", downscale_res)
        for idx, row in time_win_h3.iterrows():
            # Rows are dropped from time_win_h3 inside this loop while
            # iterrows() keeps yielding them, so skip the ones already removed.
            if idx not in time_win_h3.index:
                continue

            h3idx = row[h3_index_col]
            cell_res = row["h3_res"]
            # If it is a different res than the one we are trying to downscale, skip
            if cell_res != downscale_res:
                continue

            parent = h3.h3_to_parent(h3idx, cell_res - 1)
            # finding all the brother cells
            brother_cells = list(h3.h3_to_children(parent, cell_res))
            # don't scale if fewer than X brothers are present in the frame
            if time_win_h3[h3_index_col].isin(
                    brother_cells).sum() < n_min_brothers_to_scale:
                continue

            # also collect the parent's descendants at the finer resolutions
            # handled by the previous passes
            for childh3res in range(1, child_h3_res_depth + 1):
                brother_cells.extend(
                    list(h3.h3_to_children(parent, cell_res + childh3res)))

            brothers_df = time_win_h3[time_win_h3[h3_index_col].isin(
                brother_cells)]

            agg_result = agg_brothers(brothers_df)

            # the callback vetoes the merge by returning the False object
            if agg_result is False:
                continue
            # set the cols to the parent values
            agg_result[h3_index_col] = parent
            agg_result["h3_res"] = cell_res - 1
            # the current row becomes the parent row
            time_win_h3.loc[idx] = agg_result
            # drop the rest of the brothers
            time_win_h3.drop([i for i in brothers_df.index if i != idx],
                             inplace=True)

    return time_win_h3.set_index(h3_index_col)
def hex_occupancy(hexs, level=None):
    """
    converts list of hexs to level (if provided) and returns a dictionary with keys of hexs and values of counts
    :param hexs: list of hexs
    :param level: desired level (all provided hexs should be below or equal to level);
        ``None`` counts the hexs as given. Level 0 is honoured like any other level.
    :return: dictionary mapping each (possibly coarsened) hex to its number of occurrences
    """
    counts = dict()
    for h in hexs:
        # Compare against None: a plain truthiness test would silently skip
        # the conversion for level 0.
        if level is not None:
            h = h3.h3_to_parent(h, level)
        counts[h] = counts.get(h, 0) + 1
    return counts
Esempio n. 9
0
    def to_parent(self) -> Tile:
        """Maps current tile to parent Tile object.

        The parent id is computed by the library matching ``self.grid_type``
        and converted back to a tile with ``self.id_to_tile``.

        Returns
        -------
        Tile

        Raises
        ------
        ValueError
            If ``self.grid_type`` is not "s2", "h3", "bing" or "quadtree".
        """

        if self.grid_type == "s2":
            parent_id = s2.s2_to_parent(self.tile_id)
        elif self.grid_type == "h3":
            parent_id = h3.h3_to_parent(self.tile_id)
        elif self.grid_type in ("bing", "quadtree"):
            parent_id = quadtree.tile_to_parent(self.tile_id)
        else:
            # Without this branch an unknown grid type failed further down
            # with an UnboundLocalError on parent_id.
            raise ValueError("Unsupported grid type: {!r}".format(
                self.grid_type))

        return self.id_to_tile(parent_id)
def _get_cell(gjson, resolution, parent_resolution=1, keep_wkt=False):
    """Build a DataFrame of the H3 cells covering the first geometry of ``gjson``.

    The geometry is filled with cells at ``resolution``. When the fill yields
    no cell, the single cell containing the first vertex of the geometry's
    first ring is used instead.

    :param gjson: object whose ``.values[0]`` is a GeoJSON-like geometry
        mapping with a ``"coordinates"`` entry.
    :param resolution: H3 resolution of the generated cells.
    :param parent_resolution: resolution used for the ``id_parent`` column.
    :param keep_wkt: when true, every row gets the WKT of the input ring;
        otherwise each row gets the WKT of its own cell boundary.
    :return: DataFrame with columns ``id``, ``resolution``, ``id_parent``,
        ``group`` (row position divided by 100, truncated) and ``wkt``.
    """
    geometry = gjson.values[0]
    inverse_coords = True

    cell_ids = list(h3.polyfill(geometry, resolution, inverse_coords))
    if len(cell_ids) == 0:
        # Nothing fell inside the geometry: fall back to the cell that
        # contains its first vertex.
        first_vertex = dict(
            zip(("lng", "lat"), geometry["coordinates"][0][0]))
        cell_ids = [h3.geo_to_h3(**first_vertex, res=resolution)]

    temp = pd.Series(cell_ids, name="id").to_frame()
    temp["resolution"] = resolution

    def _parent_of(cell):
        return h3.h3_to_parent(cell, res=parent_resolution)

    temp["id_parent"] = temp["id"].apply(_parent_of)
    temp["group"] = (temp.index / 100).astype(int)

    if keep_wkt:
        temp["wkt"] = _to_wkt(geometry["coordinates"][0])
        return temp

    temp["wkt"] = temp["id"].apply(
        lambda cell: _to_wkt(h3.h3_to_geo_boundary(cell, inverse_coords)))
    return temp
Esempio n. 11
0
 def test_h3_to_parent(self):
     """h3_to_parent at resolution 8 returns the expected parent index."""
     child = '89283082813ffff'
     expected_parent = '8828308281fffff'
     self.assertEqual(h3.h3_to_parent(child, 8), expected_parent,
                      'got the parent back')
Esempio n. 12
0
def sample(hotspots, density_tgt, density_max, R, N):
    """Compute clipped hex densities and per-hotspot targeting probabilities.

    Interactive hotspots are counted per hex at resolution ``R``. The counts
    are then rolled up one resolution at a time down to resolution 1; at each
    step the children of an occupied hex are clipped before being summed into
    it. Finally two CSV files are written to the current directory:
    ``hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv`` with the
    densities of every hex, and
    ``hotspot_tgting_prob_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv``
    with one probability per interactive hotspot. Progress is printed.

    :param hotspots: sized iterable of mappings read through the keys
        ``'location'``, ``'name'``, ``'address'`` and ``'geocode'`` (itself
        holding ``'short_city'`` and ``'short_state'``). Only those accepted
        by ``is_interactive`` (defined outside this function) are used.
    :param density_tgt: target density of a hex at resolution ``R``; it is
        multiplied by 7 for every coarser resolution.
    :param density_max: maximum density of a hex at resolution ``R``, scaled
        the same way.
    :param R: finest resolution considered.
    :param N: number of children at or above the target density that a hex
        may have before the clip applied to its children is raised.
    :return: None

    NOTE(review): ``lone_wolfs`` is read below but never assigned in this
    function - presumably a module-level name; verify it exists.
    NOTE(review): with ``R == 1`` the roll-up loop never runs and
    ``occupied_hexs`` is unbound when Part 3 starts - confirm R > 1 is
    guaranteed by callers.
    """

    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    #         hotspots at highest resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs.  This sets our initial conditions.  I also track "actual" vs
    # clipped density to find discrepancies
    # hex_density will be keys of hexs (all resolutions) with a value of dict(clipped=0, actual=0, unclipped=0)
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1
    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    # NOTE(review): lone_wolfs is not defined in this function (see docstring)
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")
    # build a set of R resolution hexs, occupied child hexs are how we build occupied hexs for parent levels
    child_hexs = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    #         densities of parent hexs
    # ==============================================================

    # iterate through resolutions from just above target to 1, clipping child densities and calculating appropriate hex
    # densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = dict()  # key = parent hex, values = list of child hexs
        # density target and limit at child's resolution (resolution + 1).  This is simply scaled up by increased area
        density = density_tgt * 7**(R - resolution - 1)
        density_limit = density_max * 7**(R - resolution - 1)

        # print(f"res: {resolution+1}, density: {density}, limit: {density_limit}")

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in child_hexs:
            occupied_hexs.setdefault(h3.h3_to_parent(h, resolution), [])
            occupied_hexs[h3.h3_to_parent(h, resolution)].append(h)

        # for each occupied hex at this level, evaluate its children
        for h in occupied_hexs:
            children = occupied_hexs[h]
            # 1. find count of children >= density to possibly elevate the clipping value if the N threshold is met.
            above_density_cnt = 0
            for c in children:
                # NOTE(review): the fallback dict spells its last key 'uncipped'; only 'clipped' is read from it
                # here, so the typo has no effect on the result
                if hex_density.get(c, dict(clipped=0, actual=0,
                                           uncipped=0))['clipped'] >= density:
                    above_density_cnt += 1

            hex_raw_density = 0
            hex_unclipped_density = 0
            # clip children at density unless above_density_cnt exceeds N, then calculate appropriate clipping
            clip = density
            if above_density_cnt > N:
                clip = min(density_limit,
                           density * (above_density_cnt - N + 1))

            # iterate through all children clipping density and calculating density for this hex.  Note this may not be
            # appropriately clipped since we need to evaluate all this hex's siblings (will be done in next iteration
            # of outer loop)
            for c in children:
                hex_density[c]['clipped'] = min(clip,
                                                hex_density[c]['clipped'])
                hex_unclipped_density += hex_density[c]['actual']
                hex_raw_density += hex_density[c]['clipped']

            # set this hex raw density unclipped (will be clipped at parent)
            hex_density[h] = dict(clipped=hex_raw_density,
                                  actual=hex_unclipped_density,
                                  unclipped=hex_raw_density)

        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # occupied hex's at this resolution are child hexs in next resolution
        # (occupied_hexs is a dict; the next pass iterates its keys)
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated.  Start here for descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        #print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']

    print(f"total density of all top level hexs = {top_count}")

    # number of interactive hotspots written to the second CSV
    interactive_hspots = 0

    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
              'w',
              newline='') as csvfile:
        hex_writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow(
            ['hex', 'resolution', 'density_clipped', 'density_actual'])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual']
            ])

    with open(
            f'hotspot_tgting_prob_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w',
            newline='') as csvfile:
        hspot_writer = csv.writer(csvfile,
                                  delimiter=',',
                                  quotechar='"',
                                  quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(['address', 'name', 'city', 'state', 'prob'])
        # iterate through all interactive hotspots and evaluate probability of targeting. this will be outputted to CSV
        for hspot in hotspots:
            # start at top level and iterate through determining odds of selection
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            sibling_total = top_count
            sibling_unclipped = 0
            probability = 1
            # NOTE(review): scale / scale_orig are computed below but never written out by this function
            scale = 1
            # descend from resolution 1 to R: at each level multiply by the share this hex holds of the
            # clipped density of the level above
            for res in range(1, R + 1):
                #for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                prob_orig = probability
                probability *= hex_density[hex]['clipped'] / sibling_total
                scale_orig = scale

                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                # debug trace for one hard-coded hotspot name
                if hspot['name'] == 'blunt-clay-puppy':
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, prob reduced: {prob_orig:.3f} to {probability:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            # after the loop sibling_unclipped is the 'actual' count of the hotspot's res-R hex;
            # divide the hex probability evenly among them
            probability *= 1 / sibling_unclipped

            hspot_writer.writerow([
                hspot['address'], hspot['name'],
                hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{probability:.6f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

        print(f"total of {interactive_hspots} interactive hotspots")
Esempio n. 13
0
def sample_neighbor(hotspots, density_tgt, density_max, R, N):
    """Compute neighbor-clipped hex densities and a reward scale per hotspot.

    Interactive hotspots are counted per hex at resolution ``R`` and each
    count is clipped with the limit returned by ``occupied_neighbors`` (called
    with ``method='neighbors'``). The clipped counts are then rolled up one
    resolution at a time down to resolution 1, clipping every occupied hex the
    same way. Finally two CSV files are written to the current directory:
    ``hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv`` with the
    densities and limit of every hex, and
    ``hotspot_RewardScale_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv``
    with one scale per interactive hotspot. Progress is printed.

    :param hotspots: sized iterable of mappings read through the keys
        ``'location'``, ``'name'``, ``'address'`` and ``'geocode'`` (itself
        holding ``'short_city'`` and ``'short_state'``). Only those accepted
        by ``is_interactive`` (defined outside this function) are used.
    :param density_tgt: target density of a hex at resolution ``R``; it is
        multiplied by 7 for every coarser resolution.
    :param density_max: maximum density of a hex at resolution ``R``, scaled
        the same way.
    :param R: finest resolution considered.
    :param N: threshold forwarded to ``occupied_neighbors``.
    :return: None

    NOTE(review): ``lone_wolfs`` is read below but never assigned in this
    function - presumably a module-level name; verify it exists.
    NOTE(review): with ``R == 1`` the roll-up loop never runs and
    ``occupied_hexs`` is unbound when Part 3 starts - confirm R > 1 is
    guaranteed by callers.
    """

    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    #         hotspots at target resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs.  This sets our initial conditions.  I also track "actual" vs
    # clipped density to find discrepancies
    # hex_density will be keys of hexs (all resolutions) with a value of dict(clipped=0, actual=0, unclipped=0),
    # plus a 'limit' entry added once the hex has been clipped
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1

    # clip every res-R hex against its neighbors and remember the limit that was applied
    for h in hex_density.keys():
        clip = occupied_neighbors(h,
                                  density_tgt,
                                  density_max,
                                  N,
                                  hex_density,
                                  method='neighbors')

        hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
        hex_density[h]['limit'] = clip

    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    # NOTE(review): lone_wolfs is not defined in this function (see docstring)
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")
    # build a set of R resolution hexs, occupied child hexs are how we build occupied hexs for parent levels
    occupied_higher_res = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    #         densities of parent hexs
    # ==============================================================

    # iterate through resolutions from just above target to 1, calculating and clipping hex densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = set(
            [])  # set of occupied hexs at this resolution
        # density target and limit at this resolution.  This is simply scaled up by increased area
        density_res_tgt = density_tgt * 7**(R - resolution)
        density_res_max = density_max * 7**(R - resolution)

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in occupied_higher_res:
            occupied_hexs.add(h3.h3_to_parent(h, resolution))

        for h in occupied_hexs:
            children = h3.h3_to_children(h, resolution + 1)

            # calculate density of this hex by summing the clipped density of its children
            # (children absent from hex_density hold no hotspot and are skipped)
            hex_raw_density = 0
            hex_unclipped_density = 0
            for c in children:
                if c in hex_density:

                    hex_raw_density += hex_density[c]['clipped']
                    hex_unclipped_density += hex_density[c]['actual']
            hex_density[h] = dict(clipped=hex_raw_density,
                                  actual=hex_unclipped_density,
                                  unclipped=hex_raw_density)

        # now that we have unclipped densities of each occupied hex at this resolution, iterate through all occupied
        # hexs again and apply clipping by looking at neighbors:

        for h in occupied_hexs:
            #neigbhors = h3.hex_range(h, 1)
            #neigbhors = h3.h3_to_children(h3.h3_to_parent(h, resolution - 1), resolution)
            clip = occupied_neighbors(h,
                                      density_res_tgt,
                                      density_res_max,
                                      N,
                                      hex_density,
                                      method='neighbors')

            hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
            hex_density[h]['limit'] = clip
        # occupied hex's at this resolution are the child hexs of the next (coarser) pass
        occupied_higher_res = list(occupied_hexs)

        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # NOTE(review): child_hexs is assigned here but never read in this function
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated.  Start here for descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        #print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']

    print(f"total density of all top level hexs = {top_count}")
    # for k in hex_density.keys():
    #     hex_density[k]['border'] = h3.h3_to_geo_boundary(k, False)

    # number of interactive hotspots written to the second CSV
    interactive_hspots = 0

    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
              'w',
              newline='') as csvfile:
        hex_writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow([
            'hex', 'resolution', 'density_clipped', 'density_actual',
            'density_limit'
        ])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual'], hex_density[k]['limit']
            ])

    with open(
            f'hotspot_RewardScale_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w',
            newline='') as csvfile:
        hspot_writer = csv.writer(csvfile,
                                  delimiter=',',
                                  quotechar='"',
                                  quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(
            ['address', 'name', 'city', 'state', 'reward_scale'])
        # iterate through all interactive hotspots and evaluate their reward scale. this will be outputted to CSV
        for hspot in hotspots:
            # walk from resolution R up to 1, multiplying the clipped/unclipped ratio of every ancestor hex
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            scale = 1
            # NOTE(review): probability, sibling_total and sibling_unclipped are assigned in this loop
            # but never written out by this function
            probability = 1
            #for res in range(1, R+1):
            for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                scale_orig = scale
                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                # debug trace for one hard-coded hotspot name
                if hspot['name'] == 'daring-carmine-penguin':
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, scale reduced: {scale_orig:.3f} to {scale:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            hspot_writer.writerow([
                hspot['address'], hspot['name'],
                hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{scale:.5f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

        print(f"total of {interactive_hspots} interactive hotspots")
Esempio n. 14
0
 def safe_h3_to_parent(h3_address):
     """Return the parent of ``h3_address`` at resolution 1."""
     parent = h3.h3_to_parent(h3_address, 1)
     return parent