Example #1
0
def euclidean_distances(hospitals_gdf: gpd.GeoDataFrame,
                        ed_inst_gdf: gpd.GeoDataFrame) -> np.ndarray:
    """Calculate pairwise distances between hospitals and institutions.

    Each entry is ``hospital_geometry.distance(institution_geometry)``. No
    reprojection is done here, so both frames are expected to already share
    one coordinate system (the result is in that system's units).

    :param hospitals_gdf: Frame whose geometries become the matrix rows.
    :param ed_inst_gdf: Frame whose geometries become the matrix columns.
    :return: Array of shape ``(len(hospitals_gdf), len(ed_inst_gdf))``.
    """
    # Materialise the column geometries once instead of rebuilding a full
    # row iterator for every hospital; going through ``.geometry`` also works
    # when the geometry column is not literally named "geometry".
    ed_geometries = list(ed_inst_gdf.geometry)
    distances = np.zeros((len(hospitals_gdf), len(ed_geometries)))
    for hosp_idx, hosp_geometry in enumerate(hospitals_gdf.geometry):
        for ed_idx, ed_geometry in enumerate(ed_geometries):
            distances[hosp_idx, ed_idx] = hosp_geometry.distance(ed_geometry)
    return distances
Example #2
0
def travel_time_distances(travel_time_df: pd.DataFrame,
                          hospitals_gdf: gpd.GeoDataFrame,
                          ed_inst_gdf: gpd.GeoDataFrame,
                          epsilon: float = 1e-4,
                          default_time: float = 10000) -> np.ndarray:
    """Loads precomputed pairwise travel time distances.

    A pair is first looked up by (institution name -> hospital name), with
    commas stripped from the ``NAME`` columns. If that fails, the pair is
    matched on source/destination coordinates within ``epsilon``. Pairs that
    cannot be matched keep ``default_time`` and a warning is printed.

    :param travel_time_df: The precomputed table of pairwise travel times
        (in Olivia Walch's format). Uses the columns ``Source``,
        ``Destination``, ``Time``, ``SourceLat``, ``SourceLong``, ``DestLat``
        and ``DestLong``; sources are educational institutions and
        destinations are hospitals.
    :param hospitals_gdf: Hospitals to calculate travel times for.
    :param ed_inst_gdf: Educational institutions to calculate travel times for.
    :param epsilon: The tolerance used for matching longitudes and latitudes.
    :param default_time: The default time to use when a (hospital,
         educational institution) pair is missing from the travel time data.
    :return: A pairwise distance matrix (rows are hospitals, columns are
        educational institutions). Values are ``Time / 60`` (presumably
        seconds converted to minutes -- confirm against the data source).
    """
    times = np.full((len(hospitals_gdf), len(ed_inst_gdf)), default_time,
                    dtype=float)

    # Keyed by a (source, destination) tuple rather than a joined string so
    # that names containing " -> " cannot collide with another pair.
    travel_time_index = {
        (str(source), str(dest)): raw_time / 60
        for source, dest, raw_time in zip(travel_time_df['Source'],
                                          travel_time_df['Destination'],
                                          travel_time_df['Time'])
    }

    # Institution names do not depend on the hospital, so normalise them once.
    ed_rows = [(getattr(ed_row, 'NAME').replace(',', ''), ed_row)
               for ed_row in ed_inst_gdf.itertuples()]

    df = travel_time_df
    for hosp_idx, hosp_row in enumerate(hospitals_gdf.itertuples()):
        hosp_name = getattr(hosp_row, 'NAME').replace(',', '')
        for ed_idx, (ed_name, ed_row) in enumerate(ed_rows):
            # Try to match based on name, and then fall back to long/lat.
            travel_time = travel_time_index.get((ed_name, hosp_name))
            if travel_time is None:
                ed_lat = getattr(ed_row, 'LATITUDE')
                ed_long = getattr(ed_row, 'LONGITUDE')
                hosp_lat = getattr(hosp_row, 'LATITUDE')
                hosp_long = getattr(hosp_row, 'LONGITUDE')
                matches = df[
                    ((df['SourceLat'] - ed_lat).abs() <= epsilon)
                    & ((df['SourceLong'] - ed_long).abs() <= epsilon)
                    & ((df['DestLat'] - hosp_lat).abs() <= epsilon)
                    & ((df['DestLong'] - hosp_long).abs() <= epsilon)]
                if matches.empty:
                    print('Warning: could not find travel time for',
                          f'{ed_name} -> {hosp_name}')
                    continue  # keep default_time for this pair
                if len(matches) > 1:
                    print('Warning:', ed_name, '->', hosp_name,
                          'ambiguous')
                # On ambiguity the first matching row wins.
                travel_time = matches['Time'].iloc[0] / 60
            times[hosp_idx, ed_idx] = travel_time
    return times
Example #3
0
def overlap(target: GeoDataFrame, tiles: GeoDataFrame, verbose: bool):
    """
    Find all unique tiles that intersects given region, based on max coverage area

    The candidate tiles returned by ``_get_intersect_tiles`` are visited in
    order. A tile is kept only if it covers part of the target that no
    previously visited tile has covered yet. (Presumably the candidates
    arrive sorted by coverage area -- confirm in ``_get_intersect_tiles``.)

    The input ``target`` is not modified: the still-uncovered region is
    tracked in a local variable.

        Parameters
        ----------
        target: GeoDataFrame
            Input Polygon (only the geometry with index label 0 is used)
        tiles: GeoDataFrame
            Tiles (Sentinel2)
        verbose: bool
            verbose mode, if True prints messages
        Returns
        -------
        GeoDataFrame
            Tiles for given Polygon, reprojected to EPSG:4326
    """
    pprint("Start finding overlapping tiles", verbose)
    tiles, epsg = _get_intersect_tiles(target, tiles)

    # Writing back through ``target.geometry[0] = ...`` is a chained
    # assignment: it either mutates the caller's frame or, under pandas
    # copy-on-write, silently does nothing. Track the remainder locally.
    remaining = target.geometry[0]
    result_tiles = []
    for row in tiles.itertuples():
        if remaining.is_empty:
            # Nothing left to cover, so no further tile can contribute.
            break
        uncovered = remaining.difference(row.geometry)
        if uncovered.area != remaining.area:
            result_tiles.append(dict(Name=row.Name, geometry=row.geometry))
        remaining = uncovered

    # NOTE(review): the {'init': ...} CRS form is deprecated in recent pyproj;
    # kept because the exact format of ``epsg`` is not visible here.
    result = gp.GeoDataFrame(result_tiles, crs={'init': epsg})
    result = result.to_crs({'init': 'epsg:4326'})
    pprint("End finding overlapping tiles", verbose)

    return result
Example #4
0
def find_adjacent_lane_boundary(lane_boundaries: gpd.GeoDataFrame, lane,
                                left_right):
    """Pick the lane boundary that best runs alongside ``lane``.

    For every boundary, the lateral offset of its first/last point from the
    lane's first/last point and the heading difference at both ends are
    computed. A boundary qualifies when both offsets lie in ``[-0.1, 3]``
    and both heading differences are at most 30 degrees. Among qualifying
    boundaries, the one with the smallest summed heading difference wins
    (the first one on ties).

    :param lane_boundaries: Candidate boundaries (line geometries).
    :param lane: Row-like object whose ``geometry`` is the lane line.
    :param left_right: ``"right"`` flips the sign of the lateral offsets;
        any other value leaves them as computed.
    :return: The winning row from ``lane_boundaries.itertuples()``, or
        ``None`` when no boundary qualifies.
    """
    lane_pts = [Point(c) for c in lane.geometry.coords]
    lane_head_az = geo_util.calc_azimuth(lane_pts[0], lane_pts[1])
    lane_tail_az = geo_util.calc_azimuth(lane_pts[-2], lane_pts[-1])

    # Acceptance thresholds
    min_offset = -0.1
    max_offset = 3
    max_heading_diff = np.deg2rad(30)

    flip_side = left_right == "right"

    # (score, boundary) pairs for every boundary that passes the thresholds
    scored = []
    for boundary in lane_boundaries.itertuples():
        pts = [Point(c) for c in boundary.geometry.coords]

        head_offset = geo_util.calc_lateral_offset(
            lane_pts[0], pts[0], lane_head_az)
        tail_offset = geo_util.calc_lateral_offset(
            lane_pts[-1], pts[-1], lane_tail_az)
        if flip_side:
            head_offset = -head_offset
            tail_offset = -tail_offset

        head_az = geo_util.calc_azimuth(pts[0], pts[1])
        tail_az = geo_util.calc_azimuth(pts[-2], pts[-1])
        head_diff = abs(geo_util.normalize_radian(head_az - lane_head_az))
        tail_diff = abs(geo_util.normalize_radian(tail_az - lane_tail_az))

        offsets_ok = (min_offset <= head_offset <= max_offset
                      and min_offset <= tail_offset <= max_offset)
        headings_ok = (head_diff <= max_heading_diff
                       and tail_diff <= max_heading_diff)
        if offsets_ok and headings_ok:
            scored.append((head_diff + tail_diff, boundary))

    if not scored:
        return None

    # min() returns the first entry with the lowest score
    best = min(scored, key=lambda pair: pair[0])
    return best[1]
Example #5
0
def save_geopackage(path: str, data: geopandas.GeoDataFrame) -> None:
    """
    Save a `GeoDataFrame` to the disk at the specified path. If the path
    already exists, the existing destination will be overwritten.

    One GeoPackage layer is written per distinct value of the ``name``
    column, and every layer receives the complete ``data`` frame.

    NOTE(review): writing the full frame into each layer looks unintended
    (a per-name subset seems more likely) -- confirm with callers before
    changing it. This function only avoids rewriting a layer once per
    duplicate row.

    :param path: Path used for the output file
    :type path: str

    :param data: Output `GeoDataFrame` instance; must have a ``name`` column
    :type data: GeoDataFrame
    """
    # dict.fromkeys de-duplicates while preserving first-seen order, so each
    # layer is written once instead of once per row carrying that name.
    layer_names = dict.fromkeys(row.name for row in data.itertuples())
    for layer_name in layer_names:
        data.to_file(path, driver="GPKG", layer=layer_name)
Example #6
0
def buffer_geometry(gdf: gpd.GeoDataFrame,
                    width_table: Dict[str, float] = raster_table,
                    default_width: float = 1.5) -> List[Polygon]:
    """Buffer every geometry by a width chosen from its road class.

    The road class is read from the ``highway`` column when the row has one,
    otherwise from ``fclass``. When the class is a list, its first entry is
    used. The buffer distance is ``width_table[road_class]`` (or
    ``default_width`` for unknown classes) multiplied by ``WIDTH_FACTOR``.

    :param gdf: Frame with a ``geometry`` column and a ``highway`` or
        ``fclass`` column.
    :param width_table: Mapping from road class to width.
    :param default_width: Width for classes missing from ``width_table``
        (also used when the class list is empty).
    :return: One buffered geometry per input row, in row order.
    """
    roads: List[Polygon] = []
    for row in gdf.itertuples():
        road_class = row.highway if hasattr(row, 'highway') else row.fclass
        if isinstance(road_class, list):
            # An empty list carries no class; fall through to default_width.
            road_class = road_class[0] if road_class else None
        width = width_table.get(road_class, default_width) * WIDTH_FACTOR
        roads.append(row.geometry.buffer(width))
    return roads
Example #7
0
    def upload_gdf(
            self,
            gdf: gpd.GeoDataFrame,
            class_name: str,
            upload_altitude: bool = True):
        """Upload GeoDataFrame to Parse

        Every row becomes one record: the geometry column holds the value
        returned by ``self.encode_geopoint``, followed by all other columns
        under their original names. All rows are validated and converted
        before anything is uploaded; the records are then sent through
        ``self.upload_batch`` in chunks of 50.

        Args:
            gdf: GeoDataFrame with data to upload
            class_name: name of class to upload data to in Parse
            upload_altitude: whether to upload altitude as an attribute for Point Z geometries. Uploaded as "alt".

        Raises:
            AssertionError: if any geometry is not a Point.
        """
        geom_name = gdf.geometry.name
        column_names = list(gdf.columns)

        json_data = []
        # Plain positional tuples are paired with the real column names:
        # namedtuple rows rename columns that are not valid identifiers, so
        # looking such columns up by attribute would fail.
        for values in gdf.itertuples(index=False, name=None):
            attributes = dict(zip(column_names, values))

            # Make sure that type of geometry is point. Raised explicitly
            # (rather than via ``assert``) so the check survives ``python -O``.
            geom = attributes.pop(geom_name)
            if not isinstance(geom, Point):
                raise AssertionError('Geometry not of type Point')

            # is it a 2D or 3D point?
            coords = list(geom.coords)[0]
            lon = coords[0]
            lat = coords[1]
            alt = coords[2] if len(coords) == 3 else None

            d = {geom_name: self.encode_geopoint(lon=lon, lat=lat)}
            if (alt is not None) and upload_altitude:
                d['alt'] = alt

            # Attribute columns come last, so a column named "alt" overrides
            # the altitude taken from the geometry.
            d.update(attributes)

            json_data.append(d)

        for group in chunker(json_data, 50):
            self.upload_batch(data=group, class_name=class_name)
Example #8
0
def calculate_area(gdf: gpd.GeoDataFrame):
    """Sum the areas of all geometries, each measured in its own UTM zone.

    Every geometry is treated as EPSG:4326 (longitude/latitude). The UTM
    zone is chosen from the geometry's centroid, and the geometry is
    reprojected to that zone before its area is taken. The result is in the
    square of the UTM CRS units (presumably square metres -- confirm).

    :param gdf: Frame with a ``geometry`` column in longitude/latitude.
    :return: Total area over all rows; ``0`` for an empty frame.
    """
    total_area = 0
    for row in gdf.itertuples():
        centroid = row.geometry.centroid
        # The third element of utm.from_latlon's result is used as the zone.
        zone_number = utm.from_latlon(centroid.y, centroid.x)[2]
        crs = CRS.from_dict({
            "proj": "utm",
            "zone": zone_number,
            # The key must be the string "south"; a centroid on or below the
            # equator selects the southern-hemisphere variant.
            "south": centroid.y <= 0,
        })
        crs_code = f"EPSG:{crs.to_authority()[1]}"
        geometry_utm = gpd.GeoSeries([row.geometry],
                                     crs="EPSG:4326").to_crs(crs_code)
        total_area += geometry_utm.area.sum()
    return total_area
def attractivity_matrix():
    """Count nearby activities for every point of interest and save a CSV.

    Points are read from ``"../datasets/" + longlatfile``, using the first
    two columns as x and y. For each point, the activities of each kind
    (bus, nightlife, supermarket, university) whose ``haversine`` distance
    to the point is below ``feature_range[kind]`` are counted. The table is
    written to ``../datasets/final.csv`` without the index.

    NOTE(review): "Lat" is filled from ``poi.y`` and "Long" from ``poi.x``,
    so the input file presumably stores longitude first -- confirm.
    """
    pois = pd.read_csv("../datasets/" + longlatfile)
    pois_geom = [Point(xy) for xy in zip(pois.iloc[:, 0], pois.iloc[:, 1])]
    pois_gdf = GeoDataFrame(None, crs=crs, geometry=pois_geom)

    # One activity frame per feature column, in output column order
    activity_layers = [
        (kind, create_act_gdf(kind))
        for kind in (BUS, NIGHTLIFE, SUPERMARKT, UNIVERSITY)
    ]

    column_names = [
        "Lat", "Long", "public_transport", "nightlife", "shops",
        "near_university"
    ]
    data = pd.DataFrame(None, index=range(len(pois)), columns=column_names)

    def count_in_range(kind, act_gdf, poi):
        # Number of activities of this kind closer than its feature range
        dists = act_gdf["geometry"].apply(lambda geom: haversine(geom, poi))
        return len(act_gdf[dists < feature_range[kind]])

    # The frame holds only the geometry column, so each row is (index, point)
    for i, poi in pois_gdf.itertuples():
        data.iloc[i] = [poi.y, poi.x] + [
            count_in_range(kind, act_gdf, poi)
            for kind, act_gdf in activity_layers
        ]
        print(i)

    with open("../datasets/" + "final.csv", "w") as f:
        data.to_csv(f, index=False)
Example #10
0
def intersect_trail_with_polygons(trail: LineString, gdf: gpd.GeoDataFrame,
                                  key_col: str):
    """Intersect trail with polygons to produce overlapping line segments

    Both trail and gdf must be projected to a projected coordinate system
    before being passed to this function.

    This is used, e.g. to find the portions of the trail that are within
    national parks or national forests.

    Segments from all rows sharing the same `key_col` value are collected
    together, so an area may appear once as a MultiPolygon or several times
    as individual Polygons. (If polygons with the same key overlap each
    other, the shared stretch of trail is counted once per polygon.)

    Args:
        - trail: projected LineString of trail
        - gdf: projected GDF of polygons to find intersections of.
        - key_col: column of GDF to use as keys of dict

    Returns:
        - {key_col: {'geometry': MultiLineString, 'length': float}}
        where `geometry` holds every line where the trail intersects the
        polygons of that key, and `length` is its total length. Both are
        None when the trail does not pass through any polygon of that key.

    Raises:
        - AssertionError: an intersection is a non-empty GeometryCollection.
        - ValueError: an intersection is any other non-line geometry.
    """
    # key -> list of LineString pieces found so far
    segments = {}
    for row in gdf.itertuples():
        # Work on a local variable so the geometry can be replaced if it
        # needs to be made valid. You can't update a namedtuple
        geometry = row.geometry
        if not geometry.is_valid:
            geometry = geometry.buffer(0)

        int_line = trail.intersection(geometry)

        # Register the key even when nothing intersects, so every key of the
        # dataset shows up in the result.
        lines = segments.setdefault(getattr(row, key_col), [])

        # "No intersection" can come back as an empty geometry of different
        # types depending on the Shapely version; treat them all alike.
        if int_line.is_empty:
            continue

        geom_type = int_line.geom_type
        if geom_type == 'LineString':
            lines.append(int_line)
        elif geom_type == 'MultiLineString':
            lines.extend(int_line.geoms)
        elif geom_type == 'GeometryCollection':
            msg = 'If GeometryCollection should not have intersection'
            raise AssertionError(msg)
        else:
            msg = 'intersection of Polygon, LineString should be LineString'
            raise ValueError(msg)

    # Merge the pieces per key and add length in projected coordinates
    intersections = {}
    for key, lines in segments.items():
        if lines:
            merged = MultiLineString(lines)
            intersections[key] = {'geometry': merged, 'length': merged.length}
        else:
            intersections[key] = {'geometry': None, 'length': None}

    return intersections
Example #11
0
def _tile_split_line(tile, divisor: int):
    """Build the cut line that divides ``tile``'s bounding box in 2 or 3."""
    xmin, ymin, xmax, ymax = tile.bounds
    width = abs(xmax - xmin)
    height = abs(ymax - ymin)

    # The cut runs across the longer side and overshoots the bounds by 1 on
    # each end so that it fully crosses the tile.
    if divisor == 3:
        # A single line making two cuts; the connecting segment lies outside
        # the bounding box, so it does not cut the tile itself.
        if width > height:
            return sh_geom.LineString([
                (xmin + width / 3, ymin - 1),
                (xmin + width / 3, ymax + 1),
                (xmin + 2 * width / 3, ymax + 1),
                (xmin + 2 * width / 3, ymin - 1)
            ])
        return sh_geom.LineString([
            (xmin - 1, ymin + height / 3),
            (xmax + 1, ymin + height / 3),
            (xmax + 1, ymin + 2 * height / 3),
            (xmin - 1, ymin + 2 * height / 3)
        ])

    if width > height:
        return sh_geom.LineString([
            (xmin + width / 2, ymin - 1),
            (xmin + width / 2, ymax + 1)
        ])
    return sh_geom.LineString([
        (xmin - 1, ymin + height / 2),
        (xmax + 1, ymin + height / 2)
    ])


def split_tiles(input_tiles: gpd.GeoDataFrame,
                nb_tiles_wanted: int) -> gpd.GeoDataFrame:
    """Split tiles into smaller ones until enough tiles are available.

    If ``input_tiles`` is empty or already holds at least
    ``nb_tiles_wanted`` tiles, it is returned unchanged. Otherwise every
    tile is repeatedly cut in 2 or 3 across its longer side until the
    per-tile target ratio ``nb_tiles_wanted / len(input_tiles)`` is used up.

    :param input_tiles: Tiles to split.
    :param nb_tiles_wanted: Number of tiles aimed for. The result can hold
        more, because each split step divides by a whole 2 or 3.
    :return: ``input_tiles`` itself when no splitting is needed; otherwise a
        new GeoDataFrame containing only the split geometries, in the CRS of
        ``input_tiles`` (other columns are not carried over).
    """
    nb_tiles = len(input_tiles)
    # An empty grid cannot be split (and would divide by zero below).
    if nb_tiles == 0 or nb_tiles >= nb_tiles_wanted:
        return input_tiles

    nb_tiles_ratio_target = nb_tiles_wanted / nb_tiles

    result_tiles = []
    for tile in input_tiles.itertuples():
        # For this tile, keep splitting until the remaining ratio is <= 1
        ratio_todo = nb_tiles_ratio_target
        tiles_being_split = [tile.geometry]
        while ratio_todo > 1:
            # Split in 3 only when the remaining ratio rounds to exactly 3
            divisor = 3 if round(ratio_todo) == 3 else 2
            ratio_todo /= divisor

            tiles_being_split = [
                part
                for tile_to_split in tiles_being_split
                for part in sh_ops.split(
                    tile_to_split,
                    _tile_split_line(tile_to_split, divisor)).geoms
            ]
        result_tiles.extend(tiles_being_split)

    return gpd.GeoDataFrame(geometry=result_tiles, crs=input_tiles.crs)
Example #12
0
# Arrow components for each station: the Gradient is scaled by pi / 10000
# and used as an angle, giving a unit vector (cos -> U1, sin -> V1).
# NOTE(review): the 10000 divisor presumably maps the expected Gradient range
# onto an angle -- confirm against how mk_bj_results is produced.
gpd_bj_stations['U1'] = np.array(
    np.cos(mk_bj_results.Gradient * (np.pi) / 10000))
gpd_bj_stations['V1'] = np.array(
    np.sin(mk_bj_results.Gradient * (np.pi) / 10000))
# p1 is compared against significance thresholds when the arrows are drawn.
gpd_bj_stations['p1'] = np.array(mk_bj_results.Significance)
fig, ax = plt.subplots()
ax.set_aspect('equal')
# Base layer drawn as a white fill with a black outline.
bjsp.plot(ax=ax, color='white', edgecolor='k')
# One marker colour per frame (presumably one frame per station category).
df_clean.plot(ax=ax, marker='o', color='blue', markersize=25)
df_regionalbg.plot(ax=ax, marker='o', color='green', markersize=25)
df_suburban.plot(ax=ax, marker='o', color='orange', markersize=25)
df_traffic.plot(ax=ax, marker='o', color='red', markersize=25)
df_urban.plot(ax=ax, marker='o', color='purple', markersize=25)
# Legend labels come from `types`; presumably listed in the same order as the
# layers plotted above -- TODO confirm.
plt.legend(types)
#ax.quiver(gpd_bj_stations['X'], gpd_bj_stations['Y'], gpd_bj_stations['U1'], gpd_bj_stations['V1'], width = 0.005)
for row in gpd_bj_stations.itertuples():
    if row.p1 <= 0.05 and row.V1 > 0:
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='maroon', width=0.005)
    if 0.05 < row.p1 <= 0.10 and row.V1 > 0:
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='orangered', width=0.005)
    if 0.10 < row.p1 <= 0.34 and row.V1 > 0:
        ax.quiver(row.X,
                  row.Y,
                  row.U1,
                  row.V1,
                  color='darkgoldenrod',
                  width=0.005)
    if row.p1 <= 0.05 and row.V1 < 0:
        ax.quiver(row.X,
                  row.Y,
                  row.U1,
Example #13
0
    def _load_db_from_df(self, stations_gdf: gpd.GeoDataFrame) -> str:
        """
        Load the DB based on a data frame.

        NUTS regions are added to the frame first. Rows whose ``Countrycode``
        is missing from the country lookup are skipped. Rows that fail to be
        stored are skipped as well, since duplicates are expected when new
        files are loaded. Existing entries are not deleted beforehand.

        :param stations_gdf: Station meta data, one station per row.
        :return: A summary message with the number of stations loaded.
        """
        # make a lookup for country_code
        country_lookup = EUCountries.get_country_code_lookup()

        stations_gdf = EEAStationDataSource._add_nuts_regions_to_df(stations_gdf)

        self.logger.info(f"Loading {len(stations_gdf)} stations to local data base ...")

        count = 0
        r_list = []
        # itertuples() is a generator without a length, so pass the total
        # explicitly to let the progress bar show completion.
        rows = tqdm(stations_gdf.itertuples(),
                    total=len(stations_gdf),
                    desc='loading stations')
        for row in rows:

            country_code = country_lookup.get(row.Countrycode)
            if country_code is None:
                self.logger.debug(f"{row.Countrycode} skipped.  Not in NUTS regions.")
                continue

            try:
                # NOTE(review): objects.create() already saves the record, so
                # the bulk_create calls below re-submit saved rows and rely on
                # ignore_conflicts. Building unsaved instances and only
                # bulk-creating them is probably what was intended -- confirm
                # before changing, since `count` and the duplicate handling
                # depend on create() raising here.
                r_list.append(ObservationStation.objects.create(
                    air_quality_station=row.AirQualityStation,
                    country_code=country_code,
                    air_quality_network=row.AirQualityNetwork,
                    air_quality_station_eoicode=row.AirQualityStationEoICode,
                    air_quality_station_natcode=row.AirQualityStationNatCode,
                    projection=row.Projection,
                    longitude=row.Longitude,
                    latitude=row.Latitude,
                    altitude=row.Altitude,
                    # nuts_0=row.NUTS_0,
                    nuts_1=row.NUTS_1,
                    nuts_2=row.NUTS_2,
                    nuts_3=row.NUTS_3,
                    air_quality_station_area=row.AirQualityStationArea))

                count += 1

                if count % 10000 == 0:
                    ObservationStation.objects.bulk_create(r_list, ignore_conflicts=True)
                    r_list.clear()

            except Exception as e:
                # Duplicates are expected as new files are loaded, so the row
                # is skipped; the reason is still logged so that unexpected
                # failures are not completely invisible.
                self.logger.debug(f"Station row skipped: {e!r} ({row!r})")
                continue

        if len(r_list) > 0:
            ObservationStation.objects.bulk_create(r_list, ignore_conflicts=True)
            r_list.clear()

        self.logger.info(f"Done downloading station meta data.  Loaded {count} stations.")

        return f"Loaded {count} stations."