Beispiel #1
0
    def convert_gazetteer_files_to_dataframe(
        self, gazetteer_files: List[GazetteerFile]
    ) -> Optional[DataFrame]:
        """Convert one or more Gazetteer files to a dataframe.

        Skips over null values produced by invalid responses from the
        Gazetteer file endpoint.

        Args:
            gazetteer_files: parsed Gazetteer tables; entries may be None
                when the upstream endpoint returned an invalid response.

        Returns:
            A single concatenated GeoDataFrame, or None when every input
            was null.
        """
        # EPSG code for the NAD 83 coordinate reference system used by
        # Census Gazetteer files.
        nad_83_epsg = 4269
        subsets = []
        # Skip null entries with a plain generator expression instead of
        # filter(partial(is_not, None), ...).
        gazetteer_tables: Iterable[DataFrame] = (
            table for table in gazetteer_files if table is not None
        )
        for gazetteer_table in gazetteer_tables:
            subset = GeoDataFrame(
                gazetteer_table,
                geometry=gpd.points_from_xy(
                    gazetteer_table['INTPTLONG'], gazetteer_table['INTPTLAT']
                ),
                # Set the CRS at construction time; assigning to `.crs`
                # after the fact is deprecated in geopandas.
                crs=f'EPSG:{nad_83_epsg}',
            )
            subset['gazetteer_geo_id'] = subset.apply(
                lambda row: normalize_geo_id(row['GEOID'], row['gazetteer_geo_type']),
                axis=1,
            )
            subsets.append(subset)

        # Return null value if no dataframes were obtained
        if not subsets:
            return None

        # Concatenate dataframes and return
        dataframe: GeoDataFrame = pd.concat(subsets)
        return dataframe
    def plot_metric(ax: Axes, metric: PlotMetric,
                    gdf: GeoDataFrame) -> Axes:  # type: ignore
        """Draw `metric` from `gdf` onto `ax`, with a slim colorbar."""
        # Reserve a narrow axis on the right-hand side for the colorbar so
        # it does not shrink the map area itself.
        axis_divider = make_axes_locatable(ax)
        colorbar_ax = axis_divider.append_axes("right", size="5%", pad=0.1)

        gdf.plot(  # type: ignore
            metric.metric,
            ax=ax,
            legend=True,
            cmap=metric.cmap,
            vmin=metric.vmin,
            vmax=metric.vmax,
            cax=colorbar_ax,
        )
        ax.set_title(f'{metric.metric.upper()}')
        return ax
Beispiel #3
0
    def plot_all_regional_error_metrics(
        self,
        gdf: GeoDataFrame,  # type: ignore
        title: str = "",
        **kwargs: Dict,
    ) -> Tuple[Figure, List[Axes]]:
        """Plot area-based maps of the scores"""
        assert np.isin(["rmse", "mae", "r2"],
                       gdf.columns).all()  # type: ignore
        # Regions missing any of the three scores cannot be drawn.
        gdf = gdf.dropna(subset=["rmse", "mae", "r2"])  # type: ignore

        # get the PlotMetric objects, one per score column
        metrics = [
            self.get_metric(name, gdf, **kwargs)
            for name in ("rmse", "mae", "r2")
        ]

        # build multi-axis plot: one map per metric, side by side
        fig, axs = plt.subplots(1, 3, figsize=(12, 8))
        for ax, metric in zip(axs, metrics):
            self.plot_metric(gdf=gdf, ax=ax, metric=metric)
            ax.axis("off")

        fig.suptitle(title)
        return fig, axs
Beispiel #4
0
def main():
    """Tag each row of a user-selected CSV with the US state containing
    its lat/lon point, then write the result out to a new CSV."""
    filenames = getFileNames()

    # f-strings used throughout for consistency with the rest of the file.
    print(f"Loading Shapefiles from {filenames['shapefile']}")
    # Open up the shapefile
    states = GeoDataFrame.from_file(filenames["shapefile"])
    # Set the 'Name' column as the index so states can be looked up by name
    states.set_index("NAME", inplace=True)
    print("Done!")

    print(f"Loading data from {filenames['data']}")
    # Load the file the user wants to process
    data = LoadCSV(filenames["data"])
    print("Done!")

    # list out the columns in the data file
    PickColumns(data)
    # ask the user which columns they want to use for lat/lon; convert the
    # answers to int because they are used as positional indices later on
    lat_col = int(prompt.query("Please enter the column number for Latitude: ",
                               default="7", validators=[]))
    lon_col = int(prompt.query("Please enter the column number for Longitude: ",
                               default="8", validators=[]))

    # Add a State column to the data file
    data = AddColumn(data, "State")

    # Process each row and add the state name to the new column
    data["State"] = data.apply(
        lambda row: GetStateFromPoint(row, lat_col, lon_col, states), axis=1)
    print(f"Writing file to {filenames['output']}")
    data.to_csv(filenames["output"])
    print("Done!")
Beispiel #5
0
    def join_model_performances_to_geometry(
            self, model_performance_df: pd.DataFrame,
            admin_name: str) -> GeoDataFrame:  # type: ignore
        """Join the `geometry` column from the shapefile read in as GeoDataFrame
        to the model performance metrics in model_performance_df. Required to
        make spatial plots of data.

        Arguments:
        ---------
        model_performance_df: pd.DataFrame
            the data showing the model performance for each region

        admin_name: str
            the name of the administrative units (shapefile name) stored in
            `self.region_gdfs.keys()`
        """
        # list() replaces the redundant `[k for k in d.keys()]` idiom.
        valid_names = list(self.region_gdfs.keys())
        assert admin_name in valid_names, (
            "Invalid "
            f"`admin_name`. Expected one of: {valid_names}"
            f" Got: {admin_name}")
        gdf = self.region_gdfs[admin_name].gdf
        gdf_colname = self.region_gdfs[admin_name].gdf_colname
        # Strip surrounding whitespace in one pass; str.strip is exactly
        # equivalent to chaining str.rstrip then str.lstrip.
        gdf[gdf_colname] = gdf[gdf_colname].apply(str.strip)

        df_colname = "region_name"
        out_gdf = GeoDataFrame(  # type: ignore
            pd.merge(
                model_performance_df,
                gdf[[gdf_colname, "geometry"]],
                left_on=df_colname,
                right_on=gdf_colname,
            ))
        return out_gdf
Beispiel #6
0
 def get_choices(self, layer):
     """Yield (value, label) choice pairs, one per column of the layer's
     features, discovered by fetching a single sample feature."""
     # NOTE: get_schema (DescribeFeatureType) returns mangled property
     # names, so fetch one real feature and read its columns instead.
     response = layer.server.service.getfeature(typename=layer.layername,
                                                maxfeatures=1,
                                                outputFormat='GeoJSON')
     feature_collection = json.loads(response.read())
     frame = GeoDataFrame.from_features(feature_collection)
     return ((column, column) for column in frame.columns)
def get_average_prices(ad_type, asset_type):
    """Return a GeoDataFrame of average prices per postcode for the given
    ad type and asset type.

    Postcodes that cannot be resolved to a geometry are skipped.
    """
    qfilter = {'ad_type': ad_type, 'asset_type': asset_type}
    prices = session.execute(
        'SELECT postcode, avg_price FROM inmosig_average_prices WHERE ad_type = :ad_type AND '
        'asset_type = :asset_type', qfilter)

    # Collect plain records first and build the frame once:
    # DataFrame.append was removed in pandas 2.0 and the repeated
    # append-and-copy loop was O(n^2) anyway.
    records = []
    for price in prices.fetchall():
        postcode = postcode_dao.search_by_postcode(price[0])
        if postcode is not None:
            records.append({
                'geometry': to_shape(postcode.geom),
                'price': float(price[1]),
                'postcode': price[0],
            })

    return GeoDataFrame(records, columns=['geometry', 'price', 'postcode'])
Beispiel #8
0
def extract_footprint_from_prism(path):
    """Parse a CityGML (1.0 or 2.0) document and return a GeoDataFrame of
    the ground-floor (height 0.0) room footprints of every building."""
    citygml = etree.parse(path)
    root = citygml.getroot()
    # Pick the namespace set matching the CityGML version of the document.
    if root.tag == "{http://www.opengis.net/citygml/1.0}CityModel":
        ns_citygml = "http://www.opengis.net/citygml/1.0"
        ns_bldg = "http://www.opengis.net/citygml/building/1.0"
    else:
        ns_citygml = "http://www.opengis.net/citygml/2.0"
        ns_bldg = "http://www.opengis.net/citygml/building/2.0"

    footprints_by_floor = {}

    # iter() and direct child iteration replace getiterator()/getchildren(),
    # which were removed from ElementTree in Python 3.9.
    city_objects = list(root.iter('{%s}cityObjectMember' % ns_citygml))

    buildings = [
        child
        for city_object in city_objects
        for child in city_object
        if child.tag == '{%s}Building' % ns_bldg
    ]

    for b in buildings:
        for room in room_finder(b):
            polys = polygon_finder(room)
            footprint, height = extract_footprint(polys)
            # Group footprints by their floor height.
            footprints_by_floor.setdefault(height, []).append(
                Polygon(footprint))

    footprints_of_buildings = GeoDataFrame()
    # Only the ground floor (height 0.0) is kept; guard against documents
    # with no ground-floor rooms instead of raising KeyError.
    for i, ground_footprint in enumerate(footprints_by_floor.get(0.0, [])):
        footprints_of_buildings.loc[i, 'geometry'] = ground_footprint

    return footprints_of_buildings
def _plot_single_gdf(ax: Axes,
                     gdf: GeoDataFrame,
                     column_to_plot: str,
                     title: Optional[str] = None,
                     cmap: Optional[str] = 'viridis',
                     vmin: Optional[float] = None,
                     vmax: Optional[float] = None) -> Axes:
    """Plot one column of `gdf` on `ax` with a colorbar and set the title."""
    # Carve out a slim axis to the right of the map to host the colorbar.
    axis_divider = make_axes_locatable(ax)
    colorbar_axis = axis_divider.append_axes("right", size="5%", pad=0.1)

    gdf.plot(
        column_to_plot,
        ax=ax,
        legend=True,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        cax=colorbar_axis,
    )
    ax.set_title(title)
    return ax
    def plot_regional_error_metric(
            self,
            gdf: GeoDataFrame,  # type: ignore
            selection: str) -> Tuple[Figure, Axes]:
        """Plot a single error metric ('rmse', 'mae' or 'r2') as a map."""
        valid_metrics = ['rmse', 'mae', 'r2']
        assert selection in valid_metrics, 'Expecting selection' \
            f' to be one of: {valid_metrics}'
        # Rows missing any score cannot be drawn; drop them up front.
        gdf = gdf.dropna(subset=valid_metrics)  # type: ignore
        chosen_metric = self.get_metric(selection)
        fig, ax = plt.subplots()
        self.plot_metric(gdf=gdf, ax=ax, metric=chosen_metric)

        return fig, ax
Beispiel #11
0
    def plot_regional_error_metric(
        self,
        gdf: GeoDataFrame,  # type: ignore
        selection: str,
        **kwargs: Dict,
    ) -> Tuple[Figure, Axes]:
        """Map a single regional error metric chosen by `selection`."""
        valid_metrics = ["rmse", "mae", "r2"]
        assert selection in valid_metrics, ("Expecting selection"
                                            f" to be one of: {valid_metrics}")
        # Remove regions lacking any of the three scores before plotting.
        gdf = gdf.dropna(subset=valid_metrics)  # type: ignore
        chosen_metric = self.get_metric(selection, gdf, **kwargs)
        fig, ax = plt.subplots()
        ax = self.plot_metric(gdf=gdf, ax=ax, metric=chosen_metric)

        return fig, ax
Beispiel #12
0
def slice_polys(imgf, size=(512, 512), overlap=6):
    """
    Get Polygons Corresponding to Slices
    """
    # Pixel offsets at which each (overlapping) tile starts.
    row_starts = np.arange(0, imgf.meta["height"], size[0] - overlap)
    col_starts = np.arange(0, imgf.meta["width"], size[1] - overlap)
    # Geographic coordinate of every pixel row / column.
    lats = np.linspace(imgf.bounds.bottom, imgf.bounds.top, imgf.meta["height"])
    longs = np.linspace(imgf.bounds.left, imgf.bounds.right, imgf.meta["width"])

    # One bounding box per tile, spanning consecutive start offsets.
    polys = [
        shapely.geometry.box(longs[col_starts[j]], lats[row_starts[i]],
                             longs[col_starts[j + 1]], lats[row_starts[i + 1]])
        for i in range(len(row_starts) - 1)
        for j in range(len(col_starts) - 1)
    ]

    return GeoDataFrame(geometry=polys, crs=imgf.meta["crs"].to_string())
    def plot_all_regional_error_metrics(
            self,
            gdf: GeoDataFrame,  # type: ignore
            title: str = '') -> Tuple[Figure, List[Axes]]:
        """Plot area-based maps of the scores (rmse, mae, r2), one per axis."""
        assert np.isin(['rmse', 'mae', 'r2'],
                       gdf.columns).all()  # type: ignore
        gdf = gdf.dropna(subset=['rmse', 'mae', 'r2'])  # type: ignore

        # get the PlotMetric objects
        rmse = self.get_metric('rmse')
        mae = self.get_metric('mae')
        r2 = self.get_metric('r2')

        # build multi-axis plot
        fig, axs = plt.subplots(1, 3, figsize=(24, 6))
        for i, metric in enumerate([rmse, mae, r2]):
            ax = axs[i]
            ax = self.plot_metric(gdf=gdf, ax=ax, metric=metric)

        # BUG FIX: matplotlib Figure has no `set_suptitle` method; the
        # correct API is `suptitle` (the original raised AttributeError).
        fig.suptitle(title)
        return fig, axs
Beispiel #14
0
    def merge_all_model_performances_gdfs(
            self, all_models_df: pd.DataFrame) -> GeoDataFrame:  # type: ignore
        """Join geometries onto per-admin-level model performance rows and
        concatenate everything into one GeoDataFrame, stored on `self.gdf`
        and returned."""
        all_gdfs: List[GeoDataFrame] = []  # type: ignore
        # Message fixed: it previously named the wrong column
        # ("admin_region") and lacked a space between the joined strings.
        assert "admin_level_name" in all_models_df.columns, (
            "Expect to find admin_level_name "
            f"in {all_models_df.columns}")

        # join the geometry columns to make GeoDataFrames
        for admin_name in all_models_df.admin_level_name.unique():
            admin_level_df = all_models_df.loc[all_models_df.admin_level_name
                                               == admin_name]
            all_gdfs.append(
                self.join_model_performances_to_geometry(
                    model_performance_df=admin_level_df,
                    admin_name=admin_name))

        self.gdf = pd.concat(all_gdfs)

        # convert mean model outputs to float; fall back to the error-metric
        # columns when the mean-value columns are absent (astype raises
        # KeyError for missing columns)
        try:
            self.gdf = self.gdf.astype(  # type: ignore
                {
                    "predicted_mean_value": "float64",
                    "true_mean_value": "float64"
                })
        except KeyError:
            self.gdf = self.gdf.astype(  # type: ignore
                {
                    "rmse": "float64",
                    "mae": "float64",
                    "r2": "float64"
                })
        print("* Assigned the complete GeoDataFrame to `RegionGeoPlotter.gdf`")

        # pd.concat may downgrade to a plain DataFrame; restore the geo type
        if not isinstance(self.gdf, GeoDataFrame):  # type: ignore
            self.gdf = GeoDataFrame(self.gdf)  # type: ignore

        return self.gdf
Beispiel #15
0
def simplify_and_mapping(data_source):
    """Compare Douglas-Peucker against the indoor (prism) simplification on
    a set of building footprints, print summary statistics, and save an
    overlay map with a comparison table to ../examples/<data_source>.pdf.
    """
    if data_source == 'lwm':
        tau = 2
        buildings = extract_footprint_from_prism('../data/lwm-prism.gml')
    else:  # Only for Manhattan, New York
        tau = 0.00003
        buildings = ox.footprints_from_place(
            '{}, Manhattan, New York City'.format(data_source))
    douglas_peucker_buildings = GeoDataFrame()
    simplified_buildings = GeoDataFrame()
    # Accumulators indexed [0] = Douglas-Peucker, [1] = indoor simplification.
    sum_haus = [0.0, 0.0]
    total_points = [0, 0]
    tolerance = tau * 3 / 5

    def comparison(footprint, i):
        # Simplify with the indoor (prism) algorithm; skip footprints it
        # cannot handle (returns None).
        new_footprint = prism.simplify(footprint,
                                       tau=tau,
                                       epsilon=math.pi / 30)
        if new_footprint is not None:
            simplified_buildings.loc[i, 'geometry'] = new_footprint
            haus = footprint.hausdorff_distance(new_footprint)
            sum_haus[1] += haus
            total_points[1] += len(new_footprint.exterior.coords)

            # Baseline: Douglas-Peucker via shapely's simplify.
            dp_footprint = footprint.simplify(tolerance)
            douglas_peucker_buildings.loc[i, 'geometry'] = dp_footprint

            haus = footprint.hausdorff_distance(dp_footprint)
            sum_haus[0] += haus
            total_points[0] += len(dp_footprint.exterior.coords)

    count = 0
    for geom in buildings['geometry']:
        if geom.geom_type == 'Polygon':
            comparison(geom, count)
            count += 1
        if geom.geom_type == 'MultiPolygon':
            # BUG FIX: iterating a MultiPolygon directly was removed in
            # Shapely 2.0; use the `.geoms` accessor (works in 1.x too).
            for poly in geom.geoms:
                comparison(poly, count)
                count += 1

    print("Average Hausdorff Distance (Douglas Peucker):", sum_haus[0] / count)
    print("Average Hausdorff Distance (Indoor Simplification):",
          sum_haus[1] / count)
    print("Total Number of Points (Douglas Peucker):", total_points[0])
    print("Total Number of Points (Indoor Simplification):", total_points[1])

    cell_text = [[tolerance, tau], [sum_haus[0] / count, sum_haus[1] / count],
                 [total_points[0], total_points[1]]]
    # mapping: scale the figure to the aspect ratio of the footprint extent
    minx, miny, maxx, maxy = buildings.total_bounds
    map_scale = 50
    width = maxx - minx
    height = maxy - miny
    ratio = width / height
    mbr = (ratio * map_scale, map_scale)
    fig, ax = plt.subplots(figsize=mbr)
    buildings.plot(ax=ax,
                   facecolor='green',
                   edgecolor='grey',
                   linewidth=0.2,
                   alpha=0.1)
    douglas_peucker_buildings.plot(ax=ax, facecolor='blue', alpha=0.1)
    simplified_buildings.plot(ax=ax, facecolor='red', alpha=0.1)

    ax.table(cellText=cell_text,
             rowLabels=[
                 "Distance Tolerance", "Average Hausdorff Distance",
                 "Total Number of Points"
             ],
             colLabels=["Douglas Peucker", "Indoor Simplification"],
             colWidths=[0.05 / ratio, 0.05 / ratio],
             loc='lower right')

    legend_elements = [
        Patch(facecolor='green',
              edgecolor='grey',
              linewidth=0.2,
              alpha=0.1,
              label='Original'),
        Patch(facecolor='blue', alpha=0.1, label='Douglas Peucker'),
        Patch(facecolor='red', alpha=0.1, label='Indoor Simplification')
    ]
    ax.legend(handles=legend_elements,
              loc='upper right',
              title='Simplification Method',
              fontsize=map_scale,
              title_fontsize=map_scale)
    plt.tight_layout()
    plt.savefig('../examples/{}.pdf'.format(data_source), format='pdf')
Beispiel #16
0
postcode_dao = PostcodeDAO(session)
way_dao = WayDAO(session)

# Street representation with GeoSeries: extract every street geometry and
# plot them as the base map layer.
ways = [to_shape(way.geom) for way in way_dao.getAll()]
wgs = GeoSeries(ways)
base = wgs.plot(color="blue")

qfilter = {'ad_type': 'RENT', 'asset_type': 'GARAGE'}
prices = session.execute(
    'SELECT postcode, avg_price FROM inmosig_average_prices WHERE ad_type = :ad_type AND '
    'asset_type = :asset_type', qfilter)

# Build plain records and construct the frame once: DataFrame.append was
# removed in pandas 2.0 and the repeated append loop was quadratic anyway.
# Postcodes that cannot be resolved to a geometry are skipped.
records = []
for price in prices.fetchall():
    postcode = postcode_dao.search_by_postcode(price[0])
    if postcode is not None:
        records.append({
            'geometry': to_shape(postcode.geom),
            'price': float(price[1]),
            'postcode': price[0]
        })
gdf = GeoDataFrame(records, columns=['geometry', 'price', 'postcode'])

gdf.head()
# Overlay average prices (quantile colour scheme) on top of the streets.
gdf.plot(ax=base, column='price', cmap='OrRd', scheme="quantiles", legend=True)
plt.show()
Beispiel #17
0
 def to_crs(self, epsg_to: str) -> BaseGeometry:
     """Reproject this geometry to `epsg_to`; return it unchanged when it
     is already in that CRS."""
     if self.epsg_code == epsg_to:
         return self.geo
     # Wrap the single geometry in a GeoDataFrame purely to reuse its
     # CRS-transformation machinery, then unwrap the reprojected result.
     wrapper = GeoDataFrame(crs=self.epsg_code, geometry=[self.geo])
     return wrapper.to_crs(epsg_to).geometry.values[0]